Commit a2608432 authored by Thomas Hellström's avatar Thomas Hellström
Browse files

drm/pagemap, drm/xe: Manage drm_pagemap provider lifetimes



If a device holds a reference on a foregin device's drm_pagemap,
and a device unbind is executed on the foreign device,
Typically that foreign device would evict its device-private
pages and then continue its device-managed cleanup eventually
releasing its drm device and possibly allow for module unload.
However, since we're still holding a reference on a drm_pagemap,
when that reference is released and the provider module is
unloaded we'd execute out of undefined memory.

Therefore keep a reference on the provider device and module until
the last drm_pagemap reference is gone.

Note that in theory, the drm_gpusvm_helper module may be unloaded
as soon as the final module_put() of the provider driver module is
executed, so we need to add a module_exit() function that waits
for the work item executing the module_put() has completed.

v2:
- Better commit message (Matt Brost)

Signed-off-by: default avatarThomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: default avatarMatthew Brost <matthew.brost@intel.com>
Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> # For merging through drm-xe.
Link: https://patch.msgid.link/20251219113320.183860-7-thomas.hellstrom@linux.intel.com
parent 565477db
Loading
Loading
Loading
Loading
+97 −4
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@
#include <linux/pagemap.h>
#include <drm/drm_drv.h>
#include <drm/drm_pagemap.h>
#include <drm/drm_print.h>

/**
 * DOC: Overview
@@ -549,16 +550,92 @@ static int drm_pagemap_migrate_populate_ram_pfn(struct vm_area_struct *vas,
	return -ENOMEM;
}

static void drm_pagemap_dev_unhold_work(struct work_struct *work);
static LLIST_HEAD(drm_pagemap_unhold_list);
static DECLARE_WORK(drm_pagemap_work, drm_pagemap_dev_unhold_work);

/**
 * struct drm_pagemap_dev_hold - Struct to aid in drm_device release.
 * @link: Link into drm_pagemap_unhold_list for deferred reference releases.
 * @drm: drm device to put.
 *
 * When a struct drm_pagemap is released, we also need to release the
 * reference it holds on the drm device. However, typically that needs
 * to be done separately from a system-wide workqueue.
 * Each time a struct drm_pagemap is initialized
 * (or re-initialized if cached) therefore allocate a separate
 * drm_pagemap_dev_hold item, from which we put the drm device and
 * associated module.
 */
struct drm_pagemap_dev_hold {
	struct llist_node link;
	struct drm_device *drm;
};

static void drm_pagemap_release(struct kref *ref)
{
	struct drm_pagemap *dpagemap = container_of(ref, typeof(*dpagemap), ref);
	struct drm_pagemap_dev_hold *dev_hold = dpagemap->dev_hold;

	/*
	 * We know the pagemap provider is alive at this point, since
	 * the struct drm_pagemap_dev_hold holds a reference to the
	 * pagemap provider drm_device and its module.
	 */
	dpagemap->dev_hold = NULL;
	kfree(dpagemap);
	llist_add(&dev_hold->link, &drm_pagemap_unhold_list);
	schedule_work(&drm_pagemap_work);
	/*
	 * Here, either the provider device is still alive, since if called from
	 * page_free(), the caller is holding a reference on the dev_pagemap,
	 * or if called from drm_pagemap_put(), the direct caller is still alive.
	 * This ensures we can't race with THIS module unload.
	 */
}

static void drm_pagemap_dev_unhold_work(struct work_struct *work)
{
	struct llist_node *node = llist_del_all(&drm_pagemap_unhold_list);
	struct drm_pagemap_dev_hold *dev_hold, *next;

	/*
	 * Deferred release of drm_pagemap provider device and module.
	 * THIS module is kept alive during the release by the
	 * flush_work() in the drm_pagemap_exit() function.
	 */
	llist_for_each_entry_safe(dev_hold, next, node, link) {
		struct drm_device *drm = dev_hold->drm;
		struct module *module = drm->driver->fops->owner;

		drm_dbg(drm, "Releasing reference on provider device and module.\n");
		drm_dev_put(drm);
		module_put(module);
		kfree(dev_hold);
	}
}

static struct drm_pagemap_dev_hold *
drm_pagemap_dev_hold(struct drm_pagemap *dpagemap)
{
	struct drm_pagemap_dev_hold *dev_hold;
	struct drm_device *drm = dpagemap->drm;

	dev_hold = kzalloc(sizeof(*dev_hold), GFP_KERNEL);
	if (!dev_hold)
		return ERR_PTR(-ENOMEM);

	init_llist_node(&dev_hold->link);
	dev_hold->drm = drm;
	(void)try_module_get(drm->driver->fops->owner);
	drm_dev_get(drm);

	return dev_hold;
}

/**
 * drm_pagemap_create() - Create a struct drm_pagemap.
 * @dev: Pointer to a struct device providing the device-private memory.
 * @drm: Pointer to a struct drm_device providing the device-private memory.
 * @pagemap: Pointer to a pre-setup struct dev_pagemap providing the struct pages.
 * @ops: Pointer to the struct drm_pagemap_ops.
 *
@@ -568,20 +645,28 @@ static void drm_pagemap_release(struct kref *ref)
 * Error pointer on error.
 */
struct drm_pagemap *
drm_pagemap_create(struct device *dev,
drm_pagemap_create(struct drm_device *drm,
		   struct dev_pagemap *pagemap,
		   const struct drm_pagemap_ops *ops)
{
	struct drm_pagemap *dpagemap = kzalloc(sizeof(*dpagemap), GFP_KERNEL);
	struct drm_pagemap_dev_hold *dev_hold;

	if (!dpagemap)
		return ERR_PTR(-ENOMEM);

	kref_init(&dpagemap->ref);
	dpagemap->dev = dev;
	dpagemap->drm = drm;
	dpagemap->ops = ops;
	dpagemap->pagemap = pagemap;

	dev_hold = drm_pagemap_dev_hold(dpagemap);
	if (IS_ERR(dev_hold)) {
		kfree(dpagemap);
		return ERR_CAST(dev_hold);
	}
	dpagemap->dev_hold = dev_hold;

	return dpagemap;
}
EXPORT_SYMBOL(drm_pagemap_create);
@@ -933,3 +1018,11 @@ int drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
	return err;
}
EXPORT_SYMBOL(drm_pagemap_populate_mm);

static void drm_pagemap_exit(void)
{
	flush_work(&drm_pagemap_work);
	if (WARN_ON(!llist_empty(&drm_pagemap_unhold_list)))
		disable_work_sync(&drm_pagemap_work);
}
module_exit(drm_pagemap_exit);
+13 −2
Original line number Diff line number Diff line
@@ -1457,7 +1457,7 @@ xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap,
			  unsigned int order,
			  enum dma_data_direction dir)
{
	struct device *pgmap_dev = dpagemap->dev;
	struct device *pgmap_dev = dpagemap->drm->dev;
	enum drm_interconnect_protocol prot;
	dma_addr_t addr;

@@ -1477,6 +1477,14 @@ static const struct drm_pagemap_ops xe_drm_pagemap_ops = {
	.populate_mm = xe_drm_pagemap_populate_mm,
};

static void xe_devm_release(void *data)
{
	struct xe_vram_region *vr = data;

	drm_pagemap_put(vr->dpagemap);
	vr->dpagemap = NULL;
}

/**
 * xe_devm_add: Remap and provide memmap backing for device memory
 * @tile: tile that the memory region belongs to
@@ -1502,7 +1510,7 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
		return ret;
	}

	vr->dpagemap = drm_pagemap_create(dev, &vr->pagemap,
	vr->dpagemap = drm_pagemap_create(&xe->drm, &vr->pagemap,
					  &xe_drm_pagemap_ops);
	if (IS_ERR(vr->dpagemap)) {
		drm_err(&xe->drm, "Failed to create drm_pagemap tile %d memory: %pe\n",
@@ -1510,6 +1518,9 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
		ret = PTR_ERR(vr->dpagemap);
		goto out_no_dpagemap;
	}
	ret = devm_add_action_or_reset(dev, xe_devm_release, vr);
	if (ret)
		goto out_no_dpagemap;

	vr->pagemap.type = MEMORY_DEVICE_PRIVATE;
	vr->pagemap.range.start = res->start;
+7 −3
Original line number Diff line number Diff line
@@ -10,6 +10,7 @@

struct dma_fence;
struct drm_pagemap;
struct drm_pagemap_dev_hold;
struct drm_pagemap_zdd;
struct device;

@@ -131,14 +132,17 @@ struct drm_pagemap_ops {
 * used for device p2p handshaking.
 * @ops: The struct drm_pagemap_ops.
 * @ref: Reference count.
 * @dev: The struct drevice owning the device-private memory.
 * @drm: The struct drm device owning the device-private memory.
 * @pagemap: Pointer to the underlying dev_pagemap.
 * @dev_hold: Pointer to a struct drm_pagemap_dev_hold for
 * device referencing.
 */
struct drm_pagemap {
	const struct drm_pagemap_ops *ops;
	struct kref ref;
	struct device *dev;
	struct drm_device *drm;
	struct dev_pagemap *pagemap;
	struct drm_pagemap_dev_hold *dev_hold;
};

struct drm_pagemap_devmem;
@@ -213,7 +217,7 @@ struct drm_pagemap_devmem_ops {
			   struct dma_fence *pre_migrate_fence);
};

struct drm_pagemap *drm_pagemap_create(struct device *dev,
struct drm_pagemap *drm_pagemap_create(struct drm_device *drm,
				       struct dev_pagemap *pagemap,
				       const struct drm_pagemap_ops *ops);