Commit 379e8f1c authored by Boris Brezillon's avatar Boris Brezillon
Browse files

drm/gem: Make the GEM LRU lock part of drm_device



Recently, a few races have been discovered in the GEM LRU logic, all
of them caused by the fact the LRU lock is accessed through
gem->lru->lock, and that very same lock also protects changes to
gem->lru, leading to situations where gem->lru needs to first be
accessed without the lock held, to then get the lru to access the lock
through and finally take the lock and do the expected operation.

Currently, the only driver making use of this API (MSM) declares a
device-wide lock, and the user we're about to add (panthor) will
do the same. There's no evidence that we will ever have a driver
that wants different pools of LRUs protected by different locks under
the same drm_device. So we're better off moving this lock to drm_device
and always locking it through obj->dev->gem_lru_mutex, or directly
through dev->gem_lru_mutex.

If anyone ever needs more fine-grained locking, this can be revisited
to pass some drm_gem_lru_pool object representing the pool of LRUs
under a specific lock, but for now, the per-device lock seems to be
enough.

Fixes: e7c2af13 ("drm/gem: Add LRU/shrinker helper")
Reported-by: default avatarChia-I Wu <olvaffe@gmail.com>
Closes: https://gitlab.freedesktop.org/panfrost/linux/-/work_items/86


Reviewed-by: default avatarRob Clark <rob.clark@oss.qualcomm.com>
Reviewed-by: default avatarLiviu Dudau <liviu.dudau@arm.com>
Reviewed-by: default avatarSteven Price <steven.price@arm.com>
Reviewed-by: default avatarChia-I Wu <olvaffe@gmail.com>
Link: https://patch.msgid.link/20260518-panthor-shrinker-fixes-v4-1-1920234470d5@collabora.com


Signed-off-by: default avatarBoris Brezillon <boris.brezillon@collabora.com>
parent e02b5262
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -697,6 +697,7 @@ static void drm_dev_init_release(struct drm_device *dev, void *res)
	mutex_destroy(&dev->master_mutex);
	mutex_destroy(&dev->clientlist_mutex);
	mutex_destroy(&dev->filelist_mutex);
	mutex_destroy(&dev->gem_lru_mutex);
}

static int drm_dev_init(struct drm_device *dev,
@@ -738,6 +739,7 @@ static int drm_dev_init(struct drm_device *dev,
	INIT_LIST_HEAD(&dev->vblank_event_list);

	spin_lock_init(&dev->event_lock);
	mutex_init(&dev->gem_lru_mutex);
	mutex_init(&dev->filelist_mutex);
	mutex_init(&dev->clientlist_mutex);
	mutex_init(&dev->master_mutex);
+16 −20
Original line number Diff line number Diff line
@@ -1541,12 +1541,10 @@ EXPORT_SYMBOL(drm_gem_unlock_reservations);
 * drm_gem_lru_init - initialize a LRU
 *
 * @lru: The LRU to initialize
 * @lock: The lock protecting the LRU
 */
void
drm_gem_lru_init(struct drm_gem_lru *lru, struct mutex *lock)
drm_gem_lru_init(struct drm_gem_lru *lru)
{
	lru->lock = lock;
	lru->count = 0;
	INIT_LIST_HEAD(&lru->list);
}
@@ -1571,14 +1569,10 @@ drm_gem_lru_remove_locked(struct drm_gem_object *obj)
void
drm_gem_lru_remove(struct drm_gem_object *obj)
{
	struct drm_gem_lru *lru = obj->lru;

	if (!lru)
		return;

	mutex_lock(lru->lock);
	mutex_lock(&obj->dev->gem_lru_mutex);
	if (obj->lru)
		drm_gem_lru_remove_locked(obj);
	mutex_unlock(lru->lock);
	mutex_unlock(&obj->dev->gem_lru_mutex);
}
EXPORT_SYMBOL(drm_gem_lru_remove);

@@ -1593,7 +1587,7 @@ EXPORT_SYMBOL(drm_gem_lru_remove);
void
drm_gem_lru_move_tail_locked(struct drm_gem_lru *lru, struct drm_gem_object *obj)
{
	lockdep_assert_held_once(lru->lock);
	lockdep_assert_held_once(&obj->dev->gem_lru_mutex);

	if (obj->lru)
		drm_gem_lru_remove_locked(obj);
@@ -1617,9 +1611,9 @@ EXPORT_SYMBOL(drm_gem_lru_move_tail_locked);
void
drm_gem_lru_move_tail(struct drm_gem_lru *lru, struct drm_gem_object *obj)
{
	mutex_lock(lru->lock);
	mutex_lock(&obj->dev->gem_lru_mutex);
	drm_gem_lru_move_tail_locked(lru, obj);
	mutex_unlock(lru->lock);
	mutex_unlock(&obj->dev->gem_lru_mutex);
}
EXPORT_SYMBOL(drm_gem_lru_move_tail);

@@ -1633,6 +1627,7 @@ EXPORT_SYMBOL(drm_gem_lru_move_tail);
 * of the shrink callback to check for this (ie. dma_resv_test_signaled())
 * or if necessary block until the buffer becomes idle.
 *
 * @dev: DRM device the LRU belongs to
 * @lru: The LRU to scan
 * @nr_to_scan: The number of pages to try to reclaim
 * @remaining: The number of pages left to reclaim, should be initialized by caller
@@ -1640,7 +1635,8 @@ EXPORT_SYMBOL(drm_gem_lru_move_tail);
 * @ticket: Optional ww_acquire_ctx context to use for locking
 */
unsigned long
drm_gem_lru_scan(struct drm_gem_lru *lru,
drm_gem_lru_scan(struct drm_device *dev,
		 struct drm_gem_lru *lru,
		 unsigned int nr_to_scan,
		 unsigned long *remaining,
		 bool (*shrink)(struct drm_gem_object *obj, struct ww_acquire_ctx *ticket),
@@ -1650,9 +1646,9 @@ drm_gem_lru_scan(struct drm_gem_lru *lru,
	struct drm_gem_object *obj;
	unsigned freed = 0;

	drm_gem_lru_init(&still_in_lru, lru->lock);
	drm_gem_lru_init(&still_in_lru);

	mutex_lock(lru->lock);
	mutex_lock(&dev->gem_lru_mutex);

	while (freed < nr_to_scan) {
		obj = list_first_entry_or_null(&lru->list, typeof(*obj), lru_node);
@@ -1675,7 +1671,7 @@ drm_gem_lru_scan(struct drm_gem_lru *lru,
		 * rest of the loop body, to reduce contention with other
		 * code paths that need the LRU lock
		 */
		mutex_unlock(lru->lock);
		mutex_unlock(&dev->gem_lru_mutex);

		if (ticket)
			ww_acquire_init(ticket, &reservation_ww_class);
@@ -1709,7 +1705,7 @@ drm_gem_lru_scan(struct drm_gem_lru *lru,

tail:
		drm_gem_object_put(obj);
		mutex_lock(lru->lock);
		mutex_lock(&dev->gem_lru_mutex);
	}

	/*
@@ -1721,7 +1717,7 @@ drm_gem_lru_scan(struct drm_gem_lru *lru,
	list_splice_tail(&still_in_lru.list, &lru->list);
	lru->count += still_in_lru.count;

	mutex_unlock(lru->lock);
	mutex_unlock(&dev->gem_lru_mutex);

	return freed;
}
+5 −6
Original line number Diff line number Diff line
@@ -128,11 +128,10 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv,
	/*
	 * Initialize the LRUs:
	 */
	mutex_init(&priv->lru.lock);
	drm_gem_lru_init(&priv->lru.unbacked, &priv->lru.lock);
	drm_gem_lru_init(&priv->lru.pinned,   &priv->lru.lock);
	drm_gem_lru_init(&priv->lru.willneed, &priv->lru.lock);
	drm_gem_lru_init(&priv->lru.dontneed, &priv->lru.lock);
	drm_gem_lru_init(&priv->lru.unbacked);
	drm_gem_lru_init(&priv->lru.pinned);
	drm_gem_lru_init(&priv->lru.willneed);
	drm_gem_lru_init(&priv->lru.dontneed);

	/* Initialize stall-on-fault */
	spin_lock_init(&priv->fault_stall_lock);
@@ -140,7 +139,7 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv,

	/* Teach lockdep about lock ordering wrt. shrinker: */
	fs_reclaim_acquire(GFP_KERNEL);
	might_lock(&priv->lru.lock);
	might_lock(&ddev->gem_lru_mutex);
	fs_reclaim_release(GFP_KERNEL);

	if (priv->kms_init) {
+0 −7
Original line number Diff line number Diff line
@@ -150,13 +150,6 @@ struct msm_drm_private {
		 * DONTNEED state (ie. can be purged)
		 */
		struct drm_gem_lru dontneed;

		/**
		 * lock:
		 *
		 * Protects manipulation of all of the LRUs.
		 */
		struct mutex lock;
	} lru;

	struct notifier_block vmap_notifier;
+16 −17
Original line number Diff line number Diff line
@@ -177,11 +177,11 @@ static void update_lru_locked(struct drm_gem_object *obj)

static void update_lru(struct drm_gem_object *obj)
{
	struct msm_drm_private *priv = obj->dev->dev_private;
	struct drm_device *dev = obj->dev;

	mutex_lock(&priv->lru.lock);
	mutex_lock(&dev->gem_lru_mutex);
	update_lru_locked(obj);
	mutex_unlock(&priv->lru.lock);
	mutex_unlock(&dev->gem_lru_mutex);
}

static struct page **get_pages(struct drm_gem_object *obj)
@@ -292,11 +292,11 @@ void msm_gem_pin_obj_locked(struct drm_gem_object *obj)

static void pin_obj_locked(struct drm_gem_object *obj)
{
	struct msm_drm_private *priv = obj->dev->dev_private;
	struct drm_device *dev = obj->dev;

	mutex_lock(&priv->lru.lock);
	mutex_lock(&dev->gem_lru_mutex);
	msm_gem_pin_obj_locked(obj);
	mutex_unlock(&priv->lru.lock);
	mutex_unlock(&dev->gem_lru_mutex);
}

struct page **msm_gem_pin_pages_locked(struct drm_gem_object *obj)
@@ -487,16 +487,16 @@ int msm_gem_pin_vma_locked(struct drm_gem_object *obj, struct drm_gpuva *vma)

void msm_gem_unpin_locked(struct drm_gem_object *obj)
{
	struct msm_drm_private *priv = obj->dev->dev_private;
	struct drm_device *dev = obj->dev;
	struct msm_gem_object *msm_obj = to_msm_bo(obj);

	msm_gem_assert_locked(obj);

	mutex_lock(&priv->lru.lock);
	mutex_lock(&dev->gem_lru_mutex);
	msm_obj->pin_count--;
	GEM_WARN_ON(msm_obj->pin_count < 0);
	update_lru_locked(obj);
	mutex_unlock(&priv->lru.lock);
	mutex_unlock(&dev->gem_lru_mutex);
}

/* Special unpin path for use in fence-signaling path, avoiding the need
@@ -507,10 +507,10 @@ void msm_gem_unpin_locked(struct drm_gem_object *obj)
 */
void msm_gem_unpin_active(struct drm_gem_object *obj)
{
	struct msm_drm_private *priv = obj->dev->dev_private;
	struct drm_device *dev = obj->dev;
	struct msm_gem_object *msm_obj = to_msm_bo(obj);

	GEM_WARN_ON(!mutex_is_locked(&priv->lru.lock));
	GEM_WARN_ON(!mutex_is_locked(&dev->gem_lru_mutex));

	msm_obj->pin_count--;
	GEM_WARN_ON(msm_obj->pin_count < 0);
@@ -797,12 +797,12 @@ void msm_gem_put_vaddr(struct drm_gem_object *obj)
 */
int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv)
{
	struct msm_drm_private *priv = obj->dev->dev_private;
	struct drm_device *dev = obj->dev;
	struct msm_gem_object *msm_obj = to_msm_bo(obj);

	msm_gem_lock(obj);

	mutex_lock(&priv->lru.lock);
	mutex_lock(&dev->gem_lru_mutex);

	if (msm_obj->madv != __MSM_MADV_PURGED)
		msm_obj->madv = madv;
@@ -814,7 +814,7 @@ int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv)
	 */
	update_lru_locked(obj);

	mutex_unlock(&priv->lru.lock);
	mutex_unlock(&dev->gem_lru_mutex);

	msm_gem_unlock(obj);

@@ -824,7 +824,6 @@ int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv)
void msm_gem_purge(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	struct msm_drm_private *priv = obj->dev->dev_private;
	struct msm_gem_object *msm_obj = to_msm_bo(obj);

	msm_gem_assert_locked(obj);
@@ -839,10 +838,10 @@ void msm_gem_purge(struct drm_gem_object *obj)

	put_pages(obj);

	mutex_lock(&priv->lru.lock);
	mutex_lock(&dev->gem_lru_mutex);
	/* A one-way transition: */
	msm_obj->madv = __MSM_MADV_PURGED;
	mutex_unlock(&priv->lru.lock);
	mutex_unlock(&dev->gem_lru_mutex);

	drm_gem_free_mmap_offset(obj);

Loading