Commit da966b82, authored by Thomas Hellström, committed by Christian König
Browse files

drm/ttm: Provide a generic LRU walker helper



Provide a generic LRU walker in TTM, in the spirit of drm_gem_lru_scan()
but building on the restartable TTM LRU functionality.

The LRU walker optionally supports locking objects as part of
a ww mutex locking transaction, to mimic to some extent the
current functionality in ttm. However any -EDEADLK return
is converted to -ENOSPC and then to -ENOMEM before reaching
the driver, so that the driver will need to backoff and possibly retry
without being able to keep the ticket.

v3:
- Move the helper to core ttm.
- Remove the drm_exec usage from it for now, it will be
  reintroduced later in the series.
v4:
- Handle the -EALREADY case if ticketlocking.
v6:
- Some cleanup and added code comments (Matthew Brost)
- Clarified the ticketlock in the commit message (Matthew Brost)
v7:
- Use s64 rather than long for the target and progress
  (Christian König)
- Update documentation to not encourage using pages as a
  progress measure. (Christian König)
- Remove cond_resched(). (Christian König)

Cc: Christian König <christian.koenig@amd.com>
Cc: Somalapuram Amaranath <Amaranath.Somalapuram@amd.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: <dri-devel@lists.freedesktop.org>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com> #v6
Link: https://patchwork.freedesktop.org/patch/msgid/20240705153206.68526-6-thomas.hellstrom@linux.intel.com


Signed-off-by: Christian König <christian.koenig@amd.com>
parent 4c44f89c
Loading
Loading
Loading
Loading
+151 −0
Original line number Diff line number Diff line
@@ -768,3 +768,154 @@ int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo)
	ttm_tt_destroy(bo->bdev, ttm);
	return ret;
}

/*
 * Try to obtain the reservation lock of @bo without sleeping.
 *
 * Returns true if the bo's reservation object is locked on return, either
 * because the trylock succeeded (*@needs_unlock is then set to true) or
 * because it is the reservation object named by the operation context and
 * the context allows evicting objects sharing it (*@needs_unlock is then
 * false, so the lock is left alone by the unlock helper).
 * Returns false, with *@needs_unlock false, otherwise.
 */
static bool ttm_lru_walk_trylock(struct ttm_lru_walk *walk,
				 struct ttm_buffer_object *bo,
				 bool *needs_unlock)
{
	struct dma_resv *resv = bo->base.resv;
	struct ttm_operation_ctx *ctx = walk->ctx;
	bool acquired = dma_resv_trylock(resv);

	/* Only a lock taken here must be released by the walker. */
	*needs_unlock = acquired;
	if (acquired)
		return true;

	if (!ctx->allow_res_evict || resv != ctx->resv)
		return false;

	/* Shared with the context's reservation object: expected to be held. */
	dma_resv_assert_held(resv);
	return true;
}

/*
 * Lock the reservation object of @bo using the walk's ww acquire ticket.
 *
 * The lock is taken interruptibly if the operation context asks for it.
 * On success, *@needs_unlock is set to true and the walk's ticket is
 * cleared. -EDEADLK is translated to -ENOSPC; any other error is returned
 * unchanged.
 */
static int ttm_lru_walk_ticketlock(struct ttm_lru_walk *walk,
				   struct ttm_buffer_object *bo,
				   bool *needs_unlock)
{
	struct dma_resv *resv = bo->base.resv;
	int err;

	err = walk->ctx->interruptible ?
		dma_resv_lock_interruptible(resv, walk->ticket) :
		dma_resv_lock(resv, walk->ticket);

	switch (err) {
	case 0:
		*needs_unlock = true;
		/*
		 * At most one ticketlock per walk: ticketlocks are prone to
		 * return -EDEADLK, which makes the eviction fail, so once we
		 * have waited for one, fall back to trylocking for the
		 * remainder of this walk.
		 */
		walk->ticket = NULL;
		break;
	case -EDEADLK:
		/* The caller has to leave the ww transaction. */
		err = -ENOSPC;
		break;
	default:
		break;
	}

	return err;
}

/* Release the reservation lock of @bo, but only if @locked says we took it. */
static void ttm_lru_walk_unlock(struct ttm_buffer_object *bo, bool locked)
{
	if (!locked)
		return;

	dma_resv_unlock(bo->base.resv);
}

/**
 * ttm_lru_walk_for_evict() - Perform a LRU list walk, with actions taken on
 * valid items.
 * @walk: Describes the walk and the actions taken.
 * @bdev: The TTM device.
 * @man: The struct ttm_resource manager whose LRU lists we're walking.
 * @target: The end condition for the walk.
 *
 * The LRU lists of @man are walked, and for each struct ttm_resource
 * encountered, the corresponding ttm_buffer_object is locked and a reference
 * is taken on it, and the LRU lock is dropped. The LRU lock may be dropped
 * before locking and, in that case, it's verified that the item actually
 * remains on the LRU list after the lock, and that the buffer object didn't
 * switch resource in between.
 *
 * With a locked object, the actions indicated by @walk->ops->process_bo are
 * performed, and after that, the bo is unlocked, the refcount dropped and the
 * next struct ttm_resource is processed. Here, the walker relies on
 * TTM's restartable LRU list implementation.
 *
 * Typically @walk->ops->process_bo() would return the number of pages evicted,
 * swapped or shrunken, so that when the total reaches or exceeds @target, or
 * when the LRU list has been walked in full, iteration is terminated. It's
 * also terminated on error. Note that the definition of @target is done by
 * the caller, it could have a different meaning than the number of pages.
 *
 * Note that the way dma_resv individualization is done, locking needs to be done
 * either with the LRU lock held (trylocking only) or with a reference on the
 * object.
 *
 * Return: The progress made towards target or negative error code on error.
 */
s64 ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev,
			   struct ttm_resource_manager *man, s64 target)
{
	struct ttm_resource_cursor cursor;
	struct ttm_resource *res;
	s64 progress = 0;
	s64 lret;

	spin_lock(&bdev->lru_lock);
	ttm_resource_manager_for_each_res(man, &cursor, res) {
		struct ttm_buffer_object *bo = res->bo;
		bool bo_needs_unlock = false;
		bool bo_locked = false;
		int mem_type;

		/*
		 * Attempt a trylock before taking a reference on the bo,
		 * since if we do it the other way around, and the trylock fails,
		 * we need to drop the lru lock to put the bo.
		 *
		 * If the trylock fails, the bo is skipped unless a sleeping
		 * ticketlock is permitted: that requires a ticket, and neither
		 * no_wait_gpu nor trylock_only being set.
		 */
		if (ttm_lru_walk_trylock(walk, bo, &bo_needs_unlock))
			bo_locked = true;
		else if (!walk->ticket || walk->ctx->no_wait_gpu ||
			 walk->trylock_only)
			continue;

		/* No reference could be taken: undo the trylock (if any) and skip. */
		if (!ttm_bo_get_unless_zero(bo)) {
			ttm_lru_walk_unlock(bo, bo_needs_unlock);
			continue;
		}

		/*
		 * Sample the memory type while still holding the LRU lock, so
		 * it can be compared against the bo's resource after locking.
		 */
		mem_type = res->mem_type;
		spin_unlock(&bdev->lru_lock);

		lret = 0;
		if (!bo_locked)
			lret = ttm_lru_walk_ticketlock(walk, bo, &bo_needs_unlock);

		/*
		 * Note that in between the release of the lru lock and the
		 * ticketlock, the bo may have switched resource,
		 * and also memory type, since the resource may have been
		 * freed and allocated again with a different memory type.
		 * In that case, just skip it.
		 */
		if (!lret && bo->resource && bo->resource->mem_type == mem_type)
			lret = walk->ops->process_bo(walk, bo);

		ttm_lru_walk_unlock(bo, bo_needs_unlock);
		ttm_bo_put(bo);
		/*
		 * -EBUSY and -EALREADY are not errors for the walk: this bo
		 * simply contributes no progress.
		 */
		if (lret == -EBUSY || lret == -EALREADY)
			lret = 0;
		/*
		 * Any other negative value replaces the accumulated progress
		 * and terminates the walk below; otherwise accumulate.
		 */
		progress = (lret < 0) ? lret : progress + lret;

		/*
		 * Re-take the LRU lock before either continuing the iteration
		 * or leaving the loop to finalize the cursor.
		 */
		spin_lock(&bdev->lru_lock);
		if (progress < 0 || progress >= target)
			break;
	}
	ttm_resource_cursor_fini_locked(&cursor);
	spin_unlock(&bdev->lru_lock);

	return progress;
}
+35 −0
Original line number Diff line number Diff line
@@ -194,6 +194,41 @@ struct ttm_operation_ctx {
	uint64_t bytes_moved;
};

/* Forward declaration: the process_bo() callback takes a pointer to the walk. */
struct ttm_lru_walk;

/** struct ttm_lru_walk_ops - Operations for a LRU walk. */
struct ttm_lru_walk_ops {
	/**
	 * process_bo - Process this bo.
	 * @walk: struct ttm_lru_walk describing the walk.
	 * @bo: A locked and referenced buffer object.
	 *
	 * Return: Negative error code on error, user-defined positive value
	 * (typically, but not always, size of the processed bo) on success.
	 * On success, the returned values are summed by the walk and the
	 * walk exits when its target is met.
	 * 0 also indicates success, -EBUSY means this bo was skipped.
	 * ttm_lru_walk_for_evict() treats -EALREADY the same way as -EBUSY;
	 * any other negative value terminates the walk.
	 */
	s64 (*process_bo)(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo);
};

/**
 * struct ttm_lru_walk - Structure describing a LRU walk.
 */
struct ttm_lru_walk {
	/** @ops: Pointer to the ops structure. */
	const struct ttm_lru_walk_ops *ops;
	/** @ctx: Pointer to the struct ttm_operation_ctx. */
	struct ttm_operation_ctx *ctx;
	/**
	 * @ticket: The struct ww_acquire_ctx if any. Set to NULL by the walk
	 * after the first successful ticket lock, so that the rest of the
	 * walk uses trylocking only.
	 */
	struct ww_acquire_ctx *ticket;
	/**
	 * @trylock_only: Only use trylock for locking. Objects whose trylock
	 * fails are then skipped even if @ticket is set.
	 */
	bool trylock_only;
};

/*
 * Walk the LRU lists of @man, calling walk->ops->process_bo() on each bo
 * that could be locked, until the summed progress reaches @target, the
 * lists are exhausted, or an error occurs. Returns the progress made or a
 * negative error code.
 */
s64 ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev,
			   struct ttm_resource_manager *man, s64 target);

/**
 * ttm_bo_get - reference a struct ttm_buffer_object
 *