Commit bb8aa27e authored by Thomas Hellström's avatar Thomas Hellström
Browse files

drm/ttm, drm_xe, Implement ttm_lru_walk_for_evict() using the guarded LRU iteration



To avoid duplicating the tricky bo locking implementation,
Implement ttm_lru_walk_for_evict() using the guarded bo LRU iteration.

To facilitate this, support ticketlocking from the guarded bo LRU
iteration.

v2:
- Clean up some static function interfaces (Christian König)
- Fix Handling -EALREADY from ticketlocking in the loop by
  skipping to the next item. (Intel CI)

Signed-off-by: default avatarThomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: default avatarChristian König <christian.koenig@amd.com>
Link: https://lore.kernel.org/r/20250623155313.4901-4-thomas.hellstrom@linux.intel.com
parent e1e85eb0
Loading
Loading
Loading
Loading
+75 −113
Original line number Diff line number Diff line
@@ -773,16 +773,15 @@ int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo)
	return ret;
}

static bool ttm_lru_walk_trylock(struct ttm_lru_walk_arg *arg,
				 struct ttm_buffer_object *bo,
				 bool *needs_unlock)
static bool ttm_lru_walk_trylock(struct ttm_bo_lru_cursor *curs,
				 struct ttm_buffer_object *bo)
{
	struct ttm_operation_ctx *ctx = arg->ctx;
	struct ttm_operation_ctx *ctx = curs->arg->ctx;

	*needs_unlock = false;
	curs->needs_unlock = false;

	if (dma_resv_trylock(bo->base.resv)) {
		*needs_unlock = true;
		curs->needs_unlock = true;
		return true;
	}

@@ -794,10 +793,10 @@ static bool ttm_lru_walk_trylock(struct ttm_lru_walk_arg *arg,
	return false;
}

static int ttm_lru_walk_ticketlock(struct ttm_lru_walk_arg *arg,
				   struct ttm_buffer_object *bo,
				   bool *needs_unlock)
static int ttm_lru_walk_ticketlock(struct ttm_bo_lru_cursor *curs,
				   struct ttm_buffer_object *bo)
{
	struct ttm_lru_walk_arg *arg = curs->arg;
	struct dma_resv *resv = bo->base.resv;
	int ret;

@@ -807,7 +806,7 @@ static int ttm_lru_walk_ticketlock(struct ttm_lru_walk_arg *arg,
		ret = dma_resv_lock(resv, arg->ticket);

	if (!ret) {
		*needs_unlock = true;
		curs->needs_unlock = true;
		/*
		 * Only a single ticketlock per loop. Ticketlocks are prone
		 * to return -EDEADLK causing the eviction to fail, so
@@ -823,12 +822,6 @@ static int ttm_lru_walk_ticketlock(struct ttm_lru_walk_arg *arg,
	return ret;
}

static void ttm_lru_walk_unlock(struct ttm_buffer_object *bo, bool locked)
{
	if (locked)
		dma_resv_unlock(bo->base.resv);
}

/**
 * ttm_lru_walk_for_evict() - Perform a LRU list walk, with actions taken on
 * valid items.
@@ -863,64 +856,21 @@ static void ttm_lru_walk_unlock(struct ttm_buffer_object *bo, bool locked)
s64 ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev,
			   struct ttm_resource_manager *man, s64 target)
{
	struct ttm_resource_cursor cursor;
	struct ttm_resource *res;
	struct ttm_bo_lru_cursor cursor;
	struct ttm_buffer_object *bo;
	s64 progress = 0;
	s64 lret;

	spin_lock(&bdev->lru_lock);
	ttm_resource_cursor_init(&cursor, man);
	ttm_resource_manager_for_each_res(&cursor, res) {
		struct ttm_buffer_object *bo = res->bo;
		bool bo_needs_unlock = false;
		bool bo_locked = false;
		int mem_type;

		/*
		 * Attempt a trylock before taking a reference on the bo,
		 * since if we do it the other way around, and the trylock fails,
		 * we need to drop the lru lock to put the bo.
		 */
		if (ttm_lru_walk_trylock(&walk->arg, bo, &bo_needs_unlock))
			bo_locked = true;
		else if (!walk->arg.ticket || walk->arg.ctx->no_wait_gpu ||
			 walk->arg.trylock_only)
			continue;

		if (!ttm_bo_get_unless_zero(bo)) {
			ttm_lru_walk_unlock(bo, bo_needs_unlock);
			continue;
		}

		mem_type = res->mem_type;
		spin_unlock(&bdev->lru_lock);

		lret = 0;
		if (!bo_locked)
			lret = ttm_lru_walk_ticketlock(&walk->arg, bo, &bo_needs_unlock);

		/*
		 * Note that in between the release of the lru lock and the
		 * ticketlock, the bo may have switched resource,
		 * and also memory type, since the resource may have been
		 * freed and allocated again with a different memory type.
		 * In that case, just skip it.
		 */
		if (!lret && bo->resource && bo->resource->mem_type == mem_type)
	ttm_bo_lru_for_each_reserved_guarded(&cursor, man, &walk->arg, bo) {
		lret = walk->ops->process_bo(walk, bo);

		ttm_lru_walk_unlock(bo, bo_needs_unlock);
		ttm_bo_put(bo);
		if (lret == -EBUSY || lret == -EALREADY)
			lret = 0;
		progress = (lret < 0) ? lret : progress + lret;

		spin_lock(&bdev->lru_lock);
		if (progress < 0 || progress >= target)
			break;
	}
	ttm_resource_cursor_fini(&cursor);
	spin_unlock(&bdev->lru_lock);
	if (IS_ERR(bo))
		return PTR_ERR(bo);

	return progress;
}
@@ -960,10 +910,7 @@ EXPORT_SYMBOL(ttm_bo_lru_cursor_fini);
 * @man: The ttm resource_manager whose LRU lists to iterate over.
 * @arg: The ttm_lru_walk_arg to govern the walk.
 *
 * Initialize a struct ttm_bo_lru_cursor. Currently only trylocking
 * or prelocked buffer objects are available as detailed by
 * @arg->ctx.resv and @arg->ctx.allow_res_evict. Ticketlocking is not
 * supported.
 * Initialize a struct ttm_bo_lru_cursor.
 *
 * Return: Pointer to @curs. The function does not fail.
 */
@@ -981,21 +928,67 @@ ttm_bo_lru_cursor_init(struct ttm_bo_lru_cursor *curs,
EXPORT_SYMBOL(ttm_bo_lru_cursor_init);

static struct ttm_buffer_object *
ttm_bo_from_res_reserved(struct ttm_resource *res, struct ttm_bo_lru_cursor *curs)
__ttm_bo_lru_cursor_next(struct ttm_bo_lru_cursor *curs)
{
	struct ttm_buffer_object *bo = res->bo;
	spinlock_t *lru_lock = &curs->res_curs.man->bdev->lru_lock;
	struct ttm_resource *res = NULL;
	struct ttm_buffer_object *bo;
	struct ttm_lru_walk_arg *arg = curs->arg;
	bool first = !curs->bo;

	ttm_bo_lru_cursor_cleanup_bo(curs);

	spin_lock(lru_lock);
	for (;;) {
		int mem_type, ret = 0;
		bool bo_locked = false;

		if (first) {
			res = ttm_resource_manager_first(&curs->res_curs);
			first = false;
		} else {
			res = ttm_resource_manager_next(&curs->res_curs);
		}
		if (!res)
			break;

	if (!ttm_lru_walk_trylock(curs->arg, bo, &curs->needs_unlock))
		return NULL;
		bo = res->bo;
		if (ttm_lru_walk_trylock(curs, bo))
			bo_locked = true;
		else if (!arg->ticket || arg->ctx->no_wait_gpu || arg->trylock_only)
			continue;

		if (!ttm_bo_get_unless_zero(bo)) {
			if (curs->needs_unlock)
				dma_resv_unlock(bo->base.resv);
		return NULL;
			continue;
		}

		mem_type = res->mem_type;
		spin_unlock(lru_lock);
		if (!bo_locked)
			ret = ttm_lru_walk_ticketlock(curs, bo);

		/*
		 * Note that in between the release of the lru lock and the
		 * ticketlock, the bo may have switched resource,
		 * and also memory type, since the resource may have been
		 * freed and allocated again with a different memory type.
		 * In that case, just skip it.
		 */
		curs->bo = bo;
		if (!ret && bo->resource && bo->resource->mem_type == mem_type)
			return bo;

		ttm_bo_lru_cursor_cleanup_bo(curs);
		if (ret && ret != -EALREADY)
			return ERR_PTR(ret);

		spin_lock(lru_lock);
	}

	spin_unlock(lru_lock);
	return res ? bo : NULL;
}

/**
@@ -1009,25 +1002,7 @@ ttm_bo_from_res_reserved(struct ttm_resource *res, struct ttm_bo_lru_cursor *cur
 */
struct ttm_buffer_object *ttm_bo_lru_cursor_next(struct ttm_bo_lru_cursor *curs)
{
	spinlock_t *lru_lock = &curs->res_curs.man->bdev->lru_lock;
	struct ttm_resource *res = NULL;
	struct ttm_buffer_object *bo;

	ttm_bo_lru_cursor_cleanup_bo(curs);

	spin_lock(lru_lock);
	for (;;) {
		res = ttm_resource_manager_next(&curs->res_curs);
		if (!res)
			break;

		bo = ttm_bo_from_res_reserved(res, curs);
		if (bo)
			break;
	}

	spin_unlock(lru_lock);
	return res ? bo : NULL;
	return __ttm_bo_lru_cursor_next(curs);
}
EXPORT_SYMBOL(ttm_bo_lru_cursor_next);

@@ -1041,21 +1016,8 @@ EXPORT_SYMBOL(ttm_bo_lru_cursor_next);
 */
struct ttm_buffer_object *ttm_bo_lru_cursor_first(struct ttm_bo_lru_cursor *curs)
{
	spinlock_t *lru_lock = &curs->res_curs.man->bdev->lru_lock;
	struct ttm_buffer_object *bo;
	struct ttm_resource *res;

	spin_lock(lru_lock);
	res = ttm_resource_manager_first(&curs->res_curs);
	if (!res) {
		spin_unlock(lru_lock);
		return NULL;
	}

	bo = ttm_bo_from_res_reserved(res, curs);
	spin_unlock(lru_lock);

	return bo ? bo : ttm_bo_lru_cursor_next(curs);
	ttm_bo_lru_cursor_cleanup_bo(curs);
	return __ttm_bo_lru_cursor_next(curs);
}
EXPORT_SYMBOL(ttm_bo_lru_cursor_first);

+6 −1
Original line number Diff line number Diff line
@@ -65,7 +65,10 @@ static s64 xe_shrinker_walk(struct xe_device *xe,
		struct ttm_resource_manager *man = ttm_manager_type(&xe->ttm, mem_type);
		struct ttm_bo_lru_cursor curs;
		struct ttm_buffer_object *ttm_bo;
		struct ttm_lru_walk_arg arg = {.ctx = ctx};
		struct ttm_lru_walk_arg arg = {
			.ctx = ctx,
			.trylock_only = true,
		};

		if (!man || !man->use_tt)
			continue;
@@ -82,6 +85,8 @@ static s64 xe_shrinker_walk(struct xe_device *xe,
			if (*scanned >= to_scan)
				break;
		}
		/* Trylocks should never error, just fail. */
		xe_assert(xe, !IS_ERR(ttm_bo));
	}

	return freed;
+7 −2
Original line number Diff line number Diff line
@@ -529,10 +529,15 @@ class_ttm_bo_lru_cursor_lock_ptr(class_ttm_bo_lru_cursor_t *_T)
 * up at looping termination, even if terminated prematurely by, for
 * example a return or break statement. Exiting the loop will also unlock
 * (if needed) and unreference @_bo.
 *
 * Return: If locking of a bo returns an error, then iteration is terminated
 * and @_bo is set to a corresponding error pointer. It's illegal to
 * dereference @_bo after loop exit.
 */
#define ttm_bo_lru_for_each_reserved_guarded(_cursor, _man, _arg, _bo)	\
	scoped_guard(ttm_bo_lru_cursor, _cursor, _man, _arg)		\
		for ((_bo) = ttm_bo_lru_cursor_first(_cursor); (_bo);	\
		for ((_bo) = ttm_bo_lru_cursor_first(_cursor);		\
		       !IS_ERR_OR_NULL(_bo);				\
		       (_bo) = ttm_bo_lru_cursor_next(_cursor))

#endif