Commit ddf055b8 authored by Pierre-Eric Pelloux-Prayer's avatar Pierre-Eric Pelloux-Prayer Committed by Christian König
Browse files

drm/ttm: rework pipelined eviction fence handling



Until now ttm stored a single pipelined eviction fence which means
drivers had to use a single entity for these evictions.

To lift this requirement, this commit allows up to 8 entities to
be used.

Ideally a dma_resv object would have been used as a container of
the eviction fences, but the locking rules makes it complex.
dma_resv all have the same ww_class, which means "Attempting to
lock more mutexes after ww_acquire_done." is an error.

One alternative considered was to introduced a 2nd ww_class for
specific resv to hold a single "transient" lock (= the resv lock
would only be held for a short period, without taking any other
locks).

The other option, is to statically reserve a fence array, and
extend the existing code to deal with N fences, instead of 1.

The driver is still responsible to reserve the correct number
of fence slots.

Signed-off-by: default avatarPierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Link: https://lore.kernel.org/r/20251121101315.3585-20-pierre-eric.pelloux-prayer@amd.com


Reviewed-by: default avatarChristian König <christian.koenig@amd.com>
Signed-off-by: default avatarChristian König <christian.koenig@amd.com>
parent 6f53bcb4
Loading
Loading
Loading
Loading
+6 −5
Original line number Diff line number Diff line
@@ -652,7 +652,7 @@ static void ttm_bo_validate_move_fence_signaled(struct kunit *test)
	int err;

	man = ttm_manager_type(priv->ttm_dev, mem_type);
	man->move = dma_fence_get_stub();
	man->eviction_fences[0] = dma_fence_get_stub();

	bo = ttm_bo_kunit_init(test, test->priv, size, NULL);
	bo->type = bo_type;
@@ -669,7 +669,7 @@ static void ttm_bo_validate_move_fence_signaled(struct kunit *test)
	KUNIT_EXPECT_EQ(test, ctx.bytes_moved, size);

	ttm_bo_fini(bo);
	dma_fence_put(man->move);
	dma_fence_put(man->eviction_fences[0]);
}

static const struct ttm_bo_validate_test_case ttm_bo_validate_wait_cases[] = {
@@ -733,9 +733,9 @@ static void ttm_bo_validate_move_fence_not_signaled(struct kunit *test)

	spin_lock_init(&fence_lock);
	man = ttm_manager_type(priv->ttm_dev, fst_mem);
	man->move = alloc_mock_fence(test);
	man->eviction_fences[0] = alloc_mock_fence(test);

	task = kthread_create(threaded_fence_signal, man->move, "move-fence-signal");
	task = kthread_create(threaded_fence_signal, man->eviction_fences[0], "move-fence-signal");
	if (IS_ERR(task))
		KUNIT_FAIL(test, "Couldn't create move fence signal task\n");

@@ -743,7 +743,8 @@ static void ttm_bo_validate_move_fence_not_signaled(struct kunit *test)
	err = ttm_bo_validate(bo, placement_val, &ctx_val);
	dma_resv_unlock(bo->base.resv);

	dma_fence_wait_timeout(man->move, false, MAX_SCHEDULE_TIMEOUT);
	dma_fence_wait_timeout(man->eviction_fences[0], false, MAX_SCHEDULE_TIMEOUT);
	man->eviction_fences[0] = NULL;

	KUNIT_EXPECT_EQ(test, err, 0);
	KUNIT_EXPECT_EQ(test, ctx_val.bytes_moved, size);
+3 −2
Original line number Diff line number Diff line
@@ -207,6 +207,7 @@ static void ttm_resource_manager_init_basic(struct kunit *test)
	struct ttm_resource_test_priv *priv = test->priv;
	struct ttm_resource_manager *man;
	size_t size = SZ_16K;
	int i;

	man = kunit_kzalloc(test, sizeof(*man), GFP_KERNEL);
	KUNIT_ASSERT_NOT_NULL(test, man);
@@ -216,8 +217,8 @@ static void ttm_resource_manager_init_basic(struct kunit *test)
	KUNIT_ASSERT_PTR_EQ(test, man->bdev, priv->devs->ttm_dev);
	KUNIT_ASSERT_EQ(test, man->size, size);
	KUNIT_ASSERT_EQ(test, man->usage, 0);
	KUNIT_ASSERT_NULL(test, man->move);
	KUNIT_ASSERT_NOT_NULL(test, &man->move_lock);
	for (i = 0; i < TTM_NUM_MOVE_FENCES; i++)
		KUNIT_ASSERT_NULL(test, man->eviction_fences[i]);

	for (int i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
		KUNIT_ASSERT_TRUE(test, list_empty(&man->lru[i]));
+24 −23
Original line number Diff line number Diff line
@@ -659,34 +659,35 @@ void ttm_bo_unpin(struct ttm_buffer_object *bo)
EXPORT_SYMBOL(ttm_bo_unpin);

/*
 * Add the last move fence to the BO as kernel dependency and reserve a new
 * fence slot.
 * Add the pipelined eviction fencesto the BO as kernel dependency and reserve new
 * fence slots.
 */
static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
static int ttm_bo_add_pipelined_eviction_fences(struct ttm_buffer_object *bo,
						struct ttm_resource_manager *man,
						bool no_wait_gpu)
{
	struct dma_fence *fence;
	int ret;

	spin_lock(&man->move_lock);
	fence = dma_fence_get(man->move);
	spin_unlock(&man->move_lock);
	int i;

	spin_lock(&man->eviction_lock);
	for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
		fence = man->eviction_fences[i];
		if (!fence)
		return 0;
			continue;

		if (no_wait_gpu) {
		ret = dma_fence_is_signaled(fence) ? 0 : -EBUSY;
		dma_fence_put(fence);
		return ret;
			if (!dma_fence_is_signaled(fence)) {
				spin_unlock(&man->eviction_lock);
				return -EBUSY;
			}

		} else {
			dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL);
		}
	}
	spin_unlock(&man->eviction_lock);

	ret = dma_resv_reserve_fences(bo->base.resv, 1);
	dma_fence_put(fence);
	return ret;
	/* TODO: this call should be removed. */
	return dma_resv_reserve_fences(bo->base.resv, 1);
}

/**
@@ -719,7 +720,7 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo,
	int i, ret;

	ticket = dma_resv_locking_ctx(bo->base.resv);
	ret = dma_resv_reserve_fences(bo->base.resv, 1);
	ret = dma_resv_reserve_fences(bo->base.resv, TTM_NUM_MOVE_FENCES);
	if (unlikely(ret))
		return ret;

@@ -758,7 +759,7 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo,
				return ret;
		}

		ret = ttm_bo_add_move_fence(bo, man, ctx->no_wait_gpu);
		ret = ttm_bo_add_pipelined_eviction_fences(bo, man, ctx->no_wait_gpu);
		if (unlikely(ret)) {
			ttm_resource_free(bo, res);
			if (ret == -EBUSY)
+31 −7
Original line number Diff line number Diff line
@@ -258,7 +258,7 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
	ret = dma_resv_trylock(&fbo->base.base._resv);
	WARN_ON(!ret);

	ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1);
	ret = dma_resv_reserve_fences(&fbo->base.base._resv, TTM_NUM_MOVE_FENCES);
	if (ret) {
		dma_resv_unlock(&fbo->base.base._resv);
		kfree(fbo);
@@ -646,20 +646,44 @@ static void ttm_bo_move_pipeline_evict(struct ttm_buffer_object *bo,
{
	struct ttm_device *bdev = bo->bdev;
	struct ttm_resource_manager *from;
	struct dma_fence *tmp;
	int i;

	from = ttm_manager_type(bdev, bo->resource->mem_type);

	/**
	 * BO doesn't have a TTM we need to bind/unbind. Just remember
	 * this eviction and free up the allocation
	 * this eviction and free up the allocation.
	 * The fence will be saved in the first free slot or in the slot
	 * already used to store a fence from the same context. Since
	 * drivers can't use more than TTM_NUM_MOVE_FENCES contexts for
	 * evictions we should always find a slot to use.
	 */
	spin_lock(&from->move_lock);
	if (!from->move || dma_fence_is_later(fence, from->move)) {
		dma_fence_put(from->move);
		from->move = dma_fence_get(fence);
	spin_lock(&from->eviction_lock);
	for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
		tmp = from->eviction_fences[i];
		if (!tmp)
			break;
		if (fence->context != tmp->context)
			continue;
		if (dma_fence_is_later(fence, tmp)) {
			dma_fence_put(tmp);
			break;
		}
		goto unlock;
	}
	if (i < TTM_NUM_MOVE_FENCES) {
		from->eviction_fences[i] = dma_fence_get(fence);
	} else {
		WARN(1, "not enough fence slots for all fence contexts");
		spin_unlock(&from->eviction_lock);
		dma_fence_wait(fence, false);
		goto end;
	}
	spin_unlock(&from->move_lock);

unlock:
	spin_unlock(&from->eviction_lock);
end:
	ttm_resource_free(bo, &bo->resource);
}

+20 −13
Original line number Diff line number Diff line
@@ -524,14 +524,15 @@ void ttm_resource_manager_init(struct ttm_resource_manager *man,
{
	unsigned i;

	spin_lock_init(&man->move_lock);
	man->bdev = bdev;
	man->size = size;
	man->usage = 0;

	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
		INIT_LIST_HEAD(&man->lru[i]);
	man->move = NULL;
	spin_lock_init(&man->eviction_lock);
	for (i = 0; i < TTM_NUM_MOVE_FENCES; i++)
		man->eviction_fences[i] = NULL;
}
EXPORT_SYMBOL(ttm_resource_manager_init);

@@ -552,7 +553,7 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
		.no_wait_gpu = false,
	};
	struct dma_fence *fence;
	int ret;
	int ret, i;

	do {
		ret = ttm_bo_evict_first(bdev, man, &ctx);
@@ -562,18 +563,24 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
	if (ret && ret != -ENOENT)
		return ret;

	spin_lock(&man->move_lock);
	fence = dma_fence_get(man->move);
	spin_unlock(&man->move_lock);
	ret = 0;

	if (fence) {
	spin_lock(&man->eviction_lock);
	for (i = 0; i < TTM_NUM_MOVE_FENCES; i++) {
		fence = man->eviction_fences[i];
		if (fence && !dma_fence_is_signaled(fence)) {
			dma_fence_get(fence);
			spin_unlock(&man->eviction_lock);
			ret = dma_fence_wait(fence, false);
			dma_fence_put(fence);
			if (ret)
				return ret;
			spin_lock(&man->eviction_lock);
		}
	}
	spin_unlock(&man->eviction_lock);

	return 0;
	return ret;
}
EXPORT_SYMBOL(ttm_resource_manager_evict_all);

Loading