Commit 15f77764 authored by Lin.Cao's avatar Lin.Cao Committed by Philipp Stanner
Browse files

drm/sched: Remove optimization that causes hang when killing dependent jobs



When application A submits jobs and application B submits a job with a
dependency on A's fence, the normal flow wakes up the scheduler after
processing each job. However, the optimization in
drm_sched_entity_add_dependency_cb() uses a callback that only clears
dependencies without waking up the scheduler.

When application A is killed before its jobs can run, the callback gets
triggered but only clears the dependency without waking up the scheduler,
causing the scheduler to enter sleep state and application B to hang.

Remove the optimization by deleting drm_sched_entity_clear_dep() and its
usage, ensuring the scheduler is always woken up when dependencies are
cleared.

Fixes: 777dbd45 ("drm/amdgpu: drop a dummy wakeup scheduler")
Cc: stable@vger.kernel.org # v4.6+
Signed-off-by: default avatarLin.Cao <lincao12@amd.com>
Reviewed-by: default avatarChristian König <christian.koenig@amd.com>
Signed-off-by: default avatarPhilipp Stanner <phasta@kernel.org>
Link: https://lore.kernel.org/r/20250717084453.921097-1-lincao12@amd.com
parent 95a16160
Loading
Loading
Loading
Loading
+2 −19
Original line number Diff line number Diff line
@@ -355,17 +355,6 @@ void drm_sched_entity_destroy(struct drm_sched_entity *entity)
}
EXPORT_SYMBOL(drm_sched_entity_destroy);

/* drm_sched_entity_clear_dep - callback to clear the entities dependency */
static void drm_sched_entity_clear_dep(struct dma_fence *f,
				       struct dma_fence_cb *cb)
{
	struct drm_sched_entity *entity =
		container_of(cb, struct drm_sched_entity, cb);

	entity->dependency = NULL;
	dma_fence_put(f);
}

/*
 * drm_sched_entity_wakeup - callback to clear the entity's dependency and
 * wake up the scheduler
@@ -376,7 +365,8 @@ static void drm_sched_entity_wakeup(struct dma_fence *f,
	struct drm_sched_entity *entity =
		container_of(cb, struct drm_sched_entity, cb);

	drm_sched_entity_clear_dep(f, cb);
	entity->dependency = NULL;
	dma_fence_put(f);
	drm_sched_wakeup(entity->rq->sched);
}

@@ -429,13 +419,6 @@ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity)
		fence = dma_fence_get(&s_fence->scheduled);
		dma_fence_put(entity->dependency);
		entity->dependency = fence;
		if (!dma_fence_add_callback(fence, &entity->cb,
					    drm_sched_entity_clear_dep))
			return true;

		/* Ignore it when it is already scheduled */
		dma_fence_put(fence);
		return false;
	}

	if (!dma_fence_add_callback(entity->dependency, &entity->cb,