Commit 345c5b7c authored by Ashley Smith's avatar Ashley Smith Committed by Boris Brezillon
Browse files

drm/panthor: Make the timeout per-queue instead of per-job



The timeout logic provided by drm_sched leads to races when we try
to suspend it while the drm_sched workqueue queues more jobs. Let's
overhaul the timeout handling in panthor to have our own delayed work
that's resumed/suspended when a group is resumed/suspended. When an
actual timeout occurs, we call drm_sched_fault() to report it
through drm_sched, still. But otherwise, the drm_sched timeout is
disabled (set to MAX_SCHEDULE_TIMEOUT), which leaves us in control of
how we protect modifications on the timer.

One issue seems to be when we call drm_sched_suspend_timeout() from
both queue_run_job() and tick_work() which could lead to races due to
drm_sched_suspend_timeout() not having a lock. Another issue seems to
be in queue_run_job() if the group is not scheduled, we suspend the
timeout again which undoes what drm_sched_job_begin() did when calling
drm_sched_start_timeout(). So the timeout does not reset when a job
is finished.

v2:
- Fix syntax error

v3:
- Split the changes in two commits

v4:
- No changes

v5:
- No changes

v6:
- Fix a NULL deref in group_can_run(), and narrow the group variable
  scope to avoid such mistakes in the future
- Add an queue_timeout_is_suspended() helper to clarify things

v7:
- No changes

v8:
- Don't touch drm_gpu_scheduler::timeout in queue_timedout_job()

Fixes: de854881 ("drm/panthor: Add the scheduler logical block")
Reviewed-by: default avatarSteven Price <steven.price@arm.com>
Reviewed-by: default avatarLiviu Dudau <liviu.dudau@arm.com>
Reviewed-by: default avatarAdrián Larumbe <adrian.larumbe@collabora.com>
Signed-off-by: default avatarAshley Smith <ashley.smith@collabora.com>
Co-developed-by: default avatarBoris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: default avatarBoris Brezillon <boris.brezillon@collabora.com>
Link: https://patch.msgid.link/20251113105734.1520338-2-boris.brezillon@collabora.com
parent 922682d4
Loading
Loading
Loading
Loading
+199 −78
Original line number Diff line number Diff line
@@ -364,17 +364,20 @@ struct panthor_queue {
	/** @name: DRM scheduler name for this queue. */
	char *name;

	/** @timeout: Queue timeout related fields. */
	struct {
		/** @timeout.work: Work executed when a queue timeout occurs. */
		struct delayed_work work;

		/**
	 * @remaining_time: Time remaining before the job timeout expires.
		 * @timeout.remaining: Time remaining before a queue timeout.
		 *
	 * The job timeout is suspended when the queue is not scheduled by the
	 * FW. Every time we suspend the timer, we need to save the remaining
	 * time so we can restore it later on.
		 * When the timer is running, this value is set to MAX_SCHEDULE_TIMEOUT.
		 * When the timer is suspended, it's set to the time remaining when the
		 * timer was suspended.
		 */
	unsigned long remaining_time;

	/** @timeout_suspended: True if the job timeout was suspended. */
	bool timeout_suspended;
		unsigned long remaining;
	} timeout;

	/**
	 * @doorbell_id: Doorbell assigned to this queue.
@@ -899,6 +902,10 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue *
	if (IS_ERR_OR_NULL(queue))
		return;

	/* This should have been disabled before that point. */
	drm_WARN_ON(&group->ptdev->base,
		    disable_delayed_work_sync(&queue->timeout.work));

	if (queue->entity.fence_context)
		drm_sched_entity_destroy(&queue->entity);

@@ -1046,6 +1053,115 @@ group_unbind_locked(struct panthor_group *group)
	return 0;
}

static bool
group_is_idle(struct panthor_group *group)
{
	struct panthor_device *ptdev = group->ptdev;
	u32 inactive_queues;

	if (group->csg_id >= 0)
		return ptdev->scheduler->csg_slots[group->csg_id].idle;

	inactive_queues = group->idle_queues | group->blocked_queues;
	return hweight32(inactive_queues) == group->queue_count;
}

static void
queue_reset_timeout_locked(struct panthor_queue *queue)
{
	lockdep_assert_held(&queue->fence_ctx.lock);

	if (queue->timeout.remaining != MAX_SCHEDULE_TIMEOUT) {
		mod_delayed_work(queue->scheduler.timeout_wq,
				 &queue->timeout.work,
				 msecs_to_jiffies(JOB_TIMEOUT_MS));
	}
}

static bool
group_can_run(struct panthor_group *group)
{
	return group->state != PANTHOR_CS_GROUP_TERMINATED &&
	       group->state != PANTHOR_CS_GROUP_UNKNOWN_STATE &&
	       !group->destroyed && group->fatal_queues == 0 &&
	       !group->timedout;
}

static bool
queue_timeout_is_suspended(struct panthor_queue *queue)
{
	/* When running, the remaining time is set to MAX_SCHEDULE_TIMEOUT. */
	return queue->timeout.remaining != MAX_SCHEDULE_TIMEOUT;
}

static void
queue_suspend_timeout_locked(struct panthor_queue *queue)
{
	unsigned long qtimeout, now;
	struct panthor_group *group;
	struct panthor_job *job;
	bool timer_was_active;

	lockdep_assert_held(&queue->fence_ctx.lock);

	/* Already suspended, nothing to do. */
	if (queue_timeout_is_suspended(queue))
		return;

	job = list_first_entry_or_null(&queue->fence_ctx.in_flight_jobs,
				       struct panthor_job, node);
	group = job ? job->group : NULL;

	/* If the queue is blocked and the group is idle, we want the timer to
	 * keep running because the group can't be unblocked by other queues,
	 * so it has to come from an external source, and we want to timebox
	 * this external signalling.
	 */
	if (group && group_can_run(group) &&
	    (group->blocked_queues & BIT(job->queue_idx)) &&
	    group_is_idle(group))
		return;

	now = jiffies;
	qtimeout = queue->timeout.work.timer.expires;

	/* Cancel the timer. */
	timer_was_active = cancel_delayed_work(&queue->timeout.work);
	if (!timer_was_active || !job)
		queue->timeout.remaining = msecs_to_jiffies(JOB_TIMEOUT_MS);
	else if (time_after(qtimeout, now))
		queue->timeout.remaining = qtimeout - now;
	else
		queue->timeout.remaining = 0;

	if (WARN_ON_ONCE(queue->timeout.remaining > msecs_to_jiffies(JOB_TIMEOUT_MS)))
		queue->timeout.remaining = msecs_to_jiffies(JOB_TIMEOUT_MS);
}

static void
queue_suspend_timeout(struct panthor_queue *queue)
{
	spin_lock(&queue->fence_ctx.lock);
	queue_suspend_timeout_locked(queue);
	spin_unlock(&queue->fence_ctx.lock);
}

static void
queue_resume_timeout(struct panthor_queue *queue)
{
	spin_lock(&queue->fence_ctx.lock);

	if (queue_timeout_is_suspended(queue)) {
		mod_delayed_work(queue->scheduler.timeout_wq,
				 &queue->timeout.work,
				 queue->timeout.remaining);

		queue->timeout.remaining = MAX_SCHEDULE_TIMEOUT;
	}

	spin_unlock(&queue->fence_ctx.lock);
}

/**
 * cs_slot_prog_locked() - Program a queue slot
 * @ptdev: Device.
@@ -1084,10 +1200,8 @@ cs_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
			       CS_IDLE_EMPTY |
			       CS_STATE_MASK |
			       CS_EXTRACT_EVENT);
	if (queue->iface.input->insert != queue->iface.input->extract && queue->timeout_suspended) {
		drm_sched_resume_timeout(&queue->scheduler, queue->remaining_time);
		queue->timeout_suspended = false;
	}
	if (queue->iface.input->insert != queue->iface.input->extract)
		queue_resume_timeout(queue);
}

/**
@@ -1114,14 +1228,7 @@ cs_slot_reset_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
			       CS_STATE_STOP,
			       CS_STATE_MASK);

	/* If the queue is blocked, we want to keep the timeout running, so
	 * we can detect unbounded waits and kill the group when that happens.
	 */
	if (!(group->blocked_queues & BIT(cs_id)) && !queue->timeout_suspended) {
		queue->remaining_time = drm_sched_suspend_timeout(&queue->scheduler);
		queue->timeout_suspended = true;
		WARN_ON(queue->remaining_time > msecs_to_jiffies(JOB_TIMEOUT_MS));
	}
	queue_suspend_timeout(queue);

	return 0;
}
@@ -1921,28 +2028,6 @@ tick_ctx_is_full(const struct panthor_scheduler *sched,
	return ctx->group_count == sched->csg_slot_count;
}

static bool
group_is_idle(struct panthor_group *group)
{
	struct panthor_device *ptdev = group->ptdev;
	u32 inactive_queues;

	if (group->csg_id >= 0)
		return ptdev->scheduler->csg_slots[group->csg_id].idle;

	inactive_queues = group->idle_queues | group->blocked_queues;
	return hweight32(inactive_queues) == group->queue_count;
}

static bool
group_can_run(struct panthor_group *group)
{
	return group->state != PANTHOR_CS_GROUP_TERMINATED &&
	       group->state != PANTHOR_CS_GROUP_UNKNOWN_STATE &&
	       !group->destroyed && group->fatal_queues == 0 &&
	       !group->timedout;
}

static void
tick_ctx_pick_groups_from_list(const struct panthor_scheduler *sched,
			       struct panthor_sched_tick_ctx *ctx,
@@ -2624,6 +2709,7 @@ static void group_schedule_locked(struct panthor_group *group, u32 queue_mask)
static void queue_stop(struct panthor_queue *queue,
		       struct panthor_job *bad_job)
{
	disable_delayed_work_sync(&queue->timeout.work);
	drm_sched_stop(&queue->scheduler, bad_job ? &bad_job->base : NULL);
}

@@ -2635,6 +2721,7 @@ static void queue_start(struct panthor_queue *queue)
	list_for_each_entry(job, &queue->scheduler.pending_list, base.list)
		job->base.s_fence->parent = dma_fence_get(job->done_fence);

	enable_delayed_work(&queue->timeout.work);
	drm_sched_start(&queue->scheduler, 0);
}

@@ -2701,7 +2788,6 @@ void panthor_sched_suspend(struct panthor_device *ptdev)
{
	struct panthor_scheduler *sched = ptdev->scheduler;
	struct panthor_csg_slots_upd_ctx upd_ctx;
	struct panthor_group *group;
	u32 suspended_slots;
	u32 i;

@@ -2791,8 +2877,8 @@ void panthor_sched_suspend(struct panthor_device *ptdev)

	for (i = 0; i < sched->csg_slot_count; i++) {
		struct panthor_csg_slot *csg_slot = &sched->csg_slots[i];
		struct panthor_group *group = csg_slot->group;

		group = csg_slot->group;
		if (!group)
			continue;

@@ -2921,35 +3007,47 @@ void panthor_fdinfo_gather_group_samples(struct panthor_file *pfile)
	xa_unlock(&gpool->xa);
}

static void group_sync_upd_work(struct work_struct *work)
static bool queue_check_job_completion(struct panthor_queue *queue)
{
	struct panthor_group *group =
		container_of(work, struct panthor_group, sync_upd_work);
	struct panthor_syncobj_64b *syncobj = NULL;
	struct panthor_job *job, *job_tmp;
	bool cookie, progress = false;
	LIST_HEAD(done_jobs);
	u32 queue_idx;
	bool cookie;

	cookie = dma_fence_begin_signalling();
	for (queue_idx = 0; queue_idx < group->queue_count; queue_idx++) {
		struct panthor_queue *queue = group->queues[queue_idx];
		struct panthor_syncobj_64b *syncobj;

		if (!queue)
			continue;

		syncobj = group->syncobjs->kmap + (queue_idx * sizeof(*syncobj));

	spin_lock(&queue->fence_ctx.lock);
	list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) {
		if (!syncobj) {
			struct panthor_group *group = job->group;

			syncobj = group->syncobjs->kmap +
				  (job->queue_idx * sizeof(*syncobj));
		}

		if (syncobj->seqno < job->done_fence->seqno)
			break;

		list_move_tail(&job->node, &done_jobs);
		dma_fence_signal_locked(job->done_fence);
	}
		spin_unlock(&queue->fence_ctx.lock);

	if (list_empty(&queue->fence_ctx.in_flight_jobs)) {
		/* If we have no job left, we cancel the timer, and reset remaining
		 * time to its default so it can be restarted next time
		 * queue_resume_timeout() is called.
		 */
		queue_suspend_timeout_locked(queue);

		/* If there's no job pending, we consider it progress to avoid a
		 * spurious timeout if the timeout handler and the sync update
		 * handler raced.
		 */
		progress = true;
	} else if (!list_empty(&done_jobs)) {
		queue_reset_timeout_locked(queue);
		progress = true;
	}
	spin_unlock(&queue->fence_ctx.lock);
	dma_fence_end_signalling(cookie);

	list_for_each_entry_safe(job, job_tmp, &done_jobs, node) {
@@ -2959,6 +3057,27 @@ static void group_sync_upd_work(struct work_struct *work)
		panthor_job_put(&job->base);
	}

	return progress;
}

static void group_sync_upd_work(struct work_struct *work)
{
	struct panthor_group *group =
		container_of(work, struct panthor_group, sync_upd_work);
	u32 queue_idx;
	bool cookie;

	cookie = dma_fence_begin_signalling();
	for (queue_idx = 0; queue_idx < group->queue_count; queue_idx++) {
		struct panthor_queue *queue = group->queues[queue_idx];

		if (!queue)
			continue;

		queue_check_job_completion(queue);
	}
	dma_fence_end_signalling(cookie);

	group_put(group);
}

@@ -3206,17 +3325,6 @@ queue_run_job(struct drm_sched_job *sched_job)
	queue->iface.input->insert = job->ringbuf.end;

	if (group->csg_id < 0) {
		/* If the queue is blocked, we want to keep the timeout running, so we
		 * can detect unbounded waits and kill the group when that happens.
		 * Otherwise, we suspend the timeout so the time we spend waiting for
		 * a CSG slot is not counted.
		 */
		if (!(group->blocked_queues & BIT(job->queue_idx)) &&
		    !queue->timeout_suspended) {
			queue->remaining_time = drm_sched_suspend_timeout(&queue->scheduler);
			queue->timeout_suspended = true;
		}

		group_schedule_locked(group, BIT(job->queue_idx));
	} else {
		gpu_write(ptdev, CSF_DOORBELL(queue->doorbell_id), 1);
@@ -3225,6 +3333,7 @@ queue_run_job(struct drm_sched_job *sched_job)
			pm_runtime_get(ptdev->base.dev);
			sched->pm.has_ref = true;
		}
		queue_resume_timeout(queue);
		panthor_devfreq_record_busy(sched->ptdev);
	}

@@ -3274,7 +3383,6 @@ queue_timedout_job(struct drm_sched_job *sched_job)
	mutex_unlock(&sched->lock);

	queue_start(queue);

	return DRM_GPU_SCHED_STAT_RESET;
}

@@ -3317,6 +3425,17 @@ static u32 calc_profiling_ringbuf_num_slots(struct panthor_device *ptdev,
	return DIV_ROUND_UP(cs_ringbuf_size, min_profiled_job_instrs * sizeof(u64));
}

static void queue_timeout_work(struct work_struct *work)
{
	struct panthor_queue *queue = container_of(work, struct panthor_queue,
						   timeout.work.work);
	bool progress;

	progress = queue_check_job_completion(queue);
	if (!progress)
		drm_sched_fault(&queue->scheduler);
}

static struct panthor_queue *
group_create_queue(struct panthor_group *group,
		   const struct drm_panthor_queue_create *args,
@@ -3333,7 +3452,7 @@ group_create_queue(struct panthor_group *group,
		 * their profiling status.
		 */
		.credit_limit = args->ringbuf_size / sizeof(u64),
		.timeout = msecs_to_jiffies(JOB_TIMEOUT_MS),
		.timeout = MAX_SCHEDULE_TIMEOUT,
		.timeout_wq = group->ptdev->reset.wq,
		.dev = group->ptdev->base.dev,
	};
@@ -3355,6 +3474,8 @@ group_create_queue(struct panthor_group *group,
	if (!queue)
		return ERR_PTR(-ENOMEM);

	queue->timeout.remaining = msecs_to_jiffies(JOB_TIMEOUT_MS);
	INIT_DELAYED_WORK(&queue->timeout.work, queue_timeout_work);
	queue->fence_ctx.id = dma_fence_context_alloc(1);
	spin_lock_init(&queue->fence_ctx.lock);
	INIT_LIST_HEAD(&queue->fence_ctx.in_flight_jobs);