Commit 51a0f459 authored by Oak Zeng's avatar Oak Zeng Committed by Alex Deucher
Browse files

drm/amdkfd: Check HIQ's MQD for queue preemption status



MEC firmware can silently fail a queue preemption request
without reporting a timeout. In this case, the HIQ MQD's
queue_doorbell_id field will be set. Check this field to see whether
the last queue preemption was successful or not.

Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
Suggested-by: Jay Cornwall <Jay.Cornwall@amd.com>
Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 6d909c5d
Loading
Loading
Loading
Loading
+17 −0
Original line number Diff line number Diff line
@@ -1393,6 +1393,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
				uint32_t filter_param)
{
	int retval = 0;
	struct mqd_manager *mqd_mgr;

	if (!dqm->sched_running)
		return 0;
@@ -1424,6 +1425,22 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
		return retval;
	}

	/* In the current MEC firmware implementation, if a compute queue
	 * doesn't respond to the preemption request in time, HIQ will
	 * abandon the unmap request without returning any timeout error
	 * to the driver. Instead, MEC firmware will log the doorbell of the
	 * unresponsive compute queue to the HIQ.MQD.queue_doorbell_id fields.
	 * To make sure the queue unmap was successful, the driver needs to
	 * check those fields.
	 */
	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
	if (mqd_mgr->read_doorbell_id(dqm->packets.priv_queue->queue->mqd)) {
		pr_err("HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n");
		while (halt_if_hws_hang)
			schedule();
		return -ETIME;
	}

	pm_release_ib(&dqm->packets);
	dqm->active_runlist = false;

+1 −0
Original line number Diff line number Diff line
@@ -101,6 +101,7 @@ struct mqd_manager {
#if defined(CONFIG_DEBUG_FS)
	int	(*debugfs_show_mqd)(struct seq_file *m, void *data);
#endif
	uint32_t (*read_doorbell_id)(void *mqd);

	struct mutex	mqd_mutex;
	struct kfd_dev	*dev;
+8 −0
Original line number Diff line number Diff line
@@ -226,6 +226,13 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
	__update_mqd(mm, mqd, q, 1);
}

/*
 * Report the queue_doorbell_id0 field stored in a CIK MQD.
 *
 * @mqd: opaque pointer that is interpreted as a struct cik_mqd.
 *
 * Returns the current value of queue_doorbell_id0; the MQD itself is
 * only read, never modified.
 */
static uint32_t read_doorbell_id(void *mqd)
{
	/* void * converts implicitly in C, so no explicit cast is needed. */
	struct cik_mqd *cik = mqd;
	uint32_t doorbell_id = cik->queue_doorbell_id0;

	return doorbell_id;
}

static void update_mqd_hawaii(struct mqd_manager *mm, void *mqd,
			struct queue_properties *q)
{
@@ -398,6 +405,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
		mqd->read_doorbell_id = read_doorbell_id;
		break;
	case KFD_MQD_TYPE_DIQ:
		mqd->allocate_mqd = allocate_mqd;
+8 −0
Original line number Diff line number Diff line
@@ -224,6 +224,13 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
	q->is_active = QUEUE_IS_ACTIVE(*q);
}

/*
 * Report the queue_doorbell_id0 field stored in a v10 compute MQD.
 *
 * @mqd: opaque pointer that is interpreted as a struct v10_compute_mqd.
 *
 * Returns the current value of queue_doorbell_id0; the MQD itself is
 * only read, never modified.
 */
static uint32_t read_doorbell_id(void *mqd)
{
	/* void * converts implicitly in C, so no explicit cast is needed. */
	struct v10_compute_mqd *v10 = mqd;
	uint32_t doorbell_id = v10->queue_doorbell_id0;

	return doorbell_id;
}

static int destroy_mqd(struct mqd_manager *mm, void *mqd,
		       enum kfd_preempt_type type,
		       unsigned int timeout, uint32_t pipe_id,
@@ -425,6 +432,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
		mqd->read_doorbell_id = read_doorbell_id;
		pr_debug("%s@%i\n", __func__, __LINE__);
		break;
	case KFD_MQD_TYPE_DIQ:
+8 −0
Original line number Diff line number Diff line
@@ -276,6 +276,13 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
}


/*
 * Report the queue_doorbell_id0 field stored in a v9 MQD.
 *
 * @mqd: opaque pointer that is interpreted as a struct v9_mqd.
 *
 * Returns the current value of queue_doorbell_id0; the MQD itself is
 * only read, never modified.
 */
static uint32_t read_doorbell_id(void *mqd)
{
	/* void * converts implicitly in C, so no explicit cast is needed. */
	struct v9_mqd *v9 = mqd;
	uint32_t doorbell_id = v9->queue_doorbell_id0;

	return doorbell_id;
}

static int destroy_mqd(struct mqd_manager *mm, void *mqd,
			enum kfd_preempt_type type,
			unsigned int timeout, uint32_t pipe_id,
@@ -477,6 +484,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
		mqd->read_doorbell_id = read_doorbell_id;
		break;
	case KFD_MQD_TYPE_DIQ:
		mqd->allocate_mqd = allocate_mqd;
Loading