Commit a7b2451d authored by Amber Lin's avatar Amber Lin Committed by Alex Deucher
Browse files

drm/amdkfd: Fix circular lock in nocpsch path



Calling free_mqd inside of destroy_queue_nocpsch_locked can cause a
circular lock. destroy_queue_nocpsch_locked is called under a DQM lock,
which is taken in MMU notifiers, potentially in FS reclaim context.
Taking another lock, which is BO reservation lock from free_mqd, while
causing an FS reclaim inside the DQM lock creates a problematic circular
lock dependency. Therefore move free_mqd out of
destroy_queue_nocpsch_locked and call it after unlocking DQM.

Signed-off-by: default avatarAmber Lin <Amber.Lin@amd.com>
Reviewed-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent d760895d
Loading
Loading
Loading
Loading
+13 −5
Original line number Diff line number Diff line
@@ -486,9 +486,6 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
	if (retval == -ETIME)
		qpd->reset_wavefronts = true;


	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

	list_del(&q->list);
	if (list_empty(&qpd->queues_list)) {
		if (qpd->reset_wavefronts) {
@@ -523,6 +520,8 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
	int retval;
	uint64_t sdma_val = 0;
	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
	struct mqd_manager *mqd_mgr =
		dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)];

	/* Get the SDMA queue stats */
	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
@@ -540,6 +539,8 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
		pdd->sdma_past_activity_counter += sdma_val;
	dqm_unlock(dqm);

	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

	return retval;
}

@@ -1629,7 +1630,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
static int process_termination_nocpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	struct queue *q, *next;
	struct queue *q;
	struct device_process_node *cur, *next_dpn;
	int retval = 0;
	bool found = false;
@@ -1637,12 +1638,19 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm,
	dqm_lock(dqm);

	/* Clear all user mode queues */
	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
	while (!list_empty(&qpd->queues_list)) {
		struct mqd_manager *mqd_mgr;
		int ret;

		q = list_first_entry(&qpd->queues_list, struct queue, list);
		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
				q->properties.type)];
		ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
		if (ret)
			retval = ret;
		dqm_unlock(dqm);
		mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
		dqm_lock(dqm);
	}

	/* Unregister process */