Commit b6fe4ff3 authored by Christian König's avatar Christian König Committed by Alex Deucher
Browse files

drm/amdgpu: fix handling in amdgpu_userq_create



Well mostly the same issues the other code had as well:

1. Memory allocation while holding the userq_mutex lock is forbidden!
2. Things were created/started/published in the wrong order.
3. The reset lock was taken in the wrong order and seems to be
   unecessary in the first place.
4. Error messages on invalid input parameters can spam the logs.
5. Error messages on memory allocation failures are usually superflous
   as well.

Signed-off-by: default avatarChristian König <christian.koenig@amd.com>
Reviewed-by: default avatarSunil Khatri <sunil.khatri@amd.com>
Reviewed-by: default avatarPrike Liang <Prike.Liang@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 89e50de5654dbe7a137e03d78629542e17ba7202)
parent d093c01d
Loading
Loading
Loading
Loading
+52 −66
Original line number Diff line number Diff line
@@ -708,14 +708,14 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
	const struct amdgpu_userq_funcs *uq_funcs;
	struct amdgpu_usermode_queue *queue;
	struct amdgpu_db_info db_info;
	bool skip_map_queue;
	u32 qid;
	uint64_t index;
	int r = 0;
	int priority =
		(args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) >>
		AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT;
	int priority;
	u32 qid;
	int r;

	priority =
		(args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK)
		>> AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT;
	r = amdgpu_userq_priority_permit(filp, priority);
	if (r)
		return r;
@@ -728,40 +728,43 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)

	uq_funcs = adev->userq_funcs[args->in.ip_type];
	if (!uq_funcs) {
		drm_file_err(uq_mgr->file, "Usermode queue is not supported for this IP (%u)\n",
			     args->in.ip_type);
		r = -EINVAL;
		goto err_pm_runtime;
	}

	queue = kzalloc_obj(struct amdgpu_usermode_queue);
	if (!queue) {
		drm_file_err(uq_mgr->file, "Failed to allocate memory for queue\n");
		r = -ENOMEM;
		goto err_pm_runtime;
	}

	kref_init(&queue->refcount);
	INIT_LIST_HEAD(&queue->userq_va_list);
	queue->doorbell_handle = args->in.doorbell_handle;
	queue->queue_type = args->in.ip_type;
	queue->vm = &fpriv->vm;
	queue->priority = priority;

	db_info.queue_type = queue->queue_type;
	db_info.doorbell_handle = queue->doorbell_handle;
	db_info.db_obj = &queue->db_obj;
	db_info.doorbell_offset = args->in.doorbell_offset;

	queue->userq_mgr = uq_mgr;
	INIT_DELAYED_WORK(&queue->hang_detect_work,
			  amdgpu_userq_hang_detect_work);

	/* Validate the userq virtual address.*/
	r = amdgpu_bo_reserve(fpriv->vm.root.bo, false);
	mutex_init(&queue->fence_drv_lock);
	xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC);
	r = amdgpu_userq_fence_driver_alloc(adev, &queue->fence_drv);
	if (r)
		goto free_queue;

	if (amdgpu_userq_input_va_validate(adev, queue, args->in.queue_va, args->in.queue_size) ||
	    amdgpu_userq_input_va_validate(adev, queue, args->in.rptr_va, AMDGPU_GPU_PAGE_SIZE) ||
	    amdgpu_userq_input_va_validate(adev, queue, args->in.wptr_va, AMDGPU_GPU_PAGE_SIZE)) {
	/* Make sure the queue can actually run with those virtual addresses. */
	r = amdgpu_bo_reserve(fpriv->vm.root.bo, false);
	if (r)
		goto free_fence_drv;

	if (amdgpu_userq_input_va_validate(adev, queue, args->in.queue_va,
					   args->in.queue_size) ||
	    amdgpu_userq_input_va_validate(adev, queue, args->in.rptr_va,
					   AMDGPU_GPU_PAGE_SIZE) ||
	    amdgpu_userq_input_va_validate(adev, queue, args->in.wptr_va,
					   AMDGPU_GPU_PAGE_SIZE)) {
		r = -EINVAL;
		amdgpu_bo_unreserve(fpriv->vm.root.bo);
		goto clean_mapping;
@@ -769,6 +772,10 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
	amdgpu_bo_unreserve(fpriv->vm.root.bo);

	/* Convert relative doorbell offset into absolute doorbell index */
	db_info.queue_type = queue->queue_type;
	db_info.doorbell_handle = queue->doorbell_handle;
	db_info.db_obj = &queue->db_obj;
	db_info.doorbell_offset = args->in.doorbell_offset;
	index = amdgpu_userq_get_doorbell_index(uq_mgr, &db_info, filp);
	if (index == (uint64_t)-EINVAL) {
		drm_file_err(uq_mgr->file, "Failed to get doorbell for queue\n");
@@ -777,85 +784,64 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
	}

	queue->doorbell_index = index;
	mutex_init(&queue->fence_drv_lock);
	xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC);
	r = amdgpu_userq_fence_driver_alloc(adev, &queue->fence_drv);
	if (r) {
		drm_file_err(uq_mgr->file, "Failed to alloc fence driver\n");
		goto clean_mapping;
	}

	r = uq_funcs->mqd_create(queue, &args->in);
	if (r) {
		drm_file_err(uq_mgr->file, "Failed to create Queue\n");
		goto clean_fence_driver;
		goto clean_mapping;
	}

	/* Update VM owner at userq submit-time for page-fault attribution. */
	amdgpu_vm_set_task_info(&fpriv->vm);

	r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue,
				GFP_KERNEL));
	if (r)
		goto clean_mqd;

	amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);

	/* don't map the queue if scheduling is halted */
	if (adev->userq_halt_for_enforce_isolation &&
	    ((queue->queue_type == AMDGPU_HW_IP_GFX) ||
	     (queue->queue_type == AMDGPU_HW_IP_COMPUTE)))
		skip_map_queue = true;
	else
		skip_map_queue = false;
	if (!skip_map_queue) {
	if (!adev->userq_halt_for_enforce_isolation ||
	    ((queue->queue_type != AMDGPU_HW_IP_GFX) &&
	     (queue->queue_type != AMDGPU_HW_IP_COMPUTE))) {
		r = amdgpu_userq_map_helper(queue);
		if (r) {
			drm_file_err(uq_mgr->file, "Failed to map Queue\n");
			goto clean_mqd;
			mutex_unlock(&uq_mgr->userq_mutex);
			goto clean_doorbell;
		}
	}

	/* drop this refcount during queue destroy */
	kref_init(&queue->refcount);

	/* Wait for mode-1 reset to complete */
	down_read(&adev->reset_domain->sem);
	atomic_inc(&uq_mgr->userq_count[queue->queue_type]);
	mutex_unlock(&uq_mgr->userq_mutex);

	r = xa_alloc(&uq_mgr->userq_xa, &qid, queue,
		     XA_LIMIT(1, AMDGPU_MAX_USERQ_COUNT), GFP_KERNEL);
		     XA_LIMIT(1, AMDGPU_MAX_USERQ_COUNT),
		     GFP_KERNEL);
	if (r) {
		if (!skip_map_queue)
			amdgpu_userq_unmap_helper(queue);
		r = -ENOMEM;
		goto clean_reset_domain;
	}

	r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue, GFP_KERNEL));
	if (r) {
		xa_erase(&uq_mgr->userq_xa, qid);
		if (!skip_map_queue)
			amdgpu_userq_unmap_helper(queue);
		goto clean_reset_domain;
		/*
		 * This drops the last reference which should take care of
		 * all cleanup.
		 */
		amdgpu_userq_put(queue);
		return r;
	}
	up_read(&adev->reset_domain->sem);

	amdgpu_debugfs_userq_init(filp, queue, qid);
	INIT_DELAYED_WORK(&queue->hang_detect_work,
			  amdgpu_userq_hang_detect_work);

	args->out.queue_id = qid;
	atomic_inc(&uq_mgr->userq_count[queue->queue_type]);
	mutex_unlock(&uq_mgr->userq_mutex);
	return 0;

clean_reset_domain:
	up_read(&adev->reset_domain->sem);
clean_doorbell:
	xa_erase_irq(&adev->userq_doorbell_xa, index);
clean_mqd:
	mutex_unlock(&uq_mgr->userq_mutex);
	uq_funcs->mqd_destroy(queue);
clean_fence_driver:
	amdgpu_userq_fence_driver_free(queue);
clean_mapping:
	amdgpu_bo_reserve(fpriv->vm.root.bo, true);
	amdgpu_userq_buffer_vas_list_cleanup(adev, queue);
	amdgpu_bo_unreserve(fpriv->vm.root.bo);
	mutex_destroy(&queue->fence_drv_lock);
free_fence_drv:
	amdgpu_userq_fence_driver_free(queue);
free_queue:
	kfree(queue);
err_pm_runtime: