Commit 34f31fe4 authored by Prike Liang, committed by Alex Deucher
Browse files

drm/amdgpu: rework userq fence driver alloc/destroy



The correct fix is to tie the global xa entry lifetime to the
queue lifetime: insert in amdgpu_userq_create() and erase in
amdgpu_userq_cleanup(), both at the well-defined doorbell_index key.
This makes the operation O(1) and resolves the fence driver UAF
problem by binding the userq fence driver to each queue.

v2: clean up the local variables initialization. (Christian)

Signed-off-by: Prike Liang <Prike.Liang@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 05ce4441
Loading
Loading
Loading
Loading
+0 −5
Original line number Diff line number Diff line
@@ -1045,11 +1045,6 @@ struct amdgpu_device {
	struct amdgpu_mqd               mqds[AMDGPU_HW_IP_NUM];
	const struct amdgpu_userq_funcs *userq_funcs[AMDGPU_HW_IP_NUM];

	/* xarray used to retrieve the user queue fence driver reference
	 * in the EOP interrupt handler to signal the particular user
	 * queue fence.
	 */
	struct xarray			userq_xa;
	/**
	 * @userq_doorbell_xa: Global user queue map (doorbell index → queue)
	 * Key: doorbell_index (unique global identifier for the queue)
+1 −3
Original line number Diff line number Diff line
@@ -3757,15 +3757,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
	spin_lock_init(&adev->virt.rlcg_reg_lock);
	spin_lock_init(&adev->wb.lock);

	xa_init_flags(&adev->userq_xa, XA_FLAGS_LOCK_IRQ);

	INIT_LIST_HEAD(&adev->reset_list);

	INIT_LIST_HEAD(&adev->ras_list);

	INIT_LIST_HEAD(&adev->pm.od_kobj_list);

	xa_init(&adev->userq_doorbell_xa);
	xa_init_flags(&adev->userq_doorbell_xa, XA_FLAGS_LOCK_IRQ);

	INIT_DELAYED_WORK(&adev->delayed_init_work,
			  amdgpu_device_delayed_init_work_handler);
+1 −19
Original line number Diff line number Diff line
@@ -81,7 +81,6 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
				    struct amdgpu_usermode_queue *userq)
{
	struct amdgpu_userq_fence_driver *fence_drv;
	unsigned long flags;
	int r;

	fence_drv = kzalloc_obj(*fence_drv);
@@ -104,19 +103,10 @@ int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
	fence_drv->context = dma_fence_context_alloc(1);
	get_task_comm(fence_drv->timeline_name, current);

	xa_lock_irqsave(&adev->userq_xa, flags);
	r = xa_err(__xa_store(&adev->userq_xa, userq->doorbell_index,
			      fence_drv, GFP_KERNEL));
	xa_unlock_irqrestore(&adev->userq_xa, flags);
	if (r)
		goto free_seq64;

	userq->fence_drv = fence_drv;

	return 0;

free_seq64:
	amdgpu_seq64_free(adev, fence_drv->va);
free_fence_drv:
	kfree(fence_drv);

@@ -187,11 +177,9 @@ void amdgpu_userq_fence_driver_destroy(struct kref *ref)
	struct amdgpu_userq_fence_driver *fence_drv = container_of(ref,
					 struct amdgpu_userq_fence_driver,
					 refcount);
	struct amdgpu_userq_fence_driver *xa_fence_drv;
	struct amdgpu_device *adev = fence_drv->adev;
	struct amdgpu_userq_fence *fence, *tmp;
	struct xarray *xa = &adev->userq_xa;
	unsigned long index, flags;
	unsigned long flags;
	struct dma_fence *f;

	spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
@@ -208,12 +196,6 @@ void amdgpu_userq_fence_driver_destroy(struct kref *ref)
	}
	spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);

	xa_lock_irqsave(xa, flags);
	xa_for_each(xa, index, xa_fence_drv)
		if (xa_fence_drv == fence_drv)
			__xa_erase(xa, index);
	xa_unlock_irqrestore(xa, flags);

	/* Free seq64 memory */
	amdgpu_seq64_free(adev, fence_drv->va);
	kfree(fence_drv);
+5 −5
Original line number Diff line number Diff line
@@ -6502,14 +6502,14 @@ static int gfx_v11_0_eop_irq(struct amdgpu_device *adev,
	DRM_DEBUG("IH: CP EOP\n");

	if (adev->enable_mes && doorbell_offset) {
		struct amdgpu_userq_fence_driver *fence_drv = NULL;
		struct xarray *xa = &adev->userq_xa;
		struct amdgpu_usermode_queue *queue;
		struct xarray *xa = &adev->userq_doorbell_xa;
		unsigned long flags;

		xa_lock_irqsave(xa, flags);
		fence_drv = xa_load(xa, doorbell_offset);
		if (fence_drv)
			amdgpu_userq_fence_driver_process(fence_drv);
		queue = xa_load(xa, doorbell_offset);
		if (queue)
			amdgpu_userq_fence_driver_process(queue->fence_drv);
		xa_unlock_irqrestore(xa, flags);
	} else {
		me_id = (entry->ring_id & 0x0c) >> 2;
+5 −5
Original line number Diff line number Diff line
@@ -4854,14 +4854,14 @@ static int gfx_v12_0_eop_irq(struct amdgpu_device *adev,
	DRM_DEBUG("IH: CP EOP\n");

	if (adev->enable_mes && doorbell_offset) {
		struct amdgpu_userq_fence_driver *fence_drv = NULL;
		struct xarray *xa = &adev->userq_xa;
		struct xarray *xa = &adev->userq_doorbell_xa;
		struct amdgpu_usermode_queue *queue;
		unsigned long flags;

		xa_lock_irqsave(xa, flags);
		fence_drv = xa_load(xa, doorbell_offset);
		if (fence_drv)
			amdgpu_userq_fence_driver_process(fence_drv);
		queue = xa_load(xa, doorbell_offset);
		if (queue)
			amdgpu_userq_fence_driver_process(queue->fence_drv);
		xa_unlock_irqrestore(xa, flags);
	} else {
		me_id = (entry->ring_id & 0x0c) >> 2;
Loading