Commit 68055b28 authored by Dave Airlie
Browse files

Merge tag 'amd-drm-fixes-7.1-2026-05-13' of...

Merge tag 'amd-drm-fixes-7.1-2026-05-13' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes

amd-drm-fixes-7.1-2026-05-13:

amdgpu:
- Userq fixes
- DCN 3.2 fix
- RAS fix
- GC 12 fix

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patch.msgid.link/20260513224053.40670-1-alexander.deucher@amd.com
parents 9e20b4b8 5d08559c
Loading
Loading
Loading
Loading
+21 −8
Original line number Diff line number Diff line
@@ -552,8 +552,9 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
					size_t size, loff_t *pos)
{
	struct amdgpu_ring *ring = file_inode(f)->i_private;
	uint32_t value, result, early[3];
	u32 value, result, early[3] = { 0 };
	uint64_t p;
	u32 avail_dw, start_dw, read_dw;
	loff_t i;
	int r;

@@ -565,10 +566,10 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,

	result = 0;

	if (*pos < 12) {
	if (ring->funcs->type == AMDGPU_RING_TYPE_CPER)
		mutex_lock(&ring->adev->cper.ring_lock);

	if (*pos < 12) {
		early[0] = amdgpu_ring_get_rptr(ring) & ring->buf_mask;
		early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask;
		early[2] = ring->wptr & ring->buf_mask;
@@ -600,13 +601,24 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
			*pos += 4;
		}
	} else {
		early[0] = amdgpu_ring_get_rptr(ring) & ring->buf_mask;
		early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask;

		p = early[0];
		if (early[0] <= early[1])
			size = (early[1] - early[0]);
			avail_dw = early[1] - early[0];
		else
			size = ring->ring_size - (early[0] - early[1]);
			avail_dw = ring->buf_mask + 1 - (early[0] - early[1]);

		while (size) {
		start_dw = (*pos > 12) ? ((*pos - 12) >> 2) : 0;
		if (start_dw >= avail_dw)
			goto out;

		p = (p + start_dw) & ring->ptr_mask;
		avail_dw -= start_dw;
		read_dw = min_t(u32, avail_dw, size >> 2);

		while (read_dw) {
			if (p == early[1])
				goto out;

@@ -619,9 +631,10 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,

			buf += 4;
			result += 4;
			size--;
			read_dw--;
			p++;
			p &= ring->ptr_mask;
			*pos += 4;
		}
	}

+54 −77
Original line number Diff line number Diff line
@@ -106,9 +106,6 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr)
	int r = 0;
	int i;

	/* Warning if current process mutex is not held */
	WARN_ON(!mutex_is_locked(&uq_mgr->userq_mutex));

	if (unlikely(adev->debug_disable_gpu_ring_reset)) {
		dev_err(adev->dev, "userq reset disabled by debug mask\n");
		return 0;
@@ -127,9 +124,11 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr)
	 */
	for (i = 0; i < num_queue_types; i++) {
		int ring_type = queue_types[i];
		const struct amdgpu_userq_funcs *funcs = adev->userq_funcs[ring_type];
		const struct amdgpu_userq_funcs *funcs =
			adev->userq_funcs[ring_type];

		if (!amdgpu_userq_is_reset_type_supported(adev, ring_type, AMDGPU_RESET_TYPE_PER_QUEUE))
		if (!amdgpu_userq_is_reset_type_supported(adev, ring_type,
							  AMDGPU_RESET_TYPE_PER_QUEUE))
				continue;

		if (atomic_read(&uq_mgr->userq_count[ring_type]) > 0 &&
@@ -150,38 +149,22 @@ amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr)

static void amdgpu_userq_hang_detect_work(struct work_struct *work)
{
	struct amdgpu_usermode_queue *queue = container_of(work,
							  struct amdgpu_usermode_queue,
	struct amdgpu_usermode_queue *queue =
		container_of(work, struct amdgpu_usermode_queue,
			     hang_detect_work.work);
	struct dma_fence *fence;
	struct amdgpu_userq_mgr *uq_mgr;

	if (!queue->userq_mgr)
		return;

	uq_mgr = queue->userq_mgr;
	fence = READ_ONCE(queue->hang_detect_fence);
	/* Fence already signaled – no action needed */
	if (!fence || dma_fence_is_signaled(fence))
		return;

	mutex_lock(&uq_mgr->userq_mutex);
	amdgpu_userq_detect_and_reset_queues(uq_mgr);
	mutex_unlock(&uq_mgr->userq_mutex);
	amdgpu_userq_detect_and_reset_queues(queue->userq_mgr);
}

/*
 * Start hang detection for a user queue fence. A delayed work will be scheduled
 * to check if the fence is still pending after the timeout period.
 * to reset the queues when the fence doesn't signal in time.
 */
void amdgpu_userq_start_hang_detect_work(struct amdgpu_usermode_queue *queue)
{
	struct amdgpu_device *adev;
	unsigned long timeout_ms;

	if (!queue || !queue->userq_mgr || !queue->userq_mgr->adev)
		return;

	adev = queue->userq_mgr->adev;
	/* Determine timeout based on queue type */
	switch (queue->queue_type) {
@@ -199,8 +182,6 @@ void amdgpu_userq_start_hang_detect_work(struct amdgpu_usermode_queue *queue)
		break;
	}

	/* Store the fence to monitor and schedule hang detection */
	WRITE_ONCE(queue->hang_detect_fence, queue->last_fence);
	schedule_delayed_work(&queue->hang_detect_work,
		     msecs_to_jiffies(timeout_ms));
}
@@ -210,18 +191,24 @@ void amdgpu_userq_process_fence_irq(struct amdgpu_device *adev, u32 doorbell)
	struct xarray *xa = &adev->userq_doorbell_xa;
	struct amdgpu_usermode_queue *queue;
	unsigned long flags;
	int r;

	xa_lock_irqsave(xa, flags);
	queue = xa_load(xa, doorbell);
	if (queue)
		amdgpu_userq_fence_driver_process(queue->fence_drv);
	xa_unlock_irqrestore(xa, flags);
}
	if (queue) {
		r = amdgpu_userq_fence_driver_process(queue->fence_drv);
		/*
		 * We are in interrupt context here, this *can't* wait for
		 * reset work to finish.
		 */
		if (r >= 0)
			cancel_delayed_work(&queue->hang_detect_work);

static void amdgpu_userq_init_hang_detect_work(struct amdgpu_usermode_queue *queue)
{
	INIT_DELAYED_WORK(&queue->hang_detect_work, amdgpu_userq_hang_detect_work);
	queue->hang_detect_fence = NULL;
		/* Restart the timer when there are still fences pending */
		if (r == 1)
			amdgpu_userq_start_hang_detect_work(queue);
	}
	xa_unlock_irqrestore(xa, flags);
}

static int amdgpu_userq_buffer_va_list_add(struct amdgpu_usermode_queue *queue,
@@ -345,23 +332,18 @@ static int amdgpu_userq_preempt_helper(struct amdgpu_usermode_queue *queue)
	struct amdgpu_device *adev = uq_mgr->adev;
	const struct amdgpu_userq_funcs *userq_funcs =
		adev->userq_funcs[queue->queue_type];
	bool found_hung_queue = false;
	int r = 0;
	int r;

	if (queue->state == AMDGPU_USERQ_STATE_MAPPED) {
		r = userq_funcs->preempt(queue);
		if (r) {
			queue->state = AMDGPU_USERQ_STATE_HUNG;
			found_hung_queue = true;
			return r;
		} else {
			queue->state = AMDGPU_USERQ_STATE_PREEMPTED;
		}
	}

	if (found_hung_queue)
		amdgpu_userq_detect_and_reset_queues(uq_mgr);

	return r;
	return 0;
}

static int amdgpu_userq_restore_helper(struct amdgpu_usermode_queue *queue)
@@ -390,24 +372,21 @@ static int amdgpu_userq_unmap_helper(struct amdgpu_usermode_queue *queue)
	struct amdgpu_device *adev = uq_mgr->adev;
	const struct amdgpu_userq_funcs *userq_funcs =
		adev->userq_funcs[queue->queue_type];
	bool found_hung_queue = false;
	int r = 0;
	int r;

	if ((queue->state == AMDGPU_USERQ_STATE_MAPPED) ||
	    (queue->state == AMDGPU_USERQ_STATE_PREEMPTED)) {

		r = userq_funcs->unmap(queue);
		if (r) {
			queue->state = AMDGPU_USERQ_STATE_HUNG;
			found_hung_queue = true;
			return r;
		} else {
			queue->state = AMDGPU_USERQ_STATE_UNMAPPED;
		}
	}

	if (found_hung_queue)
		amdgpu_userq_detect_and_reset_queues(uq_mgr);

	return r;
	return 0;
}

static int amdgpu_userq_map_helper(struct amdgpu_usermode_queue *queue)
@@ -416,19 +395,19 @@ static int amdgpu_userq_map_helper(struct amdgpu_usermode_queue *queue)
	struct amdgpu_device *adev = uq_mgr->adev;
	const struct amdgpu_userq_funcs *userq_funcs =
		adev->userq_funcs[queue->queue_type];
	int r = 0;
	int r;

	if (queue->state == AMDGPU_USERQ_STATE_UNMAPPED) {
		r = userq_funcs->map(queue);
		if (r) {
			queue->state = AMDGPU_USERQ_STATE_HUNG;
			amdgpu_userq_detect_and_reset_queues(uq_mgr);
			return r;
		} else {
			queue->state = AMDGPU_USERQ_STATE_MAPPED;
		}
	}

	return r;
	return 0;
}

static void amdgpu_userq_wait_for_last_fence(struct amdgpu_usermode_queue *queue)
@@ -648,13 +627,11 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_que
	amdgpu_bo_unreserve(vm->root.bo);

	mutex_lock(&uq_mgr->userq_mutex);
	queue->hang_detect_fence = NULL;
	amdgpu_userq_wait_for_last_fence(queue);

#if defined(CONFIG_DEBUG_FS)
	debugfs_remove_recursive(queue->debugfs_queue);
#endif
	amdgpu_userq_detect_and_reset_queues(uq_mgr);
	r = amdgpu_userq_unmap_helper(queue);
	atomic_dec(&uq_mgr->userq_count[queue->queue_type]);
	amdgpu_userq_cleanup(queue);
@@ -800,6 +777,7 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
	}

	queue->doorbell_index = index;
	mutex_init(&queue->fence_drv_lock);
	xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC);
	r = amdgpu_userq_fence_driver_alloc(adev, &queue->fence_drv);
	if (r) {
@@ -855,7 +833,8 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
	up_read(&adev->reset_domain->sem);

	amdgpu_debugfs_userq_init(filp, queue, qid);
	amdgpu_userq_init_hang_detect_work(queue);
	INIT_DELAYED_WORK(&queue->hang_detect_work,
			  amdgpu_userq_hang_detect_work);

	args->out.queue_id = qid;
	atomic_inc(&uq_mgr->userq_count[queue->queue_type]);
@@ -873,6 +852,7 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
	amdgpu_bo_reserve(fpriv->vm.root.bo, true);
	amdgpu_userq_buffer_vas_list_cleanup(adev, queue);
	amdgpu_bo_unreserve(fpriv->vm.root.bo);
	mutex_destroy(&queue->fence_drv_lock);
free_queue:
	kfree(queue);
err_pm_runtime:
@@ -1262,7 +1242,6 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
	unsigned long queue_id;
	int ret = 0, r;

	amdgpu_userq_detect_and_reset_queues(uq_mgr);
	/* Try to unmap all the queues in this process ctx */
	xa_for_each(&uq_mgr->userq_xa, queue_id, queue) {
		r = amdgpu_userq_preempt_helper(queue);
@@ -1270,9 +1249,11 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
			ret = r;
	}

	if (ret)
	if (ret) {
		drm_file_err(uq_mgr->file,
			     "Couldn't unmap all the queues, eviction failed ret=%d\n", ret);
		amdgpu_userq_detect_and_reset_queues(uq_mgr);
	}
	return ret;
}

@@ -1372,7 +1353,6 @@ int amdgpu_userq_suspend(struct amdgpu_device *adev)
		uqm = queue->userq_mgr;
		cancel_delayed_work_sync(&uqm->resume_work);
		guard(mutex)(&uqm->userq_mutex);
		amdgpu_userq_detect_and_reset_queues(uqm);
		if (adev->in_s0ix)
			r = amdgpu_userq_preempt_helper(queue);
		else
@@ -1431,7 +1411,6 @@ int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
		if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
		     (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
		    (queue->xcp_id == idx)) {
			amdgpu_userq_detect_and_reset_queues(uqm);
			r = amdgpu_userq_preempt_helper(queue);
			if (r)
				ret = r;
@@ -1504,14 +1483,13 @@ void amdgpu_userq_pre_reset(struct amdgpu_device *adev)
{
	const struct amdgpu_userq_funcs *userq_funcs;
	struct amdgpu_usermode_queue *queue;
	struct amdgpu_userq_mgr *uqm;
	unsigned long queue_id;

	/* TODO: We probably need a new lock for the queue state */
	xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
		uqm = queue->userq_mgr;
		cancel_delayed_work_sync(&uqm->resume_work);
		if (queue->state == AMDGPU_USERQ_STATE_MAPPED) {
			amdgpu_userq_wait_for_last_fence(queue);
		if (queue->state != AMDGPU_USERQ_STATE_MAPPED)
			continue;

		userq_funcs = adev->userq_funcs[queue->queue_type];
		userq_funcs->unmap(queue);
		/* just mark all queues as hung at this point.
@@ -1522,7 +1500,6 @@ void amdgpu_userq_pre_reset(struct amdgpu_device *adev)
		amdgpu_userq_fence_driver_force_completion(queue);
	}
}
}

int amdgpu_userq_post_reset(struct amdgpu_device *adev, bool vram_lost)
{
+12 −1
Original line number Diff line number Diff line
@@ -66,6 +66,18 @@ struct amdgpu_usermode_queue {
	struct amdgpu_userq_obj	db_obj;
	struct amdgpu_userq_obj fw_obj;
	struct amdgpu_userq_obj wptr_obj;

	/**
	 * @fence_drv_lock: Protecting @fence_drv_xa.
	 */
	struct mutex		fence_drv_lock;

	/**
	 * @fence_drv_xa:
	 *
	 * References to the external fence drivers returned by wait_ioctl.
	 * Dropped on the next signaled dma_fence or queue destruction.
	 */
	struct xarray		fence_drv_xa;
	struct amdgpu_userq_fence_driver *fence_drv;
	struct dma_fence	*last_fence;
@@ -73,7 +85,6 @@ struct amdgpu_usermode_queue {
	int			priority;
	struct dentry		*debugfs_queue;
	struct delayed_work hang_detect_work;
	struct dma_fence *hang_detect_fence;
	struct kref		refcount;

	struct list_head	userq_va_list;
+118 −125
Original line number Diff line number Diff line
@@ -121,6 +121,7 @@ amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq)
	userq->last_fence = NULL;
	amdgpu_userq_walk_and_drop_fence_drv(&userq->fence_drv_xa);
	xa_destroy(&userq->fence_drv_xa);
	mutex_destroy(&userq->fence_drv_lock);
	/* Drop the queue's ownership reference to fence_drv explicitly */
	amdgpu_userq_fence_driver_put(userq->fence_drv);
}
@@ -134,7 +135,14 @@ amdgpu_userq_fence_put_fence_drv_array(struct amdgpu_userq_fence *userq_fence)
	userq_fence->fence_drv_array_count = 0;
}

void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv)
/*
 * Returns:
 * -ENOENT when no fences were processed
 * 1 when more fences are pending
 * 0 when no fences are pending any more
 */
int
amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv)
{
	struct amdgpu_userq_fence *userq_fence, *tmp;
	LIST_HEAD(to_be_signaled);
@@ -142,9 +150,6 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d
	unsigned long flags;
	u64 rptr;

	if (!fence_drv)
		return;

	spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
	rptr = amdgpu_userq_fence_read(fence_drv);

@@ -157,6 +162,9 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d
				&userq_fence->link);
	spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);

	if (list_empty(&to_be_signaled))
		return -ENOENT;

	list_for_each_entry_safe(userq_fence, tmp, &to_be_signaled, link) {
		fence = &userq_fence->base;
		list_del_init(&userq_fence->link);
@@ -168,6 +176,8 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d
		dma_fence_put(fence);
	}

	/* That doesn't need to be accurate so no locking */
	return list_empty(&fence_drv->fences) ? 0 : 1;
}

void amdgpu_userq_fence_driver_destroy(struct kref *ref)
@@ -209,80 +219,84 @@ void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv)
	kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy);
}

static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence)
static int amdgpu_userq_fence_alloc(struct amdgpu_usermode_queue *userq,
				    struct amdgpu_userq_fence **pfence)
{
	*userq_fence = kmalloc(sizeof(**userq_fence), GFP_KERNEL);
	return *userq_fence ? 0 : -ENOMEM;
}

static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
				     struct amdgpu_userq_fence *userq_fence,
				     u64 seq, struct dma_fence **f)
{
	struct amdgpu_userq_fence_driver *fence_drv;
	struct dma_fence *fence;
	unsigned long flags;
	bool signaled = false;
	struct amdgpu_userq_fence_driver *fence_drv = userq->fence_drv;
	struct amdgpu_userq_fence *userq_fence;
	void *entry;

	fence_drv = userq->fence_drv;
	if (!fence_drv)
		return -EINVAL;
	userq_fence = kmalloc(sizeof(*userq_fence), GFP_KERNEL);
	if (!userq_fence)
		return -ENOMEM;

	spin_lock_init(&userq_fence->lock);
	INIT_LIST_HEAD(&userq_fence->link);
	fence = &userq_fence->base;
	userq_fence->fence_drv = fence_drv;
	/*
	 * Get the next unused entry, since we fill from the start this can be
	 * used as size to allocate the array.
	 */
	mutex_lock(&userq->fence_drv_lock);
	XA_STATE(xas, &userq->fence_drv_xa, 0);

	dma_fence_init64(fence, &amdgpu_userq_fence_ops, &userq_fence->lock,
			 fence_drv->context, seq);
	rcu_read_lock();
	do {
		entry = xas_find_marked(&xas, ULONG_MAX, XA_FREE_MARK);
	} while (xas_retry(&xas, entry));
	rcu_read_unlock();

	amdgpu_userq_fence_driver_get(fence_drv);
	dma_fence_get(fence);
	userq_fence->fence_drv_array = kvmalloc_array(xas.xa_index,
						      sizeof(fence_drv),
						      GFP_KERNEL);
	if (!userq_fence->fence_drv_array) {
		mutex_unlock(&userq->fence_drv_lock);
		kfree(userq_fence);
		return -ENOMEM;
	}

	if (!xa_empty(&userq->fence_drv_xa)) {
		struct amdgpu_userq_fence_driver *stored_fence_drv;
		unsigned long index, count = 0;
		int i = 0;
	userq_fence->fence_drv_array_count = xas.xa_index;
	xa_extract(&userq->fence_drv_xa, (void **)userq_fence->fence_drv_array,
		   0, ULONG_MAX, xas.xa_index, XA_PRESENT);
	xa_destroy(&userq->fence_drv_xa);

		xa_lock(&userq->fence_drv_xa);
		xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv)
			count++;
	mutex_unlock(&userq->fence_drv_lock);

		userq_fence->fence_drv_array =
			kvmalloc_objs(struct amdgpu_userq_fence_driver *, count,
				      GFP_ATOMIC);
	amdgpu_userq_fence_driver_get(fence_drv);
	userq_fence->fence_drv = fence_drv;

		if (userq_fence->fence_drv_array) {
			xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) {
				userq_fence->fence_drv_array[i] = stored_fence_drv;
				__xa_erase(&userq->fence_drv_xa, index);
				i++;
			}
	*pfence = userq_fence;
	return 0;
}

		userq_fence->fence_drv_array_count = i;
		xa_unlock(&userq->fence_drv_xa);
	} else {
		userq_fence->fence_drv_array = NULL;
		userq_fence->fence_drv_array_count = 0;
	}
static void amdgpu_userq_fence_init(struct amdgpu_usermode_queue *userq,
				    struct amdgpu_userq_fence *fence,
				    u64 seq)
{
	struct amdgpu_userq_fence_driver *fence_drv = userq->fence_drv;
	unsigned long flags;
	bool signaled = false;

	spin_lock_init(&fence->lock);
	dma_fence_init64(&fence->base, &amdgpu_userq_fence_ops, &fence->lock,
			 fence_drv->context, seq);

	/* Make sure the fence is visible to the hang detect worker */
	dma_fence_put(userq->last_fence);
	userq->last_fence = dma_fence_get(&fence->base);

	/* Check if hardware has already processed the job */
	/* Check if hardware has already processed the fence */
	spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
	if (!dma_fence_is_signaled(fence)) {
		list_add_tail(&userq_fence->link, &fence_drv->fences);
	if (!dma_fence_is_signaled(&fence->base)) {
		dma_fence_get(&fence->base);
		list_add_tail(&fence->link, &fence_drv->fences);
	} else {
		INIT_LIST_HEAD(&fence->link);
		signaled = true;
		dma_fence_put(fence);
	}
	spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);

	if (signaled)
		amdgpu_userq_fence_put_fence_drv_array(userq_fence);

	*f = fence;

	return 0;
		amdgpu_userq_fence_put_fence_drv_array(fence);
	else
		amdgpu_userq_start_hang_detect_work(userq);
}

static const char *amdgpu_userq_fence_get_driver_name(struct dma_fence *f)
@@ -403,11 +417,6 @@ static int amdgpu_userq_fence_read_wptr(struct amdgpu_device *adev,
	return r;
}

static void amdgpu_userq_fence_cleanup(struct dma_fence *fence)
{
	dma_fence_put(fence);
}

static void
amdgpu_userq_fence_driver_set_error(struct amdgpu_userq_fence *fence,
				    int error)
@@ -451,13 +460,14 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
	const unsigned int num_read_bo_handles = args->num_bo_read_handles;
	struct amdgpu_fpriv *fpriv = filp->driver_priv;
	struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;

	struct drm_gem_object **gobj_write, **gobj_read;
	u32 *syncobj_handles, num_syncobj_handles;
	struct amdgpu_userq_fence *userq_fence;
	struct amdgpu_usermode_queue *queue = NULL;
	struct drm_syncobj **syncobj = NULL;
	struct dma_fence *fence;
	struct amdgpu_usermode_queue *queue;
	struct amdgpu_userq_fence *fence;
	struct drm_syncobj **syncobj;
	struct drm_exec exec;
	void __user *ptr;
	int r, i, entry;
	u64 wptr;

@@ -469,13 +479,14 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
		return -EINVAL;

	num_syncobj_handles = args->num_syncobj_handles;
	syncobj_handles = memdup_array_user(u64_to_user_ptr(args->syncobj_handles),
					    num_syncobj_handles, sizeof(u32));
	ptr = u64_to_user_ptr(args->syncobj_handles);
	syncobj_handles = memdup_array_user(ptr, num_syncobj_handles,
					    sizeof(u32));
	if (IS_ERR(syncobj_handles))
		return PTR_ERR(syncobj_handles);

	/* Array of pointers to the looked up syncobjs */
	syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj), GFP_KERNEL);
	syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj),
				GFP_KERNEL);
	if (!syncobj) {
		r = -ENOMEM;
		goto free_syncobj_handles;
@@ -489,21 +500,17 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
		}
	}

	r = drm_gem_objects_lookup(filp,
				   u64_to_user_ptr(args->bo_read_handles),
				   num_read_bo_handles,
				   &gobj_read);
	ptr = u64_to_user_ptr(args->bo_read_handles);
	r = drm_gem_objects_lookup(filp, ptr, num_read_bo_handles, &gobj_read);
	if (r)
		goto free_syncobj;

	r = drm_gem_objects_lookup(filp,
				   u64_to_user_ptr(args->bo_write_handles),
				   num_write_bo_handles,
	ptr = u64_to_user_ptr(args->bo_write_handles);
	r = drm_gem_objects_lookup(filp, ptr, num_write_bo_handles,
				   &gobj_write);
	if (r)
		goto put_gobj_read;

	/* Retrieve the user queue */
	queue = amdgpu_userq_get(userq_mgr, args->queue_id);
	if (!queue) {
		r = -ENOENT;
@@ -512,73 +519,61 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,

	r = amdgpu_userq_fence_read_wptr(adev, queue, &wptr);
	if (r)
		goto put_gobj_write;
		goto put_queue;

	r = amdgpu_userq_fence_alloc(&userq_fence);
	r = amdgpu_userq_fence_alloc(queue, &fence);
	if (r)
		goto put_gobj_write;
		goto put_queue;

	/* We are here means UQ is active, make sure the eviction fence is valid */
	amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);

	/* Create a new fence */
	r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence);
	if (r) {
		mutex_unlock(&userq_mgr->userq_mutex);
		kfree(userq_fence);
		goto put_gobj_write;
	}
	/* Create the new fence */
	amdgpu_userq_fence_init(queue, fence, wptr);

	dma_fence_put(queue->last_fence);
	queue->last_fence = dma_fence_get(fence);
	amdgpu_userq_start_hang_detect_work(queue);
	mutex_unlock(&userq_mgr->userq_mutex);

	/*
	 * This needs to come after the fence is created since
	 * amdgpu_userq_ensure_ev_fence() can't be called while holding the resv
	 * locks.
	 */
	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
		      (num_read_bo_handles + num_write_bo_handles));

	/* Lock all BOs with retry handling */
	drm_exec_until_all_locked(&exec) {
		r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
		r = drm_exec_prepare_array(&exec, gobj_read,
					   num_read_bo_handles, 1);
		drm_exec_retry_on_contention(&exec);
		if (r) {
			amdgpu_userq_fence_cleanup(fence);
		if (r)
			goto exec_fini;
		}

		r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
		r = drm_exec_prepare_array(&exec, gobj_write,
					   num_write_bo_handles, 1);
		drm_exec_retry_on_contention(&exec);
		if (r) {
			amdgpu_userq_fence_cleanup(fence);
		if (r)
			goto exec_fini;
	}
	}

	for (i = 0; i < num_read_bo_handles; i++) {
		if (!gobj_read || !gobj_read[i]->resv)
			continue;

		dma_resv_add_fence(gobj_read[i]->resv, fence,
	/* And publish the new fence in the BOs and syncobj */
	for (i = 0; i < num_read_bo_handles; i++)
		dma_resv_add_fence(gobj_read[i]->resv, &fence->base,
				   DMA_RESV_USAGE_READ);
	}

	for (i = 0; i < num_write_bo_handles; i++) {
		if (!gobj_write || !gobj_write[i]->resv)
			continue;

		dma_resv_add_fence(gobj_write[i]->resv, fence,
	for (i = 0; i < num_write_bo_handles; i++)
		dma_resv_add_fence(gobj_write[i]->resv, &fence->base,
				   DMA_RESV_USAGE_WRITE);
	}

	/* Add the created fence to syncobj/BO's */
	for (i = 0; i < num_syncobj_handles; i++)
		drm_syncobj_replace_fence(syncobj[i], fence);
		drm_syncobj_replace_fence(syncobj[i], &fence->base);

exec_fini:
	/* drop the reference acquired in fence creation function */
	dma_fence_put(fence);
	dma_fence_put(&fence->base);

exec_fini:
	drm_exec_fini(&exec);
put_queue:
	amdgpu_userq_put(queue);
put_gobj_write:
	for (i = 0; i < num_write_bo_handles; i++)
		drm_gem_object_put(gobj_write[i]);
@@ -589,15 +584,11 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
	kvfree(gobj_read);
free_syncobj:
	while (entry-- > 0)
		if (syncobj[entry])
		drm_syncobj_put(syncobj[entry]);
	kfree(syncobj);
free_syncobj_handles:
	kfree(syncobj_handles);

	if (queue)
		amdgpu_userq_put(queue);

	return r;
}

@@ -872,8 +863,10 @@ amdgpu_userq_wait_return_fence_info(struct drm_file *filp,
		 * Otherwise, we would gather those references until we don't
		 * have any more space left and crash.
		 */
		mutex_lock(&waitq->fence_drv_lock);
		r = xa_alloc(&waitq->fence_drv_xa, &index, fence_drv,
			     xa_limit_32b, GFP_KERNEL);
		mutex_unlock(&waitq->fence_drv_lock);
		if (r)
			goto put_waitq;

+1 −1
Original line number Diff line number Diff line
@@ -63,7 +63,7 @@ void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv);
int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
				    struct amdgpu_userq_fence_driver **fence_drv_req);
void amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq);
void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv);
int amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv);
void amdgpu_userq_fence_driver_force_completion(struct amdgpu_usermode_queue *userq);
void amdgpu_userq_fence_driver_destroy(struct kref *ref);
int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
Loading