Commit a5b5ab33 authored by Dave Airlie
Browse files

Merge tag 'drm-xe-fixes-2024-04-04' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes



- Stop using system_unbound_wq for preempt fences,
  as this can cause starvation when reaching more
  than max_active defined by workqueue
- Fix saving unordered rebinding fences by attaching
  them as kernel fences to the vm's resv
- Fix TLB invalidation fences completing out of order
- Move rebind TLB invalidation to the ring ops to reduce
  the latency

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/tizan6wdpxu4ayudeikjglxdgzmnhdzj3li3z2pgkierjtozzw@lbfddeg43a7h
parents 4cf09f17 77a01101
Loading
Loading
Loading
Loading
+10 −1
Original line number Diff line number Diff line
@@ -193,6 +193,9 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy)
{
	struct xe_device *xe = to_xe_device(dev);

	if (xe->preempt_fence_wq)
		destroy_workqueue(xe->preempt_fence_wq);

	if (xe->ordered_wq)
		destroy_workqueue(xe->ordered_wq);

@@ -258,9 +261,15 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
	INIT_LIST_HEAD(&xe->pinned.external_vram);
	INIT_LIST_HEAD(&xe->pinned.evicted);

	xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0);
	xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
	xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
	if (!xe->ordered_wq || !xe->unordered_wq) {
	if (!xe->ordered_wq || !xe->unordered_wq ||
	    !xe->preempt_fence_wq) {
		/*
		 * Cleanup done in xe_device_destroy via
		 * drmm_add_action_or_reset register above
		 */
		drm_err(&xe->drm, "Failed to allocate xe workqueues\n");
		err = -ENOMEM;
		goto err;
+3 −0
Original line number Diff line number Diff line
@@ -363,6 +363,9 @@ struct xe_device {
	/** @ufence_wq: user fence wait queue */
	wait_queue_head_t ufence_wq;

	/** @preempt_fence_wq: used to serialize preempt fences */
	struct workqueue_struct *preempt_fence_wq;

	/** @ordered_wq: used to serialize compute mode resume */
	struct workqueue_struct *ordered_wq;

+7 −72
Original line number Diff line number Diff line
@@ -94,48 +94,16 @@
 *	Unlock all
 */

/*
 * Add validation and rebinding to the drm_exec locking loop, since both can
 * trigger eviction which may require sleeping dma_resv locks.
 *
 * NOTE(review): this listing looks like a diff rendered without +/- markers
 * (old and new lines interleaved) -- confirm against the actual tree. As
 * written, the function calls drm_gpuvm_validate(), reserves num_fences slots
 * on every locked object, and returns 0; the trailing
 * xe_vm_validate_rebind() return is therefore never reached.
 *
 * Returns 0 on success, or the first non-zero value returned by
 * drm_gpuvm_validate() or dma_resv_reserve_fences().
 */
static int xe_exec_fn(struct drm_gpuvm_exec *vm_exec)
{
	struct xe_vm *vm = container_of(vm_exec->vm, struct xe_vm, gpuvm);
	struct drm_gem_object *obj;
	unsigned long index;
	int num_fences;
	int ret;

	ret = drm_gpuvm_validate(vm_exec->vm, &vm_exec->exec);
	if (ret)
		return ret;

	/*
	 * 1 fence slot for the final submit, and 1 more for every per-tile for
	 * GPU bind and 1 extra for CPU bind. Note that there are potentially
	 * many vma per object/dma-resv, however the fence slot will just be
	 * re-used, since they are largely the same timeline and the seqno
	 * should be in order. In the case of CPU bind there is dummy fence used
	 * for all CPU binds, so no need to have a per-tile slot for that.
	 */
	num_fences = 1 + 1 + vm->xe->info.tile_count;

	/*
	 * We don't know upfront exactly how many fence slots we will need at
	 * the start of the exec, since the TTM bo_validate above can consume
	 * numerous fence slots. Also due to how the dma_resv_reserve_fences()
	 * works it only ensures that at least that many fence slots are
	 * available i.e if there are already 10 slots available and we reserve
	 * two more, it can just noop without reserving anything.  With this it
	 * is quite possible that TTM steals some of the fence slots and then
	 * when it comes time to do the vma binding and final exec stage we are
	 * lacking enough fence slots, leading to some nasty BUG_ON() when
	 * adding the fences. Hence just add our own fences here, after the
	 * validate stage.
	 */
	drm_exec_for_each_locked_object(&vm_exec->exec, index, obj) {
		ret = dma_resv_reserve_fences(obj->resv, num_fences);
		if (ret)
			return ret;
	}

	return 0;
	/*
	 * NOTE(review): unreachable after the unconditional return above;
	 * presumably this is the replacement body from the new side of the
	 * diff -- verify before relying on it.
	 */
	/* The fence slot added here is intended for the exec sched job. */
	return xe_vm_validate_rebind(vm, &vm_exec->exec, 1);
}

int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
@@ -152,7 +120,6 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
	struct drm_exec *exec = &vm_exec.exec;
	u32 i, num_syncs = 0, num_ufence = 0;
	struct xe_sched_job *job;
	struct dma_fence *rebind_fence;
	struct xe_vm *vm;
	bool write_locked, skip_retry = false;
	ktime_t end = 0;
@@ -290,39 +257,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
		goto err_exec;
	}

	/*
	 * Rebind any invalidated userptr or evicted BOs in the VM, non-compute
	 * VM mode only.
	 */
	rebind_fence = xe_vm_rebind(vm, false);
	if (IS_ERR(rebind_fence)) {
		err = PTR_ERR(rebind_fence);
		goto err_put_job;
	}

	/*
	 * We store the rebind_fence in the VM so subsequent execs don't get
	 * scheduled before the rebinds of userptrs / evicted BOs is complete.
	 */
	if (rebind_fence) {
		dma_fence_put(vm->rebind_fence);
		vm->rebind_fence = rebind_fence;
	}
	if (vm->rebind_fence) {
		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
			     &vm->rebind_fence->flags)) {
			dma_fence_put(vm->rebind_fence);
			vm->rebind_fence = NULL;
		} else {
			dma_fence_get(vm->rebind_fence);
			err = drm_sched_job_add_dependency(&job->drm,
							   vm->rebind_fence);
			if (err)
				goto err_put_job;
		}
	}

	/* Wait behind munmap style rebinds */
	/* Wait behind rebinds */
	if (!xe_vm_in_lr_mode(vm)) {
		err = drm_sched_job_add_resv_dependencies(&job->drm,
							  xe_vm_resv(vm),
+5 −0
Original line number Diff line number Diff line
@@ -148,6 +148,11 @@ struct xe_exec_queue {
	const struct xe_ring_ops *ring_ops;
	/** @entity: DRM sched entity for this exec queue (1 to 1 relationship) */
	struct drm_sched_entity *entity;
	/**
	 * @tlb_flush_seqno: The seqno of the last rebind tlb flush performed
	 * Protected by @vm's resv. Unused if @vm == NULL.
	 */
	u64 tlb_flush_seqno;
	/** @lrc: logical ring context for this exec queue */
	struct xe_lrc lrc[];
};
+1 −2
Original line number Diff line number Diff line
@@ -100,10 +100,9 @@ static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma,
{
	struct xe_bo *bo = xe_vma_bo(vma);
	struct xe_vm *vm = xe_vma_vm(vma);
	unsigned int num_shared = 2; /* slots for bind + move */
	int err;

	err = xe_vm_prepare_vma(exec, vma, num_shared);
	err = xe_vm_lock_vma(exec, vma);
	if (err)
		return err;

Loading