Commit b8d5779e authored by Matthew Brost's avatar Matthew Brost
Browse files

drm/xe: Use GT TLB invalidation jobs in PT layer



Rather than open-coding GT TLB invalidations in the PT layer, use GT TLB
invalidation jobs. The real benefit is that GT TLB invalidation jobs use
a single dma-fence context, allowing the generated fences to be squashed
in dma-resv/DRM scheduler.

v2:
 - s/;;/; (checkpatch)
 - Move ijob/mjob job push after range fence install
v3:
 - Remove extra newline (Stuart)
 - Set ijob/mjob near creation (Stuart)
 - Add comment back in (Stuart)

Suggested-by: default avatarThomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: default avatarMatthew Brost <matthew.brost@intel.com>
Reviewed-by: default avatarStuart Summers <stuart.summers@intel.com>
Link: https://lore.kernel.org/r/20250724191216.4076566-7-matthew.brost@intel.com
parent dba89840
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@ struct ttm_resource;

struct xe_bo;
struct xe_gt;
struct xe_gt_tlb_inval_job;
struct xe_exec_queue;
struct xe_migrate;
struct xe_migrate_pt_update;
@@ -91,6 +92,14 @@ struct xe_migrate_pt_update {
	struct xe_vma_ops *vops;
	/** @job: The job if a GPU page-table update. NULL otherwise */
	struct xe_sched_job *job;
	/**
	 * @ijob: The GT TLB invalidation job for primary tile. NULL otherwise
	 */
	struct xe_gt_tlb_inval_job *ijob;
	/**
	 * @mjob: The GT TLB invalidation job for media tile. NULL otherwise
	 */
	struct xe_gt_tlb_inval_job *mjob;
	/** @tile_id: Tile ID of the update */
	u8 tile_id;
};
+71 −107
Original line number Diff line number Diff line
@@ -13,7 +13,7 @@
#include "xe_drm_client.h"
#include "xe_exec_queue.h"
#include "xe_gt.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_gt_tlb_inval_job.h"
#include "xe_migrate.h"
#include "xe_pt_types.h"
#include "xe_pt_walk.h"
@@ -1261,6 +1261,8 @@ static int op_add_deps(struct xe_vm *vm, struct xe_vma_op *op,
}

static int xe_pt_vm_dependencies(struct xe_sched_job *job,
				 struct xe_gt_tlb_inval_job *ijob,
				 struct xe_gt_tlb_inval_job *mjob,
				 struct xe_vm *vm,
				 struct xe_vma_ops *vops,
				 struct xe_vm_pgtable_update_ops *pt_update_ops,
@@ -1328,6 +1330,20 @@ static int xe_pt_vm_dependencies(struct xe_sched_job *job,
	for (i = 0; job && !err && i < vops->num_syncs; i++)
		err = xe_sync_entry_add_deps(&vops->syncs[i], job);

	if (job) {
		if (ijob) {
			err = xe_gt_tlb_inval_job_alloc_dep(ijob);
			if (err)
				return err;
		}

		if (mjob) {
			err = xe_gt_tlb_inval_job_alloc_dep(mjob);
			if (err)
				return err;
		}
	}

	return err;
}

@@ -1339,7 +1355,8 @@ static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update)
	struct xe_vm_pgtable_update_ops *pt_update_ops =
		&vops->pt_update_ops[pt_update->tile_id];

	return xe_pt_vm_dependencies(pt_update->job, vm, pt_update->vops,
	return xe_pt_vm_dependencies(pt_update->job, pt_update->ijob,
				     pt_update->mjob, vm, pt_update->vops,
				     pt_update_ops, rftree);
}

@@ -1509,75 +1526,6 @@ static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update)
}
#endif

struct invalidation_fence {
	struct xe_gt_tlb_invalidation_fence base;
	struct xe_gt *gt;
	struct dma_fence *fence;
	struct dma_fence_cb cb;
	struct work_struct work;
	u64 start;
	u64 end;
	u32 asid;
};

static void invalidation_fence_cb(struct dma_fence *fence,
				  struct dma_fence_cb *cb)
{
	struct invalidation_fence *ifence =
		container_of(cb, struct invalidation_fence, cb);
	struct xe_device *xe = gt_to_xe(ifence->gt);

	trace_xe_gt_tlb_invalidation_fence_cb(xe, &ifence->base);
	if (!ifence->fence->error) {
		queue_work(system_wq, &ifence->work);
	} else {
		ifence->base.base.error = ifence->fence->error;
		xe_gt_tlb_invalidation_fence_signal(&ifence->base);
	}
	dma_fence_put(ifence->fence);
}

static void invalidation_fence_work_func(struct work_struct *w)
{
	struct invalidation_fence *ifence =
		container_of(w, struct invalidation_fence, work);
	struct xe_device *xe = gt_to_xe(ifence->gt);

	trace_xe_gt_tlb_invalidation_fence_work_func(xe, &ifence->base);
	xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start,
				     ifence->end, ifence->asid);
}

static void invalidation_fence_init(struct xe_gt *gt,
				    struct invalidation_fence *ifence,
				    struct dma_fence *fence,
				    u64 start, u64 end, u32 asid)
{
	int ret;

	trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base);

	xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false);

	ifence->fence = fence;
	ifence->gt = gt;
	ifence->start = start;
	ifence->end = end;
	ifence->asid = asid;

	INIT_WORK(&ifence->work, invalidation_fence_work_func);
	ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb);
	if (ret == -ENOENT) {
		dma_fence_put(ifence->fence);	/* Usually dropped in CB */
		invalidation_fence_work_func(&ifence->work);
	} else if (ret) {
		dma_fence_put(&ifence->base.base);	/* Caller ref */
		dma_fence_put(&ifence->base.base);	/* Creation ref */
	}

	xe_gt_assert(gt, !ret || ret == -ENOENT);
}

struct xe_pt_stage_unbind_walk {
	/** @base: The pagewalk base-class. */
	struct xe_pt_walk base;
@@ -2407,8 +2355,8 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
	struct xe_vm *vm = vops->vm;
	struct xe_vm_pgtable_update_ops *pt_update_ops =
		&vops->pt_update_ops[tile->id];
	struct dma_fence *fence;
	struct invalidation_fence *ifence = NULL, *mfence = NULL;
	struct dma_fence *fence, *ifence, *mfence;
	struct xe_gt_tlb_inval_job *ijob = NULL, *mjob = NULL;
	struct dma_fence **fences = NULL;
	struct dma_fence_array *cf = NULL;
	struct xe_range_fence *rfence;
@@ -2440,26 +2388,38 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
#endif

	if (pt_update_ops->needs_invalidation) {
		ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
		if (!ifence) {
			err = -ENOMEM;
		ijob = xe_gt_tlb_inval_job_create(pt_update_ops->q,
						  tile->primary_gt,
						  pt_update_ops->start,
						  pt_update_ops->last,
						  vm->usm.asid);
		if (IS_ERR(ijob)) {
			err = PTR_ERR(ijob);
			goto kill_vm_tile1;
		}
		update.ijob = ijob;

		if (tile->media_gt) {
			mfence = kzalloc(sizeof(*ifence), GFP_KERNEL);
			if (!mfence) {
				err = -ENOMEM;
				goto free_ifence;
			mjob = xe_gt_tlb_inval_job_create(pt_update_ops->q,
							  tile->media_gt,
							  pt_update_ops->start,
							  pt_update_ops->last,
							  vm->usm.asid);
			if (IS_ERR(mjob)) {
				err = PTR_ERR(mjob);
				goto free_ijob;
			}
			update.mjob = mjob;

			fences = kmalloc_array(2, sizeof(*fences), GFP_KERNEL);
			if (!fences) {
				err = -ENOMEM;
				goto free_ifence;
				goto free_ijob;
			}
			cf = dma_fence_array_alloc(2);
			if (!cf) {
				err = -ENOMEM;
				goto free_ifence;
				goto free_ijob;
			}
		}
	}
@@ -2467,7 +2427,7 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
	rfence = kzalloc(sizeof(*rfence), GFP_KERNEL);
	if (!rfence) {
		err = -ENOMEM;
		goto free_ifence;
		goto free_ijob;
	}

	fence = xe_migrate_update_pgtables(tile->migrate, &update);
@@ -2491,30 +2451,31 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
				  pt_update_ops->last, fence))
		dma_fence_wait(fence, false);

	/* tlb invalidation must be done before signaling rebind */
	if (ifence) {
		if (mfence)
			dma_fence_get(fence);
		invalidation_fence_init(tile->primary_gt, ifence, fence,
					pt_update_ops->start,
					pt_update_ops->last, vm->usm.asid);
		if (mfence) {
			invalidation_fence_init(tile->media_gt, mfence, fence,
						pt_update_ops->start,
						pt_update_ops->last, vm->usm.asid);
			fences[0] = &ifence->base.base;
			fences[1] = &mfence->base.base;
	/* tlb invalidation must be done before signaling unbind/rebind */
	if (ijob) {
		struct dma_fence *__fence;

		ifence = xe_gt_tlb_inval_job_push(ijob, tile->migrate, fence);
		__fence = ifence;

		if (mjob) {
			fences[0] = ifence;
			mfence = xe_gt_tlb_inval_job_push(mjob, tile->migrate,
							  fence);
			fences[1] = mfence;

			dma_fence_array_init(cf, 2, fences,
					     vm->composite_fence_ctx,
					     vm->composite_fence_seqno++,
					     false);
			fence = &cf->base;
		} else {
			fence = &ifence->base.base;
			__fence = &cf->base;
		}

		dma_fence_put(fence);
		fence = __fence;
	}

	if (!mfence) {
	if (!mjob) {
		dma_resv_add_fence(xe_vm_resv(vm), fence,
				   pt_update_ops->wait_vm_bookkeep ?
				   DMA_RESV_USAGE_KERNEL :
@@ -2523,19 +2484,19 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
		list_for_each_entry(op, &vops->list, link)
			op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL);
	} else {
		dma_resv_add_fence(xe_vm_resv(vm), &ifence->base.base,
		dma_resv_add_fence(xe_vm_resv(vm), ifence,
				   pt_update_ops->wait_vm_bookkeep ?
				   DMA_RESV_USAGE_KERNEL :
				   DMA_RESV_USAGE_BOOKKEEP);

		dma_resv_add_fence(xe_vm_resv(vm), &mfence->base.base,
		dma_resv_add_fence(xe_vm_resv(vm), mfence,
				   pt_update_ops->wait_vm_bookkeep ?
				   DMA_RESV_USAGE_KERNEL :
				   DMA_RESV_USAGE_BOOKKEEP);

		list_for_each_entry(op, &vops->list, link)
			op_commit(vops->vm, tile, pt_update_ops, op,
				  &ifence->base.base, &mfence->base.base);
			op_commit(vops->vm, tile, pt_update_ops, op, ifence,
				  mfence);
	}

	if (pt_update_ops->needs_svm_lock)
@@ -2543,15 +2504,18 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
	if (pt_update_ops->needs_userptr_lock)
		up_read(&vm->userptr.notifier_lock);

	xe_gt_tlb_inval_job_put(mjob);
	xe_gt_tlb_inval_job_put(ijob);

	return fence;

free_rfence:
	kfree(rfence);
free_ifence:
free_ijob:
	kfree(cf);
	kfree(fences);
	kfree(mfence);
	kfree(ifence);
	xe_gt_tlb_inval_job_put(mjob);
	xe_gt_tlb_inval_job_put(ijob);
kill_vm_tile1:
	if (err != -EAGAIN && err != -ENODATA && tile->id)
		xe_vm_kill(vops->vm, false);