Commit c4f18703 authored by Matthew Brost's avatar Matthew Brost
Browse files

drm/xe: Add xe_gt_tlb_invalidation_range and convert PT layer to use this



xe_gt_tlb_invalidation_range accepts a start and end address rather than
a VMA. This will enable multiple VMAs to be invalidated in a single
invalidation. Update the PT layer to use this new function.

Signed-off-by: default avatarMatthew Brost <matthew.brost@intel.com>
Reviewed-by: default avatarOak Zeng <oak.zeng@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-13-matthew.brost@intel.com
parent 5aa5eea0
Loading
Loading
Loading
Loading
+44 −15
Original line number Diff line number Diff line
@@ -263,11 +263,15 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
}

/**
 * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA
 * xe_gt_tlb_invalidation_range - Issue a TLB invalidation on this GT for an
 * address range
 *
 * @gt: graphics tile
 * @fence: invalidation fence which will be signal on TLB invalidation
 * completion, can be NULL
 * @vma: VMA to invalidate
 * @start: start address
 * @end: end address
 * @asid: address space id
 *
 * Issue a range based TLB invalidation if supported, if not fallback to a full
 * TLB invalidation. Completion of TLB is asynchronous and caller can either use
@@ -277,17 +281,15 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
 * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
 * negative error code on error.
 */
int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
				 struct xe_gt_tlb_invalidation_fence *fence,
			       struct xe_vma *vma)
				 u64 start, u64 end, u32 asid)
{
	struct xe_device *xe = gt_to_xe(gt);
#define MAX_TLB_INVALIDATION_LEN	7
	u32 action[MAX_TLB_INVALIDATION_LEN];
	int len = 0;

	xe_gt_assert(gt, vma);

	/* Execlists not supported */
	if (gt_to_xe(gt)->info.force_execlist) {
		if (fence)
@@ -301,8 +303,8 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
	if (!xe->info.has_range_tlb_invalidation) {
		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
	} else {
		u64 start = xe_vma_start(vma);
		u64 length = xe_vma_size(vma);
		u64 orig_start = start;
		u64 length = end - start;
		u64 align, end;

		if (length < SZ_4K)
@@ -315,12 +317,12 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
		 * address mask covering the required range.
		 */
		align = roundup_pow_of_two(length);
		start = ALIGN_DOWN(xe_vma_start(vma), align);
		end = ALIGN(xe_vma_end(vma), align);
		start = ALIGN_DOWN(start, align);
		end = ALIGN(end, align);
		length = align;
		while (start + length < end) {
			length <<= 1;
			start = ALIGN_DOWN(xe_vma_start(vma), length);
			start = ALIGN_DOWN(orig_start, length);
		}

		/*
@@ -329,16 +331,17 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
		 */
		if (length >= SZ_2M) {
			length = max_t(u64, SZ_16M, length);
			start = ALIGN_DOWN(xe_vma_start(vma), length);
			start = ALIGN_DOWN(orig_start, length);
		}

		xe_gt_assert(gt, length >= SZ_4K);
		xe_gt_assert(gt, is_power_of_2(length));
		xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1)));
		xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1,
						    ilog2(SZ_2M) + 1)));
		xe_gt_assert(gt, IS_ALIGNED(start, length));

		action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
		action[len++] = xe_vma_vm(vma)->usm.asid;
		action[len++] = asid;
		action[len++] = lower_32_bits(start);
		action[len++] = upper_32_bits(start);
		action[len++] = ilog2(length) - ilog2(SZ_4K);
@@ -349,6 +352,32 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
	return send_tlb_invalidation(&gt->uc.guc, fence, action, len);
}

/**
 * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA
 * @gt: graphics tile
 * @fence: invalidation fence which will be signal on TLB invalidation
 * completion, can be NULL
 * @vma: VMA to invalidate
 *
 * Issue a range based TLB invalidation if supported, if not fallback to a full
 * TLB invalidation. Completion of TLB is asynchronous and caller can either use
 * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for
 * completion.
 *
 * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
 * negative error code on error.
 */
int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
			       struct xe_gt_tlb_invalidation_fence *fence,
			       struct xe_vma *vma)
{
	xe_gt_assert(gt, vma);

	return xe_gt_tlb_invalidation_range(gt, fence, xe_vma_start(vma),
					    xe_vma_end(vma),
					    xe_vma_vm(vma)->usm.asid);
}

/**
 * xe_gt_tlb_invalidation_wait - Wait for TLB to complete
 * @gt: graphics tile
+3 −0
Original line number Diff line number Diff line
@@ -20,6 +20,9 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt);
int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
			       struct xe_gt_tlb_invalidation_fence *fence,
			       struct xe_vma *vma);
int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
				 struct xe_gt_tlb_invalidation_fence *fence,
				 u64 start, u64 end, u32 asid);
int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno);
int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);

+18 −7
Original line number Diff line number Diff line
@@ -1075,10 +1075,12 @@ static const struct xe_migrate_pt_update_ops userptr_bind_ops = {
struct invalidation_fence {
	struct xe_gt_tlb_invalidation_fence base;
	struct xe_gt *gt;
	struct xe_vma *vma;
	struct dma_fence *fence;
	struct dma_fence_cb cb;
	struct work_struct work;
	u64 start;
	u64 end;
	u32 asid;
};

static const char *
@@ -1121,13 +1123,14 @@ static void invalidation_fence_work_func(struct work_struct *w)
		container_of(w, struct invalidation_fence, work);

	trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base);
	xe_gt_tlb_invalidation_vma(ifence->gt, &ifence->base, ifence->vma);
	xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start,
				     ifence->end, ifence->asid);
}

static int invalidation_fence_init(struct xe_gt *gt,
				   struct invalidation_fence *ifence,
				   struct dma_fence *fence,
				   struct xe_vma *vma)
				   u64 start, u64 end, u32 asid)
{
	int ret;

@@ -1144,7 +1147,9 @@ static int invalidation_fence_init(struct xe_gt *gt,
	dma_fence_get(&ifence->base.base);	/* Ref for caller */
	ifence->fence = fence;
	ifence->gt = gt;
	ifence->vma = vma;
	ifence->start = start;
	ifence->end = end;
	ifence->asid = asid;

	INIT_WORK(&ifence->work, invalidation_fence_work_func);
	ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb);
@@ -1295,8 +1300,11 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue

		/* TLB invalidation must be done before signaling rebind */
		if (ifence) {
			int err = invalidation_fence_init(tile->primary_gt, ifence, fence,
							  vma);
			int err = invalidation_fence_init(tile->primary_gt,
							  ifence, fence,
							  xe_vma_start(vma),
							  xe_vma_end(vma),
							  xe_vma_vm(vma)->usm.asid);
			if (err) {
				dma_fence_put(fence);
				kfree(ifence);
@@ -1641,7 +1649,10 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu
			dma_fence_wait(fence, false);

		/* TLB invalidation must be done before signaling unbind */
		err = invalidation_fence_init(tile->primary_gt, ifence, fence, vma);
		err = invalidation_fence_init(tile->primary_gt, ifence, fence,
					      xe_vma_start(vma),
					      xe_vma_end(vma),
					      xe_vma_vm(vma)->usm.asid);
		if (err) {
			dma_fence_put(fence);
			kfree(ifence);