Commit 400ee0f4 authored by Thomas Hellström
Browse files

drm/xe: Split TLB invalidation into submit and wait steps



xe_vm_range_tilemask_tlb_inval() submits TLB invalidation requests to
all GTs in a tile mask and then immediately waits for them to complete
before returning. This is fine for the existing callers, but a
subsequent patch will need to defer the wait in order to overlap TLB
invalidations across multiple VMAs.

Introduce xe_tlb_inval_range_tilemask_submit() and
xe_tlb_inval_batch_wait() in xe_tlb_inval.c as the submit and wait
halves respectively. The batch of fences is carried in the new
xe_tlb_inval_batch structure. Remove xe_vm_range_tilemask_tlb_inval()
and convert all three call sites to the new API.

v3:
- Don't wait on TLB invalidation batches if the corresponding batch
  submit returns an error. (Matt Brost)
- s/_batch/batch/ (Matt Brost)

Assisted-by: GitHub Copilot:claude-sonnet-4.6
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20260305093909.43623-4-thomas.hellstrom@linux.intel.com
parent 18c4e536
Loading
Loading
Loading
Loading
+6 −2
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@
#include "xe_pt.h"
#include "xe_svm.h"
#include "xe_tile.h"
#include "xe_tlb_inval.h"
#include "xe_ttm_vram_mgr.h"
#include "xe_vm.h"
#include "xe_vm_types.h"
@@ -225,6 +226,7 @@ static void xe_svm_invalidate(struct drm_gpusvm *gpusvm,
			      const struct mmu_notifier_range *mmu_range)
{
	struct xe_vm *vm = gpusvm_to_vm(gpusvm);
	struct xe_tlb_inval_batch batch;
	struct xe_device *xe = vm->xe;
	struct drm_gpusvm_range *r, *first;
	struct xe_tile *tile;
@@ -276,8 +278,10 @@ static void xe_svm_invalidate(struct drm_gpusvm *gpusvm,

	xe_device_wmb(xe);

	err = xe_vm_range_tilemask_tlb_inval(vm, adj_start, adj_end, tile_mask);
	WARN_ON_ONCE(err);
	err = xe_tlb_inval_range_tilemask_submit(xe, vm->usm.asid, adj_start, adj_end,
						 tile_mask, &batch);
	if (!WARN_ON_ONCE(err))
		xe_tlb_inval_batch_wait(&batch);

range_notifier_event_end:
	r = first;
+84 −0
Original line number Diff line number Diff line
@@ -486,3 +486,87 @@ bool xe_tlb_inval_idle(struct xe_tlb_inval *tlb_inval)
	guard(spinlock_irq)(&tlb_inval->pending_lock);
	return list_is_singular(&tlb_inval->pending_fences);
}

/**
 * xe_tlb_inval_batch_wait() - Wait for all fences in a TLB invalidation batch
 * @batch: Batch of TLB invalidation fences to wait on
 *
 * Waits, in array order, on each of the first @batch->num_fences fences held
 * in @batch, and then marks @batch as empty so that it can be reused for a
 * subsequent invalidation. Calling this on an empty batch is a no-op.
 */
void xe_tlb_inval_batch_wait(struct xe_tlb_inval_batch *batch)
{
	unsigned int idx = 0;

	while (idx < batch->num_fences) {
		xe_tlb_inval_fence_wait(&batch->fence[idx]);
		idx++;
	}

	/* Nothing left to wait on; the batch may now be resubmitted. */
	batch->num_fences = 0;
}

/**
 * xe_tlb_inval_range_tilemask_submit() - Submit TLB invalidations for an
 * address range on a tile mask
 * @xe: The xe device
 * @asid: Address space ID
 * @start: start address
 * @end: end address
 * @tile_mask: mask of tiles whose GTs are issued a TLB invalidation
 * @batch: Batch of TLB invalidation fences, filled in by this function
 *
 * Issue a range based TLB invalidation on the primary GT, and on the media GT
 * if there is one, of every tile selected by @tile_mask. On success the
 * function returns without waiting; the fences of the submitted invalidations
 * are left in @batch for a later call to xe_tlb_inval_batch_wait().
 *
 * If a submission fails, the invalidations submitted before the failure are
 * waited for here and @batch is left empty, so there is no need to call
 * xe_tlb_inval_batch_wait() on @batch.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_tlb_inval_range_tilemask_submit(struct xe_device *xe, u32 asid,
				       u64 start, u64 end, u8 tile_mask,
				       struct xe_tlb_inval_batch *batch)
{
	struct xe_tlb_inval_fence *fence = &batch->fence[0];
	struct xe_tile *tile;
	u32 fence_id = 0;
	u8 id;
	/*
	 * Must start out as 0: if @tile_mask selects no tile visited by
	 * for_each_tile(), the loop body never assigns err and it would
	 * otherwise be read uninitialized below.
	 */
	int err = 0;

	batch->num_fences = 0;
	if (!tile_mask)
		return 0;

	for_each_tile(tile, xe, id) {
		if (!(tile_mask & BIT(id)))
			continue;

		xe_tlb_inval_fence_init(&tile->primary_gt->tlb_inval,
					&fence[fence_id], true);

		err = xe_tlb_inval_range(&tile->primary_gt->tlb_inval,
					 &fence[fence_id], start, end,
					 asid, NULL);
		if (err)
			goto wait;
		++fence_id;

		if (!tile->media_gt)
			continue;

		xe_tlb_inval_fence_init(&tile->media_gt->tlb_inval,
					&fence[fence_id], true);

		err = xe_tlb_inval_range(&tile->media_gt->tlb_inval,
					 &fence[fence_id], start, end,
					 asid, NULL);
		if (err)
			goto wait;
		++fence_id;
	}

wait:
	/* Only successfully submitted invalidations are counted in the batch. */
	batch->num_fences = fence_id;
	if (err)
		xe_tlb_inval_batch_wait(batch);

	return err;
}
+6 −0
Original line number Diff line number Diff line
@@ -45,4 +45,10 @@ void xe_tlb_inval_done_handler(struct xe_tlb_inval *tlb_inval, int seqno);

bool xe_tlb_inval_idle(struct xe_tlb_inval *tlb_inval);

int xe_tlb_inval_range_tilemask_submit(struct xe_device *xe, u32 asid,
				       u64 start, u64 end, u8 tile_mask,
				       struct xe_tlb_inval_batch *batch);

void xe_tlb_inval_batch_wait(struct xe_tlb_inval_batch *batch);

#endif	/* _XE_TLB_INVAL_ */
+14 −0
Original line number Diff line number Diff line
@@ -9,6 +9,8 @@
#include <linux/workqueue.h>
#include <linux/dma-fence.h>

#include "xe_device_types.h"

struct drm_suballoc;
struct xe_tlb_inval;

@@ -132,4 +134,16 @@ struct xe_tlb_inval_fence {
	ktime_t inval_time;
};

/**
 * struct xe_tlb_inval_batch - Batch of TLB invalidation fences
 *
 * Holds one fence per GT covered by a TLB invalidation request. A batch is
 * filled in by xe_tlb_inval_range_tilemask_submit() and drained by
 * xe_tlb_inval_batch_wait().
 */
struct xe_tlb_inval_batch {
	/**
	 * @fence: per-GT TLB invalidation fences, with room for
	 * XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE entries
	 */
	struct xe_tlb_inval_fence fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
	/**
	 * @num_fences: number of valid entries in @fence, counted from
	 * index 0; zero when the batch is empty
	 */
	unsigned int num_fences;
};

#endif
+7 −62
Original line number Diff line number Diff line
@@ -3969,66 +3969,6 @@ void xe_vm_unlock(struct xe_vm *vm)
	dma_resv_unlock(xe_vm_resv(vm));
}

/**
 * xe_vm_range_tilemask_tlb_inval - Issue a TLB invalidation on this tilemask for an
 * address range
 * @vm: The VM
 * @start: start address
 * @end: end address
 * @tile_mask: mask of tiles whose GTs are issued a TLB invalidation
 *
 * Issue a range based TLB invalidation on the primary GT, and on the media GT
 * if there is one, of every tile selected by @tile_mask, using the ASID of
 * @vm. All invalidations that were submitted are waited for before the
 * function returns, also when a later submission fails.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start,
				   u64 end, u8 tile_mask)
{
	struct xe_tlb_inval_fence
		fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
	struct xe_tile *tile;
	u32 fence_id = 0;
	u8 id;
	/*
	 * Must start out as 0: if @tile_mask selects no tile visited by
	 * for_each_tile(), the loop body never assigns err and it would
	 * otherwise be returned uninitialized.
	 */
	int err = 0;

	if (!tile_mask)
		return 0;

	for_each_tile(tile, vm->xe, id) {
		if (!(tile_mask & BIT(id)))
			continue;

		xe_tlb_inval_fence_init(&tile->primary_gt->tlb_inval,
					&fence[fence_id], true);

		err = xe_tlb_inval_range(&tile->primary_gt->tlb_inval,
					 &fence[fence_id], start, end,
					 vm->usm.asid, NULL);
		if (err)
			goto wait;
		++fence_id;

		if (!tile->media_gt)
			continue;

		xe_tlb_inval_fence_init(&tile->media_gt->tlb_inval,
					&fence[fence_id], true);

		err = xe_tlb_inval_range(&tile->media_gt->tlb_inval,
					 &fence[fence_id], start, end,
					 vm->usm.asid, NULL);
		if (err)
			goto wait;
		++fence_id;
	}

wait:
	/* Wait only on the fences whose invalidation was actually submitted. */
	for (id = 0; id < fence_id; ++id)
		xe_tlb_inval_fence_wait(&fence[id]);

	return err;
}

/**
 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
 * @vma: VMA to invalidate
@@ -4043,6 +3983,7 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
{
	struct xe_device *xe = xe_vma_vm(vma)->xe;
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_tlb_inval_batch batch;
	struct xe_tile *tile;
	u8 tile_mask = 0;
	int ret = 0;
@@ -4083,12 +4024,16 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)

	xe_device_wmb(xe);

	ret = xe_vm_range_tilemask_tlb_inval(xe_vma_vm(vma), xe_vma_start(vma),
					     xe_vma_end(vma), tile_mask);
	ret = xe_tlb_inval_range_tilemask_submit(xe, xe_vma_vm(vma)->usm.asid,
						 xe_vma_start(vma), xe_vma_end(vma),
						 tile_mask, &batch);

	/* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
	WRITE_ONCE(vma->tile_invalidated, vma->tile_mask);

	if (!ret)
		xe_tlb_inval_batch_wait(&batch);

	return ret;
}

Loading