Commit 09ba0a8f authored by Himal Prasad Ghimiray
Browse files

drm/xe/svm: Implement prefetch support for SVM ranges



This commit adds prefetch support for SVM ranges, utilizing the
existing ioctl vm_bind functionality to achieve this.

v2: rebase

v3:
   - use xa_for_each() instead of manual loop
   - check range is valid and in preferred location before adding to
     xarray
   - Fix naming conventions
   - Fix return condition as -ENODATA instead of -EAGAIN (Matthew Brost)
   - Handle sparsely populated cpu vma range (Matthew Brost)

v4:
   - fix end address to find next cpu vma in case of -ENOENT

v5:
   - Move find next vma logic to drm gpusvm layer
   - Avoid mixing declaration and logic

v6:
  - Use new function names
  - Move eviction logic to prefetch_ranges

v7:
  - devmem_only assigned 0
  - nit address

v8:
  - initialize ctx with 0

Cc: Matthew Brost <matthew.brost@intel.com>
Acked-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20250513040228.470682-15-himal.prasad.ghimiray@intel.com


Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
parent c904d4e2
Loading
Loading
Loading
Loading
+45 −13
Original line number Diff line number Diff line
@@ -1458,6 +1458,7 @@ static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update)
	struct xe_vm *vm = pt_update->vops->vm;
	struct xe_vma_ops *vops = pt_update->vops;
	struct xe_vma_op *op;
	unsigned long i;
	int err;

	err = xe_pt_pre_commit(pt_update);
@@ -1467,15 +1468,29 @@ static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update)
	xe_svm_notifier_lock(vm);

	list_for_each_entry(op, &vops->list, link) {
		struct xe_svm_range *range = op->map_range.range;
		struct xe_svm_range *range = NULL;

		if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE)
			continue;

		if (op->base.op == DRM_GPUVA_OP_PREFETCH) {
			xe_assert(vm->xe,
				  xe_vma_is_cpu_addr_mirror(gpuva_to_vma(op->base.prefetch.va)));
			xa_for_each(&op->prefetch_range.range, i, range) {
				xe_svm_range_debug(range, "PRE-COMMIT");

				if (!xe_svm_range_pages_valid(range)) {
					xe_svm_range_debug(range, "PRE-COMMIT - RETRY");
					xe_svm_notifier_unlock(vm);
					return -ENODATA;
				}
			}
		} else {
			xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma));
			xe_assert(vm->xe, op->subop == XE_VMA_SUBOP_MAP_RANGE);
			range = op->map_range.range;

			xe_svm_range_debug(range, "PRE-COMMIT");

			if (!xe_svm_range_pages_valid(range)) {
				xe_svm_range_debug(range, "PRE-COMMIT - RETRY");
@@ -1483,6 +1498,7 @@ static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update)
				return -EAGAIN;
			}
		}
	}

	return 0;
}
@@ -2065,11 +2081,20 @@ static int op_prepare(struct xe_vm *vm,
	{
		struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);

		if (xe_vma_is_cpu_addr_mirror(vma))
			break;
		if (xe_vma_is_cpu_addr_mirror(vma)) {
			struct xe_svm_range *range;
			unsigned long i;

			xa_for_each(&op->prefetch_range.range, i, range) {
				err = bind_range_prepare(vm, tile, pt_update_ops,
							 vma, range);
				if (err)
					return err;
			}
		} else {
			err = bind_op_prepare(vm, tile, pt_update_ops, vma, false);
			pt_update_ops->wait_vm_kernel = true;
		}
		break;
	}
	case DRM_GPUVA_OP_DRIVER:
@@ -2275,9 +2300,16 @@ static void op_commit(struct xe_vm *vm,
	{
		struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);

		if (!xe_vma_is_cpu_addr_mirror(vma))
		if (xe_vma_is_cpu_addr_mirror(vma)) {
			struct xe_svm_range *range = NULL;
			unsigned long i;

			xa_for_each(&op->prefetch_range.range, i, range)
				range_present_and_invalidated_tile(vm, range, tile->id);
		} else {
			bind_op_commit(vm, tile, pt_update_ops, vma, fence,
				       fence2, false);
		}
		break;
	}
	case DRM_GPUVA_OP_DRIVER:
+188 −8
Original line number Diff line number Diff line
@@ -798,10 +798,33 @@ static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
}
ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO);

/*
 * Tear down the SVM prefetch state attached to a single VMA operation.
 *
 * Only prefetch ops whose target VMA is a CPU address mirror carry an
 * xarray in op->prefetch_range.range; every other op is left untouched.
 */
static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op)
{
	if (op->base.op != DRM_GPUVA_OP_PREFETCH)
		return;

	if (!xe_vma_is_cpu_addr_mirror(gpuva_to_vma(op->base.prefetch.va)))
		return;

	xa_destroy(&op->prefetch_range.range);
}

/*
 * Release the SVM prefetch state of every op queued on @vops.
 *
 * The list walk is skipped entirely unless XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH
 * is set in vops->flags.
 */
static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops)
{
	struct xe_vma_op *cur;

	if (vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH) {
		list_for_each_entry(cur, &vops->list, link)
			xe_vma_svm_prefetch_op_fini(cur);
	}
}

/*
 * Finalize a xe_vma_ops container: drop any SVM prefetch state held by its
 * ops, then free the per-tile page-table update op arrays.
 */
static void xe_vma_ops_fini(struct xe_vma_ops *vops)
{
	int tile_idx;

	xe_vma_svm_prefetch_ops_fini(vops);

	for (tile_idx = 0; tile_idx < XE_MAX_TILES_PER_DEVICE; tile_idx++)
		kfree(vops->pt_update_ops[tile_idx].ops);
}
@@ -2248,13 +2271,25 @@ static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags)
	return true;
}

static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops)
{
	struct drm_gpuva_op *__op;

	drm_gpuva_for_each_op(__op, ops) {
		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);

		xe_vma_svm_prefetch_op_fini(op);
	}
}

/*
 * Create operations list from IOCTL arguments, setup operations fields so parse
 * and commit steps are decoupled from IOCTL arguments. This step can fail.
 */
static struct drm_gpuva_ops *
vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
			 u64 bo_offset_or_userptr, u64 addr, u64 range,
vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
			 struct xe_bo *bo, u64 bo_offset_or_userptr,
			 u64 addr, u64 range,
			 u32 operation, u32 flags,
			 u32 prefetch_region, u16 pat_index)
{
@@ -2262,6 +2297,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
	struct drm_gpuva_ops *ops;
	struct drm_gpuva_op *__op;
	struct drm_gpuvm_bo *vm_bo;
	u64 range_end = addr + range;
	int err;

	lockdep_assert_held_write(&vm->lock);
@@ -2323,14 +2359,77 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
			op->map.invalidate_on_bind =
				__xe_vm_needs_clear_scratch_pages(vm, flags);
		} else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
			struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
			struct xe_svm_range *svm_range;
			struct drm_gpusvm_ctx ctx = {};
			struct xe_tile *tile;
			u8 id, tile_mask = 0;
			u32 i;

			if (!xe_vma_is_cpu_addr_mirror(vma)) {
				op->prefetch.region = prefetch_region;
				break;
			}

			ctx.read_only = xe_vma_read_only(vma);
			ctx.devmem_possible = IS_DGFX(vm->xe) &&
					      IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR);

			for_each_tile(tile, vm->xe, id)
				tile_mask |= 0x1 << id;

			xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC);
			op->prefetch_range.region = prefetch_region;
			op->prefetch_range.ranges_count = 0;
alloc_next_range:
			svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx);

			if (PTR_ERR(svm_range) == -ENOENT) {
				u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma);

				addr = ret == ULONG_MAX ? 0 : ret;
				if (addr)
					goto alloc_next_range;
				else
					goto print_op_label;
			}

			if (IS_ERR(svm_range)) {
				err = PTR_ERR(svm_range);
				goto unwind_prefetch_ops;
			}

			if (xe_svm_range_validate(vm, svm_range, tile_mask, !!prefetch_region))
				goto check_next_range;

			err = xa_alloc(&op->prefetch_range.range,
				       &i, svm_range, xa_limit_32b,
				       GFP_KERNEL);

			if (err)
				goto unwind_prefetch_ops;

			op->prefetch_range.ranges_count++;
			vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH;
check_next_range:
			if (range_end > xe_svm_range_end(svm_range) &&
			    xe_svm_range_end(svm_range) < xe_vma_end(vma)) {
				addr = xe_svm_range_end(svm_range);
				goto alloc_next_range;
			}
		}
print_op_label:
		print_op(vm->xe, __op);
	}

	return ops;

unwind_prefetch_ops:
	xe_svm_prefetch_gpuva_ops_fini(ops);
	drm_gpuva_ops_free(&vm->gpuvm, ops);
	return ERR_PTR(err);
}

ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO);

static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
@@ -2645,8 +2744,12 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
					return err;
			}

			if (!xe_vma_is_cpu_addr_mirror(vma))
			if (xe_vma_is_cpu_addr_mirror(vma))
				xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask,
							      op->prefetch_range.ranges_count);
			else
				xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);

			break;
		default:
			drm_warn(&vm->xe->drm, "NOT POSSIBLE");
@@ -2772,6 +2875,55 @@ static int check_ufence(struct xe_vma *vma)
	return 0;
}

/*
 * Migrate and populate every SVM range recorded on a prefetch op.
 *
 * Nothing is done (and 0 is returned) when the op's VMA is not a CPU
 * address mirror. Otherwise, for each range stored in
 * op->prefetch_range.range:
 *   - a region of 0 triggers a migration to system memory;
 *   - if xe_svm_range_needs_migrate_to_vram() says so, VRAM is allocated on
 *     the tile selected by the region;
 *   - the range's pages are then fetched.
 *
 * Return: 0 on success. -ENODATA if VRAM allocation fails, or if getting
 * pages fails with -EOPNOTSUPP, -EFAULT or -EPERM. Any other error from
 * getting pages is returned unchanged. Processing stops at the first
 * failing range.
 */
static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
{
	bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR);
	struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
	struct drm_gpusvm_ctx ctx = {};
	struct xe_svm_range *svm_range;
	unsigned long idx;
	u32 region;
	int err;

	if (!xe_vma_is_cpu_addr_mirror(vma))
		return 0;

	region = op->prefetch_range.region;

	ctx.read_only = xe_vma_read_only(vma);
	ctx.devmem_possible = devmem_possible;
	if (devmem_possible)
		ctx.check_pages_threshold = SZ_64K;
	else
		ctx.check_pages_threshold = 0;

	/* TODO: Threading the migration */
	xa_for_each(&op->prefetch_range.range, idx, svm_range) {
		if (region == 0)
			xe_svm_range_migrate_to_smem(vm, svm_range);

		if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, region)) {
			struct xe_tile *tile =
				&vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0];

			err = xe_svm_alloc_vram(vm, tile, svm_range, &ctx);
			if (err) {
				drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n",
					vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
				return -ENODATA;
			}
		}

		err = xe_svm_range_get_pages(vm, svm_range, &ctx);
		if (!err)
			continue;

		/* Collapse these failures into -ENODATA before logging. */
		switch (err) {
		case -EOPNOTSUPP:
		case -EFAULT:
		case -EPERM:
			err = -ENODATA;
			break;
		default:
			break;
		}

		drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n",
			vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
		return err;
	}

	return 0;
}

static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
			    struct xe_vma_op *op)
{
@@ -2809,7 +2961,12 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
	case DRM_GPUVA_OP_PREFETCH:
	{
		struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
		u32 region = op->prefetch.region;
		u32 region;

		if (xe_vma_is_cpu_addr_mirror(vma))
			region = op->prefetch_range.region;
		else
			region = op->prefetch.region;

		xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type));

@@ -2828,6 +2985,25 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
	return err;
}

/*
 * Run prefetch_ranges() on every prefetch op queued on @vops.
 *
 * Returns 0 immediately when XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH is not set in
 * vops->flags. Otherwise stops at, and returns, the first non-zero error
 * from prefetch_ranges(); returns 0 if all prefetch ops succeed.
 */
static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops)
{
	struct xe_vma_op *op;

	if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
		return 0;

	list_for_each_entry(op, &vops->list, link) {
		int err;

		if (op->base.op != DRM_GPUVA_OP_PREFETCH)
			continue;

		err = prefetch_ranges(vm, op);
		if (err)
			return err;
	}

	return 0;
}

static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
					   struct xe_vm *vm,
					   struct xe_vma_ops *vops)
@@ -3477,7 +3653,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
		u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
		u16 pat_index = bind_ops[i].pat_index;

		ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
		ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset,
						  addr, range, op, flags,
						  prefetch_region, pat_index);
		if (IS_ERR(ops[i])) {
@@ -3510,6 +3686,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
	if (err)
		goto unwind_ops;

	err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops);
	if (err)
		goto unwind_ops;

	fence = vm_bind_ioctl_ops_execute(vm, &vops);
	if (IS_ERR(fence))
		err = PTR_ERR(fence);
@@ -3579,7 +3759,7 @@ struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo,

	xe_vma_ops_init(&vops, vm, q, NULL, 0);

	ops = vm_bind_ioctl_ops_create(vm, bo, 0, addr, bo->size,
	ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, bo->size,
				       DRM_XE_VM_BIND_OP_MAP, 0, 0,
				       vm->xe->pat.idx[cache_lvl]);
	if (IS_ERR(ops)) {