Commit 10aa5c80 authored by Thomas Hellström's avatar Thomas Hellström Committed by Lucas De Marchi
Browse files

drm/gpusvm, drm/xe: Fix userptr to not allow device private pages



When userptr is used on SVM-enabled VMs, a non-NULL
hmm_range::dev_private_owner value might mean that
hmm_range_fault() attempts to return device private pages.
Either that will fail, or the userptr code will not know
how to handle those.

Use NULL for hmm_range::dev_private_owner to migrate
such pages to system. In order to do that, move the
struct drm_gpusvm::device_private_page_owner field to
struct drm_gpusvm_ctx::device_private_page_owner so that
it doesn't remain immutable over the drm_gpusvm lifetime.

v2:
- Don't conditionally compile xe_svm_devm_owner().
- Kerneldoc xe_svm_devm_owner().

Fixes: 9e978741 ("drm/xe/userptr: replace xe_hmm with gpusvm")
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: default avatarThomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: default avatarMatthew Auld <matthew.auld@intel.com>
Reviewed-by: default avatarMatthew Brost <matthew.brost@intel.com>
Acked-by: default avatarMaarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: https://lore.kernel.org/r/20250930122752.96034-1-thomas.hellstrom@linux.intel.com


(cherry picked from commit ad298d9e)
Signed-off-by: default avatarLucas De Marchi <lucas.demarchi@intel.com>
parent 5b440a8b
Loading
Loading
Loading
Loading
+13 −11
Original line number Diff line number Diff line
@@ -361,7 +361,6 @@ static const struct mmu_interval_notifier_ops drm_gpusvm_notifier_ops = {
 * @name: Name of the GPU SVM.
 * @drm: Pointer to the DRM device structure.
 * @mm: Pointer to the mm_struct for the address space.
 * @device_private_page_owner: Device private pages owner.
 * @mm_start: Start address of GPU SVM.
 * @mm_range: Range of the GPU SVM.
 * @notifier_size: Size of individual notifiers.
@@ -383,7 +382,7 @@ static const struct mmu_interval_notifier_ops drm_gpusvm_notifier_ops = {
 */
int drm_gpusvm_init(struct drm_gpusvm *gpusvm,
		    const char *name, struct drm_device *drm,
		    struct mm_struct *mm, void *device_private_page_owner,
		    struct mm_struct *mm,
		    unsigned long mm_start, unsigned long mm_range,
		    unsigned long notifier_size,
		    const struct drm_gpusvm_ops *ops,
@@ -395,15 +394,13 @@ int drm_gpusvm_init(struct drm_gpusvm *gpusvm,
		mmgrab(mm);
	} else {
		/* No full SVM mode, only core drm_gpusvm_pages API. */
		if (ops || num_chunks || mm_range || notifier_size ||
		    device_private_page_owner)
		if (ops || num_chunks || mm_range || notifier_size)
			return -EINVAL;
	}

	gpusvm->name = name;
	gpusvm->drm = drm;
	gpusvm->mm = mm;
	gpusvm->device_private_page_owner = device_private_page_owner;
	gpusvm->mm_start = mm_start;
	gpusvm->mm_range = mm_range;
	gpusvm->notifier_size = notifier_size;
@@ -684,6 +681,7 @@ static unsigned int drm_gpusvm_hmm_pfn_to_order(unsigned long hmm_pfn,
 * @notifier: Pointer to the GPU SVM notifier structure
 * @start: Start address
 * @end: End address
 * @dev_private_owner: The device private page owner
 *
 * Check if pages between start and end have been faulted in on the CPU. Use to
 * prevent migration of pages without CPU backing store.
@@ -692,14 +690,15 @@ static unsigned int drm_gpusvm_hmm_pfn_to_order(unsigned long hmm_pfn,
 */
static bool drm_gpusvm_check_pages(struct drm_gpusvm *gpusvm,
				   struct drm_gpusvm_notifier *notifier,
				   unsigned long start, unsigned long end)
				   unsigned long start, unsigned long end,
				   void *dev_private_owner)
{
	struct hmm_range hmm_range = {
		.default_flags = 0,
		.notifier = &notifier->notifier,
		.start = start,
		.end = end,
		.dev_private_owner = gpusvm->device_private_page_owner,
		.dev_private_owner = dev_private_owner,
	};
	unsigned long timeout =
		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
@@ -753,6 +752,7 @@ static bool drm_gpusvm_check_pages(struct drm_gpusvm *gpusvm,
 * @gpuva_start: Start address of GPUVA which mirrors CPU
 * @gpuva_end: End address of GPUVA which mirrors CPU
 * @check_pages_threshold: Check CPU pages for present threshold
 * @dev_private_owner: The device private page owner
 *
 * This function determines the chunk size for the GPU SVM range based on the
 * fault address, GPU SVM chunk sizes, existing GPU SVM ranges, and the virtual
@@ -767,7 +767,8 @@ drm_gpusvm_range_chunk_size(struct drm_gpusvm *gpusvm,
			    unsigned long fault_addr,
			    unsigned long gpuva_start,
			    unsigned long gpuva_end,
			    unsigned long check_pages_threshold)
			    unsigned long check_pages_threshold,
			    void *dev_private_owner)
{
	unsigned long start, end;
	int i = 0;
@@ -814,7 +815,7 @@ drm_gpusvm_range_chunk_size(struct drm_gpusvm *gpusvm,
		 * process-many-malloc' mallocs at least 64k at a time.
		 */
		if (end - start <= check_pages_threshold &&
		    !drm_gpusvm_check_pages(gpusvm, notifier, start, end)) {
		    !drm_gpusvm_check_pages(gpusvm, notifier, start, end, dev_private_owner)) {
			++i;
			goto retry;
		}
@@ -957,7 +958,8 @@ drm_gpusvm_range_find_or_insert(struct drm_gpusvm *gpusvm,
	chunk_size = drm_gpusvm_range_chunk_size(gpusvm, notifier, vas,
						 fault_addr, gpuva_start,
						 gpuva_end,
						 ctx->check_pages_threshold);
						 ctx->check_pages_threshold,
						 ctx->device_private_page_owner);
	if (chunk_size == LONG_MAX) {
		err = -EINVAL;
		goto err_notifier_remove;
@@ -1268,7 +1270,7 @@ int drm_gpusvm_get_pages(struct drm_gpusvm *gpusvm,
		.notifier = notifier,
		.start = pages_start,
		.end = pages_end,
		.dev_private_owner = gpusvm->device_private_page_owner,
		.dev_private_owner = ctx->device_private_page_owner,
	};
	void *zdd;
	unsigned long timeout =
+3 −8
Original line number Diff line number Diff line
@@ -67,11 +67,6 @@ void xe_svm_range_debug(struct xe_svm_range *range, const char *operation)
	range_debug(range, operation);
}

static void *xe_svm_devm_owner(struct xe_device *xe)
{
	return xe;
}

static struct drm_gpusvm_range *
xe_svm_range_alloc(struct drm_gpusvm *gpusvm)
{
@@ -744,15 +739,14 @@ int xe_svm_init(struct xe_vm *vm)
			  xe_svm_garbage_collector_work_func);

		err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm,
				      current->mm, xe_svm_devm_owner(vm->xe), 0,
				      vm->size,
				      current->mm, 0, vm->size,
				      xe_modparam.svm_notifier_size * SZ_1M,
				      &gpusvm_ops, fault_chunk_sizes,
				      ARRAY_SIZE(fault_chunk_sizes));
		drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock);
	} else {
		err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM (simple)",
				      &vm->xe->drm, NULL, NULL, 0, 0, 0, NULL,
				      &vm->xe->drm, NULL, 0, 0, 0, NULL,
				      NULL, 0);
	}

@@ -1017,6 +1011,7 @@ static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
		.devmem_only = need_vram && devmem_possible,
		.timeslice_ms = need_vram && devmem_possible ?
			vm->xe->atomic_svm_timeslice_ms : 0,
		.device_private_page_owner = xe_svm_devm_owner(vm->xe),
	};
	struct xe_validation_ctx vctx;
	struct drm_exec exec;
+14 −0
Original line number Diff line number Diff line
@@ -6,6 +6,20 @@
#ifndef _XE_SVM_H_
#define _XE_SVM_H_

struct xe_device;

/**
 * xe_svm_devm_owner() - Return the owner of device private memory
 * @xe: The xe device.
 *
 * Return: The owner of this device's device private memory to use in
 * hmm_range_fault()-
 */
static inline void *xe_svm_devm_owner(struct xe_device *xe)
{
	return xe;
}

#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM)

#include <drm/drm_pagemap.h>
+1 −0
Original line number Diff line number Diff line
@@ -54,6 +54,7 @@ int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
	struct xe_device *xe = vm->xe;
	struct drm_gpusvm_ctx ctx = {
		.read_only = xe_vma_read_only(vma),
		.device_private_page_owner = NULL,
	};

	lockdep_assert_held(&vm->lock);
+1 −0
Original line number Diff line number Diff line
@@ -2881,6 +2881,7 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
	ctx.read_only = xe_vma_read_only(vma);
	ctx.devmem_possible = devmem_possible;
	ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
	ctx.device_private_page_owner = xe_svm_devm_owner(vm->xe);

	/* TODO: Threading the migration */
	xa_for_each(&op->prefetch_range.range, i, svm_range) {
Loading