Commit c5b3eb5a authored by Matthew Brost's avatar Matthew Brost
Browse files

drm/xe: Add GPUSVM device memory copy vfunc functions



Add GPUSVM device memory copy vfunc functions and connect to migration
layer. Used for device memory migration.

v2:
 - Allow NULL device pages in xe_svm_copy
 - Use new drm_gpusvm_devmem_ops
v3:
 - Prefix defines with XE_ (Thomas)
 - Change copy chunk size to 8M
 - Add a bunch of comments to xe_svm_copy to clarify behavior (Thomas)
 - Better commit message (Thomas)
v5:
 - s/xe_mem_region/xe_vram_region (Rebase)

Signed-off-by: default avatarMatthew Brost <matthew.brost@intel.com>
Reviewed-by: default avatarThomas Hellström <thomas.hellstrom@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250306012657.3505757-25-matthew.brost@intel.com
parent 11bbe0d9
Loading
Loading
Loading
Loading
+152 −0
Original line number Diff line number Diff line
@@ -4,6 +4,7 @@
 */

#include "xe_gt_tlb_invalidation.h"
#include "xe_migrate.h"
#include "xe_pt.h"
#include "xe_svm.h"
#include "xe_vm.h"
@@ -315,6 +316,157 @@ static u64 xe_vram_region_page_to_dpa(struct xe_vram_region *vr,
	return dpa;
}

enum xe_svm_copy_dir {
	XE_SVM_COPY_TO_VRAM,
	XE_SVM_COPY_TO_SRAM,
};

static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr,
		       unsigned long npages, const enum xe_svm_copy_dir dir)
{
	struct xe_vram_region *vr = NULL;
	struct xe_tile *tile;
	struct dma_fence *fence = NULL;
	unsigned long i;
#define XE_VRAM_ADDR_INVALID	~0x0ull
	u64 vram_addr = XE_VRAM_ADDR_INVALID;
	int err = 0, pos = 0;
	bool sram = dir == XE_SVM_COPY_TO_SRAM;

	/*
	 * This flow is complex: it locates physically contiguous device pages,
	 * derives the starting physical address, and performs a single GPU copy
	 * to for every 8M chunk in a DMA address array. Both device pages and
	 * DMA addresses may be sparsely populated. If either is NULL, a copy is
	 * triggered based on the current search state. The last GPU copy is
	 * waited on to ensure all copies are complete.
	 */

	for (i = 0; i < npages; ++i) {
		struct page *spage = pages[i];
		struct dma_fence *__fence;
		u64 __vram_addr;
		bool match = false, chunk, last;

#define XE_MIGRATE_CHUNK_SIZE	SZ_8M
		chunk = (i - pos) == (XE_MIGRATE_CHUNK_SIZE / PAGE_SIZE);
		last = (i + 1) == npages;

		/* No CPU page and no device pages queue'd to copy */
		if (!dma_addr[i] && vram_addr == XE_VRAM_ADDR_INVALID)
			continue;

		if (!vr && spage) {
			vr = page_to_vr(spage);
			tile = vr_to_tile(vr);
		}
		XE_WARN_ON(spage && page_to_vr(spage) != vr);

		/*
		 * CPU page and device page valid, capture physical address on
		 * first device page, check if physical contiguous on subsequent
		 * device pages.
		 */
		if (dma_addr[i] && spage) {
			__vram_addr = xe_vram_region_page_to_dpa(vr, spage);
			if (vram_addr == XE_VRAM_ADDR_INVALID) {
				vram_addr = __vram_addr;
				pos = i;
			}

			match = vram_addr + PAGE_SIZE * (i - pos) == __vram_addr;
		}

		/*
		 * Mismatched physical address, 8M copy chunk, or last page -
		 * trigger a copy.
		 */
		if (!match || chunk || last) {
			/*
			 * Extra page for first copy if last page and matching
			 * physical address.
			 */
			int incr = (match && last) ? 1 : 0;

			if (vram_addr != XE_VRAM_ADDR_INVALID) {
				if (sram)
					__fence = xe_migrate_from_vram(tile->migrate,
								       i - pos + incr,
								       vram_addr,
								       dma_addr + pos);
				else
					__fence = xe_migrate_to_vram(tile->migrate,
								     i - pos + incr,
								     dma_addr + pos,
								     vram_addr);
				if (IS_ERR(__fence)) {
					err = PTR_ERR(__fence);
					goto err_out;
				}

				dma_fence_put(fence);
				fence = __fence;
			}

			/* Setup physical address of next device page */
			if (dma_addr[i] && spage) {
				vram_addr = __vram_addr;
				pos = i;
			} else {
				vram_addr = XE_VRAM_ADDR_INVALID;
			}

			/* Extra mismatched device page, copy it */
			if (!match && last && vram_addr != XE_VRAM_ADDR_INVALID) {
				if (sram)
					__fence = xe_migrate_from_vram(tile->migrate, 1,
								       vram_addr,
								       dma_addr + pos);
				else
					__fence = xe_migrate_to_vram(tile->migrate, 1,
								     dma_addr + pos,
								     vram_addr);
				if (IS_ERR(__fence)) {
					err = PTR_ERR(__fence);
					goto err_out;
				}

				dma_fence_put(fence);
				fence = __fence;
			}
		}
	}

err_out:
	/* Wait for all copies to complete */
	if (fence) {
		dma_fence_wait(fence, false);
		dma_fence_put(fence);
	}

	return err;
#undef XE_MIGRATE_CHUNK_SIZE
#undef XE_VRAM_ADDR_INVALID
}

static int xe_svm_copy_to_devmem(struct page **pages, dma_addr_t *dma_addr,
				 unsigned long npages)
{
	return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_VRAM);
}

static int xe_svm_copy_to_ram(struct page **pages, dma_addr_t *dma_addr,
			      unsigned long npages)
{
	return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_SRAM);
}

__maybe_unused
static const struct drm_gpusvm_devmem_ops gpusvm_devmem_ops = {
	.copy_to_devmem = xe_svm_copy_to_devmem,
	.copy_to_ram = xe_svm_copy_to_ram,
};

static const struct drm_gpusvm_ops gpusvm_ops = {
	.range_alloc = xe_svm_range_alloc,
	.range_free = xe_svm_range_free,