Commit 57a5f45b authored by Lukasz Laguna's avatar Lukasz Laguna Committed by Michał Winiarski
Browse files

drm/xe/migrate: Add function to copy of VRAM data in chunks



Introduce a new function to copy data between VRAM and sysmem objects.
The existing xe_migrate_copy() is tailored for eviction and restore
operations, which involves additional logic and operates on entire
objects.
The xe_migrate_vram_copy_chunk() allows copying chunks of data to or
from a dedicated buffer object, which is essential in case of VF
migration.

Signed-off-by: default avatarLukasz Laguna <lukasz.laguna@intel.com>
Reviewed-by: default avatarMatthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20251112132220.516975-22-michal.winiarski@intel.com


Signed-off-by: default avatarMichał Winiarski <michal.winiarski@intel.com>
parent 274186fa
Loading
Loading
Loading
Loading
+123 −5
Original line number Diff line number Diff line
@@ -29,6 +29,7 @@
#include "xe_lrc.h"
#include "xe_map.h"
#include "xe_mocs.h"
#include "xe_printk.h"
#include "xe_pt.h"
#include "xe_res_cursor.h"
#include "xe_sa.h"
@@ -1210,6 +1211,128 @@ struct xe_exec_queue *xe_migrate_exec_queue(struct xe_migrate *migrate)
	return migrate->q;
}

/**
 * xe_migrate_vram_copy_chunk() - Copy a chunk of a VRAM buffer object.
 * @vram_bo: The VRAM buffer object.
 * @vram_offset: The VRAM offset.
 * @sysmem_bo: The sysmem buffer object.
 * @sysmem_offset: The sysmem offset.
 * @size: The size of VRAM chunk to copy.
 * @dir: The direction of the copy operation.
 *
 * Copies a portion of a buffer object between VRAM and system memory.
 * On Xe2 platforms that support flat CCS, VRAM data is decompressed when
 * copying to system memory.
 *
 * Return: Pointer to a dma_fence representing the last copy batch, or
 * an error pointer on failure. If there is a failure, any copy operation
 * started by the function call has been synced.
 */
struct dma_fence *xe_migrate_vram_copy_chunk(struct xe_bo *vram_bo, u64 vram_offset,
					     struct xe_bo *sysmem_bo, u64 sysmem_offset,
					     u64 size, enum xe_migrate_copy_dir dir)
{
	struct xe_device *xe = xe_bo_device(vram_bo);
	struct xe_tile *tile = vram_bo->tile;
	struct xe_gt *gt = tile->primary_gt;
	struct xe_migrate *m = tile->migrate;
	struct dma_fence *fence = NULL;
	struct ttm_resource *vram = vram_bo->ttm.resource;
	struct ttm_resource *sysmem = sysmem_bo->ttm.resource;
	struct xe_res_cursor vram_it, sysmem_it;
	u64 vram_L0_ofs, sysmem_L0_ofs;
	u32 vram_L0_pt, sysmem_L0_pt;
	u64 vram_L0, sysmem_L0;
	bool to_sysmem = (dir == XE_MIGRATE_COPY_TO_SRAM);
	bool use_comp_pat = to_sysmem &&
		GRAPHICS_VER(xe) >= 20 && xe_device_has_flat_ccs(xe);
	int pass = 0;
	int err;

	xe_assert(xe, IS_ALIGNED(vram_offset | sysmem_offset | size, PAGE_SIZE));
	xe_assert(xe, xe_bo_is_vram(vram_bo));
	xe_assert(xe, !xe_bo_is_vram(sysmem_bo));
	xe_assert(xe, !range_overflows(vram_offset, size, (u64)vram_bo->ttm.base.size));
	xe_assert(xe, !range_overflows(sysmem_offset, size, (u64)sysmem_bo->ttm.base.size));

	xe_res_first(vram, vram_offset, size, &vram_it);
	xe_res_first_sg(xe_bo_sg(sysmem_bo), sysmem_offset, size, &sysmem_it);

	while (size) {
		u32 pte_flags = PTE_UPDATE_FLAG_IS_VRAM;
		u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */
		struct xe_sched_job *job;
		struct xe_bb *bb;
		u32 update_idx;
		bool usm = xe->info.has_usm;
		u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE;

		sysmem_L0 = xe_migrate_res_sizes(m, &sysmem_it);
		vram_L0 = min(xe_migrate_res_sizes(m, &vram_it), sysmem_L0);

		xe_dbg(xe, "Pass %u, size: %llu\n", pass++, vram_L0);

		pte_flags |= use_comp_pat ? PTE_UPDATE_FLAG_IS_COMP_PTE : 0;
		batch_size += pte_update_size(m, pte_flags, vram, &vram_it, &vram_L0,
					      &vram_L0_ofs, &vram_L0_pt, 0, 0, avail_pts);

		batch_size += pte_update_size(m, 0, sysmem, &sysmem_it, &vram_L0, &sysmem_L0_ofs,
					      &sysmem_L0_pt, 0, avail_pts, avail_pts);
		batch_size += EMIT_COPY_DW;

		bb = xe_bb_new(gt, batch_size, usm);
		if (IS_ERR(bb)) {
			err = PTR_ERR(bb);
			return ERR_PTR(err);
		}

		if (xe_migrate_allow_identity(vram_L0, &vram_it))
			xe_res_next(&vram_it, vram_L0);
		else
			emit_pte(m, bb, vram_L0_pt, true, use_comp_pat, &vram_it, vram_L0, vram);

		emit_pte(m, bb, sysmem_L0_pt, false, false, &sysmem_it, vram_L0, sysmem);

		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
		update_idx = bb->len;

		if (to_sysmem)
			emit_copy(gt, bb, vram_L0_ofs, sysmem_L0_ofs, vram_L0, XE_PAGE_SIZE);
		else
			emit_copy(gt, bb, sysmem_L0_ofs, vram_L0_ofs, vram_L0, XE_PAGE_SIZE);

		job = xe_bb_create_migration_job(m->q, bb, xe_migrate_batch_base(m, usm),
						 update_idx);
		if (IS_ERR(job)) {
			xe_bb_free(bb, NULL);
			err = PTR_ERR(job);
			return ERR_PTR(err);
		}

		xe_sched_job_add_migrate_flush(job, MI_INVALIDATE_TLB);

		xe_assert(xe, dma_resv_test_signaled(vram_bo->ttm.base.resv,
						     DMA_RESV_USAGE_BOOKKEEP));
		xe_assert(xe, dma_resv_test_signaled(sysmem_bo->ttm.base.resv,
						     DMA_RESV_USAGE_BOOKKEEP));

		scoped_guard(mutex, &m->job_mutex) {
			xe_sched_job_arm(job);
			dma_fence_put(fence);
			fence = dma_fence_get(&job->drm.s_fence->finished);
			xe_sched_job_push(job);

			dma_fence_put(m->fence);
			m->fence = dma_fence_get(fence);
		}

		xe_bb_free(bb, fence);
		size -= vram_L0;
	}

	return fence;
}

static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
				 u32 size, u32 pitch)
{
@@ -1912,11 +2035,6 @@ static bool xe_migrate_vram_use_pde(struct drm_pagemap_addr *sram_addr,
	return true;
}

enum xe_migrate_copy_dir {
	XE_MIGRATE_COPY_TO_VRAM,
	XE_MIGRATE_COPY_TO_SRAM,
};

#define XE_CACHELINE_BYTES	64ull
#define XE_CACHELINE_MASK	(XE_CACHELINE_BYTES - 1)

+8 −0
Original line number Diff line number Diff line
@@ -28,6 +28,11 @@ struct xe_vma;

enum xe_sriov_vf_ccs_rw_ctxs;

enum xe_migrate_copy_dir {
	XE_MIGRATE_COPY_TO_VRAM,
	XE_MIGRATE_COPY_TO_SRAM,
};

/**
 * struct xe_migrate_pt_update_ops - Callbacks for the
 * xe_migrate_update_pgtables() function.
@@ -131,6 +136,9 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,

struct xe_lrc *xe_migrate_lrc(struct xe_migrate *migrate);
struct xe_exec_queue *xe_migrate_exec_queue(struct xe_migrate *migrate);
struct dma_fence *xe_migrate_vram_copy_chunk(struct xe_bo *vram_bo, u64 vram_offset,
					     struct xe_bo *sysmem_bo, u64 sysmem_offset,
					     u64 size, enum xe_migrate_copy_dir dir);
int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
			     unsigned long offset, void *buf, int len,
			     int write);