Unverified commit 1460eae7, authored by Satyanarayana K V P, committed by Rodrigo Vivi
Browse files

drm/xe/vf: Use drm mm instead of drm sa for CCS read/write



The suballocator algorithm tracks a hole cursor at the last allocation
and tries to allocate after it. This is optimized for fence-ordered
progress, where older allocations are expected to become reusable first.

In fence-enabled mode, that ordering assumption holds. In fence-disabled
mode, allocations may be freed in arbitrary order, so limiting allocation
to the current hole window can miss valid free space and fail allocations
despite sufficient total space.

Use the DRM memory manager (drm_mm) instead of the suballocator to avoid
this issue, since CCS read/write operations do not use fences.

Fixes: 864690cf ("drm/xe/vf: Attach and detach CCS copy commands with BO")
Signed-off-by: Satyanarayana K V P <satyanarayana.k.v.p@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Maarten Lankhorst <dev@lankhorst.se>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20260408110145.1639937-6-satyanarayana.k.v.p@intel.com


(cherry picked from commit 6c84b493012aeb05dec29c709377bf0e17ac6815)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
parent 36c6bac1
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@
#include "xe_ggtt_types.h"

struct xe_device;
struct xe_mem_pool_node;
struct xe_vm;

#define XE_BO_MAX_PLACEMENTS	3
@@ -88,7 +89,7 @@ struct xe_bo {
	bool ccs_cleared;

	/** @bb_ccs: BB instructions of CCS read/write. Valid only for VF */
	struct xe_bb *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT];
	struct xe_mem_pool_node *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT];

	/**
	 * @cpu_caching: CPU caching mode. Currently only used for userspace
+31 −25
Original line number Diff line number Diff line
@@ -29,6 +29,7 @@
#include "xe_hw_engine.h"
#include "xe_lrc.h"
#include "xe_map.h"
#include "xe_mem_pool.h"
#include "xe_mocs.h"
#include "xe_printk.h"
#include "xe_pt.h"
@@ -1166,11 +1167,12 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
	u32 batch_size, batch_size_allocated;
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_res_cursor src_it, ccs_it;
	struct xe_mem_pool *bb_pool;
	struct xe_sriov_vf_ccs_ctx *ctx;
	struct xe_sa_manager *bb_pool;
	u64 size = xe_bo_size(src_bo);
	struct xe_bb *bb = NULL;
	struct xe_mem_pool_node *bb;
	u64 src_L0, src_L0_ofs;
	struct xe_bb xe_bb_tmp;
	u32 src_L0_pt;
	int err;

@@ -1208,18 +1210,18 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
		size -= src_L0;
	}

	bb = xe_bb_alloc(gt);
	bb = xe_mem_pool_alloc_node();
	if (IS_ERR(bb))
		return PTR_ERR(bb);

	bb_pool = ctx->mem.ccs_bb_pool;
	scoped_guard(mutex, xe_sa_bo_swap_guard(bb_pool)) {
		xe_sa_bo_swap_shadow(bb_pool);
	scoped_guard(mutex, xe_mem_pool_bo_swap_guard(bb_pool)) {
		xe_mem_pool_swap_shadow_locked(bb_pool);

		err = xe_bb_init(bb, bb_pool, batch_size);
		err = xe_mem_pool_insert_node(bb_pool, bb, batch_size * sizeof(u32));
		if (err) {
			xe_gt_err(gt, "BB allocation failed.\n");
			xe_bb_free(bb, NULL);
			kfree(bb);
			return err;
		}

@@ -1227,6 +1229,7 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
		size = xe_bo_size(src_bo);
		batch_size = 0;

		xe_bb_tmp = (struct xe_bb){ .cs = xe_mem_pool_node_cpu_addr(bb), .len = 0 };
		/*
		 * Emit PTE and copy commands here.
		 * The CCS copy command can only support limited size. If the size to be
@@ -1255,24 +1258,27 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
			xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE));
			batch_size += EMIT_COPY_CCS_DW;

			emit_pte(m, bb, src_L0_pt, false, true, &src_it, src_L0, src);
			emit_pte(m, &xe_bb_tmp, src_L0_pt, false, true, &src_it, src_L0, src);

			emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src);
			emit_pte(m, &xe_bb_tmp, ccs_pt, false, false, &ccs_it, ccs_size, src);

			bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags);
			flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_pltt,
			xe_bb_tmp.len = emit_flush_invalidate(xe_bb_tmp.cs, xe_bb_tmp.len,
							      flush_flags);
			flush_flags = xe_migrate_ccs_copy(m, &xe_bb_tmp, src_L0_ofs, src_is_pltt,
							  src_L0_ofs, dst_is_pltt,
							  src_L0, ccs_ofs, true);
			bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags);
			xe_bb_tmp.len = emit_flush_invalidate(xe_bb_tmp.cs, xe_bb_tmp.len,
							      flush_flags);

			size -= src_L0;
		}

		xe_assert(xe, (batch_size_allocated == bb->len));
		xe_assert(xe, (batch_size_allocated == xe_bb_tmp.len));
		xe_assert(xe, bb->sa_node.size == xe_bb_tmp.len * sizeof(u32));
		src_bo->bb_ccs[read_write] = bb;

		xe_sriov_vf_ccs_rw_update_bb_addr(ctx);
		xe_sa_bo_sync_shadow(bb->bo);
		xe_mem_pool_sync_shadow_locked(bb);
	}

	return 0;
@@ -1297,10 +1303,10 @@ int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo,
				  enum xe_sriov_vf_ccs_rw_ctxs read_write)
{
	struct xe_bb *bb = src_bo->bb_ccs[read_write];
	struct xe_mem_pool_node *bb = src_bo->bb_ccs[read_write];
	struct xe_device *xe = xe_bo_device(src_bo);
	struct xe_mem_pool *bb_pool;
	struct xe_sriov_vf_ccs_ctx *ctx;
	struct xe_sa_manager *bb_pool;
	u32 *cs;

	xe_assert(xe, IS_SRIOV_VF(xe));
@@ -1308,18 +1314,18 @@ void xe_migrate_ccs_rw_copy_clear(struct xe_bo *src_bo,
	ctx = &xe->sriov.vf.ccs.contexts[read_write];
	bb_pool = ctx->mem.ccs_bb_pool;

	guard(mutex) (xe_sa_bo_swap_guard(bb_pool));
	xe_sa_bo_swap_shadow(bb_pool);
	scoped_guard(mutex, xe_mem_pool_bo_swap_guard(bb_pool)) {
		xe_mem_pool_swap_shadow_locked(bb_pool);

	cs = xe_sa_bo_cpu_addr(bb->bo);
	memset(cs, MI_NOOP, bb->len * sizeof(u32));
		cs = xe_mem_pool_node_cpu_addr(bb);
		memset(cs, MI_NOOP, bb->sa_node.size);
		xe_sriov_vf_ccs_rw_update_bb_addr(ctx);

	xe_sa_bo_sync_shadow(bb->bo);

	xe_bb_free(bb, NULL);
		xe_mem_pool_sync_shadow_locked(bb);
		xe_mem_pool_free_node(bb);
		src_bo->bb_ccs[read_write] = NULL;
	}
}

/**
 * xe_migrate_exec_queue() - Get the execution queue from migrate context.
+29 −25
Original line number Diff line number Diff line
@@ -14,9 +14,9 @@
#include "xe_guc.h"
#include "xe_guc_submit.h"
#include "xe_lrc.h"
#include "xe_mem_pool.h"
#include "xe_migrate.h"
#include "xe_pm.h"
#include "xe_sa.h"
#include "xe_sriov_printk.h"
#include "xe_sriov_vf.h"
#include "xe_sriov_vf_ccs.h"
@@ -141,43 +141,47 @@ static u64 get_ccs_bb_pool_size(struct xe_device *xe)

static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx)
{
	struct xe_mem_pool *pool;
	struct xe_device *xe = tile_to_xe(tile);
	struct xe_sa_manager *sa_manager;
	u32 *pool_cpu_addr, *last_dw_addr;
	u64 bb_pool_size;
	int offset, err;
	int err;

	bb_pool_size = get_ccs_bb_pool_size(xe);
	xe_sriov_info(xe, "Allocating %s CCS BB pool size = %lldMB\n",
		      ctx->ctx_id ? "Restore" : "Save", bb_pool_size / SZ_1M);

	sa_manager = __xe_sa_bo_manager_init(tile, bb_pool_size, SZ_4K, SZ_16,
					     XE_SA_BO_MANAGER_FLAG_SHADOW);

	if (IS_ERR(sa_manager)) {
		xe_sriov_err(xe, "Suballocator init failed with error: %pe\n",
			     sa_manager);
		err = PTR_ERR(sa_manager);
	pool = xe_mem_pool_init(tile, bb_pool_size, sizeof(u32),
				XE_MEM_POOL_BO_FLAG_INIT_SHADOW_COPY);
	if (IS_ERR(pool)) {
		xe_sriov_err(xe, "xe_mem_pool_init failed with error: %pe\n",
			     pool);
		err = PTR_ERR(pool);
		return err;
	}

	offset = 0;
	xe_map_memset(xe, &sa_manager->bo->vmap, offset, MI_NOOP,
		      bb_pool_size);
	xe_map_memset(xe, &sa_manager->shadow->vmap, offset, MI_NOOP,
		      bb_pool_size);
	pool_cpu_addr = xe_mem_pool_cpu_addr(pool);
	memset(pool_cpu_addr, 0, bb_pool_size);

	offset = bb_pool_size - sizeof(u32);
	xe_map_wr(xe, &sa_manager->bo->vmap, offset, u32, MI_BATCH_BUFFER_END);
	xe_map_wr(xe, &sa_manager->shadow->vmap, offset, u32, MI_BATCH_BUFFER_END);
	last_dw_addr = pool_cpu_addr + (bb_pool_size / sizeof(u32)) - 1;
	*last_dw_addr = MI_BATCH_BUFFER_END;

	ctx->mem.ccs_bb_pool = sa_manager;
	/*
	 * Sync the main copy and the shadow copy so that the shadow copy is
	 * a replica of the main copy. After the init part, only individual
	 * BBs are synced, so the main pool and the shadow copy must be in
	 * sync from this point on. This is needed as GuC may read the BB
	 * commands from the shadow copy.
	 */
	xe_mem_pool_sync(pool);

	ctx->mem.ccs_bb_pool = pool;
	return 0;
}

static void ccs_rw_update_ring(struct xe_sriov_vf_ccs_ctx *ctx)
{
	u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool);
	u64 addr = xe_mem_pool_gpu_addr(ctx->mem.ccs_bb_pool);
	struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
	u32 dw[10], i = 0;

@@ -388,7 +392,7 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe)
#define XE_SRIOV_VF_CCS_RW_BB_ADDR_OFFSET	(2 * sizeof(u32))
void xe_sriov_vf_ccs_rw_update_bb_addr(struct xe_sriov_vf_ccs_ctx *ctx)
{
	u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool);
	u64 addr = xe_mem_pool_gpu_addr(ctx->mem.ccs_bb_pool);
	struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
	struct xe_device *xe = gt_to_xe(ctx->mig_q->gt);

@@ -412,8 +416,8 @@ int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo)
	struct xe_device *xe = xe_bo_device(bo);
	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
	struct xe_sriov_vf_ccs_ctx *ctx;
	struct xe_mem_pool_node *bb;
	struct xe_tile *tile;
	struct xe_bb *bb;
	int err = 0;

	xe_assert(xe, IS_VF_CCS_READY(xe));
@@ -445,7 +449,7 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo)
{
	struct xe_device *xe = xe_bo_device(bo);
	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
	struct xe_bb *bb;
	struct xe_mem_pool_node *bb;

	xe_assert(xe, IS_VF_CCS_READY(xe));

@@ -471,8 +475,8 @@ int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo)
 */
void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p)
{
	struct xe_sa_manager *bb_pool;
	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
	struct xe_mem_pool *bb_pool;

	if (!IS_VF_CCS_READY(xe))
		return;
@@ -485,7 +489,7 @@ void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p)

		drm_printf(p, "ccs %s bb suballoc info\n", ctx_id ? "write" : "read");
		drm_printf(p, "-------------------------\n");
		drm_suballoc_dump_debug_info(&bb_pool->base, p, xe_sa_manager_gpu_addr(bb_pool));
		xe_mem_pool_dump(bb_pool, p);
		drm_puts(p, "\n");
	}
}
+1 −4
Original line number Diff line number Diff line
@@ -17,9 +17,6 @@ enum xe_sriov_vf_ccs_rw_ctxs {
	XE_SRIOV_VF_CCS_CTX_COUNT
};

struct xe_migrate;
struct xe_sa_manager;

/**
 * struct xe_sriov_vf_ccs_ctx - VF CCS migration context data.
 */
@@ -33,7 +30,7 @@ struct xe_sriov_vf_ccs_ctx {
	/** @mem: memory data */
	struct {
		/** @mem.ccs_bb_pool: Pool from which batch buffers are allocated. */
		struct xe_sa_manager *ccs_bb_pool;
		struct xe_mem_pool *ccs_bb_pool;
	} mem;
};