Commit 0d97ecce authored by Niranjana Vishwanathapura's avatar Niranjana Vishwanathapura Committed by Rodrigo Vivi
Browse files

drm/xe: Enable Fixed CCS mode setting



Disable dynamic HW load balancing of compute resource assignment
to engines and instead enabled fixed mode of mapping compute
resources to engines on all platforms with more than one compute
engine.

By default enable only one CCS engine with all compute slices
assigned to it. This is the desired configuration for common
workloads.

PVC platform supports only the fixed CCS mode (workaround 16016805146).

v2: Rebase, make it platform agnostic
v3: Minor code refactoring

Reviewed-by: default avatarAndi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: default avatarNiranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: default avatarRodrigo Vivi <rodrigo.vivi@intel.com>
parent b279b530
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -70,6 +70,7 @@ xe-y += xe_bb.o \
	xe_gsc.o \
	xe_gsc_submit.o \
	xe_gt.o \
	xe_gt_ccs_mode.o \
	xe_gt_clock.o \
	xe_gt_debugfs.o \
	xe_gt_idle.o \
+14 −0
Original line number Diff line number Diff line
@@ -402,8 +402,22 @@
#define   COMP_CKN_IN				REG_GENMASK(30, 29)

#define RCU_MODE				XE_REG(0x14800, XE_REG_OPTION_MASKED)
#define   RCU_MODE_FIXED_SLICE_CCS_MODE		REG_BIT(1)
#define   RCU_MODE_CCS_ENABLE			REG_BIT(0)

/*
 * Total of 4 cslices, where each cslice is in the form:
 *   [0-3]     CCS ID
 *   [4-6]     RSVD
 *   [7]       Disabled
 */
#define CCS_MODE				XE_REG(0x14804)
#define   CCS_MODE_CSLICE_0_3_MASK		REG_GENMASK(11, 0) /* 3 bits per cslice */
#define   CCS_MODE_CSLICE_MASK			0x7 /* CCS0-3 + rsvd */
#define   CCS_MODE_CSLICE_WIDTH			ilog2(CCS_MODE_CSLICE_MASK + 1)
#define   CCS_MODE_CSLICE(cslice, ccs) \
	((ccs) << ((cslice) * CCS_MODE_CSLICE_WIDTH))

#define FORCEWAKE_ACK_GT			XE_REG(0x130044)
#define   FORCEWAKE_KERNEL			BIT(0)
#define   FORCEWAKE_USER			BIT(1)
+10 −0
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@
#include "xe_force_wake.h"
#include "xe_ggtt.h"
#include "xe_gsc.h"
#include "xe_gt_ccs_mode.h"
#include "xe_gt_clock.h"
#include "xe_gt_idle.h"
#include "xe_gt_mcr.h"
@@ -450,6 +451,12 @@ static int all_fw_domain_init(struct xe_gt *gt)
	if (err)
		goto err_force_wake;

	/* Configure default CCS mode of 1 engine with all resources */
	if (xe_gt_ccs_mode_enabled(gt)) {
		gt->ccs_mode = 1;
		xe_gt_apply_ccs_mode(gt);
	}

	err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	XE_WARN_ON(err);
	xe_device_mem_access_put(gt_to_xe(gt));
@@ -560,6 +567,9 @@ static int do_gt_restart(struct xe_gt *gt)
		xe_reg_sr_apply_whitelist(hwe);
	}

	/* Get CCS mode in sync between sw/hw */
	xe_gt_apply_ccs_mode(gt);

	return 0;
}

+2 −0
Original line number Diff line number Diff line
@@ -17,6 +17,8 @@
		for_each_if(((hwe__) = (gt__)->hw_engines + (id__)) && \
			  xe_hw_engine_is_valid((hwe__)))

#define CCS_MASK(gt) (((gt)->info.engine_mask & XE_HW_ENGINE_CCS_MASK) >> XE_HW_ENGINE_CCS0)

#ifdef CONFIG_FAULT_INJECTION
extern struct fault_attr gt_reset_failure;
static inline bool xe_fault_inject_gt_reset(void)
+78 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2023 Intel Corporation
 */

#include "regs/xe_gt_regs.h"
#include "xe_assert.h"
#include "xe_gt.h"
#include "xe_gt_ccs_mode.h"
#include "xe_mmio.h"

static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines)
{
	u32 mode = CCS_MODE_CSLICE_0_3_MASK; /* disable all by default */
	int num_slices = hweight32(CCS_MASK(gt));
	struct xe_device *xe = gt_to_xe(gt);
	int width, cslice = 0;
	u32 config = 0;

	xe_assert(xe, xe_gt_ccs_mode_enabled(gt));

	xe_assert(xe, num_engines && num_engines <= num_slices);
	xe_assert(xe, !(num_slices % num_engines));

	/*
	 * Loop over all available slices and assign each a user engine.
	 * For example, if there are four compute slices available, the
	 * assignment of compute slices to compute engines would be,
	 *
	 * With 1 engine (ccs0):
	 *   slice 0, 1, 2, 3: ccs0
	 *
	 * With 2 engines (ccs0, ccs1):
	 *   slice 0, 2: ccs0
	 *   slice 1, 3: ccs1
	 *
	 * With 4 engines (ccs0, ccs1, ccs2, ccs3):
	 *   slice 0: ccs0
	 *   slice 1: ccs1
	 *   slice 2: ccs2
	 *   slice 3: ccs3
	 */
	for (width = num_slices / num_engines; width; width--) {
		struct xe_hw_engine *hwe;
		enum xe_hw_engine_id id;

		for_each_hw_engine(hwe, gt, id) {
			if (hwe->class != XE_ENGINE_CLASS_COMPUTE)
				continue;

			if (hwe->logical_instance >= num_engines)
				break;

			config |= BIT(hwe->instance) << XE_HW_ENGINE_CCS0;

			/* If a slice is fused off, leave disabled */
			while ((CCS_MASK(gt) & BIT(cslice)) == 0)
				cslice++;

			mode &= ~CCS_MODE_CSLICE(cslice, CCS_MODE_CSLICE_MASK);
			mode |= CCS_MODE_CSLICE(cslice, hwe->instance);
			cslice++;
		}
	}

	xe_mmio_write32(gt, CCS_MODE, mode);

	xe_gt_info(gt, "CCS_MODE=%x config:%08x, num_engines:%d, num_slices:%d\n",
		   mode, config, num_engines, num_slices);
}

void xe_gt_apply_ccs_mode(struct xe_gt *gt)
{
	if (!gt->ccs_mode)
		return;

	__xe_gt_apply_ccs_mode(gt, gt->ccs_mode);
}
Loading