Commit 82a499d2 authored by Dave Airlie
Browse files

Merge tag 'drm-xe-fixes-2026-02-26' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes



- W/a fix for multi-cast registers (Roper)
- Fix xe_sync initialization issues (Shuicheng)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patch.msgid.link/aaBGHy_0RLGGIBP5@intel.com
parents 5e061aac 0879c3f0
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -96,6 +96,12 @@
#define   ENABLE_SEMAPHORE_POLL_BIT		REG_BIT(13)

#define RING_CMD_CCTL(base)			XE_REG((base) + 0xc4, XE_REG_OPTION_MASKED)

/*
 * Per-engine register (at offset 0xcc from the engine's MMIO base) that the
 * LRC workaround batch programs with MI_LOAD_REGISTER_IMM before reading an
 * MCR register: SELECTIVE_READ_ADDRESSING is set together with the
 * group/instance chosen for a non-terminated read, and the register is
 * written back to 0 afterwards to reset steering.
 */
#define CS_MMIO_GROUP_INSTANCE_SELECT(base)	XE_REG((base) + 0xcc)
#define   SELECTIVE_READ_ADDRESSING		REG_BIT(30)
#define   SELECTIVE_READ_GROUP			REG_GENMASK(29, 23)
#define   SELECTIVE_READ_INSTANCE		REG_GENMASK(22, 16)

/*
 * CMD_CCTL read/write fields take a MOCS value and _not_ a table index.
 * The lsb of each can be considered a separate enabling bit for encryption.
+54 −12
Original line number Diff line number Diff line
@@ -210,11 +210,15 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q)
	return ret;
}

/* Dwords required to emit a RMW of a register */
#define EMIT_RMW_DW 20

static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
{
	struct xe_reg_sr *sr = &q->hwe->reg_lrc;
	struct xe_hw_engine *hwe = q->hwe;
	struct xe_reg_sr *sr = &hwe->reg_lrc;
	struct xe_reg_sr_entry *entry;
	int count_rmw = 0, count = 0, ret;
	int count_rmw = 0, count_rmw_mcr = 0, count = 0, ret;
	unsigned long idx;
	struct xe_bb *bb;
	size_t bb_len = 0;
@@ -224,6 +228,8 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
	xa_for_each(&sr->xa, idx, entry) {
		if (entry->reg.masked || entry->clr_bits == ~0)
			++count;
		else if (entry->reg.mcr)
			++count_rmw_mcr;
		else
			++count_rmw;
	}
@@ -231,17 +237,35 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
	if (count)
		bb_len += count * 2 + 1;

	if (count_rmw)
		bb_len += count_rmw * 20 + 7;
	/*
	 * RMW of MCR registers is the same as a normal RMW, except an
	 * additional LRI (3 dwords) is required per register to steer the read
	 * to a non-terminated instance.
	 *
	 * We could probably shorten the batch slightly by eliding the
	 * steering for consecutive MCR registers that have the same
	 * group/instance target, but it's not worth the extra complexity to do
	 * so.
	 */
	bb_len += count_rmw * EMIT_RMW_DW;
	bb_len += count_rmw_mcr * (EMIT_RMW_DW + 3);

	/*
	 * After doing all RMW, we need 7 trailing dwords to clean up,
	 * plus an additional 3 dwords to reset steering if any of the
	 * registers were MCR.
	 */
	if (count_rmw || count_rmw_mcr)
		bb_len += 7 + (count_rmw_mcr ? 3 : 0);

	if (q->hwe->class == XE_ENGINE_CLASS_RENDER)
	if (hwe->class == XE_ENGINE_CLASS_RENDER)
		/*
		 * Big enough to emit all of the context's 3DSTATE via
		 * xe_lrc_emit_hwe_state_instructions()
		 */
		bb_len += xe_gt_lrc_size(gt, q->hwe->class) / sizeof(u32);
		bb_len += xe_gt_lrc_size(gt, hwe->class) / sizeof(u32);

	xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", q->hwe->name, bb_len);
	xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", hwe->name, bb_len);

	bb = xe_bb_new(gt, bb_len, false);
	if (IS_ERR(bb))
@@ -276,13 +300,23 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
		}
	}

	if (count_rmw) {
		/* Emit MI_MATH for each RMW reg: 20dw per reg + 7 trailing dw */

	if (count_rmw || count_rmw_mcr) {
		xa_for_each(&sr->xa, idx, entry) {
			if (entry->reg.masked || entry->clr_bits == ~0)
				continue;

			if (entry->reg.mcr) {
				struct xe_reg_mcr reg = { .__reg.raw = entry->reg.raw };
				u8 group, instance;

				xe_gt_mcr_get_nonterminated_steering(gt, reg, &group, &instance);
				*cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1);
				*cs++ = CS_MMIO_GROUP_INSTANCE_SELECT(hwe->mmio_base).addr;
				*cs++ = SELECTIVE_READ_ADDRESSING |
					REG_FIELD_PREP(SELECTIVE_READ_GROUP, group) |
					REG_FIELD_PREP(SELECTIVE_READ_INSTANCE, instance);
			}

			*cs++ = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO;
			*cs++ = entry->reg.addr;
			*cs++ = CS_GPR_REG(0, 0).addr;
@@ -308,8 +342,9 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
			*cs++ = CS_GPR_REG(0, 0).addr;
			*cs++ = entry->reg.addr;

			xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x\n",
				  entry->reg.addr, entry->clr_bits, entry->set_bits);
			xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x%s\n",
				  entry->reg.addr, entry->clr_bits, entry->set_bits,
				  entry->reg.mcr ? " (MCR)" : "");
		}

		/* reset used GPR */
@@ -321,6 +356,13 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
		*cs++ = 0;
		*cs++ = CS_GPR_REG(0, 2).addr;
		*cs++ = 0;

		/* reset steering */
		if (count_rmw_mcr) {
			*cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1);
			*cs++ = CS_MMIO_GROUP_INSTANCE_SELECT(q->hwe->mmio_base).addr;
			*cs++ = 0;
		}
	}

	cs = xe_lrc_emit_hwe_state_instructions(q, cs);
+21 −9
Original line number Diff line number Diff line
@@ -146,8 +146,10 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,

		if (!signal) {
			sync->fence = drm_syncobj_fence_get(sync->syncobj);
			if (XE_IOCTL_DBG(xe, !sync->fence))
				return -EINVAL;
			if (XE_IOCTL_DBG(xe, !sync->fence)) {
				err = -EINVAL;
				goto free_sync;
			}
		}
		break;

@@ -167,17 +169,21 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,

		if (signal) {
			sync->chain_fence = dma_fence_chain_alloc();
			if (!sync->chain_fence)
				return -ENOMEM;
			if (!sync->chain_fence) {
				err = -ENOMEM;
				goto free_sync;
			}
		} else {
			sync->fence = drm_syncobj_fence_get(sync->syncobj);
			if (XE_IOCTL_DBG(xe, !sync->fence))
				return -EINVAL;
			if (XE_IOCTL_DBG(xe, !sync->fence)) {
				err = -EINVAL;
				goto free_sync;
			}

			err = dma_fence_chain_find_seqno(&sync->fence,
							 sync_in.timeline_value);
			if (err)
				return err;
				goto free_sync;
		}
		break;

@@ -200,8 +206,10 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
			if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence)))
				return PTR_ERR(sync->ufence);
			sync->ufence_chain_fence = dma_fence_chain_alloc();
			if (!sync->ufence_chain_fence)
				return -ENOMEM;
			if (!sync->ufence_chain_fence) {
				err = -ENOMEM;
				goto free_sync;
			}
			sync->ufence_syncobj = ufence_syncobj;
		}

@@ -216,6 +224,10 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
	sync->timeline_value = sync_in.timeline_value;

	return 0;

free_sync:
	xe_sync_entry_cleanup(sync);
	return err;
}
ALLOW_ERROR_INJECTION(xe_sync_entry_parse, ERRNO);