Commit 1026c1a7 authored by Matthew Brost
Browse files

drm/xe: Implement DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE



Implement DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE which sets the exec
queue default state to user data passed in. The intent is for a Mesa
tool to use this to replay GPU hangs.

v2:
 - Enable the flag DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE
 - Fix the page size math calculation to avoid a crash
v4:
 - Use vmemdup_user (Maarten)
 - Copy default state first into LRC, then replay state (Testing, Carlos)

Cc: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Link: https://patch.msgid.link/20251126185952.546277-10-matthew.brost@intel.com
parent 7032361d
Loading
Loading
Loading
Loading
+23 −3
Original line number Diff line number Diff line
@@ -79,6 +79,7 @@ static void __xe_exec_queue_free(struct xe_exec_queue *q)
	if (q->xef)
		xe_file_put(q->xef);

	kvfree(q->replay_state);
	kfree(q);
}

@@ -225,8 +226,8 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q, u32 exec_queue_flags)
		struct xe_lrc *lrc;

		xe_gt_sriov_vf_wait_valid_ggtt(q->gt);
		lrc = xe_lrc_create(q->hwe, q->vm, xe_lrc_ring_size(),
				    q->msix_vec, flags);
		lrc = xe_lrc_create(q->hwe, q->vm, q->replay_state,
				    xe_lrc_ring_size(), q->msix_vec, flags);
		if (IS_ERR(lrc)) {
			err = PTR_ERR(lrc);
			goto err_lrc;
@@ -567,6 +568,23 @@ exec_queue_set_pxp_type(struct xe_device *xe, struct xe_exec_queue *q, u64 value
	return xe_pxp_exec_queue_set_type(xe->pxp, q, DRM_XE_PXP_TYPE_HWDRM);
}

/**
 * exec_queue_set_hang_replay_state() - Set the GPU hang replay state of a queue
 * @xe: Xe device, used for ioctl debug reporting
 * @q: Exec queue that takes ownership of the copied state
 * @value: User pointer (as a u64) to the replay state buffer
 *
 * Duplicates xe_gt_lrc_hang_replay_size() bytes from the user buffer at
 * @value into kernel memory and stores the copy in @q->replay_state. The
 * copy is released with kvfree() when the queue is freed.
 *
 * Return: 0 on success, or the negative error code from vmemdup_user().
 */
static int exec_queue_set_hang_replay_state(struct xe_device *xe,
					    struct xe_exec_queue *q,
					    u64 value)
{
	size_t size = xe_gt_lrc_hang_replay_size(q->gt, q->class);
	u64 __user *address = u64_to_user_ptr(value);
	void *ptr;

	ptr = vmemdup_user(address, size);
	if (XE_IOCTL_DBG(xe, IS_ERR(ptr)))
		return PTR_ERR(ptr);

	/*
	 * If this handler runs more than once for the same queue, drop the
	 * previously copied state so it is not leaked. kvfree(NULL) is a
	 * no-op, so the first call needs no special casing.
	 */
	kvfree(q->replay_state);
	q->replay_state = ptr;

	return 0;
}

/*
 * Handler signature for a single exec queue "set property" extension: takes
 * the device, the exec queue being configured and the raw u64 property
 * value, and returns an int status. The handlers stored in
 * exec_queue_set_property_funcs[] use this type.
 */
typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe,
					     struct xe_exec_queue *q,
					     u64 value);
@@ -575,6 +593,7 @@ static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = {
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority,
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice,
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE] = exec_queue_set_pxp_type,
	[DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE] = exec_queue_set_hang_replay_state,
};

static int exec_queue_user_ext_set_property(struct xe_device *xe,
@@ -595,7 +614,8 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe,
	    XE_IOCTL_DBG(xe, ext.pad) ||
	    XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY &&
			 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE &&
			 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE))
			 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE &&
			 ext.property != DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE))
		return -EINVAL;

	idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs));
+3 −0
Original line number Diff line number Diff line
@@ -167,6 +167,9 @@ struct xe_exec_queue {
	/** @ufence_timeline_value: User fence timeline value */
	u64 ufence_timeline_value;

	/** @replay_state: GPU hang replay state */
	void *replay_state;

	/** @ops: submission backend exec queue operations */
	const struct xe_exec_queue_ops *ops;

+1 −1
Original line number Diff line number Diff line
@@ -269,7 +269,7 @@ struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,

	port->hwe = hwe;

	port->lrc = xe_lrc_create(hwe, NULL, SZ_16K, XE_IRQ_DEFAULT_MSIX, 0);
	port->lrc = xe_lrc_create(hwe, NULL, NULL, SZ_16K, XE_IRQ_DEFAULT_MSIX, 0);
	if (IS_ERR(port->lrc)) {
		err = PTR_ERR(port->lrc);
		goto err;
+29 −13
Original line number Diff line number Diff line
@@ -91,13 +91,19 @@ gt_engine_needs_indirect_ctx(struct xe_gt *gt, enum xe_engine_class class)
	return false;
}

size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
/**
 * xe_gt_lrc_hang_replay_size() - Hang replay size
 * @gt: The GT
 * @class: Hardware engine class
 *
 * Determine size of GPU hang replay state for a GT and hardware engine class.
 *
 * Return: Size of GPU hang replay state
 */
size_t xe_gt_lrc_hang_replay_size(struct xe_gt *gt, enum xe_engine_class class)
{
	struct xe_device *xe = gt_to_xe(gt);
	size_t size;

	/* Per-process HW status page (PPHWSP) */
	size = LRC_PPHWSP_SIZE;
	size_t size = 0;

	/* Engine context image */
	switch (class) {
@@ -123,11 +129,18 @@ size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
		size += 1 * SZ_4K;
	}

	return size;
}

size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
{
	size_t size = xe_gt_lrc_hang_replay_size(gt, class);

	/* Add indirect ring state page */
	if (xe_gt_has_indirect_ring_state(gt))
		size += LRC_INDIRECT_RING_STATE_SIZE;

	return size;
	return size + LRC_PPHWSP_SIZE;
}

/*
@@ -1387,7 +1400,8 @@ setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
}

static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
		       struct xe_vm *vm, u32 ring_size, u16 msix_vec,
		       struct xe_vm *vm, void *replay_state, u32 ring_size,
		       u16 msix_vec,
		       u32 init_flags)
{
	struct xe_gt *gt = hwe->gt;
@@ -1402,9 +1416,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,

	kref_init(&lrc->refcount);
	lrc->gt = gt;
	lrc->replay_size = xe_gt_lrc_size(gt, hwe->class);
	if (xe_gt_has_indirect_ring_state(gt))
		lrc->replay_size -= LRC_INDIRECT_RING_STATE_SIZE;
	lrc->replay_size = xe_gt_lrc_hang_replay_size(gt, hwe->class);
	lrc->size = lrc_size;
	lrc->flags = 0;
	lrc->ring.size = ring_size;
@@ -1441,11 +1453,14 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
	 * scratch.
	 */
	map = __xe_lrc_pphwsp_map(lrc);
	if (gt->default_lrc[hwe->class]) {
	if (gt->default_lrc[hwe->class] || replay_state) {
		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
				 lrc_size - LRC_PPHWSP_SIZE);
		if (replay_state)
			xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
					 replay_state, lrc->replay_size);
	} else {
		void *init_data = empty_lrc_data(hwe);

@@ -1553,6 +1568,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
 * xe_lrc_create - Create a LRC
 * @hwe: Hardware Engine
 * @vm: The VM (address space)
 * @replay_state: GPU hang replay state
 * @ring_size: LRC ring size
 * @msix_vec: MSI-X interrupt vector (for platforms that support it)
 * @flags: LRC initialization flags
@@ -1563,7 +1579,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
 * upon failure.
 */
struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
			     u32 ring_size, u16 msix_vec, u32 flags)
			     void *replay_state, u32 ring_size, u16 msix_vec, u32 flags)
{
	struct xe_lrc *lrc;
	int err;
@@ -1572,7 +1588,7 @@ struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
	if (!lrc)
		return ERR_PTR(-ENOMEM);

	err = xe_lrc_init(lrc, hwe, vm, ring_size, msix_vec, flags);
	err = xe_lrc_init(lrc, hwe, vm, replay_state, ring_size, msix_vec, flags);
	if (err) {
		kfree(lrc);
		return ERR_PTR(err);
+2 −1
Original line number Diff line number Diff line
@@ -50,7 +50,7 @@ struct xe_lrc_snapshot {
#define XE_LRC_CREATE_USER_CTX		BIT(2)

struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
			     u32 ring_size, u16 msix_vec, u32 flags);
			     void *replay_state, u32 ring_size, u16 msix_vec, u32 flags);
void xe_lrc_destroy(struct kref *ref);

/**
@@ -87,6 +87,7 @@ static inline size_t xe_lrc_ring_size(void)
	return SZ_16K;
}

size_t xe_gt_lrc_hang_replay_size(struct xe_gt *gt, enum xe_engine_class class);
size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class);
u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc);
u32 xe_lrc_regs_offset(struct xe_lrc *lrc);