Commit 620a09fb authored by Matthew Brost
Browse files

drm/xe: Stub out new pagefault layer



Stub out the new page fault layer and add kernel documentation. This is
intended as a replacement for the GT page fault layer, enabling multiple
producers to hook into a shared page fault consumer interface.

v2:
 - Fix kernel doc typo (checkpatch)
 - Remove comment around GT (Stuart)
 - Add explanation around reclaim (Francois)
 - Add comment around u8 vs enum (Francois)
 - Include engine instance (Stuart)
v3:
 - Fix XE_PAGEFAULT_TYPE_ATOMIC_ACCESS_VIOLATION kernel doc (Stuart)

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Tested-by: Francois Dugast <francois.dugast@intel.com>
Link: https://patch.msgid.link/20251031165416.2871503-2-matthew.brost@intel.com
parent 1a2cf01e
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -94,6 +94,7 @@ xe-y += xe_bb.o \
	xe_nvm.o \
	xe_oa.o \
	xe_observation.o \
	xe_pagefault.o \
	xe_pat.o \
	xe_pci.o \
	xe_pcode.o \
+65 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2025 Intel Corporation
 */

#include "xe_pagefault.h"
#include "xe_pagefault_types.h"

/**
 * DOC: Xe page faults
 *
 * Xe page faults are handled in two layers. The producer layer interacts with
 * hardware or firmware to receive and parse faults into struct xe_pagefault,
 * then forwards them to the consumer. The consumer layer services the faults
 * (e.g., memory migration, page table updates) and acknowledges the result back
 * to the producer, which then forwards the results to the hardware or firmware.
 * The consumer uses a page fault queue sized to absorb all potential faults and
 * a multi-threaded worker to process them. Multiple producers are supported,
 * with a single shared consumer.
 *
 * xe_pagefault.c implements the consumer layer.
 */

/**
 * xe_pagefault_init() - Set up the Xe page fault state
 * @xe: xe device instance
 *
 * Must only be called once the fuses have been read.
 *
 * This is currently a placeholder: no state is initialized yet and the call
 * always reports success.
 *
 * Return: 0 on success, errno on failure
 */
int xe_pagefault_init(struct xe_device *xe)
{
	/* TODO: set up the page fault state; nothing to initialize yet */
	return 0;
}

/**
 * xe_pagefault_reset() - Reset the page fault state of one GT
 * @xe: xe device instance
 * @gt: GT being reset
 *
 * Intended to squash every fault that is still pending on @gt.
 *
 * This is currently a placeholder and has no effect.
 */
void xe_pagefault_reset(struct xe_device *xe, struct xe_gt *gt)
{
	/* TODO: drop the faults pending on @gt; nothing is queued yet */
}

/**
 * xe_pagefault_handler() - Accept a page fault from a producer
 * @xe: xe device instance
 * @pf: Page fault
 *
 * Intended to sink @pf into a queue (i.e., a memory buffer) and to queue a
 * worker that services it. Meant to be safe to call from IRQ or process
 * context, and to be reclaim safe.
 *
 * This is currently a placeholder: the fault is not queued and the call always
 * reports success.
 *
 * Return: 0 on success, errno on failure
 */
int xe_pagefault_handler(struct xe_device *xe, struct xe_pagefault *pf)
{
	/* TODO: copy @pf into the queue and kick the worker */
	return 0;
}
+19 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2025 Intel Corporation
 */

#ifndef _XE_PAGEFAULT_H_
#define _XE_PAGEFAULT_H_

/* Forward declarations: the prototypes below only take pointers to these */
struct xe_device;
struct xe_gt;
struct xe_pagefault;

/* Page fault entry points; see xe_pagefault.c for the kernel-doc */
int xe_pagefault_init(struct xe_device *xe);

void xe_pagefault_reset(struct xe_device *xe, struct xe_gt *gt);

int xe_pagefault_handler(struct xe_device *xe, struct xe_pagefault *pf);

#endif
+136 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2025 Intel Corporation
 */

#ifndef _XE_PAGEFAULT_TYPES_H_
#define _XE_PAGEFAULT_TYPES_H_

#include <linux/workqueue.h>

struct xe_gt;
struct xe_pagefault;

/**
 * enum xe_pagefault_access_type - Xe page fault access type
 *
 * NOTE(review): presumably the values carried in the u8
 * &xe_pagefault.consumer.access_type field - confirm against the producers.
 */
enum xe_pagefault_access_type {
	/** @XE_PAGEFAULT_ACCESS_TYPE_READ: Read access type */
	XE_PAGEFAULT_ACCESS_TYPE_READ	= 0,
	/** @XE_PAGEFAULT_ACCESS_TYPE_WRITE: Write access type */
	XE_PAGEFAULT_ACCESS_TYPE_WRITE	= 1,
	/** @XE_PAGEFAULT_ACCESS_TYPE_ATOMIC: Atomic access type */
	XE_PAGEFAULT_ACCESS_TYPE_ATOMIC	= 2,
};

/**
 * enum xe_pagefault_type - Xe page fault type
 *
 * NOTE(review): presumably the values carried in the u8
 * &xe_pagefault.consumer.fault_type field - confirm against the producers.
 */
enum xe_pagefault_type {
	/** @XE_PAGEFAULT_TYPE_NOT_PRESENT: Not present */
	XE_PAGEFAULT_TYPE_NOT_PRESENT			= 0,
	/** @XE_PAGEFAULT_TYPE_WRITE_ACCESS_VIOLATION: Write access violation */
	XE_PAGEFAULT_TYPE_WRITE_ACCESS_VIOLATION	= 1,
	/** @XE_PAGEFAULT_TYPE_ATOMIC_ACCESS_VIOLATION: Atomic access violation */
	XE_PAGEFAULT_TYPE_ATOMIC_ACCESS_VIOLATION	= 2,
};

/**
 * struct xe_pagefault_ops - Xe pagefault ops (producer)
 *
 * Callbacks supplied by a page fault producer. A pointer to this table is
 * carried in &xe_pagefault.producer.ops.
 */
struct xe_pagefault_ops {
	/**
	 * @ack_fault: Ack fault
	 * @pf: Page fault
	 * @err: Error state of fault
	 *
	 * Page fault producer receives acknowledgment from the consumer and
	 * sends the result to the HW/FW interface.
	 */
	void (*ack_fault)(struct xe_pagefault *pf, int err);
};

/**
 * struct xe_pagefault - Xe page fault
 *
 * Generic page fault structure for communication between producer and consumer.
 * Carefully sized to be 64 bytes. Upon a device page fault, the producer
 * populates this structure, and the consumer copies it into the page-fault
 * queue for deferred handling.
 *
 * With 8-byte pointers the 64 bytes are made up of 8 bytes for @gt, 24 bytes
 * for @consumer and 32 bytes for @producer. New consumer fields should be
 * carved out of @consumer.reserved to keep the size unchanged.
 */
struct xe_pagefault {
	/**
	 * @gt: GT of fault
	 */
	struct xe_gt *gt;
	/**
	 * @consumer: State for the software handling the fault. Populated by
	 * the producer and may be modified by the consumer to communicate
	 * information back to the producer upon fault acknowledgment.
	 */
	struct {
		/** @consumer.page_addr: address of page fault */
		u64 page_addr;
		/** @consumer.asid: address space ID */
		u32 asid;
		/**
		 * @consumer.access_type: access type, u8 rather than enum to
		 * keep size compact
		 */
		u8 access_type;
		/**
		 * @consumer.fault_type: fault type, u8 rather than enum to
		 * keep size compact
		 */
		u8 fault_type;
#define XE_PAGEFAULT_LEVEL_NACK		0xff	/* Producer indicates nack fault */
		/** @consumer.fault_level: fault level */
		u8 fault_level;
		/** @consumer.engine_class: engine class */
		u8 engine_class;
		/** @consumer.engine_instance: engine instance */
		u8 engine_instance;
		/** @consumer.reserved: reserved bits for future expansion */
		u8 reserved[7];
	} consumer;
	/**
	 * @producer: State for the producer (i.e., HW/FW interface). Populated
	 * by the producer and should not be modified - or even inspected - by
	 * the consumer, except for calling operations.
	 */
	struct {
		/** @producer.private: private pointer */
		void *private;
		/** @producer.ops: operations */
		const struct xe_pagefault_ops *ops;
#define XE_PAGEFAULT_PRODUCER_MSG_LEN_DW	4
		/**
		 * @producer.msg: page fault message, used by producer in fault
		 * acknowledgment to formulate response to HW/FW interface.
		 * Included in the page-fault message because the producer
		 * typically receives the fault in a context where memory cannot
		 * be allocated (e.g., atomic context or the reclaim path).
		 */
		u32 msg[XE_PAGEFAULT_PRODUCER_MSG_LEN_DW];
	} producer;
};

/**
 * struct xe_pagefault_queue - Xe pagefault queue (consumer)
 *
 * Used to capture all device page faults for deferred processing. Size this
 * queue to absorb the device's worst-case number of outstanding faults.
 */
struct xe_pagefault_queue {
	/**
	 * @data: Data in queue containing struct xe_pagefault, protected by
	 * @lock
	 */
	void *data;
	/** @size: Size of queue in bytes */
	u32 size;
	/** @head: Head pointer in bytes, moved by producer, protected by @lock */
	u32 head;
	/** @tail: Tail pointer in bytes, moved by consumer, protected by @lock */
	u32 tail;
	/** @lock: protects page fault queue */
	spinlock_t lock;
	/** @worker: to process page faults */
	struct work_struct worker;
};

#endif