Unverified Commit d863910e authored by Charlie Jenkins's avatar Charlie Jenkins Committed by Palmer Dabbelt
Browse files

riscv: vector: Support xtheadvector save/restore



Use alternatives to add support for xtheadvector vector save/restore
routines.

Signed-off-by: default avatarCharlie Jenkins <charlie@rivosinc.com>
Reviewed-by: default avatarConor Dooley <conor.dooley@microchip.com>
Tested-by: default avatarYangyu Chen <cyy@cyyself.name>
Link: https://lore.kernel.org/r/20241113-xtheadvector-v11-9-236c22791ef9@rivosinc.com


Signed-off-by: default avatarPalmer Dabbelt <palmer@rivosinc.com>
parent 01e3313e
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -30,6 +30,12 @@
#define SR_VS_CLEAN	_AC(0x00000400, UL)
#define SR_VS_DIRTY	_AC(0x00000600, UL)

#define SR_VS_THEAD		_AC(0x01800000, UL) /* xtheadvector Status */
#define SR_VS_OFF_THEAD		_AC(0x00000000, UL)
#define SR_VS_INITIAL_THEAD	_AC(0x00800000, UL)
#define SR_VS_CLEAN_THEAD	_AC(0x01000000, UL)
#define SR_VS_DIRTY_THEAD	_AC(0x01800000, UL)

#define SR_XS		_AC(0x00018000, UL) /* Extension Status */
#define SR_XS_OFF	_AC(0x00000000, UL)
#define SR_XS_INITIAL	_AC(0x00008000, UL)
+1 −1
Original line number Diff line number Diff line
@@ -117,7 +117,7 @@ do { \
	__set_prev_cpu(__prev->thread);			\
	if (has_fpu())					\
		__switch_to_fpu(__prev, __next);	\
	if (has_vector())					\
	if (has_vector() || has_xtheadvector())		\
		__switch_to_vector(__prev, __next);	\
	if (switch_to_should_flush_icache(__next))	\
		local_flush_icache_all();		\
+173 −49
Original line number Diff line number Diff line
@@ -18,6 +18,27 @@
#include <asm/cpufeature.h>
#include <asm/csr.h>
#include <asm/asm.h>
#include <asm/vendorid_list.h>
#include <asm/vendor_extensions.h>
#include <asm/vendor_extensions/thead.h>

#define __riscv_v_vstate_or(_val, TYPE) ({				\
	typeof(_val) _res = _val;					\
	if (has_xtheadvector()) \
		_res = (_res & ~SR_VS_THEAD) | SR_VS_##TYPE##_THEAD;	\
	else								\
		_res = (_res & ~SR_VS) | SR_VS_##TYPE;			\
	_res;								\
})

#define __riscv_v_vstate_check(_val, TYPE) ({				\
	bool _res;							\
	if (has_xtheadvector()) \
		_res = ((_val) & SR_VS_THEAD) == SR_VS_##TYPE##_THEAD;	\
	else								\
		_res = ((_val) & SR_VS) == SR_VS_##TYPE;		\
	_res;								\
})

extern unsigned long riscv_v_vsize;
int riscv_v_setup_vsize(void);
@@ -41,38 +62,61 @@ static __always_inline bool has_vector(void)
	return riscv_has_extension_unlikely(RISCV_ISA_EXT_ZVE32X);
}

static __always_inline bool has_xtheadvector_no_alternatives(void)
{
	if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR))
		return riscv_isa_vendor_extension_available(THEAD_VENDOR_ID, XTHEADVECTOR);
	else
		return false;
}

static __always_inline bool has_xtheadvector(void)
{
	if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR))
		return riscv_has_vendor_extension_unlikely(THEAD_VENDOR_ID,
							   RISCV_ISA_VENDOR_EXT_XTHEADVECTOR);
	else
		return false;
}

static inline void __riscv_v_vstate_clean(struct pt_regs *regs)
{
	regs->status = (regs->status & ~SR_VS) | SR_VS_CLEAN;
	regs->status = __riscv_v_vstate_or(regs->status, CLEAN);
}

static inline void __riscv_v_vstate_dirty(struct pt_regs *regs)
{
	regs->status = (regs->status & ~SR_VS) | SR_VS_DIRTY;
	regs->status = __riscv_v_vstate_or(regs->status, DIRTY);
}

static inline void riscv_v_vstate_off(struct pt_regs *regs)
{
	regs->status = (regs->status & ~SR_VS) | SR_VS_OFF;
	regs->status = __riscv_v_vstate_or(regs->status, OFF);
}

static inline void riscv_v_vstate_on(struct pt_regs *regs)
{
	regs->status = (regs->status & ~SR_VS) | SR_VS_INITIAL;
	regs->status = __riscv_v_vstate_or(regs->status, INITIAL);
}

static inline bool riscv_v_vstate_query(struct pt_regs *regs)
{
	return (regs->status & SR_VS) != 0;
	return !__riscv_v_vstate_check(regs->status, OFF);
}

static __always_inline void riscv_v_enable(void)
{
	if (has_xtheadvector())
		csr_set(CSR_SSTATUS, SR_VS_THEAD);
	else
		csr_set(CSR_SSTATUS, SR_VS);
}

static __always_inline void riscv_v_disable(void)
{
	if (has_xtheadvector())
		csr_clear(CSR_SSTATUS, SR_VS_THEAD);
	else
		csr_clear(CSR_SSTATUS, SR_VS);
}

@@ -82,10 +126,36 @@ static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest)
		"csrr	%0, " __stringify(CSR_VSTART) "\n\t"
		"csrr	%1, " __stringify(CSR_VTYPE) "\n\t"
		"csrr	%2, " __stringify(CSR_VL) "\n\t"
		"csrr	%3, " __stringify(CSR_VCSR) "\n\t"
		"csrr	%4, " __stringify(CSR_VLENB) "\n\t"
		: "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl),
		  "=r" (dest->vcsr), "=r" (dest->vlenb) : :);
		"=r" (dest->vcsr) : :);

	if (has_xtheadvector()) {
		unsigned long status;

		/*
		 * CSR_VCSR is defined as
		 * [2:1] - vxrm[1:0]
		 * [0] - vxsat
		 * The earlier vector spec implemented by T-Head uses separate
		 * registers for the same bit-elements, so just combine those
		 * into the existing output field.
		 *
		 * Additionally T-Head cores need FS to be enabled when accessing
		 * the VXRM and VXSAT CSRs, otherwise ending in illegal instructions.
		 * Though the cores do not implement the VXRM and VXSAT fields in the
		 * FCSR CSR that vector-0.7.1 specifies.
		 */
		status = csr_read_set(CSR_STATUS, SR_FS_DIRTY);
		dest->vcsr = csr_read(CSR_VXSAT) | csr_read(CSR_VXRM) << CSR_VXRM_SHIFT;

		dest->vlenb = riscv_v_vsize / 32;

		if ((status & SR_FS) != SR_FS_DIRTY)
			csr_write(CSR_STATUS, status);
	} else {
		dest->vcsr = csr_read(CSR_VCSR);
		dest->vlenb = csr_read(CSR_VLENB);
	}
}

static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src)
@@ -96,9 +166,25 @@ static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src
		"vsetvl	 x0, %2, %1\n\t"
		".option pop\n\t"
		"csrw	" __stringify(CSR_VSTART) ", %0\n\t"
		"csrw	" __stringify(CSR_VCSR) ", %3\n\t"
		: : "r" (src->vstart), "r" (src->vtype), "r" (src->vl),
		    "r" (src->vcsr) :);
		: : "r" (src->vstart), "r" (src->vtype), "r" (src->vl));

	if (has_xtheadvector()) {
		unsigned long status = csr_read(CSR_SSTATUS);

		/*
		 * Similar to __vstate_csr_save above, restore values for the
		 * separate VXRM and VXSAT CSRs from the vcsr variable.
		 */
		status = csr_read_set(CSR_STATUS, SR_FS_DIRTY);

		csr_write(CSR_VXRM, (src->vcsr >> CSR_VXRM_SHIFT) & CSR_VXRM_MASK);
		csr_write(CSR_VXSAT, src->vcsr & CSR_VXSAT_MASK);

		if ((status & SR_FS) != SR_FS_DIRTY)
			csr_write(CSR_STATUS, status);
	} else {
		csr_write(CSR_VCSR, src->vcsr);
	}
}

static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
@@ -108,6 +194,19 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,

	riscv_v_enable();
	__vstate_csr_save(save_to);
	if (has_xtheadvector()) {
		asm volatile (
			"mv t0, %0\n\t"
			THEAD_VSETVLI_T4X0E8M8D1
			THEAD_VSB_V_V0T0
			"add		t0, t0, t4\n\t"
			THEAD_VSB_V_V0T0
			"add		t0, t0, t4\n\t"
			THEAD_VSB_V_V0T0
			"add		t0, t0, t4\n\t"
			THEAD_VSB_V_V0T0
			: : "r" (datap) : "memory", "t0", "t4");
	} else {
		asm volatile (
			".option push\n\t"
			".option arch, +zve32x\n\t"
@@ -121,6 +220,7 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
			"vse8.v		v24, (%1)\n\t"
			".option pop\n\t"
			: "=&r" (vl) : "r" (datap) : "memory");
	}
	riscv_v_disable();
}

@@ -130,6 +230,19 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_
	unsigned long vl;

	riscv_v_enable();
	if (has_xtheadvector()) {
		asm volatile (
			"mv t0, %0\n\t"
			THEAD_VSETVLI_T4X0E8M8D1
			THEAD_VLB_V_V0T0
			"add		t0, t0, t4\n\t"
			THEAD_VLB_V_V0T0
			"add		t0, t0, t4\n\t"
			THEAD_VLB_V_V0T0
			"add		t0, t0, t4\n\t"
			THEAD_VLB_V_V0T0
			: : "r" (datap) : "memory", "t0", "t4");
	} else {
		asm volatile (
			".option push\n\t"
			".option arch, +zve32x\n\t"
@@ -143,6 +256,7 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_
			"vle8.v		v24, (%1)\n\t"
			".option pop\n\t"
			: "=&r" (vl) : "r" (datap) : "memory");
	}
	__vstate_csr_restore(restore_from);
	riscv_v_disable();
}
@@ -152,33 +266,41 @@ static inline void __riscv_v_vstate_discard(void)
	unsigned long vl, vtype_inval = 1UL << (BITS_PER_LONG - 1);

	riscv_v_enable();
	if (has_xtheadvector())
		asm volatile (THEAD_VSETVLI_T4X0E8M8D1 : : : "t4");
	else
		asm volatile (
			".option push\n\t"
			".option arch, +zve32x\n\t"
			"vsetvli	%0, x0, e8, m8, ta, ma\n\t"
			".option pop\n\t": "=&r" (vl));

	asm volatile (
		".option push\n\t"
		".option arch, +zve32x\n\t"
		"vmv.v.i	v0, -1\n\t"
		"vmv.v.i	v8, -1\n\t"
		"vmv.v.i	v16, -1\n\t"
		"vmv.v.i	v24, -1\n\t"
		"vsetvl		%0, x0, %1\n\t"
		".option pop\n\t"
		: "=&r" (vl) : "r" (vtype_inval) : "memory");
		: "=&r" (vl) : "r" (vtype_inval));

	riscv_v_disable();
}

static inline void riscv_v_vstate_discard(struct pt_regs *regs)
{
	if ((regs->status & SR_VS) == SR_VS_OFF)
		return;

	if (riscv_v_vstate_query(regs)) {
		__riscv_v_vstate_discard();
		__riscv_v_vstate_dirty(regs);
	}
}

static inline void riscv_v_vstate_save(struct __riscv_v_ext_state *vstate,
				       struct pt_regs *regs)
{
	if ((regs->status & SR_VS) == SR_VS_DIRTY) {
	if (__riscv_v_vstate_check(regs->status, DIRTY)) {
		__riscv_v_vstate_save(vstate, vstate->datap);
		__riscv_v_vstate_clean(regs);
	}
@@ -187,7 +309,7 @@ static inline void riscv_v_vstate_save(struct __riscv_v_ext_state *vstate,
static inline void riscv_v_vstate_restore(struct __riscv_v_ext_state *vstate,
					  struct pt_regs *regs)
{
	if ((regs->status & SR_VS) != SR_VS_OFF) {
	if (riscv_v_vstate_query(regs)) {
		__riscv_v_vstate_restore(vstate, vstate->datap);
		__riscv_v_vstate_clean(regs);
	}
@@ -196,7 +318,7 @@ static inline void riscv_v_vstate_restore(struct __riscv_v_ext_state *vstate,
static inline void riscv_v_vstate_set_restore(struct task_struct *task,
					      struct pt_regs *regs)
{
	if ((regs->status & SR_VS) != SR_VS_OFF) {
	if (riscv_v_vstate_query(regs)) {
		set_tsk_thread_flag(task, TIF_RISCV_V_DEFER_RESTORE);
		riscv_v_vstate_on(regs);
	}
@@ -270,6 +392,8 @@ struct pt_regs;
static inline int riscv_v_setup_vsize(void) { return -EOPNOTSUPP; }
static __always_inline bool has_vector(void) { return false; }
static __always_inline bool insn_is_vector(u32 insn_buf) { return false; }
static __always_inline bool has_xtheadvector_no_alternatives(void) { return false; }
static __always_inline bool has_xtheadvector(void) { return false; }
static inline bool riscv_v_first_use_handler(struct pt_regs *regs) { return false; }
static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; }
static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; }
+3 −3
Original line number Diff line number Diff line
@@ -874,8 +874,7 @@ static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap)
		riscv_fill_vendor_ext_list(cpu);
	}

	if (riscv_isa_vendor_extension_available(THEAD_VENDOR_ID, XTHEADVECTOR) &&
	    has_thead_homogeneous_vlenb() < 0) {
	if (has_xtheadvector_no_alternatives() && has_thead_homogeneous_vlenb() < 0) {
		pr_warn("Unsupported heterogeneous vlenb detected, vector extension disabled.\n");
		disable_xtheadvector();
	}
@@ -932,7 +931,8 @@ void __init riscv_fill_hwcap(void)
		elf_hwcap &= ~COMPAT_HWCAP_ISA_F;
	}

	if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_ZVE32X)) {
	if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_ZVE32X) ||
	    has_xtheadvector_no_alternatives()) {
		/*
		 * This cannot fail when called on the boot hart
		 */
+4 −4
Original line number Diff line number Diff line
@@ -143,7 +143,7 @@ static int riscv_v_start_kernel_context(bool *is_nested)

	/* Transfer the ownership of V from user to kernel, then save */
	riscv_v_start(RISCV_PREEMPT_V | RISCV_PREEMPT_V_DIRTY);
	if ((task_pt_regs(current)->status & SR_VS) == SR_VS_DIRTY) {
	if (__riscv_v_vstate_check(task_pt_regs(current)->status, DIRTY)) {
		uvstate = &current->thread.vstate;
		__riscv_v_vstate_save(uvstate, uvstate->datap);
	}
@@ -160,7 +160,7 @@ asmlinkage void riscv_v_context_nesting_start(struct pt_regs *regs)
		return;

	depth = riscv_v_ctx_get_depth();
	if (depth == 0 && (regs->status & SR_VS) == SR_VS_DIRTY)
	if (depth == 0 && __riscv_v_vstate_check(regs->status, DIRTY))
		riscv_preempt_v_set_dirty();

	riscv_v_ctx_depth_inc();
@@ -208,7 +208,7 @@ void kernel_vector_begin(void)
{
	bool nested = false;

	if (WARN_ON(!has_vector()))
	if (WARN_ON(!(has_vector() || has_xtheadvector())))
		return;

	BUG_ON(!may_use_simd());
@@ -236,7 +236,7 @@ EXPORT_SYMBOL_GPL(kernel_vector_begin);
 */
void kernel_vector_end(void)
{
	if (WARN_ON(!has_vector()))
	if (WARN_ON(!(has_vector() || has_xtheadvector())))
		return;

	riscv_v_disable();
Loading