Unverified Commit b8ddb0df authored by Christoph Müllner, committed by Palmer Dabbelt
Browse files

riscv: Add Zawrs support for spinlocks

RISC-V code uses the generic ticket lock implementation, which calls
the macros smp_cond_load_relaxed() and smp_cond_load_acquire().
Introduce a RISC-V specific implementation of smp_cond_load_relaxed()
which applies WRS.NTO of the Zawrs extension in order to reduce power
consumption while waiting and allows hypervisors to enable guests to
trap while waiting. smp_cond_load_acquire() doesn't need a RISC-V
specific implementation as the generic implementation is based on
smp_cond_load_relaxed() and smp_acquire__after_ctrl_dep() sufficiently
provides the acquire semantics.

This implementation is heavily based on Arm's approach which is the
approach Andrea Parri also suggested.

The Zawrs specification can be found here:
https://github.com/riscv/riscv-zawrs/blob/main/zawrs.adoc



Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>
Co-developed-by: Andrew Jones <ajones@ventanamicro.com>
Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
Link: https://lore.kernel.org/r/20240426100820.14762-11-ajones@ventanamicro.com


Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
parent 6d585281
Loading
Loading
Loading
Loading
+13 −0
Original line number Diff line number Diff line
@@ -578,6 +578,19 @@ config RISCV_ISA_V_PREEMPTIVE
	  preemption. Enabling this config will result in higher memory
	  consumption due to the allocation of per-task's kernel Vector context.

# Build-time switch for the kernel's Zawrs-based waiting code: the C headers
# guard their Zawrs paths with #ifdef CONFIG_RISCV_ISA_ZAWRS. Requires
# RISCV_ALTERNATIVE and defaults to enabled.
config RISCV_ISA_ZAWRS
	bool "Zawrs extension support for more efficient busy waiting"
	depends on RISCV_ALTERNATIVE
	default y
	help
	  The Zawrs extension defines instructions to be used in polling loops
	  which allow a hart to enter a low-power state or to trap to the
	  hypervisor while waiting on a store to a memory location. Enable the
	  use of these instructions in the kernel when the Zawrs extension is
	  detected at boot.

	  If you don't know what to do here, say Y.

config TOOLCHAIN_HAS_ZBB
	bool
	default y
+30 −15
Original line number Diff line number Diff line
@@ -11,6 +11,7 @@
#define _ASM_RISCV_BARRIER_H

#ifndef __ASSEMBLY__
#include <asm/cmpxchg.h>
#include <asm/fence.h>

#define nop()		__asm__ __volatile__ ("nop")
@@ -28,21 +29,6 @@
#define __smp_rmb()	RISCV_FENCE(r, r)
#define __smp_wmb()	RISCV_FENCE(w, w)

/*
 * __smp_store_release() - store @v to *@p, preceded by a fence.
 *
 * Statement order matters: compiletime_assert_atomic_type() is a build-time
 * check on the type of *@p, RISCV_FENCE(rw, w) is issued next, and only then
 * is the value written with WRITE_ONCE().
 *
 * NOTE(review): the fence is presumably what orders earlier loads and stores
 * before this store (release semantics) -- confirm against RISCV_FENCE() in
 * <asm/fence.h>.
 */
#define __smp_store_release(p, v)					\
do {									\
	compiletime_assert_atomic_type(*p);				\
	RISCV_FENCE(rw, w);						\
	WRITE_ONCE(*p, v);						\
} while (0)

/*
 * __smp_load_acquire() - load *@p, followed by a fence.
 *
 * The value is read once with READ_ONCE() into a temporary, then
 * RISCV_FENCE(r, rw) is issued, and the temporary becomes the value of the
 * statement expression. compiletime_assert_atomic_type() is a build-time
 * check on the type of *@p.
 *
 * NOTE(review): the fence is presumably what orders this load before later
 * loads and stores (acquire semantics) -- confirm against RISCV_FENCE() in
 * <asm/fence.h>.
 */
#define __smp_load_acquire(p)						\
({									\
	typeof(*p) ___p1 = READ_ONCE(*p);				\
	compiletime_assert_atomic_type(*p);				\
	RISCV_FENCE(r, rw);						\
	___p1;								\
})

/*
 * This is a very specific barrier: it's currently only used in two places in
 * the kernel, both in the scheduler.  See include/linux/spinlock.h for the two
@@ -70,6 +56,35 @@ do { \
 */
#define smp_mb__after_spinlock()	RISCV_FENCE(iorw, iorw)

/*
 * __smp_store_release() - store @v to *@p, preceded by a fence.
 *
 * Statement order matters: compiletime_assert_atomic_type() is a build-time
 * check on the type of *@p, RISCV_FENCE(rw, w) is issued next, and only then
 * is the value written with WRITE_ONCE().
 *
 * NOTE(review): the fence is presumably what orders earlier loads and stores
 * before this store (release semantics) -- confirm against RISCV_FENCE() in
 * <asm/fence.h>.
 */
#define __smp_store_release(p, v)					\
do {									\
	compiletime_assert_atomic_type(*p);				\
	RISCV_FENCE(rw, w);						\
	WRITE_ONCE(*p, v);						\
} while (0)

/*
 * __smp_load_acquire() - load *@p, followed by a fence.
 *
 * The value is read once with READ_ONCE() into a temporary, then
 * RISCV_FENCE(r, rw) is issued, and the temporary becomes the value of the
 * statement expression. compiletime_assert_atomic_type() is a build-time
 * check on the type of *@p.
 *
 * NOTE(review): the fence is presumably what orders this load before later
 * loads and stores (acquire semantics) -- confirm against RISCV_FENCE() in
 * <asm/fence.h>.
 */
#define __smp_load_acquire(p)						\
({									\
	typeof(*p) ___p1 = READ_ONCE(*p);				\
	compiletime_assert_atomic_type(*p);				\
	RISCV_FENCE(r, rw);						\
	___p1;								\
})

#ifdef CONFIG_RISCV_ISA_ZAWRS
/*
 * smp_cond_load_relaxed() - spin until @cond_expr holds for the value at @ptr.
 *
 * Each iteration loads *@ptr with READ_ONCE() into VAL, which @cond_expr may
 * reference, and leaves the loop as soon as @cond_expr is true.  Otherwise
 * __cmpwait_relaxed() is called with the value just observed before the
 * location is read again.  Evaluates to the last value loaded, cast to
 * typeof(*ptr).
 *
 * @ptr is evaluated exactly once: the cached __PTR is used for both the load
 * and the wait, so an argument expression with side effects is not re-run on
 * every iteration.  (The typeof() uses of @ptr do not evaluate it.)
 */
#define smp_cond_load_relaxed(ptr, cond_expr) ({			\
	typeof(ptr) __PTR = (ptr);					\
	__unqual_scalar_typeof(*ptr) VAL;				\
	for (;;) {							\
		VAL = READ_ONCE(*__PTR);				\
		if (cond_expr)						\
			break;						\
		__cmpwait_relaxed(__PTR, VAL);				\
	}								\
	(typeof(*ptr))VAL;						\
})
#endif

#include <asm-generic/barrier.h>

#endif /* __ASSEMBLY__ */
+58 −0
Original line number Diff line number Diff line
@@ -8,7 +8,10 @@

#include <linux/bug.h>

#include <asm/alternative-macros.h>
#include <asm/fence.h>
#include <asm/hwcap.h>
#include <asm/insn-def.h>

#define __xchg_relaxed(ptr, new, size)					\
({									\
@@ -359,4 +362,59 @@
	arch_cmpxchg_relaxed((ptr), (o), (n));				\
})

#ifdef CONFIG_RISCV_ISA_ZAWRS
/*
 * __cmpwait() - wait while the @size-byte object at @ptr still equals @val.
 * @ptr:  location to watch; accessed as a u32 for @size 4 and as a u64 for
 *        @size 8 (the latter only when __riscv_xlen == 64).
 * @val:  value the caller last observed at @ptr.
 * @size: object size in bytes; any other size fails the build via BUILD_BUG().
 *
 * With Zawrs, the location is load-reserved and compared with @val; only if
 * they still match is wrs.nto executed.  Without Zawrs the function executes
 * a single RISCV_PAUSE instead.  Either way the function returns without
 * telling the caller whether the value changed, so callers must re-read
 * @ptr and re-check their condition.
 *
 * Despite wrs.nto being "WRS-with-no-timeout", when @val does not change we
 * still expect it to terminate within a "reasonable" amount of time: for an
 * implementation-specific reason, because of a pending, locally-enabled
 * interrupt, or because it has been configured to raise an illegal
 * instruction exception.
 */
static __always_inline void __cmpwait(volatile void *ptr,
				      unsigned long val,
				      int size)
{
	unsigned long tmp;

	/*
	 * The default instruction jumps to the fallback; the alternative
	 * selected for RISCV_ISA_EXT_ZAWRS is a nop, which falls through to
	 * the lr/wrs sequence below.
	 */
	asm goto(ALTERNATIVE("j %l[no_zawrs]", "nop",
			     0, RISCV_ISA_EXT_ZAWRS, 1)
		 : : : : no_zawrs);

	switch (size) {
	case 4:
		/*
		 * On RV64, lr.w sign-extends the loaded word into the full
		 * register.  Normalise @val the same way, otherwise a
		 * zero-extended 32-bit value with bit 31 set can never
		 * compare equal and wrs.nto would always be skipped.
		 */
		asm volatile(
		"	lr.w	%0, %1\n"
		"	xor	%0, %0, %2\n"
		"	bnez	%0, 1f\n"
			ZAWRS_WRS_NTO "\n"
		"1:"
		: "=&r" (tmp), "+A" (*(u32 *)ptr)
		: "r" ((long)(int)val));
		break;
#if __riscv_xlen == 64
	case 8:
		asm volatile(
		"	lr.d	%0, %1\n"
		"	xor	%0, %0, %2\n"
		"	bnez	%0, 1f\n"
			ZAWRS_WRS_NTO "\n"
		"1:"
		: "=&r" (tmp), "+A" (*(u64 *)ptr)
		: "r" (val));
		break;
#endif
	default:
		BUILD_BUG();
	}

	return;

no_zawrs:
	/* Fallback when the Zawrs alternative was not applied. */
	asm volatile(RISCV_PAUSE : : : "memory");
}

/*
 * Size-dispatching front end for __cmpwait(): forwards the address, the
 * expected value converted to unsigned long, and the size of the pointed-to
 * object.  The sizeof() use does not evaluate the address argument.
 */
#define __cmpwait_relaxed(addr, expected)				\
	__cmpwait((addr),						\
		  (unsigned long)(expected),				\
		  sizeof(*(addr)))
#endif

#endif /* _ASM_RISCV_CMPXCHG_H */
+1 −0
Original line number Diff line number Diff line
@@ -81,6 +81,7 @@
#define RISCV_ISA_EXT_ZTSO		72
#define RISCV_ISA_EXT_ZACAS		73
#define RISCV_ISA_EXT_XANDESPMU		74
#define RISCV_ISA_EXT_ZAWRS		75

#define RISCV_ISA_EXT_XLINUXENVCFG	127

+2 −0
Original line number Diff line number Diff line
@@ -197,5 +197,7 @@
	       RS1(base), SIMM12(4))

/*
 * Instructions spelled as raw 32-bit words through the .4byte directive
 * rather than by mnemonic (presumably so that assemblers without these
 * mnemonics can still build the kernel -- TODO confirm).
 */
#define RISCV_PAUSE	".4byte 0x100000f"
/*
 * Zawrs instruction words. Going by the macro names these are wrs.nto
 * ("no timeout") and wrs.sto (presumably "short timeout") -- verify the
 * encodings against the Zawrs specification.
 */
#define ZAWRS_WRS_NTO	".4byte 0x00d00073"
#define ZAWRS_WRS_STO	".4byte 0x01d00073"

#endif /* __ASM_INSN_DEF_H */
Loading