libatomic: Add rcpc3 128-bit atomic operations for AArch64

The introduction of the optional RCPC3 architectural extension for
Armv8.2-A upwards provides additional support for the release
consistency model, introducing the Load-Acquire RCpc ordered Pair and
Store-Release ordered Pair operations in the form of LDIAPP and STILP.

These operations are single-copy atomic on cores which also implement
LSE2 and, as such, support for these operations is added to libatomic
and employed accordingly when both the LSE2 and RCPC3 features are
detected in a given core at runtime.

libatomic/ChangeLog:

	* config/linux/aarch64/atomic_16.S (libat_load_16): Add LRCPC3
	variant.
	(libat_store_16): Likewise.
	* config/linux/aarch64/host-config.h (HWCAP2_LRCPC3): New.
	(LSE2_LRCPC3_ATOP): Previously LSE2_ATOP.  New ifuncs guarded
	under it.
	(has_rcpc3): New.
This commit is contained in:
Victor Do Nascimento 2024-06-10 11:10:36 +01:00
parent d4db77ce37
commit 7107574958
2 changed files with 74 additions and 6 deletions

View File

@ -35,16 +35,21 @@
writes, this will be true when using atomics in actual code.
The libat_<op>_16 entry points are ARMv8.0.
The libat_<op>_16_i1 entry points are used when LSE128 is available.
The libat_<op>_16_i1 entry points are used when LSE128 or LRCPC3 is available.
The libat_<op>_16_i2 entry points are used when LSE2 is available. */
#include "auto-config.h"
.arch armv8-a+lse
/* There is overlap in atomic instructions implemented in RCPC3 and LSE2.
Consequently, both _i1 and _i2 suffixes are needed for functions using these.
Elsewhere, all extension-specific implementations are mapped to _i1. */
#define LRCPC3(NAME) libat_##NAME##_i1
#define LSE128(NAME) libat_##NAME##_i1
#define LSE(NAME) libat_##NAME##_i1
#define LSE2(NAME) libat_##NAME##_i1
#define LSE2(NAME) libat_##NAME##_i2
#define CORE(NAME) libat_##NAME
#define ATOMIC(NAME) __atomic_##NAME
@ -513,6 +518,43 @@ END (test_and_set_16)
/* ifunc implementations: Carries run-time dependence on the presence of further
architectural extensions. */
/* 16-byte atomic load, LRCPC3 variant.
   The source address is taken from x0 and the memory order from w1; the
   loaded pair is left in res0/res1.  Three paths are selected on w1:
   zero (RELAXED), SEQ_CST, and everything else (ACQUIRE/CONSUME).  */
ENTRY_FEAT (load_16, LRCPC3)
/* A non-zero memory order needs ordering; skip the plain load.  */
cbnz w1, 1f
/* RELAXED: a plain load pair, no ordering instruction.  */
ldp res0, res1, [x0]
ret
1:
/* Separate SEQ_CST from the remaining (acquire-class) orders.  */
cmp w1, SEQ_CST
b.eq 2f
/* ACQUIRE/CONSUME (Load-AcquirePC semantics). */
/* ldiapp res0, res1, [x0] */
/* Emitted as a raw instruction word rather than by mnemonic; presumably so
   the file still assembles with tools that lack RCPC3 support -- confirm.  */
.inst 0xd9411800
ret
/* SEQ_CST: the same LDIAPP, preceded by a load-acquire of the same address
   whose loaded value (tmp0) is not used afterwards.  */
2: ldar tmp0, [x0] /* Block reordering with Store-Release instr. */
/* ldiapp res0, res1, [x0] */
.inst 0xd9411800
ret
END_FEAT (load_16, LRCPC3)
/* 16-byte atomic store, LRCPC3 variant.
   The destination address is taken from x0, the value from in0/in1 and the
   memory order from w4.  A zero order (RELAXED) uses a plain store pair;
   any other order uses STILP.  */
ENTRY_FEAT (store_16, LRCPC3)
/* A non-zero memory order needs release semantics; skip the plain store.  */
cbnz w4, 1f
/* RELAXED: a plain store pair, no ordering instruction.  */
stp in0, in1, [x0]
ret
/* RELEASE/SEQ_CST. */
/* STILP is emitted as a raw instruction word rather than by mnemonic;
   presumably so the file still assembles with tools that lack RCPC3
   support -- confirm.  */
1: /* stilp in0, in1, [x0] */
.inst 0xd9031802
ret
END_FEAT (store_16, LRCPC3)
ENTRY_FEAT (exchange_16, LSE128)
mov tmp0, x0
mov res0, in0

View File

@ -33,6 +33,9 @@
/* Fallback definitions of the hwcap feature bits tested by the ifunc
   selection code.  Each one is only defined here when an earlier header has
   not already provided it.  HWCAP_USCAT is a bit in the first hwcap word;
   the HWCAP2_* values are bits in the second (hwcap2) word and therefore use
   an unsigned long constant, since they lie above bit 31.  */
#ifndef HWCAP_USCAT
# define HWCAP_USCAT (1 << 25)
#endif
#ifndef HWCAP2_LRCPC3
# define HWCAP2_LRCPC3 (1UL << 46)
#endif
#ifndef HWCAP2_LSE128
# define HWCAP2_LSE128 (1UL << 47)
#endif
@ -54,7 +57,7 @@ typedef struct __ifunc_arg_t {
#if defined (LAT_CAS_N)
# define LSE_ATOP
#elif defined (LAT_LOAD_N) || defined (LAT_STORE_N)
# define LSE2_ATOP
# define LSE2_LRCPC3_ATOP
#elif defined (LAT_EXCH_N) || defined (LAT_FIOR_N) || defined (LAT_FAND_N)
# define LSE128_ATOP
#endif
@ -63,9 +66,10 @@ typedef struct __ifunc_arg_t {
# if defined (LSE_ATOP)
# define IFUNC_NCOND(N) 1
# define IFUNC_COND_1 (hwcap & HWCAP_ATOMICS)
# elif defined (LSE2_ATOP)
# define IFUNC_NCOND(N) 1
# define IFUNC_COND_1 (has_lse2 (hwcap, features))
# elif defined (LSE2_LRCPC3_ATOP)
# define IFUNC_NCOND(N) 2
# define IFUNC_COND_1 (has_rcpc3 (hwcap, features))
# define IFUNC_COND_2 (has_lse2 (hwcap, features))
# elif defined (LSE128_ATOP)
# define IFUNC_NCOND(N) 1
# define IFUNC_COND_1 (has_lse128 (hwcap, features))
@ -131,6 +135,28 @@ has_lse128 (unsigned long hwcap, const __ifunc_arg_t *features)
return false;
}
/* LRCPC support is encoded in ID_AA64ISAR1_EL1.LRCPC, bits[23:20].  A value
   of 0b0011 or greater indicates that the RCPC3 instructions (LDIAPP/STILP)
   are implemented.

   Return true when the LRCPC3 128-bit load/store variants may be selected.
   HWCAP is the first hwcap word; FEATURES is only dereferenced when HWCAP
   has _IFUNC_ARG_HWCAP set.  */
static inline bool
has_rcpc3 (unsigned long hwcap, const __ifunc_arg_t *features)
{
  /* LDIAPP/STILP are only single-copy atomic for 16-byte accesses on cores
     which also implement LSE2, so require it up front no matter how LRCPC3
     itself ends up being detected.  */
  if (!(hwcap & HWCAP_USCAT))
    return false;

  /* Preferred method: the kernel advertises LRCPC3 directly in hwcap2.  */
  if ((hwcap & _IFUNC_ARG_HWCAP)
      && (features->_hwcap2 & HWCAP2_LRCPC3))
    return true;

  /* Fallback method: read the ID register.  Without HWCAP_CPUID the register
     cannot be read, so LRCPC3 has to be assumed absent.  */
  if (!(hwcap & HWCAP_CPUID))
    return false;

  unsigned long isar1;
  asm volatile ("mrs %0, ID_AA64ISAR1_EL1" : "=r" (isar1));
  return AT_FEAT_FIELD (isar1) >= 3;
}
#endif /* HAVE_IFUNC */
/* All 128-bit atomic functions are defined in aarch64/atomic_16.S. */