mirror of git://gcc.gnu.org/git/gcc.git
arm-builtins.c (enum arm_type_qualifiers): Add qualifier_lane_pair_index.
2019-01-10  Tamar Christina  <tamar.christina@arm.com>

	* config/arm/arm-builtins.c
	(enum arm_type_qualifiers): Add qualifier_lane_pair_index.
	(MAC_LANE_PAIR_QUALIFIERS): New.
	(arm_expand_builtin_args): Use it.
	(arm_expand_builtin_1): Likewise.
	* config/arm/arm-protos.h (neon_vcmla_lane_prepare_operands): New.
	* config/arm/arm.c (neon_vcmla_lane_prepare_operands): New.
	* config/arm/arm-c.c (arm_cpu_builtins): Add __ARM_FEATURE_COMPLEX.
	* config/arm/arm_neon.h:
	(vcadd_rot90_f16): New.
	(vcaddq_rot90_f16): New.
	(vcadd_rot270_f16): New.
	(vcaddq_rot270_f16): New.
	(vcmla_f16): New.
	(vcmlaq_f16): New.
	(vcmla_lane_f16): New.
	(vcmla_laneq_f16): New.
	(vcmlaq_lane_f16): New.
	(vcmlaq_laneq_f16): New.
	(vcmla_rot90_f16): New.
	(vcmlaq_rot90_f16): New.
	(vcmla_rot90_lane_f16): New.
	(vcmla_rot90_laneq_f16): New.
	(vcmlaq_rot90_lane_f16): New.
	(vcmlaq_rot90_laneq_f16): New.
	(vcmla_rot180_f16): New.
	(vcmlaq_rot180_f16): New.
	(vcmla_rot180_lane_f16): New.
	(vcmla_rot180_laneq_f16): New.
	(vcmlaq_rot180_lane_f16): New.
	(vcmlaq_rot180_laneq_f16): New.
	(vcmla_rot270_f16): New.
	(vcmlaq_rot270_f16): New.
	(vcmla_rot270_lane_f16): New.
	(vcmla_rot270_laneq_f16): New.
	(vcmlaq_rot270_lane_f16): New.
	(vcmlaq_rot270_laneq_f16): New.
	(vcadd_rot90_f32): New.
	(vcaddq_rot90_f32): New.
	(vcadd_rot270_f32): New.
	(vcaddq_rot270_f32): New.
	(vcmla_f32): New.
	(vcmlaq_f32): New.
	(vcmla_lane_f32): New.
	(vcmla_laneq_f32): New.
	(vcmlaq_lane_f32): New.
	(vcmlaq_laneq_f32): New.
	(vcmla_rot90_f32): New.
	(vcmlaq_rot90_f32): New.
	(vcmla_rot90_lane_f32): New.
	(vcmla_rot90_laneq_f32): New.
	(vcmlaq_rot90_lane_f32): New.
	(vcmlaq_rot90_laneq_f32): New.
	(vcmla_rot180_f32): New.
	(vcmlaq_rot180_f32): New.
	(vcmla_rot180_lane_f32): New.
	(vcmla_rot180_laneq_f32): New.
	(vcmlaq_rot180_lane_f32): New.
	(vcmlaq_rot180_laneq_f32): New.
	(vcmla_rot270_f32): New.
	(vcmlaq_rot270_f32): New.
	(vcmla_rot270_lane_f32): New.
	(vcmla_rot270_laneq_f32): New.
	(vcmlaq_rot270_lane_f32): New.
	(vcmlaq_rot270_laneq_f32): New.
	* config/arm/arm_neon_builtins.def (vcadd90, vcadd270, vcmla0, vcmla90,
	vcmla180, vcmla270, vcmla_lane0, vcmla_lane90, vcmla_lane180,
	vcmla_lane270, vcmla_laneq0, vcmla_laneq90, vcmla_laneq180,
	vcmla_laneq270, vcmlaq_lane0, vcmlaq_lane90, vcmlaq_lane180,
	vcmlaq_lane270): New.
	* config/arm/neon.md (neon_vcmla_lane<rot><mode>,
	neon_vcmla_laneq<rot><mode>, neon_vcmlaq_lane<rot><mode>): New.
	* config/arm/arm.c (arm_arch8_3, arm_arch8_4): New.
	* config/arm/arm.h (TARGET_COMPLEX, arm_arch8_3, arm_arch8_4): New.
	(arm_option_reconfigure_globals): Use them.
	* config/arm/iterators.md (VDF, VQ_HSF): New.
	(VCADD, VCMLA): New.
	(VF_constraint, rot, rotsplit1, rotsplit2): Add V4HF and V8HF.
	* config/arm/neon.md (neon_vcadd<rot><mode>, neon_vcmla<rot><mode>): New.
	* config/arm/unspecs.md (UNSPEC_VCADD90, UNSPEC_VCADD270,
	UNSPEC_VCMLA, UNSPEC_VCMLA90, UNSPEC_VCMLA180, UNSPEC_VCMLA270): New.

gcc/testsuite/ChangeLog:

2019-01-10  Tamar Christina  <tamar.christina@arm.com>

	* gcc.target/aarch64/advsimd-intrinsics/vector-complex.c: Add AArch32 regexpr.
	* gcc.target/aarch64/advsimd-intrinsics/vector-complex_f16.c: Likewise.

From-SVN: r267796
This commit is contained in:
parent 9d63f43b2d
commit c2b7062d58
gcc/ChangeLog

@@ -1,3 +1,86 @@
+2019-01-10  Tamar Christina  <tamar.christina@arm.com>
+
+	* config/arm/arm-builtins.c
+	(enum arm_type_qualifiers): Add qualifier_lane_pair_index.
+	(MAC_LANE_PAIR_QUALIFIERS): New.
+	(arm_expand_builtin_args): Use it.
+	(arm_expand_builtin_1): Likewise.
+	* config/arm/arm-protos.h (neon_vcmla_lane_prepare_operands): New.
+	* config/arm/arm.c (neon_vcmla_lane_prepare_operands): New.
+	* config/arm/arm-c.c (arm_cpu_builtins): Add __ARM_FEATURE_COMPLEX.
+	* config/arm/arm_neon.h:
+	(vcadd_rot90_f16): New.
+	(vcaddq_rot90_f16): New.
+	(vcadd_rot270_f16): New.
+	(vcaddq_rot270_f16): New.
+	(vcmla_f16): New.
+	(vcmlaq_f16): New.
+	(vcmla_lane_f16): New.
+	(vcmla_laneq_f16): New.
+	(vcmlaq_lane_f16): New.
+	(vcmlaq_laneq_f16): New.
+	(vcmla_rot90_f16): New.
+	(vcmlaq_rot90_f16): New.
+	(vcmla_rot90_lane_f16): New.
+	(vcmla_rot90_laneq_f16): New.
+	(vcmlaq_rot90_lane_f16): New.
+	(vcmlaq_rot90_laneq_f16): New.
+	(vcmla_rot180_f16): New.
+	(vcmlaq_rot180_f16): New.
+	(vcmla_rot180_lane_f16): New.
+	(vcmla_rot180_laneq_f16): New.
+	(vcmlaq_rot180_lane_f16): New.
+	(vcmlaq_rot180_laneq_f16): New.
+	(vcmla_rot270_f16): New.
+	(vcmlaq_rot270_f16): New.
+	(vcmla_rot270_lane_f16): New.
+	(vcmla_rot270_laneq_f16): New.
+	(vcmlaq_rot270_lane_f16): New.
+	(vcmlaq_rot270_laneq_f16): New.
+	(vcadd_rot90_f32): New.
+	(vcaddq_rot90_f32): New.
+	(vcadd_rot270_f32): New.
+	(vcaddq_rot270_f32): New.
+	(vcmla_f32): New.
+	(vcmlaq_f32): New.
+	(vcmla_lane_f32): New.
+	(vcmla_laneq_f32): New.
+	(vcmlaq_lane_f32): New.
+	(vcmlaq_laneq_f32): New.
+	(vcmla_rot90_f32): New.
+	(vcmlaq_rot90_f32): New.
+	(vcmla_rot90_lane_f32): New.
+	(vcmla_rot90_laneq_f32): New.
+	(vcmlaq_rot90_lane_f32): New.
+	(vcmlaq_rot90_laneq_f32): New.
+	(vcmla_rot180_f32): New.
+	(vcmlaq_rot180_f32): New.
+	(vcmla_rot180_lane_f32): New.
+	(vcmla_rot180_laneq_f32): New.
+	(vcmlaq_rot180_lane_f32): New.
+	(vcmlaq_rot180_laneq_f32): New.
+	(vcmla_rot270_f32): New.
+	(vcmlaq_rot270_f32): New.
+	(vcmla_rot270_lane_f32): New.
+	(vcmla_rot270_laneq_f32): New.
+	(vcmlaq_rot270_lane_f32): New.
+	(vcmlaq_rot270_laneq_f32): New.
+	* config/arm/arm_neon_builtins.def (vcadd90, vcadd270, vcmla0, vcmla90,
+	vcmla180, vcmla270, vcmla_lane0, vcmla_lane90, vcmla_lane180, vcmla_lane270,
+	vcmla_laneq0, vcmla_laneq90, vcmla_laneq180, vcmla_laneq270,
+	vcmlaq_lane0, vcmlaq_lane90, vcmlaq_lane180, vcmlaq_lane270): New.
+	* config/arm/neon.md (neon_vcmla_lane<rot><mode>,
+	neon_vcmla_laneq<rot><mode>, neon_vcmlaq_lane<rot><mode>): New.
+	* config/arm/arm.c (arm_arch8_3, arm_arch8_4): New.
+	* config/arm/arm.h (TARGET_COMPLEX, arm_arch8_3, arm_arch8_4): New.
+	(arm_option_reconfigure_globals): Use them.
+	* config/arm/iterators.md (VDF, VQ_HSF): New.
+	(VCADD, VCMLA): New.
+	(VF_constraint, rot, rotsplit1, rotsplit2): Add V4HF and V8HF.
+	* config/arm/neon.md (neon_vcadd<rot><mode>, neon_vcmla<rot><mode>): New.
+	* config/arm/unspecs.md (UNSPEC_VCADD90, UNSPEC_VCADD270,
+	UNSPEC_VCMLA, UNSPEC_VCMLA90, UNSPEC_VCMLA180, UNSPEC_VCMLA270): New.
+
 2019-01-10  Tamar Christina  <tamar.christina@arm.com>
 
 	* config/aarch64/aarch64-builtins.c (enum aarch64_type_qualifiers): Add qualifier_lane_pair_index.
gcc/config/arm/arm-builtins.c

@@ -82,7 +82,10 @@ enum arm_type_qualifiers
   /* A void pointer.  */
   qualifier_void_pointer = 0x800,
   /* A const void pointer.  */
-  qualifier_const_void_pointer = 0x802
+  qualifier_const_void_pointer = 0x802,
+  /* Lane indices selected in pairs - must be within range of previous
+     argument = a vector.  */
+  qualifier_lane_pair_index = 0x1000
 };
 
 /* The qualifier_internal allows generation of a unary builtin from
@@ -144,6 +147,13 @@ arm_mac_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
       qualifier_none, qualifier_lane_index };
 #define MAC_LANE_QUALIFIERS (arm_mac_lane_qualifiers)
 
+/* T (T, T, T, lane pair index).  */
+static enum arm_type_qualifiers
+arm_mac_lane_pair_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_none, qualifier_none,
+      qualifier_none, qualifier_lane_pair_index };
+#define MAC_LANE_PAIR_QUALIFIERS (arm_mac_lane_pair_qualifiers)
+
 /* unsigned T (unsigned T, unsigned T, unsigned T, lane index).  */
 static enum arm_type_qualifiers
 arm_umac_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
@@ -2129,6 +2139,7 @@ typedef enum {
   ARG_BUILTIN_CONSTANT,
   ARG_BUILTIN_LANE_INDEX,
   ARG_BUILTIN_STRUCT_LOAD_STORE_LANE_INDEX,
+  ARG_BUILTIN_LANE_PAIR_INDEX,
   ARG_BUILTIN_NEON_MEMORY,
   ARG_BUILTIN_MEMORY,
   ARG_BUILTIN_STOP
@@ -2266,6 +2277,19 @@ arm_expand_builtin_args (rtx target, machine_mode map_mode, int fcode,
 	      machine_mode vmode = mode[argc - 1];
 	      neon_lane_bounds (op[argc], 0, GET_MODE_NUNITS (vmode), exp);
 	    }
 	  /* If the lane index isn't a constant then error out.  */
 	  goto constant_arg;
 
+	case ARG_BUILTIN_LANE_PAIR_INDEX:
+	  /* Previous argument must be a vector, which this indexes.  The
+	     indexing will always select i and i+1 out of the vector, which
+	     puts a limit on i.  */
+	  gcc_assert (argc > 0);
+	  if (CONST_INT_P (op[argc]))
+	    {
+	      machine_mode vmode = mode[argc - 1];
+	      neon_lane_bounds (op[argc], 0, GET_MODE_NUNITS (vmode) / 2, exp);
+	    }
+	  /* If the lane index isn't a constant then the next
+	     case will error.  */
+	  /* Fall through.  */
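The halved bound above follows directly from the pairing: a lane-pair index i selects elements 2*i and 2*i+1, so for a vector of NUNITS elements only indices below NUNITS/2 are valid. A minimal standalone sketch of that check (illustration only, not part of the patch):

#include <stdio.h>

/* A lane-pair index i names elements 2*i and 2*i+1, so the last valid
   index for an NUNITS-element vector is NUNITS/2 - 1.  */
static int
lane_pair_in_range (int nunits, int i)
{
  return i >= 0 && i < nunits / 2;
}

int
main (void)
{
  /* float16x4_t: pairs {0,1} and {2,3} -> indices 0 and 1 are valid.  */
  printf ("%d %d %d\n",
	  lane_pair_in_range (4, 0),	/* 1 */
	  lane_pair_in_range (4, 1),	/* 1 */
	  lane_pair_in_range (4, 2));	/* 0 */
  return 0;
}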
@@ -2427,6 +2451,8 @@ arm_expand_builtin_1 (int fcode, tree exp, rtx target,
 
       if (d->qualifiers[qualifiers_k] & qualifier_lane_index)
 	args[k] = ARG_BUILTIN_LANE_INDEX;
+      else if (d->qualifiers[qualifiers_k] & qualifier_lane_pair_index)
+	args[k] = ARG_BUILTIN_LANE_PAIR_INDEX;
       else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index)
 	args[k] = ARG_BUILTIN_STRUCT_LOAD_STORE_LANE_INDEX;
       else if (d->qualifiers[qualifiers_k] & qualifier_immediate)
gcc/config/arm/arm-c.c

@@ -76,6 +76,7 @@ arm_cpu_builtins (struct cpp_reader* pfile)
 
   def_or_undef_macro (pfile, "__ARM_FEATURE_CRC32", TARGET_CRC32);
   def_or_undef_macro (pfile, "__ARM_FEATURE_DOTPROD", TARGET_DOTPROD);
+  def_or_undef_macro (pfile, "__ARM_FEATURE_COMPLEX", TARGET_COMPLEX);
   def_or_undef_macro (pfile, "__ARM_32BIT_STATE", TARGET_32BIT);
 
   cpp_undef (pfile, "__ARM_FEATURE_CMSE");
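For user code, the new macro acts as the usual ACLE feature test. A sketch of how a consumer might guard the new intrinsics (illustrative; the fallback body is a placeholder, not something this patch prescribes):

#include <arm_neon.h>

/* Use the Armv8.3-A complex-add intrinsic only when the compiler
   predefines __ARM_FEATURE_COMPLEX for the current target.  */
float32x2_t
maybe_vcadd (float32x2_t a, float32x2_t b)
{
#if defined (__ARM_FEATURE_COMPLEX)
  return vcadd_rot90_f32 (a, b);
#else
  return a;  /* fallback path for targets without the extension */
#endif
}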
gcc/config/arm/arm-protos.h

@@ -109,6 +109,8 @@ extern int arm_coproc_mem_operand (rtx, bool);
 extern int neon_vector_mem_operand (rtx, int, bool);
 extern int neon_struct_mem_operand (rtx);
 
+extern rtx *neon_vcmla_lane_prepare_operands (machine_mode, rtx *);
+
 extern int tls_mentioned_p (rtx);
 extern int symbol_mentioned_p (rtx);
 extern int label_mentioned_p (rtx);
gcc/config/arm/arm.c

@@ -895,6 +895,12 @@ int arm_arch8_1 = 0;
 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions.  */
 int arm_arch8_2 = 0;
 
+/* Nonzero if this chip supports the ARM Architecture 8.3 extensions.  */
+int arm_arch8_3 = 0;
+
+/* Nonzero if this chip supports the ARM Architecture 8.4 extensions.  */
+int arm_arch8_4 = 0;
+
 /* Nonzero if this chip supports the FP16 instructions extension of ARM
    Architecture 8.2.  */
 int arm_fp16_inst = 0;
@@ -3649,6 +3655,8 @@ arm_option_reconfigure_globals (void)
   arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
   arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
   arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
+  arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
+  arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
   arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
   arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
   arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
@@ -12713,6 +12721,44 @@ neon_struct_mem_operand (rtx op)
   return FALSE;
 }
 
+/* Prepares the operands for the VCMLA by lane instruction such that the right
+   register number is selected.  This instruction is special in that it always
+   requires a D register; however, there is a choice to be made between Dn[0],
+   Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers and
+   the PATTERNMODE of the insn.
+
+   The VCMLA by lane function always selects two values.  For instance, given
+   D0 and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
+   used by the instruction.  However, given V4SF, indices 0 and 1 are valid as
+   D0[0] or D1[0] are both valid.
+
+   This function centralizes that information based on OPERANDS; OPERANDS[3]
+   will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
+   updated to contain the right index.  */
+
+rtx *
+neon_vcmla_lane_prepare_operands (machine_mode patternmode, rtx *operands)
+{
+  int lane = NEON_ENDIAN_LANE_N (patternmode, INTVAL (operands[4]));
+  machine_mode constmode = SImode;
+  machine_mode mode = GET_MODE (operands[3]);
+  int regno = REGNO (operands[3]);
+  regno = ((regno - FIRST_VFP_REGNUM) >> 1);
+  if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
+    {
+      operands[3] = gen_int_mode (regno + 1, constmode);
+      operands[4]
+	= gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
+    }
+  else
+    {
+      operands[3] = gen_int_mode (regno, constmode);
+      operands[4] = gen_int_mode (lane, constmode);
+    }
+  return operands;
+}
+
+
 /* Return true if X is a register that will be eliminated later on.  */
 int
 arm_eliminable_register (rtx x)
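The register/lane arithmetic is easier to see in isolation. A standalone sketch of the renumbering (assumptions: the big-endian lane remapping and the RTX bookkeeping are omitted, and split_lane is a hypothetical helper written for illustration, not a GCC function):

#include <stdio.h>

/* One D register holds nunits/4 complex pairs; indices past that
   boundary land in D(n+1) with the boundary subtracted.  */
static void
split_lane (int d_regno, int nunits, int lane, int *out_reg, int *out_lane)
{
  int pairs_per_d = nunits / 4;
  if (lane > 0 && lane >= pairs_per_d)
    {
      *out_reg = d_regno + 1;          /* index lands in D(n+1) */
      *out_lane = lane - pairs_per_d;
    }
  else
    {
      *out_reg = d_regno;              /* index stays in Dn */
      *out_lane = lane;
    }
}

int
main (void)
{
  int r, l;
  split_lane (0, 8, 3, &r, &l);  /* V8HF in q0 = d0:d1, lane-pair 3 */
  printf ("d%d[%d]\n", r, l);    /* prints d1[1] */
  return 0;
}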
gcc/config/arm/arm.h

@@ -220,6 +220,9 @@ extern tree arm_fp16_type_node;
 			    isa_bit_dotprod)				\
 		 && arm_arch8_2)
 
+/* Supports the Armv8.3-a Complex number AdvSIMD extensions.  */
+#define TARGET_COMPLEX (TARGET_NEON && arm_arch8_3)
+
 /* FPU supports the floating point FP16 instructions for ARMv8.2-A
    and later.  */
 #define TARGET_VFP_FP16INST \
@@ -442,6 +445,12 @@ extern int arm_arch8_1;
 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions.  */
 extern int arm_arch8_2;
 
+/* Nonzero if this chip supports the ARM Architecture 8.3 extensions.  */
+extern int arm_arch8_3;
+
+/* Nonzero if this chip supports the ARM Architecture 8.4 extensions.  */
+extern int arm_arch8_4;
+
 /* Nonzero if this chip supports the FP16 instructions extension of ARM
    Architecture 8.2.  */
 extern int arm_fp16_inst;
gcc/config/arm/arm_neon.h

@@ -18307,6 +18307,445 @@ vfmlsl_laneq_high_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
 #pragma GCC pop_options
 #endif
 
+/* AdvSIMD Complex numbers intrinsics.  */
+#if __ARM_ARCH >= 8
+#pragma GCC push_options
+#pragma GCC target(("arch=armv8.3-a"))
+
+
+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
+#pragma GCC push_options
+#pragma GCC target(("+fp16"))
+__extension__ extern __inline float16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcadd_rot90_f16 (float16x4_t __a, float16x4_t __b)
+{
+  return __builtin_neon_vcadd90v4hf (__a, __b);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcaddq_rot90_f16 (float16x8_t __a, float16x8_t __b)
+{
+  return __builtin_neon_vcadd90v8hf (__a, __b);
+}
+
+__extension__ extern __inline float16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcadd_rot270_f16 (float16x4_t __a, float16x4_t __b)
+{
+  return __builtin_neon_vcadd90v4hf (__a, __b);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcaddq_rot270_f16 (float16x8_t __a, float16x8_t __b)
+{
+  return __builtin_neon_vcadd90v8hf (__a, __b);
+}
+
+__extension__ extern __inline float16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b)
+{
+  return __builtin_neon_vcmla0v4hf (__r, __a, __b);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b)
+{
+  return __builtin_neon_vcmla0v8hf (__r, __a, __b);
+}
+
+__extension__ extern __inline float16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b,
+		const int __index)
+{
+  return __builtin_neon_vcmla_lane0v4hf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b,
+		 const int __index)
+{
+  return __builtin_neon_vcmla_laneq0v4hf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b,
+		 const int __index)
+{
+  return __builtin_neon_vcmlaq_lane0v8hf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b,
+		  const int __index)
+{
+  return __builtin_neon_vcmla_lane0v8hf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot90_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b)
+{
+  return __builtin_neon_vcmla90v4hf (__r, __a, __b);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot90_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b)
+{
+  return __builtin_neon_vcmla90v8hf (__r, __a, __b);
+}
+
+__extension__ extern __inline float16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot90_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b,
+		      const int __index)
+{
+  return __builtin_neon_vcmla_lane90v4hf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot90_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b,
+		       const int __index)
+{
+  return __builtin_neon_vcmla_laneq90v4hf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot90_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b,
+		       const int __index)
+{
+  return __builtin_neon_vcmlaq_lane90v8hf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot90_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b,
+			const int __index)
+{
+  return __builtin_neon_vcmla_lane90v8hf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot180_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b)
+{
+  return __builtin_neon_vcmla180v4hf (__r, __a, __b);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot180_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b)
+{
+  return __builtin_neon_vcmla180v8hf (__r, __a, __b);
+}
+
+__extension__ extern __inline float16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot180_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b,
+		       const int __index)
+{
+  return __builtin_neon_vcmla_lane180v4hf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot180_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b,
+			const int __index)
+{
+  return __builtin_neon_vcmla_laneq180v4hf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot180_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b,
+			const int __index)
+{
+  return __builtin_neon_vcmlaq_lane180v8hf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot180_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b,
+			 const int __index)
+{
+  return __builtin_neon_vcmla_lane180v8hf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot270_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b)
+{
+  return __builtin_neon_vcmla270v4hf (__r, __a, __b);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot270_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b)
+{
+  return __builtin_neon_vcmla270v8hf (__r, __a, __b);
+}
+
+__extension__ extern __inline float16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot270_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b,
+		       const int __index)
+{
+  return __builtin_neon_vcmla_lane270v4hf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot270_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b,
+			const int __index)
+{
+  return __builtin_neon_vcmla_laneq270v4hf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot270_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b,
+			const int __index)
+{
+  return __builtin_neon_vcmlaq_lane270v8hf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot270_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b,
+			 const int __index)
+{
+  return __builtin_neon_vcmla_lane270v8hf (__r, __a, __b, __index);
+}
+
+#pragma GCC pop_options
+#endif
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcadd_rot90_f32 (float32x2_t __a, float32x2_t __b)
+{
+  return __builtin_neon_vcadd90v2sf (__a, __b);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcaddq_rot90_f32 (float32x4_t __a, float32x4_t __b)
+{
+  return __builtin_neon_vcadd90v4sf (__a, __b);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcadd_rot270_f32 (float32x2_t __a, float32x2_t __b)
+{
+  return __builtin_neon_vcadd90v2sf (__a, __b);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcaddq_rot270_f32 (float32x4_t __a, float32x4_t __b)
+{
+  return __builtin_neon_vcadd90v4sf (__a, __b);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b)
+{
+  return __builtin_neon_vcmla0v2sf (__r, __a, __b);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b)
+{
+  return __builtin_neon_vcmla0v4sf (__r, __a, __b);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b,
+		const int __index)
+{
+  return __builtin_neon_vcmla_lane0v2sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b,
+		 const int __index)
+{
+  return __builtin_neon_vcmla_laneq0v2sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b,
+		 const int __index)
+{
+  return __builtin_neon_vcmlaq_lane0v4sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b,
+		  const int __index)
+{
+  return __builtin_neon_vcmla_lane0v4sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot90_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b)
+{
+  return __builtin_neon_vcmla90v2sf (__r, __a, __b);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot90_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b)
+{
+  return __builtin_neon_vcmla90v4sf (__r, __a, __b);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot90_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b,
+		      const int __index)
+{
+  return __builtin_neon_vcmla_lane90v2sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot90_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b,
+		       const int __index)
+{
+  return __builtin_neon_vcmla_laneq90v2sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot90_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b,
+		       const int __index)
+{
+  return __builtin_neon_vcmlaq_lane90v4sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot90_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b,
+			const int __index)
+{
+  return __builtin_neon_vcmla_lane90v4sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot180_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b)
+{
+  return __builtin_neon_vcmla180v2sf (__r, __a, __b);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot180_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b)
+{
+  return __builtin_neon_vcmla180v4sf (__r, __a, __b);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot180_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b,
+		       const int __index)
+{
+  return __builtin_neon_vcmla_lane180v2sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot180_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b,
+			const int __index)
+{
+  return __builtin_neon_vcmla_laneq180v2sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot180_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b,
+			const int __index)
+{
+  return __builtin_neon_vcmlaq_lane180v4sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot180_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b,
+			 const int __index)
+{
+  return __builtin_neon_vcmla_lane180v4sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot270_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b)
+{
+  return __builtin_neon_vcmla270v2sf (__r, __a, __b);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot270_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b)
+{
+  return __builtin_neon_vcmla270v4sf (__r, __a, __b);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot270_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b,
+		       const int __index)
+{
+  return __builtin_neon_vcmla_lane270v2sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot270_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b,
+			const int __index)
+{
+  return __builtin_neon_vcmla_laneq270v2sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot270_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b,
+			const int __index)
+{
+  return __builtin_neon_vcmlaq_lane270v4sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot270_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b,
+			 const int __index)
+{
+  return __builtin_neon_vcmla_lane270v4sf (__r, __a, __b, __index);
+}
+
+#pragma GCC pop_options
+#endif
 
 #ifdef __cplusplus
 }
 #endif
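A typical use of the rotation pair is a full complex multiply-accumulate: the rotation-0 form contributes the products with the real part of the first operand, and the rotation-90 form the products with its imaginary part. A hedged usage sketch (assumes the intrinsics above and compilation with something like -march=armv8.3-a with NEON enabled):

#include <arm_neon.h>

/* r += a * b on vectors of complex floats laid out as { re, im }
   pairs in each 64-bit D register.  */
float32x2_t
complex_mac (float32x2_t r, float32x2_t a, float32x2_t b)
{
  r = vcmla_f32 (r, a, b);        /* r.re += a.re*b.re; r.im += a.re*b.im */
  r = vcmla_rot90_f32 (r, a, b);  /* r.re -= a.im*b.im; r.im += a.im*b.re */
  return r;
}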
gcc/config/arm/arm_neon_builtins.def

@@ -351,3 +351,25 @@ VAR2 (TERNOP, sdot, v8qi, v16qi)
 VAR2 (UTERNOP, udot, v8qi, v16qi)
 VAR2 (MAC_LANE, sdot_lane, v8qi, v16qi)
 VAR2 (UMAC_LANE, udot_lane, v8qi, v16qi)
+
+VAR4 (BINOP, vcadd90, v4hf, v2sf, v8hf, v4sf)
+VAR4 (BINOP, vcadd270, v4hf, v2sf, v8hf, v4sf)
+VAR4 (TERNOP, vcmla0, v2sf, v4sf, v4hf, v8hf)
+VAR4 (TERNOP, vcmla90, v2sf, v4sf, v4hf, v8hf)
+VAR4 (TERNOP, vcmla180, v2sf, v4sf, v4hf, v8hf)
+VAR4 (TERNOP, vcmla270, v2sf, v4sf, v4hf, v8hf)
+
+VAR4 (MAC_LANE_PAIR, vcmla_lane0, v2sf, v4hf, v8hf, v4sf)
+VAR4 (MAC_LANE_PAIR, vcmla_lane90, v2sf, v4hf, v8hf, v4sf)
+VAR4 (MAC_LANE_PAIR, vcmla_lane180, v2sf, v4hf, v8hf, v4sf)
+VAR4 (MAC_LANE_PAIR, vcmla_lane270, v2sf, v4hf, v8hf, v4sf)
+
+VAR2 (MAC_LANE_PAIR, vcmla_laneq0, v2sf, v4hf)
+VAR2 (MAC_LANE_PAIR, vcmla_laneq90, v2sf, v4hf)
+VAR2 (MAC_LANE_PAIR, vcmla_laneq180, v2sf, v4hf)
+VAR2 (MAC_LANE_PAIR, vcmla_laneq270, v2sf, v4hf)
+
+VAR2 (MAC_LANE_PAIR, vcmlaq_lane0, v4sf, v8hf)
+VAR2 (MAC_LANE_PAIR, vcmlaq_lane90, v4sf, v8hf)
+VAR2 (MAC_LANE_PAIR, vcmlaq_lane180, v4sf, v8hf)
+VAR2 (MAC_LANE_PAIR, vcmlaq_lane270, v4sf, v8hf)
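Each VARn entry fans one builtin base name out across n vector modes, yielding the __builtin_neon_<name><mode> symbols used by arm_neon.h above. A rough illustration of that naming scheme (hypothetical NAME macro, far simpler than GCC's real expansion machinery):

#include <stdio.h>

/* Adjacent string literals concatenate, so this pastes the pieces of
   a builtin name together at compile time.  */
#define NAME(base, mode) "__builtin_neon_" #base #mode

int
main (void)
{
  /* VAR4 (BINOP, vcadd90, v4hf, v2sf, v8hf, v4sf) registers builtins
     carrying these names: */
  printf ("%s\n%s\n%s\n%s\n",
	  NAME (vcadd90, v4hf), NAME (vcadd90, v2sf),
	  NAME (vcadd90, v8hf), NAME (vcadd90, v4sf));
  return 0;
}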
gcc/config/arm/iterators.md

@@ -123,6 +123,13 @@
 (define_mode_iterator VF [(V4HF "TARGET_NEON_FP16INST")
			  (V8HF "TARGET_NEON_FP16INST") V2SF V4SF])
 
+;; Double vector modes.
+(define_mode_iterator VDF [V2SF V4HF])
+
+;; Quad vector Float modes with half/single elements.
+(define_mode_iterator VQ_HSF [V8HF V4SF])
+
+
 ;; All supported vector modes (except those with 64-bit integer elements).
 (define_mode_iterator VDQW [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF])
@@ -423,6 +430,9 @@
 
 (define_int_iterator VFMLHALVES [UNSPEC_VFML_LO UNSPEC_VFML_HI])
 
+(define_int_iterator VCADD [UNSPEC_VCADD90 UNSPEC_VCADD270])
+(define_int_iterator VCMLA [UNSPEC_VCMLA UNSPEC_VCMLA90 UNSPEC_VCMLA180 UNSPEC_VCMLA270])
+
 ;;----------------------------------------------------------------------------
 ;; Mode attributes
 ;;----------------------------------------------------------------------------
@@ -741,7 +751,7 @@
 (define_mode_attr F_constraint [(SF "t") (DF "w")])
 (define_mode_attr vfp_type [(SF "s") (DF "d")])
 (define_mode_attr vfp_double_cond [(SF "") (DF "&& TARGET_VFP_DOUBLE")])
-(define_mode_attr VF_constraint [(V2SF "t") (V4SF "w")])
+(define_mode_attr VF_constraint [(V4HF "t") (V8HF "t") (V2SF "t") (V4SF "w")])
 
 ;; Mode attribute used to build the "type" attribute.
 (define_mode_attr q [(V8QI "") (V16QI "_q")
@@ -989,6 +999,13 @@
			(UNSPEC_SHA1SU0 "V4SI") (UNSPEC_SHA256H "V4SI")
			(UNSPEC_SHA256H2 "V4SI") (UNSPEC_SHA256SU1 "V4SI")])
 
+(define_int_attr rot [(UNSPEC_VCADD90 "90")
+		      (UNSPEC_VCADD270 "270")
+		      (UNSPEC_VCMLA "0")
+		      (UNSPEC_VCMLA90 "90")
+		      (UNSPEC_VCMLA180 "180")
+		      (UNSPEC_VCMLA270 "270")])
+
 ;; Both kinds of return insn.
 (define_code_iterator RETURNS [return simple_return])
 (define_code_attr return_str [(return "") (simple_return "simple_")])
gcc/config/arm/neon.md

@@ -3457,6 +3457,80 @@
   DONE;
 })
 
+
+;; The vcadd and vcmla patterns are made UNSPEC explicitly due to the fact
+;; that their usage needs to guarantee that the source vectors are
+;; contiguous.  It would be wrong to describe the operation without being able
+;; to describe the permute that is also required, but even if that is done
+;; the permute would have been created as a LOAD_LANES which means the values
+;; in the registers are in the wrong order.
+(define_insn "neon_vcadd<rot><mode>"
+  [(set (match_operand:VF 0 "register_operand" "=w")
+	(unspec:VF [(match_operand:VF 1 "register_operand" "w")
+		    (match_operand:VF 2 "register_operand" "w")]
+		    VCADD))]
+  "TARGET_COMPLEX"
+  "vcadd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, #<rot>"
+  [(set_attr "type" "neon_fcadd")]
+)
+
+(define_insn "neon_vcmla<rot><mode>"
+  [(set (match_operand:VF 0 "register_operand" "=w")
+	(plus:VF (match_operand:VF 1 "register_operand" "0")
+		 (unspec:VF [(match_operand:VF 2 "register_operand" "w")
+			     (match_operand:VF 3 "register_operand" "w")]
+			     VCMLA)))]
+  "TARGET_COMPLEX"
+  "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3, #<rot>"
+  [(set_attr "type" "neon_fcmla")]
+)
+
+(define_insn "neon_vcmla_lane<rot><mode>"
+  [(set (match_operand:VF 0 "s_register_operand" "=w")
+	(plus:VF (match_operand:VF 1 "s_register_operand" "0")
+		 (unspec:VF [(match_operand:VF 2 "s_register_operand" "w")
+			     (match_operand:VF 3 "s_register_operand" "<VF_constraint>")
+			     (match_operand:SI 4 "const_int_operand" "n")]
+			     VCMLA)))]
+  "TARGET_COMPLEX"
+  {
+    operands = neon_vcmla_lane_prepare_operands (<MODE>mode, operands);
+    return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
+  }
+  [(set_attr "type" "neon_fcmla")]
+)
+
+(define_insn "neon_vcmla_laneq<rot><mode>"
+  [(set (match_operand:VDF 0 "s_register_operand" "=w")
+	(plus:VDF (match_operand:VDF 1 "s_register_operand" "0")
+		  (unspec:VDF [(match_operand:VDF 2 "s_register_operand" "w")
+			       (match_operand:<V_DOUBLE> 3 "s_register_operand" "<VF_constraint>")
+			       (match_operand:SI 4 "const_int_operand" "n")]
+			       VCMLA)))]
+  "TARGET_COMPLEX"
+  {
+    operands = neon_vcmla_lane_prepare_operands (<MODE>mode, operands);
+    return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
+  }
+  [(set_attr "type" "neon_fcmla")]
+)
+
+(define_insn "neon_vcmlaq_lane<rot><mode>"
+  [(set (match_operand:VQ_HSF 0 "s_register_operand" "=w")
+	(plus:VQ_HSF (match_operand:VQ_HSF 1 "s_register_operand" "0")
+		     (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "s_register_operand" "w")
+				     (match_operand:<V_HALF> 3 "s_register_operand" "<VF_constraint>")
+				     (match_operand:SI 4 "const_int_operand" "n")]
+				     VCMLA)))]
+  "TARGET_COMPLEX"
+  {
+    operands = neon_vcmla_lane_prepare_operands (<MODE>mode, operands);
+    return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
+  }
+  [(set_attr "type" "neon_fcmla")]
+)
+
+
 ;; These instructions map to the __builtins for the Dot Product operations.
 (define_insn "neon_<sup>dot<vsi2qi>"
   [(set (match_operand:VCVTI 0 "register_operand" "=w")
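For reference, the rotation numbers in these patterns select which halves of each complex element participate. A scalar model of the vcadd rotations and the vcmla rotation-0/90 steps, as I read the Armv8.3-A semantics (illustration only, not derived from this patch):

typedef struct { float re, im; } cf;

/* vcadd #90: add b rotated by +90 degrees (multiplied by i).  */
static cf
vcadd_rot90_scalar (cf a, cf b)
{
  return (cf) { a.re - b.im, a.im + b.re };
}

/* vcadd #270: add b rotated by -90 degrees (multiplied by -i).  */
static cf
vcadd_rot270_scalar (cf a, cf b)
{
  return (cf) { a.re + b.im, a.im - b.re };
}

/* vcmla #0 accumulates a.re * b; vcmla #90 accumulates i * a.im * b.  */
static cf
vcmla_scalar (cf acc, cf a, cf b, int rot)
{
  if (rot == 0)
    {
      acc.re += a.re * b.re;
      acc.im += a.re * b.im;
    }
  else if (rot == 90)
    {
      acc.re -= a.im * b.im;
      acc.im += a.im * b.re;
    }
  return acc;
}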
gcc/config/arm/unspecs.md

@@ -418,4 +418,10 @@
   UNSPEC_DOT_U
   UNSPEC_VFML_LO
   UNSPEC_VFML_HI
+  UNSPEC_VCADD90
+  UNSPEC_VCADD270
+  UNSPEC_VCMLA
+  UNSPEC_VCMLA90
+  UNSPEC_VCMLA180
+  UNSPEC_VCMLA270
 ])
gcc/testsuite/ChangeLog

@@ -1,3 +1,8 @@
+2019-01-10  Tamar Christina  <tamar.christina@arm.com>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vector-complex.c: Add AArch32 regexpr.
+	* gcc.target/aarch64/advsimd-intrinsics/vector-complex_f16.c: Likewise.
+
 2019-01-10  Tamar Christina  <tamar.christina@arm.com>
 
 	* gcc.target/aarch64/advsimd-intrinsics/vector-complex.c: New test.
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vector-complex.c

@@ -1,5 +1,4 @@
-/* { dg-skip-if "" { arm-*-* } } */
-/* { dg-do assemble } */
+/* { dg-do compile } */
 /* { dg-require-effective-target arm_v8_3a_complex_neon_ok } */
 /* { dg-add-options arm_v8_3a_complex_neon } */
 /* { dg-additional-options "-O2 -save-temps" } */
@@ -249,3 +248,22 @@ test_vcmlaq_rot270_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b)
 /* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.4s, v[0-9]+.4s, v[0-9]+.s\[1\], #270} 1 { target { aarch64*-*-* } } } } */
 /* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.4s, v[0-9]+.4s, v[0-9]+.s\[1\], #90} 1 { target { aarch64*-*-* } } } } */
 /* { dg-final { scan-assembler-times {dup\td[0-9]+, v[0-9]+.d\[1\]} 4 { target { aarch64*-*-* } } } } */
+
+/* { dg-final { scan-assembler-times {vcadd.f32\td[0-9]+, d[0-9]+, d[0-9]+, #90} 2 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcadd.f32\tq[0-9]+, q[0-9]+, q[0-9]+, #90} 2 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f32\td[0-9]+, d[0-9]+, d[0-9]+, #0} 1 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f32\td[0-9]+, d[0-9]+, d[0-9]+, #180} 1 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f32\td[0-9]+, d[0-9]+, d[0-9]+, #270} 1 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f32\td[0-9]+, d[0-9]+, d[0-9]+, #90} 1 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f32\td[0-9]+, d[0-9]+, d[0-9]+\[0\], #0} 2 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f32\td[0-9]+, d[0-9]+, d[0-9]+\[0\], #180} 2 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f32\td[0-9]+, d[0-9]+, d[0-9]+\[0\], #270} 2 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f32\td[0-9]+, d[0-9]+, d[0-9]+\[0\], #90} 2 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f32\tq[0-9]+, q[0-9]+, d[0-9]+\[0\], #0} 2 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f32\tq[0-9]+, q[0-9]+, d[0-9]+\[0\], #180} 2 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f32\tq[0-9]+, q[0-9]+, d[0-9]+\[0\], #270} 2 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f32\tq[0-9]+, q[0-9]+, d[0-9]+\[0\], #90} 2 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f32\tq[0-9]+, q[0-9]+, q[0-9]+, #0} 1 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f32\tq[0-9]+, q[0-9]+, q[0-9]+, #180} 1 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f32\tq[0-9]+, q[0-9]+, q[0-9]+, #270} 1 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f32\tq[0-9]+, q[0-9]+, q[0-9]+, #90} 1 { target { arm*-*-* } } } } */
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vector-complex_f16.c

@@ -1,5 +1,4 @@
-/* { dg-skip-if "" { arm-*-* } } */
-/* { dg-do assemble } */
+/* { dg-do compile } */
 /* { dg-require-effective-target arm_v8_3a_complex_neon_ok } */
 /* { dg-require-effective-target arm_v8_2a_fp16_scalar_ok } */
 /* { dg-add-options arm_v8_3a_complex_neon } */
@@ -304,3 +303,30 @@ test_vcmlaq_rot270_laneq_f16_2 (float16x8_t __r, float16x8_t __a, float16x8_t __b)
 /* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.8h, v[0-9]+.8h, v[0-9]+.h\[3\], #180} 1 { target { aarch64*-*-* } } } } */
 /* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.8h, v[0-9]+.8h, v[0-9]+.h\[3\], #270} 1 { target { aarch64*-*-* } } } } */
 /* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.8h, v[0-9]+.8h, v[0-9]+.h\[3\], #90} 1 { target { aarch64*-*-* } } } } */
+
+/* { dg-final { scan-assembler-times {vcadd.f16\td[0-9]+, d[0-9]+, d[0-9]+, #90} 2 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcadd.f16\tq[0-9]+, q[0-9]+, q[0-9]+, #90} 2 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f16\td[0-9]+, d[0-9]+, d[0-9]+, #0} 1 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f16\td[0-9]+, d[0-9]+, d[0-9]+\[0\], #0} 3 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f16\td[0-9]+, d[0-9]+, d[0-9]+\[0\], #180} 3 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f16\td[0-9]+, d[0-9]+, d[0-9]+\[0\], #270} 3 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f16\td[0-9]+, d[0-9]+, d[0-9]+\[0\], #90} 3 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f16\td[0-9]+, d[0-9]+, d[0-9]+\[1\], #0} 1 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f16\td[0-9]+, d[0-9]+, d[0-9]+\[1\], #180} 1 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f16\td[0-9]+, d[0-9]+, d[0-9]+\[1\], #270} 1 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f16\td[0-9]+, d[0-9]+, d[0-9]+, #180} 1 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f16\td[0-9]+, d[0-9]+, d[0-9]+\[1\], #90} 1 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f16\td[0-9]+, d[0-9]+, d[0-9]+, #270} 1 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f16\td[0-9]+, d[0-9]+, d[0-9]+, #90} 1 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f16\tq[0-9]+, q[0-9]+, d[0-9]+\[0\], #0} 3 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f16\tq[0-9]+, q[0-9]+, d[0-9]+\[0\], #180} 3 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f16\tq[0-9]+, q[0-9]+, d[0-9]+\[0\], #270} 3 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f16\tq[0-9]+, q[0-9]+, d[0-9]+\[0\], #90} 3 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f16\tq[0-9]+, q[0-9]+, d[0-9]+\[1\], #0} 1 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f16\tq[0-9]+, q[0-9]+, d[0-9]+\[1\], #180} 1 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f16\tq[0-9]+, q[0-9]+, d[0-9]+\[1\], #270} 1 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f16\tq[0-9]+, q[0-9]+, d[0-9]+\[1\], #90} 1 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f16\tq[0-9]+, q[0-9]+, q[0-9]+, #0} 1 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f16\tq[0-9]+, q[0-9]+, q[0-9]+, #180} 1 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f16\tq[0-9]+, q[0-9]+, q[0-9]+, #270} 1 { target { arm*-*-* } } } } */
+/* { dg-final { scan-assembler-times {vcmla.f16\tq[0-9]+, q[0-9]+, q[0-9]+, #90} 1 { target { arm*-*-* } } } } */