mirror of git://gcc.gnu.org/git/gcc.git
aarch64-builtins.c (enum aarch64_type_qualifiers): Add qualifier_lane_pair_index.
gcc/ChangeLog:

2019-01-10  Tamar Christina  <tamar.christina@arm.com>

	* config/aarch64/aarch64-builtins.c (enum aarch64_type_qualifiers):
	Add qualifier_lane_pair_index.
	(emit-rtl.h): Include.
	(TYPES_QUADOP_LANE_PAIR): New.
	(aarch64_simd_expand_args): Use it.
	(aarch64_simd_expand_builtin): Likewise.
	(AARCH64_SIMD_FCMLA_LANEQ_BUILTINS, aarch64_fcmla_laneq_builtin_datum):
	New.
	(FCMLA_LANEQ_BUILTIN, AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE,
	AARCH64_SIMD_FCMLA_LANEQ_BUILTINS, aarch64_fcmla_lane_builtin_data,
	aarch64_init_fcmla_laneq_builtins, aarch64_expand_fcmla_builtin): New.
	(aarch64_init_builtins): Add aarch64_init_fcmla_laneq_builtins.
	(aarch64_expand_builtin): Add AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V2SF,
	AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V2SF,
	AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V2SF,
	AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V2SF,
	AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V4HF,
	AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V4HF,
	AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V4HF,
	AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V4HF.
	* config/aarch64/iterators.md (FCMLA_maybe_lane): New.
	* config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): Add
	__ARM_FEATURE_COMPLEX.
	* config/aarch64/aarch64-simd-builtins.def (fcadd90, fcadd270,
	fcmla0, fcmla90, fcmla180, fcmla270, fcmla_lane0, fcmla_lane90,
	fcmla_lane180, fcmla_lane270, fcmla_laneq0, fcmla_laneq90,
	fcmla_laneq180, fcmla_laneq270, fcmlaq_lane0, fcmlaq_lane90,
	fcmlaq_lane180, fcmlaq_lane270): New.
	* config/aarch64/aarch64-simd.md (aarch64_fcmla_lane<rot><mode>,
	aarch64_fcmla_laneq<rot>v4hf, aarch64_fcmlaq_lane<rot><mode>,
	aarch64_fcadd<rot><mode>, aarch64_fcmla<rot><mode>): New.
	* config/aarch64/arm_neon.h (vcadd_rot90_f16, vcaddq_rot90_f16,
	vcadd_rot270_f16, vcaddq_rot270_f16, vcmla_f16, vcmlaq_f16,
	vcmla_lane_f16, vcmla_laneq_f16, vcmlaq_lane_f16, vcmlaq_laneq_f16,
	vcmla_rot90_f16, vcmlaq_rot90_f16, vcmla_rot90_lane_f16,
	vcmla_rot90_laneq_f16, vcmlaq_rot90_lane_f16, vcmlaq_rot90_laneq_f16,
	vcmla_rot180_f16, vcmlaq_rot180_f16, vcmla_rot180_lane_f16,
	vcmla_rot180_laneq_f16, vcmlaq_rot180_lane_f16,
	vcmlaq_rot180_laneq_f16, vcmla_rot270_f16, vcmlaq_rot270_f16,
	vcmla_rot270_lane_f16, vcmla_rot270_laneq_f16, vcmlaq_rot270_lane_f16,
	vcmlaq_rot270_laneq_f16, vcadd_rot90_f32, vcaddq_rot90_f32,
	vcaddq_rot90_f64, vcadd_rot270_f32, vcaddq_rot270_f32,
	vcaddq_rot270_f64, vcmla_f32, vcmlaq_f32, vcmlaq_f64, vcmla_lane_f32,
	vcmla_laneq_f32, vcmlaq_lane_f32, vcmlaq_laneq_f32, vcmla_rot90_f32,
	vcmlaq_rot90_f32, vcmlaq_rot90_f64, vcmla_rot90_lane_f32,
	vcmla_rot90_laneq_f32, vcmlaq_rot90_lane_f32, vcmlaq_rot90_laneq_f32,
	vcmla_rot180_f32, vcmlaq_rot180_f32, vcmlaq_rot180_f64,
	vcmla_rot180_lane_f32, vcmla_rot180_laneq_f32, vcmlaq_rot180_lane_f32,
	vcmlaq_rot180_laneq_f32, vcmla_rot270_f32, vcmlaq_rot270_f32,
	vcmlaq_rot270_f64, vcmla_rot270_lane_f32, vcmla_rot270_laneq_f32,
	vcmlaq_rot270_lane_f32, vcmlaq_rot270_laneq_f32): New.
	* config/aarch64/aarch64.h (TARGET_COMPLEX): New.
	* config/aarch64/iterators.md (UNSPEC_FCADD90, UNSPEC_FCADD270,
	UNSPEC_FCMLA, UNSPEC_FCMLA90, UNSPEC_FCMLA180, UNSPEC_FCMLA270): New.
	(FCADD, FCMLA): New.
	(rot): New.
	* config/arm/types.md (neon_fcadd, neon_fcmla): New.

gcc/testsuite/ChangeLog:

2019-01-10  Tamar Christina  <tamar.christina@arm.com>

	* gcc.target/aarch64/advsimd-intrinsics/vector-complex.c: New test.
	* gcc.target/aarch64/advsimd-intrinsics/vector-complex_f16.c: New test.

From-SVN: r267795
This commit is contained in:
parent 90c3d78f51
commit 9d63f43b2d
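
As background for the diff below (an illustration, not part of the commit):
the new intrinsics operate on vectors of complex numbers stored as
interleaved { real, imag } pairs.  A minimal usage sketch, assuming the ACLE
semantics in which a full complex multiply-accumulate is composed from the
#0 and #90 rotations of FCMLA:

	#include <arm_neon.h>

	/* r += a * b for a complex float packed as { re, im }.
	   The rot0 form accumulates the terms involving the real part
	   of A, the rot90 form the terms involving the imaginary part.  */
	float32x2_t
	complex_mla (float32x2_t r, float32x2_t a, float32x2_t b)
	{
	  r = vcmla_f32 (r, a, b);	 /* FCMLA ..., #0  */
	  r = vcmla_rot90_f32 (r, a, b); /* FCMLA ..., #90 */
	  return r;
	}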
gcc/config/aarch64/aarch64-builtins.c

@@ -42,6 +42,7 @@
 #include "langhooks.h"
 #include "gimple-iterator.h"
 #include "case-cfn-macros.h"
+#include "emit-rtl.h"
 
 #define v8qi_UP  E_V8QImode
 #define v4hi_UP  E_V4HImode
@@ -102,7 +103,10 @@ enum aarch64_type_qualifiers
   /* Lane indices - must be in range, and flipped for bigendian.  */
   qualifier_lane_index = 0x200,
   /* Lane indices for single lane structure loads and stores.  */
-  qualifier_struct_load_store_lane_index = 0x400
+  qualifier_struct_load_store_lane_index = 0x400,
+  /* Lane indices selected in pairs - must be in range, and flipped for
+     bigendian.  */
+  qualifier_lane_pair_index = 0x800,
 };
 
 typedef struct
@@ -171,6 +175,11 @@ aarch64_types_ternopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
 #define TYPES_TERNOPUI (aarch64_types_ternopu_imm_qualifiers)
 
 
+static enum aarch64_type_qualifiers
+aarch64_types_quadop_lane_pair_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_none, qualifier_none,
+      qualifier_none, qualifier_lane_pair_index };
+#define TYPES_QUADOP_LANE_PAIR (aarch64_types_quadop_lane_pair_qualifiers)
 static enum aarch64_type_qualifiers
 aarch64_types_quadop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_none, qualifier_none, qualifier_none,
@@ -356,6 +365,18 @@ static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
   CRC32_BUILTIN (crc32cw, SI) \
   CRC32_BUILTIN (crc32cx, DI)
 
+/* The next 8 FCMLA intrinsics require some special handling compared to the
+   normal simd intrinsics.  */
+#define AARCH64_SIMD_FCMLA_LANEQ_BUILTINS \
+  FCMLA_LANEQ_BUILTIN (0, v2sf, fcmla, V2SF, false) \
+  FCMLA_LANEQ_BUILTIN (90, v2sf, fcmla, V2SF, false) \
+  FCMLA_LANEQ_BUILTIN (180, v2sf, fcmla, V2SF, false) \
+  FCMLA_LANEQ_BUILTIN (270, v2sf, fcmla, V2SF, false) \
+  FCMLA_LANEQ_BUILTIN (0, v4hf, fcmla_laneq, V4HF, true) \
+  FCMLA_LANEQ_BUILTIN (90, v4hf, fcmla_laneq, V4HF, true) \
+  FCMLA_LANEQ_BUILTIN (180, v4hf, fcmla_laneq, V4HF, true) \
+  FCMLA_LANEQ_BUILTIN (270, v4hf, fcmla_laneq, V4HF, true) \
+
 typedef struct
 {
   const char *name;
@@ -364,9 +385,22 @@ typedef struct
   unsigned int fcode;
 } aarch64_crc_builtin_datum;
 
+/* Hold information about how to expand the FCMLA_LANEQ builtins.  */
+typedef struct
+{
+  const char *name;
+  machine_mode mode;
+  const enum insn_code icode;
+  unsigned int fcode;
+  bool lane;
+} aarch64_fcmla_laneq_builtin_datum;
+
 #define CRC32_BUILTIN(N, M) \
   AARCH64_BUILTIN_##N,
 
+#define FCMLA_LANEQ_BUILTIN(I, N, X, M, T) \
+  AARCH64_SIMD_BUILTIN_FCMLA_LANEQ##I##_##M,
+
 #undef VAR1
 #define VAR1(T, N, MAP, A) \
   AARCH64_SIMD_BUILTIN_##T##_##N##A,
@@ -399,6 +433,9 @@ enum aarch64_builtins
   AARCH64_PAUTH_BUILTIN_AUTIA1716,
   AARCH64_PAUTH_BUILTIN_PACIA1716,
   AARCH64_PAUTH_BUILTIN_XPACLRI,
+  /* Special cased Armv8.3-A Complex FMA by Lane quad Builtins.  */
+  AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE,
+  AARCH64_SIMD_FCMLA_LANEQ_BUILTINS
   AARCH64_BUILTIN_MAX
 };
@@ -410,6 +447,18 @@ static aarch64_crc_builtin_datum aarch64_crc_builtin_data[] = {
   AARCH64_CRC32_BUILTINS
 };
 
+
+#undef FCMLA_LANEQ_BUILTIN
+#define FCMLA_LANEQ_BUILTIN(I, N, X, M, T) \
+  {"__builtin_aarch64_fcmla_laneq"#I#N, E_##M##mode, CODE_FOR_aarch64_##X##I##N, \
+   AARCH64_SIMD_BUILTIN_FCMLA_LANEQ##I##_##M, T},
+
+/* This structure contains how to manage the mapping from the builtin to the
+   instruction to generate in the backend and how to invoke the instruction.  */
+static aarch64_fcmla_laneq_builtin_datum aarch64_fcmla_lane_builtin_data[] {
+  AARCH64_SIMD_FCMLA_LANEQ_BUILTINS
+};
+
 #undef CRC32_BUILTIN
 
 static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX];
@@ -746,6 +795,34 @@ aarch64_init_simd_builtin_scalar_types (void)
 
 static bool aarch64_simd_builtins_initialized_p = false;
 
+/* Due to the architecture not providing a lane variant of the fcmla
+   instructions we can't use the standard simd builtin expansion code, but we
+   still want the majority of the validation that would normally be done.  */
+
+void
+aarch64_init_fcmla_laneq_builtins (void)
+{
+  unsigned int i = 0;
+
+  for (i = 0; i < ARRAY_SIZE (aarch64_fcmla_lane_builtin_data); ++i)
+    {
+      aarch64_fcmla_laneq_builtin_datum* d
+	= &aarch64_fcmla_lane_builtin_data[i];
+      tree argtype = aarch64_lookup_simd_builtin_type (d->mode, qualifier_none);
+      machine_mode quadmode = GET_MODE_2XWIDER_MODE (d->mode).require ();
+      tree quadtype
+	= aarch64_lookup_simd_builtin_type (quadmode, qualifier_none);
+      tree lanetype
+	= aarch64_simd_builtin_std_type (SImode, qualifier_lane_pair_index);
+      tree ftype = build_function_type_list (argtype, argtype, argtype,
+					     quadtype, lanetype, NULL_TREE);
+      tree fndecl = add_builtin_function (d->name, ftype, d->fcode,
+					  BUILT_IN_MD, NULL, NULL_TREE);
+
+      aarch64_builtin_decls[d->fcode] = fndecl;
+    }
+}
+
 void
 aarch64_init_simd_builtins (void)
 {
@@ -1001,7 +1078,10 @@ aarch64_init_builtins (void)
   aarch64_init_fp16_types ();
 
   if (TARGET_SIMD)
-    aarch64_init_simd_builtins ();
+    {
+      aarch64_init_simd_builtins ();
+      aarch64_init_fcmla_laneq_builtins ();
+    }
 
   aarch64_init_crc32_builtins ();
   aarch64_init_builtin_rsqrt ();
@@ -1031,6 +1111,7 @@ typedef enum
   SIMD_ARG_CONSTANT,
   SIMD_ARG_LANE_INDEX,
   SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX,
+  SIMD_ARG_LANE_PAIR_INDEX,
   SIMD_ARG_STOP
 } builtin_simd_arg;
@@ -1102,6 +1183,22 @@ aarch64_simd_expand_args (rtx target, int icode, int have_retval,
	      /* Keep to GCC-vector-extension lane indices in the RTL.  */
	      op[opc] = aarch64_endian_lane_rtx (vmode, INTVAL (op[opc]));
	    }
+	  /* If the lane index isn't a constant then error out.  */
+	  goto constant_arg;
+
+	case SIMD_ARG_LANE_PAIR_INDEX:
+	  /* Must be a previous operand into which this is an index and
+	     index is restricted to nunits / 2.  */
+	  gcc_assert (opc > 0);
+	  if (CONST_INT_P (op[opc]))
+	    {
+	      machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
+	      unsigned int nunits
+		= GET_MODE_NUNITS (vmode).to_constant ();
+	      aarch64_simd_lane_bounds (op[opc], 0, nunits / 2, exp);
+	      /* Keep to GCC-vector-extension lane indices in the RTL.  */
+	      op[opc] = aarch64_endian_lane_rtx (vmode, INTVAL (op[opc]));
+	    }
	  /* Fall through - if the lane index isn't a constant then
	     the next case will error.  */
	  /* FALLTHRU */
@@ -1215,6 +1312,8 @@ aarch64_simd_expand_builtin (int fcode, tree exp, rtx target)
 
       if (d->qualifiers[qualifiers_k] & qualifier_lane_index)
	args[k] = SIMD_ARG_LANE_INDEX;
+      else if (d->qualifiers[qualifiers_k] & qualifier_lane_pair_index)
+	args[k] = SIMD_ARG_LANE_PAIR_INDEX;
      else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index)
	args[k] = SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX;
      else if (d->qualifiers[qualifiers_k] & qualifier_immediate)
@@ -1317,6 +1416,79 @@ aarch64_expand_builtin_rsqrt (int fcode, tree exp, rtx target)
   return target;
 }
 
+/* Expand a FCMLA lane expression EXP with code FCODE and
+   result going to TARGET if that is convenient.  */
+
+rtx
+aarch64_expand_fcmla_builtin (tree exp, rtx target, int fcode)
+{
+  int bcode = fcode - AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE - 1;
+  aarch64_fcmla_laneq_builtin_datum* d
+    = &aarch64_fcmla_lane_builtin_data[bcode];
+  machine_mode quadmode = GET_MODE_2XWIDER_MODE (d->mode).require ();
+  rtx op0 = force_reg (d->mode, expand_normal (CALL_EXPR_ARG (exp, 0)));
+  rtx op1 = force_reg (d->mode, expand_normal (CALL_EXPR_ARG (exp, 1)));
+  rtx op2 = force_reg (quadmode, expand_normal (CALL_EXPR_ARG (exp, 2)));
+  tree tmp = CALL_EXPR_ARG (exp, 3);
+  rtx lane_idx = expand_expr (tmp, NULL_RTX, VOIDmode, EXPAND_INITIALIZER);
+
+  /* Validate that the lane index is a constant.  */
+  if (!CONST_INT_P (lane_idx))
+    {
+      error ("%Kargument %d must be a constant immediate", exp, 4);
+      return const0_rtx;
+    }
+
+  /* Validate that the index is within the expected range.  */
+  int nunits = GET_MODE_NUNITS (quadmode).to_constant ();
+  aarch64_simd_lane_bounds (lane_idx, 0, nunits / 2, exp);
+
+  /* Keep to GCC-vector-extension lane indices in the RTL.  */
+  lane_idx = aarch64_endian_lane_rtx (quadmode, INTVAL (lane_idx));
+
+  /* Generate the correct register and mode.  */
+  int lane = INTVAL (lane_idx);
+
+  if (lane < nunits / 4)
+    op2 = simplify_gen_subreg (d->mode, op2, quadmode, 0);
+  else
+    {
+      /* Select the upper 64 bits, either a V2SF or V4HF, this however
+	 is quite messy, as the operation required even though simple
+	 doesn't have a simple RTL pattern, and seems it's quite hard to
+	 define using a single RTL pattern.  The target generic version
+	 gen_highpart_mode generates code that isn't optimal.  */
+      rtx temp1 = gen_reg_rtx (d->mode);
+      rtx temp2 = gen_reg_rtx (DImode);
+      temp1 = simplify_gen_subreg (d->mode, op2, quadmode, 0);
+      temp1 = simplify_gen_subreg (V2DImode, temp1, d->mode, 0);
+      emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const1_rtx));
+      op2 = simplify_gen_subreg (d->mode, temp2, GET_MODE (temp2), 0);
+
+      /* And recalculate the index.  */
+      lane -= nunits / 4;
+    }
+
+  if (!target)
+    target = gen_reg_rtx (d->mode);
+  else
+    target = force_reg (d->mode, target);
+
+  rtx pat = NULL_RTX;
+
+  if (d->lane)
+    pat = GEN_FCN (d->icode) (target, op0, op1, op2,
+			      gen_int_mode (lane, SImode));
+  else
+    pat = GEN_FCN (d->icode) (target, op0, op1, op2);
+
+  if (!pat)
+    return NULL_RTX;
+
+  emit_insn (pat);
+  return target;
+}
+
 /* Expand an expression EXP that calls a built-in function,
    with result going to TARGET if that's convenient.  */
 rtx
@@ -1395,6 +1567,16 @@ aarch64_expand_builtin (tree exp,
	}
 
       return target;
+
+    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V2SF:
+    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V2SF:
+    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V2SF:
+    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V2SF:
+    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V4HF:
+    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V4HF:
+    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V4HF:
+    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V4HF:
+      return aarch64_expand_fcmla_builtin (exp, target, fcode);
     }
 
   if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX)
|
@ -109,6 +109,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
|
||||||
|
|
||||||
aarch64_def_or_undef (TARGET_CRC32, "__ARM_FEATURE_CRC32", pfile);
|
aarch64_def_or_undef (TARGET_CRC32, "__ARM_FEATURE_CRC32", pfile);
|
||||||
aarch64_def_or_undef (TARGET_DOTPROD, "__ARM_FEATURE_DOTPROD", pfile);
|
aarch64_def_or_undef (TARGET_DOTPROD, "__ARM_FEATURE_DOTPROD", pfile);
|
||||||
|
aarch64_def_or_undef (TARGET_COMPLEX, "__ARM_FEATURE_COMPLEX", pfile);
|
||||||
|
|
||||||
cpp_undef (pfile, "__AARCH64_CMODEL_TINY__");
|
cpp_undef (pfile, "__AARCH64_CMODEL_TINY__");
|
||||||
cpp_undef (pfile, "__AARCH64_CMODEL_SMALL__");
|
cpp_undef (pfile, "__AARCH64_CMODEL_SMALL__");
|
||||||
|
|
|
||||||
|
|
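
As an aside (not part of the commit), the effect of the aarch64-c.c change is
that user code can now feature-test for the complex intrinsics.  A minimal
sketch:

	#include <arm_neon.h>

	#ifdef __ARM_FEATURE_COMPLEX
	/* Defined when compiling for Armv8.3-A with SIMD, per the
	   TARGET_COMPLEX definition later in this patch.  */
	float32x2_t
	rotate90_add (float32x2_t a, float32x2_t b)
	{
	  return vcadd_rot90_f32 (a, b);  /* roughly a + i*b per pair  */
	}
	#endif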
gcc/config/aarch64/aarch64-simd-builtins.def

@@ -217,6 +217,25 @@
   BUILTIN_VB (QUADOP_LANE, sdot_laneq, 0)
   BUILTIN_VB (QUADOPU_LANE, udot_laneq, 0)
 
+  /* Implemented by aarch64_fcadd<rot><mode>.  */
+  BUILTIN_VHSDF (BINOP, fcadd90, 0)
+  BUILTIN_VHSDF (BINOP, fcadd270, 0)
+
+  /* Implemented by aarch64_fcmla{_lane}{q}<rot><mode>.  */
+  BUILTIN_VHSDF (TERNOP, fcmla0, 0)
+  BUILTIN_VHSDF (TERNOP, fcmla90, 0)
+  BUILTIN_VHSDF (TERNOP, fcmla180, 0)
+  BUILTIN_VHSDF (TERNOP, fcmla270, 0)
+  BUILTIN_VHSDF (QUADOP_LANE_PAIR, fcmla_lane0, 0)
+  BUILTIN_VHSDF (QUADOP_LANE_PAIR, fcmla_lane90, 0)
+  BUILTIN_VHSDF (QUADOP_LANE_PAIR, fcmla_lane180, 0)
+  BUILTIN_VHSDF (QUADOP_LANE_PAIR, fcmla_lane270, 0)
+
+  BUILTIN_VQ_HSF (QUADOP_LANE_PAIR, fcmlaq_lane0, 0)
+  BUILTIN_VQ_HSF (QUADOP_LANE_PAIR, fcmlaq_lane90, 0)
+  BUILTIN_VQ_HSF (QUADOP_LANE_PAIR, fcmlaq_lane180, 0)
+  BUILTIN_VQ_HSF (QUADOP_LANE_PAIR, fcmlaq_lane270, 0)
+
   BUILTIN_VDQ_I (SHIFTIMM, ashr, 3)
   VAR1 (SHIFTIMM, ashr_simd, 0, di)
   BUILTIN_VDQ_I (SHIFTIMM, lshr, 3)
gcc/config/aarch64/aarch64-simd.md

@@ -419,6 +419,70 @@
 }
 )
 
+;; The fcadd and fcmla patterns are made UNSPEC explicitly because their
+;; usage needs to guarantee that the source vectors are contiguous.  It
+;; would be wrong to describe the operation without being able to describe
+;; the permute that is also required, but even if that is done the permute
+;; would have been created as a LOAD_LANES which means the values in the
+;; registers are in the wrong order.
+(define_insn "aarch64_fcadd<rot><mode>"
+  [(set (match_operand:VHSDF 0 "register_operand" "=w")
+	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
+		       (match_operand:VHSDF 2 "register_operand" "w")]
+		       FCADD))]
+  "TARGET_COMPLEX"
+  "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
+  [(set_attr "type" "neon_fcadd")]
+)
+
+(define_insn "aarch64_fcmla<rot><mode>"
+  [(set (match_operand:VHSDF 0 "register_operand" "=w")
+	(plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
+		    (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
+				   (match_operand:VHSDF 3 "register_operand" "w")]
+				   FCMLA)))]
+  "TARGET_COMPLEX"
+  "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
+  [(set_attr "type" "neon_fcmla")]
+)
+
+
+(define_insn "aarch64_fcmla_lane<rot><mode>"
+  [(set (match_operand:VHSDF 0 "register_operand" "=w")
+	(plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
+		    (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
+				   (match_operand:VHSDF 3 "register_operand" "w")
+				   (match_operand:SI 4 "const_int_operand" "n")]
+				   FCMLA)))]
+  "TARGET_COMPLEX"
+  "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>"
+  [(set_attr "type" "neon_fcmla")]
+)
+
+(define_insn "aarch64_fcmla_laneq<rot>v4hf"
+  [(set (match_operand:V4HF 0 "register_operand" "=w")
+	(plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
+		   (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
+				 (match_operand:V8HF 3 "register_operand" "w")
+				 (match_operand:SI 4 "const_int_operand" "n")]
+				 FCMLA)))]
+  "TARGET_COMPLEX"
+  "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>"
+  [(set_attr "type" "neon_fcmla")]
+)
+
+(define_insn "aarch64_fcmlaq_lane<rot><mode>"
+  [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
+	(plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
+		     (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
+				     (match_operand:<VHALF> 3 "register_operand" "w")
+				     (match_operand:SI 4 "const_int_operand" "n")]
+				     FCMLA)))]
+  "TARGET_COMPLEX"
+  "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>"
+  [(set_attr "type" "neon_fcmla")]
+)
+
 ;; These instructions map to the __builtins for the Dot Product operations.
 (define_insn "aarch64_<sur>dot<vsi2qi>"
   [(set (match_operand:VS 0 "register_operand" "=w")
gcc/config/aarch64/aarch64.h

@@ -273,6 +273,9 @@ extern unsigned aarch64_architecture_version;
 /* ARMv8.3-A features.  */
 #define TARGET_ARMV8_3 (AARCH64_ISA_V8_3)
 
+/* Armv8.3-a Complex number extension to AdvSIMD extensions.  */
+#define TARGET_COMPLEX (TARGET_SIMD && TARGET_ARMV8_3)
+
 /* Make sure this is always defined so we don't have to check for ifdefs
    but rather use normal ifs.  */
 #ifndef TARGET_FIX_ERR_A53_835769_DEFAULT
gcc/config/aarch64/arm_neon.h

@@ -33294,6 +33294,481 @@ vbcaxq_s64 (int64x2_t __a, int64x2_t __b, int64x2_t __c)
   return __builtin_aarch64_bcaxqv2di (__a, __b, __c);
 }
 
+#pragma GCC pop_options
+
+/* AdvSIMD Complex numbers intrinsics.  */
+
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.3-a")
+
+#pragma GCC push_options
+#pragma GCC target ("+fp16")
+__extension__ extern __inline float16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcadd_rot90_f16 (float16x4_t __a, float16x4_t __b)
+{
+  return __builtin_aarch64_fcadd90v4hf (__a, __b);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcaddq_rot90_f16 (float16x8_t __a, float16x8_t __b)
+{
+  return __builtin_aarch64_fcadd90v8hf (__a, __b);
+}
+
+__extension__ extern __inline float16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcadd_rot270_f16 (float16x4_t __a, float16x4_t __b)
+{
+  return __builtin_aarch64_fcadd270v4hf (__a, __b);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcaddq_rot270_f16 (float16x8_t __a, float16x8_t __b)
+{
+  return __builtin_aarch64_fcadd270v8hf (__a, __b);
+}
+
+__extension__ extern __inline float16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b)
+{
+  return __builtin_aarch64_fcmla0v4hf (__r, __a, __b);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b)
+{
+  return __builtin_aarch64_fcmla0v8hf (__r, __a, __b);
+}
+
+__extension__ extern __inline float16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b,
+		const int __index)
+{
+  return __builtin_aarch64_fcmla_lane0v4hf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b,
+		 const int __index)
+{
+  return __builtin_aarch64_fcmla_laneq0v4hf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b,
+		 const int __index)
+{
+  return __builtin_aarch64_fcmlaq_lane0v8hf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot90_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b,
+		       const int __index)
+{
+  return __builtin_aarch64_fcmlaq_lane90v8hf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot90_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b,
+		       const int __index)
+{
+  return __builtin_aarch64_fcmla_laneq90v4hf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot90_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b,
+		      const int __index)
+{
+  return __builtin_aarch64_fcmla_lane90v4hf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot90_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b)
+{
+  return __builtin_aarch64_fcmla90v8hf (__r, __a, __b);
+}
+
+__extension__ extern __inline float16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot90_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b)
+{
+  return __builtin_aarch64_fcmla90v4hf (__r, __a, __b);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b,
+		  const int __index)
+{
+  return __builtin_aarch64_fcmla_lane0v8hf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot180_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b,
+			const int __index)
+{
+  return __builtin_aarch64_fcmla_laneq180v4hf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot180_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b,
+		       const int __index)
+{
+  return __builtin_aarch64_fcmla_lane180v4hf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot180_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b)
+{
+  return __builtin_aarch64_fcmla180v8hf (__r, __a, __b);
+}
+
+__extension__ extern __inline float16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot180_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b)
+{
+  return __builtin_aarch64_fcmla180v4hf (__r, __a, __b);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot90_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b,
+			const int __index)
+{
+  return __builtin_aarch64_fcmla_lane90v8hf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot270_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b,
+			 const int __index)
+{
+  return __builtin_aarch64_fcmla_lane270v8hf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot270_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b,
+			const int __index)
+{
+  return __builtin_aarch64_fcmlaq_lane270v8hf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot270_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b,
+			const int __index)
+{
+  return __builtin_aarch64_fcmla_laneq270v4hf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot270_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b)
+{
+  return __builtin_aarch64_fcmla270v8hf (__r, __a, __b);
+}
+
+__extension__ extern __inline float16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot270_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b)
+{
+  return __builtin_aarch64_fcmla270v4hf (__r, __a, __b);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot180_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b,
+			 const int __index)
+{
+  return __builtin_aarch64_fcmla_lane180v8hf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot180_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b,
+			const int __index)
+{
+  return __builtin_aarch64_fcmlaq_lane180v8hf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot270_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b,
+		       const int __index)
+{
+  return __builtin_aarch64_fcmla_lane270v4hf (__r, __a, __b, __index);
+}
+#pragma GCC pop_options
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcadd_rot90_f32 (float32x2_t __a, float32x2_t __b)
+{
+  return __builtin_aarch64_fcadd90v2sf (__a, __b);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcaddq_rot90_f32 (float32x4_t __a, float32x4_t __b)
+{
+  return __builtin_aarch64_fcadd90v4sf (__a, __b);
+}
+
+__extension__ extern __inline float64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcaddq_rot90_f64 (float64x2_t __a, float64x2_t __b)
+{
+  return __builtin_aarch64_fcadd90v2df (__a, __b);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcadd_rot270_f32 (float32x2_t __a, float32x2_t __b)
+{
+  return __builtin_aarch64_fcadd270v2sf (__a, __b);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcaddq_rot270_f32 (float32x4_t __a, float32x4_t __b)
+{
+  return __builtin_aarch64_fcadd270v4sf (__a, __b);
+}
+
+__extension__ extern __inline float64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcaddq_rot270_f64 (float64x2_t __a, float64x2_t __b)
+{
+  return __builtin_aarch64_fcadd270v2df (__a, __b);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b)
+{
+  return __builtin_aarch64_fcmla0v2sf (__r, __a, __b);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b)
+{
+  return __builtin_aarch64_fcmla0v4sf (__r, __a, __b);
+}
+
+__extension__ extern __inline float64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_f64 (float64x2_t __r, float64x2_t __a, float64x2_t __b)
+{
+  return __builtin_aarch64_fcmla0v2df (__r, __a, __b);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b,
+		const int __index)
+{
+  return __builtin_aarch64_fcmla_lane0v2sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b,
+		 const int __index)
+{
+  return __builtin_aarch64_fcmla_laneq0v2sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b,
+		 const int __index)
+{
+  return __builtin_aarch64_fcmlaq_lane0v4sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b,
+		  const int __index)
+{
+  return __builtin_aarch64_fcmla_lane0v4sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot90_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b)
+{
+  return __builtin_aarch64_fcmla90v2sf (__r, __a, __b);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot90_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b)
+{
+  return __builtin_aarch64_fcmla90v4sf (__r, __a, __b);
+}
+
+__extension__ extern __inline float64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot90_f64 (float64x2_t __r, float64x2_t __a, float64x2_t __b)
+{
+  return __builtin_aarch64_fcmla90v2df (__r, __a, __b);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot90_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b,
+		      const int __index)
+{
+  return __builtin_aarch64_fcmla_lane90v2sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot90_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b,
+		       const int __index)
+{
+  return __builtin_aarch64_fcmla_laneq90v2sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot90_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b,
+		       const int __index)
+{
+  return __builtin_aarch64_fcmlaq_lane90v4sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot90_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b,
+			const int __index)
+{
+  return __builtin_aarch64_fcmla_lane90v4sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot180_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b)
+{
+  return __builtin_aarch64_fcmla180v2sf (__r, __a, __b);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot180_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b)
+{
+  return __builtin_aarch64_fcmla180v4sf (__r, __a, __b);
+}
+
+__extension__ extern __inline float64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot180_f64 (float64x2_t __r, float64x2_t __a, float64x2_t __b)
+{
+  return __builtin_aarch64_fcmla180v2df (__r, __a, __b);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot180_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b,
+		       const int __index)
+{
+  return __builtin_aarch64_fcmla_lane180v2sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot180_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b,
+			const int __index)
+{
+  return __builtin_aarch64_fcmla_laneq180v2sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot180_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b,
+			const int __index)
+{
+  return __builtin_aarch64_fcmlaq_lane180v4sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot180_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b,
+			 const int __index)
+{
+  return __builtin_aarch64_fcmla_lane180v4sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot270_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b)
+{
+  return __builtin_aarch64_fcmla270v2sf (__r, __a, __b);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot270_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b)
+{
+  return __builtin_aarch64_fcmla270v4sf (__r, __a, __b);
+}
+
+__extension__ extern __inline float64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot270_f64 (float64x2_t __r, float64x2_t __a, float64x2_t __b)
+{
+  return __builtin_aarch64_fcmla270v2df (__r, __a, __b);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot270_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b,
+		       const int __index)
+{
+  return __builtin_aarch64_fcmla_lane270v2sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmla_rot270_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b,
+			const int __index)
+{
+  return __builtin_aarch64_fcmla_laneq270v2sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot270_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b,
+			const int __index)
+{
+  return __builtin_aarch64_fcmlaq_lane270v4sf (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcmlaq_rot270_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b,
+			 const int __index)
+{
+  return __builtin_aarch64_fcmla_lane270v4sf (__r, __a, __b, __index);
+}
+
 #pragma GCC pop_options
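
One point worth illustrating about the intrinsics above (an aside, not part
of the commit): the lane index selects a complex pair rather than a single
scalar element, which is why the expansion code earlier bounds-checks the
index against nunits / 2.  A small sketch of what that means for a caller:

	#include <arm_neon.h>

	float32x2_t
	mla_by_pair (float32x2_t r, float32x2_t a, float32x4_t b)
	{
	  /* b holds two { re, im } pairs, so the valid lane values
	     here are 0 and 1, not 0..3.  */
	  return vcmla_laneq_f32 (r, a, b, 1);
	}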
gcc/config/aarch64/iterators.md

@@ -485,6 +485,12 @@
     UNSPEC_COND_GE	; Used in aarch64-sve.md.
     UNSPEC_COND_GT	; Used in aarch64-sve.md.
     UNSPEC_LASTB	; Used in aarch64-sve.md.
+    UNSPEC_FCADD90	; Used in aarch64-simd.md.
+    UNSPEC_FCADD270	; Used in aarch64-simd.md.
+    UNSPEC_FCMLA	; Used in aarch64-simd.md.
+    UNSPEC_FCMLA90	; Used in aarch64-simd.md.
+    UNSPEC_FCMLA180	; Used in aarch64-simd.md.
+    UNSPEC_FCMLA270	; Used in aarch64-simd.md.
 ])
 
 ;; ------------------------------------------------------------------
@@ -1134,6 +1140,13 @@
		       (VNx16SI "vnx4bi") (VNx16SF "vnx4bi")
		       (VNx8DI "vnx2bi") (VNx8DF "vnx2bi")])
 
+;; On AArch64 the By element instruction doesn't have a 2S variant.
+;; However, because the instruction always selects a pair of values,
+;; the normal 3SAME instruction can be used here instead.
+(define_mode_attr FCMLA_maybe_lane [(V2SF "<Vtype>") (V4SF "<Vetype>[%4]")
+				    (V4HF "<Vetype>[%4]") (V8HF "<Vetype>[%4]")
+				   ])
+
 ;; -------------------------------------------------------------------
 ;; Code Iterators
 ;; -------------------------------------------------------------------
@@ -1587,6 +1600,14 @@
			       UNSPEC_COND_EQ UNSPEC_COND_NE
			       UNSPEC_COND_GE UNSPEC_COND_GT])
 
+(define_int_iterator FCADD [UNSPEC_FCADD90
+			    UNSPEC_FCADD270])
+
+(define_int_iterator FCMLA [UNSPEC_FCMLA
+			    UNSPEC_FCMLA90
+			    UNSPEC_FCMLA180
+			    UNSPEC_FCMLA270])
+
 ;; Iterators for atomic operations.
 
 (define_int_iterator ATOMIC_LDOP
@@ -1848,6 +1869,13 @@
			    (UNSPEC_COND_MAX "fmaxnm")
			    (UNSPEC_COND_MIN "fminnm")])
 
+(define_int_attr rot [(UNSPEC_FCADD90 "90")
+		      (UNSPEC_FCADD270 "270")
+		      (UNSPEC_FCMLA "0")
+		      (UNSPEC_FCMLA90 "90")
+		      (UNSPEC_FCMLA180 "180")
+		      (UNSPEC_FCMLA270 "270")])
+
 (define_int_attr sve_fmla_op [(UNSPEC_COND_FMLA "fmla")
			      (UNSPEC_COND_FMLS "fmls")
			      (UNSPEC_COND_FNMLA "fnmla")
gcc/config/arm/types.md

@@ -763,6 +763,9 @@
   neon_sub_halve,\
   neon_sub_halve_q,\
   neon_sub_halve_narrow_q,\
+\
+  neon_fcadd,\
+  neon_fcmla,\
 \
   neon_abs,\
   neon_abs_q,\
gcc/testsuite/ChangeLog

@@ -1,3 +1,8 @@
+2019-01-10  Tamar Christina  <tamar.christina@arm.com>
+
+	* gcc.target/aarch64/advsimd-intrinsics/vector-complex.c: New test.
+	* gcc.target/aarch64/advsimd-intrinsics/vector-complex_f16.c: New test.
+
 2019-01-10  Tamar Christina  <tamar.christina@arm.com>
 
	* lib/target-supports.exp
@@ -0,0 +1,251 @@
/* { dg-skip-if "" { arm-*-* } } */
/* { dg-do assemble } */
/* { dg-require-effective-target arm_v8_3a_complex_neon_ok } */
/* { dg-add-options arm_v8_3a_complex_neon } */
/* { dg-additional-options "-O2 -save-temps" } */

#include <arm_neon.h>

float32x2_t
test_vcadd_rot90_f32 (float32x2_t __a, float32x2_t __b)
{
  return vcadd_rot90_f32 (__a, __b);
}

float32x4_t
test_vcaddq_rot90_f32 (float32x4_t __a, float32x4_t __b)
{
  return vcaddq_rot90_f32 (__a, __b);
}

#ifdef __ARM_ARCH_ISA_A64
float64x2_t
test_vcaddq_rot90_f64 (float64x2_t __a, float64x2_t __b)
{
  return vcaddq_rot90_f64 (__a, __b);
}
#endif

float32x2_t
test_vcadd_rot270_f32 (float32x2_t __a, float32x2_t __b)
{
  return vcadd_rot270_f32 (__a, __b);
}

float32x4_t
test_vcaddq_rot270_f32 (float32x4_t __a, float32x4_t __b)
{
  return vcaddq_rot270_f32 (__a, __b);
}

#ifdef __ARM_ARCH_ISA_A64
float64x2_t
test_vcaddq_rot270_f64 (float64x2_t __a, float64x2_t __b)
{
  return vcaddq_rot270_f64 (__a, __b);
}
#endif

float32x2_t
test_vcmla_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b)
{
  return vcmla_f32 (__r, __a, __b);
}

float32x4_t
test_vcmlaq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b)
{
  return vcmlaq_f32 (__r, __a, __b);
}

#ifdef __ARM_ARCH_ISA_A64
float64x2_t
test_vcmlaq_f64 (float64x2_t __r, float64x2_t __a, float64x2_t __b)
{
  return vcmlaq_f64 (__r, __a, __b);
}
#endif

float32x2_t
test_vcmla_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b)
{
  return vcmla_lane_f32 (__r, __a, __b, 0);
}

float32x2_t
test_vcmla_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b)
{
  return vcmla_laneq_f32 (__r, __a, __b, 1);
}

float32x4_t
test_vcmlaq_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b)
{
  return vcmlaq_lane_f32 (__r, __a, __b, 0);
}

float32x4_t
test_vcmlaq_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b)
{
  return vcmlaq_laneq_f32 (__r, __a, __b, 1);
}

float32x2_t
test_vcmla_rot90_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b)
{
  return vcmla_rot90_f32 (__r, __a, __b);
}

float32x4_t
test_vcmlaq_rot90_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b)
{
  return vcmlaq_rot90_f32 (__r, __a, __b);
}

#ifdef __ARM_ARCH_ISA_A64
float64x2_t
test_vcmlaq_rot90_f64 (float64x2_t __r, float64x2_t __a, float64x2_t __b)
{
  return vcmlaq_rot90_f64 (__r, __a, __b);
}
#endif

float32x2_t
test_vcmla_rot90_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b)
{
  return vcmla_rot90_lane_f32 (__r, __a, __b, 0);
}

float32x2_t
test_vcmla_rot90_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b)
{
  return vcmla_rot90_laneq_f32 (__r, __a, __b, 1);
}

float32x4_t
test_vcmlaq_rot90_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b)
{
  return vcmlaq_rot90_lane_f32 (__r, __a, __b, 0);
}

float32x4_t
test_vcmlaq_rot90_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b)
{
  return vcmlaq_rot90_laneq_f32 (__r, __a, __b, 1);
}

float32x2_t
test_vcmla_rot180_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b)
{
  return vcmla_rot180_f32 (__r, __a, __b);
}

float32x4_t
test_vcmlaq_rot180_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b)
{
  return vcmlaq_rot180_f32 (__r, __a, __b);
}

#ifdef __ARM_ARCH_ISA_A64
float64x2_t
test_vcmlaq_rot180_f64 (float64x2_t __r, float64x2_t __a, float64x2_t __b)
{
  return vcmlaq_rot180_f64 (__r, __a, __b);
}
#endif

float32x2_t
test_vcmla_rot180_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b)
{
  return vcmla_rot180_lane_f32 (__r, __a, __b, 0);
}

float32x2_t
test_vcmla_rot180_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b)
{
  return vcmla_rot180_laneq_f32 (__r, __a, __b, 1);
}

float32x4_t
test_vcmlaq_rot180_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b)
{
  return vcmlaq_rot180_lane_f32 (__r, __a, __b, 0);
}

float32x4_t
test_vcmlaq_rot180_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b)
{
  return vcmlaq_rot180_laneq_f32 (__r, __a, __b, 1);
}

float32x2_t
test_vcmla_rot270_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b)
{
  return vcmla_rot270_f32 (__r, __a, __b);
}

float32x4_t
test_vcmlaq_rot270_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b)
{
  return vcmlaq_rot270_f32 (__r, __a, __b);
}

#ifdef __ARM_ARCH_ISA_A64
float64x2_t
test_vcmlaq_rot270_f64 (float64x2_t __r, float64x2_t __a, float64x2_t __b)
{
  return vcmlaq_rot270_f64 (__r, __a, __b);
}
#endif

float32x2_t
test_vcmla_rot270_lane_f32 (float32x2_t __r, float32x2_t __a, float32x2_t __b)
{
  return vcmla_rot270_lane_f32 (__r, __a, __b, 0);
}

float32x2_t
test_vcmla_rot270_laneq_f32 (float32x2_t __r, float32x2_t __a, float32x4_t __b)
{
  return vcmla_rot270_laneq_f32 (__r, __a, __b, 1);
}

float32x4_t
test_vcmlaq_rot270_lane_f32 (float32x4_t __r, float32x4_t __a, float32x2_t __b)
{
  return vcmlaq_rot270_lane_f32 (__r, __a, __b, 0);
}

float32x4_t
test_vcmlaq_rot270_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b)
{
  return vcmlaq_rot270_laneq_f32 (__r, __a, __b, 1);
}

/* { dg-final { scan-assembler-times {fcadd\tv[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d, #270} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcadd\tv[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d, #90} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcadd\tv[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s, #270} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcadd\tv[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s, #90} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcadd\tv[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s, #270} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcadd\tv[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s, #90} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d, #0} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d, #180} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d, #270} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d, #90} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s, #0} 3 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s, #180} 3 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s, #270} 3 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s, #90} 3 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s, #0} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s, #180} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s, #270} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s, #90} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.4s, v[0-9]+.4s, v[0-9]+.s\[0\], #0} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.4s, v[0-9]+.4s, v[0-9]+.s\[0\], #180} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.4s, v[0-9]+.4s, v[0-9]+.s\[0\], #270} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.4s, v[0-9]+.4s, v[0-9]+.s\[0\], #90} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.4s, v[0-9]+.4s, v[0-9]+.s\[1\], #0} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.4s, v[0-9]+.4s, v[0-9]+.s\[1\], #180} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.4s, v[0-9]+.4s, v[0-9]+.s\[1\], #270} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.4s, v[0-9]+.4s, v[0-9]+.s\[1\], #90} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {dup\td[0-9]+, v[0-9]+.d\[1\]} 4 { target { aarch64*-*-* } } } } */
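
A note on the lane forms in the test above: vcmla_lane / vcmla_laneq select
one complex pair from __b and reuse it against every pair of __a. A
hypothetical scalar model of the rotation-0 lane form (an illustration based
on the intrinsics' documented behaviour, not part of the patch):

/* r and a hold PAIRS complex numbers as interleaved re/im floats;
   the pair at index LANE of b is broadcast to all positions.  */
void fcmla_lane0_ref (float *r, const float *a, const float *b,
		      int pairs, int lane)
{
  float bre = b[2 * lane], bim = b[2 * lane + 1];
  for (int i = 0; i < pairs; i++)
    {
      r[2 * i]     += a[2 * i] * bre;	/* rotation #0 real partial  */
      r[2 * i + 1] += a[2 * i] * bim;	/* rotation #0 imag partial  */
    }
}

The dup scan above appears to reflect this: the laneq forms on 64-bit vectors
need the high-half pair of the 128-bit __b, which the compiler extracts with
dup d..., v...d[1] before a plain .2s fcmla; that reading would also explain
why the .2s fcmla patterns are counted three times each.
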
@@ -0,0 +1,306 @@
/* { dg-skip-if "" { arm-*-* } } */
/* { dg-do assemble } */
/* { dg-require-effective-target arm_v8_3a_complex_neon_ok } */
/* { dg-require-effective-target arm_v8_2a_fp16_scalar_ok } */
/* { dg-add-options arm_v8_3a_complex_neon } */
/* { dg-additional-options "-O2 -march=armv8.3-a+fp16 -save-temps" } */

#include <arm_neon.h>

float16x4_t
test_vcadd_rot90_f16 (float16x4_t __a, float16x4_t __b)
{
  return vcadd_rot90_f16 (__a, __b);
}

float16x8_t
test_vcaddq_rot90_f16 (float16x8_t __a, float16x8_t __b)
{
  return vcaddq_rot90_f16 (__a, __b);
}

float16x4_t
test_vcadd_rot270_f16 (float16x4_t __a, float16x4_t __b)
{
  return vcadd_rot270_f16 (__a, __b);
}

float16x8_t
test_vcaddq_rot270_f16 (float16x8_t __a, float16x8_t __b)
{
  return vcaddq_rot270_f16 (__a, __b);
}

float16x4_t
test_vcmla_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b)
{
  return vcmla_f16 (__r, __a, __b);
}

float16x8_t
test_vcmlaq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b)
{
  return vcmlaq_f16 (__r, __a, __b);
}

float16x4_t
test_vcmla_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b)
{
  return vcmla_lane_f16 (__r, __a, __b, 0);
}

float16x4_t
test_vcmla_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b)
{
  return vcmla_laneq_f16 (__r, __a, __b, 0);
}

float16x8_t
test_vcmlaq_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b)
{
  return vcmlaq_lane_f16 (__r, __a, __b, 0);
}

float16x8_t
test_vcmlaq_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b)
{
  return vcmlaq_laneq_f16 (__r, __a, __b, 0);
}

float16x4_t
test_vcmla_lane_f16_2 (float16x4_t __r, float16x4_t __a, float16x4_t __b)
{
  return vcmla_lane_f16 (__r, __a, __b, 1);
}

float16x4_t
test_vcmla_laneq_f16_2 (float16x4_t __r, float16x4_t __a, float16x8_t __b)
{
  return vcmla_laneq_f16 (__r, __a, __b, 3);
}

float16x8_t
test_vcmlaq_lane_f16_2 (float16x8_t __r, float16x8_t __a, float16x4_t __b)
{
  return vcmlaq_lane_f16 (__r, __a, __b, 1);
}

float16x8_t
test_vcmlaq_laneq_f16_2 (float16x8_t __r, float16x8_t __a, float16x8_t __b)
{
  return vcmlaq_laneq_f16 (__r, __a, __b, 3);
}

float16x4_t
test_vcmla_rot90_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b)
{
  return vcmla_rot90_f16 (__r, __a, __b);
}

float16x8_t
test_vcmlaq_rot90_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b)
{
  return vcmlaq_rot90_f16 (__r, __a, __b);
}

float16x4_t
test_vcmla_rot90_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b)
{
  return vcmla_rot90_lane_f16 (__r, __a, __b, 0);
}

float16x4_t
test_vcmla_rot90_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b)
{
  return vcmla_rot90_laneq_f16 (__r, __a, __b, 0);
}

float16x8_t
test_vcmlaq_rot90_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b)
{
  return vcmlaq_rot90_lane_f16 (__r, __a, __b, 0);
}

float16x8_t
test_vcmlaq_rot90_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b)
{
  return vcmlaq_rot90_laneq_f16 (__r, __a, __b, 0);
}

float16x4_t
test_vcmla_rot90_lane_f16_2 (float16x4_t __r, float16x4_t __a, float16x4_t __b)
{
  return vcmla_rot90_lane_f16 (__r, __a, __b, 1);
}

float16x4_t
test_vcmla_rot90_laneq_f16_2 (float16x4_t __r, float16x4_t __a, float16x8_t __b)
{
  return vcmla_rot90_laneq_f16 (__r, __a, __b, 3);
}

float16x8_t
test_vcmlaq_rot90_lane_f16_2 (float16x8_t __r, float16x8_t __a, float16x4_t __b)
{
  return vcmlaq_rot90_lane_f16 (__r, __a, __b, 1);
}

float16x8_t
test_vcmlaq_rot90_laneq_f16_2 (float16x8_t __r, float16x8_t __a, float16x8_t __b)
{
  return vcmlaq_rot90_laneq_f16 (__r, __a, __b, 3);
}

float16x4_t
test_vcmla_rot180_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b)
{
  return vcmla_rot180_f16 (__r, __a, __b);
}

float16x8_t
test_vcmlaq_rot180_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b)
{
  return vcmlaq_rot180_f16 (__r, __a, __b);
}

float16x4_t
test_vcmla_rot180_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b)
{
  return vcmla_rot180_lane_f16 (__r, __a, __b, 0);
}

float16x4_t
test_vcmla_rot180_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b)
{
  return vcmla_rot180_laneq_f16 (__r, __a, __b, 0);
}

float16x8_t
test_vcmlaq_rot180_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b)
{
  return vcmlaq_rot180_lane_f16 (__r, __a, __b, 0);
}

float16x8_t
test_vcmlaq_rot180_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b)
{
  return vcmlaq_rot180_laneq_f16 (__r, __a, __b, 0);
}

float16x4_t
test_vcmla_rot180_lane_f16_2 (float16x4_t __r, float16x4_t __a, float16x4_t __b)
{
  return vcmla_rot180_lane_f16 (__r, __a, __b, 1);
}

float16x4_t
test_vcmla_rot180_laneq_f16_2 (float16x4_t __r, float16x4_t __a, float16x8_t __b)
{
  return vcmla_rot180_laneq_f16 (__r, __a, __b, 3);
}

float16x8_t
test_vcmlaq_rot180_lane_f16_2 (float16x8_t __r, float16x8_t __a, float16x4_t __b)
{
  return vcmlaq_rot180_lane_f16 (__r, __a, __b, 1);
}

float16x8_t
test_vcmlaq_rot180_laneq_f16_2 (float16x8_t __r, float16x8_t __a, float16x8_t __b)
{
  return vcmlaq_rot180_laneq_f16 (__r, __a, __b, 3);
}

float16x4_t
test_vcmla_rot270_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b)
{
  return vcmla_rot270_f16 (__r, __a, __b);
}

float16x8_t
test_vcmlaq_rot270_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b)
{
  return vcmlaq_rot270_f16 (__r, __a, __b);
}

float16x4_t
test_vcmla_rot270_lane_f16 (float16x4_t __r, float16x4_t __a, float16x4_t __b)
{
  return vcmla_rot270_lane_f16 (__r, __a, __b, 0);
}

float16x4_t
test_vcmla_rot270_laneq_f16 (float16x4_t __r, float16x4_t __a, float16x8_t __b)
{
  return vcmla_rot270_laneq_f16 (__r, __a, __b, 0);
}

float16x8_t
test_vcmlaq_rot270_lane_f16 (float16x8_t __r, float16x8_t __a, float16x4_t __b)
{
  return vcmlaq_rot270_lane_f16 (__r, __a, __b, 0);
}

float16x8_t
test_vcmlaq_rot270_laneq_f16 (float16x8_t __r, float16x8_t __a, float16x8_t __b)
{
  return vcmlaq_rot270_laneq_f16 (__r, __a, __b, 0);
}

float16x4_t
test_vcmla_rot270_lane_f16_2 (float16x4_t __r, float16x4_t __a, float16x4_t __b)
{
  return vcmla_rot270_lane_f16 (__r, __a, __b, 1);
}

float16x4_t
test_vcmla_rot270_laneq_f16_2 (float16x4_t __r, float16x4_t __a, float16x8_t __b)
{
  return vcmla_rot270_laneq_f16 (__r, __a, __b, 3);
}

float16x8_t
test_vcmlaq_rot270_lane_f16_2 (float16x8_t __r, float16x8_t __a, float16x4_t __b)
{
  return vcmlaq_rot270_lane_f16 (__r, __a, __b, 1);
}

float16x8_t
test_vcmlaq_rot270_laneq_f16_2 (float16x8_t __r, float16x8_t __a, float16x8_t __b)
{
  return vcmlaq_rot270_laneq_f16 (__r, __a, __b, 3);
}

/* { dg-final { scan-assembler-times {dup\td[0-9]+, v[0-9]+.d\[1\]} 4 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcadd\tv[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h, #270} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcadd\tv[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h, #90} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcadd\tv[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h, #270} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcadd\tv[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h, #90} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h, #0} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h, #180} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h, #270} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h, #90} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.4h, v[0-9]+.4h, v[0-9]+.h\[0\], #0} 2 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.4h, v[0-9]+.4h, v[0-9]+.h\[0\], #180} 2 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.4h, v[0-9]+.4h, v[0-9]+.h\[0\], #270} 2 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.4h, v[0-9]+.4h, v[0-9]+.h\[0\], #90} 2 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.4h, v[0-9]+.4h, v[0-9]+.h\[1\], #0} 2 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.4h, v[0-9]+.4h, v[0-9]+.h\[1\], #180} 2 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.4h, v[0-9]+.4h, v[0-9]+.h\[1\], #270} 2 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.4h, v[0-9]+.4h, v[0-9]+.h\[1\], #90} 2 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h, #0} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h, #180} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h, #270} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h, #90} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.8h, v[0-9]+.8h, v[0-9]+.h\[0\], #0} 2 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.8h, v[0-9]+.8h, v[0-9]+.h\[0\], #180} 2 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.8h, v[0-9]+.8h, v[0-9]+.h\[0\], #270} 2 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.8h, v[0-9]+.8h, v[0-9]+.h\[0\], #90} 2 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.8h, v[0-9]+.8h, v[0-9]+.h\[1\], #0} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.8h, v[0-9]+.8h, v[0-9]+.h\[1\], #180} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.8h, v[0-9]+.8h, v[0-9]+.h\[1\], #270} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.8h, v[0-9]+.8h, v[0-9]+.h\[1\], #90} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.8h, v[0-9]+.8h, v[0-9]+.h\[3\], #0} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.8h, v[0-9]+.8h, v[0-9]+.h\[3\], #180} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.8h, v[0-9]+.8h, v[0-9]+.h\[3\], #270} 1 { target { aarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times {fcmla\tv[0-9]+.8h, v[0-9]+.8h, v[0-9]+.h\[3\], #90} 1 { target { aarch64*-*-* } } } } */
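
Assuming a configured GCC build tree, both new tests can be driven on their
own through the advsimd-intrinsics harness, e.g.:

  make check-gcc RUNTESTFLAGS="advsimd-intrinsics.exp=vector-complex*.c"

Since the tests use dg-do assemble together with -save-temps, the
scan-assembler-times checks run against the saved intermediate .s output
rather than a linked executable.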