mirror of git://gcc.gnu.org/git/gcc.git
[AArch64][6/14] ARMv8.2-A FP16 reduction vector intrinsics
gcc/ * config/aarch64/aarch64-simd-builtins.def (reduc_smax_scal_, reduc_smin_scal_): Use VDQIF_F16. (reduc_smax_nan_scal_, reduc_smin_nan_scal_): Use VHSDF. * config/aarch64/aarch64-simd.md (reduc_<maxmin_uns>_scal_<mode>): Use VHSDF. (aarch64_reduc_<maxmin_uns>_internal<mode>): Likewise. * config/aarch64/iterators.md (VDQIF_F16): New. (vp): Support HF modes. * config/aarch64/arm_neon.h (vmaxv_f16, vmaxvq_f16, vminv_f16, vminvq_f16, vmaxnmv_f16, vmaxnmvq_f16, vminnmv_f16, vminnmvq_f16): New. From-SVN: r238721
This commit is contained in:
parent
ab2e8f01f1
commit
703bbcdfe9
|
|
@ -1,3 +1,16 @@
|
|||
2016-07-25 Jiong Wang <jiong.wang@arm.com>
|
||||
|
||||
* config/aarch64/aarch64-simd-builtins.def (reduc_smax_scal_,
|
||||
reduc_smin_scal_): Use VDQIF_F16.
|
||||
(reduc_smax_nan_scal_, reduc_smin_nan_scal_): Use VHSDF.
|
||||
* config/aarch64/aarch64-simd.md (reduc_<maxmin_uns>_scal_<mode>):
|
||||
Use VHSDF.
|
||||
(aarch64_reduc_<maxmin_uns>_internal<mode>): Likewise.
|
||||
* config/aarch64/iterators.md (VDQIF_F16): New.
|
||||
(vp): Support HF modes.
|
||||
* config/aarch64/arm_neon.h (vmaxv_f16, vmaxvq_f16, vminv_f16,
|
||||
vminvq_f16, vmaxnmv_f16, vmaxnmvq_f16, vminnmv_f16, vminnmvq_f16): New.
|
||||
|
||||
2016-07-25 Jiong Wang <jiong.wang@arm.com>
|
||||
|
||||
* config/aarch64/aarch64-simd.md (*aarch64_mulx_elt_to_64v2df): Rename to
|
||||
|
|
|
|||
|
|
@ -234,12 +234,12 @@
|
|||
BUILTIN_VALL (UNOP, reduc_plus_scal_, 10)
|
||||
|
||||
/* Implemented by reduc_<maxmin_uns>_scal_<mode> (producing scalar). */
|
||||
BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10)
|
||||
BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10)
|
||||
BUILTIN_VDQIF_F16 (UNOP, reduc_smax_scal_, 10)
|
||||
BUILTIN_VDQIF_F16 (UNOP, reduc_smin_scal_, 10)
|
||||
BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10)
|
||||
BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10)
|
||||
BUILTIN_VDQF (UNOP, reduc_smax_nan_scal_, 10)
|
||||
BUILTIN_VDQF (UNOP, reduc_smin_nan_scal_, 10)
|
||||
BUILTIN_VHSDF (UNOP, reduc_smax_nan_scal_, 10)
|
||||
BUILTIN_VHSDF (UNOP, reduc_smin_nan_scal_, 10)
|
||||
|
||||
/* Implemented by <maxmin><mode>3.
|
||||
smax variants map to fmaxnm,
|
||||
|
|
|
|||
|
|
@ -2130,8 +2130,8 @@
|
|||
;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin).
|
||||
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
|
||||
[(match_operand:<VEL> 0 "register_operand")
|
||||
(unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
|
||||
FMAXMINV)]
|
||||
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
|
||||
FMAXMINV)]
|
||||
"TARGET_SIMD"
|
||||
{
|
||||
rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
|
||||
|
|
@ -2178,12 +2178,12 @@
|
|||
)
|
||||
|
||||
(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
|
||||
[(set (match_operand:VDQF 0 "register_operand" "=w")
|
||||
(unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
|
||||
FMAXMINV))]
|
||||
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
||||
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
|
||||
FMAXMINV))]
|
||||
"TARGET_SIMD"
|
||||
"<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
|
||||
[(set_attr "type" "neon_fp_reduc_minmax_<Vetype><q>")]
|
||||
[(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
|
||||
)
|
||||
|
||||
;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
|
||||
|
|
|
|||
|
|
@ -26927,6 +26927,56 @@ vmulxq_n_f16 (float16x8_t __a, float16_t __b)
|
|||
return vmulxq_f16 (__a, vdupq_n_f16 (__b));
|
||||
}
|
||||
|
||||
/* ARMv8.2-A FP16 reduction vector intrinsics. */
|
||||
|
||||
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
|
||||
vmaxv_f16 (float16x4_t __a)
|
||||
{
|
||||
return __builtin_aarch64_reduc_smax_nan_scal_v4hf (__a);
|
||||
}
|
||||
|
||||
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
|
||||
vmaxvq_f16 (float16x8_t __a)
|
||||
{
|
||||
return __builtin_aarch64_reduc_smax_nan_scal_v8hf (__a);
|
||||
}
|
||||
|
||||
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
|
||||
vminv_f16 (float16x4_t __a)
|
||||
{
|
||||
return __builtin_aarch64_reduc_smin_nan_scal_v4hf (__a);
|
||||
}
|
||||
|
||||
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
|
||||
vminvq_f16 (float16x8_t __a)
|
||||
{
|
||||
return __builtin_aarch64_reduc_smin_nan_scal_v8hf (__a);
|
||||
}
|
||||
|
||||
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
|
||||
vmaxnmv_f16 (float16x4_t __a)
|
||||
{
|
||||
return __builtin_aarch64_reduc_smax_scal_v4hf (__a);
|
||||
}
|
||||
|
||||
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
|
||||
vmaxnmvq_f16 (float16x8_t __a)
|
||||
{
|
||||
return __builtin_aarch64_reduc_smax_scal_v8hf (__a);
|
||||
}
|
||||
|
||||
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
|
||||
vminnmv_f16 (float16x4_t __a)
|
||||
{
|
||||
return __builtin_aarch64_reduc_smin_scal_v4hf (__a);
|
||||
}
|
||||
|
||||
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
|
||||
vminnmvq_f16 (float16x8_t __a)
|
||||
{
|
||||
return __builtin_aarch64_reduc_smin_scal_v8hf (__a);
|
||||
}
|
||||
|
||||
#pragma GCC pop_options
|
||||
|
||||
#undef __aarch64_vget_lane_any
|
||||
|
|
|
|||
|
|
@ -159,6 +159,8 @@
|
|||
|
||||
;; Vector modes except double int.
|
||||
(define_mode_iterator VDQIF [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DF])
|
||||
(define_mode_iterator VDQIF_F16 [V8QI V16QI V4HI V8HI V2SI V4SI
|
||||
V4HF V8HF V2SF V4SF V2DF])
|
||||
|
||||
;; Vector modes for S type.
|
||||
(define_mode_iterator VDQ_SI [V2SI V4SI])
|
||||
|
|
@ -760,8 +762,9 @@
|
|||
(define_mode_attr vp [(V8QI "v") (V16QI "v")
|
||||
(V4HI "v") (V8HI "v")
|
||||
(V2SI "p") (V4SI "v")
|
||||
(V2DI "p") (V2DF "p")
|
||||
(V2SF "p") (V4SF "v")])
|
||||
(V2DI "p") (V2DF "p")
|
||||
(V2SF "p") (V4SF "v")
|
||||
(V4HF "v") (V8HF "v")])
|
||||
|
||||
(define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi")])
|
||||
(define_mode_attr VSI2QI [(V2SI "V8QI") (V4SI "V16QI")])
|
||||
|
|
|
|||
Loading…
Reference in New Issue