[AArch64][6/14] ARMv8.2-A FP16 reduction vector intrinsics

gcc/
	* config/aarch64/aarch64-simd-builtins.def (reduc_smax_scal_,
	reduc_smin_scal_): Use VDQIF_F16.
	(reduc_smax_nan_scal_, reduc_smin_nan_scal_): Use VHSDF.
	* config/aarch64/aarch64-simd.md (reduc_<maxmin_uns>_scal_<mode>):
	Use VHSDF.
	(aarch64_reduc_<maxmin_uns>_internal<mode>): Likewise.
	* config/aarch64/iterators.md (VDQIF_F16): New.
	(vp): Support HF modes.
	* config/aarch64/arm_neon.h (vmaxv_f16, vmaxvq_f16, vminv_f16,
	vminvq_f16, vmaxnmv_f16, vmaxnmvq_f16, vminnmv_f16, vminnmvq_f16): New.

From-SVN: r238721
This commit is contained in:
Jiong Wang 2016-07-25 15:00:14 +00:00 committed by Jiong Wang
parent ab2e8f01f1
commit 703bbcdfe9
5 changed files with 78 additions and 12 deletions

View File

@ -1,3 +1,16 @@
2016-07-25 Jiong Wang <jiong.wang@arm.com>
* config/aarch64/aarch64-simd-builtins.def (reduc_smax_scal_,
reduc_smin_scal_): Use VDQIF_F16.
(reduc_smax_nan_scal_, reduc_smin_nan_scal_): Use VHSDF.
* config/aarch64/aarch64-simd.md (reduc_<maxmin_uns>_scal_<mode>):
Use VHSDF.
(aarch64_reduc_<maxmin_uns>_internal<mode>): Likewise.
* config/aarch64/iterators.md (VDQIF_F16): New.
(vp): Support HF modes.
* config/aarch64/arm_neon.h (vmaxv_f16, vmaxvq_f16, vminv_f16,
vminvq_f16, vmaxnmv_f16, vmaxnmvq_f16, vminnmv_f16, vminnmvq_f16): New.
2016-07-25 Jiong Wang <jiong.wang@arm.com> 2016-07-25 Jiong Wang <jiong.wang@arm.com>
* config/aarch64/aarch64-simd.md (*aarch64_mulx_elt_to_64v2df): Rename to * config/aarch64/aarch64-simd.md (*aarch64_mulx_elt_to_64v2df): Rename to

View File

@ -234,12 +234,12 @@
BUILTIN_VALL (UNOP, reduc_plus_scal_, 10) BUILTIN_VALL (UNOP, reduc_plus_scal_, 10)
/* Implemented by reduc_<maxmin_uns>_scal_<mode> (producing scalar). */ /* Implemented by reduc_<maxmin_uns>_scal_<mode> (producing scalar). */
BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10) BUILTIN_VDQIF_F16 (UNOP, reduc_smax_scal_, 10)
BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10) BUILTIN_VDQIF_F16 (UNOP, reduc_smin_scal_, 10)
BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10) BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10)
BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10) BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10)
BUILTIN_VDQF (UNOP, reduc_smax_nan_scal_, 10) BUILTIN_VHSDF (UNOP, reduc_smax_nan_scal_, 10)
BUILTIN_VDQF (UNOP, reduc_smin_nan_scal_, 10) BUILTIN_VHSDF (UNOP, reduc_smin_nan_scal_, 10)
/* Implemented by <maxmin><mode>3. /* Implemented by <maxmin><mode>3.
smax variants map to fmaxnm, smax variants map to fmaxnm,

View File

@ -2130,8 +2130,8 @@
;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin). ;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin).
(define_expand "reduc_<maxmin_uns>_scal_<mode>" (define_expand "reduc_<maxmin_uns>_scal_<mode>"
[(match_operand:<VEL> 0 "register_operand") [(match_operand:<VEL> 0 "register_operand")
(unspec:VDQF [(match_operand:VDQF 1 "register_operand")] (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
FMAXMINV)] FMAXMINV)]
"TARGET_SIMD" "TARGET_SIMD"
{ {
rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0)); rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
@ -2178,12 +2178,12 @@
) )
(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>" (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
[(set (match_operand:VDQF 0 "register_operand" "=w") [(set (match_operand:VHSDF 0 "register_operand" "=w")
(unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")] (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
FMAXMINV))] FMAXMINV))]
"TARGET_SIMD" "TARGET_SIMD"
"<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>" "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
[(set_attr "type" "neon_fp_reduc_minmax_<Vetype><q>")] [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
) )
;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register

View File

@ -26927,6 +26927,56 @@ vmulxq_n_f16 (float16x8_t __a, float16_t __b)
return vmulxq_f16 (__a, vdupq_n_f16 (__b)); return vmulxq_f16 (__a, vdupq_n_f16 (__b));
} }
/* ARMv8.2-A FP16 reduction vector intrinsics. */
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vmaxv_f16 (float16x4_t __a)
{
return __builtin_aarch64_reduc_smax_nan_scal_v4hf (__a);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vmaxvq_f16 (float16x8_t __a)
{
return __builtin_aarch64_reduc_smax_nan_scal_v8hf (__a);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vminv_f16 (float16x4_t __a)
{
return __builtin_aarch64_reduc_smin_nan_scal_v4hf (__a);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vminvq_f16 (float16x8_t __a)
{
return __builtin_aarch64_reduc_smin_nan_scal_v8hf (__a);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vmaxnmv_f16 (float16x4_t __a)
{
return __builtin_aarch64_reduc_smax_scal_v4hf (__a);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vmaxnmvq_f16 (float16x8_t __a)
{
return __builtin_aarch64_reduc_smax_scal_v8hf (__a);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vminnmv_f16 (float16x4_t __a)
{
return __builtin_aarch64_reduc_smin_scal_v4hf (__a);
}
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
vminnmvq_f16 (float16x8_t __a)
{
return __builtin_aarch64_reduc_smin_scal_v8hf (__a);
}
#pragma GCC pop_options #pragma GCC pop_options
#undef __aarch64_vget_lane_any #undef __aarch64_vget_lane_any

View File

@ -159,6 +159,8 @@
;; Vector modes except double int. ;; Vector modes except double int.
(define_mode_iterator VDQIF [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DF]) (define_mode_iterator VDQIF [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DF])
(define_mode_iterator VDQIF_F16 [V8QI V16QI V4HI V8HI V2SI V4SI
V4HF V8HF V2SF V4SF V2DF])
;; Vector modes for S type. ;; Vector modes for S type.
(define_mode_iterator VDQ_SI [V2SI V4SI]) (define_mode_iterator VDQ_SI [V2SI V4SI])
@ -760,8 +762,9 @@
(define_mode_attr vp [(V8QI "v") (V16QI "v") (define_mode_attr vp [(V8QI "v") (V16QI "v")
(V4HI "v") (V8HI "v") (V4HI "v") (V8HI "v")
(V2SI "p") (V4SI "v") (V2SI "p") (V4SI "v")
(V2DI "p") (V2DF "p") (V2DI "p") (V2DF "p")
(V2SF "p") (V4SF "v")]) (V2SF "p") (V4SF "v")
(V4HF "v") (V8HF "v")])
(define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi")]) (define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi")])
(define_mode_attr VSI2QI [(V2SI "V8QI") (V4SI "V16QI")]) (define_mode_attr VSI2QI [(V2SI "V8QI") (V4SI "V16QI")])