mirror of git://gcc.gnu.org/git/gcc.git
[AArch64][4/10] ARMv8.2-A FP16 three operands vector intrinsics
gcc/ * config/aarch64/aarch64-simd-builtins.def: Register new builtins. * config/aarch64/aarch64-simd.md (fma<mode>4, fnma<mode>4): Extend to HF modes. * config/aarch64/arm_neon.h (vfma_f16, vfmaq_f16, vfms_f16, vfmsq_f16): New. From-SVN: r238718
This commit is contained in:
parent
33d72b6386
commit
89ed6d5f5e
112
gcc/ChangeLog
112
gcc/ChangeLog
|
|
@ -1,3 +1,11 @@
|
||||||
|
2016-07-25 Jiong Wang <jiong.wang@arm.com>
|
||||||
|
|
||||||
|
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
|
||||||
|
* config/aarch64/aarch64-simd.md (fma<mode>4, fnma<mode>4): Extend to HF
|
||||||
|
modes.
|
||||||
|
* config/aarch64/arm_neon.h (vfma_f16, vfmaq_f16, vfms_f16,
|
||||||
|
vfmsq_f16): New.
|
||||||
|
|
||||||
2016-07-25 Jiong Wang <jiong.wang@arm.com>
|
2016-07-25 Jiong Wang <jiong.wang@arm.com>
|
||||||
|
|
||||||
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
|
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
|
||||||
|
|
@ -22,20 +30,19 @@
|
||||||
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
|
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
|
||||||
HF, V4HF and V8HF.
|
HF, V4HF and V8HF.
|
||||||
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
|
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
|
||||||
* config/aarch64/arm_neon.h (vadd_f16): New.
|
* config/aarch64/arm_neon.h (vadd_f16, vaddq_f16, vabd_f16, vabdq_f16,
|
||||||
(vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16,
|
vcage_f16, vcageq_f16, vcagt_f16, vcagtq_f16, vcale_f16, vcaleq_f16,
|
||||||
vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16,
|
vcalt_f16, vcaltq_f16, vceq_f16, vceqq_f16, vcge_f16, vcgeq_f16,
|
||||||
vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16,
|
vcgt_f16, vcgtq_f16, vcle_f16, vcleq_f16, vclt_f16, vcltq_f16,
|
||||||
vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16,
|
vcvt_n_f16_s16, vcvtq_n_f16_s16, vcvt_n_f16_u16, vcvtq_n_f16_u16,
|
||||||
vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16,
|
vcvt_n_s16_f16, vcvtq_n_s16_f16, vcvt_n_u16_f16, vcvtq_n_u16_f16,
|
||||||
vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16,
|
vdiv_f16, vdivq_f16, vdup_lane_f16, vdup_laneq_f16, vdupq_lane_f16,
|
||||||
vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16,
|
vdupq_laneq_f16, vdups_lane_f16, vdups_laneq_f16, vmax_f16, vmaxq_f16,
|
||||||
vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16,
|
vmaxnm_f16, vmaxnmq_f16, vmin_f16, vminq_f16, vminnm_f16, vminnmq_f16,
|
||||||
vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16,
|
vmul_f16, vmulq_f16, vmulx_f16, vmulxq_f16, vpadd_f16, vpaddq_f16,
|
||||||
vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16,
|
vpmax_f16, vpmaxq_f16, vpmaxnm_f16, vpmaxnmq_f16, vpmin_f16, vpminq_f16,
|
||||||
vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16,
|
vpminnm_f16, vpminnmq_f16, vrecps_f16, vrecpsq_f16, vrsqrts_f16,
|
||||||
vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16,
|
vrsqrtsq_f16, vsub_f16, vsubq_f16): New.
|
||||||
vsubq_f16): Likewise.
|
|
||||||
|
|
||||||
2016-07-25 Jiong Wang <jiong.wang@arm.com>
|
2016-07-25 Jiong Wang <jiong.wang@arm.com>
|
||||||
|
|
||||||
|
|
@ -63,19 +70,18 @@
|
||||||
(vdupq_n_f16): Likewise.
|
(vdupq_n_f16): Likewise.
|
||||||
(vld1_dup_f16): Use vdup_n_f16.
|
(vld1_dup_f16): Use vdup_n_f16.
|
||||||
(vld1q_dup_f16): Use vdupq_n_f16.
|
(vld1q_dup_f16): Use vdupq_n_f16.
|
||||||
(vabs_f16): New.
|
(vabs_f16, vabsq_f16, vceqz_f16, vceqzq_f16, vcgez_f16, vcgezq_f16,
|
||||||
(vabsq_f16, vceqz_f16, vceqzq_f16, vcgez_f16, vcgezq_f16, vcgtz_f16,
|
vcgtz_f16, vcgtzq_f16, vclez_f16, vclezq_f16, vcltz_f16, vcltzq_f16,
|
||||||
vcgtzq_f16, vclez_f16, vclezq_f16, vcltz_f16, vcltzq_f16, vcvt_f16_s16,
|
vcvt_f16_s16, vcvtq_f16_s16, vcvt_f16_u16, vcvtq_f16_u16, vcvt_s16_f16,
|
||||||
vcvtq_f16_s16, vcvt_f16_u16, vcvtq_f16_u16, vcvt_s16_f16, vcvtq_s16_f16,
|
vcvtq_s16_f16, vcvt_u16_f16, vcvtq_u16_f16, vcvta_s16_f16,
|
||||||
vcvt_u16_f16, vcvtq_u16_f16, vcvta_s16_f16, vcvtaq_s16_f16,
|
vcvtaq_s16_f16, vcvta_u16_f16, vcvtaq_u16_f16, vcvtm_s16_f16,
|
||||||
vcvta_u16_f16, vcvtaq_u16_f16, vcvtm_s16_f16, vcvtmq_s16_f16,
|
vcvtmq_s16_f16, vcvtm_u16_f16, vcvtmq_u16_f16, vcvtn_s16_f16,
|
||||||
vcvtm_u16_f16, vcvtmq_u16_f16, vcvtn_s16_f16, vcvtnq_s16_f16,
|
vcvtnq_s16_f16, vcvtn_u16_f16, vcvtnq_u16_f16, vcvtp_s16_f16,
|
||||||
vcvtn_u16_f16, vcvtnq_u16_f16, vcvtp_s16_f16, vcvtpq_s16_f16,
|
vcvtpq_s16_f16, vcvtp_u16_f16, vcvtpq_u16_f16, vneg_f16, vnegq_f16,
|
||||||
vcvtp_u16_f16, vcvtpq_u16_f16, vneg_f16, vnegq_f16, vrecpe_f16,
|
vrecpe_f16, vrecpeq_f16, vrnd_f16, vrndq_f16, vrnda_f16, vrndaq_f16,
|
||||||
vrecpeq_f16, vrnd_f16, vrndq_f16, vrnda_f16, vrndaq_f16, vrndi_f16,
|
vrndi_f16, vrndiq_f16, vrndm_f16, vrndmq_f16, vrndn_f16, vrndnq_f16,
|
||||||
vrndiq_f16, vrndm_f16, vrndmq_f16, vrndn_f16, vrndnq_f16, vrndp_f16,
|
vrndp_f16, vrndpq_f16, vrndx_f16, vrndxq_f16, vrsqrte_f16, vrsqrteq_f16,
|
||||||
vrndpq_f16, vrndx_f16, vrndxq_f16, vrsqrte_f16, vrsqrteq_f16, vsqrt_f16,
|
vsqrt_f16, vsqrtq_f16): New.
|
||||||
vsqrtq_f16): Likewise.
|
|
||||||
|
|
||||||
2016-07-25 Jiong Wang <jiong.wang@arm.com>
|
2016-07-25 Jiong Wang <jiong.wang@arm.com>
|
||||||
|
|
||||||
|
|
@ -83,45 +89,19 @@
|
||||||
(aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>): Use VALL_F16.
|
(aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>): Use VALL_F16.
|
||||||
(aarch64_ext<mode>): Likewise.
|
(aarch64_ext<mode>): Likewise.
|
||||||
(aarch64_rev<REVERSE:rev_op><mode>): Likewise.
|
(aarch64_rev<REVERSE:rev_op><mode>): Likewise.
|
||||||
* config/aarch64/aarch64.c (aarch64_evpc_trn): Support V4HFmode and
|
* config/aarch64/aarch64.c (aarch64_evpc_trn, aarch64_evpc_uzp,
|
||||||
V8HFmode.
|
aarch64_evpc_zip, aarch64_evpc_ext, aarch64_evpc_rev): Support V4HFmode
|
||||||
(aarch64_evpc_uzp): Likewise.
|
and V8HFmode.
|
||||||
(aarch64_evpc_zip): Likewise.
|
* config/aarch64/arm_neon.h (__INTERLEAVE_LIST): Support float16x4_t,
|
||||||
(aarch64_evpc_ext): Likewise.
|
float16x8_t.
|
||||||
(aarch64_evpc_rev): Likewise.
|
(__aarch64_vdup_lane_f16, __aarch64_vdup_laneq_f16,
|
||||||
* config/aarch64/arm_neon.h (__aarch64_vdup_lane_f16): New.
|
__aarch64_vdupq_lane_f16, __aarch64_vdupq_laneq_f16, vbsl_f16,
|
||||||
(__aarch64_vdup_laneq_f16): New..
|
vbslq_f16, vdup_n_f16, vdupq_n_f16, vdup_lane_f16, vdup_laneq_f16,
|
||||||
(__aarch64_vdupq_lane_f16): New.
|
vdupq_lane_f16, vdupq_laneq_f16, vduph_lane_f16, vduph_laneq_f16,
|
||||||
(__aarch64_vdupq_laneq_f16): New.
|
vext_f16, vextq_f16, vmov_n_f16, vmovq_n_f16, vrev64_f16, vrev64q_f16,
|
||||||
(vbsl_f16): New.
|
vtrn1_f16, vtrn1q_f16, vtrn2_f16, vtrn2q_f16, vtrn_f16, vtrnq_f16,
|
||||||
(vbslq_f16): New.
|
vuzp1_f16, vuzp1q_f16, vuzp2_f16, vuzp2q_f16, vzip1_f16, vzip2q_f16):
|
||||||
(vdup_n_f16): New.
|
New.
|
||||||
(vdupq_n_f16): New.
|
|
||||||
(vdup_lane_f16): New.
|
|
||||||
(vdup_laneq_f16): New.
|
|
||||||
(vdupq_lane_f16): New.
|
|
||||||
(vdupq_laneq_f16): New.
|
|
||||||
(vduph_lane_f16): New.
|
|
||||||
(vduph_laneq_f16): New.
|
|
||||||
(vext_f16): New.
|
|
||||||
(vextq_f16): New.
|
|
||||||
(vmov_n_f16): New.
|
|
||||||
(vmovq_n_f16): New.
|
|
||||||
(vrev64_f16): New.
|
|
||||||
(vrev64q_f16): New.
|
|
||||||
(vtrn1_f16): New.
|
|
||||||
(vtrn1q_f16): New.
|
|
||||||
(vtrn2_f16): New.
|
|
||||||
(vtrn2q_f16): New.
|
|
||||||
(vtrn_f16): New.
|
|
||||||
(vtrnq_f16): New.
|
|
||||||
(__INTERLEAVE_LIST): Support float16x4_t, float16x8_t.
|
|
||||||
(vuzp1_f16): New.
|
|
||||||
(vuzp1q_f16): New.
|
|
||||||
(vuzp2_f16): New.
|
|
||||||
(vuzp2q_f16): New.
|
|
||||||
(vzip1_f16): New.
|
|
||||||
(vzip2q_f16): New.
|
|
||||||
(vmov_n_f16): Reimplement using vdup_n_f16.
|
(vmov_n_f16): Reimplement using vdup_n_f16.
|
||||||
(vmovq_n_f16): Reimplement using vdupq_n_f16..
|
(vmovq_n_f16): Reimplement using vdupq_n_f16..
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -405,7 +405,9 @@
|
||||||
BUILTIN_VALL_F16 (STORE1, st1, 0)
|
BUILTIN_VALL_F16 (STORE1, st1, 0)
|
||||||
|
|
||||||
/* Implemented by fma<mode>4. */
|
/* Implemented by fma<mode>4. */
|
||||||
BUILTIN_VDQF (TERNOP, fma, 4)
|
BUILTIN_VHSDF (TERNOP, fma, 4)
|
||||||
|
/* Implemented by fnma<mode>4. */
|
||||||
|
BUILTIN_VHSDF (TERNOP, fnma, 4)
|
||||||
|
|
||||||
/* Implemented by aarch64_simd_bsl<mode>. */
|
/* Implemented by aarch64_simd_bsl<mode>. */
|
||||||
BUILTIN_VDQQH (BSL_P, simd_bsl, 0)
|
BUILTIN_VDQQH (BSL_P, simd_bsl, 0)
|
||||||
|
|
|
||||||
|
|
@ -1581,13 +1581,13 @@
|
||||||
)
|
)
|
||||||
|
|
||||||
(define_insn "fma<mode>4"
|
(define_insn "fma<mode>4"
|
||||||
[(set (match_operand:VDQF 0 "register_operand" "=w")
|
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
||||||
(fma:VDQF (match_operand:VDQF 1 "register_operand" "w")
|
(fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
|
||||||
(match_operand:VDQF 2 "register_operand" "w")
|
(match_operand:VHSDF 2 "register_operand" "w")
|
||||||
(match_operand:VDQF 3 "register_operand" "0")))]
|
(match_operand:VHSDF 3 "register_operand" "0")))]
|
||||||
"TARGET_SIMD"
|
"TARGET_SIMD"
|
||||||
"fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
"fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
||||||
[(set_attr "type" "neon_fp_mla_<Vetype><q>")]
|
[(set_attr "type" "neon_fp_mla_<stype><q>")]
|
||||||
)
|
)
|
||||||
|
|
||||||
(define_insn "*aarch64_fma4_elt<mode>"
|
(define_insn "*aarch64_fma4_elt<mode>"
|
||||||
|
|
@ -1654,15 +1654,15 @@
|
||||||
)
|
)
|
||||||
|
|
||||||
(define_insn "fnma<mode>4"
|
(define_insn "fnma<mode>4"
|
||||||
[(set (match_operand:VDQF 0 "register_operand" "=w")
|
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
||||||
(fma:VDQF
|
(fma:VHSDF
|
||||||
(match_operand:VDQF 1 "register_operand" "w")
|
(match_operand:VHSDF 1 "register_operand" "w")
|
||||||
(neg:VDQF
|
(neg:VHSDF
|
||||||
(match_operand:VDQF 2 "register_operand" "w"))
|
(match_operand:VHSDF 2 "register_operand" "w"))
|
||||||
(match_operand:VDQF 3 "register_operand" "0")))]
|
(match_operand:VHSDF 3 "register_operand" "0")))]
|
||||||
"TARGET_SIMD"
|
"TARGET_SIMD"
|
||||||
"fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
"fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
||||||
[(set_attr "type" "neon_fp_mla_<Vetype><q>")]
|
[(set_attr "type" "neon_fp_mla_<stype><q>")]
|
||||||
)
|
)
|
||||||
|
|
||||||
(define_insn "*aarch64_fnma4_elt<mode>"
|
(define_insn "*aarch64_fnma4_elt<mode>"
|
||||||
|
|
|
||||||
|
|
@ -26747,6 +26747,32 @@ vsubq_f16 (float16x8_t __a, float16x8_t __b)
|
||||||
return __a - __b;
|
return __a - __b;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ARMv8.2-A FP16 three operands vector intrinsics. */
|
||||||
|
|
||||||
|
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
|
||||||
|
vfma_f16 (float16x4_t __a, float16x4_t __b, float16x4_t __c)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_fmav4hf (__b, __c, __a);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
|
||||||
|
vfmaq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_fmav8hf (__b, __c, __a);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
|
||||||
|
vfms_f16 (float16x4_t __a, float16x4_t __b, float16x4_t __c)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_fnmav4hf (__b, __c, __a);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
|
||||||
|
vfmsq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_fnmav8hf (__b, __c, __a);
|
||||||
|
}
|
||||||
|
|
||||||
#pragma GCC pop_options
|
#pragma GCC pop_options
|
||||||
|
|
||||||
#undef __aarch64_vget_lane_any
|
#undef __aarch64_vget_lane_any
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue