mirror of git://gcc.gnu.org/git/gcc.git
[AArch64][8/10] ARMv8.2-A FP16 two operands scalar intrinsics
gcc/ * config/aarch64/aarch64-simd-builtins.def: Register new builtins. * config/aarch64/aarch64.md (<FCVT_F2FIXED:fcvt_fixed_insn>hf<mode>3): New. (<FCVT_FIXED2F:fcvt_fixed_insn><mode>hf3): Likewise. (add<mode>3): Likewise. (sub<mode>3): Likewise. (mul<mode>3): Likewise. (div<mode>3): Likewise. (*div<mode>3): Likewise. (<fmaxmin><mode>3): Extend to HF. * config/aarch64/aarch64-simd.md (aarch64_rsqrts<mode>): Likewise. (fabd<mode>3): Likewise. (<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_HSDF:mode>3): Likewise. (<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_HSDI:mode>3): Likewise. (aarch64_fmulx<mode>): Likewise. (aarch64_fac<optab><mode>): Likewise. (aarch64_frecps<mode>): Likewise. (<FCVT_F2FIXED:fcvt_fixed_insn>hfhi3): New. (<FCVT_FIXED2F:fcvt_fixed_insn>hihf3): Likewise. * config/aarch64/iterators.md (VHSDF_SDF): Delete. (VSDQ_HSDI): Support HI. (fcvt_target, FCVT_TARGET): Likewise. * config/aarch64/arm_fp16.h (vaddh_f16, vsubh_f16, vabdh_f16, vcageh_f16, vcagth_f16, vcaleh_f16, vcalth_f16, vceqh_f16, vcgeh_f16, vcgth_f16, vcleh_f16, vclth_f16, vcvth_n_f16_s16, vcvth_n_f16_s32, vcvth_n_f16_s64, vcvth_n_f16_u16, vcvth_n_f16_u32, vcvth_n_f16_u64, vcvth_n_s16_f16, vcvth_n_s32_f16, vcvth_n_s64_f16, vcvth_n_u16_f16, vcvth_n_u32_f16, vcvth_n_u64_f16, vdivh_f16, vmaxh_f16, vmaxnmh_f16, vminh_f16, vminnmh_f16, vmulh_f16, vmulxh_f16, vrecpsh_f16, vrsqrtsh_f16): New. From-SVN: r238723
This commit is contained in:
parent
d7f33f07d8
commit
68ad28c34a
|
|
@ -1,3 +1,36 @@
|
|||
2016-07-25 Jiong Wang <jiong.wang@arm.com>
|
||||
|
||||
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
|
||||
* config/aarch64/aarch64.md (<FCVT_F2FIXED:fcvt_fixed_insn>hf<mode>3):
|
||||
New.
|
||||
(<FCVT_FIXED2F:fcvt_fixed_insn><mode>hf3): Likewise.
|
||||
(add<mode>3): Likewise.
|
||||
(sub<mode>3): Likewise.
|
||||
(mul<mode>3): Likewise.
|
||||
(div<mode>3): Likewise.
|
||||
(*div<mode>3): Likewise.
|
||||
(<fmaxmin><mode>3): Extend to HF.
|
||||
* config/aarch64/aarch64-simd.md (aarch64_rsqrts<mode>): Likewise.
|
||||
(fabd<mode>3): Likewise.
|
||||
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_HSDF:mode>3): Likewise.
|
||||
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_HSDI:mode>3): Likewise.
|
||||
(aarch64_fmulx<mode>): Likewise.
|
||||
(aarch64_fac<optab><mode>): Likewise.
|
||||
(aarch64_frecps<mode>): Likewise.
|
||||
(<FCVT_F2FIXED:fcvt_fixed_insn>hfhi3): New.
|
||||
(<FCVT_FIXED2F:fcvt_fixed_insn>hihf3): Likewise.
|
||||
* config/aarch64/iterators.md (VHSDF_SDF): Delete.
|
||||
(VSDQ_HSDI): Support HI.
|
||||
(fcvt_target, FCVT_TARGET): Likewise.
|
||||
* config/aarch64/arm_fp16.h (vaddh_f16, vsubh_f16, vabdh_f16,
|
||||
vcageh_f16, vcagth_f16, vcaleh_f16, vcalth_f16, vceqh_f16, vcgeh_f16,
|
||||
vcgth_f16, vcleh_f16, vclth_f16, vcvth_n_f16_s16, vcvth_n_f16_s32,
|
||||
vcvth_n_f16_s64, vcvth_n_f16_u16, vcvth_n_f16_u32, vcvth_n_f16_u64,
|
||||
vcvth_n_s16_f16, vcvth_n_s32_f16, vcvth_n_s64_f16, vcvth_n_u16_f16,
|
||||
vcvth_n_u32_f16, vcvth_n_u64_f16, vdivh_f16, vmaxh_f16, vmaxnmh_f16,
|
||||
vminh_f16, vminnmh_f16, vmulh_f16, vmulxh_f16, vrecpsh_f16,
|
||||
vrsqrtsh_f16): New.
|
||||
|
||||
2016-07-25 Jiong Wang <jiong.wang@arm.com>
|
||||
|
||||
* config.gcc (aarch64*-*-*): Install arm_fp16.h.
|
||||
|
|
@ -11,6 +44,7 @@
|
|||
(l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2): Likewise.
|
||||
(fix_trunc<GPF:mode><GPI:mode>2): Likewise.
|
||||
(sqrt<mode>2): Likewise.
|
||||
(*sqrt<mode>2): Likewise.
|
||||
(abs<mode>2): Likewise.
|
||||
(<optab><mode>hf2): New pattern for HF mode.
|
||||
(<optab>hihf2): Likewise.
|
||||
|
|
@ -58,7 +92,7 @@
|
|||
(f, fp): Support HF modes.
|
||||
* config/aarch64/arm_neon.h (vfma_lane_f16, vfmaq_lane_f16,
|
||||
vfma_laneq_f16, vfmaq_laneq_f16, vfma_n_f16, vfmaq_n_f16, vfms_lane_f16,
|
||||
vfmsq_lane_f16, vfms_laneq_f16, vfmsq_laneq_f16, vfms_n_f16,
|
||||
vfmsq_lane_f16, vfms_laneq_f16, vfmsq_laneq_f16, vfms_n_f16,
|
||||
vfmsq_n_f16, vmul_lane_f16, vmulq_lane_f16, vmul_laneq_f16,
|
||||
vmulq_laneq_f16, vmul_n_f16, vmulq_n_f16, vmulx_lane_f16,
|
||||
vmulxq_lane_f16, vmulx_laneq_f16, vmulxq_laneq_f16): New.
|
||||
|
|
@ -159,7 +193,7 @@
|
|||
and V8HFmode.
|
||||
* config/aarch64/arm_neon.h (__INTERLEAVE_LIST): Support float16x4_t,
|
||||
float16x8_t.
|
||||
(__aarch64_vdup_lane_f16, __aarch64_vdup_laneq_f16,
|
||||
(__aarch64_vdup_lane_f16, __aarch64_vdup_laneq_f16,
|
||||
__aarch64_vdupq_lane_f16, __aarch64_vdupq_laneq_f16, vbsl_f16,
|
||||
vbslq_f16, vdup_n_f16, vdupq_n_f16, vdup_lane_f16, vdup_laneq_f16,
|
||||
vdupq_lane_f16, vdupq_laneq_f16, vduph_lane_f16, vduph_laneq_f16,
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@
|
|||
|
||||
BUILTIN_VDC (COMBINE, combine, 0)
|
||||
BUILTIN_VB (BINOP, pmul, 0)
|
||||
BUILTIN_VHSDF_SDF (BINOP, fmulx, 0)
|
||||
BUILTIN_VHSDF_HSDF (BINOP, fmulx, 0)
|
||||
BUILTIN_VHSDF_DF (UNOP, sqrt, 2)
|
||||
BUILTIN_VD_BHSI (BINOP, addp, 0)
|
||||
VAR1 (UNOP, addp, 0, di)
|
||||
|
|
@ -393,13 +393,12 @@
|
|||
/* Implemented by
|
||||
aarch64_frecp<FRECP:frecp_suffix><mode>. */
|
||||
BUILTIN_GPF_F16 (UNOP, frecpe, 0)
|
||||
BUILTIN_GPF (BINOP, frecps, 0)
|
||||
BUILTIN_GPF_F16 (UNOP, frecpx, 0)
|
||||
|
||||
BUILTIN_VDQ_SI (UNOP, urecpe, 0)
|
||||
|
||||
BUILTIN_VHSDF (UNOP, frecpe, 0)
|
||||
BUILTIN_VHSDF (BINOP, frecps, 0)
|
||||
BUILTIN_VHSDF_HSDF (BINOP, frecps, 0)
|
||||
|
||||
/* Implemented by a mixture of abs2 patterns. Note the DImode builtin is
|
||||
only ever used for the int64x1_t intrinsic, there is no scalar version. */
|
||||
|
|
@ -496,17 +495,23 @@
|
|||
/* Implemented by <FCVT_F2FIXED/FIXED2F:fcvt_fixed_insn><*><*>3. */
|
||||
BUILTIN_VSDQ_HSDI (SHIFTIMM, scvtf, 3)
|
||||
BUILTIN_VSDQ_HSDI (FCVTIMM_SUS, ucvtf, 3)
|
||||
BUILTIN_VHSDF_SDF (SHIFTIMM, fcvtzs, 3)
|
||||
BUILTIN_VHSDF_SDF (SHIFTIMM_USS, fcvtzu, 3)
|
||||
BUILTIN_VHSDF_HSDF (SHIFTIMM, fcvtzs, 3)
|
||||
BUILTIN_VHSDF_HSDF (SHIFTIMM_USS, fcvtzu, 3)
|
||||
VAR1 (SHIFTIMM, scvtfsi, 3, hf)
|
||||
VAR1 (SHIFTIMM, scvtfdi, 3, hf)
|
||||
VAR1 (FCVTIMM_SUS, ucvtfsi, 3, hf)
|
||||
VAR1 (FCVTIMM_SUS, ucvtfdi, 3, hf)
|
||||
BUILTIN_GPI (SHIFTIMM, fcvtzshf, 3)
|
||||
BUILTIN_GPI (SHIFTIMM_USS, fcvtzuhf, 3)
|
||||
|
||||
/* Implemented by aarch64_rsqrte<mode>. */
|
||||
BUILTIN_VHSDF_HSDF (UNOP, rsqrte, 0)
|
||||
|
||||
/* Implemented by aarch64_rsqrts<mode>. */
|
||||
BUILTIN_VHSDF_SDF (BINOP, rsqrts, 0)
|
||||
BUILTIN_VHSDF_HSDF (BINOP, rsqrts, 0)
|
||||
|
||||
/* Implemented by fabd<mode>3. */
|
||||
BUILTIN_VHSDF_SDF (BINOP, fabd, 3)
|
||||
BUILTIN_VHSDF_HSDF (BINOP, fabd, 3)
|
||||
|
||||
/* Implemented by aarch64_faddp<mode>. */
|
||||
BUILTIN_VHSDF (BINOP, faddp, 0)
|
||||
|
|
@ -522,10 +527,10 @@
|
|||
BUILTIN_VHSDF_HSDF (UNOP, neg, 2)
|
||||
|
||||
/* Implemented by aarch64_fac<optab><mode>. */
|
||||
BUILTIN_VHSDF_SDF (BINOP_USS, faclt, 0)
|
||||
BUILTIN_VHSDF_SDF (BINOP_USS, facle, 0)
|
||||
BUILTIN_VHSDF_SDF (BINOP_USS, facgt, 0)
|
||||
BUILTIN_VHSDF_SDF (BINOP_USS, facge, 0)
|
||||
BUILTIN_VHSDF_HSDF (BINOP_USS, faclt, 0)
|
||||
BUILTIN_VHSDF_HSDF (BINOP_USS, facle, 0)
|
||||
BUILTIN_VHSDF_HSDF (BINOP_USS, facgt, 0)
|
||||
BUILTIN_VHSDF_HSDF (BINOP_USS, facge, 0)
|
||||
|
||||
/* Implemented by sqrt<mode>2. */
|
||||
VAR1 (UNOP, sqrt, 2, hf)
|
||||
|
|
@ -543,3 +548,7 @@
|
|||
BUILTIN_GPI_I16 (UNOPUS, fixuns_trunchf, 2)
|
||||
BUILTIN_GPI (UNOPUS, fixuns_truncsf, 2)
|
||||
BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2)
|
||||
|
||||
/* Implemented by <fmaxmin><mode>3. */
|
||||
VAR1 (BINOP, fmax, 3, hf)
|
||||
VAR1 (BINOP, fmin, 3, hf)
|
||||
|
|
|
|||
|
|
@ -391,9 +391,9 @@
|
|||
[(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
|
||||
|
||||
(define_insn "aarch64_rsqrts<mode>"
|
||||
[(set (match_operand:VHSDF_SDF 0 "register_operand" "=w")
|
||||
(unspec:VHSDF_SDF [(match_operand:VHSDF_SDF 1 "register_operand" "w")
|
||||
(match_operand:VHSDF_SDF 2 "register_operand" "w")]
|
||||
[(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
|
||||
(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
|
||||
(match_operand:VHSDF_HSDF 2 "register_operand" "w")]
|
||||
UNSPEC_RSQRTS))]
|
||||
"TARGET_SIMD"
|
||||
"frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
|
||||
|
|
@ -475,11 +475,11 @@
|
|||
)
|
||||
|
||||
(define_insn "fabd<mode>3"
|
||||
[(set (match_operand:VHSDF_SDF 0 "register_operand" "=w")
|
||||
(abs:VHSDF_SDF
|
||||
(minus:VHSDF_SDF
|
||||
(match_operand:VHSDF_SDF 1 "register_operand" "w")
|
||||
(match_operand:VHSDF_SDF 2 "register_operand" "w"))))]
|
||||
[(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
|
||||
(abs:VHSDF_HSDF
|
||||
(minus:VHSDF_HSDF
|
||||
(match_operand:VHSDF_HSDF 1 "register_operand" "w")
|
||||
(match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
|
||||
"TARGET_SIMD"
|
||||
"fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
|
||||
[(set_attr "type" "neon_fp_abd_<stype><q>")]
|
||||
|
|
@ -3078,10 +3078,10 @@
|
|||
;; fmulx.
|
||||
|
||||
(define_insn "aarch64_fmulx<mode>"
|
||||
[(set (match_operand:VHSDF_SDF 0 "register_operand" "=w")
|
||||
(unspec:VHSDF_SDF
|
||||
[(match_operand:VHSDF_SDF 1 "register_operand" "w")
|
||||
(match_operand:VHSDF_SDF 2 "register_operand" "w")]
|
||||
[(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
|
||||
(unspec:VHSDF_HSDF
|
||||
[(match_operand:VHSDF_HSDF 1 "register_operand" "w")
|
||||
(match_operand:VHSDF_HSDF 2 "register_operand" "w")]
|
||||
UNSPEC_FMULX))]
|
||||
"TARGET_SIMD"
|
||||
"fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
|
||||
|
|
@ -4341,10 +4341,10 @@
|
|||
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
|
||||
(neg:<V_cmp_result>
|
||||
(FAC_COMPARISONS:<V_cmp_result>
|
||||
(abs:VHSDF_SDF
|
||||
(match_operand:VHSDF_SDF 1 "register_operand" "w"))
|
||||
(abs:VHSDF_SDF
|
||||
(match_operand:VHSDF_SDF 2 "register_operand" "w"))
|
||||
(abs:VHSDF_HSDF
|
||||
(match_operand:VHSDF_HSDF 1 "register_operand" "w"))
|
||||
(abs:VHSDF_HSDF
|
||||
(match_operand:VHSDF_HSDF 2 "register_operand" "w"))
|
||||
)))]
|
||||
"TARGET_SIMD"
|
||||
"fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
|
||||
|
|
@ -5460,10 +5460,10 @@
|
|||
)
|
||||
|
||||
(define_insn "aarch64_frecps<mode>"
|
||||
[(set (match_operand:VHSDF_SDF 0 "register_operand" "=w")
|
||||
(unspec:VHSDF_SDF
|
||||
[(match_operand:VHSDF_SDF 1 "register_operand" "w")
|
||||
(match_operand:VHSDF_SDF 2 "register_operand" "w")]
|
||||
[(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
|
||||
(unspec:VHSDF_HSDF
|
||||
[(match_operand:VHSDF_HSDF 1 "register_operand" "w")
|
||||
(match_operand:VHSDF_HSDF 2 "register_operand" "w")]
|
||||
UNSPEC_FRECPS))]
|
||||
"TARGET_SIMD"
|
||||
"frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
|
||||
|
|
|
|||
|
|
@ -4660,38 +4660,78 @@
|
|||
(set_attr "simd" "*, yes")]
|
||||
)
|
||||
|
||||
(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn>hf<mode>3"
|
||||
[(set (match_operand:GPI 0 "register_operand" "=r")
|
||||
(unspec:GPI [(match_operand:HF 1 "register_operand" "w")
|
||||
(match_operand:SI 2 "immediate_operand" "i")]
|
||||
FCVT_F2FIXED))]
|
||||
"TARGET_FP_F16INST"
|
||||
"<FCVT_F2FIXED:fcvt_fixed_insn>\t%<GPI:w>0, %h1, #%2"
|
||||
[(set_attr "type" "f_cvtf2i")]
|
||||
)
|
||||
|
||||
(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><mode>hf3"
|
||||
[(set (match_operand:HF 0 "register_operand" "=w")
|
||||
(unspec:HF [(match_operand:GPI 1 "register_operand" "r")
|
||||
(match_operand:SI 2 "immediate_operand" "i")]
|
||||
FCVT_FIXED2F))]
|
||||
"TARGET_FP_F16INST"
|
||||
"<FCVT_FIXED2F:fcvt_fixed_insn>\t%h0, %<GPI:w>1, #%2"
|
||||
[(set_attr "type" "f_cvti2f")]
|
||||
)
|
||||
|
||||
(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn>hf3"
|
||||
[(set (match_operand:HI 0 "register_operand" "=w")
|
||||
(unspec:HI [(match_operand:HF 1 "register_operand" "w")
|
||||
(match_operand:SI 2 "immediate_operand" "i")]
|
||||
FCVT_F2FIXED))]
|
||||
"TARGET_SIMD"
|
||||
"<FCVT_F2FIXED:fcvt_fixed_insn>\t%h0, %h1, #%2"
|
||||
[(set_attr "type" "neon_fp_to_int_s")]
|
||||
)
|
||||
|
||||
(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn>hi3"
|
||||
[(set (match_operand:HF 0 "register_operand" "=w")
|
||||
(unspec:HF [(match_operand:HI 1 "register_operand" "w")
|
||||
(match_operand:SI 2 "immediate_operand" "i")]
|
||||
FCVT_FIXED2F))]
|
||||
"TARGET_SIMD"
|
||||
"<FCVT_FIXED2F:fcvt_fixed_insn>\t%h0, %h1, #%2"
|
||||
[(set_attr "type" "neon_int_to_fp_s")]
|
||||
)
|
||||
|
||||
;; -------------------------------------------------------------------
|
||||
;; Floating-point arithmetic
|
||||
;; -------------------------------------------------------------------
|
||||
|
||||
(define_insn "add<mode>3"
|
||||
[(set (match_operand:GPF 0 "register_operand" "=w")
|
||||
(plus:GPF
|
||||
(match_operand:GPF 1 "register_operand" "w")
|
||||
(match_operand:GPF 2 "register_operand" "w")))]
|
||||
[(set (match_operand:GPF_F16 0 "register_operand" "=w")
|
||||
(plus:GPF_F16
|
||||
(match_operand:GPF_F16 1 "register_operand" "w")
|
||||
(match_operand:GPF_F16 2 "register_operand" "w")))]
|
||||
"TARGET_FLOAT"
|
||||
"fadd\\t%<s>0, %<s>1, %<s>2"
|
||||
[(set_attr "type" "fadd<s>")]
|
||||
[(set_attr "type" "fadd<stype>")]
|
||||
)
|
||||
|
||||
(define_insn "sub<mode>3"
|
||||
[(set (match_operand:GPF 0 "register_operand" "=w")
|
||||
(minus:GPF
|
||||
(match_operand:GPF 1 "register_operand" "w")
|
||||
(match_operand:GPF 2 "register_operand" "w")))]
|
||||
[(set (match_operand:GPF_F16 0 "register_operand" "=w")
|
||||
(minus:GPF_F16
|
||||
(match_operand:GPF_F16 1 "register_operand" "w")
|
||||
(match_operand:GPF_F16 2 "register_operand" "w")))]
|
||||
"TARGET_FLOAT"
|
||||
"fsub\\t%<s>0, %<s>1, %<s>2"
|
||||
[(set_attr "type" "fadd<s>")]
|
||||
[(set_attr "type" "fadd<stype>")]
|
||||
)
|
||||
|
||||
(define_insn "mul<mode>3"
|
||||
[(set (match_operand:GPF 0 "register_operand" "=w")
|
||||
(mult:GPF
|
||||
(match_operand:GPF 1 "register_operand" "w")
|
||||
(match_operand:GPF 2 "register_operand" "w")))]
|
||||
[(set (match_operand:GPF_F16 0 "register_operand" "=w")
|
||||
(mult:GPF_F16
|
||||
(match_operand:GPF_F16 1 "register_operand" "w")
|
||||
(match_operand:GPF_F16 2 "register_operand" "w")))]
|
||||
"TARGET_FLOAT"
|
||||
"fmul\\t%<s>0, %<s>1, %<s>2"
|
||||
[(set_attr "type" "fmul<s>")]
|
||||
[(set_attr "type" "fmul<stype>")]
|
||||
)
|
||||
|
||||
(define_insn "*fnmul<mode>3"
|
||||
|
|
@ -4715,9 +4755,9 @@
|
|||
)
|
||||
|
||||
(define_expand "div<mode>3"
|
||||
[(set (match_operand:GPF 0 "register_operand")
|
||||
(div:GPF (match_operand:GPF 1 "general_operand")
|
||||
(match_operand:GPF 2 "register_operand")))]
|
||||
[(set (match_operand:GPF_F16 0 "register_operand")
|
||||
(div:GPF_F16 (match_operand:GPF_F16 1 "general_operand")
|
||||
(match_operand:GPF_F16 2 "register_operand")))]
|
||||
"TARGET_SIMD"
|
||||
{
|
||||
if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
|
||||
|
|
@ -4727,12 +4767,12 @@
|
|||
})
|
||||
|
||||
(define_insn "*div<mode>3"
|
||||
[(set (match_operand:GPF 0 "register_operand" "=w")
|
||||
(div:GPF (match_operand:GPF 1 "register_operand" "w")
|
||||
(match_operand:GPF 2 "register_operand" "w")))]
|
||||
[(set (match_operand:GPF_F16 0 "register_operand" "=w")
|
||||
(div:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")
|
||||
(match_operand:GPF_F16 2 "register_operand" "w")))]
|
||||
"TARGET_FLOAT"
|
||||
"fdiv\\t%<s>0, %<s>1, %<s>2"
|
||||
[(set_attr "type" "fdiv<s>")]
|
||||
[(set_attr "type" "fdiv<stype>")]
|
||||
)
|
||||
|
||||
(define_insn "neg<mode>2"
|
||||
|
|
@ -4792,13 +4832,13 @@
|
|||
|
||||
;; Scalar forms for the IEEE-754 fmax()/fmin() functions
|
||||
(define_insn "<fmaxmin><mode>3"
|
||||
[(set (match_operand:GPF 0 "register_operand" "=w")
|
||||
(unspec:GPF [(match_operand:GPF 1 "register_operand" "w")
|
||||
(match_operand:GPF 2 "register_operand" "w")]
|
||||
[(set (match_operand:GPF_F16 0 "register_operand" "=w")
|
||||
(unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")
|
||||
(match_operand:GPF_F16 2 "register_operand" "w")]
|
||||
FMAXMIN))]
|
||||
"TARGET_FLOAT"
|
||||
"<fmaxmin_op>\\t%<s>0, %<s>1, %<s>2"
|
||||
[(set_attr "type" "f_minmax<s>")]
|
||||
[(set_attr "type" "f_minmax<stype>")]
|
||||
)
|
||||
|
||||
;; For copysign (x, y), we want to generate:
|
||||
|
|
|
|||
|
|
@ -360,6 +360,206 @@ vsqrth_f16 (float16_t __a)
|
|||
return __builtin_aarch64_sqrthf (__a);
|
||||
}
|
||||
|
||||
/* ARMv8.2-A FP16 two operands scalar intrinsics. */
|
||||
|
||||
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
|
||||
vaddh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __a + __b;
|
||||
}
|
||||
|
||||
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
|
||||
vabdh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_fabdhf (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
|
||||
vcageh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_facgehf_uss (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
|
||||
vcagth_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_facgthf_uss (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
|
||||
vcaleh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_faclehf_uss (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
|
||||
vcalth_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_faclthf_uss (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
|
||||
vceqh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_cmeqhf_uss (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
|
||||
vcgeh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_cmgehf_uss (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
|
||||
vcgth_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_cmgthf_uss (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
|
||||
vcleh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_cmlehf_uss (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
|
||||
vclth_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_cmlthf_uss (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
|
||||
vcvth_n_f16_s16 (int16_t __a, const int __b)
|
||||
{
|
||||
return __builtin_aarch64_scvtfhi (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
|
||||
vcvth_n_f16_s32 (int32_t __a, const int __b)
|
||||
{
|
||||
return __builtin_aarch64_scvtfsihf (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
|
||||
vcvth_n_f16_s64 (int64_t __a, const int __b)
|
||||
{
|
||||
return __builtin_aarch64_scvtfdihf (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
|
||||
vcvth_n_f16_u16 (uint16_t __a, const int __b)
|
||||
{
|
||||
return __builtin_aarch64_ucvtfhi_sus (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
|
||||
vcvth_n_f16_u32 (uint32_t __a, const int __b)
|
||||
{
|
||||
return __builtin_aarch64_ucvtfsihf_sus (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
|
||||
vcvth_n_f16_u64 (uint64_t __a, const int __b)
|
||||
{
|
||||
return __builtin_aarch64_ucvtfdihf_sus (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
|
||||
vcvth_n_s16_f16 (float16_t __a, const int __b)
|
||||
{
|
||||
return __builtin_aarch64_fcvtzshf (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
|
||||
vcvth_n_s32_f16 (float16_t __a, const int __b)
|
||||
{
|
||||
return __builtin_aarch64_fcvtzshfsi (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
|
||||
vcvth_n_s64_f16 (float16_t __a, const int __b)
|
||||
{
|
||||
return __builtin_aarch64_fcvtzshfdi (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
|
||||
vcvth_n_u16_f16 (float16_t __a, const int __b)
|
||||
{
|
||||
return __builtin_aarch64_fcvtzuhf_uss (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
|
||||
vcvth_n_u32_f16 (float16_t __a, const int __b)
|
||||
{
|
||||
return __builtin_aarch64_fcvtzuhfsi_uss (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
|
||||
vcvth_n_u64_f16 (float16_t __a, const int __b)
|
||||
{
|
||||
return __builtin_aarch64_fcvtzuhfdi_uss (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
|
||||
vdivh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __a / __b;
|
||||
}
|
||||
|
||||
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
|
||||
vmaxh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_fmaxhf (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
|
||||
vmaxnmh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_fmaxhf (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
|
||||
vminh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_fminhf (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
|
||||
vminnmh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_fminhf (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
|
||||
vmulh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __a * __b;
|
||||
}
|
||||
|
||||
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
|
||||
vmulxh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_fmulxhf (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
|
||||
vrecpsh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_frecpshf (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
|
||||
vrsqrtsh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __builtin_aarch64_rsqrtshf (__a, __b);
|
||||
}
|
||||
|
||||
__extension__ static __inline float16_t __attribute__ ((__always_inline__))
|
||||
vsubh_f16 (float16_t __a, float16_t __b)
|
||||
{
|
||||
return __a - __b;
|
||||
}
|
||||
|
||||
#pragma GCC pop_options
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -105,9 +105,6 @@
|
|||
(define_mode_iterator VHSDF_DF [(V4HF "TARGET_SIMD_F16INST")
|
||||
(V8HF "TARGET_SIMD_F16INST")
|
||||
V2SF V4SF V2DF DF])
|
||||
(define_mode_iterator VHSDF_SDF [(V4HF "TARGET_SIMD_F16INST")
|
||||
(V8HF "TARGET_SIMD_F16INST")
|
||||
V2SF V4SF V2DF SF DF])
|
||||
(define_mode_iterator VHSDF_HSDF [(V4HF "TARGET_SIMD_F16INST")
|
||||
(V8HF "TARGET_SIMD_F16INST")
|
||||
V2SF V4SF V2DF
|
||||
|
|
@ -190,7 +187,9 @@
|
|||
;; Scalar and Vector modes for S and D, Vector modes for H.
|
||||
(define_mode_iterator VSDQ_HSDI [(V4HI "TARGET_SIMD_F16INST")
|
||||
(V8HI "TARGET_SIMD_F16INST")
|
||||
V2SI V4SI V2DI SI DI])
|
||||
V2SI V4SI V2DI
|
||||
(HI "TARGET_SIMD_F16INST")
|
||||
SI DI])
|
||||
|
||||
;; Vector modes for Q and H types.
|
||||
(define_mode_iterator VDQQH [V8QI V16QI V4HI V8HI])
|
||||
|
|
@ -705,12 +704,12 @@
|
|||
(V2DI "v2df") (V4SI "v4sf") (V2SI "v2sf")
|
||||
(SF "si") (DF "di") (SI "sf") (DI "df")
|
||||
(V4HF "v4hi") (V8HF "v8hi") (V4HI "v4hf")
|
||||
(V8HI "v8hf")])
|
||||
(V8HI "v8hf") (HF "hi") (HI "hf")])
|
||||
(define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI")
|
||||
(V2DI "V2DF") (V4SI "V4SF") (V2SI "V2SF")
|
||||
(SF "SI") (DF "DI") (SI "SF") (DI "DF")
|
||||
(V4HF "V4HI") (V8HF "V8HI") (V4HI "V4HF")
|
||||
(V8HI "V8HF")])
|
||||
(V8HI "V8HF") (HF "HI") (HI "HF")])
|
||||
|
||||
|
||||
;; for the inequal width integer to fp conversions
|
||||
|
|
|
|||
Loading…
Reference in New Issue