mirror of git://gcc.gnu.org/git/gcc.git
[AArch64][3/10] ARMv8.2-A FP16 two operands vector intrinsics
gcc/ * config/aarch64/aarch64-simd-builtins.def: Register new builtins. * config/aarch64/aarch64-simd.md (aarch64_rsqrts<mode>): Extend to HF modes. (fabd<mode>3): Likewise. (<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise. (<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise. (aarch64_<maxmin_uns>p<mode>): Likewise. (<su><maxmin><mode>3): Likewise. (<maxmin_uns><mode>3): Likewise. (<fmaxmin><mode>3): Likewise. (aarch64_faddp<mode>): Likewise. (aarch64_fmulx<mode>): Likewise. (aarch64_frecps<mode>): Likewise. (*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>. (add<mode>3): Extend to HF modes. (sub<mode>3): Likewise. (mul<mode>3): Likewise. (div<mode>3): Likewise. (*div<mode>3): Likewise. * config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for HF, V4HF and V8HF. * config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator. * config/aarch64/arm_neon.h (vadd_f16): New. (vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16, vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16, vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16, vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16, vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16, vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16, vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16, vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16, vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16, vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16, vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16, vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16, vsubq_f16): Likewise. From-SVN: r238717
This commit is contained in:
parent
daef0a8c7e
commit
33d72b6386
|
|
@ -1,3 +1,42 @@
|
||||||
|
2016-07-25 Jiong Wang <jiong.wang@arm.com>
|
||||||
|
|
||||||
|
* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
|
||||||
|
* config/aarch64/aarch64-simd.md
|
||||||
|
(aarch64_rsqrts<mode>): Extend to HF modes.
|
||||||
|
(fabd<mode>3): Likewise.
|
||||||
|
(<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF_SDF:mode>3): Likewise.
|
||||||
|
(<FCVT_FIXED2F:fcvt_fixed_insn><VHSDI_SDI:mode>3): Likewise.
|
||||||
|
(aarch64_<maxmin_uns>p<mode>): Likewise.
|
||||||
|
(<su><maxmin><mode>3): Likewise.
|
||||||
|
(<maxmin_uns><mode>3): Likewise.
|
||||||
|
(<fmaxmin><mode>3): Likewise.
|
||||||
|
(aarch64_faddp<mode>): Likewise.
|
||||||
|
(aarch64_fmulx<mode>): Likewise.
|
||||||
|
(aarch64_frecps<mode>): Likewise.
|
||||||
|
(*aarch64_fac<optab><mode>): Rename to aarch64_fac<optab><mode>.
|
||||||
|
(add<mode>3): Extend to HF modes.
|
||||||
|
(sub<mode>3): Likewise.
|
||||||
|
(mul<mode>3): Likewise.
|
||||||
|
(div<mode>3): Likewise.
|
||||||
|
(*div<mode>3): Likewise.
|
||||||
|
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Return false for
|
||||||
|
HF, V4HF and V8HF.
|
||||||
|
* config/aarch64/iterators.md (VDQ_HSDI, VSDQ_HSDI): New mode iterator.
|
||||||
|
* config/aarch64/arm_neon.h (vadd_f16): New.
|
||||||
|
(vaddq_f16, vabd_f16, vabdq_f16, vcage_f16, vcageq_f16, vcagt_f16,
|
||||||
|
vcagtq_f16, vcale_f16, vcaleq_f16, vcalt_f16, vcaltq_f16, vceq_f16,
|
||||||
|
vceqq_f16, vcge_f16, vcgeq_f16, vcgt_f16, vcgtq_f16, vcle_f16,
|
||||||
|
vcleq_f16, vclt_f16, vcltq_f16, vcvt_n_f16_s16, vcvtq_n_f16_s16,
|
||||||
|
vcvt_n_f16_u16, vcvtq_n_f16_u16, vcvt_n_s16_f16, vcvtq_n_s16_f16,
|
||||||
|
vcvt_n_u16_f16, vcvtq_n_u16_f16, vdiv_f16, vdivq_f16, vdup_lane_f16,
|
||||||
|
vdup_laneq_f16, vdupq_lane_f16, vdupq_laneq_f16, vdups_lane_f16,
|
||||||
|
vdups_laneq_f16, vmax_f16, vmaxq_f16, vmaxnm_f16, vmaxnmq_f16, vmin_f16,
|
||||||
|
vminq_f16, vminnm_f16, vminnmq_f16, vmul_f16, vmulq_f16, vmulx_f16,
|
||||||
|
vmulxq_f16, vpadd_f16, vpaddq_f16, vpmax_f16, vpmaxq_f16, vpmaxnm_f16,
|
||||||
|
vpmaxnmq_f16, vpmin_f16, vpminq_f16, vpminnm_f16, vpminnmq_f16,
|
||||||
|
vrecps_f16, vrecpsq_f16, vrsqrts_f16, vrsqrtsq_f16, vsub_f16,
|
||||||
|
vsubq_f16): Likewise.
|
||||||
|
|
||||||
2016-07-25 Jiong Wang <jiong.wang@arm.com>
|
2016-07-25 Jiong Wang <jiong.wang@arm.com>
|
||||||
|
|
||||||
* config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New.
|
* config/aarch64/aarch64-builtins.c (TYPES_BINOP_USS): New.
|
||||||
|
|
@ -15,8 +54,8 @@
|
||||||
(*sqrt<mode>2): Likewise.
|
(*sqrt<mode>2): Likewise.
|
||||||
(aarch64_frecpe<mode>): Likewise.
|
(aarch64_frecpe<mode>): Likewise.
|
||||||
(aarch64_cm<optab><mode>): Likewise.
|
(aarch64_cm<optab><mode>): Likewise.
|
||||||
* config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return
|
* config/aarch64/aarch64.c (aarch64_emit_approx_sqrt): Return false for
|
||||||
false for V4HF and V8HF.
|
HF, V4HF and V8HF.
|
||||||
* config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New.
|
* config/aarch64/iterators.md (VHSDF, VHSDF_DF, VHSDF_SDF): New.
|
||||||
(VDQF_COND, fcvt_target, FCVT_TARGET, hcon): Extend mode attribute to HF modes.
|
(VDQF_COND, fcvt_target, FCVT_TARGET, hcon): Extend mode attribute to HF modes.
|
||||||
(stype): New.
|
(stype): New.
|
||||||
|
|
|
||||||
|
|
@ -41,7 +41,7 @@
|
||||||
|
|
||||||
BUILTIN_VDC (COMBINE, combine, 0)
|
BUILTIN_VDC (COMBINE, combine, 0)
|
||||||
BUILTIN_VB (BINOP, pmul, 0)
|
BUILTIN_VB (BINOP, pmul, 0)
|
||||||
BUILTIN_VALLF (BINOP, fmulx, 0)
|
BUILTIN_VHSDF_SDF (BINOP, fmulx, 0)
|
||||||
BUILTIN_VHSDF_DF (UNOP, sqrt, 2)
|
BUILTIN_VHSDF_DF (UNOP, sqrt, 2)
|
||||||
BUILTIN_VD_BHSI (BINOP, addp, 0)
|
BUILTIN_VD_BHSI (BINOP, addp, 0)
|
||||||
VAR1 (UNOP, addp, 0, di)
|
VAR1 (UNOP, addp, 0, di)
|
||||||
|
|
@ -248,22 +248,22 @@
|
||||||
BUILTIN_VDQ_BHSI (BINOP, smin, 3)
|
BUILTIN_VDQ_BHSI (BINOP, smin, 3)
|
||||||
BUILTIN_VDQ_BHSI (BINOP, umax, 3)
|
BUILTIN_VDQ_BHSI (BINOP, umax, 3)
|
||||||
BUILTIN_VDQ_BHSI (BINOP, umin, 3)
|
BUILTIN_VDQ_BHSI (BINOP, umin, 3)
|
||||||
BUILTIN_VDQF (BINOP, smax_nan, 3)
|
BUILTIN_VHSDF (BINOP, smax_nan, 3)
|
||||||
BUILTIN_VDQF (BINOP, smin_nan, 3)
|
BUILTIN_VHSDF (BINOP, smin_nan, 3)
|
||||||
|
|
||||||
/* Implemented by <fmaxmin><mode>3. */
|
/* Implemented by <fmaxmin><mode>3. */
|
||||||
BUILTIN_VDQF (BINOP, fmax, 3)
|
BUILTIN_VHSDF (BINOP, fmax, 3)
|
||||||
BUILTIN_VDQF (BINOP, fmin, 3)
|
BUILTIN_VHSDF (BINOP, fmin, 3)
|
||||||
|
|
||||||
/* Implemented by aarch64_<maxmin_uns>p<mode>. */
|
/* Implemented by aarch64_<maxmin_uns>p<mode>. */
|
||||||
BUILTIN_VDQ_BHSI (BINOP, smaxp, 0)
|
BUILTIN_VDQ_BHSI (BINOP, smaxp, 0)
|
||||||
BUILTIN_VDQ_BHSI (BINOP, sminp, 0)
|
BUILTIN_VDQ_BHSI (BINOP, sminp, 0)
|
||||||
BUILTIN_VDQ_BHSI (BINOP, umaxp, 0)
|
BUILTIN_VDQ_BHSI (BINOP, umaxp, 0)
|
||||||
BUILTIN_VDQ_BHSI (BINOP, uminp, 0)
|
BUILTIN_VDQ_BHSI (BINOP, uminp, 0)
|
||||||
BUILTIN_VDQF (BINOP, smaxp, 0)
|
BUILTIN_VHSDF (BINOP, smaxp, 0)
|
||||||
BUILTIN_VDQF (BINOP, sminp, 0)
|
BUILTIN_VHSDF (BINOP, sminp, 0)
|
||||||
BUILTIN_VDQF (BINOP, smax_nanp, 0)
|
BUILTIN_VHSDF (BINOP, smax_nanp, 0)
|
||||||
BUILTIN_VDQF (BINOP, smin_nanp, 0)
|
BUILTIN_VHSDF (BINOP, smin_nanp, 0)
|
||||||
|
|
||||||
/* Implemented by <frint_pattern><mode>2. */
|
/* Implemented by <frint_pattern><mode>2. */
|
||||||
BUILTIN_VHSDF (UNOP, btrunc, 2)
|
BUILTIN_VHSDF (UNOP, btrunc, 2)
|
||||||
|
|
@ -383,7 +383,7 @@
|
||||||
BUILTIN_VDQ_SI (UNOP, urecpe, 0)
|
BUILTIN_VDQ_SI (UNOP, urecpe, 0)
|
||||||
|
|
||||||
BUILTIN_VHSDF (UNOP, frecpe, 0)
|
BUILTIN_VHSDF (UNOP, frecpe, 0)
|
||||||
BUILTIN_VDQF (BINOP, frecps, 0)
|
BUILTIN_VHSDF (BINOP, frecps, 0)
|
||||||
|
|
||||||
/* Implemented by a mixture of abs2 patterns. Note the DImode builtin is
|
/* Implemented by a mixture of abs2 patterns. Note the DImode builtin is
|
||||||
only ever used for the int64x1_t intrinsic, there is no scalar version. */
|
only ever used for the int64x1_t intrinsic, there is no scalar version. */
|
||||||
|
|
@ -475,22 +475,22 @@
|
||||||
BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlsh_laneq, 0)
|
BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlsh_laneq, 0)
|
||||||
|
|
||||||
/* Implemented by <FCVT_F2FIXED/FIXED2F:fcvt_fixed_insn><*><*>3. */
|
/* Implemented by <FCVT_F2FIXED/FIXED2F:fcvt_fixed_insn><*><*>3. */
|
||||||
BUILTIN_VSDQ_SDI (SHIFTIMM, scvtf, 3)
|
BUILTIN_VSDQ_HSDI (SHIFTIMM, scvtf, 3)
|
||||||
BUILTIN_VSDQ_SDI (FCVTIMM_SUS, ucvtf, 3)
|
BUILTIN_VSDQ_HSDI (FCVTIMM_SUS, ucvtf, 3)
|
||||||
BUILTIN_VALLF (SHIFTIMM, fcvtzs, 3)
|
BUILTIN_VHSDF_SDF (SHIFTIMM, fcvtzs, 3)
|
||||||
BUILTIN_VALLF (SHIFTIMM_USS, fcvtzu, 3)
|
BUILTIN_VHSDF_SDF (SHIFTIMM_USS, fcvtzu, 3)
|
||||||
|
|
||||||
/* Implemented by aarch64_rsqrte<mode>. */
|
/* Implemented by aarch64_rsqrte<mode>. */
|
||||||
BUILTIN_VHSDF_SDF (UNOP, rsqrte, 0)
|
BUILTIN_VHSDF_SDF (UNOP, rsqrte, 0)
|
||||||
|
|
||||||
/* Implemented by aarch64_rsqrts<mode>. */
|
/* Implemented by aarch64_rsqrts<mode>. */
|
||||||
BUILTIN_VALLF (BINOP, rsqrts, 0)
|
BUILTIN_VHSDF_SDF (BINOP, rsqrts, 0)
|
||||||
|
|
||||||
/* Implemented by fabd<mode>3. */
|
/* Implemented by fabd<mode>3. */
|
||||||
BUILTIN_VALLF (BINOP, fabd, 3)
|
BUILTIN_VHSDF_SDF (BINOP, fabd, 3)
|
||||||
|
|
||||||
/* Implemented by aarch64_faddp<mode>. */
|
/* Implemented by aarch64_faddp<mode>. */
|
||||||
BUILTIN_VDQF (BINOP, faddp, 0)
|
BUILTIN_VHSDF (BINOP, faddp, 0)
|
||||||
|
|
||||||
/* Implemented by aarch64_cm<optab><mode>. */
|
/* Implemented by aarch64_cm<optab><mode>. */
|
||||||
BUILTIN_VHSDF_SDF (BINOP_USS, cmeq, 0)
|
BUILTIN_VHSDF_SDF (BINOP_USS, cmeq, 0)
|
||||||
|
|
@ -501,3 +501,9 @@
|
||||||
|
|
||||||
/* Implemented by neg<mode>2. */
|
/* Implemented by neg<mode>2. */
|
||||||
BUILTIN_VHSDF (UNOP, neg, 2)
|
BUILTIN_VHSDF (UNOP, neg, 2)
|
||||||
|
|
||||||
|
/* Implemented by aarch64_fac<optab><mode>. */
|
||||||
|
BUILTIN_VHSDF_SDF (BINOP_USS, faclt, 0)
|
||||||
|
BUILTIN_VHSDF_SDF (BINOP_USS, facle, 0)
|
||||||
|
BUILTIN_VHSDF_SDF (BINOP_USS, facgt, 0)
|
||||||
|
BUILTIN_VHSDF_SDF (BINOP_USS, facge, 0)
|
||||||
|
|
|
||||||
|
|
@ -391,13 +391,13 @@
|
||||||
[(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
|
[(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
|
||||||
|
|
||||||
(define_insn "aarch64_rsqrts<mode>"
|
(define_insn "aarch64_rsqrts<mode>"
|
||||||
[(set (match_operand:VALLF 0 "register_operand" "=w")
|
[(set (match_operand:VHSDF_SDF 0 "register_operand" "=w")
|
||||||
(unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
|
(unspec:VHSDF_SDF [(match_operand:VHSDF_SDF 1 "register_operand" "w")
|
||||||
(match_operand:VALLF 2 "register_operand" "w")]
|
(match_operand:VHSDF_SDF 2 "register_operand" "w")]
|
||||||
UNSPEC_RSQRTS))]
|
UNSPEC_RSQRTS))]
|
||||||
"TARGET_SIMD"
|
"TARGET_SIMD"
|
||||||
"frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
|
"frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
|
||||||
[(set_attr "type" "neon_fp_rsqrts_<Vetype><q>")])
|
[(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
|
||||||
|
|
||||||
(define_expand "rsqrt<mode>2"
|
(define_expand "rsqrt<mode>2"
|
||||||
[(set (match_operand:VALLF 0 "register_operand" "=w")
|
[(set (match_operand:VALLF 0 "register_operand" "=w")
|
||||||
|
|
@ -475,14 +475,14 @@
|
||||||
)
|
)
|
||||||
|
|
||||||
(define_insn "fabd<mode>3"
|
(define_insn "fabd<mode>3"
|
||||||
[(set (match_operand:VALLF 0 "register_operand" "=w")
|
[(set (match_operand:VHSDF_SDF 0 "register_operand" "=w")
|
||||||
(abs:VALLF
|
(abs:VHSDF_SDF
|
||||||
(minus:VALLF
|
(minus:VHSDF_SDF
|
||||||
(match_operand:VALLF 1 "register_operand" "w")
|
(match_operand:VHSDF_SDF 1 "register_operand" "w")
|
||||||
(match_operand:VALLF 2 "register_operand" "w"))))]
|
(match_operand:VHSDF_SDF 2 "register_operand" "w"))))]
|
||||||
"TARGET_SIMD"
|
"TARGET_SIMD"
|
||||||
"fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
|
"fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
|
||||||
[(set_attr "type" "neon_fp_abd_<Vetype><q>")]
|
[(set_attr "type" "neon_fp_abd_<stype><q>")]
|
||||||
)
|
)
|
||||||
|
|
||||||
(define_insn "and<mode>3"
|
(define_insn "and<mode>3"
|
||||||
|
|
@ -1105,10 +1105,10 @@
|
||||||
|
|
||||||
;; Pairwise FP Max/Min operations.
|
;; Pairwise FP Max/Min operations.
|
||||||
(define_insn "aarch64_<maxmin_uns>p<mode>"
|
(define_insn "aarch64_<maxmin_uns>p<mode>"
|
||||||
[(set (match_operand:VDQF 0 "register_operand" "=w")
|
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
||||||
(unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
|
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
|
||||||
(match_operand:VDQF 2 "register_operand" "w")]
|
(match_operand:VHSDF 2 "register_operand" "w")]
|
||||||
FMAXMINV))]
|
FMAXMINV))]
|
||||||
"TARGET_SIMD"
|
"TARGET_SIMD"
|
||||||
"<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
"<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
||||||
[(set_attr "type" "neon_minmax<q>")]
|
[(set_attr "type" "neon_minmax<q>")]
|
||||||
|
|
@ -1517,36 +1517,36 @@
|
||||||
;; FP arithmetic operations.
|
;; FP arithmetic operations.
|
||||||
|
|
||||||
(define_insn "add<mode>3"
|
(define_insn "add<mode>3"
|
||||||
[(set (match_operand:VDQF 0 "register_operand" "=w")
|
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
||||||
(plus:VDQF (match_operand:VDQF 1 "register_operand" "w")
|
(plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
|
||||||
(match_operand:VDQF 2 "register_operand" "w")))]
|
(match_operand:VHSDF 2 "register_operand" "w")))]
|
||||||
"TARGET_SIMD"
|
"TARGET_SIMD"
|
||||||
"fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
"fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
||||||
[(set_attr "type" "neon_fp_addsub_<Vetype><q>")]
|
[(set_attr "type" "neon_fp_addsub_<stype><q>")]
|
||||||
)
|
)
|
||||||
|
|
||||||
(define_insn "sub<mode>3"
|
(define_insn "sub<mode>3"
|
||||||
[(set (match_operand:VDQF 0 "register_operand" "=w")
|
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
||||||
(minus:VDQF (match_operand:VDQF 1 "register_operand" "w")
|
(minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
|
||||||
(match_operand:VDQF 2 "register_operand" "w")))]
|
(match_operand:VHSDF 2 "register_operand" "w")))]
|
||||||
"TARGET_SIMD"
|
"TARGET_SIMD"
|
||||||
"fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
"fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
||||||
[(set_attr "type" "neon_fp_addsub_<Vetype><q>")]
|
[(set_attr "type" "neon_fp_addsub_<stype><q>")]
|
||||||
)
|
)
|
||||||
|
|
||||||
(define_insn "mul<mode>3"
|
(define_insn "mul<mode>3"
|
||||||
[(set (match_operand:VDQF 0 "register_operand" "=w")
|
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
||||||
(mult:VDQF (match_operand:VDQF 1 "register_operand" "w")
|
(mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
|
||||||
(match_operand:VDQF 2 "register_operand" "w")))]
|
(match_operand:VHSDF 2 "register_operand" "w")))]
|
||||||
"TARGET_SIMD"
|
"TARGET_SIMD"
|
||||||
"fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
"fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
||||||
[(set_attr "type" "neon_fp_mul_<Vetype><q>")]
|
[(set_attr "type" "neon_fp_mul_<stype><q>")]
|
||||||
)
|
)
|
||||||
|
|
||||||
(define_expand "div<mode>3"
|
(define_expand "div<mode>3"
|
||||||
[(set (match_operand:VDQF 0 "register_operand")
|
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
||||||
(div:VDQF (match_operand:VDQF 1 "general_operand")
|
(div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
|
||||||
(match_operand:VDQF 2 "register_operand")))]
|
(match_operand:VHSDF 2 "register_operand" "w")))]
|
||||||
"TARGET_SIMD"
|
"TARGET_SIMD"
|
||||||
{
|
{
|
||||||
if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
|
if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
|
||||||
|
|
@ -1556,12 +1556,12 @@
|
||||||
})
|
})
|
||||||
|
|
||||||
(define_insn "*div<mode>3"
|
(define_insn "*div<mode>3"
|
||||||
[(set (match_operand:VDQF 0 "register_operand" "=w")
|
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
||||||
(div:VDQF (match_operand:VDQF 1 "register_operand" "w")
|
(div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
|
||||||
(match_operand:VDQF 2 "register_operand" "w")))]
|
(match_operand:VHSDF 2 "register_operand" "w")))]
|
||||||
"TARGET_SIMD"
|
"TARGET_SIMD"
|
||||||
"fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
"fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
||||||
[(set_attr "type" "neon_fp_div_<Vetype><q>")]
|
[(set_attr "type" "neon_fp_div_<stype><q>")]
|
||||||
)
|
)
|
||||||
|
|
||||||
(define_insn "neg<mode>2"
|
(define_insn "neg<mode>2"
|
||||||
|
|
@ -1826,24 +1826,26 @@
|
||||||
|
|
||||||
;; Convert between fixed-point and floating-point (vector modes)
|
;; Convert between fixed-point and floating-point (vector modes)
|
||||||
|
|
||||||
(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VDQF:mode>3"
|
(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
|
||||||
[(set (match_operand:<VDQF:FCVT_TARGET> 0 "register_operand" "=w")
|
[(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
|
||||||
(unspec:<VDQF:FCVT_TARGET> [(match_operand:VDQF 1 "register_operand" "w")
|
(unspec:<VHSDF:FCVT_TARGET>
|
||||||
(match_operand:SI 2 "immediate_operand" "i")]
|
[(match_operand:VHSDF 1 "register_operand" "w")
|
||||||
|
(match_operand:SI 2 "immediate_operand" "i")]
|
||||||
FCVT_F2FIXED))]
|
FCVT_F2FIXED))]
|
||||||
"TARGET_SIMD"
|
"TARGET_SIMD"
|
||||||
"<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
|
"<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
|
||||||
[(set_attr "type" "neon_fp_to_int_<VDQF:Vetype><q>")]
|
[(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
|
||||||
)
|
)
|
||||||
|
|
||||||
(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_SDI:mode>3"
|
(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
|
||||||
[(set (match_operand:<VDQ_SDI:FCVT_TARGET> 0 "register_operand" "=w")
|
[(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
|
||||||
(unspec:<VDQ_SDI:FCVT_TARGET> [(match_operand:VDQ_SDI 1 "register_operand" "w")
|
(unspec:<VDQ_HSDI:FCVT_TARGET>
|
||||||
(match_operand:SI 2 "immediate_operand" "i")]
|
[(match_operand:VDQ_HSDI 1 "register_operand" "w")
|
||||||
|
(match_operand:SI 2 "immediate_operand" "i")]
|
||||||
FCVT_FIXED2F))]
|
FCVT_FIXED2F))]
|
||||||
"TARGET_SIMD"
|
"TARGET_SIMD"
|
||||||
"<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
|
"<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
|
||||||
[(set_attr "type" "neon_int_to_fp_<VDQ_SDI:Vetype><q>")]
|
[(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
|
||||||
)
|
)
|
||||||
|
|
||||||
;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
|
;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
|
||||||
|
|
@ -2002,33 +2004,33 @@
|
||||||
;; NaNs.
|
;; NaNs.
|
||||||
|
|
||||||
(define_insn "<su><maxmin><mode>3"
|
(define_insn "<su><maxmin><mode>3"
|
||||||
[(set (match_operand:VDQF 0 "register_operand" "=w")
|
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
||||||
(FMAXMIN:VDQF (match_operand:VDQF 1 "register_operand" "w")
|
(FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
|
||||||
(match_operand:VDQF 2 "register_operand" "w")))]
|
(match_operand:VHSDF 2 "register_operand" "w")))]
|
||||||
"TARGET_SIMD"
|
"TARGET_SIMD"
|
||||||
"f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
"f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
||||||
[(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
|
[(set_attr "type" "neon_fp_minmax_<stype><q>")]
|
||||||
)
|
)
|
||||||
|
|
||||||
(define_insn "<maxmin_uns><mode>3"
|
(define_insn "<maxmin_uns><mode>3"
|
||||||
[(set (match_operand:VDQF 0 "register_operand" "=w")
|
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
||||||
(unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
|
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
|
||||||
(match_operand:VDQF 2 "register_operand" "w")]
|
(match_operand:VHSDF 2 "register_operand" "w")]
|
||||||
FMAXMIN_UNS))]
|
FMAXMIN_UNS))]
|
||||||
"TARGET_SIMD"
|
"TARGET_SIMD"
|
||||||
"<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
"<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
||||||
[(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
|
[(set_attr "type" "neon_fp_minmax_<stype><q>")]
|
||||||
)
|
)
|
||||||
|
|
||||||
;; Auto-vectorized forms for the IEEE-754 fmax()/fmin() functions
|
;; Auto-vectorized forms for the IEEE-754 fmax()/fmin() functions
|
||||||
(define_insn "<fmaxmin><mode>3"
|
(define_insn "<fmaxmin><mode>3"
|
||||||
[(set (match_operand:VDQF 0 "register_operand" "=w")
|
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
||||||
(unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
|
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
|
||||||
(match_operand:VDQF 2 "register_operand" "w")]
|
(match_operand:VHSDF 2 "register_operand" "w")]
|
||||||
FMAXMIN))]
|
FMAXMIN))]
|
||||||
"TARGET_SIMD"
|
"TARGET_SIMD"
|
||||||
"<fmaxmin_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
"<fmaxmin_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
||||||
[(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
|
[(set_attr "type" "neon_fp_minmax_<stype><q>")]
|
||||||
)
|
)
|
||||||
|
|
||||||
;; 'across lanes' add.
|
;; 'across lanes' add.
|
||||||
|
|
@ -2048,13 +2050,13 @@
|
||||||
)
|
)
|
||||||
|
|
||||||
(define_insn "aarch64_faddp<mode>"
|
(define_insn "aarch64_faddp<mode>"
|
||||||
[(set (match_operand:VDQF 0 "register_operand" "=w")
|
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
||||||
(unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
|
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
|
||||||
(match_operand:VDQF 2 "register_operand" "w")]
|
(match_operand:VHSDF 2 "register_operand" "w")]
|
||||||
UNSPEC_FADDV))]
|
UNSPEC_FADDV))]
|
||||||
"TARGET_SIMD"
|
"TARGET_SIMD"
|
||||||
"faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
"faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
||||||
[(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
|
[(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
|
||||||
)
|
)
|
||||||
|
|
||||||
(define_insn "aarch64_reduc_plus_internal<mode>"
|
(define_insn "aarch64_reduc_plus_internal<mode>"
|
||||||
|
|
@ -3050,13 +3052,14 @@
|
||||||
;; fmulx.
|
;; fmulx.
|
||||||
|
|
||||||
(define_insn "aarch64_fmulx<mode>"
|
(define_insn "aarch64_fmulx<mode>"
|
||||||
[(set (match_operand:VALLF 0 "register_operand" "=w")
|
[(set (match_operand:VHSDF_SDF 0 "register_operand" "=w")
|
||||||
(unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
|
(unspec:VHSDF_SDF
|
||||||
(match_operand:VALLF 2 "register_operand" "w")]
|
[(match_operand:VHSDF_SDF 1 "register_operand" "w")
|
||||||
UNSPEC_FMULX))]
|
(match_operand:VHSDF_SDF 2 "register_operand" "w")]
|
||||||
|
UNSPEC_FMULX))]
|
||||||
"TARGET_SIMD"
|
"TARGET_SIMD"
|
||||||
"fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
|
"fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
|
||||||
[(set_attr "type" "neon_fp_mul_<Vetype>")]
|
[(set_attr "type" "neon_fp_mul_<stype>")]
|
||||||
)
|
)
|
||||||
|
|
||||||
;; vmulxq_lane_f32, and vmulx_laneq_f32
|
;; vmulxq_lane_f32, and vmulx_laneq_f32
|
||||||
|
|
@ -4310,16 +4313,18 @@
|
||||||
;; Note we can also handle what would be fac(le|lt) by
|
;; Note we can also handle what would be fac(le|lt) by
|
||||||
;; generating fac(ge|gt).
|
;; generating fac(ge|gt).
|
||||||
|
|
||||||
(define_insn "*aarch64_fac<optab><mode>"
|
(define_insn "aarch64_fac<optab><mode>"
|
||||||
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
|
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
|
||||||
(neg:<V_cmp_result>
|
(neg:<V_cmp_result>
|
||||||
(FAC_COMPARISONS:<V_cmp_result>
|
(FAC_COMPARISONS:<V_cmp_result>
|
||||||
(abs:VALLF (match_operand:VALLF 1 "register_operand" "w"))
|
(abs:VHSDF_SDF
|
||||||
(abs:VALLF (match_operand:VALLF 2 "register_operand" "w"))
|
(match_operand:VHSDF_SDF 1 "register_operand" "w"))
|
||||||
|
(abs:VHSDF_SDF
|
||||||
|
(match_operand:VHSDF_SDF 2 "register_operand" "w"))
|
||||||
)))]
|
)))]
|
||||||
"TARGET_SIMD"
|
"TARGET_SIMD"
|
||||||
"fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
|
"fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
|
||||||
[(set_attr "type" "neon_fp_compare_<Vetype><q>")]
|
[(set_attr "type" "neon_fp_compare_<stype><q>")]
|
||||||
)
|
)
|
||||||
|
|
||||||
;; addp
|
;; addp
|
||||||
|
|
@ -5431,13 +5436,14 @@
|
||||||
)
|
)
|
||||||
|
|
||||||
(define_insn "aarch64_frecps<mode>"
|
(define_insn "aarch64_frecps<mode>"
|
||||||
[(set (match_operand:VALLF 0 "register_operand" "=w")
|
[(set (match_operand:VHSDF_SDF 0 "register_operand" "=w")
|
||||||
(unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
|
(unspec:VHSDF_SDF
|
||||||
(match_operand:VALLF 2 "register_operand" "w")]
|
[(match_operand:VHSDF_SDF 1 "register_operand" "w")
|
||||||
UNSPEC_FRECPS))]
|
(match_operand:VHSDF_SDF 2 "register_operand" "w")]
|
||||||
|
UNSPEC_FRECPS))]
|
||||||
"TARGET_SIMD"
|
"TARGET_SIMD"
|
||||||
"frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
|
"frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
|
||||||
[(set_attr "type" "neon_fp_recps_<Vetype><q>")]
|
[(set_attr "type" "neon_fp_recps_<stype><q>")]
|
||||||
)
|
)
|
||||||
|
|
||||||
(define_insn "aarch64_urecpe<mode>"
|
(define_insn "aarch64_urecpe<mode>"
|
||||||
|
|
|
||||||
|
|
@ -7604,6 +7604,10 @@ bool
|
||||||
aarch64_emit_approx_div (rtx quo, rtx num, rtx den)
|
aarch64_emit_approx_div (rtx quo, rtx num, rtx den)
|
||||||
{
|
{
|
||||||
machine_mode mode = GET_MODE (quo);
|
machine_mode mode = GET_MODE (quo);
|
||||||
|
|
||||||
|
if (GET_MODE_INNER (mode) == HFmode)
|
||||||
|
return false;
|
||||||
|
|
||||||
bool use_approx_division_p = (flag_mlow_precision_div
|
bool use_approx_division_p = (flag_mlow_precision_div
|
||||||
|| (aarch64_tune_params.approx_modes->division
|
|| (aarch64_tune_params.approx_modes->division
|
||||||
& AARCH64_APPROX_MODE (mode)));
|
& AARCH64_APPROX_MODE (mode)));
|
||||||
|
|
|
||||||
|
|
@ -26385,6 +26385,368 @@ vsqrtq_f16 (float16x8_t a)
|
||||||
return __builtin_aarch64_sqrtv8hf (a);
|
return __builtin_aarch64_sqrtv8hf (a);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ARMv8.2-A FP16 two operands vector intrinsics. */
|
||||||
|
|
||||||
|
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
|
||||||
|
vadd_f16 (float16x4_t __a, float16x4_t __b)
|
||||||
|
{
|
||||||
|
return __a + __b;
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
|
||||||
|
vaddq_f16 (float16x8_t __a, float16x8_t __b)
|
||||||
|
{
|
||||||
|
return __a + __b;
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
|
||||||
|
vabd_f16 (float16x4_t a, float16x4_t b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_fabdv4hf (a, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
|
||||||
|
vabdq_f16 (float16x8_t a, float16x8_t b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_fabdv8hf (a, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
|
||||||
|
vcage_f16 (float16x4_t __a, float16x4_t __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_facgev4hf_uss (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
|
||||||
|
vcageq_f16 (float16x8_t __a, float16x8_t __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_facgev8hf_uss (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
|
||||||
|
vcagt_f16 (float16x4_t __a, float16x4_t __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_facgtv4hf_uss (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
|
||||||
|
vcagtq_f16 (float16x8_t __a, float16x8_t __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_facgtv8hf_uss (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
|
||||||
|
vcale_f16 (float16x4_t __a, float16x4_t __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_faclev4hf_uss (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
|
||||||
|
vcaleq_f16 (float16x8_t __a, float16x8_t __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_faclev8hf_uss (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
|
||||||
|
vcalt_f16 (float16x4_t __a, float16x4_t __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_facltv4hf_uss (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
|
||||||
|
vcaltq_f16 (float16x8_t __a, float16x8_t __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_facltv8hf_uss (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
|
||||||
|
vceq_f16 (float16x4_t __a, float16x4_t __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_cmeqv4hf_uss (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
|
||||||
|
vceqq_f16 (float16x8_t __a, float16x8_t __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_cmeqv8hf_uss (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
|
||||||
|
vcge_f16 (float16x4_t __a, float16x4_t __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_cmgev4hf_uss (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
|
||||||
|
vcgeq_f16 (float16x8_t __a, float16x8_t __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_cmgev8hf_uss (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
|
||||||
|
vcgt_f16 (float16x4_t __a, float16x4_t __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_cmgtv4hf_uss (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
|
||||||
|
vcgtq_f16 (float16x8_t __a, float16x8_t __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_cmgtv8hf_uss (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
|
||||||
|
vcle_f16 (float16x4_t __a, float16x4_t __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_cmlev4hf_uss (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
|
||||||
|
vcleq_f16 (float16x8_t __a, float16x8_t __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_cmlev8hf_uss (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
|
||||||
|
vclt_f16 (float16x4_t __a, float16x4_t __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_cmltv4hf_uss (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
|
||||||
|
vcltq_f16 (float16x8_t __a, float16x8_t __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_cmltv8hf_uss (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
|
||||||
|
vcvt_n_f16_s16 (int16x4_t __a, const int __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_scvtfv4hi (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
|
||||||
|
vcvtq_n_f16_s16 (int16x8_t __a, const int __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_scvtfv8hi (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
|
||||||
|
vcvt_n_f16_u16 (uint16x4_t __a, const int __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_ucvtfv4hi_sus (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
|
||||||
|
vcvtq_n_f16_u16 (uint16x8_t __a, const int __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_ucvtfv8hi_sus (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
|
||||||
|
vcvt_n_s16_f16 (float16x4_t __a, const int __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_fcvtzsv4hf (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
|
||||||
|
vcvtq_n_s16_f16 (float16x8_t __a, const int __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_fcvtzsv8hf (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
|
||||||
|
vcvt_n_u16_f16 (float16x4_t __a, const int __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_fcvtzuv4hf_uss (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
|
||||||
|
vcvtq_n_u16_f16 (float16x8_t __a, const int __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_fcvtzuv8hf_uss (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
|
||||||
|
vdiv_f16 (float16x4_t __a, float16x4_t __b)
|
||||||
|
{
|
||||||
|
return __a / __b;
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
|
||||||
|
vdivq_f16 (float16x8_t __a, float16x8_t __b)
|
||||||
|
{
|
||||||
|
return __a / __b;
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
|
||||||
|
vmax_f16 (float16x4_t __a, float16x4_t __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_smax_nanv4hf (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
|
||||||
|
vmaxq_f16 (float16x8_t __a, float16x8_t __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_smax_nanv8hf (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
|
||||||
|
vmaxnm_f16 (float16x4_t __a, float16x4_t __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_fmaxv4hf (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
|
||||||
|
vmaxnmq_f16 (float16x8_t __a, float16x8_t __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_fmaxv8hf (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
|
||||||
|
vmin_f16 (float16x4_t __a, float16x4_t __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_smin_nanv4hf (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
|
||||||
|
vminq_f16 (float16x8_t __a, float16x8_t __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_smin_nanv8hf (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
|
||||||
|
vminnm_f16 (float16x4_t __a, float16x4_t __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_fminv4hf (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
|
||||||
|
vminnmq_f16 (float16x8_t __a, float16x8_t __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_fminv8hf (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
|
||||||
|
vmul_f16 (float16x4_t __a, float16x4_t __b)
|
||||||
|
{
|
||||||
|
return __a * __b;
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
|
||||||
|
vmulq_f16 (float16x8_t __a, float16x8_t __b)
|
||||||
|
{
|
||||||
|
return __a * __b;
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
|
||||||
|
vmulx_f16 (float16x4_t __a, float16x4_t __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_fmulxv4hf (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
|
||||||
|
vmulxq_f16 (float16x8_t __a, float16x8_t __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_fmulxv8hf (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
|
||||||
|
vpadd_f16 (float16x4_t a, float16x4_t b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_faddpv4hf (a, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
|
||||||
|
vpaddq_f16 (float16x8_t a, float16x8_t b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_faddpv8hf (a, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
|
||||||
|
vpmax_f16 (float16x4_t a, float16x4_t b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_smax_nanpv4hf (a, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
|
||||||
|
vpmaxq_f16 (float16x8_t a, float16x8_t b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_smax_nanpv8hf (a, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
|
||||||
|
vpmaxnm_f16 (float16x4_t a, float16x4_t b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_smaxpv4hf (a, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
|
||||||
|
vpmaxnmq_f16 (float16x8_t a, float16x8_t b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_smaxpv8hf (a, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
|
||||||
|
vpmin_f16 (float16x4_t a, float16x4_t b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_smin_nanpv4hf (a, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
|
||||||
|
vpminq_f16 (float16x8_t a, float16x8_t b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_smin_nanpv8hf (a, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
|
||||||
|
vpminnm_f16 (float16x4_t a, float16x4_t b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_sminpv4hf (a, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
|
||||||
|
vpminnmq_f16 (float16x8_t a, float16x8_t b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_sminpv8hf (a, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
|
||||||
|
vrecps_f16 (float16x4_t __a, float16x4_t __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_frecpsv4hf (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
|
||||||
|
vrecpsq_f16 (float16x8_t __a, float16x8_t __b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_frecpsv8hf (__a, __b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
|
||||||
|
vrsqrts_f16 (float16x4_t a, float16x4_t b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_rsqrtsv4hf (a, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
|
||||||
|
vrsqrtsq_f16 (float16x8_t a, float16x8_t b)
|
||||||
|
{
|
||||||
|
return __builtin_aarch64_rsqrtsv8hf (a, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
|
||||||
|
vsub_f16 (float16x4_t __a, float16x4_t __b)
|
||||||
|
{
|
||||||
|
return __a - __b;
|
||||||
|
}
|
||||||
|
|
||||||
|
__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
|
||||||
|
vsubq_f16 (float16x8_t __a, float16x8_t __b)
|
||||||
|
{
|
||||||
|
return __a - __b;
|
||||||
|
}
|
||||||
|
|
||||||
#pragma GCC pop_options
|
#pragma GCC pop_options
|
||||||
|
|
||||||
#undef __aarch64_vget_lane_any
|
#undef __aarch64_vget_lane_any
|
||||||
|
|
|
||||||
|
|
@ -166,9 +166,19 @@
|
||||||
;; Vector modes for S and D
|
;; Vector modes for S and D
|
||||||
(define_mode_iterator VDQ_SDI [V2SI V4SI V2DI])
|
(define_mode_iterator VDQ_SDI [V2SI V4SI V2DI])
|
||||||
|
|
||||||
|
;; Vector modes for H, S and D
|
||||||
|
(define_mode_iterator VDQ_HSDI [(V4HI "TARGET_SIMD_F16INST")
|
||||||
|
(V8HI "TARGET_SIMD_F16INST")
|
||||||
|
V2SI V4SI V2DI])
|
||||||
|
|
||||||
;; Scalar and Vector modes for S and D
|
;; Scalar and Vector modes for S and D
|
||||||
(define_mode_iterator VSDQ_SDI [V2SI V4SI V2DI SI DI])
|
(define_mode_iterator VSDQ_SDI [V2SI V4SI V2DI SI DI])
|
||||||
|
|
||||||
|
;; Scalar and Vector modes for S and D, Vector modes for H.
|
||||||
|
(define_mode_iterator VSDQ_HSDI [(V4HI "TARGET_SIMD_F16INST")
|
||||||
|
(V8HI "TARGET_SIMD_F16INST")
|
||||||
|
V2SI V4SI V2DI SI DI])
|
||||||
|
|
||||||
;; Vector modes for Q and H types.
|
;; Vector modes for Q and H types.
|
||||||
(define_mode_iterator VDQQH [V8QI V16QI V4HI V8HI])
|
(define_mode_iterator VDQQH [V8QI V16QI V4HI V8HI])
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue