[AArch64][5/10] ARMv8.2-A FP16 lane vector intrinsics
gcc/
* config/aarch64/aarch64-simd.md (*aarch64_mulx_elt_to_64v2df): Rename to
"*aarch64_mulx_elt_from_dup<mode>".
(*aarch64_mul3_elt<mode>): Update schedule type.
(*aarch64_mul3_elt_from_dup<mode>): Likewise.
(*aarch64_fma4_elt_from_dup<mode>): Likewise.
(*aarch64_fnma4_elt_from_dup<mode>): Likewise.
* config/aarch64/iterators.md (VMUL): Support half precision float modes.
(f, fp): Support HF modes.
* config/aarch64/arm_neon.h (vfma_lane_f16, vfmaq_lane_f16,
vfma_laneq_f16, vfmaq_laneq_f16, vfma_n_f16, vfmaq_n_f16, vfms_lane_f16,
vfmsq_lane_f16, vfms_laneq_f16, vfmsq_laneq_f16, vfms_n_f16,
vfmsq_n_f16, vmul_lane_f16, vmulq_lane_f16, vmul_laneq_f16,
vmulq_laneq_f16, vmul_n_f16, vmulq_n_f16, vmulx_lane_f16,
vmulxq_lane_f16, vmulx_laneq_f16, vmulxq_laneq_f16): New.
From-SVN: r238719
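For orientation, each lane intrinsic added here multiplies (or multiply-accumulates) every element of one vector by a single selected element of another vector. A minimal sketch of the semantics this enables, not part of the patch itself (function name is hypothetical; assumes a compiler configured with -march=armv8.2-a+fp16):

    #include <arm_neon.h>

    /* out[i] = a[i] * b[1] for i = 0..3.  With this patch the compiler
       can match the mult-by-duplicated-element patterns and emit a
       single "fmul v0.4h, v0.4h, v1.h[1]" instead of a dup followed by
       a full vector multiply.  */
    float16x4_t
    mul_by_lane1 (float16x4_t a, float16x4_t b)
    {
      return vmul_lane_f16 (a, b, 1);
    }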
parent 89ed6d5f5e
commit ab2e8f01f1
gcc/ChangeLog
@@ -1,3 +1,20 @@
+2016-07-25  Jiong Wang  <jiong.wang@arm.com>
+
+	* config/aarch64/aarch64-simd.md (*aarch64_mulx_elt_to_64v2df): Rename to
+	"*aarch64_mulx_elt_from_dup<mode>".
+	(*aarch64_mul3_elt<mode>): Update schedule type.
+	(*aarch64_mul3_elt_from_dup<mode>): Likewise.
+	(*aarch64_fma4_elt_from_dup<mode>): Likewise.
+	(*aarch64_fnma4_elt_from_dup<mode>): Likewise.
+	* config/aarch64/iterators.md (VMUL): Support half precision float modes.
+	(f, fp): Support HF modes.
+	* config/aarch64/arm_neon.h (vfma_lane_f16, vfmaq_lane_f16,
+	vfma_laneq_f16, vfmaq_laneq_f16, vfma_n_f16, vfmaq_n_f16, vfms_lane_f16,
+	vfmsq_lane_f16, vfms_laneq_f16, vfmsq_laneq_f16, vfms_n_f16,
+	vfmsq_n_f16, vmul_lane_f16, vmulq_lane_f16, vmul_laneq_f16,
+	vmulq_laneq_f16, vmul_n_f16, vmulq_n_f16, vmulx_lane_f16,
+	vmulxq_lane_f16, vmulx_laneq_f16, vmulxq_laneq_f16): New.
+
 2016-07-25  Jiong Wang  <jiong.wang@arm.com>
 
 	* config/aarch64/aarch64-simd-builtins.def: Register new builtins.
gcc/config/aarch64/aarch64-simd.md
@@ -351,7 +351,7 @@
     operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
   }
-  [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
+  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 )
 
 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
@@ -379,7 +379,7 @@
       (match_operand:VMUL 2 "register_operand" "w")))]
   "TARGET_SIMD"
   "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
-  [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
+  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 )
 
 (define_insn "aarch64_rsqrte<mode>"
@@ -1634,7 +1634,7 @@
       (match_operand:VMUL 3 "register_operand" "0")))]
   "TARGET_SIMD"
   "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
-  [(set_attr "type" "neon<fp>_mla_<Vetype>_scalar<q>")]
+  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
 )
 
 (define_insn "*aarch64_fma4_elt_to_64v2df"
@@ -1712,7 +1712,7 @@
       (match_operand:VMUL 3 "register_operand" "0")))]
   "TARGET_SIMD"
   "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
-  [(set_attr "type" "neon<fp>_mla_<Vetype>_scalar<q>")]
+  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
 )
 
 (define_insn "*aarch64_fnma4_elt_to_64v2df"
@@ -3101,20 +3101,18 @@
   [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
 )
 
-;; vmulxq_lane_f64
+;; vmulxq_lane
 
-(define_insn "*aarch64_mulx_elt_to_64v2df"
-  [(set (match_operand:V2DF 0 "register_operand" "=w")
-     (unspec:V2DF
-      [(match_operand:V2DF 1 "register_operand" "w")
-       (vec_duplicate:V2DF
-	 (match_operand:DF 2 "register_operand" "w"))]
+(define_insn "*aarch64_mulx_elt_from_dup<mode>"
+  [(set (match_operand:VHSDF 0 "register_operand" "=w")
+     (unspec:VHSDF
+      [(match_operand:VHSDF 1 "register_operand" "w")
+       (vec_duplicate:VHSDF
+	 (match_operand:<VEL> 2 "register_operand" "w"))]
       UNSPEC_FMULX))]
   "TARGET_SIMD"
-  {
-    return "fmulx\t%0.2d, %1.2d, %2.d[0]";
-  }
-  [(set_attr "type" "neon_fp_mul_d_scalar_q")]
+  "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
+  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 )
 
 ;; vmulxs_lane_f32, vmulxs_laneq_f32
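The generalized pattern now covers the half-precision and single-precision vector modes as well as the original V2DF case. A hedged sketch of reaching the fmulx-by-element form from C (function name is hypothetical; assumes -march=armv8.2-a+fp16):

    #include <arm_neon.h>

    /* out[i] = a[i] * b with FMULX semantics (0 * Inf yields +/-2.0
       rather than NaN).  The broadcast scalar is expected to let combine
       match the renamed *aarch64_mulx_elt_from_dup<mode> pattern and
       emit "fmulx v0.4h, v0.4h, v1.h[0]".  */
    float16x4_t
    mulx_by_scalar (float16x4_t a, float16_t b)
    {
      return vmulx_n_f16 (a, b);
    }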
gcc/config/aarch64/arm_neon.h
@@ -26773,6 +26773,160 @@ vfmsq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
   return __builtin_aarch64_fnmav8hf (__b, __c, __a);
 }
 
+/* ARMv8.2-A FP16 lane vector intrinsics.  */
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vfma_lane_f16 (float16x4_t __a, float16x4_t __b,
+	       float16x4_t __c, const int __lane)
+{
+  return vfma_f16 (__a, __b, __aarch64_vdup_lane_f16 (__c, __lane));
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vfmaq_lane_f16 (float16x8_t __a, float16x8_t __b,
+		float16x4_t __c, const int __lane)
+{
+  return vfmaq_f16 (__a, __b, __aarch64_vdupq_lane_f16 (__c, __lane));
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vfma_laneq_f16 (float16x4_t __a, float16x4_t __b,
+		float16x8_t __c, const int __lane)
+{
+  return vfma_f16 (__a, __b, __aarch64_vdup_laneq_f16 (__c, __lane));
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vfmaq_laneq_f16 (float16x8_t __a, float16x8_t __b,
+		 float16x8_t __c, const int __lane)
+{
+  return vfmaq_f16 (__a, __b, __aarch64_vdupq_laneq_f16 (__c, __lane));
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vfma_n_f16 (float16x4_t __a, float16x4_t __b, float16_t __c)
+{
+  return vfma_f16 (__a, __b, vdup_n_f16 (__c));
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vfmaq_n_f16 (float16x8_t __a, float16x8_t __b, float16_t __c)
+{
+  return vfmaq_f16 (__a, __b, vdupq_n_f16 (__c));
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vfms_lane_f16 (float16x4_t __a, float16x4_t __b,
+	       float16x4_t __c, const int __lane)
+{
+  return vfms_f16 (__a, __b, __aarch64_vdup_lane_f16 (__c, __lane));
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vfmsq_lane_f16 (float16x8_t __a, float16x8_t __b,
+		float16x4_t __c, const int __lane)
+{
+  return vfmsq_f16 (__a, __b, __aarch64_vdupq_lane_f16 (__c, __lane));
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vfms_laneq_f16 (float16x4_t __a, float16x4_t __b,
+		float16x8_t __c, const int __lane)
+{
+  return vfms_f16 (__a, __b, __aarch64_vdup_laneq_f16 (__c, __lane));
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vfmsq_laneq_f16 (float16x8_t __a, float16x8_t __b,
+		 float16x8_t __c, const int __lane)
+{
+  return vfmsq_f16 (__a, __b, __aarch64_vdupq_laneq_f16 (__c, __lane));
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vfms_n_f16 (float16x4_t __a, float16x4_t __b, float16_t __c)
+{
+  return vfms_f16 (__a, __b, vdup_n_f16 (__c));
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vfmsq_n_f16 (float16x8_t __a, float16x8_t __b, float16_t __c)
+{
+  return vfmsq_f16 (__a, __b, vdupq_n_f16 (__c));
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vmul_lane_f16 (float16x4_t __a, float16x4_t __b, const int __lane)
+{
+  return vmul_f16 (__a, vdup_n_f16 (__aarch64_vget_lane_any (__b, __lane)));
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vmulq_lane_f16 (float16x8_t __a, float16x4_t __b, const int __lane)
+{
+  return vmulq_f16 (__a, vdupq_n_f16 (__aarch64_vget_lane_any (__b, __lane)));
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vmul_laneq_f16 (float16x4_t __a, float16x8_t __b, const int __lane)
+{
+  return vmul_f16 (__a, vdup_n_f16 (__aarch64_vget_lane_any (__b, __lane)));
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vmulq_laneq_f16 (float16x8_t __a, float16x8_t __b, const int __lane)
+{
+  return vmulq_f16 (__a, vdupq_n_f16 (__aarch64_vget_lane_any (__b, __lane)));
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vmul_n_f16 (float16x4_t __a, float16_t __b)
+{
+  return vmul_lane_f16 (__a, vdup_n_f16 (__b), 0);
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vmulq_n_f16 (float16x8_t __a, float16_t __b)
+{
+  return vmulq_laneq_f16 (__a, vdupq_n_f16 (__b), 0);
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vmulx_lane_f16 (float16x4_t __a, float16x4_t __b, const int __lane)
+{
+  return vmulx_f16 (__a, __aarch64_vdup_lane_f16 (__b, __lane));
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vmulxq_lane_f16 (float16x8_t __a, float16x4_t __b, const int __lane)
+{
+  return vmulxq_f16 (__a, __aarch64_vdupq_lane_f16 (__b, __lane));
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vmulx_laneq_f16 (float16x4_t __a, float16x8_t __b, const int __lane)
+{
+  return vmulx_f16 (__a, __aarch64_vdup_laneq_f16 (__b, __lane));
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vmulxq_laneq_f16 (float16x8_t __a, float16x8_t __b, const int __lane)
+{
+  return vmulxq_f16 (__a, __aarch64_vdupq_laneq_f16 (__b, __lane));
+}
+
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
+vmulx_n_f16 (float16x4_t __a, float16_t __b)
+{
+  return vmulx_f16 (__a, vdup_n_f16 (__b));
+}
+
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
+vmulxq_n_f16 (float16x8_t __a, float16_t __b)
+{
+  return vmulxq_f16 (__a, vdupq_n_f16 (__b));
+}
+
 #pragma GCC pop_options
 
 #undef __aarch64_vget_lane_any
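All of the lane and _n forms above are thin wrappers that broadcast the selected element (or scalar) and then call the plain vector intrinsic, leaving the by-element instruction selection to the patterns updated in aarch64-simd.md. A short usage sketch, not part of the commit (function name is hypothetical; assumes -march=armv8.2-a+fp16):

    #include <arm_neon.h>

    /* out[i] = acc[i] + x[i] * y[3] for all four half-precision lanes;
       expected to become a single "fmla v0.4h, v1.4h, v2.h[3]".  */
    float16x4_t
    fma_by_lane3 (float16x4_t acc, float16x4_t x, float16x4_t y)
    {
      return vfma_lane_f16 (acc, x, y, 3);
    }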
gcc/config/aarch64/iterators.md
@@ -218,7 +218,10 @@
 (define_mode_iterator DX [DI DF])
 
 ;; Modes available for <f>mul lane operations.
-(define_mode_iterator VMUL [V4HI V8HI V2SI V4SI V2SF V4SF V2DF])
+(define_mode_iterator VMUL [V4HI V8HI V2SI V4SI
+			    (V4HF "TARGET_SIMD_F16INST")
+			    (V8HF "TARGET_SIMD_F16INST")
+			    V2SF V4SF V2DF])
 
 ;; Modes available for <f>mul lane operations changing lane count.
 (define_mode_iterator VMUL_CHANGE_NLANES [V4HI V8HI V2SI V4SI V2SF V4SF])
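Because V4HF and V8HF enter VMUL only under TARGET_SIMD_F16INST, the half-precision variants of these patterns exist only when the FP16 extension is compiled in. Portable user code can key off the corresponding ACLE feature macro (the macro comes from ACLE, not from this patch); a sketch:

    #include <arm_neon.h>

    #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
    /* The FP16 lane forms are available here.  */
    float16x4_t
    mul_by_lane0 (float16x4_t a, float16x4_t b)
    {
      return vmul_lane_f16 (a, b, 0);
    }
    #endif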
@@ -730,6 +733,7 @@
 		    (V4HI "") (V8HI "")
 		    (V2SI "") (V4SI "")
 		    (DI "") (V2DI "")
+		    (V4HF "f") (V8HF "f")
 		    (V2SF "f") (V4SF "f")
 		    (V2DF "f") (DF "f")])
 
@@ -738,6 +742,7 @@
 		     (V4HI "") (V8HI "")
 		     (V2SI "") (V4SI "")
 		     (DI "") (V2DI "")
+		     (V4HF "_fp") (V8HF "_fp")
 		     (V2SF "_fp") (V4SF "_fp")
 		     (V2DF "_fp") (DF "_fp")
 		     (SF "_fp")])