mirror of git://gcc.gnu.org/git/gcc.git
i386: Enable AVX512 memory broadcast for FNMADD
Many AVX512 vector operations can broadcast from a scalar memory source. This patch enables memory broadcast for FNMADD operations. In order to support AVX512 memory broadcast for FNMADD, FNMADD builtin functions are also added, instead of passing the negated value to FMA builtin functions. gcc/ PR target/72782 * config/i386/avx512fintrin.h (_mm512_fnmadd_round_pd): Use __builtin_ia32_vfnmaddpd512_mask. (_mm512_mask_fnmadd_round_pd): Likewise. (_mm512_fnmadd_pd): Likewise. (_mm512_mask_fnmadd_pd): Likewise. (_mm512_maskz_fnmadd_round_pd): Use __builtin_ia32_vfnmaddpd512_maskz. (_mm512_maskz_fnmadd_pd): Likewise. (_mm512_fnmadd_round_ps): Use __builtin_ia32_vfnmaddps512_mask. (_mm512_mask_fnmadd_round_ps): Likewise. (_mm512_fnmadd_ps): Likewise. (_mm512_mask_fnmadd_ps): Likewise. (_mm512_maskz_fnmadd_round_ps): Use __builtin_ia32_vfnmaddps512_maskz. (_mm512_maskz_fnmadd_ps): Likewise. * config/i386/avx512vlintrin.h (_mm256_mask_fnmadd_pd): Use __builtin_ia32_vfnmaddpd256_mask. (_mm256_maskz_fnmadd_pd): Use __builtin_ia32_vfnmaddpd256_maskz. (_mm_mask_fnmadd_pd): Use __builtin_ia32_vfmaddpd128_mask (_mm_maskz_fnmadd_pd): Use __builtin_ia32_vfnmaddpd128_maskz. (_mm256_mask_fnmadd_ps): Use __builtin_ia32_vfnmaddps256_mask. (_mm256_mask_fnmadd_ps): Use __builtin_ia32_vfnmaddps256_mask. (_mm256_maskz_fnmadd_ps): Use __builtin_ia32_vfnmaddps256_maskz. (_mm_mask_fnmadd_ps): Use __builtin_ia32_vfnmaddps128_mask. (_mm_maskz_fnmadd_ps): Use __builtin_ia32_vfnmaddps128_maskz. * config/i386/fmaintrin.h (_mm_fnmadd_pd): Use __builtin_ia32_vfnmaddpd. (_mm256_fnmadd_pd): Use __builtin_ia32_vfnmaddpd256. (_mm_fnmadd_ps): Use __builtin_ia32_vfnmaddps. (_mm256_fnmadd_ps): Use __builtin_ia32_vfnmaddps256. (_mm_fnmadd_sd): Use __builtin_ia32_vfnmaddsd3. (_mm_fnmadd_ss): Use __builtin_ia32_vfnmaddss3. * config/i386/i386-builtin.def: Add __builtin_ia32_vfnmaddpd256_mask, __builtin_ia32_vfnmaddpd256_maskz, __builtin_ia32_vfnmaddpd128_mask, __builtin_ia32_vfnmaddpd128_maskz, __builtin_ia32_vfnmaddps256_mask, __builtin_ia32_vfnmaddps256_maskz, __builtin_ia32_vfnmaddps128_mask, __builtin_ia32_vfnmaddps128_maskz, __builtin_ia32_vfnmaddpd512_mask, __builtin_ia32_vfnmaddpd512_maskz, __builtin_ia32_vfnmaddps512_mask, __builtin_ia32_vfnmaddps512_maskz, __builtin_ia32_vfnmaddss3, __builtin_ia32_vfnmaddsd3, __builtin_ia32_vfnmaddps, __builtin_ia32_vfnmaddpd, __builtin_ia32_vfnmaddps256 and. __builtin_ia32_vfnmaddpd256. * config/i386/sse.md (fma4i_fnmadd_<mode>): New. (<avx512>_fnmadd_<mode>_maskz<round_expand_name>): Likewise. (*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_1): Likewise. (*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_2): Likewise. (*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_3): Likewise. (fmai_vmfnmadd_<mode><round_name>): Likewise. gcc/testsuite/ PR target/72782 * gcc.target/i386/avx512f-fnmadd-df-zmm-1.c: New test. * gcc.target/i386/avx512f-fnmadd-sf-zmm-1.c: Likewise. * gcc.target/i386/avx512f-fnmadd-sf-zmm-2.c: Likewise. * gcc.target/i386/avx512f-fnmadd-sf-zmm-3.c: Likewise. * gcc.target/i386/avx512f-fnmadd-sf-zmm-4.c: Likewise. * gcc.target/i386/avx512f-fnmadd-sf-zmm-5.c: Likewise. * gcc.target/i386/avx512f-fnmadd-sf-zmm-6.c: Likewise. * gcc.target/i386/avx512f-fnmadd-sf-zmm-7.c: Likewise. * gcc.target/i386/avx512f-fnmadd-sf-zmm-8.c: Likewise. * gcc.target/i386/avx512vl-fnmadd-sf-xmm-1.c: Likewise. * gcc.target/i386/avx512vl-fnmadd-sf-ymm-1.c: Likewise. From-SVN: r265357
This commit is contained in:
parent
fe7f972d6e
commit
5ca9497788
|
|
@ -1,3 +1,64 @@
|
||||||
|
2018-10-21 H.J. Lu <hongjiu.lu@intel.com>
|
||||||
|
|
||||||
|
PR target/72782
|
||||||
|
* config/i386/avx512fintrin.h (_mm512_fnmadd_round_pd): Use
|
||||||
|
__builtin_ia32_vfnmaddpd512_mask.
|
||||||
|
(_mm512_mask_fnmadd_round_pd): Likewise.
|
||||||
|
(_mm512_fnmadd_pd): Likewise.
|
||||||
|
(_mm512_mask_fnmadd_pd): Likewise.
|
||||||
|
(_mm512_maskz_fnmadd_round_pd): Use
|
||||||
|
__builtin_ia32_vfnmaddpd512_maskz.
|
||||||
|
(_mm512_maskz_fnmadd_pd): Likewise.
|
||||||
|
(_mm512_fnmadd_round_ps): Use __builtin_ia32_vfnmaddps512_mask.
|
||||||
|
(_mm512_mask_fnmadd_round_ps): Likewise.
|
||||||
|
(_mm512_fnmadd_ps): Likewise.
|
||||||
|
(_mm512_mask_fnmadd_ps): Likewise.
|
||||||
|
(_mm512_maskz_fnmadd_round_ps): Use
|
||||||
|
__builtin_ia32_vfnmaddps512_maskz.
|
||||||
|
(_mm512_maskz_fnmadd_ps): Likewise.
|
||||||
|
* config/i386/avx512vlintrin.h (_mm256_mask_fnmadd_pd): Use
|
||||||
|
__builtin_ia32_vfnmaddpd256_mask.
|
||||||
|
(_mm256_maskz_fnmadd_pd): Use __builtin_ia32_vfnmaddpd256_maskz.
|
||||||
|
(_mm_mask_fnmadd_pd): Use __builtin_ia32_vfmaddpd128_mask
|
||||||
|
(_mm_maskz_fnmadd_pd): Use __builtin_ia32_vfnmaddpd128_maskz.
|
||||||
|
(_mm256_mask_fnmadd_ps): Use __builtin_ia32_vfnmaddps256_mask.
|
||||||
|
(_mm256_mask_fnmadd_ps): Use __builtin_ia32_vfnmaddps256_mask.
|
||||||
|
(_mm256_maskz_fnmadd_ps): Use __builtin_ia32_vfnmaddps256_maskz.
|
||||||
|
(_mm_mask_fnmadd_ps): Use __builtin_ia32_vfnmaddps128_mask.
|
||||||
|
(_mm_maskz_fnmadd_ps): Use __builtin_ia32_vfnmaddps128_maskz.
|
||||||
|
* config/i386/fmaintrin.h (_mm_fnmadd_pd): Use
|
||||||
|
__builtin_ia32_vfnmaddpd.
|
||||||
|
(_mm256_fnmadd_pd): Use __builtin_ia32_vfnmaddpd256.
|
||||||
|
(_mm_fnmadd_ps): Use __builtin_ia32_vfnmaddps.
|
||||||
|
(_mm256_fnmadd_ps): Use __builtin_ia32_vfnmaddps256.
|
||||||
|
(_mm_fnmadd_sd): Use __builtin_ia32_vfnmaddsd3.
|
||||||
|
(_mm_fnmadd_ss): Use __builtin_ia32_vfnmaddss3.
|
||||||
|
* config/i386/i386-builtin.def: Add
|
||||||
|
__builtin_ia32_vfnmaddpd256_mask,
|
||||||
|
__builtin_ia32_vfnmaddpd256_maskz,
|
||||||
|
__builtin_ia32_vfnmaddpd128_mask,
|
||||||
|
__builtin_ia32_vfnmaddpd128_maskz,
|
||||||
|
__builtin_ia32_vfnmaddps256_mask,
|
||||||
|
__builtin_ia32_vfnmaddps256_maskz,
|
||||||
|
__builtin_ia32_vfnmaddps128_mask,
|
||||||
|
__builtin_ia32_vfnmaddps128_maskz,
|
||||||
|
__builtin_ia32_vfnmaddpd512_mask,
|
||||||
|
__builtin_ia32_vfnmaddpd512_maskz,
|
||||||
|
__builtin_ia32_vfnmaddps512_mask,
|
||||||
|
__builtin_ia32_vfnmaddps512_maskz, __builtin_ia32_vfnmaddss3,
|
||||||
|
__builtin_ia32_vfnmaddsd3, __builtin_ia32_vfnmaddps,
|
||||||
|
__builtin_ia32_vfnmaddpd, __builtin_ia32_vfnmaddps256 and.
|
||||||
|
__builtin_ia32_vfnmaddpd256.
|
||||||
|
* config/i386/sse.md (fma4i_fnmadd_<mode>): New.
|
||||||
|
(<avx512>_fnmadd_<mode>_maskz<round_expand_name>): Likewise.
|
||||||
|
(*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_1):
|
||||||
|
Likewise.
|
||||||
|
(*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_2):
|
||||||
|
Likewise.
|
||||||
|
(*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_3):
|
||||||
|
Likewise.
|
||||||
|
(fmai_vmfnmadd_<mode><round_name>): Likewise.
|
||||||
|
|
||||||
2018-10-21 H.J. Lu <hongjiu.lu@intel.com>
|
2018-10-21 H.J. Lu <hongjiu.lu@intel.com>
|
||||||
|
|
||||||
PR target/72782
|
PR target/72782
|
||||||
|
|
|
||||||
|
|
@ -3613,10 +3613,10 @@ extern __inline __m512d
|
||||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
_mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
|
_mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
|
||||||
{
|
{
|
||||||
return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
|
return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
|
||||||
(__v8df) __B,
|
(__v8df) __B,
|
||||||
(__v8df) __C,
|
(__v8df) __C,
|
||||||
(__mmask8) -1, __R);
|
(__mmask8) -1, __R);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern __inline __m512d
|
extern __inline __m512d
|
||||||
|
|
@ -3635,10 +3635,10 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
_mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
|
_mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
|
||||||
__mmask8 __U, const int __R)
|
__mmask8 __U, const int __R)
|
||||||
{
|
{
|
||||||
return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
|
return (__m512d) __builtin_ia32_vfnmaddpd512_mask3 ((__v8df) __A,
|
||||||
(__v8df) __B,
|
(__v8df) __B,
|
||||||
(__v8df) __C,
|
(__v8df) __C,
|
||||||
(__mmask8) __U, __R);
|
(__mmask8) __U, __R);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern __inline __m512d
|
extern __inline __m512d
|
||||||
|
|
@ -3646,20 +3646,20 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
_mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
|
_mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
|
||||||
__m512d __C, const int __R)
|
__m512d __C, const int __R)
|
||||||
{
|
{
|
||||||
return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
|
return (__m512d) __builtin_ia32_vfnmaddpd512_maskz ((__v8df) __A,
|
||||||
(__v8df) __B,
|
(__v8df) __B,
|
||||||
(__v8df) __C,
|
(__v8df) __C,
|
||||||
(__mmask8) __U, __R);
|
(__mmask8) __U, __R);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern __inline __m512
|
extern __inline __m512
|
||||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
_mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
|
_mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
|
||||||
{
|
{
|
||||||
return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
|
return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
|
||||||
(__v16sf) __B,
|
(__v16sf) __B,
|
||||||
(__v16sf) __C,
|
(__v16sf) __C,
|
||||||
(__mmask16) -1, __R);
|
(__mmask16) -1, __R);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern __inline __m512
|
extern __inline __m512
|
||||||
|
|
@ -3678,10 +3678,10 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
_mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
|
_mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
|
||||||
__mmask16 __U, const int __R)
|
__mmask16 __U, const int __R)
|
||||||
{
|
{
|
||||||
return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
|
return (__m512) __builtin_ia32_vfnmaddps512_mask3 ((__v16sf) __A,
|
||||||
(__v16sf) __B,
|
(__v16sf) __B,
|
||||||
(__v16sf) __C,
|
(__v16sf) __C,
|
||||||
(__mmask16) __U, __R);
|
(__mmask16) __U, __R);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern __inline __m512
|
extern __inline __m512
|
||||||
|
|
@ -3689,10 +3689,10 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
_mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
|
_mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
|
||||||
__m512 __C, const int __R)
|
__m512 __C, const int __R)
|
||||||
{
|
{
|
||||||
return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
|
return (__m512) __builtin_ia32_vfnmaddps512_maskz ((__v16sf) __A,
|
||||||
(__v16sf) __B,
|
(__v16sf) __B,
|
||||||
(__v16sf) __C,
|
(__v16sf) __C,
|
||||||
(__mmask16) __U, __R);
|
(__mmask16) __U, __R);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern __inline __m512d
|
extern __inline __m512d
|
||||||
|
|
@ -3878,28 +3878,28 @@ _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
|
||||||
(__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, -(C), U, R)
|
(__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, -(C), U, R)
|
||||||
|
|
||||||
#define _mm512_fnmadd_round_pd(A, B, C, R) \
|
#define _mm512_fnmadd_round_pd(A, B, C, R) \
|
||||||
(__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, C, -1, R)
|
(__m512d)__builtin_ia32_vfnmaddpd512_mask(A, B, C, -1, R)
|
||||||
|
|
||||||
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
|
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
|
||||||
(__m512d)__builtin_ia32_vfnmaddpd512_mask(-(A), B, C, U, R)
|
(__m512d)__builtin_ia32_vfnmaddpd512_mask(A, B, C, U, R)
|
||||||
|
|
||||||
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
|
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
|
||||||
(__m512d)__builtin_ia32_vfmaddpd512_mask3(-(A), B, C, U, R)
|
(__m512d)__builtin_ia32_vfnmaddpd512_mask3(A, B, C, U, R)
|
||||||
|
|
||||||
#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
|
#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
|
||||||
(__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, C, U, R)
|
(__m512d)__builtin_ia32_vfnmaddpd512_maskz(A, B, C, U, R)
|
||||||
|
|
||||||
#define _mm512_fnmadd_round_ps(A, B, C, R) \
|
#define _mm512_fnmadd_round_ps(A, B, C, R) \
|
||||||
(__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, C, -1, R)
|
(__m512)__builtin_ia32_vfnmaddps512_mask(A, B, C, -1, R)
|
||||||
|
|
||||||
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
|
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
|
||||||
(__m512)__builtin_ia32_vfnmaddps512_mask(-(A), B, C, U, R)
|
(__m512)__builtin_ia32_vfnmaddps512_mask(A, B, C, U, R)
|
||||||
|
|
||||||
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
|
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
|
||||||
(__m512)__builtin_ia32_vfmaddps512_mask3(-(A), B, C, U, R)
|
(__m512)__builtin_ia32_vfnmaddps512_mask3(A, B, C, U, R)
|
||||||
|
|
||||||
#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
|
#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
|
||||||
(__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, C, U, R)
|
(__m512)__builtin_ia32_vfnmaddps512_maskz(A, B, C, U, R)
|
||||||
|
|
||||||
#define _mm512_fnmsub_round_pd(A, B, C, R) \
|
#define _mm512_fnmsub_round_pd(A, B, C, R) \
|
||||||
(__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, -(C), -1, R)
|
(__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, -(C), -1, R)
|
||||||
|
|
@ -12680,11 +12680,11 @@ extern __inline __m512d
|
||||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
_mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
|
_mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
|
||||||
{
|
{
|
||||||
return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
|
return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
|
||||||
(__v8df) __B,
|
(__v8df) __B,
|
||||||
(__v8df) __C,
|
(__v8df) __C,
|
||||||
(__mmask8) -1,
|
(__mmask8) -1,
|
||||||
_MM_FROUND_CUR_DIRECTION);
|
_MM_FROUND_CUR_DIRECTION);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern __inline __m512d
|
extern __inline __m512d
|
||||||
|
|
@ -12702,33 +12702,33 @@ extern __inline __m512d
|
||||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
_mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
|
_mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
|
||||||
{
|
{
|
||||||
return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
|
return (__m512d) __builtin_ia32_vfnmaddpd512_mask3 ((__v8df) __A,
|
||||||
(__v8df) __B,
|
(__v8df) __B,
|
||||||
(__v8df) __C,
|
(__v8df) __C,
|
||||||
(__mmask8) __U,
|
(__mmask8) __U,
|
||||||
_MM_FROUND_CUR_DIRECTION);
|
_MM_FROUND_CUR_DIRECTION);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern __inline __m512d
|
extern __inline __m512d
|
||||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
_mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
|
_mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
|
||||||
{
|
{
|
||||||
return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
|
return (__m512d) __builtin_ia32_vfnmaddpd512_maskz ((__v8df) __A,
|
||||||
(__v8df) __B,
|
(__v8df) __B,
|
||||||
(__v8df) __C,
|
(__v8df) __C,
|
||||||
(__mmask8) __U,
|
(__mmask8) __U,
|
||||||
_MM_FROUND_CUR_DIRECTION);
|
_MM_FROUND_CUR_DIRECTION);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern __inline __m512
|
extern __inline __m512
|
||||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
_mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
|
_mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
|
||||||
{
|
{
|
||||||
return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
|
return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
|
||||||
(__v16sf) __B,
|
(__v16sf) __B,
|
||||||
(__v16sf) __C,
|
(__v16sf) __C,
|
||||||
(__mmask16) -1,
|
(__mmask16) -1,
|
||||||
_MM_FROUND_CUR_DIRECTION);
|
_MM_FROUND_CUR_DIRECTION);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern __inline __m512
|
extern __inline __m512
|
||||||
|
|
@ -12746,22 +12746,22 @@ extern __inline __m512
|
||||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
_mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
|
_mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
|
||||||
{
|
{
|
||||||
return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
|
return (__m512) __builtin_ia32_vfnmaddps512_mask3 ((__v16sf) __A,
|
||||||
(__v16sf) __B,
|
(__v16sf) __B,
|
||||||
(__v16sf) __C,
|
(__v16sf) __C,
|
||||||
(__mmask16) __U,
|
(__mmask16) __U,
|
||||||
_MM_FROUND_CUR_DIRECTION);
|
_MM_FROUND_CUR_DIRECTION);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern __inline __m512
|
extern __inline __m512
|
||||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
_mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
|
_mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
|
||||||
{
|
{
|
||||||
return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
|
return (__m512) __builtin_ia32_vfnmaddps512_maskz ((__v16sf) __A,
|
||||||
(__v16sf) __B,
|
(__v16sf) __B,
|
||||||
(__v16sf) __C,
|
(__v16sf) __C,
|
||||||
(__mmask16) __U,
|
(__mmask16) __U,
|
||||||
_MM_FROUND_CUR_DIRECTION);
|
_MM_FROUND_CUR_DIRECTION);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern __inline __m512d
|
extern __inline __m512d
|
||||||
|
|
|
||||||
|
|
@ -4525,10 +4525,10 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
_mm256_mask3_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C,
|
_mm256_mask3_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C,
|
||||||
__mmask8 __U)
|
__mmask8 __U)
|
||||||
{
|
{
|
||||||
return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A,
|
return (__m256d) __builtin_ia32_vfnmaddpd256_mask3 ((__v4df) __A,
|
||||||
(__v4df) __B,
|
(__v4df) __B,
|
||||||
(__v4df) __C,
|
(__v4df) __C,
|
||||||
(__mmask8) __U);
|
(__mmask8) __U);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern __inline __m256d
|
extern __inline __m256d
|
||||||
|
|
@ -4536,10 +4536,10 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
_mm256_maskz_fnmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
|
_mm256_maskz_fnmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
|
||||||
__m256d __C)
|
__m256d __C)
|
||||||
{
|
{
|
||||||
return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
|
return (__m256d) __builtin_ia32_vfnmaddpd256_maskz ((__v4df) __A,
|
||||||
(__v4df) __B,
|
(__v4df) __B,
|
||||||
(__v4df) __C,
|
(__v4df) __C,
|
||||||
(__mmask8) __U);
|
(__mmask8) __U);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern __inline __m128d
|
extern __inline __m128d
|
||||||
|
|
@ -4558,10 +4558,10 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
_mm_mask3_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C,
|
_mm_mask3_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C,
|
||||||
__mmask8 __U)
|
__mmask8 __U)
|
||||||
{
|
{
|
||||||
return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A,
|
return (__m128d) __builtin_ia32_vfnmaddpd128_mask3 ((__v2df) __A,
|
||||||
(__v2df) __B,
|
(__v2df) __B,
|
||||||
(__v2df) __C,
|
(__v2df) __C,
|
||||||
(__mmask8) __U);
|
(__mmask8) __U);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern __inline __m128d
|
extern __inline __m128d
|
||||||
|
|
@ -4569,10 +4569,10 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
_mm_maskz_fnmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
|
_mm_maskz_fnmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
|
||||||
__m128d __C)
|
__m128d __C)
|
||||||
{
|
{
|
||||||
return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
|
return (__m128d) __builtin_ia32_vfnmaddpd128_maskz ((__v2df) __A,
|
||||||
(__v2df) __B,
|
(__v2df) __B,
|
||||||
(__v2df) __C,
|
(__v2df) __C,
|
||||||
(__mmask8) __U);
|
(__mmask8) __U);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern __inline __m256
|
extern __inline __m256
|
||||||
|
|
@ -4591,10 +4591,10 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
_mm256_mask3_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C,
|
_mm256_mask3_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C,
|
||||||
__mmask8 __U)
|
__mmask8 __U)
|
||||||
{
|
{
|
||||||
return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A,
|
return (__m256) __builtin_ia32_vfnmaddps256_mask3 ((__v8sf) __A,
|
||||||
(__v8sf) __B,
|
(__v8sf) __B,
|
||||||
(__v8sf) __C,
|
(__v8sf) __C,
|
||||||
(__mmask8) __U);
|
(__mmask8) __U);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern __inline __m256
|
extern __inline __m256
|
||||||
|
|
@ -4602,10 +4602,10 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
_mm256_maskz_fnmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
|
_mm256_maskz_fnmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
|
||||||
__m256 __C)
|
__m256 __C)
|
||||||
{
|
{
|
||||||
return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
|
return (__m256) __builtin_ia32_vfnmaddps256_maskz ((__v8sf) __A,
|
||||||
(__v8sf) __B,
|
(__v8sf) __B,
|
||||||
(__v8sf) __C,
|
(__v8sf) __C,
|
||||||
(__mmask8) __U);
|
(__mmask8) __U);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern __inline __m128
|
extern __inline __m128
|
||||||
|
|
@ -4622,20 +4622,20 @@ extern __inline __m128
|
||||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
_mm_mask3_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
|
_mm_mask3_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
|
||||||
{
|
{
|
||||||
return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A,
|
return (__m128) __builtin_ia32_vfnmaddps128_mask3 ((__v4sf) __A,
|
||||||
(__v4sf) __B,
|
(__v4sf) __B,
|
||||||
(__v4sf) __C,
|
(__v4sf) __C,
|
||||||
(__mmask8) __U);
|
(__mmask8) __U);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern __inline __m128
|
extern __inline __m128
|
||||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
_mm_maskz_fnmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
|
_mm_maskz_fnmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
|
||||||
{
|
{
|
||||||
return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
|
return (__m128) __builtin_ia32_vfnmaddps128_maskz ((__v4sf) __A,
|
||||||
(__v4sf) __B,
|
(__v4sf) __B,
|
||||||
(__v4sf) __C,
|
(__v4sf) __C,
|
||||||
(__mmask8) __U);
|
(__mmask8) __U);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern __inline __m256d
|
extern __inline __m256d
|
||||||
|
|
|
||||||
|
|
@ -134,48 +134,48 @@ extern __inline __m128d
|
||||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
_mm_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C)
|
_mm_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||||
{
|
{
|
||||||
return (__m128d)__builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B,
|
return (__m128d)__builtin_ia32_vfnmaddpd ((__v2df)__A, (__v2df)__B,
|
||||||
(__v2df)__C);
|
(__v2df)__C);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern __inline __m256d
|
extern __inline __m256d
|
||||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
_mm256_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C)
|
_mm256_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||||
{
|
{
|
||||||
return (__m256d)__builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B,
|
return (__m256d)__builtin_ia32_vfnmaddpd256 ((__v4df)__A, (__v4df)__B,
|
||||||
(__v4df)__C);
|
(__v4df)__C);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern __inline __m128
|
extern __inline __m128
|
||||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
_mm_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C)
|
_mm_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||||
{
|
{
|
||||||
return (__m128)__builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B,
|
return (__m128)__builtin_ia32_vfnmaddps ((__v4sf)__A, (__v4sf)__B,
|
||||||
(__v4sf)__C);
|
(__v4sf)__C);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern __inline __m256
|
extern __inline __m256
|
||||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
_mm256_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C)
|
_mm256_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||||
{
|
{
|
||||||
return (__m256)__builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B,
|
return (__m256)__builtin_ia32_vfnmaddps256 ((__v8sf)__A, (__v8sf)__B,
|
||||||
(__v8sf)__C);
|
(__v8sf)__C);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern __inline __m128d
|
extern __inline __m128d
|
||||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
_mm_fnmadd_sd (__m128d __A, __m128d __B, __m128d __C)
|
_mm_fnmadd_sd (__m128d __A, __m128d __B, __m128d __C)
|
||||||
{
|
{
|
||||||
return (__m128d)__builtin_ia32_vfmaddsd3 ((__v2df)__A, -(__v2df)__B,
|
return (__m128d)__builtin_ia32_vfnmaddsd3 ((__v2df)__A, (__v2df)__B,
|
||||||
(__v2df)__C);
|
(__v2df)__C);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern __inline __m128
|
extern __inline __m128
|
||||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
_mm_fnmadd_ss (__m128 __A, __m128 __B, __m128 __C)
|
_mm_fnmadd_ss (__m128 __A, __m128 __B, __m128 __C)
|
||||||
{
|
{
|
||||||
return (__m128)__builtin_ia32_vfmaddss3 ((__v4sf)__A, -(__v4sf)__B,
|
return (__m128)__builtin_ia32_vfnmaddss3 ((__v4sf)__A, (__v4sf)__B,
|
||||||
(__v4sf)__C);
|
(__v4sf)__C);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern __inline __m128d
|
extern __inline __m128d
|
||||||
|
|
|
||||||
|
|
@ -1916,9 +1916,19 @@ BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask, "__builtin_i
|
||||||
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
|
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
|
||||||
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_maskz, "__builtin_ia32_vfmsubps128_maskz", IX86_BUILTIN_VFMSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
|
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_maskz, "__builtin_ia32_vfmsubps128_maskz", IX86_BUILTIN_VFMSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
|
||||||
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI)
|
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask3, "__builtin_ia32_vfnmaddpd256_mask3", IX86_BUILTIN_VFNMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_maskz,
|
||||||
|
"__builtin_ia32_vfnmaddpd256_maskz", IX86_BUILTIN_VFNMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI)
|
||||||
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
|
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask3, "__builtin_ia32_vfnmaddpd128_mask3", IX86_BUILTIN_VFNMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_maskz,
|
||||||
|
"__builtin_ia32_vfnmaddpd128_maskz", IX86_BUILTIN_VFNMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
|
||||||
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI)
|
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask3, "__builtin_ia32_vfnmaddps256_mask3", IX86_BUILTIN_VFNMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_maskz, "__builtin_ia32_vfnmaddps256_maskz", IX86_BUILTIN_VFNMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI)
|
||||||
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
|
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask3, "__builtin_ia32_vfnmaddps128_mask3", IX86_BUILTIN_VFNMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_maskz, "__builtin_ia32_vfnmaddps128_maskz", IX86_BUILTIN_VFNMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
|
||||||
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI)
|
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI)
|
||||||
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI)
|
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI)
|
||||||
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
|
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
|
||||||
|
|
@ -2783,7 +2793,11 @@ BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask_round, "__buil
|
||||||
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
|
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
|
||||||
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_maskz_round, "__builtin_ia32_vfmsubps512_maskz", IX86_BUILTIN_VFMSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
|
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_maskz_round, "__builtin_ia32_vfmsubps512_maskz", IX86_BUILTIN_VFMSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
|
||||||
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
|
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask3_round, "__builtin_ia32_vfnmaddpd512_mask3", IX86_BUILTIN_VFNMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_maskz_round, "__builtin_ia32_vfnmaddpd512_maskz", IX86_BUILTIN_VFNMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
|
||||||
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
|
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask3_round, "__builtin_ia32_vfnmaddps512_mask3", IX86_BUILTIN_VFNMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_maskz_round, "__builtin_ia32_vfnmaddps512_maskz", IX86_BUILTIN_VFNMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
|
||||||
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
|
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
|
||||||
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
|
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
|
||||||
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
|
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
|
||||||
|
|
@ -2869,6 +2883,8 @@ BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf, "__builtin_ia32_vfmaddss
|
||||||
BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df, "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3, UNKNOWN, (int)MULTI_ARG_3_DF)
|
BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df, "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3, UNKNOWN, (int)MULTI_ARG_3_DF)
|
||||||
BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmsub_v4sf, "__builtin_ia32_vfmsubss3", IX86_BUILTIN_VFMSUBSS3, UNKNOWN, (int)MULTI_ARG_3_SF)
|
BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmsub_v4sf, "__builtin_ia32_vfmsubss3", IX86_BUILTIN_VFMSUBSS3, UNKNOWN, (int)MULTI_ARG_3_SF)
|
||||||
BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmsub_v2df, "__builtin_ia32_vfmsubsd3", IX86_BUILTIN_VFMSUBSD3, UNKNOWN, (int)MULTI_ARG_3_DF)
|
BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmsub_v2df, "__builtin_ia32_vfmsubsd3", IX86_BUILTIN_VFMSUBSD3, UNKNOWN, (int)MULTI_ARG_3_DF)
|
||||||
|
BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfnmadd_v4sf, "__builtin_ia32_vfnmaddss3", IX86_BUILTIN_VFNMADDSS3, UNKNOWN, (int)MULTI_ARG_3_SF)
|
||||||
|
BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfnmadd_v2df, "__builtin_ia32_vfnmaddsd3", IX86_BUILTIN_VFNMADDSD3, UNKNOWN, (int)MULTI_ARG_3_DF)
|
||||||
|
|
||||||
BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF)
|
BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF)
|
||||||
BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF)
|
BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF)
|
||||||
|
|
@ -2878,6 +2894,10 @@ BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsub_v4sf, "_
|
||||||
BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsub_v2df, "__builtin_ia32_vfmsubpd", IX86_BUILTIN_VFMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF)
|
BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsub_v2df, "__builtin_ia32_vfmsubpd", IX86_BUILTIN_VFMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF)
|
||||||
BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsub_v8sf, "__builtin_ia32_vfmsubps256", IX86_BUILTIN_VFMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2)
|
BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsub_v8sf, "__builtin_ia32_vfmsubps256", IX86_BUILTIN_VFMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2)
|
||||||
BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsub_v4df, "__builtin_ia32_vfmsubpd256", IX86_BUILTIN_VFMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2)
|
BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsub_v4df, "__builtin_ia32_vfmsubpd256", IX86_BUILTIN_VFMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2)
|
||||||
|
BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmadd_v4sf, "__builtin_ia32_vfnmaddps", IX86_BUILTIN_VFNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF)
|
||||||
|
BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmadd_v2df, "__builtin_ia32_vfnmaddpd", IX86_BUILTIN_VFNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF)
|
||||||
|
BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmadd_v8sf, "__builtin_ia32_vfnmaddps256", IX86_BUILTIN_VFNMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2)
|
||||||
|
BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmadd_v4df, "__builtin_ia32_vfnmaddpd256", IX86_BUILTIN_VFNMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2)
|
||||||
|
|
||||||
BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF)
|
BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF)
|
||||||
BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF)
|
BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF)
|
||||||
|
|
|
||||||
|
|
@ -3756,6 +3756,14 @@
|
||||||
(neg:FMAMODE_AVX512
|
(neg:FMAMODE_AVX512
|
||||||
(match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))])
|
(match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))])
|
||||||
|
|
||||||
|
(define_expand "fma4i_fnmadd_<mode>"
|
||||||
|
[(set (match_operand:FMAMODE_AVX512 0 "register_operand")
|
||||||
|
(fma:FMAMODE_AVX512
|
||||||
|
(neg:FMAMODE_AVX512
|
||||||
|
(match_operand:FMAMODE_AVX512 1 "nonimmediate_operand"))
|
||||||
|
(match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
|
||||||
|
(match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
|
||||||
|
|
||||||
(define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
|
(define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
|
||||||
[(match_operand:VF_AVX512VL 0 "register_operand")
|
[(match_operand:VF_AVX512VL 0 "register_operand")
|
||||||
(match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
|
(match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
|
||||||
|
|
@ -4016,6 +4024,20 @@
|
||||||
(set_attr "type" "ssemuladd")
|
(set_attr "type" "ssemuladd")
|
||||||
(set_attr "mode" "<MODE>")])
|
(set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
|
(define_expand "<avx512>_fnmadd_<mode>_maskz<round_expand_name>"
|
||||||
|
[(match_operand:VF_AVX512VL 0 "register_operand")
|
||||||
|
(match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
|
||||||
|
(match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
|
||||||
|
(match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
|
||||||
|
(match_operand:<avx512fmaskmode> 4 "register_operand")]
|
||||||
|
"TARGET_AVX512F && <round_mode512bit_condition>"
|
||||||
|
{
|
||||||
|
emit_insn (gen_fma_fnmadd_<mode>_maskz_1<round_expand_name> (
|
||||||
|
operands[0], operands[1], operands[2], operands[3],
|
||||||
|
CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
|
||||||
(define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
|
(define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
|
||||||
[(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
|
[(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
|
||||||
(fma:VF_SF_AVX512VL
|
(fma:VF_SF_AVX512VL
|
||||||
|
|
@ -4031,6 +4053,49 @@
|
||||||
[(set_attr "type" "ssemuladd")
|
[(set_attr "type" "ssemuladd")
|
||||||
(set_attr "mode" "<MODE>")])
|
(set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
|
(define_insn "*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_1"
|
||||||
|
[(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
|
||||||
|
(fma:VF_AVX512
|
||||||
|
(neg:VF_AVX512
|
||||||
|
(match_operand:VF_AVX512 1 "register_operand" "0,v"))
|
||||||
|
(match_operand:VF_AVX512 2 "register_operand" "v,0")
|
||||||
|
(vec_duplicate:VF_AVX512
|
||||||
|
(match_operand:<ssescalarmode> 3 "memory_operand" "m,m"))))]
|
||||||
|
"TARGET_AVX512F && <sd_mask_mode512bit_condition>"
|
||||||
|
"vfnmadd213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
|
||||||
|
[(set_attr "type" "ssemuladd")
|
||||||
|
(set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
|
(define_insn "*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_2"
|
||||||
|
[(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
|
||||||
|
(fma:VF_AVX512
|
||||||
|
(neg:VF_AVX512
|
||||||
|
(vec_duplicate:VF_AVX512
|
||||||
|
(match_operand:<ssescalarmode> 1 "memory_operand" "m,m")))
|
||||||
|
(match_operand:VF_AVX512 2 "register_operand" "0,v")
|
||||||
|
(match_operand:VF_AVX512 3 "register_operand" "v,0")))]
|
||||||
|
"TARGET_AVX512F && <sd_mask_mode512bit_condition>"
|
||||||
|
"@
|
||||||
|
vfnmadd132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
|
||||||
|
vfnmadd231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
|
||||||
|
[(set_attr "type" "ssemuladd")
|
||||||
|
(set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
|
(define_insn "*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_3"
|
||||||
|
[(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
|
||||||
|
(fma:VF_AVX512
|
||||||
|
(neg:VF_AVX512
|
||||||
|
(match_operand:VF_AVX512 1 "register_operand" "0,v"))
|
||||||
|
(vec_duplicate:VF_AVX512
|
||||||
|
(match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
|
||||||
|
(match_operand:VF_AVX512 3 "register_operand" "v,0")))]
|
||||||
|
"TARGET_AVX512F && <sd_mask_mode512bit_condition>"
|
||||||
|
"@
|
||||||
|
vfnmadd132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
|
||||||
|
vfnmadd231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
|
||||||
|
[(set_attr "type" "ssemuladd")
|
||||||
|
(set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
(define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
|
(define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
|
||||||
[(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
|
[(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
|
||||||
(vec_merge:VF_AVX512VL
|
(vec_merge:VF_AVX512VL
|
||||||
|
|
@ -4326,6 +4391,18 @@
|
||||||
(const_int 1)))]
|
(const_int 1)))]
|
||||||
"TARGET_FMA")
|
"TARGET_FMA")
|
||||||
|
|
||||||
|
(define_expand "fmai_vmfnmadd_<mode><round_name>"
|
||||||
|
[(set (match_operand:VF_128 0 "register_operand")
|
||||||
|
(vec_merge:VF_128
|
||||||
|
(fma:VF_128
|
||||||
|
(neg:VF_128
|
||||||
|
(match_operand:VF_128 2 "<round_nimm_predicate>"))
|
||||||
|
(match_operand:VF_128 1 "<round_nimm_predicate>")
|
||||||
|
(match_operand:VF_128 3 "<round_nimm_predicate>"))
|
||||||
|
(match_dup 1)
|
||||||
|
(const_int 1)))]
|
||||||
|
"TARGET_FMA")
|
||||||
|
|
||||||
(define_insn "*fmai_fmadd_<mode>"
|
(define_insn "*fmai_fmadd_<mode>"
|
||||||
[(set (match_operand:VF_128 0 "register_operand" "=v,v")
|
[(set (match_operand:VF_128 0 "register_operand" "=v,v")
|
||||||
(vec_merge:VF_128
|
(vec_merge:VF_128
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,18 @@
|
||||||
|
2018-10-21 H.J. Lu <hongjiu.lu@intel.com>
|
||||||
|
|
||||||
|
PR target/72782
|
||||||
|
* gcc.target/i386/avx512f-fnmadd-df-zmm-1.c: New test.
|
||||||
|
* gcc.target/i386/avx512f-fnmadd-sf-zmm-1.c: Likewise.
|
||||||
|
* gcc.target/i386/avx512f-fnmadd-sf-zmm-2.c: Likewise.
|
||||||
|
* gcc.target/i386/avx512f-fnmadd-sf-zmm-3.c: Likewise.
|
||||||
|
* gcc.target/i386/avx512f-fnmadd-sf-zmm-4.c: Likewise.
|
||||||
|
* gcc.target/i386/avx512f-fnmadd-sf-zmm-5.c: Likewise.
|
||||||
|
* gcc.target/i386/avx512f-fnmadd-sf-zmm-6.c: Likewise.
|
||||||
|
* gcc.target/i386/avx512f-fnmadd-sf-zmm-7.c: Likewise.
|
||||||
|
* gcc.target/i386/avx512f-fnmadd-sf-zmm-8.c: Likewise.
|
||||||
|
* gcc.target/i386/avx512vl-fnmadd-sf-xmm-1.c: Likewise.
|
||||||
|
* gcc.target/i386/avx512vl-fnmadd-sf-ymm-1.c: Likewise.
|
||||||
|
|
||||||
2018-10-21 H.J. Lu <hongjiu.lu@intel.com>
|
2018-10-21 H.J. Lu <hongjiu.lu@intel.com>
|
||||||
|
|
||||||
PR target/72782
|
PR target/72782
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,12 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-mavx512f -O2" } */
|
||||||
|
/* { dg-final { scan-assembler-times "vfnmadd...pd\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-not "vbroadcastsd\[^\n\]*%zmm\[0-9\]+" } } */
|
||||||
|
|
||||||
|
#define type __m512d
|
||||||
|
#define vec 512
|
||||||
|
#define op fnmadd
|
||||||
|
#define suffix pd
|
||||||
|
#define SCALAR double
|
||||||
|
|
||||||
|
#include "avx512-fma-1.h"
|
||||||
|
|
@ -0,0 +1,12 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-mavx512f -O2" } */
|
||||||
|
/* { dg-final { scan-assembler-times "vfnmadd...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
|
||||||
|
|
||||||
|
#define type __m512
|
||||||
|
#define vec 512
|
||||||
|
#define op fnmadd
|
||||||
|
#define suffix ps
|
||||||
|
#define SCALAR float
|
||||||
|
|
||||||
|
#include "avx512-fma-1.h"
|
||||||
|
|
@ -0,0 +1,12 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-mavx512f -O2" } */
|
||||||
|
/* { dg-final { scan-assembler-times "vfnmadd...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
|
||||||
|
|
||||||
|
#define type __m512
|
||||||
|
#define vec 512
|
||||||
|
#define op fnmadd
|
||||||
|
#define suffix ps
|
||||||
|
#define SCALAR float
|
||||||
|
|
||||||
|
#include "avx512-fma-2.h"
|
||||||
|
|
@ -0,0 +1,12 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-mavx512f -O2" } */
|
||||||
|
/* { dg-final { scan-assembler-times "vfnmadd...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
|
||||||
|
|
||||||
|
#define type __m512
|
||||||
|
#define vec 512
|
||||||
|
#define op fnmadd
|
||||||
|
#define suffix ps
|
||||||
|
#define SCALAR float
|
||||||
|
|
||||||
|
#include "avx512-fma-3.h"
|
||||||
|
|
@ -0,0 +1,12 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-mavx512f -O2" } */
|
||||||
|
/* { dg-final { scan-assembler-times "vfnmadd...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
|
||||||
|
|
||||||
|
#define type __m512
|
||||||
|
#define vec 512
|
||||||
|
#define op fnmadd
|
||||||
|
#define suffix ps
|
||||||
|
#define SCALAR float
|
||||||
|
|
||||||
|
#include "avx512-fma-4.h"
|
||||||
|
|
@ -0,0 +1,12 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-mavx512f -O2" } */
|
||||||
|
/* { dg-final { scan-assembler-times "vfnmadd...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
|
||||||
|
|
||||||
|
#define type __m512
|
||||||
|
#define vec 512
|
||||||
|
#define op fnmadd
|
||||||
|
#define suffix ps
|
||||||
|
#define SCALAR float
|
||||||
|
|
||||||
|
#include "avx512-fma-5.h"
|
||||||
|
|
@ -0,0 +1,12 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-mavx512f -O2" } */
|
||||||
|
/* { dg-final { scan-assembler-times "vfnmadd...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
|
||||||
|
|
||||||
|
#define type __m512
|
||||||
|
#define vec 512
|
||||||
|
#define op fnmadd
|
||||||
|
#define suffix ps
|
||||||
|
#define SCALAR float
|
||||||
|
|
||||||
|
#include "avx512-fma-6.h"
|
||||||
|
|
@ -0,0 +1,12 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-mavx512f -O2" } */
|
||||||
|
/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\]*%zmm\[0-9\]+" 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times "vfnmadd...ps\[^\n\]*%zmm\[0-9\]+" 1 } } */
|
||||||
|
|
||||||
|
#define type __m512
|
||||||
|
#define vec 512
|
||||||
|
#define op fnmadd
|
||||||
|
#define suffix ps
|
||||||
|
#define SCALAR float
|
||||||
|
|
||||||
|
#include "avx512-fma-7.h"
|
||||||
|
|
@ -0,0 +1,12 @@
|
||||||
|
/* { dg-do compile { target { ! ia32 } } } */
|
||||||
|
/* { dg-options "-mavx512f -O2" } */
|
||||||
|
/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\]*%zmm\[0-9\]+" 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times "vfnmadd...ps\[ \\t\]+%zmm\[0-9\]+, %zmm\[0-9\]+, %zmm0" 1 } } */
|
||||||
|
|
||||||
|
#define type __m512
|
||||||
|
#define vec 512
|
||||||
|
#define op fnmadd
|
||||||
|
#define suffix ps
|
||||||
|
#define SCALAR float
|
||||||
|
|
||||||
|
#include "avx512-fma-8.h"
|
||||||
|
|
@ -0,0 +1,12 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-mfma -mavx512vl -O2" } */
|
||||||
|
/* { dg-final { scan-assembler-times "vfnmadd...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %xmm\[0-9\]+, %xmm0" 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%xmm\[0-9\]+" } } */
|
||||||
|
|
||||||
|
#define type __m128
|
||||||
|
#define vec
|
||||||
|
#define op fnmadd
|
||||||
|
#define suffix ps
|
||||||
|
#define SCALAR float
|
||||||
|
|
||||||
|
#include "avx512-fma-1.h"
|
||||||
|
|
@ -0,0 +1,12 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-mfma -mavx512vl -O2" } */
|
||||||
|
/* { dg-final { scan-assembler-times "vfnmadd...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %ymm\[0-9\]+, %ymm0" 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%ymm\[0-9\]+" } } */
|
||||||
|
|
||||||
|
#define type __m256
|
||||||
|
#define vec 256
|
||||||
|
#define op fnmadd
|
||||||
|
#define suffix ps
|
||||||
|
#define SCALAR float
|
||||||
|
|
||||||
|
#include "avx512-fma-1.h"
|
||||||
Loading…
Reference in New Issue