diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 11d05f7e9606..1148b15809ee 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,64 @@ +2018-10-21 H.J. Lu + + PR target/72782 + * config/i386/avx512fintrin.h (_mm512_fnmadd_round_pd): Use + __builtin_ia32_vfnmaddpd512_mask. + (_mm512_mask_fnmadd_round_pd): Likewise. + (_mm512_fnmadd_pd): Likewise. + (_mm512_mask_fnmadd_pd): Likewise. + (_mm512_maskz_fnmadd_round_pd): Use + __builtin_ia32_vfnmaddpd512_maskz. + (_mm512_maskz_fnmadd_pd): Likewise. + (_mm512_fnmadd_round_ps): Use __builtin_ia32_vfnmaddps512_mask. + (_mm512_mask_fnmadd_round_ps): Likewise. + (_mm512_fnmadd_ps): Likewise. + (_mm512_mask_fnmadd_ps): Likewise. + (_mm512_maskz_fnmadd_round_ps): Use + __builtin_ia32_vfnmaddps512_maskz. + (_mm512_maskz_fnmadd_ps): Likewise. + * config/i386/avx512vlintrin.h (_mm256_mask_fnmadd_pd): Use + __builtin_ia32_vfnmaddpd256_mask. + (_mm256_maskz_fnmadd_pd): Use __builtin_ia32_vfnmaddpd256_maskz. + (_mm_mask_fnmadd_pd): Use __builtin_ia32_vfmaddpd128_mask + (_mm_maskz_fnmadd_pd): Use __builtin_ia32_vfnmaddpd128_maskz. + (_mm256_mask_fnmadd_ps): Use __builtin_ia32_vfnmaddps256_mask. + (_mm256_mask_fnmadd_ps): Use __builtin_ia32_vfnmaddps256_mask. + (_mm256_maskz_fnmadd_ps): Use __builtin_ia32_vfnmaddps256_maskz. + (_mm_mask_fnmadd_ps): Use __builtin_ia32_vfnmaddps128_mask. + (_mm_maskz_fnmadd_ps): Use __builtin_ia32_vfnmaddps128_maskz. + * config/i386/fmaintrin.h (_mm_fnmadd_pd): Use + __builtin_ia32_vfnmaddpd. + (_mm256_fnmadd_pd): Use __builtin_ia32_vfnmaddpd256. + (_mm_fnmadd_ps): Use __builtin_ia32_vfnmaddps. + (_mm256_fnmadd_ps): Use __builtin_ia32_vfnmaddps256. + (_mm_fnmadd_sd): Use __builtin_ia32_vfnmaddsd3. + (_mm_fnmadd_ss): Use __builtin_ia32_vfnmaddss3. + * config/i386/i386-builtin.def: Add + __builtin_ia32_vfnmaddpd256_mask, + __builtin_ia32_vfnmaddpd256_maskz, + __builtin_ia32_vfnmaddpd128_mask, + __builtin_ia32_vfnmaddpd128_maskz, + __builtin_ia32_vfnmaddps256_mask, + __builtin_ia32_vfnmaddps256_maskz, + __builtin_ia32_vfnmaddps128_mask, + __builtin_ia32_vfnmaddps128_maskz, + __builtin_ia32_vfnmaddpd512_mask, + __builtin_ia32_vfnmaddpd512_maskz, + __builtin_ia32_vfnmaddps512_mask, + __builtin_ia32_vfnmaddps512_maskz, __builtin_ia32_vfnmaddss3, + __builtin_ia32_vfnmaddsd3, __builtin_ia32_vfnmaddps, + __builtin_ia32_vfnmaddpd, __builtin_ia32_vfnmaddps256 and. + __builtin_ia32_vfnmaddpd256. + * config/i386/sse.md (fma4i_fnmadd_): New. + (_fnmadd__maskz): Likewise. + (*fma_fnmadd__bcst_1): + Likewise. + (*fma_fnmadd__bcst_2): + Likewise. + (*fma_fnmadd__bcst_3): + Likewise. + (fmai_vmfnmadd_): Likewise. + 2018-10-21 H.J. Lu PR target/72782 diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index c0c8fa1efd04..1445e9e8b3c2 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -3613,10 +3613,10 @@ extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) { - return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, - (__v8df) __B, - (__v8df) __C, - (__mmask8) -1, __R); + return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) -1, __R); } extern __inline __m512d @@ -3635,10 +3635,10 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U, const int __R) { - return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A, - (__v8df) __B, - (__v8df) __C, - (__mmask8) __U, __R); + return (__m512d) __builtin_ia32_vfnmaddpd512_mask3 ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, __R); } extern __inline __m512d @@ -3646,20 +3646,20 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C, const int __R) { - return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, - (__v8df) __B, - (__v8df) __C, - (__mmask8) __U, __R); + return (__m512d) __builtin_ia32_vfnmaddpd512_maskz ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, __R); } extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) { - return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, - (__v16sf) __B, - (__v16sf) __C, - (__mmask16) -1, __R); + return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) -1, __R); } extern __inline __m512 @@ -3678,10 +3678,10 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U, const int __R) { - return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A, - (__v16sf) __B, - (__v16sf) __C, - (__mmask16) __U, __R); + return (__m512) __builtin_ia32_vfnmaddps512_mask3 ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, __R); } extern __inline __m512 @@ -3689,10 +3689,10 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C, const int __R) { - return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, - (__v16sf) __B, - (__v16sf) __C, - (__mmask16) __U, __R); + return (__m512) __builtin_ia32_vfnmaddps512_maskz ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, __R); } extern __inline __m512d @@ -3878,28 +3878,28 @@ _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, -(C), U, R) #define _mm512_fnmadd_round_pd(A, B, C, R) \ - (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, C, -1, R) + (__m512d)__builtin_ia32_vfnmaddpd512_mask(A, B, C, -1, R) #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \ - (__m512d)__builtin_ia32_vfnmaddpd512_mask(-(A), B, C, U, R) + (__m512d)__builtin_ia32_vfnmaddpd512_mask(A, B, C, U, R) #define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \ - (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(A), B, C, U, R) + (__m512d)__builtin_ia32_vfnmaddpd512_mask3(A, B, C, U, R) #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \ - (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, C, U, R) + (__m512d)__builtin_ia32_vfnmaddpd512_maskz(A, B, C, U, R) #define _mm512_fnmadd_round_ps(A, B, C, R) \ - (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, C, -1, R) + (__m512)__builtin_ia32_vfnmaddps512_mask(A, B, C, -1, R) #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \ - (__m512)__builtin_ia32_vfnmaddps512_mask(-(A), B, C, U, R) + (__m512)__builtin_ia32_vfnmaddps512_mask(A, B, C, U, R) #define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \ - (__m512)__builtin_ia32_vfmaddps512_mask3(-(A), B, C, U, R) + (__m512)__builtin_ia32_vfnmaddps512_mask3(A, B, C, U, R) #define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \ - (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, C, U, R) + (__m512)__builtin_ia32_vfnmaddps512_maskz(A, B, C, U, R) #define _mm512_fnmsub_round_pd(A, B, C, R) \ (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, -(C), -1, R) @@ -12680,11 +12680,11 @@ extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C) { - return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, - (__v8df) __B, - (__v8df) __C, - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); } extern __inline __m512d @@ -12702,33 +12702,33 @@ extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { - return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A, - (__v8df) __B, - (__v8df) __C, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512d) __builtin_ia32_vfnmaddpd512_mask3 ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { - return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, - (__v8df) __B, - (__v8df) __C, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512d) __builtin_ia32_vfnmaddpd512_maskz ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C) { - return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, - (__v16sf) __B, - (__v16sf) __C, - (__mmask16) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); } extern __inline __m512 @@ -12746,22 +12746,22 @@ extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { - return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A, - (__v16sf) __B, - (__v16sf) __C, - (__mmask16) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512) __builtin_ia32_vfnmaddps512_mask3 ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); } extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { - return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, - (__v16sf) __B, - (__v16sf) __C, - (__mmask16) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512) __builtin_ia32_vfnmaddps512_maskz ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); } extern __inline __m512d diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h index fcc35c30076f..b46c38e1f819 100644 --- a/gcc/config/i386/avx512vlintrin.h +++ b/gcc/config/i386/avx512vlintrin.h @@ -4525,10 +4525,10 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask3_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { - return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A, - (__v4df) __B, - (__v4df) __C, - (__mmask8) __U); + return (__m256d) __builtin_ia32_vfnmaddpd256_mask3 ((__v4df) __A, + (__v4df) __B, + (__v4df) __C, + (__mmask8) __U); } extern __inline __m256d @@ -4536,10 +4536,10 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_fnmadd_pd (__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { - return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A, - (__v4df) __B, - (__v4df) __C, - (__mmask8) __U); + return (__m256d) __builtin_ia32_vfnmaddpd256_maskz ((__v4df) __A, + (__v4df) __B, + (__v4df) __C, + (__mmask8) __U); } extern __inline __m128d @@ -4558,10 +4558,10 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask3_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { - return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A, - (__v2df) __B, - (__v2df) __C, - (__mmask8) __U); + return (__m128d) __builtin_ia32_vfnmaddpd128_mask3 ((__v2df) __A, + (__v2df) __B, + (__v2df) __C, + (__mmask8) __U); } extern __inline __m128d @@ -4569,10 +4569,10 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_fnmadd_pd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { - return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A, - (__v2df) __B, - (__v2df) __C, - (__mmask8) __U); + return (__m128d) __builtin_ia32_vfnmaddpd128_maskz ((__v2df) __A, + (__v2df) __B, + (__v2df) __C, + (__mmask8) __U); } extern __inline __m256 @@ -4591,10 +4591,10 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask3_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { - return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A, - (__v8sf) __B, - (__v8sf) __C, - (__mmask8) __U); + return (__m256) __builtin_ia32_vfnmaddps256_mask3 ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C, + (__mmask8) __U); } extern __inline __m256 @@ -4602,10 +4602,10 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_fnmadd_ps (__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { - return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A, - (__v8sf) __B, - (__v8sf) __C, - (__mmask8) __U); + return (__m256) __builtin_ia32_vfnmaddps256_maskz ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C, + (__mmask8) __U); } extern __inline __m128 @@ -4622,20 +4622,20 @@ extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask3_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { - return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A, - (__v4sf) __B, - (__v4sf) __C, - (__mmask8) __U); + return (__m128) __builtin_ia32_vfnmaddps128_mask3 ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C, + (__mmask8) __U); } extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_fnmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { - return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A, - (__v4sf) __B, - (__v4sf) __C, - (__mmask8) __U); + return (__m128) __builtin_ia32_vfnmaddps128_maskz ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C, + (__mmask8) __U); } extern __inline __m256d diff --git a/gcc/config/i386/fmaintrin.h b/gcc/config/i386/fmaintrin.h index 2eddd8965799..0a2f4a7f676f 100644 --- a/gcc/config/i386/fmaintrin.h +++ b/gcc/config/i386/fmaintrin.h @@ -134,48 +134,48 @@ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, - (__v2df)__C); + return (__m128d)__builtin_ia32_vfnmaddpd ((__v2df)__A, (__v2df)__B, + (__v2df)__C); } extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, - (__v4df)__C); + return (__m256d)__builtin_ia32_vfnmaddpd256 ((__v4df)__A, (__v4df)__B, + (__v4df)__C); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, - (__v4sf)__C); + return (__m128)__builtin_ia32_vfnmaddps ((__v4sf)__A, (__v4sf)__B, + (__v4sf)__C); } extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, - (__v8sf)__C); + return (__m256)__builtin_ia32_vfnmaddps256 ((__v8sf)__A, (__v8sf)__B, + (__v8sf)__C); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_fnmadd_sd (__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddsd3 ((__v2df)__A, -(__v2df)__B, - (__v2df)__C); + return (__m128d)__builtin_ia32_vfnmaddsd3 ((__v2df)__A, (__v2df)__B, + (__v2df)__C); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_fnmadd_ss (__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddss3 ((__v4sf)__A, -(__v4sf)__B, - (__v4sf)__C); + return (__m128)__builtin_ia32_vfnmaddss3 ((__v4sf)__A, (__v4sf)__B, + (__v4sf)__C); } extern __inline __m128d diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index f5b5e56a01cf..74343db1cbeb 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -1916,9 +1916,19 @@ BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask, "__builtin_i BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_maskz, "__builtin_ia32_vfmsubps128_maskz", IX86_BUILTIN_VFMSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask3, "__builtin_ia32_vfnmaddpd256_mask3", IX86_BUILTIN_VFNMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_maskz, +"__builtin_ia32_vfnmaddpd256_maskz", IX86_BUILTIN_VFNMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask3, "__builtin_ia32_vfnmaddpd128_mask3", IX86_BUILTIN_VFNMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_maskz, +"__builtin_ia32_vfnmaddpd128_maskz", IX86_BUILTIN_VFNMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask3, "__builtin_ia32_vfnmaddps256_mask3", IX86_BUILTIN_VFNMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_maskz, "__builtin_ia32_vfnmaddps256_maskz", IX86_BUILTIN_VFNMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask3, "__builtin_ia32_vfnmaddps128_mask3", IX86_BUILTIN_VFNMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_maskz, "__builtin_ia32_vfnmaddps128_maskz", IX86_BUILTIN_VFNMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI) @@ -2783,7 +2793,11 @@ BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask_round, "__buil BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_maskz_round, "__builtin_ia32_vfmsubps512_maskz", IX86_BUILTIN_VFMSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask3_round, "__builtin_ia32_vfnmaddpd512_mask3", IX86_BUILTIN_VFNMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_maskz_round, "__builtin_ia32_vfnmaddpd512_maskz", IX86_BUILTIN_VFNMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask3_round, "__builtin_ia32_vfnmaddps512_mask3", IX86_BUILTIN_VFNMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_maskz_round, "__builtin_ia32_vfnmaddps512_maskz", IX86_BUILTIN_VFNMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) @@ -2869,6 +2883,8 @@ BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf, "__builtin_ia32_vfmaddss BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df, "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3, UNKNOWN, (int)MULTI_ARG_3_DF) BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmsub_v4sf, "__builtin_ia32_vfmsubss3", IX86_BUILTIN_VFMSUBSS3, UNKNOWN, (int)MULTI_ARG_3_SF) BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmsub_v2df, "__builtin_ia32_vfmsubsd3", IX86_BUILTIN_VFMSUBSD3, UNKNOWN, (int)MULTI_ARG_3_DF) +BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfnmadd_v4sf, "__builtin_ia32_vfnmaddss3", IX86_BUILTIN_VFNMADDSS3, UNKNOWN, (int)MULTI_ARG_3_SF) +BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfnmadd_v2df, "__builtin_ia32_vfnmaddsd3", IX86_BUILTIN_VFNMADDSD3, UNKNOWN, (int)MULTI_ARG_3_DF) BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF) BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF) @@ -2878,6 +2894,10 @@ BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsub_v4sf, "_ BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsub_v2df, "__builtin_ia32_vfmsubpd", IX86_BUILTIN_VFMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF) BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsub_v8sf, "__builtin_ia32_vfmsubps256", IX86_BUILTIN_VFMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2) BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsub_v4df, "__builtin_ia32_vfmsubpd256", IX86_BUILTIN_VFMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2) +BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmadd_v4sf, "__builtin_ia32_vfnmaddps", IX86_BUILTIN_VFNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF) +BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmadd_v2df, "__builtin_ia32_vfnmaddpd", IX86_BUILTIN_VFNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF) +BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmadd_v8sf, "__builtin_ia32_vfnmaddps256", IX86_BUILTIN_VFNMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2) +BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmadd_v4df, "__builtin_ia32_vfnmaddpd256", IX86_BUILTIN_VFNMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2) BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF) BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 0fdaaed04aa5..0426da420e86 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -3756,6 +3756,14 @@ (neg:FMAMODE_AVX512 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))]) +(define_expand "fma4i_fnmadd_" + [(set (match_operand:FMAMODE_AVX512 0 "register_operand") + (fma:FMAMODE_AVX512 + (neg:FMAMODE_AVX512 + (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")) + (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand") + (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))]) + (define_expand "_fmadd__maskz" [(match_operand:VF_AVX512VL 0 "register_operand") (match_operand:VF_AVX512VL 1 "") @@ -4016,6 +4024,20 @@ (set_attr "type" "ssemuladd") (set_attr "mode" "")]) +(define_expand "_fnmadd__maskz" + [(match_operand:VF_AVX512VL 0 "register_operand") + (match_operand:VF_AVX512VL 1 "") + (match_operand:VF_AVX512VL 2 "") + (match_operand:VF_AVX512VL 3 "") + (match_operand: 4 "register_operand")] + "TARGET_AVX512F && " +{ + emit_insn (gen_fma_fnmadd__maskz_1 ( + operands[0], operands[1], operands[2], operands[3], + CONST0_RTX (mode), operands[4])); + DONE; +}) + (define_insn "fma_fnmadd_" [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v") (fma:VF_SF_AVX512VL @@ -4031,6 +4053,49 @@ [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) +(define_insn "*fma_fnmadd__bcst_1" + [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v") + (fma:VF_AVX512 + (neg:VF_AVX512 + (match_operand:VF_AVX512 1 "register_operand" "0,v")) + (match_operand:VF_AVX512 2 "register_operand" "v,0") + (vec_duplicate:VF_AVX512 + (match_operand: 3 "memory_operand" "m,m"))))] + "TARGET_AVX512F && " + "vfnmadd213\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "")]) + +(define_insn "*fma_fnmadd__bcst_2" + [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v") + (fma:VF_AVX512 + (neg:VF_AVX512 + (vec_duplicate:VF_AVX512 + (match_operand: 1 "memory_operand" "m,m"))) + (match_operand:VF_AVX512 2 "register_operand" "0,v") + (match_operand:VF_AVX512 3 "register_operand" "v,0")))] + "TARGET_AVX512F && " + "@ + vfnmadd132\t{%1, %3, %0|%0, %3, %1} + vfnmadd231\t{%1, %2, %0|%0, %2, %1}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "")]) + +(define_insn "*fma_fnmadd__bcst_3" + [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v") + (fma:VF_AVX512 + (neg:VF_AVX512 + (match_operand:VF_AVX512 1 "register_operand" "0,v")) + (vec_duplicate:VF_AVX512 + (match_operand: 2 "memory_operand" "m,m")) + (match_operand:VF_AVX512 3 "register_operand" "v,0")))] + "TARGET_AVX512F && " + "@ + vfnmadd132\t{%2, %3, %0|%0, %3, %2} + vfnmadd231\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "")]) + (define_insn "_fnmadd__mask" [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v") (vec_merge:VF_AVX512VL @@ -4326,6 +4391,18 @@ (const_int 1)))] "TARGET_FMA") +(define_expand "fmai_vmfnmadd_" + [(set (match_operand:VF_128 0 "register_operand") + (vec_merge:VF_128 + (fma:VF_128 + (neg:VF_128 + (match_operand:VF_128 2 "")) + (match_operand:VF_128 1 "") + (match_operand:VF_128 3 "")) + (match_dup 1) + (const_int 1)))] + "TARGET_FMA") + (define_insn "*fmai_fmadd_" [(set (match_operand:VF_128 0 "register_operand" "=v,v") (vec_merge:VF_128 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index a1d22409d1b5..649a8384c929 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,18 @@ +2018-10-21 H.J. Lu + + PR target/72782 + * gcc.target/i386/avx512f-fnmadd-df-zmm-1.c: New test. + * gcc.target/i386/avx512f-fnmadd-sf-zmm-1.c: Likewise. + * gcc.target/i386/avx512f-fnmadd-sf-zmm-2.c: Likewise. + * gcc.target/i386/avx512f-fnmadd-sf-zmm-3.c: Likewise. + * gcc.target/i386/avx512f-fnmadd-sf-zmm-4.c: Likewise. + * gcc.target/i386/avx512f-fnmadd-sf-zmm-5.c: Likewise. + * gcc.target/i386/avx512f-fnmadd-sf-zmm-6.c: Likewise. + * gcc.target/i386/avx512f-fnmadd-sf-zmm-7.c: Likewise. + * gcc.target/i386/avx512f-fnmadd-sf-zmm-8.c: Likewise. + * gcc.target/i386/avx512vl-fnmadd-sf-xmm-1.c: Likewise. + * gcc.target/i386/avx512vl-fnmadd-sf-ymm-1.c: Likewise. + 2018-10-21 H.J. Lu PR target/72782 diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmadd-df-zmm-1.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmadd-df-zmm-1.c new file mode 100644 index 000000000000..d4db6f74ff85 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-fnmadd-df-zmm-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vfnmadd...pd\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vbroadcastsd\[^\n\]*%zmm\[0-9\]+" } } */ + +#define type __m512d +#define vec 512 +#define op fnmadd +#define suffix pd +#define SCALAR double + +#include "avx512-fma-1.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmadd-sf-zmm-1.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmadd-sf-zmm-1.c new file mode 100644 index 000000000000..9da0cad2f09c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-fnmadd-sf-zmm-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vfnmadd...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */ + +#define type __m512 +#define vec 512 +#define op fnmadd +#define suffix ps +#define SCALAR float + +#include "avx512-fma-1.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmadd-sf-zmm-2.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmadd-sf-zmm-2.c new file mode 100644 index 000000000000..cf355d16dd93 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-fnmadd-sf-zmm-2.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vfnmadd...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */ + +#define type __m512 +#define vec 512 +#define op fnmadd +#define suffix ps +#define SCALAR float + +#include "avx512-fma-2.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmadd-sf-zmm-3.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmadd-sf-zmm-3.c new file mode 100644 index 000000000000..a4e723e23207 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-fnmadd-sf-zmm-3.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vfnmadd...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */ + +#define type __m512 +#define vec 512 +#define op fnmadd +#define suffix ps +#define SCALAR float + +#include "avx512-fma-3.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmadd-sf-zmm-4.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmadd-sf-zmm-4.c new file mode 100644 index 000000000000..ceeba440ca57 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-fnmadd-sf-zmm-4.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vfnmadd...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */ + +#define type __m512 +#define vec 512 +#define op fnmadd +#define suffix ps +#define SCALAR float + +#include "avx512-fma-4.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmadd-sf-zmm-5.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmadd-sf-zmm-5.c new file mode 100644 index 000000000000..4733dce087ea --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-fnmadd-sf-zmm-5.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vfnmadd...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */ + +#define type __m512 +#define vec 512 +#define op fnmadd +#define suffix ps +#define SCALAR float + +#include "avx512-fma-5.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmadd-sf-zmm-6.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmadd-sf-zmm-6.c new file mode 100644 index 000000000000..cbf90ca7587e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-fnmadd-sf-zmm-6.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vfnmadd...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */ + +#define type __m512 +#define vec 512 +#define op fnmadd +#define suffix ps +#define SCALAR float + +#include "avx512-fma-6.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmadd-sf-zmm-7.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmadd-sf-zmm-7.c new file mode 100644 index 000000000000..db5c34678c07 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-fnmadd-sf-zmm-7.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\]*%zmm\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd...ps\[^\n\]*%zmm\[0-9\]+" 1 } } */ + +#define type __m512 +#define vec 512 +#define op fnmadd +#define suffix ps +#define SCALAR float + +#include "avx512-fma-7.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmadd-sf-zmm-8.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmadd-sf-zmm-8.c new file mode 100644 index 000000000000..909ab25796bb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-fnmadd-sf-zmm-8.c @@ -0,0 +1,12 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\]*%zmm\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd...ps\[ \\t\]+%zmm\[0-9\]+, %zmm\[0-9\]+, %zmm0" 1 } } */ + +#define type __m512 +#define vec 512 +#define op fnmadd +#define suffix ps +#define SCALAR float + +#include "avx512-fma-8.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-fnmadd-sf-xmm-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-fnmadd-sf-xmm-1.c new file mode 100644 index 000000000000..ec68fa9e0846 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-fnmadd-sf-xmm-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mfma -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vfnmadd...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %xmm\[0-9\]+, %xmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%xmm\[0-9\]+" } } */ + +#define type __m128 +#define vec +#define op fnmadd +#define suffix ps +#define SCALAR float + +#include "avx512-fma-1.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-fnmadd-sf-ymm-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-fnmadd-sf-ymm-1.c new file mode 100644 index 000000000000..d668d0ee3199 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-fnmadd-sf-ymm-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mfma -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vfnmadd...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %ymm\[0-9\]+, %ymm0" 1 } } */ +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%ymm\[0-9\]+" } } */ + +#define type __m256 +#define vec 256 +#define op fnmadd +#define suffix ps +#define SCALAR float + +#include "avx512-fma-1.h"