mirror of git://gcc.gnu.org/git/gcc.git
* config/i386/avx512fintrin.h (_mm_mask_max_sd)
(_mm_maskz_max_sd, _mm_mask_max_ss, _mm_maskz_max_ss)
(_mm_mask_min_sd, _mm_maskz_min_sd, _mm_mask_min_ss)
(_mm_maskz_min_ss): New intrinsics.
testsuite/ChangeLog:
* gcc.target/i386/avx512f-vmaxsd-1.c (_mm_mask_max_sd)
(_mm_maskz_max_sd): Test new intrinsics.
* gcc.target/i386/avx512f-vmaxsd-2.c (_mm_mask_max_sd)
(_mm_maskz_max_sd): Test new intrinsics.
* gcc.target/i386/avx512f-vmaxss-1.c (_mm_mask_max_ss)
(_mm_maskz_max_ss): Test new intrinsics.
* gcc.target/i386/avx512f-vmaxss-2.c (_mm_mask_max_ss)
(_mm_maskz_max_ss): Test new intrinsics.
* gcc.target/i386/avx512f-vminsd-1.c (_mm_mask_min_sd)
(_mm_maskz_min_sd): Test new intrinsics.
* gcc.target/i386/avx512f-vminsd-2.c (_mm_mask_min_sd)
(_mm_maskz_min_sd): Test new intrinsics.
* gcc.target/i386/avx512f-vminss-1.c (_mm_mask_min_ss)
(_mm_maskz_min_ss): Test new intrinsics.
* gcc.target/i386/avx512f-vminss-2.c (_mm_mask_min_ss)
(_mm_maskz_min_ss): Test new intrinsics.
From-SVN: r248756
This commit is contained in:
parent
049a5d31b9
commit
dc7401c0c6
|
|
@ -1,3 +1,10 @@
|
||||||
|
2017-05-31 Sebastian Peryt <sebastian.peryt@intel.com>
|
||||||
|
|
||||||
|
* config/i386/avx512fintrin.h (_mm_mask_max_sd)
|
||||||
|
(_mm_maskz_max_sd, _mm_mask_max_ss, _mm_maskz_max_ss)
|
||||||
|
(_mm_mask_min_sd, _mm_maskz_min_sd, _mm_mask_min_ss)
|
||||||
|
(_mm_maskz_min_ss): New intrinsics.
|
||||||
|
|
||||||
2017-05-31 Martin Liska <mliska@suse.cz>
|
2017-05-31 Martin Liska <mliska@suse.cz>
|
||||||
|
|
||||||
* tree-vect-loop.c (vect_create_epilog_for_reduction):
|
* tree-vect-loop.c (vect_create_epilog_for_reduction):
|
||||||
|
|
@ -102,11 +109,9 @@
|
||||||
m_format_postprocessor.
|
m_format_postprocessor.
|
||||||
(pretty_printer::~pretty_printer): Delete any
|
(pretty_printer::~pretty_printer): Delete any
|
||||||
m_format_postprocessor.
|
m_format_postprocessor.
|
||||||
* pretty-print.h (printer_fn): Add bool and const char **
|
* pretty-print.h (printer_fn): Add bool and const char ** parameters.
|
||||||
parameters.
|
|
||||||
(class format_postprocessor): New class.
|
(class format_postprocessor): New class.
|
||||||
(struct pretty_printer::format_decoder): Document the new
|
(struct pretty_printer::format_decoder): Document the new parameters.
|
||||||
parameters.
|
|
||||||
(struct pretty_printer::m_format_postprocessor): New field.
|
(struct pretty_printer::m_format_postprocessor): New field.
|
||||||
* tree-diagnostic.c (default_tree_printer): Update for new
|
* tree-diagnostic.c (default_tree_printer): Update for new
|
||||||
bool and const char ** params.
|
bool and const char ** params.
|
||||||
|
|
@ -363,8 +368,7 @@
|
||||||
2017-05-26 Richard Biener <rguenther@suse.de>
|
2017-05-26 Richard Biener <rguenther@suse.de>
|
||||||
|
|
||||||
PR tree-optimization/80844
|
PR tree-optimization/80844
|
||||||
* tree-vectorizer.c (adjust_simduid_builtins): Propagate
|
* tree-vectorizer.c (adjust_simduid_builtins): Propagate results.
|
||||||
results.
|
|
||||||
|
|
||||||
2017-05-25 Sebastian Peryt <sebastian.peryt@intel.com>
|
2017-05-25 Sebastian Peryt <sebastian.peryt@intel.com>
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -11737,6 +11737,52 @@ _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
|
||||||
_MM_FROUND_CUR_DIRECTION);
|
_MM_FROUND_CUR_DIRECTION);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
extern __inline __m128d
|
||||||
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
_mm_mask_max_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
|
||||||
|
{
|
||||||
|
return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
|
||||||
|
(__v2df) __B,
|
||||||
|
(__v2df) __W,
|
||||||
|
(__mmask8) __U,
|
||||||
|
_MM_FROUND_CUR_DIRECTION);
|
||||||
|
}
|
||||||
|
|
||||||
|
extern __inline __m128d
|
||||||
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
_mm_maskz_max_sd (__mmask8 __U, __m128d __A, __m128d __B)
|
||||||
|
{
|
||||||
|
return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
|
||||||
|
(__v2df) __B,
|
||||||
|
(__v2df)
|
||||||
|
_mm_setzero_pd (),
|
||||||
|
(__mmask8) __U,
|
||||||
|
_MM_FROUND_CUR_DIRECTION);
|
||||||
|
}
|
||||||
|
|
||||||
|
extern __inline __m128
|
||||||
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
_mm_mask_max_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
|
||||||
|
{
|
||||||
|
return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
|
||||||
|
(__v4sf) __B,
|
||||||
|
(__v4sf) __W,
|
||||||
|
(__mmask8) __U,
|
||||||
|
_MM_FROUND_CUR_DIRECTION);
|
||||||
|
}
|
||||||
|
|
||||||
|
extern __inline __m128
|
||||||
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
_mm_maskz_max_ss (__mmask8 __U, __m128 __A, __m128 __B)
|
||||||
|
{
|
||||||
|
return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
|
||||||
|
(__v4sf) __B,
|
||||||
|
(__v4sf)
|
||||||
|
_mm_setzero_ps (),
|
||||||
|
(__mmask8) __U,
|
||||||
|
_MM_FROUND_CUR_DIRECTION);
|
||||||
|
}
|
||||||
|
|
||||||
extern __inline __m512d
|
extern __inline __m512d
|
||||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
_mm512_min_pd (__m512d __A, __m512d __B)
|
_mm512_min_pd (__m512d __A, __m512d __B)
|
||||||
|
|
@ -11807,6 +11853,52 @@ _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
|
||||||
_MM_FROUND_CUR_DIRECTION);
|
_MM_FROUND_CUR_DIRECTION);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
extern __inline __m128d
|
||||||
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
_mm_mask_min_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
|
||||||
|
{
|
||||||
|
return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
|
||||||
|
(__v2df) __B,
|
||||||
|
(__v2df) __W,
|
||||||
|
(__mmask8) __U,
|
||||||
|
_MM_FROUND_CUR_DIRECTION);
|
||||||
|
}
|
||||||
|
|
||||||
|
extern __inline __m128d
|
||||||
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
_mm_maskz_min_sd (__mmask8 __U, __m128d __A, __m128d __B)
|
||||||
|
{
|
||||||
|
return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
|
||||||
|
(__v2df) __B,
|
||||||
|
(__v2df)
|
||||||
|
_mm_setzero_pd (),
|
||||||
|
(__mmask8) __U,
|
||||||
|
_MM_FROUND_CUR_DIRECTION);
|
||||||
|
}
|
||||||
|
|
||||||
|
extern __inline __m128
|
||||||
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
_mm_mask_min_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
|
||||||
|
{
|
||||||
|
return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
|
||||||
|
(__v4sf) __B,
|
||||||
|
(__v4sf) __W,
|
||||||
|
(__mmask8) __U,
|
||||||
|
_MM_FROUND_CUR_DIRECTION);
|
||||||
|
}
|
||||||
|
|
||||||
|
extern __inline __m128
|
||||||
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
_mm_maskz_min_ss (__mmask8 __U, __m128 __A, __m128 __B)
|
||||||
|
{
|
||||||
|
return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
|
||||||
|
(__v4sf) __B,
|
||||||
|
(__v4sf)
|
||||||
|
_mm_setzero_ps (),
|
||||||
|
(__mmask8) __U,
|
||||||
|
_MM_FROUND_CUR_DIRECTION);
|
||||||
|
}
|
||||||
|
|
||||||
extern __inline __m512d
|
extern __inline __m512d
|
||||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
_mm512_scalef_pd (__m512d __A, __m512d __B)
|
_mm512_scalef_pd (__m512d __A, __m512d __B)
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,22 @@
|
||||||
|
2017-05-31 Sebastian Peryt <sebastian.peryt@intel.com>
|
||||||
|
|
||||||
|
* gcc.target/i386/avx512f-vmaxsd-1.c (_mm_mask_max_sd)
|
||||||
|
(_mm_maskz_max_sd): Test new intrinsics.
|
||||||
|
* gcc.target/i386/avx512f-vmaxsd-2.c (_mm_mask_max_sd)
|
||||||
|
(_mm_maskz_max_sd): Test new intrinsics.
|
||||||
|
* gcc.target/i386/avx512f-vmaxss-1.c (_mm_mask_max_ss)
|
||||||
|
(_mm_maskz_max_ss): Test new intrinsics.
|
||||||
|
* gcc.target/i386/avx512f-vmaxss-2.c (_mm_mask_max_ss)
|
||||||
|
(_mm_maskz_max_ss): Test new intrinsics.
|
||||||
|
* gcc.target/i386/avx512f-vminsd-1.c (_mm_mask_min_sd)
|
||||||
|
(_mm_maskz_min_sd): Test new intrinsics.
|
||||||
|
* gcc.target/i386/avx512f-vminsd-2.c (_mm_mask_min_sd)
|
||||||
|
(_mm_maskz_min_sd): Test new intrinsics.
|
||||||
|
* gcc.target/i386/avx512f-vminss-1.c (_mm_mask_min_ss)
|
||||||
|
(_mm_maskz_min_ss): Test new intrinsics.
|
||||||
|
* gcc.target/i386/avx512f-vminss-2.c (_mm_mask_min_ss)
|
||||||
|
(_mm_maskz_min_ss): Test new intrinsics.
|
||||||
|
|
||||||
2017-05-31 Nathan Sidwell <nathan@acm.org>
|
2017-05-31 Nathan Sidwell <nathan@acm.org>
|
||||||
|
|
||||||
* g++.dg/lookup/lambda1.C New.
|
* g++.dg/lookup/lambda1.C New.
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,7 @@
|
||||||
/* { dg-do compile } */
|
/* { dg-do compile } */
|
||||||
/* { dg-options "-mavx512f -O2" } */
|
/* { dg-options "-mavx512f -O2" } */
|
||||||
|
/* { dg-final { scan-assembler-times "vmaxsd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times "vmaxsd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||||
/* { dg-final { scan-assembler-times "vmaxsd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
/* { dg-final { scan-assembler-times "vmaxsd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||||
/* { dg-final { scan-assembler-times "vmaxsd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
/* { dg-final { scan-assembler-times "vmaxsd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||||
/* { dg-final { scan-assembler-times "vmaxsd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
/* { dg-final { scan-assembler-times "vmaxsd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||||
|
|
@ -12,6 +14,8 @@ volatile __mmask8 m;
|
||||||
void extern
|
void extern
|
||||||
avx512f_test (void)
|
avx512f_test (void)
|
||||||
{
|
{
|
||||||
|
x1 = _mm_mask_max_sd (x1, m, x2, x3);
|
||||||
|
x1 = _mm_maskz_max_sd (m, x1, x2);
|
||||||
x1 = _mm_max_round_sd (x1, x2, _MM_FROUND_NO_EXC);
|
x1 = _mm_max_round_sd (x1, x2, _MM_FROUND_NO_EXC);
|
||||||
x1 = _mm_mask_max_round_sd (x1, m, x2, x3, _MM_FROUND_NO_EXC);
|
x1 = _mm_mask_max_round_sd (x1, m, x2, x3, _MM_FROUND_NO_EXC);
|
||||||
x1 = _mm_maskz_max_round_sd (m, x1, x2, _MM_FROUND_NO_EXC);
|
x1 = _mm_maskz_max_round_sd (m, x1, x2, _MM_FROUND_NO_EXC);
|
||||||
|
|
|
||||||
|
|
@ -20,7 +20,7 @@ void
|
||||||
avx512f_test (void)
|
avx512f_test (void)
|
||||||
{
|
{
|
||||||
int i, sign;
|
int i, sign;
|
||||||
union128d res1, res2, res3, src1, src2;
|
union128d res1, res2, res3, res4, res5, src1, src2;
|
||||||
MASK_TYPE mask = MASK_VALUE;
|
MASK_TYPE mask = MASK_VALUE;
|
||||||
double res_ref[SIZE];
|
double res_ref[SIZE];
|
||||||
|
|
||||||
|
|
@ -29,26 +29,38 @@ avx512f_test (void)
|
||||||
{
|
{
|
||||||
src1.a[i] = 1.5 + 34.67 * i * sign;
|
src1.a[i] = 1.5 + 34.67 * i * sign;
|
||||||
src2.a[i] = -22.17 * i * sign + 1.0;
|
src2.a[i] = -22.17 * i * sign + 1.0;
|
||||||
|
res1.a[i] = DEFAULT_VALUE;
|
||||||
|
res4.a[i] = DEFAULT_VALUE;
|
||||||
sign = sign * -1;
|
sign = sign * -1;
|
||||||
}
|
}
|
||||||
for (i = 0; i < SIZE; i++)
|
|
||||||
res2.a[i] = DEFAULT_VALUE;
|
|
||||||
|
|
||||||
res1.x = _mm_max_round_sd (src1.x, src2.x, _MM_FROUND_NO_EXC);
|
res1.x = _mm_mask_max_sd (res1.x, mask, src1.x, src2.x);
|
||||||
res2.x = _mm_mask_max_round_sd (res2.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
|
res2.x = _mm_maskz_max_sd (mask, src1.x, src2.x);
|
||||||
res3.x = _mm_maskz_max_round_sd (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
|
res3.x = _mm_max_round_sd (src1.x, src2.x, _MM_FROUND_NO_EXC);
|
||||||
|
res4.x = _mm_mask_max_round_sd (res4.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
|
||||||
|
res5.x = _mm_maskz_max_round_sd (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
|
||||||
|
|
||||||
calc_max (res_ref, src1.a, src2.a);
|
calc_max (res_ref, src1.a, src2.a);
|
||||||
|
|
||||||
if (check_union128d (res1, res_ref))
|
|
||||||
abort();
|
|
||||||
|
|
||||||
MASK_MERGE (d) (res_ref, mask, 1);
|
MASK_MERGE (d) (res_ref, mask, 1);
|
||||||
if (check_union128d (res2, res_ref))
|
if (check_union128d (res1, res_ref))
|
||||||
abort ();
|
abort ();
|
||||||
|
|
||||||
MASK_ZERO (d) (res_ref, mask, 1);
|
MASK_ZERO (d) (res_ref, mask, 1);
|
||||||
|
if (check_union128d (res2, res_ref))
|
||||||
|
abort ();
|
||||||
|
|
||||||
|
calc_max (res_ref, src1.a, src2.a);
|
||||||
|
|
||||||
if (check_union128d (res3, res_ref))
|
if (check_union128d (res3, res_ref))
|
||||||
|
abort();
|
||||||
|
|
||||||
|
MASK_MERGE (d) (res_ref, mask, 1);
|
||||||
|
if (check_union128d (res4, res_ref))
|
||||||
|
abort ();
|
||||||
|
|
||||||
|
MASK_ZERO (d) (res_ref, mask, 1);
|
||||||
|
if (check_union128d (res5, res_ref))
|
||||||
abort ();
|
abort ();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,7 @@
|
||||||
/* { dg-do compile } */
|
/* { dg-do compile } */
|
||||||
/* { dg-options "-mavx512f -O2" } */
|
/* { dg-options "-mavx512f -O2" } */
|
||||||
|
/* { dg-final { scan-assembler-times "vmaxss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times "vmaxss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||||
/* { dg-final { scan-assembler-times "vmaxss\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
/* { dg-final { scan-assembler-times "vmaxss\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||||
/* { dg-final { scan-assembler-times "vmaxss\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
/* { dg-final { scan-assembler-times "vmaxss\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||||
/* { dg-final { scan-assembler-times "vmaxss\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
/* { dg-final { scan-assembler-times "vmaxss\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||||
|
|
@ -12,6 +14,8 @@ volatile __mmask8 m;
|
||||||
void extern
|
void extern
|
||||||
avx512f_test (void)
|
avx512f_test (void)
|
||||||
{
|
{
|
||||||
|
x1 = _mm_mask_max_ss (x1, m, x2, x3);
|
||||||
|
x1 = _mm_maskz_max_ss (m, x1, x2);
|
||||||
x1 = _mm_max_round_ss (x1, x2, _MM_FROUND_NO_EXC);
|
x1 = _mm_max_round_ss (x1, x2, _MM_FROUND_NO_EXC);
|
||||||
x1 = _mm_mask_max_round_ss (x1, m, x2, x3, _MM_FROUND_NO_EXC);
|
x1 = _mm_mask_max_round_ss (x1, m, x2, x3, _MM_FROUND_NO_EXC);
|
||||||
x1 = _mm_maskz_max_round_ss (m, x1, x2, _MM_FROUND_NO_EXC);
|
x1 = _mm_maskz_max_round_ss (m, x1, x2, _MM_FROUND_NO_EXC);
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,7 @@ void
|
||||||
avx512f_test (void)
|
avx512f_test (void)
|
||||||
{
|
{
|
||||||
int i, sign;
|
int i, sign;
|
||||||
union128 res1, res2, res3, src1, src2;
|
union128 res1, res2, res3, res4, res5, src1, src2;
|
||||||
MASK_TYPE mask = MASK_VALUE;
|
MASK_TYPE mask = MASK_VALUE;
|
||||||
float res_ref[SIZE];
|
float res_ref[SIZE];
|
||||||
|
|
||||||
|
|
@ -33,26 +33,38 @@ avx512f_test (void)
|
||||||
{
|
{
|
||||||
src1.a[i] = 1.5 + 34.67 * i * sign;
|
src1.a[i] = 1.5 + 34.67 * i * sign;
|
||||||
src2.a[i] = -22.17 * i * sign + 1.0;
|
src2.a[i] = -22.17 * i * sign + 1.0;
|
||||||
|
res1.a[i] = DEFAULT_VALUE;
|
||||||
|
res4.a[i] = DEFAULT_VALUE;
|
||||||
sign = sign * -1;
|
sign = sign * -1;
|
||||||
}
|
}
|
||||||
for (i = 0; i < SIZE; i++)
|
|
||||||
res2.a[i] = DEFAULT_VALUE;
|
|
||||||
|
|
||||||
res1.x = _mm_max_round_ss (src1.x, src2.x, _MM_FROUND_NO_EXC);
|
res1.x = _mm_mask_max_ss (res1.x, mask, src1.x, src2.x);
|
||||||
res2.x = _mm_mask_max_round_ss (res2.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
|
res2.x = _mm_maskz_max_ss (mask, src1.x, src2.x);
|
||||||
res3.x = _mm_maskz_max_round_ss (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
|
res3.x = _mm_max_round_ss (src1.x, src2.x, _MM_FROUND_NO_EXC);
|
||||||
|
res4.x = _mm_mask_max_round_ss (res4.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
|
||||||
|
res5.x = _mm_maskz_max_round_ss (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
|
||||||
|
|
||||||
calc_max (res_ref, src1.a, src2.a);
|
calc_max (res_ref, src1.a, src2.a);
|
||||||
|
|
||||||
if (check_union128 (res1, res_ref))
|
|
||||||
abort();
|
|
||||||
|
|
||||||
MASK_MERGE () (res_ref, mask, 1);
|
MASK_MERGE () (res_ref, mask, 1);
|
||||||
if (check_union128 (res2, res_ref))
|
if (check_union128 (res1, res_ref))
|
||||||
abort ();
|
abort ();
|
||||||
|
|
||||||
MASK_ZERO () (res_ref, mask, 1);
|
MASK_ZERO () (res_ref, mask, 1);
|
||||||
|
if (check_union128 (res2, res_ref))
|
||||||
|
abort ();
|
||||||
|
|
||||||
|
calc_max (res_ref, src1.a, src2.a);
|
||||||
|
|
||||||
if (check_union128 (res3, res_ref))
|
if (check_union128 (res3, res_ref))
|
||||||
|
abort();
|
||||||
|
|
||||||
|
MASK_MERGE () (res_ref, mask, 1);
|
||||||
|
if (check_union128 (res4, res_ref))
|
||||||
|
abort ();
|
||||||
|
|
||||||
|
MASK_ZERO () (res_ref, mask, 1);
|
||||||
|
if (check_union128 (res5, res_ref))
|
||||||
abort ();
|
abort ();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,7 @@
|
||||||
/* { dg-do compile } */
|
/* { dg-do compile } */
|
||||||
/* { dg-options "-mavx512f -O2" } */
|
/* { dg-options "-mavx512f -O2" } */
|
||||||
|
/* { dg-final { scan-assembler-times "vminsd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times "vminsd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||||
/* { dg-final { scan-assembler-times "vminsd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
/* { dg-final { scan-assembler-times "vminsd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||||
/* { dg-final { scan-assembler-times "vminsd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
/* { dg-final { scan-assembler-times "vminsd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||||
/* { dg-final { scan-assembler-times "vminsd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
/* { dg-final { scan-assembler-times "vminsd\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||||
|
|
@ -12,6 +14,8 @@ volatile __mmask8 m;
|
||||||
void extern
|
void extern
|
||||||
avx512f_test (void)
|
avx512f_test (void)
|
||||||
{
|
{
|
||||||
|
x1 = _mm_mask_min_sd (x1, m, x2, x3);
|
||||||
|
x1 = _mm_maskz_min_sd (m, x1, x2);
|
||||||
x1 = _mm_min_round_sd (x1, x2, _MM_FROUND_NO_EXC);
|
x1 = _mm_min_round_sd (x1, x2, _MM_FROUND_NO_EXC);
|
||||||
x1 = _mm_mask_min_round_sd (x1, m, x2, x3, _MM_FROUND_NO_EXC);
|
x1 = _mm_mask_min_round_sd (x1, m, x2, x3, _MM_FROUND_NO_EXC);
|
||||||
x1 = _mm_maskz_min_round_sd (m, x1, x2, _MM_FROUND_NO_EXC);
|
x1 = _mm_maskz_min_round_sd (m, x1, x2, _MM_FROUND_NO_EXC);
|
||||||
|
|
|
||||||
|
|
@ -20,7 +20,7 @@ void
|
||||||
avx512f_test (void)
|
avx512f_test (void)
|
||||||
{
|
{
|
||||||
int i, sign;
|
int i, sign;
|
||||||
union128d res1, res2, res3, src1, src2;
|
union128d res1, res2, res3, res4, res5, src1, src2;
|
||||||
MASK_TYPE mask = MASK_VALUE;
|
MASK_TYPE mask = MASK_VALUE;
|
||||||
double res_ref[SIZE];
|
double res_ref[SIZE];
|
||||||
|
|
||||||
|
|
@ -29,26 +29,38 @@ avx512f_test (void)
|
||||||
{
|
{
|
||||||
src1.a[i] = 1.5 + 34.67 * i * sign;
|
src1.a[i] = 1.5 + 34.67 * i * sign;
|
||||||
src2.a[i] = -22.17 * i * sign + 1.0;
|
src2.a[i] = -22.17 * i * sign + 1.0;
|
||||||
|
res1.a[i] = DEFAULT_VALUE;
|
||||||
|
res4.a[i] = DEFAULT_VALUE;
|
||||||
sign = sign * -1;
|
sign = sign * -1;
|
||||||
}
|
}
|
||||||
for (i = 0; i < SIZE; i++)
|
|
||||||
res2.a[i] = DEFAULT_VALUE;
|
res1.x = _mm_mask_min_sd (res1.x, mask, src1.x, src2.x);
|
||||||
|
res2.x = _mm_maskz_min_sd (mask, src1.x, src2.x);
|
||||||
res1.x = _mm_min_round_sd (src1.x, src2.x, _MM_FROUND_NO_EXC);
|
res3.x = _mm_min_round_sd (src1.x, src2.x, _MM_FROUND_NO_EXC);
|
||||||
res2.x = _mm_mask_min_round_sd (res2.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
|
res4.x = _mm_mask_min_round_sd (res4.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
|
||||||
res3.x = _mm_maskz_min_round_sd (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
|
res5.x = _mm_maskz_min_round_sd (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
|
||||||
|
|
||||||
calc_min (res_ref, src1.a, src2.a);
|
calc_min (res_ref, src1.a, src2.a);
|
||||||
|
|
||||||
if (check_union128d (res1, res_ref))
|
|
||||||
abort();
|
|
||||||
|
|
||||||
MASK_MERGE (d) (res_ref, mask, 1);
|
MASK_MERGE (d) (res_ref, mask, 1);
|
||||||
if (check_union128d (res2, res_ref))
|
if (check_union128d (res1, res_ref))
|
||||||
abort ();
|
abort ();
|
||||||
|
|
||||||
MASK_ZERO (d) (res_ref, mask, 1);
|
MASK_ZERO (d) (res_ref, mask, 1);
|
||||||
|
if (check_union128d (res2, res_ref))
|
||||||
|
abort ();
|
||||||
|
|
||||||
|
calc_min (res_ref, src1.a, src2.a);
|
||||||
|
|
||||||
if (check_union128d (res3, res_ref))
|
if (check_union128d (res3, res_ref))
|
||||||
|
abort();
|
||||||
|
|
||||||
|
MASK_MERGE (d) (res_ref, mask, 1);
|
||||||
|
if (check_union128d (res4, res_ref))
|
||||||
|
abort ();
|
||||||
|
|
||||||
|
MASK_ZERO (d) (res_ref, mask, 1);
|
||||||
|
if (check_union128d (res5, res_ref))
|
||||||
abort ();
|
abort ();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,7 @@
|
||||||
/* { dg-do compile } */
|
/* { dg-do compile } */
|
||||||
/* { dg-options "-mavx512f -O2" } */
|
/* { dg-options "-mavx512f -O2" } */
|
||||||
|
/* { dg-final { scan-assembler-times "vminss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times "vminss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||||
/* { dg-final { scan-assembler-times "vminss\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
/* { dg-final { scan-assembler-times "vminss\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||||
/* { dg-final { scan-assembler-times "vminss\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
/* { dg-final { scan-assembler-times "vminss\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||||
/* { dg-final { scan-assembler-times "vminss\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
/* { dg-final { scan-assembler-times "vminss\[ \\t\]+\[^\n\]*\{sae\}\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
|
||||||
|
|
@ -12,6 +14,8 @@ volatile __mmask8 m;
|
||||||
void extern
|
void extern
|
||||||
avx512f_test (void)
|
avx512f_test (void)
|
||||||
{
|
{
|
||||||
|
x1 = _mm_mask_min_ss (x1, m, x2, x3);
|
||||||
|
x1 = _mm_maskz_min_ss (m, x1, x2);
|
||||||
x1 = _mm_min_round_ss (x1, x2, _MM_FROUND_NO_EXC);
|
x1 = _mm_min_round_ss (x1, x2, _MM_FROUND_NO_EXC);
|
||||||
x1 = _mm_mask_min_round_ss (x1, m, x2, x3, _MM_FROUND_NO_EXC);
|
x1 = _mm_mask_min_round_ss (x1, m, x2, x3, _MM_FROUND_NO_EXC);
|
||||||
x1 = _mm_maskz_min_round_ss (m, x1, x2, _MM_FROUND_NO_EXC);
|
x1 = _mm_maskz_min_round_ss (m, x1, x2, _MM_FROUND_NO_EXC);
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,7 @@ void
|
||||||
avx512f_test (void)
|
avx512f_test (void)
|
||||||
{
|
{
|
||||||
int i, sign;
|
int i, sign;
|
||||||
union128 res1, res2, res3, src1, src2;
|
union128 res1, res2, res3, res4, res5, src1, src2;
|
||||||
MASK_TYPE mask = MASK_VALUE;
|
MASK_TYPE mask = MASK_VALUE;
|
||||||
float res_ref[SIZE];
|
float res_ref[SIZE];
|
||||||
|
|
||||||
|
|
@ -33,26 +33,38 @@ avx512f_test (void)
|
||||||
{
|
{
|
||||||
src1.a[i] = 1.5 + 34.67 * i * sign;
|
src1.a[i] = 1.5 + 34.67 * i * sign;
|
||||||
src2.a[i] = -22.17 * i * sign + 1.0;
|
src2.a[i] = -22.17 * i * sign + 1.0;
|
||||||
|
res1.a[i] = DEFAULT_VALUE;
|
||||||
|
res4.a[i] = DEFAULT_VALUE;
|
||||||
sign = sign * -1;
|
sign = sign * -1;
|
||||||
}
|
}
|
||||||
for (i = 0; i < SIZE; i++)
|
|
||||||
res2.a[i] = DEFAULT_VALUE;
|
|
||||||
|
|
||||||
res1.x = _mm_min_round_ss (src1.x, src2.x, _MM_FROUND_NO_EXC);
|
res1.x = _mm_mask_min_ss (res1.x, mask, src1.x, src2.x);
|
||||||
res2.x = _mm_mask_min_round_ss (res2.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
|
res2.x = _mm_maskz_min_ss (mask, src1.x, src2.x);
|
||||||
res3.x = _mm_maskz_min_round_ss (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
|
res3.x = _mm_min_round_ss (src1.x, src2.x, _MM_FROUND_NO_EXC);
|
||||||
|
res4.x = _mm_mask_min_round_ss (res4.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
|
||||||
|
res5.x = _mm_maskz_min_round_ss (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
|
||||||
|
|
||||||
calc_min (res_ref, src1.a, src2.a);
|
calc_min (res_ref, src1.a, src2.a);
|
||||||
|
|
||||||
if (check_union128 (res1, res_ref))
|
|
||||||
abort();
|
|
||||||
|
|
||||||
MASK_MERGE () (res_ref, mask, 1);
|
MASK_MERGE () (res_ref, mask, 1);
|
||||||
if (check_union128 (res2, res_ref))
|
if (check_union128 (res1, res_ref))
|
||||||
abort ();
|
abort ();
|
||||||
|
|
||||||
MASK_ZERO () (res_ref, mask, 1);
|
MASK_ZERO () (res_ref, mask, 1);
|
||||||
|
if (check_union128 (res2, res_ref))
|
||||||
|
abort ();
|
||||||
|
|
||||||
|
calc_min (res_ref, src1.a, src2.a);
|
||||||
|
|
||||||
if (check_union128 (res3, res_ref))
|
if (check_union128 (res3, res_ref))
|
||||||
|
abort();
|
||||||
|
|
||||||
|
MASK_MERGE () (res_ref, mask, 1);
|
||||||
|
if (check_union128 (res4, res_ref))
|
||||||
|
abort ();
|
||||||
|
|
||||||
|
MASK_ZERO () (res_ref, mask, 1);
|
||||||
|
if (check_union128 (res5, res_ref))
|
||||||
abort ();
|
abort ();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue