mirror of git://gcc.gnu.org/git/gcc.git
re PR target/47754 ([missed optimization] AVX allows unaligned memory operands but GCC uses unaligned load and register operand)
PR target/47754
* config/i386/i386.c (ix86_avx256_split_vector_move_misalign): If op1 is misaligned_operand, just use *mov<mode>_internal insn rather than UNSPEC_LOADU load.
(ix86_expand_vector_move_misalign): Likewise (for TARGET_AVX only). Avoid gen_lowpart on op0 if it isn't MEM.
* gcc.target/i386/avx256-unaligned-load-1.c: Adjust scan-assembler and scan-assembler-not regexps.
* gcc.target/i386/avx256-unaligned-load-2.c: Likewise.
* gcc.target/i386/avx256-unaligned-load-3.c: Likewise.
* gcc.target/i386/avx256-unaligned-load-4.c: Likewise.
* gcc.target/i386/l_fma_float_1.c: Use pattern for scan-assembler-times instead of just one insn name.
* gcc.target/i386/l_fma_float_2.c: Likewise.
* gcc.target/i386/l_fma_float_3.c: Likewise.
* gcc.target/i386/l_fma_float_4.c: Likewise.
* gcc.target/i386/l_fma_float_5.c: Likewise.
* gcc.target/i386/l_fma_float_6.c: Likewise.
* gcc.target/i386/l_fma_double_1.c: Likewise.
* gcc.target/i386/l_fma_double_2.c: Likewise.
* gcc.target/i386/l_fma_double_3.c: Likewise.
* gcc.target/i386/l_fma_double_4.c: Likewise.
* gcc.target/i386/l_fma_double_5.c: Likewise.
* gcc.target/i386/l_fma_double_6.c: Likewise.
From-SVN: r204219
This commit is contained in:
parent
3e4403a4c4
commit
1079f7a198
|
|
@ -16560,6 +16560,12 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
|
||||||
r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
|
r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
|
||||||
emit_move_insn (op0, r);
|
emit_move_insn (op0, r);
|
||||||
}
|
}
|
||||||
|
/* Normal *mov<mode>_internal pattern will handle
|
||||||
|
unaligned loads just fine if misaligned_operand
|
||||||
|
is true, and without the UNSPEC it can be combined
|
||||||
|
with arithmetic instructions. */
|
||||||
|
else if (misaligned_operand (op1, GET_MODE (op1)))
|
||||||
|
emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
|
||||||
else
|
else
|
||||||
emit_insn (load_unaligned (op0, op1));
|
emit_insn (load_unaligned (op0, op1));
|
||||||
}
|
}
|
||||||
|
|
@ -16634,7 +16640,7 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
|
||||||
void
|
void
|
||||||
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
|
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
|
||||||
{
|
{
|
||||||
rtx op0, op1, m;
|
rtx op0, op1, orig_op0 = NULL_RTX, m;
|
||||||
rtx (*load_unaligned) (rtx, rtx);
|
rtx (*load_unaligned) (rtx, rtx);
|
||||||
rtx (*store_unaligned) (rtx, rtx);
|
rtx (*store_unaligned) (rtx, rtx);
|
||||||
|
|
||||||
|
|
@ -16647,7 +16653,16 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
|
||||||
{
|
{
|
||||||
case MODE_VECTOR_INT:
|
case MODE_VECTOR_INT:
|
||||||
case MODE_INT:
|
case MODE_INT:
|
||||||
op0 = gen_lowpart (V16SImode, op0);
|
if (GET_MODE (op0) != V16SImode)
|
||||||
|
{
|
||||||
|
if (!MEM_P (op0))
|
||||||
|
{
|
||||||
|
orig_op0 = op0;
|
||||||
|
op0 = gen_reg_rtx (V16SImode);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
op0 = gen_lowpart (V16SImode, op0);
|
||||||
|
}
|
||||||
op1 = gen_lowpart (V16SImode, op1);
|
op1 = gen_lowpart (V16SImode, op1);
|
||||||
/* FALLTHRU */
|
/* FALLTHRU */
|
||||||
|
|
||||||
|
|
@ -16676,6 +16691,8 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
|
||||||
emit_insn (store_unaligned (op0, op1));
|
emit_insn (store_unaligned (op0, op1));
|
||||||
else
|
else
|
||||||
gcc_unreachable ();
|
gcc_unreachable ();
|
||||||
|
if (orig_op0)
|
||||||
|
emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
|
@ -16692,12 +16709,23 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
|
||||||
{
|
{
|
||||||
case MODE_VECTOR_INT:
|
case MODE_VECTOR_INT:
|
||||||
case MODE_INT:
|
case MODE_INT:
|
||||||
op0 = gen_lowpart (V32QImode, op0);
|
if (GET_MODE (op0) != V32QImode)
|
||||||
|
{
|
||||||
|
if (!MEM_P (op0))
|
||||||
|
{
|
||||||
|
orig_op0 = op0;
|
||||||
|
op0 = gen_reg_rtx (V32QImode);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
op0 = gen_lowpart (V32QImode, op0);
|
||||||
|
}
|
||||||
op1 = gen_lowpart (V32QImode, op1);
|
op1 = gen_lowpart (V32QImode, op1);
|
||||||
/* FALLTHRU */
|
/* FALLTHRU */
|
||||||
|
|
||||||
case MODE_VECTOR_FLOAT:
|
case MODE_VECTOR_FLOAT:
|
||||||
ix86_avx256_split_vector_move_misalign (op0, op1);
|
ix86_avx256_split_vector_move_misalign (op0, op1);
|
||||||
|
if (orig_op0)
|
||||||
|
emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
|
@ -16709,15 +16737,30 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
|
||||||
|
|
||||||
if (MEM_P (op1))
|
if (MEM_P (op1))
|
||||||
{
|
{
|
||||||
|
/* Normal *mov<mode>_internal pattern will handle
|
||||||
|
unaligned loads just fine if misaligned_operand
|
||||||
|
is true, and without the UNSPEC it can be combined
|
||||||
|
with arithmetic instructions. */
|
||||||
|
if (TARGET_AVX
|
||||||
|
&& (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
|
||||||
|
|| GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
|
||||||
|
&& misaligned_operand (op1, GET_MODE (op1)))
|
||||||
|
emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
|
||||||
/* ??? If we have typed data, then it would appear that using
|
/* ??? If we have typed data, then it would appear that using
|
||||||
movdqu is the only way to get unaligned data loaded with
|
movdqu is the only way to get unaligned data loaded with
|
||||||
integer type. */
|
integer type. */
|
||||||
if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
|
else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
|
||||||
{
|
{
|
||||||
op0 = gen_lowpart (V16QImode, op0);
|
if (GET_MODE (op0) != V16QImode)
|
||||||
|
{
|
||||||
|
orig_op0 = op0;
|
||||||
|
op0 = gen_reg_rtx (V16QImode);
|
||||||
|
}
|
||||||
op1 = gen_lowpart (V16QImode, op1);
|
op1 = gen_lowpart (V16QImode, op1);
|
||||||
/* We will eventually emit movups based on insn attributes. */
|
/* We will eventually emit movups based on insn attributes. */
|
||||||
emit_insn (gen_sse2_loaddquv16qi (op0, op1));
|
emit_insn (gen_sse2_loaddquv16qi (op0, op1));
|
||||||
|
if (orig_op0)
|
||||||
|
emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
|
||||||
}
|
}
|
||||||
else if (TARGET_SSE2 && mode == V2DFmode)
|
else if (TARGET_SSE2 && mode == V2DFmode)
|
||||||
{
|
{
|
||||||
|
|
@ -16765,9 +16808,16 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
|
||||||
|| TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
|
|| TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
|
||||||
|| optimize_insn_for_size_p ())
|
|| optimize_insn_for_size_p ())
|
||||||
{
|
{
|
||||||
op0 = gen_lowpart (V4SFmode, op0);
|
if (GET_MODE (op0) != V4SFmode)
|
||||||
|
{
|
||||||
|
orig_op0 = op0;
|
||||||
|
op0 = gen_reg_rtx (V4SFmode);
|
||||||
|
}
|
||||||
op1 = gen_lowpart (V4SFmode, op1);
|
op1 = gen_lowpart (V4SFmode, op1);
|
||||||
emit_insn (gen_sse_loadups (op0, op1));
|
emit_insn (gen_sse_loadups (op0, op1));
|
||||||
|
if (orig_op0)
|
||||||
|
emit_move_insn (orig_op0,
|
||||||
|
gen_lowpart (GET_MODE (orig_op0), op0));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,6 @@ avx_test (void)
|
||||||
c[i] = a[i] * b[i+3];
|
c[i] = a[i] * b[i+3];
|
||||||
}
|
}
|
||||||
|
|
||||||
/* { dg-final { scan-assembler-not "avx_loadups256" } } */
|
/* { dg-final { scan-assembler-not "(avx_loadups256|vmovups\[^\n\r]*movv8sf_internal)" } } */
|
||||||
/* { dg-final { scan-assembler "sse_loadups" } } */
|
/* { dg-final { scan-assembler "(sse_loadups|movv4sf_internal)" } } */
|
||||||
/* { dg-final { scan-assembler "vinsertf128" } } */
|
/* { dg-final { scan-assembler "vinsertf128" } } */
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,6 @@ avx_test (char **cp, char **ep)
|
||||||
*ap++ = *cp++;
|
*ap++ = *cp++;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* { dg-final { scan-assembler-not "avx_loaddqu256" } } */
|
/* { dg-final { scan-assembler-not "(avx_loaddqu256|vmovdqu\[^\n\r]*movv32qi_internal)" } } */
|
||||||
/* { dg-final { scan-assembler "sse2_loaddqu" } } */
|
/* { dg-final { scan-assembler "(sse2_loaddqu|vmovdqu\[^\n\r]*movv16qi_internal)" } } */
|
||||||
/* { dg-final { scan-assembler "vinsert.128" } } */
|
/* { dg-final { scan-assembler "vinsert.128" } } */
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,6 @@ avx_test (void)
|
||||||
c[i] = a[i] * b[i+3];
|
c[i] = a[i] * b[i+3];
|
||||||
}
|
}
|
||||||
|
|
||||||
/* { dg-final { scan-assembler-not "avx_loadupd256" } } */
|
/* { dg-final { scan-assembler-not "(avx_loadupd256|vmovupd\[^\n\r]*movv4df_internal)" } } */
|
||||||
/* { dg-final { scan-assembler "sse2_loadupd" } } */
|
/* { dg-final { scan-assembler "(sse2_loadupd|vmovupd\[^\n\r]*movv2df_internal)" } } */
|
||||||
/* { dg-final { scan-assembler "vinsertf128" } } */
|
/* { dg-final { scan-assembler "vinsertf128" } } */
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,6 @@ avx_test (void)
|
||||||
b[i] = a[i+3] * 2;
|
b[i] = a[i+3] * 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* { dg-final { scan-assembler "avx_loadups256" } } */
|
/* { dg-final { scan-assembler "(avx_loadups256|vmovups\[^\n\r]*movv8sf_internal)" } } */
|
||||||
/* { dg-final { scan-assembler-not "sse_loadups" } } */
|
/* { dg-final { scan-assembler-not "(sse_loadups|vmovups\[^\n\r]*movv4sf_internal)" } } */
|
||||||
/* { dg-final { scan-assembler-not "vinsertf128" } } */
|
/* { dg-final { scan-assembler-not "vinsertf128" } } */
|
||||||
|
|
|
||||||
|
|
@ -9,15 +9,11 @@ typedef double adouble __attribute__((aligned(sizeof (double))));
|
||||||
|
|
||||||
#include "l_fma_1.h"
|
#include "l_fma_1.h"
|
||||||
|
|
||||||
/* { dg-final { scan-assembler-times "vfmadd132pd" 4 } } */
|
/* { dg-final { scan-assembler-times "vfmadd\[123\]+pd" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmadd231pd" 4 } } */
|
/* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmsub132pd" 4 } } */
|
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmsub231pd" 4 } } */
|
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmadd132pd" 4 } } */
|
/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 56 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */
|
|
||||||
/* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */
|
|
||||||
/* { dg-final { scan-assembler-times "vfnmsub231pd" 4 } } */
|
|
||||||
/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 56 } } */
|
|
||||||
/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 56 } } */
|
/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 56 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 56 } } */
|
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 56 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 56 } } */
|
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 56 } } */
|
||||||
|
|
|
||||||
|
|
@ -9,11 +9,11 @@ typedef double adouble __attribute__((aligned(sizeof (double))));
|
||||||
|
|
||||||
#include "l_fma_2.h"
|
#include "l_fma_2.h"
|
||||||
|
|
||||||
/* { dg-final { scan-assembler-times "vfmadd132pd" 8 } } */
|
/* { dg-final { scan-assembler-times "vfmadd\[123\]+pd" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */
|
/* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */
|
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */
|
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 56 } } */
|
/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 56 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 56 } } */
|
/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 56 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 56 } } */
|
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 56 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 56 } } */
|
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 56 } } */
|
||||||
|
|
|
||||||
|
|
@ -9,14 +9,10 @@ typedef double adouble __attribute__((aligned(sizeof (double))));
|
||||||
|
|
||||||
#include "l_fma_3.h"
|
#include "l_fma_3.h"
|
||||||
|
|
||||||
/* { dg-final { scan-assembler-times "vfmadd132pd" 4 } } */
|
/* { dg-final { scan-assembler-times "vfmadd\[123\]+pd" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmadd231pd" 4 } } */
|
/* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmsub132pd" 4 } } */
|
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmsub231pd" 4 } } */
|
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmadd132pd" 4 } } */
|
|
||||||
/* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */
|
|
||||||
/* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */
|
|
||||||
/* { dg-final { scan-assembler-times "vfnmsub231pd" 4 } } */
|
|
||||||
/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 56 } } */
|
/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 56 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 56 } } */
|
/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 56 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 56 } } */
|
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 56 } } */
|
||||||
|
|
|
||||||
|
|
@ -9,10 +9,10 @@ typedef double adouble __attribute__((aligned(sizeof (double))));
|
||||||
|
|
||||||
#include "l_fma_4.h"
|
#include "l_fma_4.h"
|
||||||
|
|
||||||
/* { dg-final { scan-assembler-times "vfmadd132pd" 8 } } */
|
/* { dg-final { scan-assembler-times "vfmadd\[123\]+pd" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */
|
/* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */
|
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */
|
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 56 } } */
|
/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 56 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 56 } } */
|
/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 56 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 56 } } */
|
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 56 } } */
|
||||||
|
|
|
||||||
|
|
@ -9,11 +9,11 @@ typedef double adouble __attribute__((aligned(sizeof (double))));
|
||||||
|
|
||||||
#include "l_fma_5.h"
|
#include "l_fma_5.h"
|
||||||
|
|
||||||
/* { dg-final { scan-assembler-times "vfmadd132pd" 8 } } */
|
/* { dg-final { scan-assembler-times "vfmadd\[123\]+pd" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */
|
/* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */
|
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */
|
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 56 } } */
|
/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 56 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 56 } } */
|
/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 56 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 56 } } */
|
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 56 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 56 } } */
|
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 56 } } */
|
||||||
|
|
|
||||||
|
|
@ -9,11 +9,11 @@ typedef double adouble __attribute__((aligned(sizeof (double))));
|
||||||
|
|
||||||
#include "l_fma_6.h"
|
#include "l_fma_6.h"
|
||||||
|
|
||||||
/* { dg-final { scan-assembler-times "vfmadd132pd" 8 } } */
|
/* { dg-final { scan-assembler-times "vfmadd\[123\]+pd" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */
|
/* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */
|
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */
|
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 56 } } */
|
/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 56 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 56 } } */
|
/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 56 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 56 } } */
|
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 56 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 56 } } */
|
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 56 } } */
|
||||||
|
|
|
||||||
|
|
@ -8,14 +8,10 @@
|
||||||
|
|
||||||
#include "l_fma_1.h"
|
#include "l_fma_1.h"
|
||||||
|
|
||||||
/* { dg-final { scan-assembler-times "vfmadd132ps" 4 } } */
|
/* { dg-final { scan-assembler-times "vfmadd\[123\]+ps" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmadd231ps" 4 } } */
|
/* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmsub132ps" 4 } } */
|
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmsub231ps" 4 } } */
|
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmadd132ps" 4 } } */
|
|
||||||
/* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */
|
|
||||||
/* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */
|
|
||||||
/* { dg-final { scan-assembler-times "vfnmsub231ps" 4 } } */
|
|
||||||
/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 120 } } */
|
/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 120 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 120 } } */
|
/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 120 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 120 } } */
|
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 120 } } */
|
||||||
|
|
|
||||||
|
|
@ -8,11 +8,11 @@
|
||||||
|
|
||||||
#include "l_fma_2.h"
|
#include "l_fma_2.h"
|
||||||
|
|
||||||
/* { dg-final { scan-assembler-times "vfmadd132ps" 8 } } */
|
/* { dg-final { scan-assembler-times "vfmadd\[123\]+ps" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */
|
/* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */
|
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */
|
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 120 } } */
|
/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 120 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 120 } } */
|
/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 120 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 120 } } */
|
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 120 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 120 } } */
|
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 120 } } */
|
||||||
|
|
|
||||||
|
|
@ -8,15 +8,11 @@
|
||||||
|
|
||||||
#include "l_fma_3.h"
|
#include "l_fma_3.h"
|
||||||
|
|
||||||
/* { dg-final { scan-assembler-times "vfmadd132ps" 4 } } */
|
/* { dg-final { scan-assembler-times "vfmadd\[123\]+ps" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmadd231ps" 4 } } */
|
/* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmsub132ps" 4 } } */
|
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmsub231ps" 4 } } */
|
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmadd132ps" 4 } } */
|
/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 120 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */
|
/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 120 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */
|
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 120 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmsub231ps" 4 } } */
|
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 120 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 120 } } */
|
|
||||||
/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 120 } } */
|
|
||||||
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 120 } } */
|
|
||||||
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 120 } } */
|
|
||||||
|
|
|
||||||
|
|
@ -8,11 +8,11 @@
|
||||||
|
|
||||||
#include "l_fma_4.h"
|
#include "l_fma_4.h"
|
||||||
|
|
||||||
/* { dg-final { scan-assembler-times "vfmadd132ps" 8 } } */
|
/* { dg-final { scan-assembler-times "vfmadd\[123\]+ps" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */
|
/* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */
|
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */
|
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 120 } } */
|
/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 120 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 120 } } */
|
/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 120 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 120 } } */
|
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 120 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 120 } } */
|
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 120 } } */
|
||||||
|
|
|
||||||
|
|
@ -8,11 +8,11 @@
|
||||||
|
|
||||||
#include "l_fma_5.h"
|
#include "l_fma_5.h"
|
||||||
|
|
||||||
/* { dg-final { scan-assembler-times "vfmadd132ps" 8 } } */
|
/* { dg-final { scan-assembler-times "vfmadd\[123\]+ps" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */
|
/* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */
|
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */
|
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 120 } } */
|
/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 120 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 120 } } */
|
/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 120 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 120 } } */
|
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 120 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 120 } } */
|
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 120 } } */
|
||||||
|
|
|
||||||
|
|
@ -8,11 +8,11 @@
|
||||||
|
|
||||||
#include "l_fma_6.h"
|
#include "l_fma_6.h"
|
||||||
|
|
||||||
/* { dg-final { scan-assembler-times "vfmadd132ps" 8 } } */
|
/* { dg-final { scan-assembler-times "vfmadd\[123\]+ps" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */
|
/* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */
|
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */
|
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 8 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 120 } } */
|
/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 120 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 120 } } */
|
/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 120 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 120 } } */
|
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 120 } } */
|
||||||
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 120 } } */
|
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 120 } } */
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue