mirror of git://gcc.gnu.org/git/gcc.git
S/390: Don't emit prefetch instructions for clrmem
gcc/ChangeLog: 2018-07-31 Andreas Krebbel <krebbel@linux.ibm.com> * config/s390/s390.c (s390_expand_setmem): Make the unrolling depend on whether prefetch instructions will be emitted or not. Use TARGET_SETMEM_PFD for checking whether prefetch instructions will be emitted or not. * config/s390/s390.h (TARGET_SETMEM_PREFETCH_DISTANCE) (TARGET_SETMEM_PFD): New macros. gcc/testsuite/ChangeLog: 2018-07-31 Andreas Krebbel <krebbel@linux.ibm.com> * gcc.target/s390/memset-1.c: Improve testcase. From-SVN: r263165
This commit is contained in:
parent
77b4604223
commit
4de3a1e16a
|
|
@ -1,3 +1,12 @@
|
||||||
|
2018-07-31 Andreas Krebbel <krebbel@linux.ibm.com>
|
||||||
|
|
||||||
|
* config/s390/s390.c (s390_expand_setmem): Make the unrolling
|
||||||
|
depend on whether prefetch instructions will be emitted or not.
|
||||||
|
Use TARGET_SETMEM_PFD for checking whether prefetch instructions
|
||||||
|
will be emitted or not.
|
||||||
|
* config/s390/s390.h (TARGET_SETMEM_PREFETCH_DISTANCE)
|
||||||
|
(TARGET_SETMEM_PFD): New macros.
|
||||||
|
|
||||||
2018-07-31 Richard Sandiford <richard.sandiford@arm.com>
|
2018-07-31 Richard Sandiford <richard.sandiford@arm.com>
|
||||||
|
|
||||||
* tree-vectorizer.h (stmt_vec_info): Turn back into a typedef.
|
* tree-vectorizer.h (stmt_vec_info): Turn back into a typedef.
|
||||||
|
|
|
||||||
|
|
@ -5499,12 +5499,15 @@ s390_expand_setmem (rtx dst, rtx len, rtx val)
|
||||||
|
|
||||||
/* Expand setmem/clrmem for a constant length operand without a
|
/* Expand setmem/clrmem for a constant length operand without a
|
||||||
loop if it will be shorter that way.
|
loop if it will be shorter that way.
|
||||||
With a constant length and without pfd argument a
|
clrmem loop (with PFD) is 30 bytes -> 5 * xc
|
||||||
clrmem loop is 32 bytes -> 5.3 * xc
|
clrmem loop (without PFD) is 24 bytes -> 4 * xc
|
||||||
setmem loop is 36 bytes -> 3.6 * (mvi/stc + mvc) */
|
setmem loop (with PFD) is 38 bytes -> ~4 * (mvi/stc + mvc)
|
||||||
|
setmem loop (without PFD) is 32 bytes -> ~4 * (mvi/stc + mvc) */
|
||||||
if (GET_CODE (len) == CONST_INT
|
if (GET_CODE (len) == CONST_INT
|
||||||
&& ((INTVAL (len) <= 256 * 5 && val == const0_rtx)
|
&& ((val == const0_rtx
|
||||||
|| INTVAL (len) <= 257 * 3)
|
&& (INTVAL (len) <= 256 * 4
|
||||||
|
|| (INTVAL (len) <= 256 * 5 && TARGET_SETMEM_PFD(val,len))))
|
||||||
|
|| (val != const0_rtx && INTVAL (len) <= 257 * 4))
|
||||||
&& (!TARGET_MVCLE || INTVAL (len) <= 256))
|
&& (!TARGET_MVCLE || INTVAL (len) <= 256))
|
||||||
{
|
{
|
||||||
HOST_WIDE_INT o, l;
|
HOST_WIDE_INT o, l;
|
||||||
|
|
@ -5618,12 +5621,11 @@ s390_expand_setmem (rtx dst, rtx len, rtx val)
|
||||||
|
|
||||||
emit_label (loop_start_label);
|
emit_label (loop_start_label);
|
||||||
|
|
||||||
if (TARGET_Z10
|
if (TARGET_SETMEM_PFD (val, len))
|
||||||
&& (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
|
|
||||||
{
|
{
|
||||||
/* Issue a write prefetch for the +4 cache line. */
|
/* Issue a write prefetch. */
|
||||||
rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
|
rtx distance = GEN_INT (TARGET_SETMEM_PREFETCH_DISTANCE);
|
||||||
GEN_INT (1024)),
|
rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, distance),
|
||||||
const1_rtx, const0_rtx);
|
const1_rtx, const0_rtx);
|
||||||
emit_insn (prefetch);
|
emit_insn (prefetch);
|
||||||
PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
|
PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
|
||||||
|
|
|
||||||
|
|
@ -181,6 +181,16 @@ enum processor_flags
|
||||||
|
|
||||||
#define TARGET_AVOID_CMP_AND_BRANCH (s390_tune == PROCESSOR_2817_Z196)
|
#define TARGET_AVOID_CMP_AND_BRANCH (s390_tune == PROCESSOR_2817_Z196)
|
||||||
|
|
||||||
|
/* Byte offset past the destination address at which setmem issues its write prefetch (the +4 cache line). */
|
||||||
|
#define TARGET_SETMEM_PREFETCH_DISTANCE 1024
|
||||||
|
|
||||||
|
/* Expand to a C expression evaluating to true if a setmem to VAL of
|
||||||
|
length LEN should be emitted using prefetch instructions. */
|
||||||
|
#define TARGET_SETMEM_PFD(VAL,LEN) \
|
||||||
|
(TARGET_Z10 \
|
||||||
|
&& (s390_tune < PROCESSOR_2964_Z13 || (VAL) != const0_rtx) \
|
||||||
|
&& (!CONST_INT_P (LEN) || INTVAL ((LEN)) > TARGET_SETMEM_PREFETCH_DISTANCE))
|
||||||
|
|
||||||
/* Run-time target specification. */
|
/* Run-time target specification. */
|
||||||
|
|
||||||
/* Defaults for option flags defined only on some subtargets. */
|
/* Defaults for option flags defined only on some subtargets. */
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,7 @@
|
||||||
|
2018-07-31 Andreas Krebbel <krebbel@linux.ibm.com>
|
||||||
|
|
||||||
|
* gcc.target/s390/memset-1.c: Improve testcase.
|
||||||
|
|
||||||
2018-07-31 Tom de Vries <tdevries@suse.de>
|
2018-07-31 Tom de Vries <tdevries@suse.de>
|
||||||
|
|
||||||
PR debug/86687
|
PR debug/86687
|
||||||
|
|
|
||||||
|
|
@ -2,16 +2,23 @@
|
||||||
without loop statements. */
|
without loop statements. */
|
||||||
|
|
||||||
/* { dg-do compile } */
|
/* { dg-do compile } */
|
||||||
/* { dg-options "-O3 -mzarch" } */
|
/* { dg-options "-O3 -mzarch -march=z13" } */
|
||||||
|
|
||||||
/* 1 mvc */
|
/* 1 stc */
|
||||||
|
void
|
||||||
|
*memset0(void *s, int c)
|
||||||
|
{
|
||||||
|
return __builtin_memset (s, c, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 1 stc 1 mvc */
|
||||||
void
|
void
|
||||||
*memset1(void *s, int c)
|
*memset1(void *s, int c)
|
||||||
{
|
{
|
||||||
return __builtin_memset (s, c, 42);
|
return __builtin_memset (s, c, 42);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 3 mvc */
|
/* 3 stc 3 mvc */
|
||||||
void
|
void
|
||||||
*memset2(void *s, int c)
|
*memset2(void *s, int c)
|
||||||
{
|
{
|
||||||
|
|
@ -25,55 +32,62 @@ void
|
||||||
return __builtin_memset (s, c, 0);
|
return __builtin_memset (s, c, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* mvc */
|
/* 1 stc 1 mvc */
|
||||||
void
|
void
|
||||||
*memset4(void *s, int c)
|
*memset4(void *s, int c)
|
||||||
{
|
{
|
||||||
return __builtin_memset (s, c, 256);
|
return __builtin_memset (s, c, 256);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 2 mvc */
|
/* 2 stc 2 mvc */
|
||||||
void
|
void
|
||||||
*memset5(void *s, int c)
|
*memset5(void *s, int c)
|
||||||
{
|
{
|
||||||
return __builtin_memset (s, c, 512);
|
return __builtin_memset (s, c, 512);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* still 2 mvc through the additional first byte */
|
/* 2 stc 2 mvc - still only 2 mvc, since each stc covers one extra byte */
|
||||||
void
|
void
|
||||||
*memset6(void *s, int c)
|
*memset6(void *s, int c)
|
||||||
{
|
{
|
||||||
return __builtin_memset (s, c, 514);
|
return __builtin_memset (s, c, 514);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 3 mvc */
|
/* 3 stc 2 mvc */
|
||||||
void
|
void
|
||||||
*memset7(void *s, int c)
|
*memset7(void *s, int c)
|
||||||
{
|
{
|
||||||
return __builtin_memset (s, c, 515);
|
return __builtin_memset (s, c, 515);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* still 3 mvc through the additional first byte */
|
/* 4 stc 4 mvc - 4 * 256 + 4 stc bytes */
|
||||||
void
|
void
|
||||||
*memset8(void *s, int c)
|
*memset8(void *s, int c)
|
||||||
{
|
{
|
||||||
return __builtin_memset (s, c, 771);
|
return __builtin_memset (s, c, 1028);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Use mvc loop: 2 mvc */
|
/* 2 stc 1 pfd 2 mvc - start using mvc loop */
|
||||||
void
|
void
|
||||||
*memset9(void *s, int c)
|
*memset9(void *s, int c)
|
||||||
{
|
{
|
||||||
return __builtin_memset (s, c, 772);
|
return __builtin_memset (s, c, 1029);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 3 mvc with displacement overflow after the first */
|
/* 2 stc 1 stcy 3 mvc - displacement overflow after the first */
|
||||||
void
|
void
|
||||||
*memset10(void *s, int c)
|
*memset10(void *s, int c)
|
||||||
{
|
{
|
||||||
return __builtin_memset ((char*)s + 4000, c, 700);
|
return __builtin_memset ((char*)s + 4000, c, 700);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* 1 mvi */
|
||||||
|
void
|
||||||
|
*clrmem0(void *s)
|
||||||
|
{
|
||||||
|
return __builtin_memset (s, 0, 1);
|
||||||
|
}
|
||||||
|
|
||||||
/* 1 xc */
|
/* 1 xc */
|
||||||
void
|
void
|
||||||
*clrmem1(void *s)
|
*clrmem1(void *s)
|
||||||
|
|
@ -109,26 +123,55 @@ void
|
||||||
return __builtin_memset (s, 0, 512);
|
return __builtin_memset (s, 0, 512);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 3 xc */
|
/* 4 xc */
|
||||||
void
|
void
|
||||||
*clrmem6(void *s)
|
*clrmem6(void *s)
|
||||||
{
|
{
|
||||||
return __builtin_memset (s, 0, 768);
|
return __builtin_memset (s, 0, 1024);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* start using xc loop */
|
/* 2 xc - start using xc loop */
|
||||||
void
|
void
|
||||||
*clrmem7(void *s)
|
*clrmem7(void *s)
|
||||||
|
{
|
||||||
|
return __builtin_memset (s, 0, 1025);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 5 xc - on z10 PFD would be used in the loop body so the unrolled
|
||||||
|
variant would still be shorter. */
|
||||||
|
__attribute__ ((target("tune=z10")))
|
||||||
|
void
|
||||||
|
*clrmem7_z10(void *s)
|
||||||
|
{
|
||||||
|
return __builtin_memset (s, 0, 1025);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 5 xc */
|
||||||
|
__attribute__ ((target("tune=z10")))
|
||||||
|
void
|
||||||
|
*clrmem8_z10(void *s)
|
||||||
|
{
|
||||||
|
return __builtin_memset (s, 0, 1280);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 1 pfd 2 xc - start using xc loop also on z10 */
|
||||||
|
__attribute__ ((target("tune=z10")))
|
||||||
|
void
|
||||||
|
*clrmem9_z10(void *s)
|
||||||
{
|
{
|
||||||
return __builtin_memset (s, 0, 1281);
|
return __builtin_memset (s, 0, 1281);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 3 xc with displacement overflow after the first */
|
/* 3 xc - displacement overflow after the first */
|
||||||
void
|
void
|
||||||
*clrmem8(void *s)
|
*clrmem10(void *s)
|
||||||
{
|
{
|
||||||
return __builtin_memset (s + 4000, 0, 700);
|
return __builtin_memset (s + 4000, 0, 700);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* { dg-final { scan-assembler-times "mvc" 19 } } */
|
/* { dg-final { scan-assembler-times "mvi\\s" 1 } } */
|
||||||
/* { dg-final { scan-assembler-times "xc" 15 } } */
|
/* { dg-final { scan-assembler-times "mvc\\s" 20 } } */
|
||||||
|
/* { dg-final { scan-assembler-times "xc\\s" 28 } } */
|
||||||
|
/* { dg-final { scan-assembler-times "stc\\s" 21 } } */
|
||||||
|
/* { dg-final { scan-assembler-times "stcy\\s" 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times "pfd\\s" 2 } } */
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue