mirror of git://gcc.gnu.org/git/gcc.git
re PR target/40657 (allocate local variables with fewer instructions)
PR target/40657 * config/arm/arm.c (thumb1_extra_regs_pushed): New arg FOR_PROLOGUE. All callers changed. Handle the case when we're called for the epilogue. (thumb_unexpanded_epilogue): Use it. (thumb1_expand_epilogue): Likewise. testsuite/ PR target/40657 * gcc.target/arm/pr40657-1.c: New test. * gcc.target/arm/pr40657-2.c: New test. * gcc.c-torture/execute/pr40657.c: New test. From-SVN: r161988
This commit is contained in:
parent
ac2856486d
commit
e784c52cd2
|
|
@ -1,3 +1,12 @@
|
||||||
|
2010-07-09 Bernd Schmidt <bernds@codesourcery.com>
|
||||||
|
|
||||||
|
PR target/40657
|
||||||
|
* config/arm/arm.c (thumb1_extra_regs_pushed): New arg FOR_PROLOGUE.
|
||||||
|
All callers changed.
|
||||||
|
Handle the case when we're called for the epilogue.
|
||||||
|
(thumb_unexpanded_epilogue): Use it.
|
||||||
|
(thumb1_expand_epilogue): Likewise.
|
||||||
|
|
||||||
2010-07-09 Jakub Jelinek <jakub@redhat.com>
|
2010-07-09 Jakub Jelinek <jakub@redhat.com>
|
||||||
|
|
||||||
* tree-vrp.c (extract_range_from_binary_expr) <BIT_AND_EXPR>: If
|
* tree-vrp.c (extract_range_from_binary_expr) <BIT_AND_EXPR>: If
|
||||||
|
|
|
||||||
|
|
@ -19565,6 +19565,81 @@ is_called_in_ARM_mode (tree func)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Given the stack offsets and register mask in OFFSETS, decide how
|
||||||
|
many additional registers to push instead of subtracting a constant
|
||||||
|
from SP. For epilogues the principle is the same except we use pop.
|
||||||
|
FOR_PROLOGUE indicates which we're generating. */
|
||||||
|
static int
|
||||||
|
thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
|
||||||
|
{
|
||||||
|
HOST_WIDE_INT amount;
|
||||||
|
unsigned long live_regs_mask = offsets->saved_regs_mask;
|
||||||
|
/* Extract a mask of the ones we can give to the Thumb's push/pop
|
||||||
|
instruction. */
|
||||||
|
unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
|
||||||
|
/* Then count how many other high registers will need to be pushed. */
|
||||||
|
unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
|
||||||
|
int n_free, reg_base;
|
||||||
|
|
||||||
|
if (!for_prologue && frame_pointer_needed)
|
||||||
|
amount = offsets->locals_base - offsets->saved_regs;
|
||||||
|
else
|
||||||
|
amount = offsets->outgoing_args - offsets->saved_regs;
|
||||||
|
|
||||||
|
/* If the stack frame size is 512 exactly, we can save one load
|
||||||
|
instruction, which should make this a win even when optimizing
|
||||||
|
for speed. */
|
||||||
|
if (!optimize_size && amount != 512)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* Can't do this if there are high registers to push. */
|
||||||
|
if (high_regs_pushed != 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* Shouldn't do it in the prologue if no registers would normally
|
||||||
|
be pushed at all. In the epilogue, also allow it if we'll have
|
||||||
|
a pop insn for the PC. */
|
||||||
|
if (l_mask == 0
|
||||||
|
&& (for_prologue
|
||||||
|
|| TARGET_BACKTRACE
|
||||||
|
|| (live_regs_mask & 1 << LR_REGNUM) == 0
|
||||||
|
|| TARGET_INTERWORK
|
||||||
|
|| crtl->args.pretend_args_size != 0))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* Don't do this if thumb1_expand_prologue wants to emit instructions
|
||||||
|
between the push and the stack frame allocation. */
|
||||||
|
if (for_prologue
|
||||||
|
&& ((flag_pic && arm_pic_register != INVALID_REGNUM)
|
||||||
|
|| (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
reg_base = 0;
|
||||||
|
n_free = 0;
|
||||||
|
if (!for_prologue)
|
||||||
|
{
|
||||||
|
reg_base = arm_size_return_regs () / UNITS_PER_WORD;
|
||||||
|
live_regs_mask >>= reg_base;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (reg_base + n_free < 8 && !(live_regs_mask & 1)
|
||||||
|
&& (for_prologue || call_used_regs[reg_base + n_free]))
|
||||||
|
{
|
||||||
|
live_regs_mask >>= 1;
|
||||||
|
n_free++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (n_free == 0)
|
||||||
|
return 0;
|
||||||
|
gcc_assert (amount / 4 * 4 == amount);
|
||||||
|
|
||||||
|
if (amount >= 512 && (amount - n_free * 4) < 512)
|
||||||
|
return (amount - 508) / 4;
|
||||||
|
if (amount <= n_free * 4)
|
||||||
|
return amount / 4;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/* The bits which aren't usefully expanded as rtl. */
|
/* The bits which aren't usefully expanded as rtl. */
|
||||||
const char *
|
const char *
|
||||||
thumb_unexpanded_epilogue (void)
|
thumb_unexpanded_epilogue (void)
|
||||||
|
|
@ -19573,6 +19648,7 @@ thumb_unexpanded_epilogue (void)
|
||||||
int regno;
|
int regno;
|
||||||
unsigned long live_regs_mask = 0;
|
unsigned long live_regs_mask = 0;
|
||||||
int high_regs_pushed = 0;
|
int high_regs_pushed = 0;
|
||||||
|
int extra_pop;
|
||||||
int had_to_push_lr;
|
int had_to_push_lr;
|
||||||
int size;
|
int size;
|
||||||
|
|
||||||
|
|
@ -19592,6 +19668,13 @@ thumb_unexpanded_epilogue (void)
|
||||||
the register is used to hold a return value. */
|
the register is used to hold a return value. */
|
||||||
size = arm_size_return_regs ();
|
size = arm_size_return_regs ();
|
||||||
|
|
||||||
|
extra_pop = thumb1_extra_regs_pushed (offsets, false);
|
||||||
|
if (extra_pop > 0)
|
||||||
|
{
|
||||||
|
unsigned long extra_mask = (1 << extra_pop) - 1;
|
||||||
|
live_regs_mask |= extra_mask << (size / UNITS_PER_WORD);
|
||||||
|
}
|
||||||
|
|
||||||
/* The prolog may have pushed some high registers to use as
|
/* The prolog may have pushed some high registers to use as
|
||||||
work registers. e.g. the testsuite file:
|
work registers. e.g. the testsuite file:
|
||||||
gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
|
gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
|
||||||
|
|
@ -19675,7 +19758,9 @@ thumb_unexpanded_epilogue (void)
|
||||||
live_regs_mask);
|
live_regs_mask);
|
||||||
|
|
||||||
/* We have either just popped the return address into the
|
/* We have either just popped the return address into the
|
||||||
PC or it was kept in LR for the entire function. */
|
PC or it was kept in LR for the entire function.
|
||||||
|
Note that thumb_pushpop has already called thumb_exit if the
|
||||||
|
PC was in the list. */
|
||||||
if (!had_to_push_lr)
|
if (!had_to_push_lr)
|
||||||
thumb_exit (asm_out_file, LR_REGNUM);
|
thumb_exit (asm_out_file, LR_REGNUM);
|
||||||
}
|
}
|
||||||
|
|
@ -19821,51 +19906,6 @@ thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Given the stack offsets and register mask in OFFSETS, decide
|
|
||||||
how many additional registers to push instead of subtracting
|
|
||||||
a constant from SP. */
|
|
||||||
static int
|
|
||||||
thumb1_extra_regs_pushed (arm_stack_offsets *offsets)
|
|
||||||
{
|
|
||||||
HOST_WIDE_INT amount = offsets->outgoing_args - offsets->saved_regs;
|
|
||||||
unsigned long live_regs_mask = offsets->saved_regs_mask;
|
|
||||||
/* Extract a mask of the ones we can give to the Thumb's push instruction. */
|
|
||||||
unsigned long l_mask = live_regs_mask & 0x40ff;
|
|
||||||
/* Then count how many other high registers will need to be pushed. */
|
|
||||||
unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
|
|
||||||
int n_free;
|
|
||||||
|
|
||||||
/* If the stack frame size is 512 exactly, we can save one load
|
|
||||||
instruction, which should make this a win even when optimizing
|
|
||||||
for speed. */
|
|
||||||
if (!optimize_size && amount != 512)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
/* Can't do this if there are high registers to push, or if we
|
|
||||||
are not going to do a push at all. */
|
|
||||||
if (high_regs_pushed != 0 || l_mask == 0)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
/* Don't do this if thumb1_expand_prologue wants to emit instructions
|
|
||||||
between the push and the stack frame allocation. */
|
|
||||||
if ((flag_pic && arm_pic_register != INVALID_REGNUM)
|
|
||||||
|| (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
for (n_free = 0; n_free < 8 && !(live_regs_mask & 1); live_regs_mask >>= 1)
|
|
||||||
n_free++;
|
|
||||||
|
|
||||||
if (n_free == 0)
|
|
||||||
return 0;
|
|
||||||
gcc_assert (amount / 4 * 4 == amount);
|
|
||||||
|
|
||||||
if (amount >= 512 && (amount - n_free * 4) < 512)
|
|
||||||
return (amount - 508) / 4;
|
|
||||||
if (amount <= n_free * 4)
|
|
||||||
return amount / 4;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Generate the rest of a function's prologue. */
|
/* Generate the rest of a function's prologue. */
|
||||||
void
|
void
|
||||||
thumb1_expand_prologue (void)
|
thumb1_expand_prologue (void)
|
||||||
|
|
@ -19902,7 +19942,7 @@ thumb1_expand_prologue (void)
|
||||||
stack_pointer_rtx);
|
stack_pointer_rtx);
|
||||||
|
|
||||||
amount = offsets->outgoing_args - offsets->saved_regs;
|
amount = offsets->outgoing_args - offsets->saved_regs;
|
||||||
amount -= 4 * thumb1_extra_regs_pushed (offsets);
|
amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
|
||||||
if (amount)
|
if (amount)
|
||||||
{
|
{
|
||||||
if (amount < 512)
|
if (amount < 512)
|
||||||
|
|
@ -19987,6 +20027,7 @@ thumb1_expand_epilogue (void)
|
||||||
emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
|
emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
|
||||||
amount = offsets->locals_base - offsets->saved_regs;
|
amount = offsets->locals_base - offsets->saved_regs;
|
||||||
}
|
}
|
||||||
|
amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
|
||||||
|
|
||||||
gcc_assert (amount >= 0);
|
gcc_assert (amount >= 0);
|
||||||
if (amount)
|
if (amount)
|
||||||
|
|
@ -20209,7 +20250,7 @@ thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
|
||||||
|| (high_regs_pushed == 0 && l_mask))
|
|| (high_regs_pushed == 0 && l_mask))
|
||||||
{
|
{
|
||||||
unsigned long mask = l_mask;
|
unsigned long mask = l_mask;
|
||||||
mask |= (1 << thumb1_extra_regs_pushed (offsets)) - 1;
|
mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
|
||||||
thumb_pushpop (f, mask, 1, &cfa_offset, mask);
|
thumb_pushpop (f, mask, 1, &cfa_offset, mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,10 @@
|
||||||
|
2010-07-09 Bernd Schmidt <bernds@codesourcery.com>
|
||||||
|
|
||||||
|
PR target/40657
|
||||||
|
* gcc.target/arm/pr40657-1.c: New test.
|
||||||
|
* gcc.target/arm/pr40657-2.c: New test.
|
||||||
|
* gcc.c-torture/execute/pr40657.c: New test.
|
||||||
|
|
||||||
2010-07-09 Jakub Jelinek <jakub@redhat.com>
|
2010-07-09 Jakub Jelinek <jakub@redhat.com>
|
||||||
|
|
||||||
* gcc.dg/tree-ssa/vrp50.c: New test.
|
* gcc.dg/tree-ssa/vrp50.c: New test.
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,23 @@
|
||||||
|
/* Verify that the Thumb-1 epilogue size optimization does not clobber the
|
||||||
|
return value. */
|
||||||
|
|
||||||
|
long long v = 0x123456789abc;
|
||||||
|
|
||||||
|
__attribute__((noinline)) void bar (int *x)
|
||||||
|
{
|
||||||
|
asm volatile ("" : "=m" (x) ::);
|
||||||
|
}
|
||||||
|
|
||||||
|
__attribute__((noinline)) long long foo()
|
||||||
|
{
|
||||||
|
int x;
|
||||||
|
bar(&x);
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main ()
|
||||||
|
{
|
||||||
|
if (foo () != v)
|
||||||
|
abort ();
|
||||||
|
exit (0);
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,13 @@
|
||||||
|
/* { dg-options "-Os -march=armv5te -mthumb" } */
|
||||||
|
/* { dg-require-effective-target arm_thumb1_ok } */
|
||||||
|
/* { dg-final { scan-assembler "pop.*r1.*pc" } } */
|
||||||
|
/* { dg-final { scan-assembler-not "sub\[\\t \]*sp,\[\\t \]*sp" } } */
|
||||||
|
/* { dg-final { scan-assembler-not "add\[\\t \]*sp,\[\\t \]*sp" } } */
|
||||||
|
|
||||||
|
extern void bar(int*);
|
||||||
|
int foo()
|
||||||
|
{
|
||||||
|
int x;
|
||||||
|
bar(&x);
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,20 @@
|
||||||
|
/* { dg-options "-Os -march=armv4t -mthumb" } */
|
||||||
|
/* { dg-require-effective-target arm_thumb1_ok } */
|
||||||
|
/* { dg-final { scan-assembler-not "sub\[\\t \]*sp,\[\\t \]*sp" } } */
|
||||||
|
/* { dg-final { scan-assembler-not "add\[\\t \]*sp,\[\\t \]*sp" } } */
|
||||||
|
|
||||||
|
/* Here, we test that if there's a pop of r[4567] in the epilogue,
|
||||||
|
add sp,sp,#12 is removed and replaced by three additional pops
|
||||||
|
of lower-numbered regs. */
|
||||||
|
|
||||||
|
extern void bar(int*);
|
||||||
|
|
||||||
|
int t1, t2, t3, t4, t5;
|
||||||
|
int foo()
|
||||||
|
{
|
||||||
|
int i,j,k,x = 0;
|
||||||
|
for (i = 0; i < t1; i++)
|
||||||
|
for (j = 0; j < t2; j++)
|
||||||
|
bar(&x);
|
||||||
|
return x;
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue