mirror of git://gcc.gnu.org/git/gcc.git
timevar.def (TV_SCHED_FUSION): New time var.
* timevar.def (TV_SCHED_FUSION): New time var. * passes.def (pass_sched_fusion): New pass. * config/arm/arm.c (TARGET_SCHED_FUSION_PRIORITY): New. (extract_base_offset_in_addr, fusion_load_store): New. (arm_sched_fusion_priority): New. (arm_option_override): Disable scheduling fusion by default on non-armv7 processors or ldrd/strd isn't preferred. * sched-int.h (struct _haifa_insn_data): New field. (INSN_FUSION_PRIORITY, FUSION_MAX_PRIORITY, sched_fusion): New. * sched-rgn.c (rest_of_handle_sched_fusion): New. (pass_data_sched_fusion, pass_sched_fusion): New. (make_pass_sched_fusion): New. * haifa-sched.c (sched_fusion): New. (insn_cost): Handle sched_fusion. (priority): Handle sched_fusion by calling target hook. (enum rfs_decision): New enum value. (rfs_str): New element for RFS_FUSION. (rank_for_schedule): Support sched_fusion. (schedule_insn, max_issue, prune_ready_list): Handle sched_fusion. (schedule_block, fix_tick_ready): Handle sched_fusion. * common.opt (flag_schedule_fusion): New. * tree-pass.h (make_pass_sched_fusion): New. * target.def (fusion_priority): New. * doc/tm.texi.in (TARGET_SCHED_FUSION_PRIORITY): New. * doc/tm.texi: Regenerated. * doc/invoke.texi (-fschedule-fusion): New. testsuite: * gcc.target/arm/ldrd-strd-pair-1.c: New test. * gcc.target/arm/vfp-1.c: Improve scanning string. From-SVN: r217533
This commit is contained in:
parent
0fb3402f69
commit
b16abbcb85
|
|
@ -1,3 +1,32 @@
|
||||||
|
2014-11-14 Bin Cheng <bin.cheng@arm.com>
|
||||||
|
|
||||||
|
* timevar.def (TV_SCHED_FUSION): New time var.
|
||||||
|
* passes.def (pass_sched_fusion): New pass.
|
||||||
|
* config/arm/arm.c (TARGET_SCHED_FUSION_PRIORITY): New.
|
||||||
|
(extract_base_offset_in_addr, fusion_load_store): New.
|
||||||
|
(arm_sched_fusion_priority): New.
|
||||||
|
(arm_option_override): Disable scheduling fusion by default
|
||||||
|
on non-armv7 processors or ldrd/strd isn't preferred.
|
||||||
|
* sched-int.h (struct _haifa_insn_data): New field.
|
||||||
|
(INSN_FUSION_PRIORITY, FUSION_MAX_PRIORITY, sched_fusion): New.
|
||||||
|
* sched-rgn.c (rest_of_handle_sched_fusion): New.
|
||||||
|
(pass_data_sched_fusion, pass_sched_fusion): New.
|
||||||
|
(make_pass_sched_fusion): New.
|
||||||
|
* haifa-sched.c (sched_fusion): New.
|
||||||
|
(insn_cost): Handle sched_fusion.
|
||||||
|
(priority): Handle sched_fusion by calling target hook.
|
||||||
|
(enum rfs_decision): New enum value.
|
||||||
|
(rfs_str): New element for RFS_FUSION.
|
||||||
|
(rank_for_schedule): Support sched_fusion.
|
||||||
|
(schedule_insn, max_issue, prune_ready_list): Handle sched_fusion.
|
||||||
|
(schedule_block, fix_tick_ready): Handle sched_fusion.
|
||||||
|
* common.opt (flag_schedule_fusion): New.
|
||||||
|
* tree-pass.h (make_pass_sched_fusion): New.
|
||||||
|
* target.def (fusion_priority): New.
|
||||||
|
* doc/tm.texi.in (TARGET_SCHED_FUSION_PRIORITY): New.
|
||||||
|
* doc/tm.texi: Regenerated.
|
||||||
|
* doc/invoke.texi (-fschedule-fusion): New.
|
||||||
|
|
||||||
2014-11-13 Rong Xu <xur@google.com>
|
2014-11-13 Rong Xu <xur@google.com>
|
||||||
|
|
||||||
PR debug/63581
|
PR debug/63581
|
||||||
|
|
|
||||||
|
|
@ -1848,6 +1848,10 @@ frename-registers
|
||||||
Common Report Var(flag_rename_registers) Init(2) Optimization
|
Common Report Var(flag_rename_registers) Init(2) Optimization
|
||||||
Perform a register renaming optimization pass
|
Perform a register renaming optimization pass
|
||||||
|
|
||||||
|
fschedule-fusion
|
||||||
|
Common Report Var(flag_schedule_fusion) Init(2) Optimization
|
||||||
|
Perform a target dependent instruction fusion optimization pass
|
||||||
|
|
||||||
freorder-blocks
|
freorder-blocks
|
||||||
Common Report Var(flag_reorder_blocks) Optimization
|
Common Report Var(flag_reorder_blocks) Optimization
|
||||||
Reorder basic blocks to improve code placement
|
Reorder basic blocks to improve code placement
|
||||||
|
|
|
||||||
|
|
@ -311,6 +311,8 @@ static unsigned arm_add_stmt_cost (void *data, int count,
|
||||||
static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
|
static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
|
||||||
bool op0_preserve_value);
|
bool op0_preserve_value);
|
||||||
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
|
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
|
||||||
|
|
||||||
|
static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
|
||||||
|
|
||||||
/* Table of machine attributes. */
|
/* Table of machine attributes. */
|
||||||
static const struct attribute_spec arm_attribute_table[] =
|
static const struct attribute_spec arm_attribute_table[] =
|
||||||
|
|
@ -708,6 +710,9 @@ static const struct attribute_spec arm_attribute_table[] =
|
||||||
#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
|
#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
|
||||||
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
|
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
|
||||||
|
|
||||||
|
#undef TARGET_SCHED_FUSION_PRIORITY
|
||||||
|
#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
|
||||||
|
|
||||||
struct gcc_target targetm = TARGET_INITIALIZER;
|
struct gcc_target targetm = TARGET_INITIALIZER;
|
||||||
|
|
||||||
/* Obstack for minipool constant handling. */
|
/* Obstack for minipool constant handling. */
|
||||||
|
|
@ -3168,6 +3173,12 @@ arm_option_override (void)
|
||||||
if (TARGET_THUMB2)
|
if (TARGET_THUMB2)
|
||||||
inline_asm_unified = 1;
|
inline_asm_unified = 1;
|
||||||
|
|
||||||
|
/* Disable scheduling fusion by default if it's not armv7 processor
|
||||||
|
or doesn't prefer ldrd/strd. */
|
||||||
|
if (flag_schedule_fusion == 2
|
||||||
|
&& (!arm_arch7 || !current_tune->prefer_ldrd_strd))
|
||||||
|
flag_schedule_fusion = 0;
|
||||||
|
|
||||||
/* Register global variables with the garbage collector. */
|
/* Register global variables with the garbage collector. */
|
||||||
arm_add_gc_roots ();
|
arm_add_gc_roots ();
|
||||||
}
|
}
|
||||||
|
|
@ -32350,4 +32361,124 @@ arm_is_constant_pool_ref (rtx x)
|
||||||
&& CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
|
&& CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* If MEM is in the form of [base+offset], extract the two parts
|
||||||
|
of address and set to BASE and OFFSET, otherwise return false
|
||||||
|
after clearing BASE and OFFSET. */
|
||||||
|
|
||||||
|
static bool
|
||||||
|
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
|
||||||
|
{
|
||||||
|
rtx addr;
|
||||||
|
|
||||||
|
gcc_assert (MEM_P (mem));
|
||||||
|
|
||||||
|
addr = XEXP (mem, 0);
|
||||||
|
|
||||||
|
/* Strip off const from addresses like (const (addr)). */
|
||||||
|
if (GET_CODE (addr) == CONST)
|
||||||
|
addr = XEXP (addr, 0);
|
||||||
|
|
||||||
|
if (GET_CODE (addr) == REG)
|
||||||
|
{
|
||||||
|
*base = addr;
|
||||||
|
*offset = const0_rtx;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (GET_CODE (addr) == PLUS
|
||||||
|
&& GET_CODE (XEXP (addr, 0)) == REG
|
||||||
|
&& CONST_INT_P (XEXP (addr, 1)))
|
||||||
|
{
|
||||||
|
*base = XEXP (addr, 0);
|
||||||
|
*offset = XEXP (addr, 1);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
*base = NULL_RTX;
|
||||||
|
*offset = NULL_RTX;
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If INSN is a load or store of address in the form of [base+offset],
|
||||||
|
extract the two parts and set to BASE and OFFSET. IS_LOAD is set
|
||||||
|
to TRUE if it's a load. Return TRUE if INSN is such an instruction,
|
||||||
|
otherwise return FALSE. */
|
||||||
|
|
||||||
|
static bool
|
||||||
|
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
|
||||||
|
{
|
||||||
|
rtx x, dest, src;
|
||||||
|
|
||||||
|
gcc_assert (INSN_P (insn));
|
||||||
|
x = PATTERN (insn);
|
||||||
|
if (GET_CODE (x) != SET)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
src = SET_SRC (x);
|
||||||
|
dest = SET_DEST (x);
|
||||||
|
if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
|
||||||
|
{
|
||||||
|
*is_load = false;
|
||||||
|
extract_base_offset_in_addr (dest, base, offset);
|
||||||
|
}
|
||||||
|
else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
|
||||||
|
{
|
||||||
|
*is_load = true;
|
||||||
|
extract_base_offset_in_addr (src, base, offset);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return (*base != NULL_RTX && *offset != NULL_RTX);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
|
||||||
|
|
||||||
|
Currently we only support fusing ldr or str instructions, so FUSION_PRI
|
||||||
|
and PRI are only calculated for these instructions. For other instructions,
|
||||||
|
FUSION_PRI and PRI are simply set to MAX_PRI - 1. In the future, other kinds of
|
||||||
|
instruction fusion can be supported by returning different priorities.
|
||||||
|
|
||||||
|
It's important that irrelevant instructions get the largest FUSION_PRI. */
|
||||||
|
|
||||||
|
static void
|
||||||
|
arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
|
||||||
|
int *fusion_pri, int *pri)
|
||||||
|
{
|
||||||
|
int tmp, off_val;
|
||||||
|
bool is_load;
|
||||||
|
rtx base, offset;
|
||||||
|
|
||||||
|
gcc_assert (INSN_P (insn));
|
||||||
|
|
||||||
|
tmp = max_pri - 1;
|
||||||
|
if (!fusion_load_store (insn, &base, &offset, &is_load))
|
||||||
|
{
|
||||||
|
*pri = tmp;
|
||||||
|
*fusion_pri = tmp;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Load goes first. */
|
||||||
|
if (is_load)
|
||||||
|
*fusion_pri = tmp - 1;
|
||||||
|
else
|
||||||
|
*fusion_pri = tmp - 2;
|
||||||
|
|
||||||
|
tmp /= 2;
|
||||||
|
|
||||||
|
/* INSN with smaller base register goes first. */
|
||||||
|
tmp -= ((REGNO (base) & 0xff) << 20);
|
||||||
|
|
||||||
|
/* INSN with smaller offset goes first. */
|
||||||
|
off_val = (int)(INTVAL (offset));
|
||||||
|
if (off_val >= 0)
|
||||||
|
tmp -= (off_val & 0xfffff);
|
||||||
|
else
|
||||||
|
tmp += ((- off_val) & 0xfffff);
|
||||||
|
|
||||||
|
*pri = tmp;
|
||||||
|
return;
|
||||||
|
}
|
||||||
#include "gt-arm.h"
|
#include "gt-arm.h"
|
||||||
|
|
|
||||||
|
|
@ -406,7 +406,7 @@ Objective-C and Objective-C++ Dialects}.
|
||||||
-fprofile-correction -fprofile-dir=@var{path} -fprofile-generate @gol
|
-fprofile-correction -fprofile-dir=@var{path} -fprofile-generate @gol
|
||||||
-fprofile-generate=@var{path} @gol
|
-fprofile-generate=@var{path} @gol
|
||||||
-fprofile-use -fprofile-use=@var{path} -fprofile-values -fprofile-reorder-functions @gol
|
-fprofile-use -fprofile-use=@var{path} -fprofile-values -fprofile-reorder-functions @gol
|
||||||
-freciprocal-math -free -frename-registers -freorder-blocks @gol
|
-freciprocal-math -free -frename-registers -fschedule-fusion -freorder-blocks @gol
|
||||||
-freorder-blocks-and-partition -freorder-functions @gol
|
-freorder-blocks-and-partition -freorder-functions @gol
|
||||||
-frerun-cse-after-loop -freschedule-modulo-scheduled-loops @gol
|
-frerun-cse-after-loop -freschedule-modulo-scheduled-loops @gol
|
||||||
-frounding-math -fsched2-use-superblocks -fsched-pressure @gol
|
-frounding-math -fsched2-use-superblocks -fsched-pressure @gol
|
||||||
|
|
@ -9575,6 +9575,14 @@ a ``home register''.
|
||||||
|
|
||||||
Enabled by default with @option{-funroll-loops} and @option{-fpeel-loops}.
|
Enabled by default with @option{-funroll-loops} and @option{-fpeel-loops}.
|
||||||
|
|
||||||
|
@item -fschedule-fusion
|
||||||
|
@opindex fschedule-fusion
|
||||||
|
Performs a target dependent pass over the instruction stream to schedule
|
||||||
|
instructions of the same type together because the target machine can execute them
|
||||||
|
more efficiently if they are adjacent to each other in the instruction flow.
|
||||||
|
|
||||||
|
Enabled at levels @option{-O2}, @option{-O3}, @option{-Os}.
|
||||||
|
|
||||||
@item -ftracer
|
@item -ftracer
|
||||||
@opindex ftracer
|
@opindex ftracer
|
||||||
Perform tail duplication to enlarge superblock size. This transformation
|
Perform tail duplication to enlarge superblock size. This transformation
|
||||||
|
|
|
||||||
|
|
@ -6771,6 +6771,76 @@ This hook is called by tree reassociator to determine a level of
|
||||||
parallelism required in output calculations chain.
|
parallelism required in output calculations chain.
|
||||||
@end deftypefn
|
@end deftypefn
|
||||||
|
|
||||||
|
@deftypefn {Target Hook} void TARGET_SCHED_FUSION_PRIORITY (rtx_insn *@var{insn}, int @var{max_pri}, int *@var{fusion_pri}, int *@var{pri})
|
||||||
|
This hook is called by scheduling fusion pass. It calculates fusion
|
||||||
|
priorities for each instruction passed in by parameter. The priorities
|
||||||
|
are returned via pointer parameters.
|
||||||
|
|
||||||
|
@var{insn} is the instruction whose priorities need to be calculated.
|
||||||
|
@var{max_pri} is the maximum priority that can be returned in any case.
|
||||||
|
@var{fusion_pri} is the pointer parameter through which @var{insn}'s
|
||||||
|
fusion priority should be calculated and returned.
|
||||||
|
@var{pri} is the pointer parameter through which @var{insn}'s priority
|
||||||
|
should be calculated and returned.
|
||||||
|
|
||||||
|
Same @var{fusion_pri} should be returned for instructions which should
|
||||||
|
be scheduled together. Different @var{pri} should be returned for
|
||||||
|
instructions with same @var{fusion_pri}. @var{fusion_pri} is the major
|
||||||
|
sort key, @var{pri} is the minor sort key. All instructions will be
|
||||||
|
scheduled according to the two priorities. All priorities calculated
|
||||||
|
should be between 0 (exclusive) and @var{max_pri} (inclusive). To avoid
|
||||||
|
false dependencies, @var{fusion_pri} of instructions which need to be
|
||||||
|
scheduled together should be smaller than @var{fusion_pri} of irrelevant
|
||||||
|
instructions.
|
||||||
|
|
||||||
|
Given below example:
|
||||||
|
|
||||||
|
ldr r10, [r1, 4]
|
||||||
|
add r4, r4, r10
|
||||||
|
ldr r15, [r2, 8]
|
||||||
|
sub r5, r5, r15
|
||||||
|
ldr r11, [r1, 0]
|
||||||
|
add r4, r4, r11
|
||||||
|
ldr r16, [r2, 12]
|
||||||
|
sub r5, r5, r16
|
||||||
|
|
||||||
|
On targets like ARM/AArch64, the two pairs of consecutive loads should be
|
||||||
|
merged. However, the peephole2 pass can't help in this case unless the consecutive
|
||||||
|
loads are actually next to each other in instruction flow. That's where
|
||||||
|
this scheduling fusion pass works. This hook calculates priority for each
|
||||||
|
instruction based on its fusion type, like:
|
||||||
|
|
||||||
|
ldr r10, [r1, 4] ; fusion_pri=99, pri=96
|
||||||
|
add r4, r4, r10 ; fusion_pri=100, pri=100
|
||||||
|
ldr r15, [r2, 8] ; fusion_pri=98, pri=92
|
||||||
|
sub r5, r5, r15 ; fusion_pri=100, pri=100
|
||||||
|
ldr r11, [r1, 0] ; fusion_pri=99, pri=100
|
||||||
|
add r4, r4, r11 ; fusion_pri=100, pri=100
|
||||||
|
ldr r16, [r2, 12] ; fusion_pri=98, pri=88
|
||||||
|
sub r5, r5, r16 ; fusion_pri=100, pri=100
|
||||||
|
|
||||||
|
Scheduling fusion pass then sorts all ready to issue instructions according
|
||||||
|
to the priorities. As a result, instructions of same fusion type will be
|
||||||
|
pushed together in instruction flow, like:
|
||||||
|
|
||||||
|
ldr r11, [r1, 0]
|
||||||
|
ldr r10, [r1, 4]
|
||||||
|
ldr r15, [r2, 8]
|
||||||
|
ldr r16, [r2, 12]
|
||||||
|
add r4, r4, r10
|
||||||
|
sub r5, r5, r15
|
||||||
|
add r4, r4, r11
|
||||||
|
sub r5, r5, r16
|
||||||
|
|
||||||
|
Now peephole2 pass can simply merge the two pairs of loads.
|
||||||
|
|
||||||
|
Since scheduling fusion pass relies on peephole2 to do real fusion
|
||||||
|
work, it is only enabled by default when peephole2 is in effect.
|
||||||
|
|
||||||
|
This was first introduced on ARM/AArch64 targets; please refer to
|
||||||
|
the hook implementation for how different fusion types are supported.
|
||||||
|
@end deftypefn
|
||||||
|
|
||||||
@node Sections
|
@node Sections
|
||||||
@section Dividing the Output into Sections (Texts, Data, @dots{})
|
@section Dividing the Output into Sections (Texts, Data, @dots{})
|
||||||
@c the above section title is WAY too long. maybe cut the part between
|
@c the above section title is WAY too long. maybe cut the part between
|
||||||
|
|
|
||||||
|
|
@ -4811,6 +4811,8 @@ them: try the first ones in this list first.
|
||||||
|
|
||||||
@hook TARGET_SCHED_REASSOCIATION_WIDTH
|
@hook TARGET_SCHED_REASSOCIATION_WIDTH
|
||||||
|
|
||||||
|
@hook TARGET_SCHED_FUSION_PRIORITY
|
||||||
|
|
||||||
@node Sections
|
@node Sections
|
||||||
@section Dividing the Output into Sections (Texts, Data, @dots{})
|
@section Dividing the Output into Sections (Texts, Data, @dots{})
|
||||||
@c the above section title is WAY too long. maybe cut the part between
|
@c the above section title is WAY too long. maybe cut the part between
|
||||||
|
|
|
||||||
|
|
@ -1391,6 +1391,9 @@ insn_cost (rtx_insn *insn)
|
||||||
{
|
{
|
||||||
int cost;
|
int cost;
|
||||||
|
|
||||||
|
if (sched_fusion)
|
||||||
|
return 0;
|
||||||
|
|
||||||
if (sel_sched_p ())
|
if (sel_sched_p ())
|
||||||
{
|
{
|
||||||
if (recog_memoized (insn) < 0)
|
if (recog_memoized (insn) < 0)
|
||||||
|
|
@ -1603,6 +1606,8 @@ dep_list_size (rtx insn, sd_list_types_def list)
|
||||||
return nodbgcount;
|
return nodbgcount;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool sched_fusion;
|
||||||
|
|
||||||
/* Compute the priority number for INSN. */
|
/* Compute the priority number for INSN. */
|
||||||
static int
|
static int
|
||||||
priority (rtx_insn *insn)
|
priority (rtx_insn *insn)
|
||||||
|
|
@ -1617,7 +1622,15 @@ priority (rtx_insn *insn)
|
||||||
{
|
{
|
||||||
int this_priority = -1;
|
int this_priority = -1;
|
||||||
|
|
||||||
if (dep_list_size (insn, SD_LIST_FORW) == 0)
|
if (sched_fusion)
|
||||||
|
{
|
||||||
|
int this_fusion_priority;
|
||||||
|
|
||||||
|
targetm.sched.fusion_priority (insn, FUSION_MAX_PRIORITY,
|
||||||
|
&this_fusion_priority, &this_priority);
|
||||||
|
INSN_FUSION_PRIORITY (insn) = this_fusion_priority;
|
||||||
|
}
|
||||||
|
else if (dep_list_size (insn, SD_LIST_FORW) == 0)
|
||||||
/* ??? We should set INSN_PRIORITY to insn_cost when an insn has
|
/* ??? We should set INSN_PRIORITY to insn_cost when an insn has
|
||||||
some forward deps but all of them are ignored by
|
some forward deps but all of them are ignored by
|
||||||
contributes_to_priority hook. At the moment we set priority of
|
contributes_to_priority hook. At the moment we set priority of
|
||||||
|
|
@ -2548,7 +2561,7 @@ enum rfs_decision {
|
||||||
RFS_SCHED_GROUP, RFS_PRESSURE_DELAY, RFS_PRESSURE_TICK,
|
RFS_SCHED_GROUP, RFS_PRESSURE_DELAY, RFS_PRESSURE_TICK,
|
||||||
RFS_FEEDS_BACKTRACK_INSN, RFS_PRIORITY, RFS_SPECULATION,
|
RFS_FEEDS_BACKTRACK_INSN, RFS_PRIORITY, RFS_SPECULATION,
|
||||||
RFS_SCHED_RANK, RFS_LAST_INSN, RFS_PRESSURE_INDEX,
|
RFS_SCHED_RANK, RFS_LAST_INSN, RFS_PRESSURE_INDEX,
|
||||||
RFS_DEP_COUNT, RFS_TIE, RFS_N };
|
RFS_DEP_COUNT, RFS_TIE, RFS_FUSION, RFS_N };
|
||||||
|
|
||||||
/* Corresponding strings for print outs. */
|
/* Corresponding strings for print outs. */
|
||||||
static const char *rfs_str[RFS_N] = {
|
static const char *rfs_str[RFS_N] = {
|
||||||
|
|
@ -2556,7 +2569,7 @@ static const char *rfs_str[RFS_N] = {
|
||||||
"RFS_SCHED_GROUP", "RFS_PRESSURE_DELAY", "RFS_PRESSURE_TICK",
|
"RFS_SCHED_GROUP", "RFS_PRESSURE_DELAY", "RFS_PRESSURE_TICK",
|
||||||
"RFS_FEEDS_BACKTRACK_INSN", "RFS_PRIORITY", "RFS_SPECULATION",
|
"RFS_FEEDS_BACKTRACK_INSN", "RFS_PRIORITY", "RFS_SPECULATION",
|
||||||
"RFS_SCHED_RANK", "RFS_LAST_INSN", "RFS_PRESSURE_INDEX",
|
"RFS_SCHED_RANK", "RFS_LAST_INSN", "RFS_PRESSURE_INDEX",
|
||||||
"RFS_DEP_COUNT", "RFS_TIE" };
|
"RFS_DEP_COUNT", "RFS_TIE", "RFS_FUSION" };
|
||||||
|
|
||||||
/* Statistical breakdown of rank_for_schedule decisions. */
|
/* Statistical breakdown of rank_for_schedule decisions. */
|
||||||
typedef struct { unsigned stats[RFS_N]; } rank_for_schedule_stats_t;
|
typedef struct { unsigned stats[RFS_N]; } rank_for_schedule_stats_t;
|
||||||
|
|
@ -2627,6 +2640,55 @@ rank_for_schedule (const void *x, const void *y)
|
||||||
/* Make sure that priority of TMP and TMP2 are initialized. */
|
/* Make sure that priority of TMP and TMP2 are initialized. */
|
||||||
gcc_assert (INSN_PRIORITY_KNOWN (tmp) && INSN_PRIORITY_KNOWN (tmp2));
|
gcc_assert (INSN_PRIORITY_KNOWN (tmp) && INSN_PRIORITY_KNOWN (tmp2));
|
||||||
|
|
||||||
|
if (sched_fusion)
|
||||||
|
{
|
||||||
|
/* The instruction that has the same fusion priority as the last
|
||||||
|
instruction is the instruction we pick next. If that is not
|
||||||
|
the case, we sort the ready list first by fusion priority, then
|
||||||
|
by priority, and finally by INSN_LUID. */
|
||||||
|
int a = INSN_FUSION_PRIORITY (tmp);
|
||||||
|
int b = INSN_FUSION_PRIORITY (tmp2);
|
||||||
|
int last = -1;
|
||||||
|
|
||||||
|
if (last_nondebug_scheduled_insn
|
||||||
|
&& !NOTE_P (last_nondebug_scheduled_insn)
|
||||||
|
&& BLOCK_FOR_INSN (tmp)
|
||||||
|
== BLOCK_FOR_INSN (last_nondebug_scheduled_insn))
|
||||||
|
last = INSN_FUSION_PRIORITY (last_nondebug_scheduled_insn);
|
||||||
|
|
||||||
|
if (a != last && b != last)
|
||||||
|
{
|
||||||
|
if (a == b)
|
||||||
|
{
|
||||||
|
a = INSN_PRIORITY (tmp);
|
||||||
|
b = INSN_PRIORITY (tmp2);
|
||||||
|
}
|
||||||
|
if (a != b)
|
||||||
|
return rfs_result (RFS_FUSION, b - a, tmp, tmp2);
|
||||||
|
else
|
||||||
|
return rfs_result (RFS_FUSION,
|
||||||
|
INSN_LUID (tmp) - INSN_LUID (tmp2), tmp, tmp2);
|
||||||
|
}
|
||||||
|
else if (a == b)
|
||||||
|
{
|
||||||
|
gcc_assert (last_nondebug_scheduled_insn
|
||||||
|
&& !NOTE_P (last_nondebug_scheduled_insn));
|
||||||
|
last = INSN_PRIORITY (last_nondebug_scheduled_insn);
|
||||||
|
|
||||||
|
a = abs (INSN_PRIORITY (tmp) - last);
|
||||||
|
b = abs (INSN_PRIORITY (tmp2) - last);
|
||||||
|
if (a != b)
|
||||||
|
return rfs_result (RFS_FUSION, a - b, tmp, tmp2);
|
||||||
|
else
|
||||||
|
return rfs_result (RFS_FUSION,
|
||||||
|
INSN_LUID (tmp) - INSN_LUID (tmp2), tmp, tmp2);
|
||||||
|
}
|
||||||
|
else if (a == last)
|
||||||
|
return rfs_result (RFS_FUSION, -1, tmp, tmp2);
|
||||||
|
else
|
||||||
|
return rfs_result (RFS_FUSION, 1, tmp, tmp2);
|
||||||
|
}
|
||||||
|
|
||||||
if (sched_pressure != SCHED_PRESSURE_NONE)
|
if (sched_pressure != SCHED_PRESSURE_NONE)
|
||||||
{
|
{
|
||||||
/* Prefer insn whose scheduling results in the smallest register
|
/* Prefer insn whose scheduling results in the smallest register
|
||||||
|
|
@ -4007,8 +4069,8 @@ schedule_insn (rtx_insn *insn)
|
||||||
gcc_assert (INSN_TICK (insn) >= MIN_TICK);
|
gcc_assert (INSN_TICK (insn) >= MIN_TICK);
|
||||||
if (INSN_TICK (insn) > clock_var)
|
if (INSN_TICK (insn) > clock_var)
|
||||||
/* INSN has been prematurely moved from the queue to the ready list.
|
/* INSN has been prematurely moved from the queue to the ready list.
|
||||||
This is possible only if following flag is set. */
|
This is possible only if following flags are set. */
|
||||||
gcc_assert (flag_sched_stalled_insns);
|
gcc_assert (flag_sched_stalled_insns || sched_fusion);
|
||||||
|
|
||||||
/* ??? Probably, if INSN is scheduled prematurely, we should leave
|
/* ??? Probably, if INSN is scheduled prematurely, we should leave
|
||||||
INSN_TICK untouched. This is a machine-dependent issue, actually. */
|
INSN_TICK untouched. This is a machine-dependent issue, actually. */
|
||||||
|
|
@ -5500,6 +5562,9 @@ max_issue (struct ready_list *ready, int privileged_n, state_t state,
|
||||||
struct choice_entry *top;
|
struct choice_entry *top;
|
||||||
rtx_insn *insn;
|
rtx_insn *insn;
|
||||||
|
|
||||||
|
if (sched_fusion)
|
||||||
|
return 0;
|
||||||
|
|
||||||
n_ready = ready->n_ready;
|
n_ready = ready->n_ready;
|
||||||
gcc_assert (dfa_lookahead >= 1 && privileged_n >= 0
|
gcc_assert (dfa_lookahead >= 1 && privileged_n >= 0
|
||||||
&& privileged_n <= n_ready);
|
&& privileged_n <= n_ready);
|
||||||
|
|
@ -5848,6 +5913,9 @@ prune_ready_list (state_t temp_state, bool first_cycle_insn_p,
|
||||||
bool sched_group_found = false;
|
bool sched_group_found = false;
|
||||||
int min_cost_group = 1;
|
int min_cost_group = 1;
|
||||||
|
|
||||||
|
if (sched_fusion)
|
||||||
|
return;
|
||||||
|
|
||||||
for (i = 0; i < ready.n_ready; i++)
|
for (i = 0; i < ready.n_ready; i++)
|
||||||
{
|
{
|
||||||
rtx_insn *insn = ready_element (&ready, i);
|
rtx_insn *insn = ready_element (&ready, i);
|
||||||
|
|
@ -6059,7 +6127,7 @@ schedule_block (basic_block *target_bb, state_t init_state)
|
||||||
rtx_insn *tail = PREV_INSN (next_tail);
|
rtx_insn *tail = PREV_INSN (next_tail);
|
||||||
|
|
||||||
if ((current_sched_info->flags & DONT_BREAK_DEPENDENCIES) == 0
|
if ((current_sched_info->flags & DONT_BREAK_DEPENDENCIES) == 0
|
||||||
&& sched_pressure != SCHED_PRESSURE_MODEL)
|
&& sched_pressure != SCHED_PRESSURE_MODEL && !sched_fusion)
|
||||||
find_modifiable_mems (head, tail);
|
find_modifiable_mems (head, tail);
|
||||||
|
|
||||||
/* We used to have code to avoid getting parameters moved from hard
|
/* We used to have code to avoid getting parameters moved from hard
|
||||||
|
|
@ -6455,7 +6523,7 @@ schedule_block (basic_block *target_bb, state_t init_state)
|
||||||
{
|
{
|
||||||
memcpy (temp_state, curr_state, dfa_state_size);
|
memcpy (temp_state, curr_state, dfa_state_size);
|
||||||
cost = state_transition (curr_state, insn);
|
cost = state_transition (curr_state, insn);
|
||||||
if (sched_pressure != SCHED_PRESSURE_WEIGHTED)
|
if (sched_pressure != SCHED_PRESSURE_WEIGHTED && !sched_fusion)
|
||||||
gcc_assert (cost < 0);
|
gcc_assert (cost < 0);
|
||||||
if (memcmp (temp_state, curr_state, dfa_state_size) != 0)
|
if (memcmp (temp_state, curr_state, dfa_state_size) != 0)
|
||||||
cycle_issued_insns++;
|
cycle_issued_insns++;
|
||||||
|
|
@ -7288,7 +7356,7 @@ fix_tick_ready (rtx_insn *next)
|
||||||
INSN_TICK (next) = tick;
|
INSN_TICK (next) = tick;
|
||||||
|
|
||||||
delay = tick - clock_var;
|
delay = tick - clock_var;
|
||||||
if (delay <= 0 || sched_pressure != SCHED_PRESSURE_NONE)
|
if (delay <= 0 || sched_pressure != SCHED_PRESSURE_NONE || sched_fusion)
|
||||||
delay = QUEUE_READY;
|
delay = QUEUE_READY;
|
||||||
|
|
||||||
change_queue_index (next, delay);
|
change_queue_index (next, delay);
|
||||||
|
|
|
||||||
|
|
@ -419,6 +419,7 @@ along with GCC; see the file COPYING3. If not see
|
||||||
NEXT_PASS (pass_stack_adjustments);
|
NEXT_PASS (pass_stack_adjustments);
|
||||||
NEXT_PASS (pass_jump2);
|
NEXT_PASS (pass_jump2);
|
||||||
NEXT_PASS (pass_duplicate_computed_gotos);
|
NEXT_PASS (pass_duplicate_computed_gotos);
|
||||||
|
NEXT_PASS (pass_sched_fusion);
|
||||||
NEXT_PASS (pass_peephole2);
|
NEXT_PASS (pass_peephole2);
|
||||||
NEXT_PASS (pass_if_after_reload);
|
NEXT_PASS (pass_if_after_reload);
|
||||||
NEXT_PASS (pass_regrename);
|
NEXT_PASS (pass_regrename);
|
||||||
|
|
|
||||||
|
|
@ -805,6 +805,9 @@ struct _haifa_insn_data
|
||||||
/* A priority for each insn. */
|
/* A priority for each insn. */
|
||||||
int priority;
|
int priority;
|
||||||
|
|
||||||
|
/* The fusion priority for each insn. */
|
||||||
|
int fusion_priority;
|
||||||
|
|
||||||
/* The minimum clock tick at which the insn becomes ready. This is
|
/* The minimum clock tick at which the insn becomes ready. This is
|
||||||
used to note timing constraints for the insns in the pending list. */
|
used to note timing constraints for the insns in the pending list. */
|
||||||
int tick;
|
int tick;
|
||||||
|
|
@ -903,6 +906,7 @@ extern vec<haifa_insn_data_def> h_i_d;
|
||||||
/* Accessor macros for h_i_d. There are more in haifa-sched.c and
|
/* Accessor macros for h_i_d. There are more in haifa-sched.c and
|
||||||
sched-rgn.c. */
|
sched-rgn.c. */
|
||||||
#define INSN_PRIORITY(INSN) (HID (INSN)->priority)
|
#define INSN_PRIORITY(INSN) (HID (INSN)->priority)
|
||||||
|
#define INSN_FUSION_PRIORITY(INSN) (HID (INSN)->fusion_priority)
|
||||||
#define INSN_REG_PRESSURE(INSN) (HID (INSN)->reg_pressure)
|
#define INSN_REG_PRESSURE(INSN) (HID (INSN)->reg_pressure)
|
||||||
#define INSN_MAX_REG_PRESSURE(INSN) (HID (INSN)->max_reg_pressure)
|
#define INSN_MAX_REG_PRESSURE(INSN) (HID (INSN)->max_reg_pressure)
|
||||||
#define INSN_REG_USE_LIST(INSN) (HID (INSN)->reg_use_list)
|
#define INSN_REG_USE_LIST(INSN) (HID (INSN)->reg_use_list)
|
||||||
|
|
@ -1620,6 +1624,10 @@ extern void sd_copy_back_deps (rtx_insn *, rtx_insn *, bool);
|
||||||
extern void sd_delete_dep (sd_iterator_def);
|
extern void sd_delete_dep (sd_iterator_def);
|
||||||
extern void sd_debug_lists (rtx, sd_list_types_def);
|
extern void sd_debug_lists (rtx, sd_list_types_def);
|
||||||
|
|
||||||
|
/* Macros and declarations for scheduling fusion. */
|
||||||
|
#define FUSION_MAX_PRIORITY (INT_MAX)
|
||||||
|
extern bool sched_fusion;
|
||||||
|
|
||||||
#endif /* INSN_SCHEDULING */
|
#endif /* INSN_SCHEDULING */
|
||||||
|
|
||||||
#endif /* GCC_SCHED_INT_H */
|
#endif /* GCC_SCHED_INT_H */
|
||||||
|
|
|
||||||
|
|
@ -3658,6 +3658,17 @@ rest_of_handle_sched2 (void)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static unsigned int
|
||||||
|
rest_of_handle_sched_fusion (void)
|
||||||
|
{
|
||||||
|
#ifdef INSN_SCHEDULING
|
||||||
|
sched_fusion = true;
|
||||||
|
schedule_insns ();
|
||||||
|
sched_fusion = false;
|
||||||
|
#endif
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
const pass_data pass_data_live_range_shrinkage =
|
const pass_data pass_data_live_range_shrinkage =
|
||||||
|
|
@ -3800,3 +3811,55 @@ make_pass_sched2 (gcc::context *ctxt)
|
||||||
{
|
{
|
||||||
return new pass_sched2 (ctxt);
|
return new pass_sched2 (ctxt);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
const pass_data pass_data_sched_fusion =
|
||||||
|
{
|
||||||
|
RTL_PASS, /* type */
|
||||||
|
"sched_fusion", /* name */
|
||||||
|
OPTGROUP_NONE, /* optinfo_flags */
|
||||||
|
TV_SCHED_FUSION, /* tv_id */
|
||||||
|
0, /* properties_required */
|
||||||
|
0, /* properties_provided */
|
||||||
|
0, /* properties_destroyed */
|
||||||
|
0, /* todo_flags_start */
|
||||||
|
TODO_df_finish, /* todo_flags_finish */
|
||||||
|
};
|
||||||
|
|
||||||
|
class pass_sched_fusion : public rtl_opt_pass
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
pass_sched_fusion (gcc::context *ctxt)
|
||||||
|
: rtl_opt_pass (pass_data_sched_fusion, ctxt)
|
||||||
|
{}
|
||||||
|
|
||||||
|
/* opt_pass methods: */
|
||||||
|
virtual bool gate (function *);
|
||||||
|
virtual unsigned int execute (function *)
|
||||||
|
{
|
||||||
|
return rest_of_handle_sched_fusion ();
|
||||||
|
}
|
||||||
|
|
||||||
|
}; // class pass_sched_fusion
|
||||||
|
|
||||||
|
bool
|
||||||
|
pass_sched_fusion::gate (function *)
|
||||||
|
{
|
||||||
|
#ifdef INSN_SCHEDULING
|
||||||
|
/* Scheduling fusion relies on peephole2 to do real fusion work,
|
||||||
|
so only enable it if peephole2 is in effect. */
|
||||||
|
return (optimize > 0 && flag_peephole2
|
||||||
|
&& flag_schedule_fusion && targetm.sched.fusion_priority != NULL);
|
||||||
|
#else
|
||||||
|
return 0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
} // anon namespace
|
||||||
|
|
||||||
|
rtl_opt_pass *
|
||||||
|
make_pass_sched_fusion (gcc::context *ctxt)
|
||||||
|
{
|
||||||
|
return new pass_sched_fusion (ctxt);
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -1526,6 +1526,79 @@ parallelism required in output calculations chain.",
|
||||||
int, (unsigned int opc, machine_mode mode),
|
int, (unsigned int opc, machine_mode mode),
|
||||||
hook_int_uint_mode_1)
|
hook_int_uint_mode_1)
|
||||||
|
|
||||||
|
/* The following member value is a function that returns priority for
|
||||||
|
fusion of each instruction via pointer parameters. */
|
||||||
|
DEFHOOK
|
||||||
|
(fusion_priority,
|
||||||
|
"This hook is called by scheduling fusion pass. It calculates fusion\n\
|
||||||
|
priorities for each instruction passed in by parameter. The priorities\n\
|
||||||
|
are returned via pointer parameters.\n\
|
||||||
|
\n\
|
||||||
|
@var{insn} is the instruction whose priorities need to be calculated.\n\
|
||||||
|
@var{max_pri} is the maximum priority that can be returned in any case.\n\
|
||||||
|
@var{fusion_pri} is the pointer parameter through which @var{insn}'s\n\
|
||||||
|
fusion priority should be calculated and returned.\n\
|
||||||
|
@var{pri} is the pointer parameter through which @var{insn}'s priority\n\
|
||||||
|
should be calculated and returned.\n\
|
||||||
|
\n\
|
||||||
|
Same @var{fusion_pri} should be returned for instructions which should\n\
|
||||||
|
be scheduled together. Different @var{pri} should be returned for\n\
|
||||||
|
instructions with same @var{fusion_pri}. @var{fusion_pri} is the major\n\
|
||||||
|
sort key, @var{pri} is the minor sort key. All instructions will be\n\
|
||||||
|
scheduled according to the two priorities. All priorities calculated\n\
|
||||||
|
should be between 0 (exclusive) and @var{max_pri} (inclusive). To avoid\n\
|
||||||
|
false dependencies, @var{fusion_pri} of instructions which need to be\n\
|
||||||
|
scheduled together should be smaller than @var{fusion_pri} of irrelevant\n\
|
||||||
|
instructions.\n\
|
||||||
|
\n\
|
||||||
|
Given below example:\n\
|
||||||
|
\n\
|
||||||
|
ldr r10, [r1, 4]\n\
|
||||||
|
add r4, r4, r10\n\
|
||||||
|
ldr r15, [r2, 8]\n\
|
||||||
|
sub r5, r5, r15\n\
|
||||||
|
ldr r11, [r1, 0]\n\
|
||||||
|
add r4, r4, r11\n\
|
||||||
|
ldr r16, [r2, 12]\n\
|
||||||
|
sub r5, r5, r16\n\
|
||||||
|
\n\
|
||||||
|
On targets like ARM/AArch64, the two pairs of consecutive loads should be\n\
|
||||||
|
merged. However, the peephole2 pass can't help in this case unless consecutive\n\
|
||||||
|
loads are actually next to each other in instruction flow. That's where\n\
|
||||||
|
this scheduling fusion pass works. This hook calculates priority for each\n\
|
||||||
|
instruction based on its fusion type, like:\n\
|
||||||
|
\n\
|
||||||
|
ldr r10, [r1, 4] ; fusion_pri=99, pri=96 \n\
|
||||||
|
add r4, r4, r10 ; fusion_pri=100, pri=100 \n\
|
||||||
|
ldr r15, [r2, 8] ; fusion_pri=98, pri=92 \n\
|
||||||
|
sub r5, r5, r15 ; fusion_pri=100, pri=100 \n\
|
||||||
|
ldr r11, [r1, 0] ; fusion_pri=99, pri=100 \n\
|
||||||
|
add r4, r4, r11 ; fusion_pri=100, pri=100 \n\
|
||||||
|
ldr r16, [r2, 12] ; fusion_pri=98, pri=88 \n\
|
||||||
|
sub r5, r5, r16 ; fusion_pri=100, pri=100 \n\
|
||||||
|
\n\
|
||||||
|
Scheduling fusion pass then sorts all ready to issue instructions according\n\
|
||||||
|
to the priorities. As a result, instructions of same fusion type will be\n\
|
||||||
|
pushed together in instruction flow, like:\n\
|
||||||
|
\n\
|
||||||
|
ldr r11, [r1, 0]\n\
|
||||||
|
ldr r10, [r1, 4]\n\
|
||||||
|
ldr r15, [r2, 8]\n\
|
||||||
|
ldr r16, [r2, 12]\n\
|
||||||
|
add r4, r4, r10\n\
|
||||||
|
sub r5, r5, r15\n\
|
||||||
|
add r4, r4, r11\n\
|
||||||
|
sub r5, r5, r16\n\
|
||||||
|
\n\
|
||||||
|
Now peephole2 pass can simply merge the two pairs of loads.\n\
|
||||||
|
\n\
|
||||||
|
Since scheduling fusion pass relies on peephole2 to do real fusion\n\
|
||||||
|
work, it is only enabled by default when peephole2 is in effect.\n\
|
||||||
|
\n\
|
||||||
|
This was first introduced on ARM/AArch64 targets; please refer to\n\
|
||||||
|
the hook implementation for how different fusion types are supported.",
|
||||||
|
void, (rtx_insn *insn, int max_pri, int *fusion_pri, int *pri), NULL)
|
||||||
|
|
||||||
HOOK_VECTOR_END (sched)
|
HOOK_VECTOR_END (sched)
|
||||||
|
|
||||||
/* Functions relating to OpenMP and Cilk Plus SIMD clones. */
|
/* Functions relating to OpenMP and Cilk Plus SIMD clones. */
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,8 @@
|
||||||
|
2014-11-14 Bin Cheng <bin.cheng@arm.com>
|
||||||
|
|
||||||
|
* gcc.target/arm/ldrd-strd-pair-1.c: New test.
|
||||||
|
* gcc.target/arm/vfp-1.c: Improve scanning string.
|
||||||
|
|
||||||
2014-11-13 Rong Xu <xur@google.com>
|
2014-11-13 Rong Xu <xur@google.com>
|
||||||
|
|
||||||
PR debug/63581
|
PR debug/63581
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,23 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-require-effective-target arm_prefer_ldrd_strd } */
|
||||||
|
/* { dg-options "-O2 -mthumb" } */
|
||||||
|
|
||||||
|
/* Global test object: two adjacent int fields (x, y), then a char and
   another int.  */
struct
|
||||||
|
{
|
||||||
|
int x;
|
||||||
|
int y;
|
||||||
|
char c;
|
||||||
|
int d;
|
||||||
|
}a;
|
||||||
|
|
||||||
|
/* Store X to a.x, read a.x back into a local and store that to a.d, then
   store Y to a.y; always return 0.
   NOTE(review): presumably the stores to the adjacent fields a.x and a.y
   are the pair the dg-final "strd" scan expects to be merged -- confirm.  */
int foo(int x, int y)
|
||||||
|
{
|
||||||
|
int c;
|
||||||
|
a.x = x;
|
||||||
|
c = a.x;
|
||||||
|
a.d = c;
|
||||||
|
a.y = y;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
/* { dg-final { scan-assembler "strd\t" { target { arm_thumb2_ok } } } } */
|
||||||
|
|
@ -126,7 +126,7 @@ void test_convert () {
|
||||||
}
|
}
|
||||||
|
|
||||||
void test_ldst (float f[], double d[]) {
|
void test_ldst (float f[], double d[]) {
|
||||||
/* { dg-final { scan-assembler "vldr.32.+ \\\[r0, #1020\\\]" } } */
|
/* { dg-final { scan-assembler "vldr.32.+ \\\[r0, #-?\[0-9\]+\\\]" } } */
|
||||||
/* { dg-final { scan-assembler "vldr.32.+ \\\[r\[0-9\], #-1020\\\]" { target { arm32 && { ! arm_thumb2_ok } } } } } */
|
/* { dg-final { scan-assembler "vldr.32.+ \\\[r\[0-9\], #-1020\\\]" { target { arm32 && { ! arm_thumb2_ok } } } } } */
|
||||||
/* { dg-final { scan-assembler "add.+ r0, #1024" } } */
|
/* { dg-final { scan-assembler "add.+ r0, #1024" } } */
|
||||||
/* { dg-final { scan-assembler "vstr.32.+ \\\[r\[0-9\]\\\]\n" } } */
|
/* { dg-final { scan-assembler "vstr.32.+ \\\[r\[0-9\]\\\]\n" } } */
|
||||||
|
|
|
||||||
|
|
@ -247,6 +247,7 @@ DEFTIMEVAR (TV_IFCVT2 , "if-conversion 2")
|
||||||
DEFTIMEVAR (TV_COMBINE_STACK_ADJUST , "combine stack adjustments")
|
DEFTIMEVAR (TV_COMBINE_STACK_ADJUST , "combine stack adjustments")
|
||||||
DEFTIMEVAR (TV_PEEPHOLE2 , "peephole 2")
|
DEFTIMEVAR (TV_PEEPHOLE2 , "peephole 2")
|
||||||
DEFTIMEVAR (TV_RENAME_REGISTERS , "rename registers")
|
DEFTIMEVAR (TV_RENAME_REGISTERS , "rename registers")
|
||||||
|
DEFTIMEVAR (TV_SCHED_FUSION , "scheduling fusion")
|
||||||
DEFTIMEVAR (TV_CPROP_REGISTERS , "hard reg cprop")
|
DEFTIMEVAR (TV_CPROP_REGISTERS , "hard reg cprop")
|
||||||
DEFTIMEVAR (TV_SCHED2 , "scheduling 2")
|
DEFTIMEVAR (TV_SCHED2 , "scheduling 2")
|
||||||
DEFTIMEVAR (TV_MACH_DEP , "machine dep reorg")
|
DEFTIMEVAR (TV_MACH_DEP , "machine dep reorg")
|
||||||
|
|
|
||||||
|
|
@ -552,6 +552,7 @@ extern rtl_opt_pass *make_pass_branch_target_load_optimize1 (gcc::context
|
||||||
extern rtl_opt_pass *make_pass_thread_prologue_and_epilogue (gcc::context
|
extern rtl_opt_pass *make_pass_thread_prologue_and_epilogue (gcc::context
|
||||||
*ctxt);
|
*ctxt);
|
||||||
extern rtl_opt_pass *make_pass_stack_adjustments (gcc::context *ctxt);
|
extern rtl_opt_pass *make_pass_stack_adjustments (gcc::context *ctxt);
|
||||||
|
extern rtl_opt_pass *make_pass_sched_fusion (gcc::context *ctxt);
|
||||||
extern rtl_opt_pass *make_pass_peephole2 (gcc::context *ctxt);
|
extern rtl_opt_pass *make_pass_peephole2 (gcc::context *ctxt);
|
||||||
extern rtl_opt_pass *make_pass_if_after_reload (gcc::context *ctxt);
|
extern rtl_opt_pass *make_pass_if_after_reload (gcc::context *ctxt);
|
||||||
extern rtl_opt_pass *make_pass_regrename (gcc::context *ctxt);
|
extern rtl_opt_pass *make_pass_regrename (gcc::context *ctxt);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue