mirror of git://gcc.gnu.org/git/gcc.git
s390.c (Z10_PREDICT_DISTANCE): New macro.
2009-08-20 Andreas Krebbel <krebbel1@de.ibm.com> * config/s390/s390.c (Z10_PREDICT_DISTANCE): New macro. (s390_z10_fix_long_loop_prediction): New function. (s390_z10_optimize_cmp): INSN walk moved to caller - s390_reorg. (s390_reorg): Walk over the INSNs and invoke s390_z10_fix_long_loop_prediction and s390_z10_optimize_cmp. From-SVN: r150955
This commit is contained in:
parent
f114923514
commit
b0f86a7e6b
|
@ -1,3 +1,11 @@
|
||||||
|
2009-08-20 Andreas Krebbel <krebbel1@de.ibm.com>
|
||||||
|
|
||||||
|
* config/s390/s390.c (Z10_PREDICT_DISTANCE): New macro.
|
||||||
|
(s390_z10_fix_long_loop_prediction): New function.
|
||||||
|
(s390_z10_optimize_cmp): INSN walk moved to caller - s390_reorg.
|
||||||
|
(s390_reorg): Walk over the INSNs and invoke
|
||||||
|
s390_z10_fix_long_loop_prediction and s390_z10_optimize_cmp.
|
||||||
|
|
||||||
2009-08-20 Andreas Krebbel <krebbel1@de.ibm.com>
|
2009-08-20 Andreas Krebbel <krebbel1@de.ibm.com>
|
||||||
|
|
||||||
* config/s390/s390.md ("*brx_stage1_<GPR:mode>", "*brxg_64bit",
|
* config/s390/s390.md ("*brx_stage1_<GPR:mode>", "*brxg_64bit",
|
||||||
|
|
|
@ -345,6 +345,10 @@ struct GTY(()) machine_function
|
||||||
#define REGNO_PAIR_OK(REGNO, MODE) \
|
#define REGNO_PAIR_OK(REGNO, MODE) \
|
||||||
(HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
|
(HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
|
||||||
|
|
||||||
|
/* That's the read ahead of the dynamic branch prediction unit in
|
||||||
|
bytes on a z10 CPU. */
|
||||||
|
#define Z10_PREDICT_DISTANCE 384
|
||||||
|
|
||||||
static enum machine_mode
|
static enum machine_mode
|
||||||
s390_libgcc_cmp_return_mode (void)
|
s390_libgcc_cmp_return_mode (void)
|
||||||
{
|
{
|
||||||
|
@ -9661,6 +9665,66 @@ s390_optimize_prologue (void)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* On z10 the dynamic branch prediction must see the backward jump in
|
||||||
|
a window of 384 bytes. If not it falls back to the static
|
||||||
|
prediction. This function rearranges the loop backward branch in a
|
||||||
|
way which makes the static prediction always correct. The function
|
||||||
|
returns true if it added an instruction. */
|
||||||
|
static bool
|
||||||
|
s390_z10_fix_long_loop_prediction (rtx insn)
|
||||||
|
{
|
||||||
|
rtx set = single_set (insn);
|
||||||
|
rtx code_label, label_ref, new_label;
|
||||||
|
rtx uncond_jump;
|
||||||
|
rtx cur_insn;
|
||||||
|
rtx tmp;
|
||||||
|
int distance;
|
||||||
|
|
||||||
|
/* This will exclude branch on count and branch on index patterns
|
||||||
|
since these are correctly statically predicted. */
|
||||||
|
if (!set
|
||||||
|
|| SET_DEST (set) != pc_rtx
|
||||||
|
|| GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
|
||||||
|
XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
|
||||||
|
|
||||||
|
gcc_assert (GET_CODE (label_ref) == LABEL_REF);
|
||||||
|
|
||||||
|
code_label = XEXP (label_ref, 0);
|
||||||
|
|
||||||
|
if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
|
||||||
|
|| INSN_ADDRESSES (INSN_UID (insn)) == -1
|
||||||
|
|| (INSN_ADDRESSES (INSN_UID (insn))
|
||||||
|
- INSN_ADDRESSES (INSN_UID (code_label)) < Z10_PREDICT_DISTANCE))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
for (distance = 0, cur_insn = PREV_INSN (insn);
|
||||||
|
distance < Z10_PREDICT_DISTANCE - 6;
|
||||||
|
distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
|
||||||
|
if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
new_label = gen_label_rtx ();
|
||||||
|
uncond_jump = emit_jump_insn_after (
|
||||||
|
gen_rtx_SET (VOIDmode, pc_rtx,
|
||||||
|
gen_rtx_LABEL_REF (VOIDmode, code_label)),
|
||||||
|
insn);
|
||||||
|
emit_label_after (new_label, uncond_jump);
|
||||||
|
|
||||||
|
tmp = XEXP (SET_SRC (set), 1);
|
||||||
|
XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
|
||||||
|
XEXP (SET_SRC (set), 2) = tmp;
|
||||||
|
INSN_CODE (insn) = -1;
|
||||||
|
|
||||||
|
XEXP (label_ref, 0) = new_label;
|
||||||
|
JUMP_LABEL (insn) = new_label;
|
||||||
|
JUMP_LABEL (uncond_jump) = code_label;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/* Returns 1 if INSN reads the value of REG for purposes not related
|
/* Returns 1 if INSN reads the value of REG for purposes not related
|
||||||
to addressing of memory, and 0 otherwise. */
|
to addressing of memory, and 0 otherwise. */
|
||||||
static int
|
static int
|
||||||
|
@ -9743,97 +9807,87 @@ s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx insn)
|
||||||
if that register's value is delivered via a bypass, then the
|
if that register's value is delivered via a bypass, then the
|
||||||
pipeline recycles, thereby causing significant performance decline.
|
pipeline recycles, thereby causing significant performance decline.
|
||||||
This function locates such situations and exchanges the two
|
This function locates such situations and exchanges the two
|
||||||
operands of the compare. */
|
operands of the compare. The function returns true whenever it
|
||||||
static void
|
added an insn. */
|
||||||
s390_z10_optimize_cmp (void)
|
static bool
|
||||||
|
s390_z10_optimize_cmp (rtx insn)
|
||||||
{
|
{
|
||||||
rtx insn, prev_insn, next_insn;
|
rtx prev_insn, next_insn;
|
||||||
int added_NOPs = 0;
|
bool insn_added_p = false;
|
||||||
|
rtx cond, *op0, *op1;
|
||||||
|
|
||||||
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
|
if (GET_CODE (PATTERN (insn)) == PARALLEL)
|
||||||
{
|
{
|
||||||
rtx cond, *op0, *op1;
|
/* Handle compare and branch and branch on count
|
||||||
|
instructions. */
|
||||||
|
rtx pattern = single_set (insn);
|
||||||
|
|
||||||
if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
|
if (!pattern
|
||||||
continue;
|
|| SET_DEST (pattern) != pc_rtx
|
||||||
|
|| GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
|
||||||
|
return false;
|
||||||
|
|
||||||
if (GET_CODE (PATTERN (insn)) == PARALLEL)
|
cond = XEXP (SET_SRC (pattern), 0);
|
||||||
|
op0 = &XEXP (cond, 0);
|
||||||
|
op1 = &XEXP (cond, 1);
|
||||||
|
}
|
||||||
|
else if (GET_CODE (PATTERN (insn)) == SET)
|
||||||
|
{
|
||||||
|
rtx src, dest;
|
||||||
|
|
||||||
|
/* Handle normal compare instructions. */
|
||||||
|
src = SET_SRC (PATTERN (insn));
|
||||||
|
dest = SET_DEST (PATTERN (insn));
|
||||||
|
|
||||||
|
if (!REG_P (dest)
|
||||||
|
|| !CC_REGNO_P (REGNO (dest))
|
||||||
|
|| GET_CODE (src) != COMPARE)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* s390_swap_cmp will try to find the conditional
|
||||||
|
jump when passing NULL_RTX as condition. */
|
||||||
|
cond = NULL_RTX;
|
||||||
|
op0 = &XEXP (src, 0);
|
||||||
|
op1 = &XEXP (src, 1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (!REG_P (*op0) || !REG_P (*op1))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* Swap the COMPARE arguments and its mask if there is a
|
||||||
|
conflicting access in the previous insn. */
|
||||||
|
prev_insn = PREV_INSN (insn);
|
||||||
|
if (prev_insn != NULL_RTX && INSN_P (prev_insn)
|
||||||
|
&& reg_referenced_p (*op1, PATTERN (prev_insn)))
|
||||||
|
s390_swap_cmp (cond, op0, op1, insn);
|
||||||
|
|
||||||
|
/* Check if there is a conflict with the next insn. If there
|
||||||
|
was no conflict with the previous insn, then swap the
|
||||||
|
COMPARE arguments and its mask. If we already swapped
|
||||||
|
the operands, or if swapping them would cause a conflict
|
||||||
|
with the previous insn, issue a NOP after the COMPARE in
|
||||||
|
order to separate the two instructions. */
|
||||||
|
next_insn = NEXT_INSN (insn);
|
||||||
|
if (next_insn != NULL_RTX && INSN_P (next_insn)
|
||||||
|
&& s390_non_addr_reg_read_p (*op1, next_insn))
|
||||||
|
{
|
||||||
|
if (prev_insn != NULL_RTX && INSN_P (prev_insn)
|
||||||
|
&& s390_non_addr_reg_read_p (*op0, prev_insn))
|
||||||
{
|
{
|
||||||
/* Handle compare and branch and branch on count
|
if (REGNO (*op1) == 0)
|
||||||
instructions. */
|
emit_insn_after (gen_nop1 (), insn);
|
||||||
rtx pattern = single_set (insn);
|
else
|
||||||
|
emit_insn_after (gen_nop (), insn);
|
||||||
if (!pattern
|
insn_added_p = true;
|
||||||
|| SET_DEST (pattern) != pc_rtx
|
|
||||||
|| GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
cond = XEXP (SET_SRC (pattern), 0);
|
|
||||||
op0 = &XEXP (cond, 0);
|
|
||||||
op1 = &XEXP (cond, 1);
|
|
||||||
}
|
|
||||||
else if (GET_CODE (PATTERN (insn)) == SET)
|
|
||||||
{
|
|
||||||
rtx src, dest;
|
|
||||||
|
|
||||||
/* Handle normal compare instructions. */
|
|
||||||
src = SET_SRC (PATTERN (insn));
|
|
||||||
dest = SET_DEST (PATTERN (insn));
|
|
||||||
|
|
||||||
if (!REG_P (dest)
|
|
||||||
|| !CC_REGNO_P (REGNO (dest))
|
|
||||||
|| GET_CODE (src) != COMPARE)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
/* s390_swap_cmp will try to find the conditional
|
|
||||||
jump when passing NULL_RTX as condition. */
|
|
||||||
cond = NULL_RTX;
|
|
||||||
op0 = &XEXP (src, 0);
|
|
||||||
op1 = &XEXP (src, 1);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
continue;
|
|
||||||
|
|
||||||
if (!REG_P (*op0) || !REG_P (*op1))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
/* Swap the COMPARE arguments and its mask if there is a
|
|
||||||
conflicting access in the previous insn. */
|
|
||||||
prev_insn = PREV_INSN (insn);
|
|
||||||
if (prev_insn != NULL_RTX && INSN_P (prev_insn)
|
|
||||||
&& reg_referenced_p (*op1, PATTERN (prev_insn)))
|
|
||||||
s390_swap_cmp (cond, op0, op1, insn);
|
s390_swap_cmp (cond, op0, op1, insn);
|
||||||
|
|
||||||
/* Check if there is a conflict with the next insn. If there
|
|
||||||
was no conflict with the previous insn, then swap the
|
|
||||||
COMPARE arguments and its mask. If we already swapped
|
|
||||||
the operands, or if swapping them would cause a conflict
|
|
||||||
with the previous insn, issue a NOP after the COMPARE in
|
|
||||||
order to separate the two instructions. */
|
|
||||||
next_insn = NEXT_INSN (insn);
|
|
||||||
if (next_insn != NULL_RTX && INSN_P (next_insn)
|
|
||||||
&& s390_non_addr_reg_read_p (*op1, next_insn))
|
|
||||||
{
|
|
||||||
if (prev_insn != NULL_RTX && INSN_P (prev_insn)
|
|
||||||
&& s390_non_addr_reg_read_p (*op0, prev_insn))
|
|
||||||
{
|
|
||||||
if (REGNO (*op1) == 0)
|
|
||||||
emit_insn_after (gen_nop1 (), insn);
|
|
||||||
else
|
|
||||||
emit_insn_after (gen_nop (), insn);
|
|
||||||
added_NOPs = 1;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
s390_swap_cmp (cond, op0, op1, insn);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
return insn_added_p;
|
||||||
/* Adjust branches if we added new instructions. */
|
|
||||||
if (added_NOPs)
|
|
||||||
shorten_branches (get_insns ());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Perform machine-dependent processing. */
|
/* Perform machine-dependent processing. */
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -9944,10 +9998,33 @@ s390_reorg (void)
|
||||||
/* Try to optimize prologue and epilogue further. */
|
/* Try to optimize prologue and epilogue further. */
|
||||||
s390_optimize_prologue ();
|
s390_optimize_prologue ();
|
||||||
|
|
||||||
/* Eliminate z10-specific pipeline recycles related to some compare
|
/* Walk over the insns and do some z10 specific changes. */
|
||||||
instructions. */
|
|
||||||
if (s390_tune == PROCESSOR_2097_Z10)
|
if (s390_tune == PROCESSOR_2097_Z10)
|
||||||
s390_z10_optimize_cmp ();
|
{
|
||||||
|
rtx insn;
|
||||||
|
bool insn_added_p = false;
|
||||||
|
|
||||||
|
/* The insn lengths and addresses have to be up to date for the
|
||||||
|
following manipulations. */
|
||||||
|
shorten_branches (get_insns ());
|
||||||
|
|
||||||
|
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
|
||||||
|
{
|
||||||
|
if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (JUMP_P (insn))
|
||||||
|
insn_added_p |= s390_z10_fix_long_loop_prediction (insn);
|
||||||
|
|
||||||
|
if (GET_CODE (PATTERN (insn)) == PARALLEL
|
||||||
|
|| GET_CODE (PATTERN (insn)) == SET)
|
||||||
|
insn_added_p |= s390_z10_optimize_cmp (insn);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Adjust branches if we added new instructions. */
|
||||||
|
if (insn_added_p)
|
||||||
|
shorten_branches (get_insns ());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1046,6 +1046,64 @@
|
||||||
(const_int 6) (const_int 12)))]) ; 8 byte for clr/jg
|
(const_int 6) (const_int 12)))]) ; 8 byte for clr/jg
|
||||||
; 10 byte for clgr/jg
|
; 10 byte for clgr/jg
|
||||||
|
|
||||||
|
; And now the same two patterns as above but with a negated CC mask.
|
||||||
|
|
||||||
|
; cij, cgij, crj, cgrj, cfi, cgfi, cr, cgr
|
||||||
|
; The following instructions do a complementary access of their second
|
||||||
|
; operand (z10 only): crj_c, cgrjc, cr, cgr
|
||||||
|
(define_insn "*icmp_and_br_signed_<mode>"
|
||||||
|
[(set (pc)
|
||||||
|
(if_then_else (match_operator 0 "s390_signed_integer_comparison"
|
||||||
|
[(match_operand:GPR 1 "register_operand" "d,d")
|
||||||
|
(match_operand:GPR 2 "nonmemory_operand" "d,C")])
|
||||||
|
(pc)
|
||||||
|
(label_ref (match_operand 3 "" ""))))
|
||||||
|
(clobber (reg:CC CC_REGNUM))]
|
||||||
|
"TARGET_Z10"
|
||||||
|
{
|
||||||
|
if (get_attr_length (insn) == 6)
|
||||||
|
return which_alternative ?
|
||||||
|
"c<g>ij%D0\t%1,%c2,%l3" : "c<g>rj%D0\t%1,%2,%l3";
|
||||||
|
else
|
||||||
|
return which_alternative ?
|
||||||
|
"c<g>fi\t%1,%c2\;jg%D0\t%l3" : "c<g>r\t%1,%2\;jg%D0\t%l3";
|
||||||
|
}
|
||||||
|
[(set_attr "op_type" "RIE")
|
||||||
|
(set_attr "type" "branch")
|
||||||
|
(set_attr "z10prop" "z10_super_c,z10_super")
|
||||||
|
(set (attr "length")
|
||||||
|
(if_then_else (lt (abs (minus (pc) (match_dup 3))) (const_int 60000))
|
||||||
|
(const_int 6) (const_int 12)))]) ; 8 byte for cr/jg
|
||||||
|
; 10 byte for cgr/jg
|
||||||
|
|
||||||
|
; clij, clgij, clrj, clgrj, clfi, clgfi, clr, clgr
|
||||||
|
; The following instructions do a complementary access of their second
|
||||||
|
; operand (z10 only): clrj, clgrj, clr, clgr
|
||||||
|
(define_insn "*icmp_and_br_unsigned_<mode>"
|
||||||
|
[(set (pc)
|
||||||
|
(if_then_else (match_operator 0 "s390_unsigned_integer_comparison"
|
||||||
|
[(match_operand:GPR 1 "register_operand" "d,d")
|
||||||
|
(match_operand:GPR 2 "nonmemory_operand" "d,I")])
|
||||||
|
(pc)
|
||||||
|
(label_ref (match_operand 3 "" ""))))
|
||||||
|
(clobber (reg:CC CC_REGNUM))]
|
||||||
|
"TARGET_Z10"
|
||||||
|
{
|
||||||
|
if (get_attr_length (insn) == 6)
|
||||||
|
return which_alternative ?
|
||||||
|
"cl<g>ij%D0\t%1,%b2,%l3" : "cl<g>rj%D0\t%1,%2,%l3";
|
||||||
|
else
|
||||||
|
return which_alternative ?
|
||||||
|
"cl<g>fi\t%1,%b2\;jg%D0\t%l3" : "cl<g>r\t%1,%2\;jg%D0\t%l3";
|
||||||
|
}
|
||||||
|
[(set_attr "op_type" "RIE")
|
||||||
|
(set_attr "type" "branch")
|
||||||
|
(set_attr "z10prop" "z10_super_c,z10_super")
|
||||||
|
(set (attr "length")
|
||||||
|
(if_then_else (lt (abs (minus (pc) (match_dup 3))) (const_int 60000))
|
||||||
|
(const_int 6) (const_int 12)))]) ; 8 byte for clr/jg
|
||||||
|
; 10 byte for clgr/jg
|
||||||
|
|
||||||
;;
|
;;
|
||||||
;;- Move instructions.
|
;;- Move instructions.
|
||||||
;;
|
;;
|
||||||
|
|
Loading…
Reference in New Issue