diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index bd056b6cf834..00f553278282 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,26 @@
+2011-11-09  Alan Modra
+
+	* function.c (bb_active_p): Delete.
+	(dup_block_and_redirect, active_insn_between): New functions.
+	(convert_jumps_to_returns, emit_return_for_exit): New functions,
+	split out from..
+	(thread_prologue_and_epilogue_insns): ..here.  Delete
+	shadowing variables.  Don't do prologue register clobber tests
+	when shrink wrapping already failed.  Delete all last_bb_active
+	code.  Instead compute tail block candidates for duplicating
+	exit path.  Remove these from antic set.  Duplicate tails when
+	reached from both blocks needing a prologue/epilogue and
+	blocks not needing such.
+	* ifcvt.c (dead_or_predicable): Test both flag_shrink_wrap and
+	HAVE_simple_return.
+	* bb-reorder.c (get_uncond_jump_length): Make global.
+	* bb-reorder.h (get_uncond_jump_length): Declare.
+	* cfgrtl.c (rtl_create_basic_block): Comment typo fix.
+	(rtl_split_edge): Likewise.  Warning fix.
+	(rtl_duplicate_bb): New function.
+	(rtl_cfg_hooks): Enable can_duplicate_block_p and duplicate_block.
+	* Makefile.in (function.o): Update dependencies.
+
 2011-11-08  Ulrich Weigand
 	    Georg-Johann Lay
 
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 9ec2df1485bf..ae4f4da2b3ea 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -2807,7 +2807,8 @@ function.o : function.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_ERROR_
    $(OPTABS_H) $(LIBFUNCS_H) $(REGS_H) hard-reg-set.h insn-config.h $(RECOG_H) \
    output.h $(EXCEPT_H) $(HASHTAB_H) $(GGC_H) $(TM_P_H) langhooks.h \
    gt-function.h $(TARGET_H) $(BASIC_BLOCK_H) $(INTEGRATE_H) $(PREDICT_H) \
-   $(TREE_PASS_H) $(DF_H) $(TIMEVAR_H) vecprim.h $(COMMON_TARGET_H)
+   $(TREE_PASS_H) $(DF_H) $(TIMEVAR_H) vecprim.h $(PARAMS_H) bb-reorder.h \
+   $(COMMON_TARGET_H)
 statistics.o : statistics.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TREE_PASS_H) $(TREE_DUMP_H) $(HASHTAB_H) statistics.h $(FUNCTION_H)
 stmt.o : stmt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
diff --git a/gcc/bb-reorder.c b/gcc/bb-reorder.c
index 3ac7fbd6034d..017c205d0865 100644
--- a/gcc/bb-reorder.c
+++ b/gcc/bb-reorder.c
@@ -181,7 +181,6 @@ static fibheapkey_t bb_to_key (basic_block);
 static bool better_edge_p (const_basic_block, const_edge, int, int, int, int,
			   const_edge);
 static void connect_traces (int, struct trace *);
 static bool copy_bb_p (const_basic_block, int);
-static int get_uncond_jump_length (void);
 static bool push_to_next_round_p (const_basic_block, int, int, int, gcov_type);
 
 /* Check to see if bb should be pushed into the next round of trace
@@ -1193,7 +1192,7 @@ copy_bb_p (const_basic_block bb, int code_may_grow)
 
 /* Return the length of unconditional jump instruction.  */
 
-static int
+int
 get_uncond_jump_length (void)
 {
   rtx label, jump;
diff --git a/gcc/bb-reorder.h b/gcc/bb-reorder.h
index 0b5ed254826e..13acdb0f31e6 100644
--- a/gcc/bb-reorder.h
+++ b/gcc/bb-reorder.h
@@ -34,4 +34,6 @@ extern struct target_bb_reorder *this_target_bb_reorder;
 #define this_target_bb_reorder (&default_target_bb_reorder)
 #endif
 
+extern int get_uncond_jump_length (void);
+
 #endif
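Background on the optimization the ChangeLog entry above describes: shrink-wrapping emits the prologue and epilogue only around paths that need a stack frame.  A minimal C sketch of the kind of function that benefits (the names here are invented for illustration, not taken from the patch):

    struct item;
    extern int process (struct item *);   /* hypothetical callee */

    /* With shrink-wrapping, the register saves and stack adjustment of
       the prologue are emitted only on the process() path, so the
       common early-return path executes no frame setup at all.  */
    int
    maybe_process (struct item *p)
    {
      if (p == 0)
        return 0;           /* fast path: no frame needed */
      return process (p);   /* slow path: the call requires the prologue */
    }

The function.c changes below extend this by duplicating small exit "tail" blocks, so frame-free paths can also reach the function exit without running the epilogue.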
diff --git a/gcc/cfgrtl.c b/gcc/cfgrtl.c
index 6e9f70ed719a..16e2eb30a6a7 100644
--- a/gcc/cfgrtl.c
+++ b/gcc/cfgrtl.c
@@ -322,9 +322,9 @@ create_basic_block_structure (rtx head, rtx end, rtx bb_note, basic_block after)
 }
 
 /* Create new basic block consisting of instructions in between HEAD and END
-   and place it to the BB chain after block AFTER.  END can be NULL in to
-   create new empty basic block before HEAD.  Both END and HEAD can be NULL to
-   create basic block at the end of INSN chain.  */
+   and place it to the BB chain after block AFTER.  END can be NULL to
+   create a new empty basic block before HEAD.  Both END and HEAD can be
+   NULL to create basic block at the end of INSN chain.  */
 
 static basic_block
 rtl_create_basic_block (void *headp, void *endp, basic_block after)
@@ -1411,8 +1411,8 @@ rtl_split_edge (edge edge_in)
     before = NULL_RTX;
 
   /* If this is a fall through edge to the exit block, the blocks might be
-     not adjacent, and the right place is the after the source.  */
-  if (edge_in->flags & EDGE_FALLTHRU && edge_in->dest == EXIT_BLOCK_PTR)
+     not adjacent, and the right place is after the source.  */
+  if ((edge_in->flags & EDGE_FALLTHRU) && edge_in->dest == EXIT_BLOCK_PTR)
     {
       before = NEXT_INSN (BB_END (edge_in->src));
       bb = create_basic_block (before, NULL, edge_in->src);
@@ -3177,6 +3177,21 @@ rtl_can_remove_branch_p (const_edge e)
   return true;
 }
 
+/* We do not want to declare these functions in a header file, since they
+   should only be used through the cfghooks interface, and we do not want to
+   move them here since it would require also moving quite a lot of related
+   code.  They are in cfglayout.c.  */
+extern bool cfg_layout_can_duplicate_bb_p (const_basic_block);
+extern basic_block cfg_layout_duplicate_bb (basic_block);
+
+static basic_block
+rtl_duplicate_bb (basic_block bb)
+{
+  bb = cfg_layout_duplicate_bb (bb);
+  bb->aux = NULL;
+  return bb;
+}
+
 /* Implementation of CFG manipulation for linearized RTL.  */
 struct cfg_hooks rtl_cfg_hooks = {
   "rtl",
@@ -3193,8 +3208,8 @@ struct cfg_hooks rtl_cfg_hooks = {
   rtl_merge_blocks,
   rtl_predict_edge,
   rtl_predicted_by_p,
-  NULL, /* can_duplicate_block_p */
-  NULL, /* duplicate_block */
+  cfg_layout_can_duplicate_bb_p,
+  rtl_duplicate_bb,
   rtl_split_edge,
   rtl_make_forwarder_block,
   rtl_tidy_fallthru_edge,
@@ -3216,13 +3231,6 @@ struct cfg_hooks rtl_cfg_hooks = {
    This representation will hopefully become the default one in future
    version of the compiler.  */
 
-/* We do not want to declare these functions in a header file, since they
-   should only be used through the cfghooks interface, and we do not want to
-   move them here since it would require also moving quite a lot of related
-   code.  They are in cfglayout.c.  */
-extern bool cfg_layout_can_duplicate_bb_p (const_basic_block);
-extern basic_block cfg_layout_duplicate_bb (basic_block);
-
 struct cfg_hooks cfg_layout_rtl_cfg_hooks = {
   "cfglayout mode",
   rtl_verify_flow_info_1,
diff --git a/gcc/function.c b/gcc/function.c
index 2058d7540806..0cbbbc5e4047 100644
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -65,6 +65,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "df.h"
 #include "timevar.h"
 #include "vecprim.h"
+#include "params.h"
+#include "bb-reorder.h"
 
 /* So we can assign to cfun in this file.  */
 #undef cfun
@@ -5290,8 +5292,6 @@ requires_stack_frame_p (rtx insn, HARD_REG_SET prologue_used,
   HARD_REG_SET hardregs;
   unsigned regno;
 
-  if (!INSN_P (insn) || DEBUG_INSN_P (insn))
-    return false;
   if (CALL_P (insn))
     return !SIBLING_CALL_P (insn);
 
@@ -5514,22 +5514,185 @@ set_return_jump_label (rtx returnjump)
     JUMP_LABEL (returnjump) = ret_rtx;
 }
 
-/* Return true if BB has any active insns.  */
-static bool
-bb_active_p (basic_block bb)
+#ifdef HAVE_simple_return
+/* Create a copy of BB instructions and insert at BEFORE.  Redirect
+   preds of BB to COPY_BB if they don't appear in NEED_PROLOGUE.  */
+static void
+dup_block_and_redirect (basic_block bb, basic_block copy_bb, rtx before,
+			bitmap_head *need_prologue)
 {
-  rtx label;
+  edge_iterator ei;
+  edge e;
+  rtx insn = BB_END (bb);
 
-  /* Test whether there are active instructions in BB.  */
-  label = BB_END (bb);
-  while (label && !LABEL_P (label))
+  /* We know BB has a single successor, so there is no need to copy a
+     simple jump at the end of BB.  */
+  if (simplejump_p (insn))
+    insn = PREV_INSN (insn);
+
+  start_sequence ();
+  duplicate_insn_chain (BB_HEAD (bb), insn);
+  if (dump_file)
     {
-      if (active_insn_p (label))
-	break;
-      label = PREV_INSN (label);
+      unsigned count = 0;
+      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+	if (active_insn_p (insn))
+	  ++count;
+      fprintf (dump_file, "Duplicating bb %d to bb %d, %u active insns.\n",
+	       bb->index, copy_bb->index, count);
     }
-  return BB_HEAD (bb) != label || !LABEL_P (label);
+  insn = get_insns ();
+  end_sequence ();
+  emit_insn_before (insn, before);
+
+  /* Redirect all the paths that need no prologue into copy_bb.  */
+  for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei)); )
+    if (!bitmap_bit_p (need_prologue, e->src->index))
+      {
+	redirect_edge_and_branch_force (e, copy_bb);
+	continue;
+      }
+    else
+      ei_next (&ei);
 }
+#endif
+
+#if defined (HAVE_return) || defined (HAVE_simple_return)
+/* Return true if there are any active insns between HEAD and TAIL.  */
+static bool
+active_insn_between (rtx head, rtx tail)
+{
+  while (tail)
+    {
+      if (active_insn_p (tail))
+	return true;
+      if (tail == head)
+	return false;
+      tail = PREV_INSN (tail);
+    }
+  return false;
+}
+
+/* LAST_BB is a block that exits, and empty of active instructions.
+   Examine its predecessors for jumps that can be converted to
+   (conditional) returns.  */
+static VEC (edge, heap) *
+convert_jumps_to_returns (basic_block last_bb, bool simple_p,
+			  VEC (edge, heap) *unconverted ATTRIBUTE_UNUSED)
+{
+  int i;
+  basic_block bb;
+  rtx label;
+  edge_iterator ei;
+  edge e;
+  VEC(basic_block,heap) *src_bbs;
+
+  src_bbs = VEC_alloc (basic_block, heap, EDGE_COUNT (last_bb->preds));
+  FOR_EACH_EDGE (e, ei, last_bb->preds)
+    if (e->src != ENTRY_BLOCK_PTR)
+      VEC_quick_push (basic_block, src_bbs, e->src);
+
+  label = BB_HEAD (last_bb);
+
+  FOR_EACH_VEC_ELT (basic_block, src_bbs, i, bb)
+    {
+      rtx jump = BB_END (bb);
+
+      if (!JUMP_P (jump) || JUMP_LABEL (jump) != label)
+	continue;
+
+      e = find_edge (bb, last_bb);
+
+      /* If we have an unconditional jump, we can replace that
+	 with a simple return instruction.  */
+      if (simplejump_p (jump))
+	{
+	  /* The use of the return register might be present in the exit
+	     fallthru block.  Either:
+	     - removing the use is safe, and we should remove the use in
+	       the exit fallthru block, or
+	     - removing the use is not safe, and we should add it here.
+	     For now, we conservatively choose the latter.  Either of the
+	     2 helps in crossjumping.  */
+	  emit_use_return_register_into_block (bb);
+
+	  emit_return_into_block (simple_p, bb);
+	  delete_insn (jump);
+	}
+
+      /* If we have a conditional jump branching to the last
+	 block, we can try to replace that with a conditional
+	 return instruction.  */
+      else if (condjump_p (jump))
+	{
+	  rtx dest;
+
+	  if (simple_p)
+	    dest = simple_return_rtx;
+	  else
+	    dest = ret_rtx;
+	  if (!redirect_jump (jump, dest, 0))
+	    {
+#ifdef HAVE_simple_return
+	      if (simple_p)
+		{
+		  if (dump_file)
+		    fprintf (dump_file,
+			     "Failed to redirect bb %d branch.\n", bb->index);
+		  VEC_safe_push (edge, heap, unconverted, e);
+		}
+#endif
+	      continue;
+	    }
+
+	  /* See comment in simplejump_p case above.  */
+	  emit_use_return_register_into_block (bb);
+
+	  /* If this block has only one successor, it both jumps
+	     and falls through to the fallthru block, so we can't
+	     delete the edge.  */
+	  if (single_succ_p (bb))
+	    continue;
+	}
+      else
+	{
+#ifdef HAVE_simple_return
+	  if (simple_p)
+	    {
+	      if (dump_file)
+		fprintf (dump_file,
+			 "Failed to redirect bb %d branch.\n", bb->index);
+	      VEC_safe_push (edge, heap, unconverted, e);
+	    }
+#endif
+	  continue;
+	}
+
+      /* Fix up the CFG for the successful change we just made.  */
+      redirect_edge_succ (e, EXIT_BLOCK_PTR);
+    }
+  VEC_free (basic_block, heap, src_bbs);
+  return unconverted;
+}
+
+/* Emit a return insn for the exit fallthru block.  */
+static basic_block
+emit_return_for_exit (edge exit_fallthru_edge, bool simple_p)
+{
+  basic_block last_bb = exit_fallthru_edge->src;
+
+  if (JUMP_P (BB_END (last_bb)))
+    {
+      last_bb = split_edge (exit_fallthru_edge);
+      exit_fallthru_edge = single_succ_edge (last_bb);
+    }
+  emit_barrier_after (BB_END (last_bb));
+  emit_return_into_block (simple_p, last_bb);
+  exit_fallthru_edge->flags &= ~EDGE_FALLTHRU;
+  return last_bb;
+}
+#endif
+
 
 /* Generate the prologue and epilogue RTL if the machine supports it.  Thread
    this into place with notes indicating where the prologue ends and where
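A rough illustration of what convert_jumps_to_returns above aims for, assuming a target with (conditional) return instructions; this is a C-level sketch, not output of the pass:

    /* The block at "out" holds no active insns, only the label and the
       return.  On a HAVE_return target the conditional branch to it can
       be rewritten as a conditional return, and an unconditional jump
       to it can be replaced by a return insn, deleting the jump.  */
    int
    clamp_nonneg (int x)
    {
      if (x >= 0)
        goto out;   /* candidate for conversion to a conditional return */
      x = 0;
    out:
      return x;
    }

When redirect_jump cannot make the change in the simple_p case, the edge is queued in "unconverted" and handled later by routing it to a shared simple_return block.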
@@ -5583,20 +5746,17 @@ static void
 thread_prologue_and_epilogue_insns (void)
 {
   bool inserted;
-  basic_block last_bb;
-  bool last_bb_active ATTRIBUTE_UNUSED;
 #ifdef HAVE_simple_return
-  VEC (rtx, heap) *unconverted_simple_returns = NULL;
-  basic_block simple_return_block_hot = NULL;
-  basic_block simple_return_block_cold = NULL;
+  VEC (edge, heap) *unconverted_simple_returns = NULL;
   bool nonempty_prologue;
+  bitmap_head bb_flags;
+  unsigned max_grow_size;
 #endif
-  rtx returnjump ATTRIBUTE_UNUSED;
+  rtx returnjump;
   rtx seq ATTRIBUTE_UNUSED, epilogue_end ATTRIBUTE_UNUSED;
   rtx prologue_seq ATTRIBUTE_UNUSED, split_prologue_seq ATTRIBUTE_UNUSED;
   edge e, entry_edge, orig_entry_edge, exit_fallthru_edge;
   edge_iterator ei;
-  bitmap_head bb_flags;
 
   df_analyze ();
@@ -5614,18 +5774,6 @@ thread_prologue_and_epilogue_insns (void)
   entry_edge = single_succ_edge (ENTRY_BLOCK_PTR);
   orig_entry_edge = entry_edge;
 
-  exit_fallthru_edge = find_fallthru_edge (EXIT_BLOCK_PTR->preds);
-  if (exit_fallthru_edge != NULL)
-    {
-      last_bb = exit_fallthru_edge->src;
-      last_bb_active = bb_active_p (last_bb);
-    }
-  else
-    {
-      last_bb = NULL;
-      last_bb_active = false;
-    }
-
   split_prologue_seq = NULL_RTX;
   if (flag_split_stack
       && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
@@ -5675,9 +5823,9 @@ thread_prologue_and_epilogue_insns (void)
     }
 #endif
 
+#ifdef HAVE_simple_return
   bitmap_initialize (&bb_flags, &bitmap_default_obstack);
 
-#ifdef HAVE_simple_return
   /* Try to perform a kind of shrink-wrapping, making sure the
      prologue/epilogue is emitted only around those parts of the
      function that require it.  */
@@ -5697,11 +5845,11 @@ thread_prologue_and_epilogue_insns (void)
       HARD_REG_SET prologue_clobbered, prologue_used, live_on_edge;
       HARD_REG_SET set_up_by_prologue;
       rtx p_insn;
-
       VEC(basic_block, heap) *vec;
       basic_block bb;
       bitmap_head bb_antic_flags;
       bitmap_head bb_on_list;
+      bitmap_head bb_tail;
 
       if (dump_file)
	fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");
@@ -5726,14 +5874,12 @@ thread_prologue_and_epilogue_insns (void)
 
       prepare_shrink_wrap (entry_edge->dest);
 
-      /* That may have inserted instructions into the last block.  */
-      if (last_bb && !last_bb_active)
-	last_bb_active = bb_active_p (last_bb);
-
       bitmap_initialize (&bb_antic_flags, &bitmap_default_obstack);
       bitmap_initialize (&bb_on_list, &bitmap_default_obstack);
+      bitmap_initialize (&bb_tail, &bitmap_default_obstack);
 
-      /* Find the set of basic blocks that require a stack frame.  */
+      /* Find the set of basic blocks that require a stack frame,
+	 and blocks that are too big to be duplicated.  */
 
       vec = VEC_alloc (basic_block, heap, n_basic_blocks);
 
@@ -5747,66 +5893,80 @@ thread_prologue_and_epilogue_insns (void)
	  add_to_hard_reg_set (&set_up_by_prologue, Pmode, PIC_OFFSET_TABLE_REGNUM);
 
+      /* We don't use a different max size depending on
+	 optimize_bb_for_speed_p because increasing shrink-wrapping
+	 opportunities by duplicating tail blocks can actually result
+	 in an overall decrease in code size.  */
+      max_grow_size = get_uncond_jump_length ();
+      max_grow_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
+
       FOR_EACH_BB (bb)
	{
	  rtx insn;
-	  /* As a special case, check for jumps to the last bb that
-	     cannot successfully be converted to simple_returns later
-	     on, and mark them as requiring a frame.  These are
-	     conditional jumps that jump to their fallthru block, so
-	     it's not a case that is expected to occur often.  */
-	  if (JUMP_P (BB_END (bb)) && any_condjump_p (BB_END (bb))
-	      && single_succ_p (bb)
-	      && !last_bb_active
-	      && single_succ (bb) == last_bb)
-	    {
-	      bitmap_set_bit (&bb_flags, bb->index);
-	      VEC_quick_push (basic_block, vec, bb);
-	    }
-	  else
-	    FOR_BB_INSNS (bb, insn)
-	      if (requires_stack_frame_p (insn, prologue_used,
-					  set_up_by_prologue))
-		{
-		  bitmap_set_bit (&bb_flags, bb->index);
-		  VEC_quick_push (basic_block, vec, bb);
-		  break;
-		}
+	  unsigned size = 0;
+
+	  FOR_BB_INSNS (bb, insn)
+	    if (NONDEBUG_INSN_P (insn))
+	      {
+		if (requires_stack_frame_p (insn, prologue_used,
+					    set_up_by_prologue))
+		  {
+		    if (bb == entry_edge->dest)
+		      goto fail_shrinkwrap;
+		    bitmap_set_bit (&bb_flags, bb->index);
+		    VEC_quick_push (basic_block, vec, bb);
+		    break;
+		  }
+		else if (size <= max_grow_size)
+		  {
+		    size += get_attr_min_length (insn);
+		    if (size > max_grow_size)
+		      bitmap_set_bit (&bb_on_list, bb->index);
+		  }
+	      }
	}
 
+      /* Blocks that really need a prologue, or are too big for tails.  */
+      bitmap_ior_into (&bb_on_list, &bb_flags);
+
       /* For every basic block that needs a prologue, mark all blocks
	 reachable from it, so as to ensure they are also seen as
	 requiring a prologue.  */
       while (!VEC_empty (basic_block, vec))
	{
	  basic_block tmp_bb = VEC_pop (basic_block, vec);
-	  edge e;
-	  edge_iterator ei;
+
	  FOR_EACH_EDGE (e, ei, tmp_bb->succs)
	    if (e->dest != EXIT_BLOCK_PTR
		&& bitmap_set_bit (&bb_flags, e->dest->index))
	      VEC_quick_push (basic_block, vec, e->dest);
	}
-      /* If the last basic block contains only a label, we'll be able
-	 to convert jumps to it to (potentially conditional) return
-	 insns later.  This means we don't necessarily need a prologue
-	 for paths reaching it.  */
-      if (last_bb && optimize)
+
+      /* Find the set of basic blocks that need no prologue, have a
+	 single successor, can be duplicated, meet a max size
+	 requirement, and go to the exit via like blocks.  */
+      VEC_quick_push (basic_block, vec, EXIT_BLOCK_PTR);
+      while (!VEC_empty (basic_block, vec))
	{
-	  if (!last_bb_active)
-	    bitmap_clear_bit (&bb_flags, last_bb->index);
-	  else if (!bitmap_bit_p (&bb_flags, last_bb->index))
-	    goto fail_shrinkwrap;
+	  basic_block tmp_bb = VEC_pop (basic_block, vec);
+
+	  FOR_EACH_EDGE (e, ei, tmp_bb->preds)
+	    if (single_succ_p (e->src)
+		&& !bitmap_bit_p (&bb_on_list, e->src->index)
+		&& can_duplicate_block_p (e->src)
+		&& bitmap_set_bit (&bb_tail, e->src->index))
+	      VEC_quick_push (basic_block, vec, e->src);
	}
 
       /* Now walk backwards from every block that is marked as needing
-	 a prologue to compute the bb_antic_flags bitmap.  */
-      bitmap_copy (&bb_antic_flags, &bb_flags);
+	 a prologue to compute the bb_antic_flags bitmap.  Exclude
+	 tail blocks; they can be duplicated to be used on paths not
+	 needing a prologue.  */
+      bitmap_clear (&bb_on_list);
+      bitmap_and_compl (&bb_antic_flags, &bb_flags, &bb_tail);
       FOR_EACH_BB (bb)
	{
-	  edge e;
-	  edge_iterator ei;
-	  if (!bitmap_bit_p (&bb_flags, bb->index))
+	  if (!bitmap_bit_p (&bb_antic_flags, bb->index))
	    continue;
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
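The size cap computed above can be made concrete with a worked example.  The numbers below are assumptions for illustration only; in the compiler they come from get_attr_min_length, get_uncond_jump_length and the --param max-grow-copy-bb-insns setting:

    #include <stdio.h>

    int
    main (void)
    {
      /* Assume one unconditional jump is 4 length units and the param
         value is 8, mirroring max_grow_size in the patch.  */
      unsigned max_grow_size = 4 * 8;
      /* Assumed min_length of each insn in a candidate block.  */
      unsigned insn_len[] = { 4, 8, 4, 12, 8 };
      unsigned size = 0, i;

      for (i = 0; i < sizeof insn_len / sizeof insn_len[0]; i++)
        {
          size += insn_len[i];
          if (size > max_grow_size)
            {
              printf ("too big at insn %u: block is not a tail\n", i);
              return 0;
            }
        }
      printf ("size %u <= %u: block remains a tail candidate\n",
              size, max_grow_size);
      return 0;
    }

Here the running size exceeds 32 at the fifth insn (4+8+4+12+8 = 36), so the block would be marked in bb_on_list and excluded from duplication.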
@@ -5816,8 +5976,6 @@ thread_prologue_and_epilogue_insns (void)
       while (!VEC_empty (basic_block, vec))
	{
	  basic_block tmp_bb = VEC_pop (basic_block, vec);
-	  edge e;
-	  edge_iterator ei;
	  bool all_set = true;
 
	  bitmap_clear_bit (&bb_on_list, tmp_bb->index);
@@ -5862,28 +6020,134 @@ thread_prologue_and_epilogue_insns (void)
	    }
	}
 
-      /* Test whether the prologue is known to clobber any register
-	 (other than FP or SP) which are live on the edge.  */
-      CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
-      if (frame_pointer_needed)
-	CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
-      CLEAR_HARD_REG_SET (live_on_edge);
-      reg_set_to_hard_reg_set (&live_on_edge,
-			       df_get_live_in (entry_edge->dest));
-      if (hard_reg_set_intersect_p (live_on_edge, prologue_clobbered))
+      if (entry_edge != orig_entry_edge)
	{
-	  entry_edge = orig_entry_edge;
-	  if (dump_file)
-	    fprintf (dump_file, "Shrink-wrapping aborted due to clobber.\n");
+	  /* Test whether the prologue is known to clobber any register
+	     (other than FP or SP) which are live on the edge.  */
+	  CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
+	  if (frame_pointer_needed)
+	    CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
+	  CLEAR_HARD_REG_SET (live_on_edge);
+	  reg_set_to_hard_reg_set (&live_on_edge,
+				   df_get_live_in (entry_edge->dest));
+	  if (hard_reg_set_intersect_p (live_on_edge, prologue_clobbered))
+	    {
+	      entry_edge = orig_entry_edge;
+	      if (dump_file)
+		fprintf (dump_file,
+			 "Shrink-wrapping aborted due to clobber.\n");
+	    }
	}
-      else if (entry_edge != orig_entry_edge)
+      if (entry_edge != orig_entry_edge)
	{
	  crtl->shrink_wrapped = true;
	  if (dump_file)
	    fprintf (dump_file, "Performing shrink-wrapping.\n");
+
+	  /* Find tail blocks reachable from both blocks needing a
+	     prologue and blocks not needing a prologue.  */
+	  if (!bitmap_empty_p (&bb_tail))
+	    FOR_EACH_BB (bb)
+	      {
+		bool some_pro, some_no_pro;
+		if (!bitmap_bit_p (&bb_tail, bb->index))
+		  continue;
+		some_pro = some_no_pro = false;
+		FOR_EACH_EDGE (e, ei, bb->preds)
+		  {
+		    if (bitmap_bit_p (&bb_flags, e->src->index))
+		      some_pro = true;
+		    else
+		      some_no_pro = true;
+		  }
+		if (some_pro && some_no_pro)
+		  VEC_quick_push (basic_block, vec, bb);
+		else
+		  bitmap_clear_bit (&bb_tail, bb->index);
+	      }
+	  /* Find the head of each tail.  */
+	  while (!VEC_empty (basic_block, vec))
+	    {
+	      basic_block tbb = VEC_pop (basic_block, vec);
+
+	      if (!bitmap_bit_p (&bb_tail, tbb->index))
+		continue;
+
+	      while (single_succ_p (tbb))
+		{
+		  tbb = single_succ (tbb);
+		  bitmap_clear_bit (&bb_tail, tbb->index);
+		}
+	    }
+	  /* Now duplicate the tails.  */
+	  if (!bitmap_empty_p (&bb_tail))
+	    FOR_EACH_BB_REVERSE (bb)
+	      {
+		basic_block copy_bb, tbb;
+		rtx insert_point;
+		int eflags;
+
+		if (!bitmap_clear_bit (&bb_tail, bb->index))
+		  continue;
+
+		/* Create a copy of BB, instructions and all, for
+		   use on paths that don't need a prologue.
+		   Ideal placement of the copy is on a fall-thru edge
+		   or after a block that would jump to the copy.  */
+		FOR_EACH_EDGE (e, ei, bb->preds)
+		  if (!bitmap_bit_p (&bb_flags, e->src->index)
+		      && single_succ_p (e->src))
+		    break;
+		if (e)
+		  {
+		    copy_bb = create_basic_block (NEXT_INSN (BB_END (e->src)),
+						  NULL_RTX, e->src);
+		    BB_COPY_PARTITION (copy_bb, e->src);
+		  }
+		else
+		  {
+		    /* Otherwise put the copy at the end of the function.  */
+		    copy_bb = create_basic_block (NULL_RTX, NULL_RTX,
+						  EXIT_BLOCK_PTR->prev_bb);
+		    BB_COPY_PARTITION (copy_bb, bb);
+		  }
+
+		insert_point = emit_note_after (NOTE_INSN_DELETED,
+						BB_END (copy_bb));
+		emit_barrier_after (BB_END (copy_bb));
+
+		tbb = bb;
+		while (1)
+		  {
+		    dup_block_and_redirect (tbb, copy_bb, insert_point,
+					    &bb_flags);
+		    tbb = single_succ (tbb);
+		    if (tbb == EXIT_BLOCK_PTR)
+		      break;
+		    e = split_block (copy_bb, PREV_INSN (insert_point));
+		    copy_bb = e->dest;
+		  }
+
+		/* Quiet verify_flow_info by (ab)using EDGE_FAKE.
+		   We have yet to add a simple_return to the tails,
+		   as we'd like to first convert_jumps_to_returns in
+		   case the block is no longer used after that.  */
+		eflags = EDGE_FAKE;
+		if (CALL_P (PREV_INSN (insert_point))
+		    && SIBLING_CALL_P (PREV_INSN (insert_point)))
+		  eflags = EDGE_SIBCALL | EDGE_ABNORMAL;
+		make_single_succ_edge (copy_bb, EXIT_BLOCK_PTR, eflags);
+
+		/* verify_flow_info doesn't like a note after a
+		   sibling call.  */
+		delete_insn (insert_point);
+		if (bitmap_empty_p (&bb_tail))
+		  break;
+	      }
	}
 
     fail_shrinkwrap:
+      bitmap_clear (&bb_tail);
       bitmap_clear (&bb_antic_flags);
       bitmap_clear (&bb_on_list);
       VEC_free (basic_block, heap, vec);
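The duplication loop just added is easiest to see on a diamond-shaped function.  A C-level sketch with invented names (the actual transformation happens on RTL basic blocks):

    struct item;
    extern int process (struct item *);   /* hypothetical callee */

    /* Both paths meet in the small shared tail "return r;".  The loop
       above copies that tail, redirects the p == 0 path (which needs no
       prologue) to the copy, and gives the copy a fake edge to the exit
       block; only the original tail later receives the epilogue.  */
    int
    lookup_cost (struct item *p)
    {
      int r = 0;
      if (p != 0)
        r = process (p);   /* this path needs the prologue/epilogue */
      return r;            /* shared tail: a duplication candidate */
    }

The EDGE_FAKE trick and the deferred simple_return emission are resolved in the epilogue_done handling further below.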
@@ -5911,147 +6175,73 @@ thread_prologue_and_epilogue_insns (void)
 
   rtl_profile_for_bb (EXIT_BLOCK_PTR);
 
-#ifdef HAVE_return
+  exit_fallthru_edge = find_fallthru_edge (EXIT_BLOCK_PTR->preds);
+
   /* If we're allowed to generate a simple return instruction, then by
      definition we don't need a full epilogue.  If the last basic
      block before the exit block does not contain active instructions,
      examine its predecessors and try to emit (conditional) return
      instructions.  */
-  if (optimize && !last_bb_active
-      && (HAVE_return || entry_edge != orig_entry_edge))
+#ifdef HAVE_simple_return
+  if (entry_edge != orig_entry_edge)
     {
-      edge_iterator ei2;
-      int i;
-      basic_block bb;
-      rtx label;
-      VEC(basic_block,heap) *src_bbs;
+      if (optimize)
+	{
+	  unsigned i, last;
 
+	  /* convert_jumps_to_returns may add to EXIT_BLOCK_PTR->preds
+	     (but won't remove).  Stop at end of current preds.  */
+	  last = EDGE_COUNT (EXIT_BLOCK_PTR->preds);
+	  for (i = 0; i < last; i++)
+	    {
+	      e = EDGE_I (EXIT_BLOCK_PTR->preds, i);
+	      if (LABEL_P (BB_HEAD (e->src))
+		  && !bitmap_bit_p (&bb_flags, e->src->index)
+		  && !active_insn_between (BB_HEAD (e->src), BB_END (e->src)))
+		unconverted_simple_returns
+		  = convert_jumps_to_returns (e->src, true,
+					      unconverted_simple_returns);
+	    }
+	}
+
+      if (exit_fallthru_edge != NULL
+	  && EDGE_COUNT (exit_fallthru_edge->src->preds) != 0
+	  && !bitmap_bit_p (&bb_flags, exit_fallthru_edge->src->index))
+	{
+	  basic_block last_bb;
+
+	  last_bb = emit_return_for_exit (exit_fallthru_edge, true);
+	  returnjump = BB_END (last_bb);
+	  exit_fallthru_edge = NULL;
+	}
+    }
+#endif
+#ifdef HAVE_return
+  if (HAVE_return)
+    {
       if (exit_fallthru_edge == NULL)
	goto epilogue_done;
 
-      label = BB_HEAD (last_bb);
-      src_bbs = VEC_alloc (basic_block, heap, EDGE_COUNT (last_bb->preds));
-      FOR_EACH_EDGE (e, ei2, last_bb->preds)
-	if (e->src != ENTRY_BLOCK_PTR)
-	  VEC_quick_push (basic_block, src_bbs, e->src);
-
-      FOR_EACH_VEC_ELT (basic_block, src_bbs, i, bb)
+      if (optimize)
	{
-	  bool simple_p;
-	  rtx jump;
-	  e = find_edge (bb, last_bb);
+	  basic_block last_bb = exit_fallthru_edge->src;
 
-	  jump = BB_END (bb);
+	  if (LABEL_P (BB_HEAD (last_bb))
+	      && !active_insn_between (BB_HEAD (last_bb), BB_END (last_bb)))
+	    convert_jumps_to_returns (last_bb, false, NULL);
 
+	  if (EDGE_COUNT (exit_fallthru_edge->src->preds) != 0)
+	    {
+	      last_bb = emit_return_for_exit (exit_fallthru_edge, false);
+	      epilogue_end = returnjump = BB_END (last_bb);
 #ifdef HAVE_simple_return
-	  simple_p = (entry_edge != orig_entry_edge
-		      && !bitmap_bit_p (&bb_flags, bb->index));
-#else
-	  simple_p = false;
+	      /* Emitting the return may add a basic block.
+		 Fix bb_flags for the added block.  */
+	      if (last_bb != exit_fallthru_edge->src)
+		bitmap_set_bit (&bb_flags, last_bb->index);
 #endif
-
-	  if (!simple_p
-	      && (!HAVE_return || !JUMP_P (jump)
-		  || JUMP_LABEL (jump) != label))
-	    continue;
-
-	  /* If we have an unconditional jump, we can replace that
-	     with a simple return instruction.  */
-	  if (!JUMP_P (jump))
-	    {
-	      emit_barrier_after (BB_END (bb));
-	      emit_return_into_block (simple_p, bb);
+	      goto epilogue_done;
	    }
-	  else if (simplejump_p (jump))
-	    {
-	      /* The use of the return register might be present in the exit
-		 fallthru block.  Either:
-		 - removing the use is safe, and we should remove the use in
-		   the exit fallthru block, or
-		 - removing the use is not safe, and we should add it here.
-		 For now, we conservatively choose the latter.  Either of the
-		 2 helps in crossjumping.  */
-	      emit_use_return_register_into_block (bb);
-
-	      emit_return_into_block (simple_p, bb);
-	      delete_insn (jump);
-	    }
-	  else if (condjump_p (jump) && JUMP_LABEL (jump) != label)
-	    {
-	      basic_block new_bb;
-	      edge new_e;
-
-	      gcc_assert (simple_p);
-	      new_bb = split_edge (e);
-	      emit_barrier_after (BB_END (new_bb));
-	      emit_return_into_block (simple_p, new_bb);
-#ifdef HAVE_simple_return
-	      if (BB_PARTITION (new_bb) == BB_HOT_PARTITION)
-		simple_return_block_hot = new_bb;
-	      else
-		simple_return_block_cold = new_bb;
-#endif
-	      new_e = single_succ_edge (new_bb);
-	      redirect_edge_succ (new_e, EXIT_BLOCK_PTR);
-
-	      continue;
-	    }
-	  /* If we have a conditional jump branching to the last
-	     block, we can try to replace that with a conditional
-	     return instruction.  */
-	  else if (condjump_p (jump))
-	    {
-	      rtx dest;
-	      if (simple_p)
-		dest = simple_return_rtx;
-	      else
-		dest = ret_rtx;
-	      if (! redirect_jump (jump, dest, 0))
-		{
-#ifdef HAVE_simple_return
-		  if (simple_p)
-		    VEC_safe_push (rtx, heap,
-				   unconverted_simple_returns, jump);
-#endif
-		  continue;
-		}
-
-	      /* See comment in simple_jump_p case above.  */
-	      emit_use_return_register_into_block (bb);
-
-	      /* If this block has only one successor, it both jumps
-		 and falls through to the fallthru block, so we can't
-		 delete the edge.  */
-	      if (single_succ_p (bb))
-		continue;
-	    }
-	  else
-	    {
-#ifdef HAVE_simple_return
-	      if (simple_p)
-		VEC_safe_push (rtx, heap,
-			       unconverted_simple_returns, jump);
-#endif
-	      continue;
-	    }
-
-	  /* Fix up the CFG for the successful change we just made.  */
-	  redirect_edge_succ (e, EXIT_BLOCK_PTR);
-	}
-      VEC_free (basic_block, heap, src_bbs);
-
-      if (HAVE_return)
-	{
-	  /* Emit a return insn for the exit fallthru block.  Whether
-	     this is still reachable will be determined later.  */
-
-	  emit_barrier_after (BB_END (last_bb));
-	  emit_return_into_block (false, last_bb);
-	  epilogue_end = BB_END (last_bb);
-	  if (JUMP_P (epilogue_end))
-	    set_return_jump_label (epilogue_end);
-	  single_succ_edge (last_bb)->flags &= ~EDGE_FALLTHRU;
-	  goto epilogue_done;
-	}
	}
     }
 #endif
@@ -6171,10 +6361,13 @@ epilogue_done:
      convert to conditional simple_returns, but couldn't for some
      reason, create a block to hold a simple_return insn and redirect
      those remaining edges.  */
-  if (!VEC_empty (rtx, unconverted_simple_returns))
+  if (!VEC_empty (edge, unconverted_simple_returns))
     {
+      basic_block simple_return_block_hot = NULL;
+      basic_block simple_return_block_cold = NULL;
+      edge pending_edge_hot = NULL;
+      edge pending_edge_cold = NULL;
       basic_block exit_pred = EXIT_BLOCK_PTR->prev_bb;
-      rtx jump;
       int i;
 
       gcc_assert (entry_edge != orig_entry_edge);
 
@@ -6184,25 +6377,48 @@ epilogue_done:
       if (returnjump != NULL_RTX
	  && JUMP_LABEL (returnjump) == simple_return_rtx)
	{
-	  edge e = split_block (exit_fallthru_edge->src,
-				PREV_INSN (returnjump));
+	  e = split_block (BLOCK_FOR_INSN (returnjump), PREV_INSN (returnjump));
	  if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
	    simple_return_block_hot = e->dest;
	  else
	    simple_return_block_cold = e->dest;
	}
 
-      FOR_EACH_VEC_ELT (rtx, unconverted_simple_returns, i, jump)
-	{
-	  basic_block src_bb = BLOCK_FOR_INSN (jump);
-	  edge e = find_edge (src_bb, last_bb);
-	  basic_block *pdest_bb;
+      /* Also check returns we might need to add to tail blocks.  */
+      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
+	if (EDGE_COUNT (e->src->preds) != 0
+	    && (e->flags & EDGE_FAKE) != 0
+	    && !bitmap_bit_p (&bb_flags, e->src->index))
+	  {
+	    if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
+	      pending_edge_hot = e;
+	    else
+	      pending_edge_cold = e;
+	  }
 
-	  if (BB_PARTITION (src_bb) == BB_HOT_PARTITION)
-	    pdest_bb = &simple_return_block_hot;
+      FOR_EACH_VEC_ELT (edge, unconverted_simple_returns, i, e)
+	{
+	  basic_block *pdest_bb;
+	  edge pending;
+
+	  if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
+	    {
+	      pdest_bb = &simple_return_block_hot;
+	      pending = pending_edge_hot;
+	    }
	  else
-	    pdest_bb = &simple_return_block_cold;
-	  if (*pdest_bb == NULL)
+	    {
+	      pdest_bb = &simple_return_block_cold;
+	      pending = pending_edge_cold;
+	    }
+
+	  if (*pdest_bb == NULL && pending != NULL)
+	    {
+	      emit_return_into_block (true, pending->src);
+	      pending->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
+	      *pdest_bb = pending->src;
+	    }
+	  else if (*pdest_bb == NULL)
	    {
	      basic_block bb;
	      rtx start;
@@ -6219,7 +6435,19 @@ epilogue_done:
	    }
 
	  redirect_edge_and_branch_force (e, *pdest_bb);
	}
-      VEC_free (rtx, heap, unconverted_simple_returns);
+      VEC_free (edge, heap, unconverted_simple_returns);
+    }
+
+  if (entry_edge != orig_entry_edge)
+    {
+      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
+	if (EDGE_COUNT (e->src->preds) != 0
+	    && (e->flags & EDGE_FAKE) != 0
+	    && !bitmap_bit_p (&bb_flags, e->src->index))
+	  {
+	    emit_return_into_block (true, e->src);
+	    e->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
+	  }
     }
 #endif
@@ -6233,8 +6461,11 @@ epilogue_done:
 
       if (!CALL_P (insn)
	  || ! SIBLING_CALL_P (insn)
+#ifdef HAVE_simple_return
	  || (entry_edge != orig_entry_edge
-	      && !bitmap_bit_p (&bb_flags, bb->index)))
+	      && !bitmap_bit_p (&bb_flags, bb->index))
+#endif
+	  )
	{
	  ei_next (&ei);
	  continue;
	}
@@ -6281,7 +6512,9 @@ epilogue_done:
     }
 #endif
 
+#ifdef HAVE_simple_return
   bitmap_clear (&bb_flags);
+#endif
 
   /* Threading the prologue and epilogue changes the artificial refs
      in the entry and exit blocks.  */
diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index e8fa0bd0fb99..ce60ce27604e 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -4167,13 +4167,14 @@ dead_or_predicable (basic_block test_bb, basic_block merge_bb,
	if (NONDEBUG_INSN_P (insn))
	  df_simulate_find_defs (insn, merge_set);
 
+#ifdef HAVE_simple_return
       /* If shrink-wrapping, disable this optimization when test_bb is
	 the first basic block and merge_bb exits.  The idea is to not move
	 code setting up a return register as that may clobber a register
	 used to pass function parameters, which then must be saved in
	 caller-saved regs.  A caller-saved reg requires the prologue,
	 killing a shrink-wrap opportunity.  */
-      if ((flag_shrink_wrap && !epilogue_completed)
+      if ((flag_shrink_wrap && HAVE_simple_return && !epilogue_completed)
	  && ENTRY_BLOCK_PTR->next_bb == test_bb
	  && single_succ_p (new_dest)
	  && single_succ (new_dest) == EXIT_BLOCK_PTR
@@ -4224,6 +4225,7 @@ dead_or_predicable (basic_block test_bb, basic_block merge_bb,
	    }
	  BITMAP_FREE (return_regs);
	}
+#endif
     }
 
 no_body:
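Finally, the ifcvt.c hunk above guards against if-conversion undoing all of this.  A sketch of the hazard it describes, again with invented names:

    struct item;
    extern int process (struct item *);   /* hypothetical callee */

    /* dead_or_predicable could hoist the "return dflt" body of the
       exiting merge_bb into the first block, ahead of the branch.  That
       copies the parameter register holding dflt into the return
       register on every path, so the value must survive the process()
       call in a call-saved register, which requires the prologue at
       function entry and kills the shrink-wrap opportunity.  */
    int
    lookup_or_default (struct item *p, int dflt)
    {
      if (p == 0)
        return dflt;
      return process (p);
    }

The added HAVE_simple_return test means the restriction is now applied only on targets where shrink-wrapping can actually take place.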