diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b9d6cb4d0ff5..6c47cb876ec7 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,8 @@ +2017-01-05 Andreas Krebbel + + * config/s390/s390.c (s390_expand_setmem): Avoid overlapping bytes + between loop iterations. + 2017-01-05 Martin Liska PR sanitizer/78815 diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index 2082cb5e8b8e..257bce7fa093 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -5346,6 +5346,8 @@ s390_expand_movmem (rtx dst, rtx src, rtx len) void s390_expand_setmem (rtx dst, rtx len, rtx val) { + const int very_unlikely = REG_BR_PROB_BASE / 100 - 1; + if (GET_CODE (len) == CONST_INT && INTVAL (len) == 0) return; @@ -5391,13 +5393,14 @@ s390_expand_setmem (rtx dst, rtx len, rtx val) { rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX; rtx_code_label *loop_start_label = gen_label_rtx (); - rtx_code_label *loop_end_label = gen_label_rtx (); - rtx_code_label *end_label = gen_label_rtx (); + rtx_code_label *onebyte_end_label = gen_label_rtx (); + rtx_code_label *zerobyte_end_label = gen_label_rtx (); + rtx_code_label *restbyte_end_label = gen_label_rtx (); machine_mode mode; mode = GET_MODE (len); if (mode == VOIDmode) - mode = Pmode; + mode = Pmode; dst_addr = gen_reg_rtx (Pmode); count = gen_reg_rtx (mode); @@ -5405,39 +5408,56 @@ s390_expand_setmem (rtx dst, rtx len, rtx val) convert_move (count, len, 1); emit_cmp_and_jump_insns (count, const0_rtx, - EQ, NULL_RTX, mode, 1, end_label); + EQ, NULL_RTX, mode, 1, zerobyte_end_label, + very_unlikely); + /* We need to make a copy of the target address since memset is + supposed to return it unmodified. We have to make it here + already since the new reg is used at onebyte_end_label. */ emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX)); dst = change_address (dst, VOIDmode, dst_addr); - if (val == const0_rtx) - temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1, - OPTAB_DIRECT); - else + if (val != const0_rtx) { - dstp1 = adjust_address (dst, VOIDmode, 1); + /* When using the overlapping mvc the original target + address is only accessed as single byte entity (even by + the mvc reading this value). */ set_mem_size (dst, 1); - - /* Initialize memory by storing the first byte. */ - emit_move_insn (adjust_address (dst, QImode, 0), val); - - /* If count is 1 we are done. */ - emit_cmp_and_jump_insns (count, const1_rtx, - EQ, NULL_RTX, mode, 1, end_label); - - temp = expand_binop (mode, add_optab, count, GEN_INT (-2), count, 1, - OPTAB_DIRECT); + dstp1 = adjust_address (dst, VOIDmode, 1); + emit_cmp_and_jump_insns (count, + const1_rtx, EQ, NULL_RTX, mode, 1, + onebyte_end_label, very_unlikely); } + + /* There is one unconditional (mvi+mvc)/xc after the loop + dealing with the rest of the bytes, subtracting two (mvi+mvc) + or one (xc) here leaves this number of bytes to be handled by + it. */ + temp = expand_binop (mode, add_optab, count, + val == const0_rtx ? constm1_rtx : GEN_INT (-2), + count, 1, OPTAB_DIRECT); if (temp != count) - emit_move_insn (count, temp); + emit_move_insn (count, temp); temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1, OPTAB_DIRECT); if (temp != blocks) - emit_move_insn (blocks, temp); + emit_move_insn (blocks, temp); emit_cmp_and_jump_insns (blocks, const0_rtx, - EQ, NULL_RTX, mode, 1, loop_end_label); + EQ, NULL_RTX, mode, 1, restbyte_end_label); + + emit_jump (loop_start_label); + + if (val != const0_rtx) + { + /* The 1 byte != 0 special case. Not handled efficiently + since we require two jumps for that. However, this + should be very rare. */ + emit_label (onebyte_end_label); + emit_move_insn (adjust_address (dst, QImode, 0), val); + emit_jump (zerobyte_end_label); + } emit_label (loop_start_label); @@ -5455,26 +5475,39 @@ s390_expand_setmem (rtx dst, rtx len, rtx val) if (val == const0_rtx) emit_insn (gen_clrmem_short (dst, GEN_INT (255))); else - emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (255))); + { + /* Set the first byte in the block to the value and use an + overlapping mvc for the block. */ + emit_move_insn (adjust_address (dst, QImode, 0), val); + emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (254))); + } s390_load_address (dst_addr, gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256))); temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1, OPTAB_DIRECT); if (temp != blocks) - emit_move_insn (blocks, temp); + emit_move_insn (blocks, temp); emit_cmp_and_jump_insns (blocks, const0_rtx, - EQ, NULL_RTX, mode, 1, loop_end_label); + NE, NULL_RTX, mode, 1, loop_start_label); - emit_jump (loop_start_label); - emit_label (loop_end_label); + emit_label (restbyte_end_label); if (val == const0_rtx) - emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1))); + emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1))); else - emit_insn (gen_movmem_short (dstp1, dst, convert_to_mode (Pmode, count, 1))); - emit_label (end_label); + { + /* Set the first byte in the block to the value and use an + overlapping mvc for the block. */ + emit_move_insn (adjust_address (dst, QImode, 0), val); + /* execute only uses the lowest 8 bits of count that's + exactly what we need here. */ + emit_insn (gen_movmem_short (dstp1, dst, + convert_to_mode (Pmode, count, 1))); + } + + emit_label (zerobyte_end_label); } }