mirror of git://gcc.gnu.org/git/gcc.git
i386.md (float partial SSE register stall splitter): Move splitter near its instruction pattern.
* config/i386/i386.md (float partial SSE register stall splitter): Move splitter near its instruction pattern. (float_extend partial SSE register stall splitter): Ditto. (float_truncate partial SSE register stall splitter): Ditto. From-SVN: r264185
This commit is contained in:
parent
8eb7aec132
commit
4801cc61b1
|
|
@ -1,3 +1,10 @@
|
|||
2018-09-09 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
* config/i386/i386.md (float partial SSE register stall splitter): Move
|
||||
splitter near its instruction pattern.
|
||||
(float_extend partial SSE register stall splitter): Ditto.
|
||||
(float_truncate partial SSE register stall splitter): Ditto.
|
||||
|
||||
2018-09-09 Hans-Peter Nilsson <hp@bitrange.com>
|
||||
|
||||
PR target/86794
|
||||
|
|
|
|||
|
|
@ -4477,6 +4477,40 @@
|
|||
}
|
||||
})
|
||||
|
||||
(define_insn "*extendsfdf2"
|
||||
[(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v")
|
||||
(float_extend:DF
|
||||
(match_operand:SF 1 "nonimmediate_operand" "fm,f,vm")))]
|
||||
"TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
|
||||
{
|
||||
switch (which_alternative)
|
||||
{
|
||||
case 0:
|
||||
case 1:
|
||||
return output_387_reg_move (insn, operands);
|
||||
|
||||
case 2:
|
||||
return "%vcvtss2sd\t{%1, %d0|%d0, %1}";
|
||||
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
[(set_attr "type" "fmov,fmov,ssecvt")
|
||||
(set_attr "prefix" "orig,orig,maybe_vex")
|
||||
(set_attr "mode" "SF,XF,DF")
|
||||
(set (attr "enabled")
|
||||
(if_then_else
|
||||
(match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
|
||||
(if_then_else
|
||||
(eq_attr "alternative" "0,1")
|
||||
(symbol_ref "TARGET_MIX_SSE_I387")
|
||||
(symbol_ref "true"))
|
||||
(if_then_else
|
||||
(eq_attr "alternative" "0,1")
|
||||
(symbol_ref "true")
|
||||
(symbol_ref "false"))))])
|
||||
|
||||
/* For converting SF(xmm2) to DF(xmm1), use the following code instead of
|
||||
cvtss2sd:
|
||||
unpcklps xmm2,xmm2 ; packed conversion might crash on signaling NaNs
|
||||
|
|
@ -4544,39 +4578,31 @@
|
|||
(set (match_dup 0) (float_extend:DF (match_dup 2)))]
|
||||
"operands[2] = lowpart_subreg (SFmode, operands[0], DFmode);")
|
||||
|
||||
(define_insn "*extendsfdf2"
|
||||
[(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v")
|
||||
;; Break partial reg stall for cvtss2sd. This splitter should split
|
||||
;; late in the pass sequence (after register rename pass),
|
||||
;; so allocated registers won't change anymore.
|
||||
|
||||
(define_split
|
||||
[(set (match_operand:DF 0 "sse_reg_operand")
|
||||
(float_extend:DF
|
||||
(match_operand:SF 1 "nonimmediate_operand" "fm,f,vm")))]
|
||||
"TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
|
||||
(match_operand:SF 1 "nonimmediate_operand")))]
|
||||
"TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
|
||||
&& optimize_function_for_speed_p (cfun)
|
||||
&& (!REG_P (operands[1])
|
||||
|| REGNO (operands[0]) != REGNO (operands[1]))
|
||||
&& (!EXT_REX_SSE_REG_P (operands[0])
|
||||
|| TARGET_AVX512VL)"
|
||||
[(set (match_dup 0)
|
||||
(vec_merge:V2DF
|
||||
(vec_duplicate:V2DF
|
||||
(float_extend:DF
|
||||
(match_dup 1)))
|
||||
(match_dup 0)
|
||||
(const_int 1)))]
|
||||
{
|
||||
switch (which_alternative)
|
||||
{
|
||||
case 0:
|
||||
case 1:
|
||||
return output_387_reg_move (insn, operands);
|
||||
|
||||
case 2:
|
||||
return "%vcvtss2sd\t{%1, %d0|%d0, %1}";
|
||||
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
[(set_attr "type" "fmov,fmov,ssecvt")
|
||||
(set_attr "prefix" "orig,orig,maybe_vex")
|
||||
(set_attr "mode" "SF,XF,DF")
|
||||
(set (attr "enabled")
|
||||
(if_then_else
|
||||
(match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
|
||||
(if_then_else
|
||||
(eq_attr "alternative" "0,1")
|
||||
(symbol_ref "TARGET_MIX_SSE_I387")
|
||||
(symbol_ref "true"))
|
||||
(if_then_else
|
||||
(eq_attr "alternative" "0,1")
|
||||
(symbol_ref "true")
|
||||
(symbol_ref "false"))))])
|
||||
operands[0] = lowpart_subreg (V2DFmode, operands[0], DFmode);
|
||||
emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
|
||||
})
|
||||
|
||||
(define_expand "extend<mode>xf2"
|
||||
[(set (match_operand:XF 0 "nonimmediate_operand")
|
||||
|
|
@ -4710,6 +4736,32 @@
|
|||
(set (match_dup 0) (float_truncate:SF (match_dup 2)))]
|
||||
"operands[2] = lowpart_subreg (DFmode, operands[0], SFmode);")
|
||||
|
||||
;; Break partial reg stall for cvtsd2ss. This splitter should split
|
||||
;; late in the pass sequence (after register rename pass),
|
||||
;; so allocated registers won't change anymore.
|
||||
|
||||
(define_split
|
||||
[(set (match_operand:SF 0 "sse_reg_operand")
|
||||
(float_truncate:SF
|
||||
(match_operand:DF 1 "nonimmediate_operand")))]
|
||||
"TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
|
||||
&& optimize_function_for_speed_p (cfun)
|
||||
&& (!REG_P (operands[1])
|
||||
|| REGNO (operands[0]) != REGNO (operands[1]))
|
||||
&& (!EXT_REX_SSE_REG_P (operands[0])
|
||||
|| TARGET_AVX512VL)"
|
||||
[(set (match_dup 0)
|
||||
(vec_merge:V4SF
|
||||
(vec_duplicate:V4SF
|
||||
(float_truncate:SF
|
||||
(match_dup 1)))
|
||||
(match_dup 0)
|
||||
(const_int 1)))]
|
||||
{
|
||||
operands[0] = lowpart_subreg (V4SFmode, operands[0], SFmode);
|
||||
emit_move_insn (operands[0], CONST0_RTX (V4SFmode));
|
||||
})
|
||||
|
||||
;; Conversion from XFmode to {SF,DF}mode
|
||||
|
||||
(define_insn "truncxf<mode>2"
|
||||
|
|
@ -5152,83 +5204,6 @@
|
|||
DONE;
|
||||
})
|
||||
|
||||
;; Avoid partial SSE register dependency stalls. This splitter should split
|
||||
;; late in the pass sequence (after register rename pass), so allocated
|
||||
;; registers won't change anymore.
|
||||
|
||||
(define_split
|
||||
[(set (match_operand:MODEF 0 "sse_reg_operand")
|
||||
(float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))]
|
||||
"TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
|
||||
&& optimize_function_for_speed_p (cfun)
|
||||
&& (!EXT_REX_SSE_REG_P (operands[0])
|
||||
|| TARGET_AVX512VL)"
|
||||
[(set (match_dup 0)
|
||||
(vec_merge:<MODEF:ssevecmode>
|
||||
(vec_duplicate:<MODEF:ssevecmode>
|
||||
(float:MODEF
|
||||
(match_dup 1)))
|
||||
(match_dup 0)
|
||||
(const_int 1)))]
|
||||
{
|
||||
const machine_mode vmode = <MODEF:ssevecmode>mode;
|
||||
|
||||
operands[0] = lowpart_subreg (vmode, operands[0], <MODEF:MODE>mode);
|
||||
emit_move_insn (operands[0], CONST0_RTX (vmode));
|
||||
})
|
||||
|
||||
;; Break partial reg stall for cvtsd2ss. This splitter should split
|
||||
;; late in the pass sequence (after register rename pass),
|
||||
;; so allocated registers won't change anymore.
|
||||
|
||||
(define_split
|
||||
[(set (match_operand:SF 0 "sse_reg_operand")
|
||||
(float_truncate:SF
|
||||
(match_operand:DF 1 "nonimmediate_operand")))]
|
||||
"TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
|
||||
&& optimize_function_for_speed_p (cfun)
|
||||
&& (!REG_P (operands[1])
|
||||
|| REGNO (operands[0]) != REGNO (operands[1]))
|
||||
&& (!EXT_REX_SSE_REG_P (operands[0])
|
||||
|| TARGET_AVX512VL)"
|
||||
[(set (match_dup 0)
|
||||
(vec_merge:V4SF
|
||||
(vec_duplicate:V4SF
|
||||
(float_truncate:SF
|
||||
(match_dup 1)))
|
||||
(match_dup 0)
|
||||
(const_int 1)))]
|
||||
{
|
||||
operands[0] = lowpart_subreg (V4SFmode, operands[0], SFmode);
|
||||
emit_move_insn (operands[0], CONST0_RTX (V4SFmode));
|
||||
})
|
||||
|
||||
;; Break partial reg stall for cvtss2sd. This splitter should split
|
||||
;; late in the pass sequence (after register rename pass),
|
||||
;; so allocated registers won't change anymore.
|
||||
|
||||
(define_split
|
||||
[(set (match_operand:DF 0 "sse_reg_operand")
|
||||
(float_extend:DF
|
||||
(match_operand:SF 1 "nonimmediate_operand")))]
|
||||
"TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
|
||||
&& optimize_function_for_speed_p (cfun)
|
||||
&& (!REG_P (operands[1])
|
||||
|| REGNO (operands[0]) != REGNO (operands[1]))
|
||||
&& (!EXT_REX_SSE_REG_P (operands[0])
|
||||
|| TARGET_AVX512VL)"
|
||||
[(set (match_dup 0)
|
||||
(vec_merge:V2DF
|
||||
(vec_duplicate:V2DF
|
||||
(float_extend:DF
|
||||
(match_dup 1)))
|
||||
(match_dup 0)
|
||||
(const_int 1)))]
|
||||
{
|
||||
operands[0] = lowpart_subreg (V2DFmode, operands[0], DFmode);
|
||||
emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
|
||||
})
|
||||
|
||||
;; Avoid store forwarding (partial memory) stall penalty
|
||||
;; by passing DImode value through XMM registers.
|
||||
|
||||
|
|
@ -5279,6 +5254,31 @@
|
|||
(set_attr "unit" "i387")
|
||||
(set_attr "fp_int_src" "true")])
|
||||
|
||||
;; Avoid partial SSE register dependency stalls. This splitter should split
|
||||
;; late in the pass sequence (after register rename pass), so allocated
|
||||
;; registers won't change anymore.
|
||||
|
||||
(define_split
|
||||
[(set (match_operand:MODEF 0 "sse_reg_operand")
|
||||
(float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))]
|
||||
"TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
|
||||
&& optimize_function_for_speed_p (cfun)
|
||||
&& (!EXT_REX_SSE_REG_P (operands[0])
|
||||
|| TARGET_AVX512VL)"
|
||||
[(set (match_dup 0)
|
||||
(vec_merge:<MODEF:ssevecmode>
|
||||
(vec_duplicate:<MODEF:ssevecmode>
|
||||
(float:MODEF
|
||||
(match_dup 1)))
|
||||
(match_dup 0)
|
||||
(const_int 1)))]
|
||||
{
|
||||
const machine_mode vmode = <MODEF:ssevecmode>mode;
|
||||
|
||||
operands[0] = lowpart_subreg (vmode, operands[0], <MODEF:MODE>mode);
|
||||
emit_move_insn (operands[0], CONST0_RTX (vmode));
|
||||
})
|
||||
|
||||
(define_expand "floatuns<SWI12:mode><MODEF:mode>2"
|
||||
[(set (match_operand:MODEF 0 "register_operand")
|
||||
(unsigned_float:MODEF
|
||||
|
|
|
|||
Loading…
Reference in New Issue