mirror of git://gcc.gnu.org/git/gcc.git
i386.md (float partial SSE register stall splitter): Move splitter near its instruction pattern.
* config/i386/i386.md (float partial SSE register stall splitter): Move splitter near its instruction pattern. (float_extend partial SSE register stall splitter): Ditto. (float_truncate partial SSE register stall splitter): Ditto. From-SVN: r264185
This commit is contained in:
parent
8eb7aec132
commit
4801cc61b1
|
|
@ -1,3 +1,10 @@
|
||||||
|
2018-09-09 Uros Bizjak <ubizjak@gmail.com>
|
||||||
|
|
||||||
|
* config/i386/i386.md (float partial SSE register stall splitter): Move
|
||||||
|
splitter near its instruction pattern.
|
||||||
|
(float_extend partial SSE register stall splitter): Ditto.
|
||||||
|
(float_truncate partial SSE register stall splitter): Ditto.
|
||||||
|
|
||||||
2018-09-09 Hans-Peter Nilsson <hp@bitrange.com>
|
2018-09-09 Hans-Peter Nilsson <hp@bitrange.com>
|
||||||
|
|
||||||
PR target/86794
|
PR target/86794
|
||||||
|
|
|
||||||
|
|
@ -4477,6 +4477,40 @@
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
(define_insn "*extendsfdf2"
|
||||||
|
[(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v")
|
||||||
|
(float_extend:DF
|
||||||
|
(match_operand:SF 1 "nonimmediate_operand" "fm,f,vm")))]
|
||||||
|
"TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
|
||||||
|
{
|
||||||
|
switch (which_alternative)
|
||||||
|
{
|
||||||
|
case 0:
|
||||||
|
case 1:
|
||||||
|
return output_387_reg_move (insn, operands);
|
||||||
|
|
||||||
|
case 2:
|
||||||
|
return "%vcvtss2sd\t{%1, %d0|%d0, %1}";
|
||||||
|
|
||||||
|
default:
|
||||||
|
gcc_unreachable ();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
[(set_attr "type" "fmov,fmov,ssecvt")
|
||||||
|
(set_attr "prefix" "orig,orig,maybe_vex")
|
||||||
|
(set_attr "mode" "SF,XF,DF")
|
||||||
|
(set (attr "enabled")
|
||||||
|
(if_then_else
|
||||||
|
(match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
|
||||||
|
(if_then_else
|
||||||
|
(eq_attr "alternative" "0,1")
|
||||||
|
(symbol_ref "TARGET_MIX_SSE_I387")
|
||||||
|
(symbol_ref "true"))
|
||||||
|
(if_then_else
|
||||||
|
(eq_attr "alternative" "0,1")
|
||||||
|
(symbol_ref "true")
|
||||||
|
(symbol_ref "false"))))])
|
||||||
|
|
||||||
/* For converting SF(xmm2) to DF(xmm1), use the following code instead of
|
/* For converting SF(xmm2) to DF(xmm1), use the following code instead of
|
||||||
cvtss2sd:
|
cvtss2sd:
|
||||||
unpcklps xmm2,xmm2 ; packed conversion might crash on signaling NaNs
|
unpcklps xmm2,xmm2 ; packed conversion might crash on signaling NaNs
|
||||||
|
|
@ -4544,39 +4578,31 @@
|
||||||
(set (match_dup 0) (float_extend:DF (match_dup 2)))]
|
(set (match_dup 0) (float_extend:DF (match_dup 2)))]
|
||||||
"operands[2] = lowpart_subreg (SFmode, operands[0], DFmode);")
|
"operands[2] = lowpart_subreg (SFmode, operands[0], DFmode);")
|
||||||
|
|
||||||
(define_insn "*extendsfdf2"
|
;; Break partial reg stall for cvtss2sd. This splitter should split
|
||||||
[(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v")
|
;; late in the pass sequence (after register rename pass),
|
||||||
|
;; so allocated registers won't change anymore.
|
||||||
|
|
||||||
|
(define_split
|
||||||
|
[(set (match_operand:DF 0 "sse_reg_operand")
|
||||||
(float_extend:DF
|
(float_extend:DF
|
||||||
(match_operand:SF 1 "nonimmediate_operand" "fm,f,vm")))]
|
(match_operand:SF 1 "nonimmediate_operand")))]
|
||||||
"TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
|
"TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
|
||||||
|
&& optimize_function_for_speed_p (cfun)
|
||||||
|
&& (!REG_P (operands[1])
|
||||||
|
|| REGNO (operands[0]) != REGNO (operands[1]))
|
||||||
|
&& (!EXT_REX_SSE_REG_P (operands[0])
|
||||||
|
|| TARGET_AVX512VL)"
|
||||||
|
[(set (match_dup 0)
|
||||||
|
(vec_merge:V2DF
|
||||||
|
(vec_duplicate:V2DF
|
||||||
|
(float_extend:DF
|
||||||
|
(match_dup 1)))
|
||||||
|
(match_dup 0)
|
||||||
|
(const_int 1)))]
|
||||||
{
|
{
|
||||||
switch (which_alternative)
|
operands[0] = lowpart_subreg (V2DFmode, operands[0], DFmode);
|
||||||
{
|
emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
|
||||||
case 0:
|
})
|
||||||
case 1:
|
|
||||||
return output_387_reg_move (insn, operands);
|
|
||||||
|
|
||||||
case 2:
|
|
||||||
return "%vcvtss2sd\t{%1, %d0|%d0, %1}";
|
|
||||||
|
|
||||||
default:
|
|
||||||
gcc_unreachable ();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
[(set_attr "type" "fmov,fmov,ssecvt")
|
|
||||||
(set_attr "prefix" "orig,orig,maybe_vex")
|
|
||||||
(set_attr "mode" "SF,XF,DF")
|
|
||||||
(set (attr "enabled")
|
|
||||||
(if_then_else
|
|
||||||
(match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
|
|
||||||
(if_then_else
|
|
||||||
(eq_attr "alternative" "0,1")
|
|
||||||
(symbol_ref "TARGET_MIX_SSE_I387")
|
|
||||||
(symbol_ref "true"))
|
|
||||||
(if_then_else
|
|
||||||
(eq_attr "alternative" "0,1")
|
|
||||||
(symbol_ref "true")
|
|
||||||
(symbol_ref "false"))))])
|
|
||||||
|
|
||||||
(define_expand "extend<mode>xf2"
|
(define_expand "extend<mode>xf2"
|
||||||
[(set (match_operand:XF 0 "nonimmediate_operand")
|
[(set (match_operand:XF 0 "nonimmediate_operand")
|
||||||
|
|
@ -4710,6 +4736,32 @@
|
||||||
(set (match_dup 0) (float_truncate:SF (match_dup 2)))]
|
(set (match_dup 0) (float_truncate:SF (match_dup 2)))]
|
||||||
"operands[2] = lowpart_subreg (DFmode, operands[0], SFmode);")
|
"operands[2] = lowpart_subreg (DFmode, operands[0], SFmode);")
|
||||||
|
|
||||||
|
;; Break partial reg stall for cvtsd2ss. This splitter should split
|
||||||
|
;; late in the pass sequence (after register rename pass),
|
||||||
|
;; so allocated registers won't change anymore.
|
||||||
|
|
||||||
|
(define_split
|
||||||
|
[(set (match_operand:SF 0 "sse_reg_operand")
|
||||||
|
(float_truncate:SF
|
||||||
|
(match_operand:DF 1 "nonimmediate_operand")))]
|
||||||
|
"TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
|
||||||
|
&& optimize_function_for_speed_p (cfun)
|
||||||
|
&& (!REG_P (operands[1])
|
||||||
|
|| REGNO (operands[0]) != REGNO (operands[1]))
|
||||||
|
&& (!EXT_REX_SSE_REG_P (operands[0])
|
||||||
|
|| TARGET_AVX512VL)"
|
||||||
|
[(set (match_dup 0)
|
||||||
|
(vec_merge:V4SF
|
||||||
|
(vec_duplicate:V4SF
|
||||||
|
(float_truncate:SF
|
||||||
|
(match_dup 1)))
|
||||||
|
(match_dup 0)
|
||||||
|
(const_int 1)))]
|
||||||
|
{
|
||||||
|
operands[0] = lowpart_subreg (V4SFmode, operands[0], SFmode);
|
||||||
|
emit_move_insn (operands[0], CONST0_RTX (V4SFmode));
|
||||||
|
})
|
||||||
|
|
||||||
;; Conversion from XFmode to {SF,DF}mode
|
;; Conversion from XFmode to {SF,DF}mode
|
||||||
|
|
||||||
(define_insn "truncxf<mode>2"
|
(define_insn "truncxf<mode>2"
|
||||||
|
|
@ -5152,83 +5204,6 @@
|
||||||
DONE;
|
DONE;
|
||||||
})
|
})
|
||||||
|
|
||||||
;; Avoid partial SSE register dependency stalls. This splitter should split
|
|
||||||
;; late in the pass sequence (after register rename pass), so allocated
|
|
||||||
;; registers won't change anymore.
|
|
||||||
|
|
||||||
(define_split
|
|
||||||
[(set (match_operand:MODEF 0 "sse_reg_operand")
|
|
||||||
(float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))]
|
|
||||||
"TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
|
|
||||||
&& optimize_function_for_speed_p (cfun)
|
|
||||||
&& (!EXT_REX_SSE_REG_P (operands[0])
|
|
||||||
|| TARGET_AVX512VL)"
|
|
||||||
[(set (match_dup 0)
|
|
||||||
(vec_merge:<MODEF:ssevecmode>
|
|
||||||
(vec_duplicate:<MODEF:ssevecmode>
|
|
||||||
(float:MODEF
|
|
||||||
(match_dup 1)))
|
|
||||||
(match_dup 0)
|
|
||||||
(const_int 1)))]
|
|
||||||
{
|
|
||||||
const machine_mode vmode = <MODEF:ssevecmode>mode;
|
|
||||||
|
|
||||||
operands[0] = lowpart_subreg (vmode, operands[0], <MODEF:MODE>mode);
|
|
||||||
emit_move_insn (operands[0], CONST0_RTX (vmode));
|
|
||||||
})
|
|
||||||
|
|
||||||
;; Break partial reg stall for cvtsd2ss. This splitter should split
|
|
||||||
;; late in the pass sequence (after register rename pass),
|
|
||||||
;; so allocated registers won't change anymore.
|
|
||||||
|
|
||||||
(define_split
|
|
||||||
[(set (match_operand:SF 0 "sse_reg_operand")
|
|
||||||
(float_truncate:SF
|
|
||||||
(match_operand:DF 1 "nonimmediate_operand")))]
|
|
||||||
"TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
|
|
||||||
&& optimize_function_for_speed_p (cfun)
|
|
||||||
&& (!REG_P (operands[1])
|
|
||||||
|| REGNO (operands[0]) != REGNO (operands[1]))
|
|
||||||
&& (!EXT_REX_SSE_REG_P (operands[0])
|
|
||||||
|| TARGET_AVX512VL)"
|
|
||||||
[(set (match_dup 0)
|
|
||||||
(vec_merge:V4SF
|
|
||||||
(vec_duplicate:V4SF
|
|
||||||
(float_truncate:SF
|
|
||||||
(match_dup 1)))
|
|
||||||
(match_dup 0)
|
|
||||||
(const_int 1)))]
|
|
||||||
{
|
|
||||||
operands[0] = lowpart_subreg (V4SFmode, operands[0], SFmode);
|
|
||||||
emit_move_insn (operands[0], CONST0_RTX (V4SFmode));
|
|
||||||
})
|
|
||||||
|
|
||||||
;; Break partial reg stall for cvtss2sd. This splitter should split
|
|
||||||
;; late in the pass sequence (after register rename pass),
|
|
||||||
;; so allocated registers won't change anymore.
|
|
||||||
|
|
||||||
(define_split
|
|
||||||
[(set (match_operand:DF 0 "sse_reg_operand")
|
|
||||||
(float_extend:DF
|
|
||||||
(match_operand:SF 1 "nonimmediate_operand")))]
|
|
||||||
"TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
|
|
||||||
&& optimize_function_for_speed_p (cfun)
|
|
||||||
&& (!REG_P (operands[1])
|
|
||||||
|| REGNO (operands[0]) != REGNO (operands[1]))
|
|
||||||
&& (!EXT_REX_SSE_REG_P (operands[0])
|
|
||||||
|| TARGET_AVX512VL)"
|
|
||||||
[(set (match_dup 0)
|
|
||||||
(vec_merge:V2DF
|
|
||||||
(vec_duplicate:V2DF
|
|
||||||
(float_extend:DF
|
|
||||||
(match_dup 1)))
|
|
||||||
(match_dup 0)
|
|
||||||
(const_int 1)))]
|
|
||||||
{
|
|
||||||
operands[0] = lowpart_subreg (V2DFmode, operands[0], DFmode);
|
|
||||||
emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
|
|
||||||
})
|
|
||||||
|
|
||||||
;; Avoid store forwarding (partial memory) stall penalty
|
;; Avoid store forwarding (partial memory) stall penalty
|
||||||
;; by passing DImode value through XMM registers.
|
;; by passing DImode value through XMM registers.
|
||||||
|
|
||||||
|
|
@ -5279,6 +5254,31 @@
|
||||||
(set_attr "unit" "i387")
|
(set_attr "unit" "i387")
|
||||||
(set_attr "fp_int_src" "true")])
|
(set_attr "fp_int_src" "true")])
|
||||||
|
|
||||||
|
;; Avoid partial SSE register dependency stalls. This splitter should split
|
||||||
|
;; late in the pass sequence (after register rename pass), so allocated
|
||||||
|
;; registers won't change anymore.
|
||||||
|
|
||||||
|
(define_split
|
||||||
|
[(set (match_operand:MODEF 0 "sse_reg_operand")
|
||||||
|
(float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))]
|
||||||
|
"TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
|
||||||
|
&& optimize_function_for_speed_p (cfun)
|
||||||
|
&& (!EXT_REX_SSE_REG_P (operands[0])
|
||||||
|
|| TARGET_AVX512VL)"
|
||||||
|
[(set (match_dup 0)
|
||||||
|
(vec_merge:<MODEF:ssevecmode>
|
||||||
|
(vec_duplicate:<MODEF:ssevecmode>
|
||||||
|
(float:MODEF
|
||||||
|
(match_dup 1)))
|
||||||
|
(match_dup 0)
|
||||||
|
(const_int 1)))]
|
||||||
|
{
|
||||||
|
const machine_mode vmode = <MODEF:ssevecmode>mode;
|
||||||
|
|
||||||
|
operands[0] = lowpart_subreg (vmode, operands[0], <MODEF:MODE>mode);
|
||||||
|
emit_move_insn (operands[0], CONST0_RTX (vmode));
|
||||||
|
})
|
||||||
|
|
||||||
(define_expand "floatuns<SWI12:mode><MODEF:mode>2"
|
(define_expand "floatuns<SWI12:mode><MODEF:mode>2"
|
||||||
[(set (match_operand:MODEF 0 "register_operand")
|
[(set (match_operand:MODEF 0 "register_operand")
|
||||||
(unsigned_float:MODEF
|
(unsigned_float:MODEF
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue