mirror of git://gcc.gnu.org/git/gcc.git
re PR target/44141 (Redundant loads and stores generated for AMD bdver1 target)
PR target/44141 * config/i386/i386.c (ix86_expand_vector_move_misalign): Do not handle 128 bit vectors specially for TARGET_AVX. Emit sse2_movupd and sse_movupd RTXes for TARGET_AVX, TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL or when optimizing for size. * config/i386/sse.md (*mov<mode>_internal): Remove TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL handling from asm output code. Calculate "mode" attribute according to optimize_function_for_size_p and TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL flag. (*<sse>_movu<ssemodesuffix><avxsizesuffix>): Choose asm template depending on the mode of the instruction. Calculate "mode" attribute according to optimize_function_for_size_p, TARGET_SSE_TYPELESS_STORES and TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL flags. (*<sse2>_movdqu<avxsizesuffix>): Ditto. From-SVN: r187347
This commit is contained in:
parent
eac188c5bc
commit
20f9034bc3
|
@ -1,3 +1,20 @@
|
|||
2012-05-09 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
PR target/44141
|
||||
* config/i386/i386.c (ix86_expand_vector_move_misalign): Do not handle
|
||||
128 bit vectors specially for TARGET_AVX. Emit sse2_movupd and
|
||||
sse_movupd RTXes for TARGET_AVX, TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
|
||||
or when optimizing for size.
|
||||
* config/i386/sse.md (*mov<mode>_internal): Remove
|
||||
TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL handling from asm output code.
|
||||
Calculate "mode" attribute according to optimize_function_for_size_p
|
||||
and TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL flag.
|
||||
(*<sse>_movu<ssemodesuffix><avxsizesuffix>): Choose asm template
|
||||
depending on the mode of the instruction. Calculate "mode" attribute
|
||||
according to optimize_function_for_size_p, TARGET_SSE_TYPELESS_STORES
|
||||
and TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL flags.
|
||||
(*<sse2>_movdqu<avxsizesuffix>): Ditto.
|
||||
|
||||
2012-05-09 Georg-Johann Lay <avr@gjlay.de>
|
||||
|
||||
PR target/53256
|
||||
|
@ -161,7 +178,7 @@
|
|||
PR target/51244
|
||||
* config/sh/sh.md (*branch_true, *branch_false): New insns.
|
||||
|
||||
2012-05-08 Teresa Johnson <tejohnson@google.com>
|
||||
2012-05-08 Teresa Johnson <tejohnson@google.com>
|
||||
|
||||
* gcov-io.h (__gcov_reset, __gcov_dump): Declare.
|
||||
* doc/gcov.texi: Add note on using __gcov_reset and __gcov_dump.
|
||||
|
@ -180,8 +197,7 @@
|
|||
(clone_function_name): Likewise.
|
||||
(cgraph_create_virtual_clone): Likewise.
|
||||
(cgraph_remove_node_and_inline_clones): Likewise.
|
||||
(cgraph_redirect_edge_call_stmt_to_callee): Move here from
|
||||
cgraphunit.c
|
||||
(cgraph_redirect_edge_call_stmt_to_callee): Move here from cgraphunit.c
|
||||
* cgraph.h: Reorder declarations so they match file of origin.
|
||||
(cgraph_create_empty_node): Declare.
|
||||
* cgraphunit.c (update_call_expr): Move to cgraphclones.c
|
||||
|
@ -702,7 +718,7 @@
|
|||
|
||||
Enable -Wunused-local-typedefs when -Wall or -Wunused is on
|
||||
* opts.c (finish_options): Activate -Wunused-local-typedefs if
|
||||
-Wunused is activated.
|
||||
-Wunused is activated.
|
||||
* doc/invoke.texi: Update blurb of -Wunused-local-typedefs.
|
||||
|
||||
2012-05-04 Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
|
||||
|
@ -1757,7 +1773,7 @@
|
|||
* config/pa/pa.c (pa_legitimate_constant_p): Don't put function labels
|
||||
in constant pool.
|
||||
|
||||
2012-04-27 Ollie Wild <aaw@google.com>
|
||||
2012-04-27 Ollie Wild <aaw@google.com>
|
||||
|
||||
* doc/invoke.texi (Wliteral-suffix): Document new option.
|
||||
|
||||
|
|
|
@ -15907,60 +15907,19 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
|
|||
op0 = operands[0];
|
||||
op1 = operands[1];
|
||||
|
||||
if (TARGET_AVX)
|
||||
if (TARGET_AVX
|
||||
&& GET_MODE_SIZE (mode) == 32)
|
||||
{
|
||||
switch (GET_MODE_CLASS (mode))
|
||||
{
|
||||
case MODE_VECTOR_INT:
|
||||
case MODE_INT:
|
||||
switch (GET_MODE_SIZE (mode))
|
||||
{
|
||||
case 16:
|
||||
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
|
||||
{
|
||||
op0 = gen_lowpart (V4SFmode, op0);
|
||||
op1 = gen_lowpart (V4SFmode, op1);
|
||||
emit_insn (gen_sse_movups (op0, op1));
|
||||
}
|
||||
else
|
||||
{
|
||||
op0 = gen_lowpart (V16QImode, op0);
|
||||
op1 = gen_lowpart (V16QImode, op1);
|
||||
emit_insn (gen_sse2_movdqu (op0, op1));
|
||||
}
|
||||
break;
|
||||
case 32:
|
||||
op0 = gen_lowpart (V32QImode, op0);
|
||||
op1 = gen_lowpart (V32QImode, op1);
|
||||
ix86_avx256_split_vector_move_misalign (op0, op1);
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
break;
|
||||
op0 = gen_lowpart (V32QImode, op0);
|
||||
op1 = gen_lowpart (V32QImode, op1);
|
||||
/* FALLTHRU */
|
||||
|
||||
case MODE_VECTOR_FLOAT:
|
||||
switch (mode)
|
||||
{
|
||||
case V4SFmode:
|
||||
emit_insn (gen_sse_movups (op0, op1));
|
||||
break;
|
||||
case V2DFmode:
|
||||
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
|
||||
{
|
||||
op0 = gen_lowpart (V4SFmode, op0);
|
||||
op1 = gen_lowpart (V4SFmode, op1);
|
||||
emit_insn (gen_sse_movups (op0, op1));
|
||||
}
|
||||
else
|
||||
emit_insn (gen_sse2_movupd (op0, op1));
|
||||
break;
|
||||
case V8SFmode:
|
||||
case V4DFmode:
|
||||
ix86_avx256_split_vector_move_misalign (op0, op1);
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
ix86_avx256_split_vector_move_misalign (op0, op1);
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -15972,16 +15931,6 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
|
|||
|
||||
if (MEM_P (op1))
|
||||
{
|
||||
/* If we're optimizing for size, movups is the smallest. */
|
||||
if (optimize_insn_for_size_p ()
|
||||
|| TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
|
||||
{
|
||||
op0 = gen_lowpart (V4SFmode, op0);
|
||||
op1 = gen_lowpart (V4SFmode, op1);
|
||||
emit_insn (gen_sse_movups (op0, op1));
|
||||
return;
|
||||
}
|
||||
|
||||
/* ??? If we have typed data, then it would appear that using
|
||||
movdqu is the only way to get unaligned data loaded with
|
||||
integer type. */
|
||||
|
@ -15989,16 +15938,19 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
|
|||
{
|
||||
op0 = gen_lowpart (V16QImode, op0);
|
||||
op1 = gen_lowpart (V16QImode, op1);
|
||||
/* We will eventually emit movups based on insn attributes. */
|
||||
emit_insn (gen_sse2_movdqu (op0, op1));
|
||||
return;
|
||||
}
|
||||
|
||||
if (TARGET_SSE2 && mode == V2DFmode)
|
||||
else if (TARGET_SSE2 && mode == V2DFmode)
|
||||
{
|
||||
rtx zero;
|
||||
|
||||
if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
|
||||
if (TARGET_AVX
|
||||
|| TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
|
||||
|| TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
|
||||
|| optimize_function_for_size_p (cfun))
|
||||
{
|
||||
/* We will eventually emit movups based on insn attributes. */
|
||||
emit_insn (gen_sse2_movupd (op0, op1));
|
||||
return;
|
||||
}
|
||||
|
@ -16030,7 +15982,10 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
|
|||
}
|
||||
else
|
||||
{
|
||||
if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
|
||||
if (TARGET_AVX
|
||||
|| TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
|
||||
|| TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
|
||||
|| optimize_function_for_size_p (cfun))
|
||||
{
|
||||
op0 = gen_lowpart (V4SFmode, op0);
|
||||
op1 = gen_lowpart (V4SFmode, op1);
|
||||
|
@ -16045,6 +16000,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
|
|||
|
||||
if (mode != V4SFmode)
|
||||
op0 = gen_lowpart (V4SFmode, op0);
|
||||
|
||||
m = adjust_address (op1, V2SFmode, 0);
|
||||
emit_insn (gen_sse_loadlps (op0, op0, m));
|
||||
m = adjust_address (op1, V2SFmode, 8);
|
||||
|
@ -16053,30 +16009,20 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
|
|||
}
|
||||
else if (MEM_P (op0))
|
||||
{
|
||||
/* If we're optimizing for size, movups is the smallest. */
|
||||
if (optimize_insn_for_size_p ()
|
||||
|| TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
|
||||
{
|
||||
op0 = gen_lowpart (V4SFmode, op0);
|
||||
op1 = gen_lowpart (V4SFmode, op1);
|
||||
emit_insn (gen_sse_movups (op0, op1));
|
||||
return;
|
||||
}
|
||||
|
||||
/* ??? Similar to above, only less clear
|
||||
because of typeless stores. */
|
||||
if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
|
||||
&& GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
|
||||
if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
|
||||
{
|
||||
op0 = gen_lowpart (V16QImode, op0);
|
||||
op1 = gen_lowpart (V16QImode, op1);
|
||||
/* We will eventually emit movups based on insn attributes. */
|
||||
emit_insn (gen_sse2_movdqu (op0, op1));
|
||||
return;
|
||||
}
|
||||
|
||||
if (TARGET_SSE2 && mode == V2DFmode)
|
||||
else if (TARGET_SSE2 && mode == V2DFmode)
|
||||
{
|
||||
if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
|
||||
if (TARGET_AVX
|
||||
|| TARGET_SSE_UNALIGNED_STORE_OPTIMAL
|
||||
|| TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
|
||||
|| optimize_function_for_size_p (cfun))
|
||||
/* We will eventually emit movups based on insn attributes. */
|
||||
emit_insn (gen_sse2_movupd (op0, op1));
|
||||
else
|
||||
{
|
||||
|
@ -16091,7 +16037,10 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
|
|||
if (mode != V4SFmode)
|
||||
op1 = gen_lowpart (V4SFmode, op1);
|
||||
|
||||
if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
|
||||
if (TARGET_AVX
|
||||
|| TARGET_SSE_UNALIGNED_STORE_OPTIMAL
|
||||
|| TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
|
||||
|| optimize_function_for_size_p (cfun))
|
||||
{
|
||||
op0 = gen_lowpart (V4SFmode, op0);
|
||||
emit_insn (gen_sse_movups (op0, op1));
|
||||
|
|
|
@ -449,8 +449,6 @@
|
|||
&& (misaligned_operand (operands[0], <MODE>mode)
|
||||
|| misaligned_operand (operands[1], <MODE>mode)))
|
||||
return "vmovupd\t{%1, %0|%0, %1}";
|
||||
else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
|
||||
return "%vmovaps\t{%1, %0|%0, %1}";
|
||||
else
|
||||
return "%vmovapd\t{%1, %0|%0, %1}";
|
||||
|
||||
|
@ -460,8 +458,6 @@
|
|||
&& (misaligned_operand (operands[0], <MODE>mode)
|
||||
|| misaligned_operand (operands[1], <MODE>mode)))
|
||||
return "vmovdqu\t{%1, %0|%0, %1}";
|
||||
else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
|
||||
return "%vmovaps\t{%1, %0|%0, %1}";
|
||||
else
|
||||
return "%vmovdqa\t{%1, %0|%0, %1}";
|
||||
|
||||
|
@ -475,19 +471,21 @@
|
|||
[(set_attr "type" "sselog1,ssemov,ssemov")
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set (attr "mode")
|
||||
(cond [(match_test "TARGET_AVX")
|
||||
(cond [(and (eq_attr "alternative" "1,2")
|
||||
(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
|
||||
(if_then_else
|
||||
(match_test "GET_MODE_SIZE (<MODE>mode) > 16")
|
||||
(const_string "V8SF")
|
||||
(const_string "V4SF"))
|
||||
(match_test "TARGET_AVX")
|
||||
(const_string "<sseinsnmode>")
|
||||
(ior (ior (match_test "optimize_function_for_size_p (cfun)")
|
||||
(not (match_test "TARGET_SSE2")))
|
||||
(ior (and (eq_attr "alternative" "1,2")
|
||||
(match_test "optimize_function_for_size_p (cfun)"))
|
||||
(and (eq_attr "alternative" "2")
|
||||
(match_test "TARGET_SSE_TYPELESS_STORES")))
|
||||
(const_string "V4SF")
|
||||
(eq (const_string "<MODE>mode") (const_string "V4SFmode"))
|
||||
(const_string "V4SF")
|
||||
(eq (const_string "<MODE>mode") (const_string "V2DFmode"))
|
||||
(const_string "V2DF")
|
||||
]
|
||||
(const_string "TI")))])
|
||||
(const_string "<sseinsnmode>")))])
|
||||
|
||||
(define_insn "sse2_movq128"
|
||||
[(set (match_operand:V2DI 0 "register_operand" "=x")
|
||||
|
@ -597,11 +595,33 @@
|
|||
[(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
|
||||
UNSPEC_MOVU))]
|
||||
"TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
|
||||
"%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
|
||||
{
|
||||
switch (get_attr_mode (insn))
|
||||
{
|
||||
case MODE_V8SF:
|
||||
case MODE_V4SF:
|
||||
return "%vmovups\t{%1, %0|%0, %1}";
|
||||
default:
|
||||
return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
|
||||
}
|
||||
}
|
||||
[(set_attr "type" "ssemov")
|
||||
(set_attr "movu" "1")
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
(set (attr "mode")
|
||||
(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
|
||||
(if_then_else
|
||||
(match_test "GET_MODE_SIZE (<MODE>mode) > 16")
|
||||
(const_string "V8SF")
|
||||
(const_string "V4SF"))
|
||||
(match_test "TARGET_AVX")
|
||||
(const_string "<MODE>")
|
||||
(ior (match_test "optimize_function_for_size_p (cfun)")
|
||||
(and (eq_attr "alternative" "1")
|
||||
(match_test "TARGET_SSE_TYPELESS_STORES")))
|
||||
(const_string "V4SF")
|
||||
]
|
||||
(const_string "<MODE>")))])
|
||||
|
||||
(define_expand "<sse2>_movdqu<avxsizesuffix>"
|
||||
[(set (match_operand:VI1 0 "nonimmediate_operand")
|
||||
|
@ -618,7 +638,16 @@
|
|||
(unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
|
||||
UNSPEC_MOVU))]
|
||||
"TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
|
||||
"%vmovdqu\t{%1, %0|%0, %1}"
|
||||
{
|
||||
switch (get_attr_mode (insn))
|
||||
{
|
||||
case MODE_V8SF:
|
||||
case MODE_V4SF:
|
||||
return "%vmovups\t{%1, %0|%0, %1}";
|
||||
default:
|
||||
return "%vmovdqu\t{%1, %0|%0, %1}";
|
||||
}
|
||||
}
|
||||
[(set_attr "type" "ssemov")
|
||||
(set_attr "movu" "1")
|
||||
(set (attr "prefix_data16")
|
||||
|
@ -627,7 +656,20 @@
|
|||
(const_string "*")
|
||||
(const_string "1")))
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "mode" "<sseinsnmode>")])
|
||||
(set (attr "mode")
|
||||
(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
|
||||
(if_then_else
|
||||
(match_test "GET_MODE_SIZE (<MODE>mode) > 16")
|
||||
(const_string "V8SF")
|
||||
(const_string "V4SF"))
|
||||
(match_test "TARGET_AVX")
|
||||
(const_string "<sseinsnmode>")
|
||||
(ior (match_test "optimize_function_for_size_p (cfun)")
|
||||
(and (eq_attr "alternative" "1")
|
||||
(match_test "TARGET_SSE_TYPELESS_STORES")))
|
||||
(const_string "V4SF")
|
||||
]
|
||||
(const_string "<sseinsnmode>")))])
|
||||
|
||||
(define_insn "<sse3>_lddqu<avxsizesuffix>"
|
||||
[(set (match_operand:VI1 0 "register_operand" "=x")
|
||||
|
|
Loading…
Reference in New Issue