Emit vzerouppers after reload.

gcc/

2010-11-02  Uros Bizjak  <ubizjak@gmail.com>
	    H.J. Lu  <hongjiu.lu@intel.com>

	* config/i386/i386-protos.h (ix86_split_call_vzeroupper): New.
	(ix86_split_call_pop_vzeroupper): Likewise.

	* config/i386/i386.c (move_or_delete_vzeroupper_2): Rewrite
	the loop.
	(ix86_expand_call): Use UNSPEC_CALL_NEEDS_VZEROUPPER.
	(ix86_split_call_vzeroupper): New.
	(ix86_split_call_pop_vzeroupper): Likewise.

	* config/i386/i386.md (UNSPEC_CALL_NEEDS_VZEROUPPER): New.
	(*call_pop_0_vzeroupper): Likewise.
	(*call_pop_1_vzeroupper): Likewise.
	(*sibcall_pop_1_vzeroupper): Likewise.
	(*call_0_vzeroupper): Likewise.
	(*call_1_vzeroupper): Likewise.
	(*sibcall_1_vzeroupper): Likewise.
	(*call_1_rex64_vzeroupper): Likewise.
	(*call_1_rex64_ms_sysv_vzeroupper): New.
	(*call_1_rex64_large_vzeroupper): Likewise.
	(*sibcall_1_rex64_vzeroupper): Likewise.
	(*call_value_pop_0_vzeroupper): New.
	(*call_value_pop_1_vzeroupper): Likewise.
	(*sibcall_value_pop_1_vzeroupper): Likewise.
	(*call_value_0_vzeroupper): New.
	(*call_value_0_rex64_vzeroupper): Use
	(*call_value_0_rex64_ms_sysv_vzeroupper): Likewise.
	(*call_value_1_vzeroupper): Likewise.
	(*sibcall_value_1_vzeroupper): Likewise.
	(*call_value_1_rex64_vzeroupper): Likewise.
	(*call_value_1_rex64_ms_sysv_vzeroupper): Likewise.
	(*call_value_1_rex64_large_vzeroupper): Likewise.
	(*sibcall_value_1_rex64_vzeroupper): Likewise.

gcc/testsuite/

2010-11-02  H.J. Lu  <hongjiu.lu@intel.com>

	* gcc.target/i386/avx-vzeroupper-15.c: New.
	* gcc.target/i386/avx-vzeroupper-16.c: Likewise.
	* gcc.target/i386/avx-vzeroupper-17.c: Likewise.
	* gcc.target/i386/avx-vzeroupper-18.c: Likewise.

	PR target/46253
	* gcc.target/i386/pr46253.c: New.

From-SVN: r166208
This commit is contained in:
H.J. Lu 2010-11-02 11:08:44 -07:00
parent 12243af614
commit cfec3a7c5e
10 changed files with 563 additions and 111 deletions

View File

@ -1,3 +1,39 @@
2010-11-02 Uros Bizjak <ubizjak@gmail.com>
H.J. Lu <hongjiu.lu@intel.com>
* config/i386/i386-protos.h (ix86_split_call_vzeroupper): New.
(ix86_split_call_pop_vzeroupper): Likewise.
* config/i386/i386.c (move_or_delete_vzeroupper_2): Rewrite
the loop.
(ix86_expand_call): Use UNSPEC_CALL_NEEDS_VZEROUPPER.
(ix86_split_call_vzeroupper): New.
(ix86_split_call_pop_vzeroupper): Likewise.
* config/i386/i386.md (UNSPEC_CALL_NEEDS_VZEROUPPER): New.
(*call_pop_0_vzeroupper): Likewise.
(*call_pop_1_vzeroupper): Likewise.
(*sibcall_pop_1_vzeroupper): Likewise.
(*call_0_vzeroupper): Likewise.
(*call_1_vzeroupper): Likewise.
(*sibcall_1_vzeroupper): Likewise.
(*call_1_rex64_vzeroupper): Likewise.
(*call_1_rex64_ms_sysv_vzeroupper): New.
(*call_1_rex64_large_vzeroupper): Likewise.
(*sibcall_1_rex64_vzeroupper): Likewise.
(*call_value_pop_0_vzeroupper): New.
(*call_value_pop_1_vzeroupper): Likewise.
(*sibcall_value_pop_1_vzeroupper): Likewise.
(*call_value_0_vzeroupper): New.
(*call_value_0_rex64_vzeroupper): Use
(*call_value_0_rex64_ms_sysv_vzeroupper): Likewise.
(*call_value_1_vzeroupper): Likewise.
(*sibcall_value_1_vzeroupper): Likewise.
(*call_value_1_rex64_vzeroupper): Likewise.
(*call_value_1_rex64_ms_sysv_vzeroupper): Likewise.
(*call_value_1_rex64_large_vzeroupper): Likewise.
(*sibcall_value_1_rex64_vzeroupper): Likewise.
2010-11-02 Ian Lance Taylor <iant@google.com>
PR lto/46273

View File

@ -119,6 +119,8 @@ extern void ix86_expand_sse_unpack (rtx[], bool, bool);
extern void ix86_expand_sse4_unpack (rtx[], bool, bool);
extern bool ix86_expand_int_addcc (rtx[]);
extern rtx ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
extern void ix86_split_call_vzeroupper (rtx, rtx);
extern void ix86_split_call_pop_vzeroupper (rtx, rtx);
extern void x86_initialize_trampoline (rtx, rtx, rtx);
extern rtx ix86_zero_extend_to_Pmode (rtx);
extern void ix86_split_long_move (rtx[]);

View File

@ -108,163 +108,119 @@ check_avx256_stores (rtx dest, const_rtx set, void *data)
static void
move_or_delete_vzeroupper_2 (basic_block bb, bool upper_128bits_set)
{
rtx curr_insn, next_insn, prev_insn, insn;
rtx insn;
rtx vzeroupper_insn = NULL_RTX;
rtx pat;
int avx256;
if (dump_file)
fprintf (dump_file, " BB [%i] entry: upper 128bits: %d\n",
bb->index, upper_128bits_set);
for (curr_insn = BB_HEAD (bb);
curr_insn && curr_insn != NEXT_INSN (BB_END (bb));
curr_insn = next_insn)
insn = BB_HEAD (bb);
while (insn != BB_END (bb))
{
int avx256;
insn = NEXT_INSN (insn);
next_insn = NEXT_INSN (curr_insn);
if (!NONDEBUG_INSN_P (curr_insn))
if (!NONDEBUG_INSN_P (insn))
continue;
/* Search for vzeroupper. */
insn = PATTERN (curr_insn);
if (GET_CODE (insn) == UNSPEC_VOLATILE
&& XINT (insn, 1) == UNSPECV_VZEROUPPER)
/* Move vzeroupper before jump/call. */
if (JUMP_P (insn) || CALL_P (insn))
{
if (!vzeroupper_insn)
continue;
if (PREV_INSN (insn) != vzeroupper_insn)
{
if (dump_file)
{
fprintf (dump_file, "Move vzeroupper after:\n");
print_rtl_single (dump_file, PREV_INSN (insn));
fprintf (dump_file, "before:\n");
print_rtl_single (dump_file, insn);
}
reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
PREV_INSN (insn));
}
vzeroupper_insn = NULL_RTX;
continue;
}
pat = PATTERN (insn);
/* Check insn for vzeroupper intrinsic. */
if (GET_CODE (pat) == UNSPEC_VOLATILE
&& XINT (pat, 1) == UNSPECV_VZEROUPPER)
{
/* Found vzeroupper. */
if (dump_file)
{
/* Found vzeroupper intrinsic. */
fprintf (dump_file, "Found vzeroupper:\n");
print_rtl_single (dump_file, curr_insn);
print_rtl_single (dump_file, insn);
}
}
else
{
/* Check vzeroall intrinsic. */
if (GET_CODE (insn) == PARALLEL
&& GET_CODE (XVECEXP (insn, 0, 0)) == UNSPEC_VOLATILE
&& XINT (XVECEXP (insn, 0, 0), 1) == UNSPECV_VZEROALL)
upper_128bits_set = false;
else if (!upper_128bits_set)
/* Check insn for vzeroall intrinsic. */
if (GET_CODE (pat) == PARALLEL
&& GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
&& XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
{
/* Check if upper 128bits of AVX registers are used. */
note_stores (insn, check_avx256_stores,
&upper_128bits_set);
upper_128bits_set = false;
/* Delete pending vzeroupper insertion. */
if (vzeroupper_insn)
{
delete_insn (vzeroupper_insn);
vzeroupper_insn = NULL_RTX;
}
}
else if (!upper_128bits_set)
note_stores (pat, check_avx256_stores, &upper_128bits_set);
continue;
}
avx256 = INTVAL (XVECEXP (insn, 0, 0));
/* Process vzeroupper intrinsic. */
avx256 = INTVAL (XVECEXP (pat, 0, 0));
if (!upper_128bits_set)
{
/* Since the upper 128bits are cleared, callee must not pass
256bit AVX register. We only need to check if callee
returns 256bit AVX register. */
upper_128bits_set = avx256 == callee_return_avx256;
upper_128bits_set = (avx256 == callee_return_avx256);
/* Remove unnecessary vzeroupper since upper 128bits are
cleared. */
/* Remove unnecessary vzeroupper since
upper 128bits are cleared. */
if (dump_file)
{
fprintf (dump_file, "Delete redundant vzeroupper:\n");
print_rtl_single (dump_file, curr_insn);
print_rtl_single (dump_file, insn);
}
delete_insn (curr_insn);
continue;
delete_insn (insn);
}
else if (avx256 == callee_return_pass_avx256
|| avx256 == callee_pass_avx256)
{
/* Callee passes 256bit AVX register. Check if callee
returns 256bit AVX register. */
upper_128bits_set = avx256 == callee_return_pass_avx256;
upper_128bits_set = (avx256 == callee_return_pass_avx256);
/* Must remove vzeroupper since callee passes 256bit AVX
register. */
/* Must remove vzeroupper since
callee passes in 256bit AVX register. */
if (dump_file)
{
fprintf (dump_file, "Delete callee pass vzeroupper:\n");
print_rtl_single (dump_file, curr_insn);
print_rtl_single (dump_file, insn);
}
delete_insn (curr_insn);
continue;
}
/* Find the jump after vzeroupper. */
prev_insn = curr_insn;
if (avx256 == vzeroupper_intrinsic)
{
/* For vzeroupper intrinsic, check if there is another
vzeroupper. */
insn = NEXT_INSN (curr_insn);
while (insn)
{
if (NONJUMP_INSN_P (insn)
&& GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
&& XINT (PATTERN (insn), 1) == UNSPECV_VZEROUPPER)
{
if (dump_file)
{
fprintf (dump_file,
"Delete redundant vzeroupper intrinsic:\n");
print_rtl_single (dump_file, curr_insn);
}
delete_insn (curr_insn);
insn = NULL;
continue;
}
if (JUMP_P (insn) || CALL_P (insn))
break;
prev_insn = insn;
insn = NEXT_INSN (insn);
if (insn == NEXT_INSN (BB_END (bb)))
break;
}
/* Continue if redundant vzeroupper intrinsic is deleted. */
if (!insn)
continue;
delete_insn (insn);
}
else
{
/* Find the next jump/call. */
insn = NEXT_INSN (curr_insn);
while (insn)
{
if (JUMP_P (insn) || CALL_P (insn))
break;
prev_insn = insn;
insn = NEXT_INSN (insn);
if (insn == NEXT_INSN (BB_END (bb)))
break;
}
if (!insn)
gcc_unreachable();
upper_128bits_set = false;
vzeroupper_insn = insn;
}
/* Keep vzeroupper. */
upper_128bits_set = false;
/* Also allow label as the next instruction. */
if (insn == NEXT_INSN (BB_END (bb)) && !LABEL_P (insn))
gcc_unreachable();
/* Move vzeroupper before jump/call if neeeded. */
if (curr_insn != prev_insn)
{
reorder_insns_nobb (curr_insn, curr_insn, prev_insn);
if (dump_file)
{
fprintf (dump_file, "Move vzeroupper after:\n");
print_rtl_single (dump_file, prev_insn);
fprintf (dump_file, "before:\n");
print_rtl_single (dump_file, insn);
}
}
next_insn = NEXT_INSN (insn);
}
BLOCK_INFO (bb)->upper_128bits_set = upper_128bits_set;
@ -21565,10 +21521,12 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
+ 2, vec));
}
/* Emit vzeroupper if needed. */
/* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
if (TARGET_VZEROUPPER && cfun->machine->use_avx256_p)
{
rtx unspec;
int avx256;
cfun->machine->use_vzeroupper_p = 1;
if (cfun->machine->callee_pass_avx256_p)
{
@ -21581,7 +21539,11 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
avx256 = callee_return_avx256;
else
avx256 = call_no_avx256;
emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
unspec = gen_rtx_UNSPEC (VOIDmode,
gen_rtvec (1, GEN_INT (avx256)),
UNSPEC_CALL_NEEDS_VZEROUPPER);
call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, unspec));
}
call = emit_call_insn (call);
@ -21591,6 +21553,24 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
return call;
}
void
ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
{
rtx call = XVECEXP (PATTERN (insn), 0, 0);
emit_insn (gen_avx_vzeroupper (vzeroupper));
emit_call_insn (call);
}
void
ix86_split_call_pop_vzeroupper (rtx insn, rtx vzeroupper)
{
rtx call = XVECEXP (PATTERN (insn), 0, 0);
rtx pop = XVECEXP (PATTERN (insn), 0, 1);
emit_insn (gen_avx_vzeroupper (vzeroupper));
emit_call_insn (gen_rtx_PARALLEL (VOIDmode,
gen_rtvec (2, call, pop)));
}
/* Output the assembly for a call instruction. */
const char *

View File

@ -105,6 +105,7 @@
UNSPEC_LD_MPIC ; load_macho_picbase
UNSPEC_TRUNC_NOOP
UNSPEC_DIV_ALREADY_SPLIT
UNSPEC_CALL_NEEDS_VZEROUPPER
;; For SSE/MMX support:
UNSPEC_FIX_NOTRUNC
@ -11260,6 +11261,21 @@
DONE;
})
(define_insn_and_split "*call_pop_0_vzeroupper"
[(call (mem:QI (match_operand:SI 0 "constant_call_address_operand" ""))
(match_operand:SI 1 "" ""))
(set (reg:SI SP_REG)
(plus:SI (reg:SI SP_REG)
(match_operand:SI 2 "immediate_operand" "")))
(unspec [(match_operand 3 "const_int_operand" "")]
UNSPEC_CALL_NEEDS_VZEROUPPER)]
"TARGET_VZEROUPPER && !TARGET_64BIT"
"#"
"&& reload_completed"
[(const_int 0)]
"ix86_split_call_pop_vzeroupper (curr_insn, operands[3]); DONE;"
[(set_attr "type" "call")])
(define_insn "*call_pop_0"
[(call (mem:QI (match_operand:SI 0 "constant_call_address_operand" ""))
(match_operand:SI 1 "" ""))
@ -11275,6 +11291,21 @@
}
[(set_attr "type" "call")])
(define_insn_and_split "*call_pop_1_vzeroupper"
[(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm"))
(match_operand:SI 1 "" ""))
(set (reg:SI SP_REG)
(plus:SI (reg:SI SP_REG)
(match_operand:SI 2 "immediate_operand" "i")))
(unspec [(match_operand 3 "const_int_operand" "")]
UNSPEC_CALL_NEEDS_VZEROUPPER)]
"TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)"
"#"
"&& reload_completed"
[(const_int 0)]
"ix86_split_call_pop_vzeroupper (curr_insn, operands[3]); DONE;"
[(set_attr "type" "call")])
(define_insn "*call_pop_1"
[(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm"))
(match_operand:SI 1 "" ""))
@ -11289,6 +11320,21 @@
}
[(set_attr "type" "call")])
(define_insn_and_split "*sibcall_pop_1_vzeroupper"
[(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U"))
(match_operand:SI 1 "" ""))
(set (reg:SI SP_REG)
(plus:SI (reg:SI SP_REG)
(match_operand:SI 2 "immediate_operand" "i,i")))
(unspec [(match_operand 3 "const_int_operand" "")]
UNSPEC_CALL_NEEDS_VZEROUPPER)]
"TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)"
"#"
"&& reload_completed"
[(const_int 0)]
"ix86_split_call_pop_vzeroupper (curr_insn, operands[3]); DONE;"
[(set_attr "type" "call")])
(define_insn "*sibcall_pop_1"
[(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U"))
(match_operand:SI 1 "" ""))
@ -11321,6 +11367,18 @@
DONE;
})
(define_insn_and_split "*call_0_vzeroupper"
[(call (mem:QI (match_operand 0 "constant_call_address_operand" ""))
(match_operand 1 "" ""))
(unspec [(match_operand 2 "const_int_operand" "")]
UNSPEC_CALL_NEEDS_VZEROUPPER)]
"TARGET_VZEROUPPER"
"#"
"&& reload_completed"
[(const_int 0)]
"ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
[(set_attr "type" "call")])
(define_insn "*call_0"
[(call (mem:QI (match_operand 0 "constant_call_address_operand" ""))
(match_operand 1 "" ""))]
@ -11328,6 +11386,18 @@
{ return ix86_output_call_insn (insn, operands[0], 0); }
[(set_attr "type" "call")])
(define_insn_and_split "*call_1_vzeroupper"
[(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm"))
(match_operand 1 "" ""))
(unspec [(match_operand 2 "const_int_operand" "")]
UNSPEC_CALL_NEEDS_VZEROUPPER)]
"TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)"
"#"
"&& reload_completed"
[(const_int 0)]
"ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
[(set_attr "type" "call")])
(define_insn "*call_1"
[(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm"))
(match_operand 1 "" ""))]
@ -11335,6 +11405,18 @@
{ return ix86_output_call_insn (insn, operands[0], 0); }
[(set_attr "type" "call")])
(define_insn_and_split "*sibcall_1_vzeroupper"
[(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U"))
(match_operand 1 "" ""))
(unspec [(match_operand 2 "const_int_operand" "")]
UNSPEC_CALL_NEEDS_VZEROUPPER)]
"TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)"
"#"
"&& reload_completed"
[(const_int 0)]
"ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
[(set_attr "type" "call")])
(define_insn "*sibcall_1"
[(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U"))
(match_operand 1 "" ""))]
@ -11342,6 +11424,19 @@
{ return ix86_output_call_insn (insn, operands[0], 0); }
[(set_attr "type" "call")])
(define_insn_and_split "*call_1_rex64_vzeroupper"
[(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm"))
(match_operand 1 "" ""))
(unspec [(match_operand 2 "const_int_operand" "")]
UNSPEC_CALL_NEEDS_VZEROUPPER)]
"TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)
&& ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC"
"#"
"&& reload_completed"
[(const_int 0)]
"ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
[(set_attr "type" "call")])
(define_insn "*call_1_rex64"
[(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm"))
(match_operand 1 "" ""))]
@ -11350,6 +11445,32 @@
{ return ix86_output_call_insn (insn, operands[0], 0); }
[(set_attr "type" "call")])
(define_insn_and_split "*call_1_rex64_ms_sysv_vzeroupper"
[(parallel
[(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm"))
(match_operand 1 "" ""))
(unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
(clobber (reg:TI XMM6_REG))
(clobber (reg:TI XMM7_REG))
(clobber (reg:TI XMM8_REG))
(clobber (reg:TI XMM9_REG))
(clobber (reg:TI XMM10_REG))
(clobber (reg:TI XMM11_REG))
(clobber (reg:TI XMM12_REG))
(clobber (reg:TI XMM13_REG))
(clobber (reg:TI XMM14_REG))
(clobber (reg:TI XMM15_REG))
(clobber (reg:DI SI_REG))
(clobber (reg:DI DI_REG))])
(unspec [(match_operand 2 "const_int_operand" "")]
UNSPEC_CALL_NEEDS_VZEROUPPER)]
"TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
"#"
"&& reload_completed"
[(const_int 0)]
"ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
[(set_attr "type" "call")])
(define_insn "*call_1_rex64_ms_sysv"
[(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm"))
(match_operand 1 "" ""))
@ -11370,6 +11491,18 @@
{ return ix86_output_call_insn (insn, operands[0], 0); }
[(set_attr "type" "call")])
(define_insn_and_split "*call_1_rex64_large_vzeroupper"
[(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rm"))
(match_operand 1 "" ""))
(unspec [(match_operand 2 "const_int_operand" "")]
UNSPEC_CALL_NEEDS_VZEROUPPER)]
"TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
"#"
"&& reload_completed"
[(const_int 0)]
"ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
[(set_attr "type" "call")])
(define_insn "*call_1_rex64_large"
[(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rm"))
(match_operand 1 "" ""))]
@ -11377,6 +11510,18 @@
{ return ix86_output_call_insn (insn, operands[0], 0); }
[(set_attr "type" "call")])
(define_insn_and_split "*sibcall_1_rex64_vzeroupper"
[(call (mem:QI (match_operand:DI 0 "sibcall_insn_operand" "s,U"))
(match_operand 1 "" ""))
(unspec [(match_operand 2 "const_int_operand" "")]
UNSPEC_CALL_NEEDS_VZEROUPPER)]
"TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
"#"
"&& reload_completed"
[(const_int 0)]
"ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
[(set_attr "type" "call")])
(define_insn "*sibcall_1_rex64"
[(call (mem:QI (match_operand:DI 0 "sibcall_insn_operand" "s,U"))
(match_operand 1 "" ""))]
@ -17123,6 +17268,22 @@
;; Call-value patterns last so that the wildcard operand does not
;; disrupt insn-recog's switch tables.
(define_insn_and_split "*call_value_pop_0_vzeroupper"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:SI 1 "constant_call_address_operand" ""))
(match_operand:SI 2 "" "")))
(set (reg:SI SP_REG)
(plus:SI (reg:SI SP_REG)
(match_operand:SI 3 "immediate_operand" "")))
(unspec [(match_operand 4 "const_int_operand" "")]
UNSPEC_CALL_NEEDS_VZEROUPPER)]
"TARGET_VZEROUPPER && !TARGET_64BIT"
"#"
"&& reload_completed"
[(const_int 0)]
"ix86_split_call_pop_vzeroupper (curr_insn, operands[4]); DONE;"
[(set_attr "type" "callv")])
(define_insn "*call_value_pop_0"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:SI 1 "constant_call_address_operand" ""))
@ -17134,6 +17295,22 @@
{ return ix86_output_call_insn (insn, operands[1], 1); }
[(set_attr "type" "callv")])
(define_insn_and_split "*call_value_pop_1_vzeroupper"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm"))
(match_operand:SI 2 "" "")))
(set (reg:SI SP_REG)
(plus:SI (reg:SI SP_REG)
(match_operand:SI 3 "immediate_operand" "i")))
(unspec [(match_operand 4 "const_int_operand" "")]
UNSPEC_CALL_NEEDS_VZEROUPPER)]
"TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)"
"#"
"&& reload_completed"
[(const_int 0)]
"ix86_split_call_pop_vzeroupper (curr_insn, operands[4]); DONE;"
[(set_attr "type" "callv")])
(define_insn "*call_value_pop_1"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm"))
@ -17145,6 +17322,22 @@
{ return ix86_output_call_insn (insn, operands[1], 1); }
[(set_attr "type" "callv")])
(define_insn_and_split "*sibcall_value_pop_1_vzeroupper"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U"))
(match_operand:SI 2 "" "")))
(set (reg:SI SP_REG)
(plus:SI (reg:SI SP_REG)
(match_operand:SI 3 "immediate_operand" "i,i")))
(unspec [(match_operand 4 "const_int_operand" "")]
UNSPEC_CALL_NEEDS_VZEROUPPER)]
"TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)"
"#"
"&& reload_completed"
[(const_int 0)]
"ix86_split_call_pop_vzeroupper (curr_insn, operands[4]); DONE;"
[(set_attr "type" "callv")])
(define_insn "*sibcall_value_pop_1"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U"))
@ -17156,6 +17349,19 @@
{ return ix86_output_call_insn (insn, operands[1], 1); }
[(set_attr "type" "callv")])
(define_insn_and_split "*call_value_0_vzeroupper"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:SI 1 "constant_call_address_operand" ""))
(match_operand:SI 2 "" "")))
(unspec [(match_operand 3 "const_int_operand" "")]
UNSPEC_CALL_NEEDS_VZEROUPPER)]
"TARGET_VZEROUPPER && !TARGET_64BIT"
"#"
"&& reload_completed"
[(const_int 0)]
"ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
[(set_attr "type" "callv")])
(define_insn "*call_value_0"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:SI 1 "constant_call_address_operand" ""))
@ -17164,6 +17370,19 @@
{ return ix86_output_call_insn (insn, operands[1], 1); }
[(set_attr "type" "callv")])
(define_insn_and_split "*call_value_0_rex64_vzeroupper"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
(match_operand:DI 2 "const_int_operand" "")))
(unspec [(match_operand 3 "const_int_operand" "")]
UNSPEC_CALL_NEEDS_VZEROUPPER)]
"TARGET_VZEROUPPER && TARGET_64BIT"
"#"
"&& reload_completed"
[(const_int 0)]
"ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
[(set_attr "type" "callv")])
(define_insn "*call_value_0_rex64"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
@ -17172,6 +17391,33 @@
{ return ix86_output_call_insn (insn, operands[1], 1); }
[(set_attr "type" "callv")])
(define_insn_and_split "*call_value_0_rex64_ms_sysv_vzeroupper"
[(parallel
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
(match_operand:DI 2 "const_int_operand" "")))
(unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
(clobber (reg:TI XMM6_REG))
(clobber (reg:TI XMM7_REG))
(clobber (reg:TI XMM8_REG))
(clobber (reg:TI XMM9_REG))
(clobber (reg:TI XMM10_REG))
(clobber (reg:TI XMM11_REG))
(clobber (reg:TI XMM12_REG))
(clobber (reg:TI XMM13_REG))
(clobber (reg:TI XMM14_REG))
(clobber (reg:TI XMM15_REG))
(clobber (reg:DI SI_REG))
(clobber (reg:DI DI_REG))])
(unspec [(match_operand 3 "const_int_operand" "")]
UNSPEC_CALL_NEEDS_VZEROUPPER)]
"TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
"#"
"&& reload_completed"
[(const_int 0)]
"ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
[(set_attr "type" "callv")])
(define_insn "*call_value_0_rex64_ms_sysv"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
@ -17193,6 +17439,19 @@
{ return ix86_output_call_insn (insn, operands[1], 1); }
[(set_attr "type" "callv")])
(define_insn_and_split "*call_value_1_vzeroupper"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm"))
(match_operand:SI 2 "" "")))
(unspec [(match_operand 3 "const_int_operand" "")]
UNSPEC_CALL_NEEDS_VZEROUPPER)]
"TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)"
"#"
"&& reload_completed"
[(const_int 0)]
"ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
[(set_attr "type" "callv")])
(define_insn "*call_value_1"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm"))
@ -17201,6 +17460,19 @@
{ return ix86_output_call_insn (insn, operands[1], 1); }
[(set_attr "type" "callv")])
(define_insn_and_split "*sibcall_value_1_vzeroupper"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U"))
(match_operand:SI 2 "" "")))
(unspec [(match_operand 3 "const_int_operand" "")]
UNSPEC_CALL_NEEDS_VZEROUPPER)]
"TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)"
"#"
"&& reload_completed"
[(const_int 0)]
"ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
[(set_attr "type" "callv")])
(define_insn "*sibcall_value_1"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U"))
@ -17209,6 +17481,20 @@
{ return ix86_output_call_insn (insn, operands[1], 1); }
[(set_attr "type" "callv")])
(define_insn_and_split "*call_value_1_rex64_vzeroupper"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
(match_operand:DI 2 "" "")))
(unspec [(match_operand 3 "const_int_operand" "")]
UNSPEC_CALL_NEEDS_VZEROUPPER)]
"TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)
&& ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC"
"#"
"&& reload_completed"
[(const_int 0)]
"ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
[(set_attr "type" "callv")])
(define_insn "*call_value_1_rex64"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
@ -17218,6 +17504,33 @@
{ return ix86_output_call_insn (insn, operands[1], 1); }
[(set_attr "type" "callv")])
(define_insn_and_split "*call_value_1_rex64_ms_sysv_vzeroupper"
[(parallel
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
(match_operand:DI 2 "" "")))
(unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
(clobber (reg:TI XMM6_REG))
(clobber (reg:TI XMM7_REG))
(clobber (reg:TI XMM8_REG))
(clobber (reg:TI XMM9_REG))
(clobber (reg:TI XMM10_REG))
(clobber (reg:TI XMM11_REG))
(clobber (reg:TI XMM12_REG))
(clobber (reg:TI XMM13_REG))
(clobber (reg:TI XMM14_REG))
(clobber (reg:TI XMM15_REG))
(clobber (reg:DI SI_REG))
(clobber (reg:DI DI_REG))])
(unspec [(match_operand 3 "const_int_operand" "")]
UNSPEC_CALL_NEEDS_VZEROUPPER)]
"TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
"#"
"&& reload_completed"
[(const_int 0)]
"ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
[(set_attr "type" "callv")])
(define_insn "*call_value_1_rex64_ms_sysv"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
@ -17239,6 +17552,19 @@
{ return ix86_output_call_insn (insn, operands[1], 1); }
[(set_attr "type" "callv")])
(define_insn_and_split "*call_value_1_rex64_large_vzeroupper"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:DI 1 "call_insn_operand" "rm"))
(match_operand:DI 2 "" "")))
(unspec [(match_operand 3 "const_int_operand" "")]
UNSPEC_CALL_NEEDS_VZEROUPPER)]
"TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
"#"
"&& reload_completed"
[(const_int 0)]
"ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
[(set_attr "type" "callv")])
(define_insn "*call_value_1_rex64_large"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:DI 1 "call_insn_operand" "rm"))
@ -17247,6 +17573,19 @@
{ return ix86_output_call_insn (insn, operands[1], 1); }
[(set_attr "type" "callv")])
(define_insn_and_split "*sibcall_value_1_rex64_vzeroupper"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:DI 1 "sibcall_insn_operand" "s,U"))
(match_operand:DI 2 "" "")))
(unspec [(match_operand 3 "const_int_operand" "")]
UNSPEC_CALL_NEEDS_VZEROUPPER)]
"TARGET_VZEROUPPER && TARGET_64BIT && SIBLING_CALL_P (insn)"
"#"
"&& reload_completed"
[(const_int 0)]
"ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
[(set_attr "type" "callv")])
(define_insn "*sibcall_value_1_rex64"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:DI 1 "sibcall_insn_operand" "s,U"))

View File

@ -1,3 +1,13 @@
2010-11-02 H.J. Lu <hongjiu.lu@intel.com>
* gcc.target/i386/avx-vzeroupper-15.c: New.
* gcc.target/i386/avx-vzeroupper-16.c: Likewise.
* gcc.target/i386/avx-vzeroupper-17.c: Likewise.
* gcc.target/i386/avx-vzeroupper-18.c: Likewise.
PR target/46253
* gcc.target/i386/pr46253.c: New.
2010-11-02 Steven G. Kargl < kargl@gcc.gnu.org>
Tobias Burnus <burnus@net-b.de>

View File

@ -0,0 +1,16 @@
/* { dg-do compile } */
/* { dg-options "-O0 -mavx -mtune=generic -dp" } */
#include <immintrin.h>
extern __m256 x, y;
extern void (*bar) (void);
void
foo ()
{
x = y;
bar ();
}
/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */

View File

@ -0,0 +1,18 @@
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O0 -mavx -mabi=ms -mtune=generic -dp" } */
typedef float __m256 __attribute__ ((__vector_size__ (32), __may_alias__));
extern __m256 x;
extern __m256 __attribute__ ((sysv_abi)) bar (__m256);
void
foo (void)
{
bar (x);
}
/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */
/* { dg-final { scan-assembler-times "\\*call_value_0_rex64_ms_sysv" 1 } } */

View File

@ -0,0 +1,18 @@
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O0 -mavx -mabi=ms -mtune=generic -dp" } */
typedef float __m256 __attribute__ ((__vector_size__ (32), __may_alias__));
extern __m256 x;
extern __m256 __attribute__ ((sysv_abi)) (*bar) (__m256);
void
foo (void)
{
bar (x);
}
/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */
/* { dg-final { scan-assembler-times "\\*call_value_1_rex64_ms_sysv" 1 } } */

View File

@ -0,0 +1,18 @@
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O0 -mavx -mabi=ms -mtune=generic -dp" } */
typedef float __m256 __attribute__ ((__vector_size__ (32), __may_alias__));
extern __m256 x;
extern void __attribute__ ((sysv_abi)) bar (__m256);
void
foo (void)
{
bar (x);
}
/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
/* { dg-final { scan-assembler-times "\\*call_1_rex64_ms_sysv" 1 } } */

View File

@ -0,0 +1,15 @@
/* { dg-do compile } */
/* { dg-options "-O -g -mf16c -mtune=generic -dp" } */
typedef __m256i __attribute__ ((__vector_size__ (32)));
__m256i bar (void);
void foo (void)
{
int i = 0;
bar ();
__builtin_ia32_vzeroupper ();
while (++i);
}
/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */