i386.c (ix86_expand_vector_init_one_nonzero): Renamed from ix86_expand_vector_init_low_nonzero.

* config/i386/i386.c (ix86_expand_vector_init_one_nonzero): Renamed
	from ix86_expand_vector_init_low_nonzero.  Take an additional
	one_var argument indicating which element is non-zero.  Support
	one_var != 0 for V4SFmode and V4SImode by permuting the result.
	(ix86_expand_vector_init): Call ix86_expand_vector_init_one_nonzero
	with one_var instead of ix86_expand_vector_init_low_nonzero.

	* gcc.target/i386/vecinit-1.c: New test case.
	* gcc.target/i386/vecinit-2.c: Likewise.

From-SVN: r112832
This commit is contained in:
Roger Sayle 2006-04-10 21:01:19 +00:00 committed by Roger Sayle
parent e3df376d22
commit acef130fab
5 changed files with 98 additions and 9 deletions

View File

@ -1,3 +1,12 @@
2006-04-10 Roger Sayle <roger@eyesopen.com>
* config/i386/i386.c (ix86_expand_vector_init_one_nonzero): Renamed
from ix86_expand_vector_init_low_nonzero. Take an additional
one_var argument indicating which element is non-zero. Support
one_var != 0 for V4SFmode and V4SImode by permuting the result.
(ix86_expand_vector_init): Call ix86_expand_vector_init_one_nonzero
with one_var instead of ix86_expand_vector_init_low_nonzero.
2006-04-10 Kazu Hirata <kazu@codesourcery.com>
* Makefile.in (tree-into-ssa.o, tree-outof-ssa.o,

View File

@ -17880,15 +17880,16 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
}
/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
whose low element is VAR, and other elements are zero. Return true
whose ONE_VAR element is VAR, and other elements are zero. Return true
if successful. */
static bool
ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
rtx target, rtx var)
ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
rtx target, rtx var, int one_var)
{
enum machine_mode vsimode;
rtx x;
rtx new_target;
rtx x, tmp;
switch (mode)
{
@ -17900,6 +17901,8 @@ ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
case V2DFmode:
case V2DImode:
if (one_var != 0)
return false;
var = force_reg (GET_MODE_INNER (mode), var);
x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
emit_insn (gen_rtx_SET (VOIDmode, target, x));
@ -17907,10 +17910,55 @@ ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
case V4SFmode:
case V4SImode:
if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
new_target = gen_reg_rtx (mode);
else
new_target = target;
var = force_reg (GET_MODE_INNER (mode), var);
x = gen_rtx_VEC_DUPLICATE (mode, var);
x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
emit_insn (gen_rtx_SET (VOIDmode, target, x));
emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
if (one_var != 0)
{
/* We need to shuffle the value to the correct position, so
create a new pseudo to store the intermediate result. */
/* With SSE2, we can use the integer shuffle insns. */
if (mode != V4SFmode && TARGET_SSE2)
{
emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
GEN_INT (1),
GEN_INT (one_var == 1 ? 0 : 1),
GEN_INT (one_var == 2 ? 0 : 1),
GEN_INT (one_var == 3 ? 0 : 1)));
if (target != new_target)
emit_move_insn (target, new_target);
return true;
}
/* Otherwise convert the intermediate result to V4SFmode and
use the SSE1 shuffle instructions. */
if (mode != V4SFmode)
{
tmp = gen_reg_rtx (V4SFmode);
emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
}
else
tmp = new_target;
emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
GEN_INT (1),
GEN_INT (one_var == 1 ? 0 : 1),
GEN_INT (one_var == 2 ? 0+4 : 1+4),
GEN_INT (one_var == 3 ? 0+4 : 1+4)));
if (mode != V4SFmode)
emit_move_insn (target, gen_lowpart (V4SImode, tmp));
else if (tmp != target)
emit_move_insn (target, tmp);
}
else if (target != new_target)
emit_move_insn (target, new_target);
return true;
case V8HImode:
@ -17924,11 +17972,15 @@ ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
vsimode = V2SImode;
goto widen;
widen:
if (one_var != 0)
return false;
/* Zero extend the variable element to SImode and recurse. */
var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
x = gen_reg_rtx (vsimode);
if (!ix86_expand_vector_init_low_nonzero (mmx_ok, vsimode, x, var))
if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
var, one_var))
gcc_unreachable ();
emit_move_insn (target, gen_lowpart (mode, x));
@ -18185,9 +18237,10 @@ ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
the pool and overwritten via move later. */
if (n_var == 1)
{
if (all_const_zero && one_var == 0
&& ix86_expand_vector_init_low_nonzero (mmx_ok, mode, target,
XVECEXP (vals, 0, 0)))
if (all_const_zero
&& ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
XVECEXP (vals, 0, one_var),
one_var))
return;
if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))

View File

@ -1,3 +1,8 @@
2006-04-10 Roger Sayle <roger@eyesopen.com>
* gcc.target/i386/vecinit-1.c: New test case.
* gcc.target/i386/vecinit-2.c: Likewise.
2006-04-10 Jakub Jelinek <jakub@redhat.com>
PR debug/27057

View File

@ -0,0 +1,11 @@
/* { dg-do compile } */
/* { dg-options "-O2 -msse2" } */
#define vector __attribute__((vector_size(16)))
float a;
vector float f1(void) { return (vector float){ a, 0.0, 0.0, 0.0}; }
vector float f2(void) { return (vector float){ 0.0, a, 0.0, 0.0}; }
vector float f3(void) { return (vector float){ 0.0, 0.0, a, 0.0}; }
vector float f4(void) { return (vector float){ 0.0, 0.0, 0.0, a}; }
/* { dg-final { scan-assembler-not "movaps" } } */
/* { dg-final { scan-assembler-not "xor" } } */

View File

@ -0,0 +1,11 @@
/* { dg-do compile } */
/* { dg-options "-O2 -msse2" } */
#define vector __attribute__((vector_size(16)))
int a;
vector int f1(void) { return (vector int){ a, 0, 0, 0}; }
vector int f2(void) { return (vector int){ 0, a, 0, 0}; }
vector int f3(void) { return (vector int){ 0, 0, a, 0}; }
vector int f4(void) { return (vector int){ 0, 0, 0, a}; }
/* { dg-final { scan-assembler-not "movaps" } } */
/* { dg-final { scan-assembler-not "xor" } } */