mirror of git://gcc.gnu.org/git/gcc.git
i386: Add and use one_operand_p to vec_perm control struct.
* config/i386/i386.c (struct expand_vec_perm_d): Add one_operand_p. (ix86_expand_vector_init_duplicate): Initialize it. (expand_vec_perm_palignr): Likewise. (ix86_expand_vec_perm_const): Likewise. (ix86_vectorize_vec_perm_const_ok): Likewise. (expand_vec_perm_blend): Use it. (expand_vec_perm_vpermil): Likewise. (expand_vec_perm_pshufb): Likewise. (expand_vec_perm_1): Likewise. (expand_vec_perm_pshuflw_pshufhw): Likewise. (expand_vec_perm_interleave2): Likewise. (expand_vec_perm_vpermq_perm_1): Likewise. (expand_vec_perm_vperm2f128): Likewise. (expand_vec_perm_interleave3): Likewise. (expand_vec_perm_vperm2f128_vblend): Likewise. (expand_vec_perm_vpshufb2_vpermq): Likewise. (expand_vec_perm_vpshufb2_vpermq_even_odd): Likewise,. (expand_vec_perm_broadcast): Likewise. (expand_vec_perm_vpshufb4_vpermq2): Likewise. From-SVN: r186017
This commit is contained in:
parent
7b470faf61
commit
e025da0c31
|
@ -1,3 +1,25 @@
|
|||
2012-03-30 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* config/i386/i386.c (struct expand_vec_perm_d): Add one_operand_p.
|
||||
(ix86_expand_vector_init_duplicate): Initialize it.
|
||||
(expand_vec_perm_palignr): Likewise.
|
||||
(ix86_expand_vec_perm_const): Likewise.
|
||||
(ix86_vectorize_vec_perm_const_ok): Likewise.
|
||||
(expand_vec_perm_blend): Use it.
|
||||
(expand_vec_perm_vpermil): Likewise.
|
||||
(expand_vec_perm_pshufb): Likewise.
|
||||
(expand_vec_perm_1): Likewise.
|
||||
(expand_vec_perm_pshuflw_pshufhw): Likewise.
|
||||
(expand_vec_perm_interleave2): Likewise.
|
||||
(expand_vec_perm_vpermq_perm_1): Likewise.
|
||||
(expand_vec_perm_vperm2f128): Likewise.
|
||||
(expand_vec_perm_interleave3): Likewise.
|
||||
(expand_vec_perm_vperm2f128_vblend): Likewise.
|
||||
(expand_vec_perm_vpshufb2_vpermq): Likewise.
|
||||
(expand_vec_perm_vpshufb2_vpermq_even_odd): Likewise,.
|
||||
(expand_vec_perm_broadcast): Likewise.
|
||||
(expand_vec_perm_vpshufb4_vpermq2): Likewise.
|
||||
|
||||
2012-03-30 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* dwarf2out.c (gen_variable_die): Initialize off.
|
||||
|
|
|
@ -32938,6 +32938,7 @@ struct expand_vec_perm_d
|
|||
unsigned char perm[MAX_VECT_LEN];
|
||||
enum machine_mode vmode;
|
||||
unsigned char nelt;
|
||||
bool one_operand_p;
|
||||
bool testing_p;
|
||||
};
|
||||
|
||||
|
@ -33038,6 +33039,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
|
|||
dperm.vmode = mode;
|
||||
dperm.nelt = GET_MODE_NUNITS (mode);
|
||||
dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
|
||||
dperm.one_operand_p = true;
|
||||
|
||||
/* Extend to SImode using a paradoxical SUBREG. */
|
||||
tmp1 = gen_reg_rtx (SImode);
|
||||
|
@ -35735,7 +35737,7 @@ expand_vec_perm_blend (struct expand_vec_perm_d *d)
|
|||
rtx target, op0, op1, x;
|
||||
rtx rperm[32], vperm;
|
||||
|
||||
if (d->op0 == d->op1)
|
||||
if (d->one_operand_p)
|
||||
return false;
|
||||
if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
|
||||
;
|
||||
|
@ -35922,7 +35924,7 @@ expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
|
|||
rtx rperm[8], vperm;
|
||||
unsigned i;
|
||||
|
||||
if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
|
||||
if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
|
||||
return false;
|
||||
|
||||
/* We can only permute within the 128-bit lane. */
|
||||
|
@ -35998,7 +36000,7 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
|
|||
|
||||
nelt = d->nelt;
|
||||
|
||||
if (d->op0 != d->op1)
|
||||
if (!d->one_operand_p)
|
||||
{
|
||||
if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
|
||||
{
|
||||
|
@ -36086,7 +36088,7 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
|
|||
else
|
||||
{
|
||||
eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
|
||||
if (d->op0 != d->op1)
|
||||
if (!d->one_operand_p)
|
||||
mask = 2 * nelt - 1;
|
||||
else if (vmode == V16QImode)
|
||||
mask = nelt - 1;
|
||||
|
@ -36113,7 +36115,7 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
|
|||
|
||||
target = gen_lowpart (vmode, d->target);
|
||||
op0 = gen_lowpart (vmode, d->op0);
|
||||
if (d->op0 == d->op1)
|
||||
if (d->one_operand_p)
|
||||
{
|
||||
if (vmode == V16QImode)
|
||||
emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
|
||||
|
@ -36145,7 +36147,7 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
|
|||
/* Check plain VEC_SELECT first, because AVX has instructions that could
|
||||
match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
|
||||
input where SEL+CONCAT may not. */
|
||||
if (d->op0 == d->op1)
|
||||
if (d->one_operand_p)
|
||||
{
|
||||
int mask = nelt - 1;
|
||||
bool identity_perm = true;
|
||||
|
@ -36242,7 +36244,7 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
|
|||
return true;
|
||||
|
||||
/* Recognize interleave style patterns with reversed operands. */
|
||||
if (d->op0 != d->op1)
|
||||
if (!d->one_operand_p)
|
||||
{
|
||||
for (i = 0; i < nelt; ++i)
|
||||
{
|
||||
|
@ -36285,7 +36287,7 @@ expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
|
|||
unsigned i;
|
||||
bool ok;
|
||||
|
||||
if (d->vmode != V8HImode || d->op0 != d->op1)
|
||||
if (d->vmode != V8HImode || !d->one_operand_p)
|
||||
return false;
|
||||
|
||||
/* The two permutations only operate in 64-bit lanes. */
|
||||
|
@ -36357,6 +36359,7 @@ expand_vec_perm_palignr (struct expand_vec_perm_d *d)
|
|||
gen_lowpart (TImode, d->op0), shift));
|
||||
|
||||
d->op0 = d->op1 = d->target;
|
||||
d->one_operand_p = true;
|
||||
|
||||
in_order = true;
|
||||
for (i = 0; i < nelt; ++i)
|
||||
|
@ -36396,14 +36399,14 @@ expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
|
|||
|
||||
if (GET_MODE_SIZE (d->vmode) == 16)
|
||||
{
|
||||
if (d->op0 == d->op1)
|
||||
if (d->one_operand_p)
|
||||
return false;
|
||||
}
|
||||
else if (GET_MODE_SIZE (d->vmode) == 32)
|
||||
{
|
||||
if (!TARGET_AVX)
|
||||
return false;
|
||||
/* For 32-byte modes allow even d->op0 == d->op1.
|
||||
/* For 32-byte modes allow even d->one_operand_p.
|
||||
The lack of cross-lane shuffling in some instructions
|
||||
might prevent a single insn shuffle. */
|
||||
dfinal = *d;
|
||||
|
@ -36528,11 +36531,11 @@ expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
|
|||
|
||||
if (nzcnt == 1)
|
||||
{
|
||||
gcc_assert (d->op0 == d->op1);
|
||||
gcc_assert (d->one_operand_p);
|
||||
nonzero_halves[1] = nonzero_halves[0];
|
||||
same_halves = true;
|
||||
}
|
||||
else if (d->op0 == d->op1)
|
||||
else if (d->one_operand_p)
|
||||
{
|
||||
gcc_assert (nonzero_halves[0] == 0);
|
||||
gcc_assert (nonzero_halves[1] == 1);
|
||||
|
@ -36571,7 +36574,7 @@ expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (d->op0 == d->op1)
|
||||
else if (d->one_operand_p)
|
||||
return false;
|
||||
else if (TARGET_AVX2
|
||||
&& (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
|
||||
|
@ -36628,6 +36631,7 @@ expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
|
|||
}
|
||||
dfinal.op0 = gen_reg_rtx (dfinal.vmode);
|
||||
dfinal.op1 = dfinal.op0;
|
||||
dfinal.one_operand_p = true;
|
||||
dremap.target = dfinal.op0;
|
||||
|
||||
/* Test if the final remap can be done with a single insn. For V4SFmode or
|
||||
|
@ -36671,7 +36675,7 @@ expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
|
|||
|
||||
if (!(TARGET_AVX2
|
||||
&& (d->vmode == V32QImode || d->vmode == V16HImode)
|
||||
&& d->op0 == d->op1))
|
||||
&& d->one_operand_p))
|
||||
return false;
|
||||
|
||||
contents[0] = 0;
|
||||
|
@ -36699,6 +36703,7 @@ expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
|
|||
dremap.target = gen_reg_rtx (V4DImode);
|
||||
dremap.op0 = gen_lowpart (V4DImode, d->op0);
|
||||
dremap.op1 = dremap.op0;
|
||||
dremap.one_operand_p = true;
|
||||
for (i = 0; i < 2; ++i)
|
||||
{
|
||||
unsigned int cnt = 0;
|
||||
|
@ -36712,6 +36717,7 @@ expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
|
|||
dfinal = *d;
|
||||
dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
|
||||
dfinal.op1 = dfinal.op0;
|
||||
dfinal.one_operand_p = true;
|
||||
for (i = 0, j = 0; i < nelt; ++i)
|
||||
{
|
||||
if (i == nelt2)
|
||||
|
@ -36751,8 +36757,7 @@ expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
|
|||
return false;
|
||||
|
||||
dsecond = *d;
|
||||
if (d->op0 == d->op1)
|
||||
dsecond.op1 = gen_reg_rtx (d->vmode);
|
||||
dsecond.one_operand_p = false;
|
||||
dsecond.testing_p = true;
|
||||
|
||||
/* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
|
||||
|
@ -36821,10 +36826,7 @@ expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
|
|||
vperm2f128 on d->op0 and d->op1. */
|
||||
dsecond.testing_p = false;
|
||||
dfirst = *d;
|
||||
if (d->op0 == d->op1)
|
||||
dfirst.target = dsecond.op1;
|
||||
else
|
||||
dfirst.target = gen_reg_rtx (d->vmode);
|
||||
dfirst.target = gen_reg_rtx (d->vmode);
|
||||
for (i = 0; i < nelt; i++)
|
||||
dfirst.perm[i] = (i & (nelt2 - 1))
|
||||
+ ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
|
||||
|
@ -36845,9 +36847,8 @@ expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
|
|||
return true;
|
||||
}
|
||||
|
||||
/* For d->op0 == d->op1 the only useful vperm2f128 permutation
|
||||
is 0x10. */
|
||||
if (d->op0 == d->op1)
|
||||
/* For one operand, the only useful vperm2f128 permutation is 0x10. */
|
||||
if (d->one_operand_p)
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -36864,7 +36865,7 @@ expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
|
|||
unsigned i, nelt;
|
||||
rtx (*gen) (rtx, rtx, rtx);
|
||||
|
||||
if (d->op0 == d->op1)
|
||||
if (d->one_operand_p)
|
||||
return false;
|
||||
if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
|
||||
;
|
||||
|
@ -36947,7 +36948,7 @@ expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
|
|||
if (!TARGET_AVX
|
||||
|| TARGET_AVX2
|
||||
|| (d->vmode != V8SFmode && d->vmode != V4DFmode)
|
||||
|| d->op0 != d->op1)
|
||||
|| !d->one_operand_p)
|
||||
return false;
|
||||
|
||||
dfirst = *d;
|
||||
|
@ -36985,6 +36986,7 @@ expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
|
|||
dsecond = *d;
|
||||
dsecond.op0 = dfirst.target;
|
||||
dsecond.op1 = dfirst.target;
|
||||
dsecond.one_operand_p = true;
|
||||
dsecond.target = gen_reg_rtx (dsecond.vmode);
|
||||
for (i = 0; i < nelt; i++)
|
||||
dsecond.perm[i] = i ^ nelt2;
|
||||
|
@ -37009,7 +37011,7 @@ expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
|
|||
|
||||
if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
|
||||
return false;
|
||||
gcc_assert (d->op0 != d->op1);
|
||||
gcc_assert (!d->one_operand_p);
|
||||
|
||||
nelt = d->nelt;
|
||||
eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
|
||||
|
@ -37064,7 +37066,7 @@ expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
|
|||
unsigned int i, nelt, eltsz;
|
||||
|
||||
if (!TARGET_AVX2
|
||||
|| d->op0 != d->op1
|
||||
|| !d->one_operand_p
|
||||
|| (d->vmode != V32QImode && d->vmode != V16HImode))
|
||||
return false;
|
||||
|
||||
|
@ -37132,7 +37134,7 @@ expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
|
|||
unsigned int i, nelt, eltsz;
|
||||
|
||||
if (!TARGET_AVX2
|
||||
|| d->op0 == d->op1
|
||||
|| d->one_operand_p
|
||||
|| (d->vmode != V32QImode && d->vmode != V16HImode))
|
||||
return false;
|
||||
|
||||
|
@ -37491,7 +37493,7 @@ expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
|
|||
{
|
||||
unsigned i, elt, nelt = d->nelt;
|
||||
|
||||
if (d->op0 != d->op1)
|
||||
if (!d->one_operand_p)
|
||||
return false;
|
||||
|
||||
elt = d->perm[0];
|
||||
|
@ -37514,7 +37516,7 @@ expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
|
|||
bool used[4];
|
||||
|
||||
if (!TARGET_AVX2
|
||||
|| d->op0 == d->op1
|
||||
|| d->one_operand_p
|
||||
|| (d->vmode != V32QImode && d->vmode != V16HImode))
|
||||
return false;
|
||||
|
||||
|
@ -37715,6 +37717,7 @@ ix86_expand_vec_perm_const (rtx operands[4])
|
|||
perm[i] = ei;
|
||||
}
|
||||
|
||||
d.one_operand_p = true;
|
||||
switch (which)
|
||||
{
|
||||
default:
|
||||
|
@ -37722,51 +37725,39 @@ ix86_expand_vec_perm_const (rtx operands[4])
|
|||
|
||||
case 3:
|
||||
if (!rtx_equal_p (d.op0, d.op1))
|
||||
break;
|
||||
|
||||
{
|
||||
d.one_operand_p = false;
|
||||
break;
|
||||
}
|
||||
/* The elements of PERM do not suggest that only the first operand
|
||||
is used, but both operands are identical. Allow easier matching
|
||||
of the permutation by folding the permutation into the single
|
||||
input vector. */
|
||||
for (i = 0; i < nelt; ++i)
|
||||
if (d.perm[i] >= nelt)
|
||||
d.perm[i] -= nelt;
|
||||
/* FALLTHRU */
|
||||
|
||||
case 1:
|
||||
d.op1 = d.op0;
|
||||
break;
|
||||
|
||||
case 2:
|
||||
for (i = 0; i < nelt; ++i)
|
||||
d.perm[i] -= nelt;
|
||||
d.perm[i] &= nelt - 1;
|
||||
d.op0 = d.op1;
|
||||
break;
|
||||
|
||||
case 1:
|
||||
d.op1 = d.op0;
|
||||
break;
|
||||
}
|
||||
|
||||
if (ix86_expand_vec_perm_const_1 (&d))
|
||||
return true;
|
||||
|
||||
/* If the mask says both arguments are needed, but they are the same,
|
||||
the above tried to expand with d.op0 == d.op1. If that didn't work,
|
||||
retry with d.op0 != d.op1 as that is what testing has been done with. */
|
||||
if (which == 3 && d.op0 == d.op1)
|
||||
/* If the selector says both arguments are needed, but the operands are the
|
||||
same, the above tried to expand with one_operand_p and flattened selector.
|
||||
If that didn't work, retry without one_operand_p; we succeeded with that
|
||||
during testing. */
|
||||
if (which == 3 && d.one_operand_p)
|
||||
{
|
||||
rtx seq;
|
||||
bool ok;
|
||||
|
||||
d.one_operand_p = false;
|
||||
memcpy (d.perm, perm, sizeof (perm));
|
||||
d.op1 = gen_reg_rtx (d.vmode);
|
||||
start_sequence ();
|
||||
ok = ix86_expand_vec_perm_const_1 (&d);
|
||||
seq = get_insns ();
|
||||
end_sequence ();
|
||||
if (ok)
|
||||
{
|
||||
emit_move_insn (d.op1, d.op0);
|
||||
emit_insn (seq);
|
||||
return true;
|
||||
}
|
||||
return ix86_expand_vec_perm_const_1 (&d);
|
||||
}
|
||||
|
||||
return false;
|
||||
|
@ -37780,7 +37771,7 @@ ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
|
|||
{
|
||||
struct expand_vec_perm_d d;
|
||||
unsigned int i, nelt, which;
|
||||
bool ret, one_vec;
|
||||
bool ret;
|
||||
|
||||
d.vmode = vmode;
|
||||
d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
|
||||
|
@ -37817,17 +37808,17 @@ ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
|
|||
d.perm[i] -= nelt;
|
||||
|
||||
/* Check whether the mask can be applied to the vector type. */
|
||||
one_vec = (which != 3);
|
||||
d.one_operand_p = (which != 3);
|
||||
|
||||
/* Implementable with shufps or pshufd. */
|
||||
if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
|
||||
if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
|
||||
return true;
|
||||
|
||||
/* Otherwise we have to go through the motions and see if we can
|
||||
figure out how to generate the requested permutation. */
|
||||
d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
|
||||
d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
|
||||
if (!one_vec)
|
||||
if (!d.one_operand_p)
|
||||
d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
|
||||
|
||||
start_sequence ();
|
||||
|
@ -37848,6 +37839,7 @@ ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
|
|||
d.op1 = op1;
|
||||
d.vmode = GET_MODE (targ);
|
||||
d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
|
||||
d.one_operand_p = false;
|
||||
d.testing_p = false;
|
||||
|
||||
for (i = 0; i < nelt; ++i)
|
||||
|
|
Loading…
Reference in New Issue