mirror of git://gcc.gnu.org/git/gcc.git
i386: Add and use one_operand_p to vec_perm control struct.
* config/i386/i386.c (struct expand_vec_perm_d): Add one_operand_p. (ix86_expand_vector_init_duplicate): Initialize it. (expand_vec_perm_palignr): Likewise. (ix86_expand_vec_perm_const): Likewise. (ix86_vectorize_vec_perm_const_ok): Likewise. (expand_vec_perm_blend): Use it. (expand_vec_perm_vpermil): Likewise. (expand_vec_perm_pshufb): Likewise. (expand_vec_perm_1): Likewise. (expand_vec_perm_pshuflw_pshufhw): Likewise. (expand_vec_perm_interleave2): Likewise. (expand_vec_perm_vpermq_perm_1): Likewise. (expand_vec_perm_vperm2f128): Likewise. (expand_vec_perm_interleave3): Likewise. (expand_vec_perm_vperm2f128_vblend): Likewise. (expand_vec_perm_vpshufb2_vpermq): Likewise. (expand_vec_perm_vpshufb2_vpermq_even_odd): Likewise,. (expand_vec_perm_broadcast): Likewise. (expand_vec_perm_vpshufb4_vpermq2): Likewise. From-SVN: r186017
This commit is contained in:
parent
7b470faf61
commit
e025da0c31
|
@ -1,3 +1,25 @@
|
||||||
|
2012-03-30 Richard Henderson <rth@redhat.com>
|
||||||
|
|
||||||
|
* config/i386/i386.c (struct expand_vec_perm_d): Add one_operand_p.
|
||||||
|
(ix86_expand_vector_init_duplicate): Initialize it.
|
||||||
|
(expand_vec_perm_palignr): Likewise.
|
||||||
|
(ix86_expand_vec_perm_const): Likewise.
|
||||||
|
(ix86_vectorize_vec_perm_const_ok): Likewise.
|
||||||
|
(expand_vec_perm_blend): Use it.
|
||||||
|
(expand_vec_perm_vpermil): Likewise.
|
||||||
|
(expand_vec_perm_pshufb): Likewise.
|
||||||
|
(expand_vec_perm_1): Likewise.
|
||||||
|
(expand_vec_perm_pshuflw_pshufhw): Likewise.
|
||||||
|
(expand_vec_perm_interleave2): Likewise.
|
||||||
|
(expand_vec_perm_vpermq_perm_1): Likewise.
|
||||||
|
(expand_vec_perm_vperm2f128): Likewise.
|
||||||
|
(expand_vec_perm_interleave3): Likewise.
|
||||||
|
(expand_vec_perm_vperm2f128_vblend): Likewise.
|
||||||
|
(expand_vec_perm_vpshufb2_vpermq): Likewise.
|
||||||
|
(expand_vec_perm_vpshufb2_vpermq_even_odd): Likewise,.
|
||||||
|
(expand_vec_perm_broadcast): Likewise.
|
||||||
|
(expand_vec_perm_vpshufb4_vpermq2): Likewise.
|
||||||
|
|
||||||
2012-03-30 Richard Henderson <rth@redhat.com>
|
2012-03-30 Richard Henderson <rth@redhat.com>
|
||||||
|
|
||||||
* dwarf2out.c (gen_variable_die): Initialize off.
|
* dwarf2out.c (gen_variable_die): Initialize off.
|
||||||
|
|
|
@ -32938,6 +32938,7 @@ struct expand_vec_perm_d
|
||||||
unsigned char perm[MAX_VECT_LEN];
|
unsigned char perm[MAX_VECT_LEN];
|
||||||
enum machine_mode vmode;
|
enum machine_mode vmode;
|
||||||
unsigned char nelt;
|
unsigned char nelt;
|
||||||
|
bool one_operand_p;
|
||||||
bool testing_p;
|
bool testing_p;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -33038,6 +33039,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
|
||||||
dperm.vmode = mode;
|
dperm.vmode = mode;
|
||||||
dperm.nelt = GET_MODE_NUNITS (mode);
|
dperm.nelt = GET_MODE_NUNITS (mode);
|
||||||
dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
|
dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
|
||||||
|
dperm.one_operand_p = true;
|
||||||
|
|
||||||
/* Extend to SImode using a paradoxical SUBREG. */
|
/* Extend to SImode using a paradoxical SUBREG. */
|
||||||
tmp1 = gen_reg_rtx (SImode);
|
tmp1 = gen_reg_rtx (SImode);
|
||||||
|
@ -35735,7 +35737,7 @@ expand_vec_perm_blend (struct expand_vec_perm_d *d)
|
||||||
rtx target, op0, op1, x;
|
rtx target, op0, op1, x;
|
||||||
rtx rperm[32], vperm;
|
rtx rperm[32], vperm;
|
||||||
|
|
||||||
if (d->op0 == d->op1)
|
if (d->one_operand_p)
|
||||||
return false;
|
return false;
|
||||||
if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
|
if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
|
||||||
;
|
;
|
||||||
|
@ -35922,7 +35924,7 @@ expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
|
||||||
rtx rperm[8], vperm;
|
rtx rperm[8], vperm;
|
||||||
unsigned i;
|
unsigned i;
|
||||||
|
|
||||||
if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
|
if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
/* We can only permute within the 128-bit lane. */
|
/* We can only permute within the 128-bit lane. */
|
||||||
|
@ -35998,7 +36000,7 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
|
||||||
|
|
||||||
nelt = d->nelt;
|
nelt = d->nelt;
|
||||||
|
|
||||||
if (d->op0 != d->op1)
|
if (!d->one_operand_p)
|
||||||
{
|
{
|
||||||
if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
|
if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
|
||||||
{
|
{
|
||||||
|
@ -36086,7 +36088,7 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
|
eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
|
||||||
if (d->op0 != d->op1)
|
if (!d->one_operand_p)
|
||||||
mask = 2 * nelt - 1;
|
mask = 2 * nelt - 1;
|
||||||
else if (vmode == V16QImode)
|
else if (vmode == V16QImode)
|
||||||
mask = nelt - 1;
|
mask = nelt - 1;
|
||||||
|
@ -36113,7 +36115,7 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
|
||||||
|
|
||||||
target = gen_lowpart (vmode, d->target);
|
target = gen_lowpart (vmode, d->target);
|
||||||
op0 = gen_lowpart (vmode, d->op0);
|
op0 = gen_lowpart (vmode, d->op0);
|
||||||
if (d->op0 == d->op1)
|
if (d->one_operand_p)
|
||||||
{
|
{
|
||||||
if (vmode == V16QImode)
|
if (vmode == V16QImode)
|
||||||
emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
|
emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
|
||||||
|
@ -36145,7 +36147,7 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
|
||||||
/* Check plain VEC_SELECT first, because AVX has instructions that could
|
/* Check plain VEC_SELECT first, because AVX has instructions that could
|
||||||
match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
|
match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
|
||||||
input where SEL+CONCAT may not. */
|
input where SEL+CONCAT may not. */
|
||||||
if (d->op0 == d->op1)
|
if (d->one_operand_p)
|
||||||
{
|
{
|
||||||
int mask = nelt - 1;
|
int mask = nelt - 1;
|
||||||
bool identity_perm = true;
|
bool identity_perm = true;
|
||||||
|
@ -36242,7 +36244,7 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
/* Recognize interleave style patterns with reversed operands. */
|
/* Recognize interleave style patterns with reversed operands. */
|
||||||
if (d->op0 != d->op1)
|
if (!d->one_operand_p)
|
||||||
{
|
{
|
||||||
for (i = 0; i < nelt; ++i)
|
for (i = 0; i < nelt; ++i)
|
||||||
{
|
{
|
||||||
|
@ -36285,7 +36287,7 @@ expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
|
||||||
unsigned i;
|
unsigned i;
|
||||||
bool ok;
|
bool ok;
|
||||||
|
|
||||||
if (d->vmode != V8HImode || d->op0 != d->op1)
|
if (d->vmode != V8HImode || !d->one_operand_p)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
/* The two permutations only operate in 64-bit lanes. */
|
/* The two permutations only operate in 64-bit lanes. */
|
||||||
|
@ -36357,6 +36359,7 @@ expand_vec_perm_palignr (struct expand_vec_perm_d *d)
|
||||||
gen_lowpart (TImode, d->op0), shift));
|
gen_lowpart (TImode, d->op0), shift));
|
||||||
|
|
||||||
d->op0 = d->op1 = d->target;
|
d->op0 = d->op1 = d->target;
|
||||||
|
d->one_operand_p = true;
|
||||||
|
|
||||||
in_order = true;
|
in_order = true;
|
||||||
for (i = 0; i < nelt; ++i)
|
for (i = 0; i < nelt; ++i)
|
||||||
|
@ -36396,14 +36399,14 @@ expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
|
||||||
|
|
||||||
if (GET_MODE_SIZE (d->vmode) == 16)
|
if (GET_MODE_SIZE (d->vmode) == 16)
|
||||||
{
|
{
|
||||||
if (d->op0 == d->op1)
|
if (d->one_operand_p)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
else if (GET_MODE_SIZE (d->vmode) == 32)
|
else if (GET_MODE_SIZE (d->vmode) == 32)
|
||||||
{
|
{
|
||||||
if (!TARGET_AVX)
|
if (!TARGET_AVX)
|
||||||
return false;
|
return false;
|
||||||
/* For 32-byte modes allow even d->op0 == d->op1.
|
/* For 32-byte modes allow even d->one_operand_p.
|
||||||
The lack of cross-lane shuffling in some instructions
|
The lack of cross-lane shuffling in some instructions
|
||||||
might prevent a single insn shuffle. */
|
might prevent a single insn shuffle. */
|
||||||
dfinal = *d;
|
dfinal = *d;
|
||||||
|
@ -36528,11 +36531,11 @@ expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
|
||||||
|
|
||||||
if (nzcnt == 1)
|
if (nzcnt == 1)
|
||||||
{
|
{
|
||||||
gcc_assert (d->op0 == d->op1);
|
gcc_assert (d->one_operand_p);
|
||||||
nonzero_halves[1] = nonzero_halves[0];
|
nonzero_halves[1] = nonzero_halves[0];
|
||||||
same_halves = true;
|
same_halves = true;
|
||||||
}
|
}
|
||||||
else if (d->op0 == d->op1)
|
else if (d->one_operand_p)
|
||||||
{
|
{
|
||||||
gcc_assert (nonzero_halves[0] == 0);
|
gcc_assert (nonzero_halves[0] == 0);
|
||||||
gcc_assert (nonzero_halves[1] == 1);
|
gcc_assert (nonzero_halves[1] == 1);
|
||||||
|
@ -36571,7 +36574,7 @@ expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (d->op0 == d->op1)
|
else if (d->one_operand_p)
|
||||||
return false;
|
return false;
|
||||||
else if (TARGET_AVX2
|
else if (TARGET_AVX2
|
||||||
&& (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
|
&& (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
|
||||||
|
@ -36628,6 +36631,7 @@ expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
|
||||||
}
|
}
|
||||||
dfinal.op0 = gen_reg_rtx (dfinal.vmode);
|
dfinal.op0 = gen_reg_rtx (dfinal.vmode);
|
||||||
dfinal.op1 = dfinal.op0;
|
dfinal.op1 = dfinal.op0;
|
||||||
|
dfinal.one_operand_p = true;
|
||||||
dremap.target = dfinal.op0;
|
dremap.target = dfinal.op0;
|
||||||
|
|
||||||
/* Test if the final remap can be done with a single insn. For V4SFmode or
|
/* Test if the final remap can be done with a single insn. For V4SFmode or
|
||||||
|
@ -36671,7 +36675,7 @@ expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
|
||||||
|
|
||||||
if (!(TARGET_AVX2
|
if (!(TARGET_AVX2
|
||||||
&& (d->vmode == V32QImode || d->vmode == V16HImode)
|
&& (d->vmode == V32QImode || d->vmode == V16HImode)
|
||||||
&& d->op0 == d->op1))
|
&& d->one_operand_p))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
contents[0] = 0;
|
contents[0] = 0;
|
||||||
|
@ -36699,6 +36703,7 @@ expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
|
||||||
dremap.target = gen_reg_rtx (V4DImode);
|
dremap.target = gen_reg_rtx (V4DImode);
|
||||||
dremap.op0 = gen_lowpart (V4DImode, d->op0);
|
dremap.op0 = gen_lowpart (V4DImode, d->op0);
|
||||||
dremap.op1 = dremap.op0;
|
dremap.op1 = dremap.op0;
|
||||||
|
dremap.one_operand_p = true;
|
||||||
for (i = 0; i < 2; ++i)
|
for (i = 0; i < 2; ++i)
|
||||||
{
|
{
|
||||||
unsigned int cnt = 0;
|
unsigned int cnt = 0;
|
||||||
|
@ -36712,6 +36717,7 @@ expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
|
||||||
dfinal = *d;
|
dfinal = *d;
|
||||||
dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
|
dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
|
||||||
dfinal.op1 = dfinal.op0;
|
dfinal.op1 = dfinal.op0;
|
||||||
|
dfinal.one_operand_p = true;
|
||||||
for (i = 0, j = 0; i < nelt; ++i)
|
for (i = 0, j = 0; i < nelt; ++i)
|
||||||
{
|
{
|
||||||
if (i == nelt2)
|
if (i == nelt2)
|
||||||
|
@ -36751,8 +36757,7 @@ expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
dsecond = *d;
|
dsecond = *d;
|
||||||
if (d->op0 == d->op1)
|
dsecond.one_operand_p = false;
|
||||||
dsecond.op1 = gen_reg_rtx (d->vmode);
|
|
||||||
dsecond.testing_p = true;
|
dsecond.testing_p = true;
|
||||||
|
|
||||||
/* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
|
/* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
|
||||||
|
@ -36821,10 +36826,7 @@ expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
|
||||||
vperm2f128 on d->op0 and d->op1. */
|
vperm2f128 on d->op0 and d->op1. */
|
||||||
dsecond.testing_p = false;
|
dsecond.testing_p = false;
|
||||||
dfirst = *d;
|
dfirst = *d;
|
||||||
if (d->op0 == d->op1)
|
dfirst.target = gen_reg_rtx (d->vmode);
|
||||||
dfirst.target = dsecond.op1;
|
|
||||||
else
|
|
||||||
dfirst.target = gen_reg_rtx (d->vmode);
|
|
||||||
for (i = 0; i < nelt; i++)
|
for (i = 0; i < nelt; i++)
|
||||||
dfirst.perm[i] = (i & (nelt2 - 1))
|
dfirst.perm[i] = (i & (nelt2 - 1))
|
||||||
+ ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
|
+ ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
|
||||||
|
@ -36845,9 +36847,8 @@ expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* For d->op0 == d->op1 the only useful vperm2f128 permutation
|
/* For one operand, the only useful vperm2f128 permutation is 0x10. */
|
||||||
is 0x10. */
|
if (d->one_operand_p)
|
||||||
if (d->op0 == d->op1)
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -36864,7 +36865,7 @@ expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
|
||||||
unsigned i, nelt;
|
unsigned i, nelt;
|
||||||
rtx (*gen) (rtx, rtx, rtx);
|
rtx (*gen) (rtx, rtx, rtx);
|
||||||
|
|
||||||
if (d->op0 == d->op1)
|
if (d->one_operand_p)
|
||||||
return false;
|
return false;
|
||||||
if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
|
if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
|
||||||
;
|
;
|
||||||
|
@ -36947,7 +36948,7 @@ expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
|
||||||
if (!TARGET_AVX
|
if (!TARGET_AVX
|
||||||
|| TARGET_AVX2
|
|| TARGET_AVX2
|
||||||
|| (d->vmode != V8SFmode && d->vmode != V4DFmode)
|
|| (d->vmode != V8SFmode && d->vmode != V4DFmode)
|
||||||
|| d->op0 != d->op1)
|
|| !d->one_operand_p)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
dfirst = *d;
|
dfirst = *d;
|
||||||
|
@ -36985,6 +36986,7 @@ expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
|
||||||
dsecond = *d;
|
dsecond = *d;
|
||||||
dsecond.op0 = dfirst.target;
|
dsecond.op0 = dfirst.target;
|
||||||
dsecond.op1 = dfirst.target;
|
dsecond.op1 = dfirst.target;
|
||||||
|
dsecond.one_operand_p = true;
|
||||||
dsecond.target = gen_reg_rtx (dsecond.vmode);
|
dsecond.target = gen_reg_rtx (dsecond.vmode);
|
||||||
for (i = 0; i < nelt; i++)
|
for (i = 0; i < nelt; i++)
|
||||||
dsecond.perm[i] = i ^ nelt2;
|
dsecond.perm[i] = i ^ nelt2;
|
||||||
|
@ -37009,7 +37011,7 @@ expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
|
||||||
|
|
||||||
if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
|
if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
|
||||||
return false;
|
return false;
|
||||||
gcc_assert (d->op0 != d->op1);
|
gcc_assert (!d->one_operand_p);
|
||||||
|
|
||||||
nelt = d->nelt;
|
nelt = d->nelt;
|
||||||
eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
|
eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
|
||||||
|
@ -37064,7 +37066,7 @@ expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
|
||||||
unsigned int i, nelt, eltsz;
|
unsigned int i, nelt, eltsz;
|
||||||
|
|
||||||
if (!TARGET_AVX2
|
if (!TARGET_AVX2
|
||||||
|| d->op0 != d->op1
|
|| !d->one_operand_p
|
||||||
|| (d->vmode != V32QImode && d->vmode != V16HImode))
|
|| (d->vmode != V32QImode && d->vmode != V16HImode))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
@ -37132,7 +37134,7 @@ expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
|
||||||
unsigned int i, nelt, eltsz;
|
unsigned int i, nelt, eltsz;
|
||||||
|
|
||||||
if (!TARGET_AVX2
|
if (!TARGET_AVX2
|
||||||
|| d->op0 == d->op1
|
|| d->one_operand_p
|
||||||
|| (d->vmode != V32QImode && d->vmode != V16HImode))
|
|| (d->vmode != V32QImode && d->vmode != V16HImode))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
@ -37491,7 +37493,7 @@ expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
|
||||||
{
|
{
|
||||||
unsigned i, elt, nelt = d->nelt;
|
unsigned i, elt, nelt = d->nelt;
|
||||||
|
|
||||||
if (d->op0 != d->op1)
|
if (!d->one_operand_p)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
elt = d->perm[0];
|
elt = d->perm[0];
|
||||||
|
@ -37514,7 +37516,7 @@ expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
|
||||||
bool used[4];
|
bool used[4];
|
||||||
|
|
||||||
if (!TARGET_AVX2
|
if (!TARGET_AVX2
|
||||||
|| d->op0 == d->op1
|
|| d->one_operand_p
|
||||||
|| (d->vmode != V32QImode && d->vmode != V16HImode))
|
|| (d->vmode != V32QImode && d->vmode != V16HImode))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
@ -37715,6 +37717,7 @@ ix86_expand_vec_perm_const (rtx operands[4])
|
||||||
perm[i] = ei;
|
perm[i] = ei;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
d.one_operand_p = true;
|
||||||
switch (which)
|
switch (which)
|
||||||
{
|
{
|
||||||
default:
|
default:
|
||||||
|
@ -37722,51 +37725,39 @@ ix86_expand_vec_perm_const (rtx operands[4])
|
||||||
|
|
||||||
case 3:
|
case 3:
|
||||||
if (!rtx_equal_p (d.op0, d.op1))
|
if (!rtx_equal_p (d.op0, d.op1))
|
||||||
break;
|
{
|
||||||
|
d.one_operand_p = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
/* The elements of PERM do not suggest that only the first operand
|
/* The elements of PERM do not suggest that only the first operand
|
||||||
is used, but both operands are identical. Allow easier matching
|
is used, but both operands are identical. Allow easier matching
|
||||||
of the permutation by folding the permutation into the single
|
of the permutation by folding the permutation into the single
|
||||||
input vector. */
|
input vector. */
|
||||||
for (i = 0; i < nelt; ++i)
|
|
||||||
if (d.perm[i] >= nelt)
|
|
||||||
d.perm[i] -= nelt;
|
|
||||||
/* FALLTHRU */
|
/* FALLTHRU */
|
||||||
|
|
||||||
case 1:
|
|
||||||
d.op1 = d.op0;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 2:
|
case 2:
|
||||||
for (i = 0; i < nelt; ++i)
|
for (i = 0; i < nelt; ++i)
|
||||||
d.perm[i] -= nelt;
|
d.perm[i] &= nelt - 1;
|
||||||
d.op0 = d.op1;
|
d.op0 = d.op1;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case 1:
|
||||||
|
d.op1 = d.op0;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ix86_expand_vec_perm_const_1 (&d))
|
if (ix86_expand_vec_perm_const_1 (&d))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
/* If the mask says both arguments are needed, but they are the same,
|
/* If the selector says both arguments are needed, but the operands are the
|
||||||
the above tried to expand with d.op0 == d.op1. If that didn't work,
|
same, the above tried to expand with one_operand_p and flattened selector.
|
||||||
retry with d.op0 != d.op1 as that is what testing has been done with. */
|
If that didn't work, retry without one_operand_p; we succeeded with that
|
||||||
if (which == 3 && d.op0 == d.op1)
|
during testing. */
|
||||||
|
if (which == 3 && d.one_operand_p)
|
||||||
{
|
{
|
||||||
rtx seq;
|
d.one_operand_p = false;
|
||||||
bool ok;
|
|
||||||
|
|
||||||
memcpy (d.perm, perm, sizeof (perm));
|
memcpy (d.perm, perm, sizeof (perm));
|
||||||
d.op1 = gen_reg_rtx (d.vmode);
|
return ix86_expand_vec_perm_const_1 (&d);
|
||||||
start_sequence ();
|
|
||||||
ok = ix86_expand_vec_perm_const_1 (&d);
|
|
||||||
seq = get_insns ();
|
|
||||||
end_sequence ();
|
|
||||||
if (ok)
|
|
||||||
{
|
|
||||||
emit_move_insn (d.op1, d.op0);
|
|
||||||
emit_insn (seq);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
|
@ -37780,7 +37771,7 @@ ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
|
||||||
{
|
{
|
||||||
struct expand_vec_perm_d d;
|
struct expand_vec_perm_d d;
|
||||||
unsigned int i, nelt, which;
|
unsigned int i, nelt, which;
|
||||||
bool ret, one_vec;
|
bool ret;
|
||||||
|
|
||||||
d.vmode = vmode;
|
d.vmode = vmode;
|
||||||
d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
|
d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
|
||||||
|
@ -37817,17 +37808,17 @@ ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
|
||||||
d.perm[i] -= nelt;
|
d.perm[i] -= nelt;
|
||||||
|
|
||||||
/* Check whether the mask can be applied to the vector type. */
|
/* Check whether the mask can be applied to the vector type. */
|
||||||
one_vec = (which != 3);
|
d.one_operand_p = (which != 3);
|
||||||
|
|
||||||
/* Implementable with shufps or pshufd. */
|
/* Implementable with shufps or pshufd. */
|
||||||
if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
|
if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
/* Otherwise we have to go through the motions and see if we can
|
/* Otherwise we have to go through the motions and see if we can
|
||||||
figure out how to generate the requested permutation. */
|
figure out how to generate the requested permutation. */
|
||||||
d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
|
d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
|
||||||
d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
|
d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
|
||||||
if (!one_vec)
|
if (!d.one_operand_p)
|
||||||
d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
|
d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
|
||||||
|
|
||||||
start_sequence ();
|
start_sequence ();
|
||||||
|
@ -37848,6 +37839,7 @@ ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
|
||||||
d.op1 = op1;
|
d.op1 = op1;
|
||||||
d.vmode = GET_MODE (targ);
|
d.vmode = GET_MODE (targ);
|
||||||
d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
|
d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
|
||||||
|
d.one_operand_p = false;
|
||||||
d.testing_p = false;
|
d.testing_p = false;
|
||||||
|
|
||||||
for (i = 0; i < nelt; ++i)
|
for (i = 0; i < nelt; ++i)
|
||||||
|
|
Loading…
Reference in New Issue