mirror of git://gcc.gnu.org/git/gcc.git
optabs.c (expand_vec_perm): Avoid vector mode punning SUBREGs in SET_DEST.
	* optabs.c (expand_vec_perm): Avoid vector mode punning
	SUBREGs in SET_DEST.
	* expmed.c (store_bit_field_1): Likewise.
	* config/i386/sse.md (movdi_to_sse, vec_pack_sfix_trunc_v2df,
	vec_pack_sfix_v2df, vec_shl_<mode>, vec_shr_<mode>,
	vec_interleave_high<mode>, vec_interleave_low<mode>): Likewise.
	* config/i386/i386.c (ix86_expand_vector_move_misalign,
	ix86_expand_sse_movcc, ix86_expand_int_vcond, ix86_expand_vec_perm,
	ix86_expand_sse_unpack, ix86_expand_args_builtin,
	ix86_expand_vector_init_duplicate, ix86_expand_vector_set,
	emit_reduc_half, expand_vec_perm_blend, expand_vec_perm_pshufb,
	expand_vec_perm_interleave2, expand_vec_perm_pshufb2,
	expand_vec_perm_vpshufb2_vpermq,
	expand_vec_perm_vpshufb2_vpermq_even_odd, expand_vec_perm_even_odd_1,
	expand_vec_perm_broadcast_1, expand_vec_perm_vpshufb4_vpermq2,
	ix86_expand_sse2_mulv4si3, ix86_expand_pinsr): Likewise.
	(expand_vec_perm_palignr): Likewise.  Modify a copy of *d rather
	than *d itself.

From-SVN: r204274
parent 5a9785fb4c
commit d8c84975e6
gcc/ChangeLog
@@ -1,3 +1,24 @@
+2013-10-31  Jakub Jelinek  <jakub@redhat.com>
+
+	* optabs.c (expand_vec_perm): Avoid vector mode punning
+	SUBREGs in SET_DEST.
+	* expmed.c (store_bit_field_1): Likewise.
+	* config/i386/sse.md (movdi_to_sse, vec_pack_sfix_trunc_v2df,
+	vec_pack_sfix_v2df, vec_shl_<mode>, vec_shr_<mode>,
+	vec_interleave_high<mode>, vec_interleave_low<mode>): Likewise.
+	* config/i386/i386.c (ix86_expand_vector_move_misalign,
+	ix86_expand_sse_movcc, ix86_expand_int_vcond, ix86_expand_vec_perm,
+	ix86_expand_sse_unpack, ix86_expand_args_builtin,
+	ix86_expand_vector_init_duplicate, ix86_expand_vector_set,
+	emit_reduc_half, expand_vec_perm_blend, expand_vec_perm_pshufb,
+	expand_vec_perm_interleave2, expand_vec_perm_pshufb2,
+	expand_vec_perm_vpshufb2_vpermq,
+	expand_vec_perm_vpshufb2_vpermq_even_odd, expand_vec_perm_even_odd_1,
+	expand_vec_perm_broadcast_1, expand_vec_perm_vpshufb4_vpermq2,
+	ix86_expand_sse2_mulv4si3, ix86_expand_pinsr): Likewise.
+	(expand_vec_perm_palignr): Likewise.  Modify a copy of *d rather
+	than *d itself.
+
 2013-10-31  Uros Bizjak  <ubizjak@gmail.com>
 
 	* config/i386/i386.c (ix86_expand_sse2_abs): Rename function arguments.
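All of the i386.c hunks below apply one recipe: never let gen_lowpart pun the
destination of an emitted insn (which leaves a vector-mode SUBREG as its
SET_DEST); instead emit into a fresh pseudo of the mode the pattern wants and
copy the lowpart back afterwards. A condensed sketch of the before/after
shape, distilled from the ix86_expand_vec_perm hunks below (names as in that
function; this is not a verbatim excerpt of the patch):

/* Before: the pshufb pattern wants V16QImode, so the destination was
   punned in place -- the emitted insn gets a SUBREG as its SET_DEST.  */
target = gen_lowpart (V16QImode, target);
emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));

/* After: emit into a fresh V16QImode pseudo, then move the punned
   result back, so every SET_DEST stays a plain REG.  */
if (GET_MODE (target) != V16QImode)
  target = gen_reg_rtx (V16QImode);
emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
if (target != operands[0])
  emit_move_insn (operands[0],
                  gen_lowpart (GET_MODE (operands[0]), target));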
gcc/config/i386/i386.c
@@ -16803,6 +16803,8 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
 	}
       else
 	{
+	  rtx t;
+
 	  if (TARGET_AVX
 	      || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
 	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
@@ -16821,18 +16823,22 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
 	      return;
 	    }
 
-	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
-	    emit_move_insn (op0, CONST0_RTX (mode));
-	  else
-	    emit_clobber (op0);
-
 	  if (mode != V4SFmode)
-	    op0 = gen_lowpart (V4SFmode, op0);
+	    t = gen_reg_rtx (V4SFmode);
+	  else
+	    t = op0;
+
+	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
+	    emit_move_insn (t, CONST0_RTX (V4SFmode));
+	  else
+	    emit_clobber (t);
 
 	  m = adjust_address (op1, V2SFmode, 0);
-	  emit_insn (gen_sse_loadlps (op0, op0, m));
+	  emit_insn (gen_sse_loadlps (t, t, m));
 	  m = adjust_address (op1, V2SFmode, 8);
-	  emit_insn (gen_sse_loadhps (op0, op0, m));
+	  emit_insn (gen_sse_loadhps (t, t, m));
+	  if (mode != V4SFmode)
+	    emit_move_insn (op0, gen_lowpart (mode, t));
 	}
     }
   else if (MEM_P (op0))
@@ -20473,6 +20479,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
   else
     {
       rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
+      rtx d = dest;
 
       if (!nonimmediate_operand (op_true, mode))
 	op_true = force_reg (mode, op_true);
@@ -20496,7 +20503,8 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
 	  if (TARGET_SSE4_1)
 	    {
 	      gen = gen_sse4_1_pblendvb;
-	      dest = gen_lowpart (V16QImode, dest);
+	      if (mode != V16QImode)
+		d = gen_reg_rtx (V16QImode);
 	      op_false = gen_lowpart (V16QImode, op_false);
 	      op_true = gen_lowpart (V16QImode, op_true);
 	      cmp = gen_lowpart (V16QImode, cmp);
@@ -20517,7 +20525,8 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
 	  if (TARGET_AVX2)
 	    {
 	      gen = gen_avx2_pblendvb;
-	      dest = gen_lowpart (V32QImode, dest);
+	      if (mode != V32QImode)
+		d = gen_reg_rtx (V32QImode);
 	      op_false = gen_lowpart (V32QImode, op_false);
 	      op_true = gen_lowpart (V32QImode, op_true);
 	      cmp = gen_lowpart (V32QImode, cmp);
@@ -20528,7 +20537,11 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
 	}
 
       if (gen != NULL)
-	emit_insn (gen (dest, op_false, op_true, cmp));
+	{
+	  emit_insn (gen (d, op_false, op_true, cmp));
+	  if (d != dest)
+	    emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
+	}
       else
 	{
 	  op_true = force_reg (mode, op_true);
@@ -20849,8 +20862,7 @@ ix86_expand_int_vcond (rtx operands[])
   else
     {
       gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
-      x = ix86_expand_sse_cmp (gen_lowpart (mode, operands[0]),
-			       code, cop0, cop1,
+      x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
 			       operands[1+negate], operands[2-negate]);
       x = gen_lowpart (data_mode, x);
     }
@@ -20869,7 +20881,7 @@ ix86_expand_vec_perm (rtx operands[])
   rtx op0 = operands[1];
   rtx op1 = operands[2];
   rtx mask = operands[3];
-  rtx t1, t2, t3, t4, vt, vt2, vec[32];
+  rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
   enum machine_mode mode = GET_MODE (op0);
   enum machine_mode maskmode = GET_MODE (mask);
   int w, e, i;
@@ -20937,7 +20949,7 @@ ix86_expand_vec_perm (rtx operands[])
 
       /* Continue as if V8SImode (resp. V32QImode) was used initially.  */
       operands[3] = mask = t1;
-      target = gen_lowpart (mode, target);
+      target = gen_reg_rtx (mode);
       op0 = gen_lowpart (mode, op0);
       op1 = gen_lowpart (mode, op1);
     }
@@ -20949,7 +20961,12 @@ ix86_expand_vec_perm (rtx operands[])
 	 the high bits of the shuffle elements.  No need for us to
 	 perform an AND ourselves.  */
       if (one_operand_shuffle)
-	emit_insn (gen_avx2_permvarv8si (target, op0, mask));
+	{
+	  emit_insn (gen_avx2_permvarv8si (target, op0, mask));
+	  if (target != operands[0])
+	    emit_move_insn (operands[0],
+			    gen_lowpart (GET_MODE (operands[0]), target));
+	}
       else
 	{
 	  t1 = gen_reg_rtx (V8SImode);
@@ -21022,13 +21039,13 @@ ix86_expand_vec_perm (rtx operands[])
 	     stands for other 12 bytes.  */
 	  /* The bit whether element is from the same lane or the other
 	     lane is bit 4, so shift it up by 3 to the MSB position.  */
-	  emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode, t1),
-				    gen_lowpart (V4DImode, mask),
+	  t5 = gen_reg_rtx (V4DImode);
+	  emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
 				    GEN_INT (3)));
 	  /* Clear MSB bits from the mask just in case it had them set.  */
 	  emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
 	  /* After this t1 will have MSB set for elements from other lane.  */
-	  emit_insn (gen_xorv32qi3 (t1, t1, vt2));
+	  emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
 	  /* Clear bits other than MSB.  */
 	  emit_insn (gen_andv32qi3 (t1, t1, vt));
 	  /* Or in the lower bits from mask into t3.  */
@@ -21037,8 +21054,8 @@ ix86_expand_vec_perm (rtx operands[])
 	     lane.  */
 	  emit_insn (gen_xorv32qi3 (t1, t1, vt));
 	  /* Swap 128-bit lanes in t3.  */
-	  emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
-					  gen_lowpart (V4DImode, t3),
+	  t6 = gen_reg_rtx (V4DImode);
+	  emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
 					  const2_rtx, GEN_INT (3),
 					  const0_rtx, const1_rtx));
 	  /* And or in the lower bits from mask into t1.  */
@@ -21048,15 +21065,20 @@ ix86_expand_vec_perm (rtx operands[])
 	  /* Each of these shuffles will put 0s in places where
 	     element from the other 128-bit lane is needed, otherwise
 	     will shuffle in the requested value.  */
-	  emit_insn (gen_avx2_pshufbv32qi3 (t3, op0, t3));
+	  emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
+					    gen_lowpart (V32QImode, t6)));
 	  emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
 	  /* For t3 the 128-bit lanes are swapped again.  */
-	  emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
-					  gen_lowpart (V4DImode, t3),
+	  t7 = gen_reg_rtx (V4DImode);
+	  emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
 					  const2_rtx, GEN_INT (3),
 					  const0_rtx, const1_rtx));
 	  /* And oring both together leads to the result.  */
-	  emit_insn (gen_iorv32qi3 (target, t1, t3));
+	  emit_insn (gen_iorv32qi3 (target, t1,
+				    gen_lowpart (V32QImode, t7)));
+	  if (target != operands[0])
+	    emit_move_insn (operands[0],
+			    gen_lowpart (GET_MODE (operands[0]), target));
 	  return;
 	}
@@ -21064,20 +21086,22 @@ ix86_expand_vec_perm (rtx operands[])
 	  /* Similarly to the above one_operand_shuffle code,
 	     just for repeated twice for each operand.  merge_two:
 	     code will merge the two results together.  */
-	  emit_insn (gen_avx2_pshufbv32qi3 (t4, op0, t3));
-	  emit_insn (gen_avx2_pshufbv32qi3 (t3, op1, t3));
+	  emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
+					    gen_lowpart (V32QImode, t6)));
+	  emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
+					    gen_lowpart (V32QImode, t6)));
 	  emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
 	  emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
-	  emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t4),
-					  gen_lowpart (V4DImode, t4),
+	  t7 = gen_reg_rtx (V4DImode);
+	  emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
 					  const2_rtx, GEN_INT (3),
 					  const0_rtx, const1_rtx));
-	  emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
-					  gen_lowpart (V4DImode, t3),
+	  t8 = gen_reg_rtx (V4DImode);
+	  emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
 					  const2_rtx, GEN_INT (3),
 					  const0_rtx, const1_rtx));
-	  emit_insn (gen_iorv32qi3 (t4, t2, t4));
-	  emit_insn (gen_iorv32qi3 (t3, t1, t3));
+	  emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
+	  emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
 	  t1 = t4;
 	  t2 = t3;
 	  goto merge_two;
@@ -21146,15 +21170,24 @@ ix86_expand_vec_perm (rtx operands[])
   /* The actual shuffle operations all operate on V16QImode.  */
   op0 = gen_lowpart (V16QImode, op0);
   op1 = gen_lowpart (V16QImode, op1);
-  target = gen_lowpart (V16QImode, target);
 
   if (TARGET_XOP)
     {
+      if (GET_MODE (target) != V16QImode)
+	target = gen_reg_rtx (V16QImode);
       emit_insn (gen_xop_pperm (target, op0, op1, mask));
+      if (target != operands[0])
+	emit_move_insn (operands[0],
+			gen_lowpart (GET_MODE (operands[0]), target));
     }
   else if (one_operand_shuffle)
     {
+      if (GET_MODE (target) != V16QImode)
+	target = gen_reg_rtx (V16QImode);
       emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
+      if (target != operands[0])
+	emit_move_insn (operands[0],
+			gen_lowpart (GET_MODE (operands[0]), target));
     }
   else
     {
@@ -21194,7 +21227,9 @@ ix86_expand_vec_perm (rtx operands[])
       mask = expand_simple_binop (maskmode, AND, mask, vt,
 				  NULL_RTX, 0, OPTAB_DIRECT);
 
-      xops[0] = gen_lowpart (mode, operands[0]);
+      if (GET_MODE (target) != mode)
+	target = gen_reg_rtx (mode);
+      xops[0] = target;
       xops[1] = gen_lowpart (mode, t2);
       xops[2] = gen_lowpart (mode, t1);
       xops[3] = gen_rtx_EQ (maskmode, mask, vt);
@@ -21202,6 +21237,9 @@ ix86_expand_vec_perm (rtx operands[])
       xops[5] = vt;
       ok = ix86_expand_int_vcond (xops);
       gcc_assert (ok);
+      if (target != operands[0])
+	emit_move_insn (operands[0],
+			gen_lowpart (GET_MODE (operands[0]), target));
     }
 }
 
@@ -21280,10 +21318,10 @@ ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
       else if (high_p)
 	{
 	  /* Shift higher 8 bytes to lower 8 bytes.  */
-	  tmp = gen_reg_rtx (imode);
-	  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
-					 gen_lowpart (V1TImode, src),
+	  tmp = gen_reg_rtx (V1TImode);
+	  emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
 					 GEN_INT (64)));
+	  tmp = gen_lowpart (imode, tmp);
 	}
       else
 	tmp = src;
@@ -21324,7 +21362,9 @@ ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
       tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
 				 src, pc_rtx, pc_rtx);
 
-      emit_insn (unpack (gen_lowpart (imode, dest), src, tmp));
+      rtx tmp2 = gen_reg_rtx (imode);
+      emit_insn (unpack (tmp2, src, tmp));
+      emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
     }
 }
 
@@ -31967,8 +32007,8 @@ ix86_expand_args_builtin (const struct builtin_description *d,
 	}
       else
 	{
-	  target = gen_reg_rtx (rmode);
-	  real_target = simplify_gen_subreg (tmode, target, rmode, 0);
+	  real_target = gen_reg_rtx (tmode);
+	  target = simplify_gen_subreg (rmode, real_target, tmode, 0);
 	}
 
       for (i = 0; i < nargs; i++)
@@ -36691,8 +36731,9 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
       emit_move_insn (tmp1, gen_lowpart (SImode, val));
 
       /* Insert the SImode value as low element of a V4SImode vector.  */
-      tmp2 = gen_lowpart (V4SImode, dperm.op0);
+      tmp2 = gen_reg_rtx (V4SImode);
       emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
+      emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
 
       ok = (expand_vec_perm_1 (&dperm)
 	    || expand_vec_perm_broadcast_1 (&dperm));
@@ -36722,9 +36763,10 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
 	val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
 
-	x = gen_lowpart (wvmode, target);
+	x = gen_reg_rtx (wvmode);
 	ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
 	gcc_assert (ok);
+	emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
 	return ok;
       }
 
@@ -37599,8 +37641,9 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
       else
 	{
 	  /* For SSE1, we have to reuse the V4SF code.  */
-	  ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
-				  gen_lowpart (SFmode, val), elt);
+	  rtx t = gen_reg_rtx (V4SFmode);
+	  ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
+	  emit_move_insn (target, gen_lowpart (mode, t));
 	}
       return;
 
@@ -37918,7 +37961,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
 static void
 emit_reduc_half (rtx dest, rtx src, int i)
 {
-  rtx tem;
+  rtx tem, d = dest;
   switch (GET_MODE (src))
     {
     case V4SFmode:
@@ -37935,8 +37978,8 @@ emit_reduc_half (rtx dest, rtx src, int i)
     case V8HImode:
     case V4SImode:
     case V2DImode:
-      tem = gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, dest),
-				gen_lowpart (V1TImode, src),
+      d = gen_reg_rtx (V1TImode);
+      tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
 				GEN_INT (i / 2));
       break;
     case V8SFmode:
@@ -37957,19 +38000,26 @@ emit_reduc_half (rtx dest, rtx src, int i)
     case V8SImode:
     case V4DImode:
       if (i == 256)
-	tem = gen_avx2_permv2ti (gen_lowpart (V4DImode, dest),
-				 gen_lowpart (V4DImode, src),
-				 gen_lowpart (V4DImode, src),
-				 const1_rtx);
+	{
+	  if (GET_MODE (dest) != V4DImode)
+	    d = gen_reg_rtx (V4DImode);
+	  tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
+				   gen_lowpart (V4DImode, src),
+				   const1_rtx);
+	}
       else
-	tem = gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, dest),
-				  gen_lowpart (V2TImode, src),
-				  GEN_INT (i / 2));
+	{
+	  d = gen_reg_rtx (V2TImode);
+	  tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
+				    GEN_INT (i / 2));
+	}
       break;
     default:
       gcc_unreachable ();
     }
   emit_insn (tem);
+  if (d != dest)
+    emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
 }
 
 /* Expand a vector reduction.  FN is the binary pattern to reduce;
@@ -39462,6 +39512,8 @@ expand_vec_perm_blend (struct expand_vec_perm_d *d)
 	emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
       else
 	emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
+      if (target != d->target)
+	emit_move_insn (d->target, gen_lowpart (d->vmode, target));
       return true;
     }
 
@@ -39471,7 +39523,7 @@ expand_vec_perm_blend (struct expand_vec_perm_d *d)
       /* FALLTHRU */
 
     do_subreg:
-      target = gen_lowpart (vmode, target);
+      target = gen_reg_rtx (vmode);
       op0 = gen_lowpart (vmode, op0);
       op1 = gen_lowpart (vmode, op1);
       break;
@@ -39525,7 +39577,7 @@ expand_vec_perm_blend (struct expand_vec_perm_d *d)
 
       vmode = V32QImode;
       nelt = 32;
-      target = gen_lowpart (vmode, target);
+      target = gen_reg_rtx (vmode);
       op0 = gen_lowpart (vmode, op0);
       op1 = gen_lowpart (vmode, op1);
       goto finish_pblendvb;
@@ -39558,6 +39610,8 @@ expand_vec_perm_blend (struct expand_vec_perm_d *d)
   x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
   x = gen_rtx_SET (VOIDmode, target, x);
   emit_insn (x);
+  if (target != d->target)
+    emit_move_insn (d->target, gen_lowpart (d->vmode, target));
 
   return true;
 }
@@ -39663,13 +39717,17 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
 
 	  /* Use vperm2i128 insn.  The pattern uses
 	     V4DImode instead of V2TImode.  */
-	  target = gen_lowpart (V4DImode, d->target);
+	  target = d->target;
+	  if (d->vmode != V4DImode)
+	    target = gen_reg_rtx (V4DImode);
 	  op0 = gen_lowpart (V4DImode, d->op0);
 	  op1 = gen_lowpart (V4DImode, d->op1);
 	  rperm[0]
 	    = GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0)
 		       || ((d->perm[nelt / 2] & (nelt / 2)) ? 2 : 0));
 	  emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
+	  if (target != d->target)
+	    emit_move_insn (d->target, gen_lowpart (d->vmode, target));
 	  return true;
 	}
       return false;
@@ -39704,9 +39762,15 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
 	    perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
 	  if (d->testing_p)
 	    return true;
-	  return expand_vselect (gen_lowpart (V4DImode, d->target),
-				 gen_lowpart (V4DImode, d->op0),
-				 perm, 4, false);
+	  target = gen_reg_rtx (V4DImode);
+	  if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
+			      perm, 4, false))
+	    {
+	      emit_move_insn (d->target,
+			      gen_lowpart (d->vmode, target));
+	      return true;
+	    }
+	  return false;
 	}
 
       /* Next see if vpermd can be used.  */
@@ -39758,7 +39822,9 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
 			   gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
   vperm = force_reg (vmode, vperm);
 
-  target = gen_lowpart (vmode, d->target);
+  target = d->target;
+  if (d->vmode != vmode)
+    target = gen_reg_rtx (vmode);
   op0 = gen_lowpart (vmode, d->op0);
   if (d->one_operand_p)
     {
@@ -39776,6 +39842,8 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
       op1 = gen_lowpart (vmode, d->op1);
       emit_insn (gen_xop_pperm (target, op0, op1, vperm));
     }
+  if (target != d->target)
+    emit_move_insn (d->target, gen_lowpart (d->vmode, target));
 
   return true;
 }
@@ -39975,7 +40043,8 @@ expand_vec_perm_palignr (struct expand_vec_perm_d *d)
   unsigned i, nelt = d->nelt;
   unsigned min, max;
   bool in_order, ok;
-  rtx shift;
+  rtx shift, target;
+  struct expand_vec_perm_d dcopy;
 
   /* Even with AVX, palignr only operates on 128-bit vectors.  */
   if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
@@ -39998,29 +40067,33 @@ expand_vec_perm_palignr (struct expand_vec_perm_d *d)
   if (d->testing_p)
     return true;
 
+  dcopy = *d;
   shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
-  emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
-				  gen_lowpart (TImode, d->op1),
+  target = gen_reg_rtx (TImode);
+  emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
 				  gen_lowpart (TImode, d->op0), shift));
 
-  d->op0 = d->op1 = d->target;
-  d->one_operand_p = true;
+  dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
+  dcopy.one_operand_p = true;
 
   in_order = true;
   for (i = 0; i < nelt; ++i)
     {
-      unsigned e = d->perm[i] - min;
+      unsigned e = dcopy.perm[i] - min;
       if (e != i)
 	in_order = false;
-      d->perm[i] = e;
+      dcopy.perm[i] = e;
     }
 
   /* Test for the degenerate case where the alignment by itself
      produces the desired permutation.  */
   if (in_order)
-    return true;
+    {
+      emit_move_insn (d->target, dcopy.op0);
+      return true;
+    }
 
-  ok = expand_vec_perm_1 (d);
+  ok = expand_vec_perm_1 (&dcopy);
   gcc_assert (ok);
 
   return ok;
@@ -40274,10 +40347,10 @@ expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
       else
 	dfinal.perm[i] = e;
     }
-  dfinal.op0 = gen_reg_rtx (dfinal.vmode);
+  dremap.target = gen_reg_rtx (dremap.vmode);
+  dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
   dfinal.op1 = dfinal.op0;
   dfinal.one_operand_p = true;
-  dremap.target = dfinal.op0;
 
   /* Test if the final remap can be done with a single insn.  For V4SFmode or
      V4SImode this *will* succeed.  For V8HImode or V16QImode it may not.  */
@@ -40294,7 +40367,6 @@ expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
 
   if (dremap.vmode != dfinal.vmode)
     {
-      dremap.target = gen_lowpart (dremap.vmode, dremap.target);
       dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
       dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
     }
@@ -40745,8 +40817,12 @@ expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
   op = gen_lowpart (V16QImode, d->op1);
   emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
 
-  op = gen_lowpart (V16QImode, d->target);
+  op = d->target;
+  if (d->vmode != V16QImode)
+    op = gen_reg_rtx (V16QImode);
   emit_insn (gen_iorv16qi3 (op, l, h));
+  if (op != d->target)
+    emit_move_insn (d->target, gen_lowpart (d->vmode, op));
 
   return true;
 }
@@ -40812,8 +40888,12 @@ expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
   op = gen_lowpart (V32QImode, d->op0);
   emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
 
-  op = gen_lowpart (V32QImode, d->target);
+  op = d->target;
+  if (d->vmode != V32QImode)
+    op = gen_reg_rtx (V32QImode);
   emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
+  if (op != d->target)
+    emit_move_insn (d->target, gen_lowpart (d->vmode, op));
 
   return true;
 }
@@ -40889,10 +40969,11 @@ expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
   emit_insn (gen_iorv32qi3 (ior, l, h));
 
   /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation.  */
-  op = gen_lowpart (V4DImode, d->target);
+  op = gen_reg_rtx (V4DImode);
   ior = gen_lowpart (V4DImode, ior);
   emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
 				  const1_rtx, GEN_INT (3)));
+  emit_move_insn (d->target, gen_lowpart (d->vmode, op));
 
   return true;
 }
@@ -40903,7 +40984,7 @@ expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
 static bool
 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
 {
-  rtx t1, t2, t3;
+  rtx t1, t2, t3, t4, t5;
 
   switch (d->vmode)
     {
@@ -41015,10 +41096,17 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
 	{
 	  struct expand_vec_perm_d d_copy = *d;
 	  d_copy.vmode = V4DFmode;
-	  d_copy.target = gen_lowpart (V4DFmode, d->target);
+	  d_copy.target = gen_reg_rtx (V4DFmode);
 	  d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
 	  d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
-	  return expand_vec_perm_even_odd_1 (&d_copy, odd);
+	  if (expand_vec_perm_even_odd_1 (&d_copy, odd))
+	    {
+	      if (!d->testing_p)
+		emit_move_insn (d->target,
+				gen_lowpart (V4DImode, d_copy.target));
+	      return true;
+	    }
+	  return false;
 	}
 
       t1 = gen_reg_rtx (V4DImode);
@@ -41041,44 +41129,51 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
 	{
 	  struct expand_vec_perm_d d_copy = *d;
 	  d_copy.vmode = V8SFmode;
-	  d_copy.target = gen_lowpart (V8SFmode, d->target);
+	  d_copy.target = gen_reg_rtx (V8SFmode);
 	  d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
 	  d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
-	  return expand_vec_perm_even_odd_1 (&d_copy, odd);
+	  if (expand_vec_perm_even_odd_1 (&d_copy, odd))
+	    {
+	      if (!d->testing_p)
+		emit_move_insn (d->target,
+				gen_lowpart (V8SImode, d_copy.target));
+	      return true;
+	    }
+	  return false;
 	}
 
       t1 = gen_reg_rtx (V8SImode);
       t2 = gen_reg_rtx (V8SImode);
+      t3 = gen_reg_rtx (V4DImode);
+      t4 = gen_reg_rtx (V4DImode);
+      t5 = gen_reg_rtx (V4DImode);
 
       /* Shuffle the lanes around into
 	 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }.  */
-      emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t1),
-				    gen_lowpart (V4DImode, d->op0),
+      emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
 				    gen_lowpart (V4DImode, d->op1),
 				    GEN_INT (0x20)));
-      emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t2),
-				    gen_lowpart (V4DImode, d->op0),
+      emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
 				    gen_lowpart (V4DImode, d->op1),
 				    GEN_INT (0x31)));
 
       /* Swap the 2nd and 3rd position in each lane into
 	 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }.  */
-      emit_insn (gen_avx2_pshufdv3 (t1, t1,
+      emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
 				    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
-      emit_insn (gen_avx2_pshufdv3 (t2, t2,
+      emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
 				    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
 
       /* Now an vpunpck[lh]qdq will produce
 	 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }.  */
       if (odd)
-	t3 = gen_avx2_interleave_highv4di (gen_lowpart (V4DImode, d->target),
-					   gen_lowpart (V4DImode, t1),
+	t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
 					   gen_lowpart (V4DImode, t2));
       else
-	t3 = gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode, d->target),
-					  gen_lowpart (V4DImode, t1),
+	t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
 					  gen_lowpart (V4DImode, t2));
       emit_insn (t3);
+      emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
       break;
 
     default:
@@ -41116,7 +41211,7 @@ expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
   unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
   enum machine_mode vmode = d->vmode;
   unsigned char perm2[4];
-  rtx op0 = d->op0;
+  rtx op0 = d->op0, dest;
   bool ok;
 
   switch (vmode)
@@ -41162,9 +41257,11 @@ expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
       while (vmode != V4SImode);
 
       memset (perm2, elt, 4);
-      ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4,
-			   d->testing_p);
+      dest = gen_reg_rtx (V4SImode);
+      ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
       gcc_assert (ok);
+      if (!d->testing_p)
+	emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
       return true;
 
     case V32QImode:
@@ -41306,8 +41403,12 @@ expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
     }
 
   gcc_assert (l[0] && l[1]);
-  op = gen_lowpart (V32QImode, d->target);
+  op = d->target;
+  if (d->vmode != V32QImode)
+    op = gen_reg_rtx (V32QImode);
   emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
+  if (op != d->target)
+    emit_move_insn (d->target, gen_lowpart (d->vmode, op));
   return true;
 }
 
@@ -41875,7 +41976,9 @@ ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
 				    op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
       gcc_assert (t1 && t2);
 
-      ix86_expand_vec_interleave (gen_lowpart (mode, dest), t1, t2, high_p);
+      t3 = gen_reg_rtx (mode);
+      ix86_expand_vec_interleave (t3, t1, t2, high_p);
+      emit_move_insn (dest, gen_lowpart (wmode, t3));
       break;
 
     case V16QImode:
@@ -41896,14 +41999,14 @@ ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
 void
 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
 {
-  rtx res_1, res_2;
+  rtx res_1, res_2, res_3, res_4;
 
   res_1 = gen_reg_rtx (V4SImode);
   res_2 = gen_reg_rtx (V4SImode);
-  ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode, res_1),
-				 op1, op2, true, false);
-  ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode, res_2),
-				 op1, op2, true, true);
+  res_3 = gen_reg_rtx (V2DImode);
+  res_4 = gen_reg_rtx (V2DImode);
+  ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
+  ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
 
   /* Move the results in element 2 down to element 1; we don't care
      what goes in elements 2 and 3.  Then we can merge the parts
@@ -41917,9 +42020,11 @@ ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
      In both cases the cost of the reformatting stall was too high
      and the overall sequence slower.  */
 
-  emit_insn (gen_sse2_pshufd_1 (res_1, res_1, const0_rtx, const2_rtx,
+  emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
+				const0_rtx, const2_rtx,
 				const0_rtx, const0_rtx));
-  emit_insn (gen_sse2_pshufd_1 (res_2, res_2, const0_rtx, const2_rtx,
+  emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
+				const0_rtx, const2_rtx,
 				const0_rtx, const0_rtx));
   res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
@@ -42138,12 +42243,17 @@ ix86_expand_pinsr (rtx *operands)
 	  return false;
 	}
 
-      dst = gen_lowpart (dstmode, dst);
+      rtx d = dst;
+      if (GET_MODE (dst) != dstmode)
+	d = gen_reg_rtx (dstmode);
       src = gen_lowpart (srcmode, src);
 
       pos /= size;
 
-      emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
+      emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
+			GEN_INT (1 << pos)));
+      if (d != dst)
+	emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
       return true;
     }
 
gcc/config/i386/sse.md
@@ -800,10 +800,13 @@
 					gen_rtx_SUBREG (SImode, operands[1], 4)));
 	  emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
 						 operands[2]));
 	}
     }
   else if (memory_operand (operands[1], DImode))
-    emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
-				   operands[1], const0_rtx));
+    {
+      rtx tmp = gen_reg_rtx (V2DImode);
+      emit_insn (gen_vec_concatv2di (tmp, operands[1], const0_rtx));
+      emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp));
+    }
   else
     gcc_unreachable ();
 })
@@ -4208,7 +4211,7 @@
    (match_operand:V2DF 2 "nonimmediate_operand")]
   "TARGET_SSE2"
 {
-  rtx tmp0, tmp1;
+  rtx tmp0, tmp1, tmp2;
 
   if (TARGET_AVX && !TARGET_PREFER_AVX128)
     {
@@ -4222,13 +4225,14 @@
     {
       tmp0 = gen_reg_rtx (V4SImode);
       tmp1 = gen_reg_rtx (V4SImode);
+      tmp2 = gen_reg_rtx (V2DImode);
 
       emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
       emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
-      emit_insn
-       (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
-				    gen_lowpart (V2DImode, tmp0),
-				    gen_lowpart (V2DImode, tmp1)));
+      emit_insn (gen_vec_interleave_lowv2di (tmp2,
+					     gen_lowpart (V2DImode, tmp0),
+					     gen_lowpart (V2DImode, tmp1)));
+      emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
     }
   DONE;
 })
@@ -4289,7 +4293,7 @@
    (match_operand:V2DF 2 "nonimmediate_operand")]
   "TARGET_SSE2"
 {
-  rtx tmp0, tmp1;
+  rtx tmp0, tmp1, tmp2;
 
   if (TARGET_AVX && !TARGET_PREFER_AVX128)
     {
@@ -4303,13 +4307,14 @@
     {
      tmp0 = gen_reg_rtx (V4SImode);
       tmp1 = gen_reg_rtx (V4SImode);
+      tmp2 = gen_reg_rtx (V2DImode);
 
       emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
       emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
-      emit_insn
-       (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
-				    gen_lowpart (V2DImode, tmp0),
-				    gen_lowpart (V2DImode, tmp1)));
+      emit_insn (gen_vec_interleave_lowv2di (tmp2,
+					     gen_lowpart (V2DImode, tmp0),
+					     gen_lowpart (V2DImode, tmp1)));
+      emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
     }
   DONE;
 })
@@ -7328,14 +7333,16 @@
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_expand "vec_shl_<mode>"
-  [(set (match_operand:VI_128 0 "register_operand")
+  [(set (match_dup 3)
 	(ashift:V1TI
 	 (match_operand:VI_128 1 "register_operand")
-	 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))]
+	 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
+   (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
   "TARGET_SSE2"
 {
-  operands[0] = gen_lowpart (V1TImode, operands[0]);
   operands[1] = gen_lowpart (V1TImode, operands[1]);
+  operands[3] = gen_reg_rtx (V1TImode);
+  operands[4] = gen_lowpart (<MODE>mode, operands[3]);
 })
 
 (define_insn "<sse2_avx2>_ashl<mode>3"
@@ -7365,14 +7372,16 @@
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_expand "vec_shr_<mode>"
-  [(set (match_operand:VI_128 0 "register_operand")
+  [(set (match_dup 3)
	(lshiftrt:V1TI
 	 (match_operand:VI_128 1 "register_operand")
-	 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))]
+	 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
+   (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
   "TARGET_SSE2"
 {
-  operands[0] = gen_lowpart (V1TImode, operands[0]);
   operands[1] = gen_lowpart (V1TImode, operands[1]);
+  operands[3] = gen_reg_rtx (V1TImode);
+  operands[4] = gen_lowpart (<MODE>mode, operands[3]);
 })
 
 (define_insn "<sse2_avx2>_lshr<mode>3"
@@ -8542,12 +8551,13 @@
 {
   rtx t1 = gen_reg_rtx (<MODE>mode);
   rtx t2 = gen_reg_rtx (<MODE>mode);
+  rtx t3 = gen_reg_rtx (V4DImode);
   emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
   emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
-  emit_insn (gen_avx2_permv2ti
-	     (gen_lowpart (V4DImode, operands[0]),
-	      gen_lowpart (V4DImode, t1),
-	      gen_lowpart (V4DImode, t2), GEN_INT (1 + (3 << 4))));
+  emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
+				gen_lowpart (V4DImode, t2),
+				GEN_INT (1 + (3 << 4))));
+  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
   DONE;
 })
@@ -8559,12 +8569,13 @@
 {
   rtx t1 = gen_reg_rtx (<MODE>mode);
   rtx t2 = gen_reg_rtx (<MODE>mode);
+  rtx t3 = gen_reg_rtx (V4DImode);
   emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
   emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
-  emit_insn (gen_avx2_permv2ti
-	     (gen_lowpart (V4DImode, operands[0]),
-	      gen_lowpart (V4DImode, t1),
-	      gen_lowpart (V4DImode, t2), GEN_INT (0 + (2 << 4))));
+  emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
+				gen_lowpart (V4DImode, t2),
+				GEN_INT (0 + (2 << 4))));
+  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
   DONE;
 })
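The two define_expand rewrites just above (vec_shl_<mode>, vec_shr_<mode>)
apply the same idea at the pattern level: the shift's first set now targets a
fresh V1TImode scratch (match_dup 3), and a second set stores that scratch's
lowpart into operand 0, so neither SET_DEST is a punning SUBREG. Open-coded
in C for the V8HImode instance it would look roughly like this (a sketch;
gen_sse2_ashlv1ti3 is assumed here to be the generator for the SSE2 instance
of the <sse2_avx2>_ashl<mode>3 insn):

static void
expand_vec_shl_v8hi (rtx op0, rtx op1, rtx amount)
{
  rtx tmp = gen_reg_rtx (V1TImode);              /* match_dup 3 */
  /* First set: whole-register shift, destination is a plain REG.  */
  emit_insn (gen_sse2_ashlv1ti3 (tmp, gen_lowpart (V1TImode, op1), amount));
  /* Second set: the mode punning happens on the source side only.  */
  emit_move_insn (op0, gen_lowpart (V8HImode, tmp));   /* match_dup 4 */
}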
gcc/expmed.c
@@ -624,13 +624,28 @@ store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 	  || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
     {
       /* Use the subreg machinery either to narrow OP0 to the required
-	 words or to cope with mode punning between equal-sized modes.  */
-      rtx sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
-				     bitnum / BITS_PER_UNIT);
-      if (sub)
+	 words or to cope with mode punning between equal-sized modes.
+	 In the latter case, use subreg on the rhs side, not lhs.  */
+      rtx sub;
+
+      if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 	{
-	  emit_move_insn (sub, value);
-	  return true;
+	  sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
+	  if (sub)
+	    {
+	      emit_move_insn (op0, sub);
+	      return true;
+	    }
+	}
+      else
+	{
+	  sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
+				     bitnum / BITS_PER_UNIT);
+	  if (sub)
+	    {
+	      emit_move_insn (sub, value);
+	      return true;
+	    }
 	}
     }
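The expmed.c hunk is the complementary case: when the store covers all of OP0
and only the mode differs, store_bit_field_1 now puns the source value instead
of the destination. The essential shape of the new branch (a sketch, assuming
fieldmode and GET_MODE (op0) are equal-sized):

/* Emit  (set (reg:V4SI op0) (subreg:V4SI (reg:TI value) 0))
   rather than
         (set (subreg:TI (reg:V4SI op0) 0) (reg:TI value)).  */
rtx sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
if (sub)
  emit_move_insn (op0, sub);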
gcc/optabs.c
@@ -6624,8 +6624,8 @@ expand_vec_perm (enum machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
       icode = direct_optab_handler (vec_perm_const_optab, qimode);
       if (icode != CODE_FOR_nothing)
 	{
-	  tmp = expand_vec_perm_1 (icode, gen_lowpart (qimode, target),
-				   gen_lowpart (qimode, v0),
+	  tmp = mode != qimode ? gen_reg_rtx (qimode) : target;
+	  tmp = expand_vec_perm_1 (icode, tmp, gen_lowpart (qimode, v0),
 				   gen_lowpart (qimode, v1), sel_qi);
 	  if (tmp)
 	    return gen_lowpart (mode, tmp);
@@ -6674,7 +6674,7 @@ expand_vec_perm (enum machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
 	}
       tmp = gen_rtx_CONST_VECTOR (qimode, vec);
       sel = gen_lowpart (qimode, sel);
-      sel = expand_vec_perm (qimode, sel, sel, tmp, NULL);
+      sel = expand_vec_perm (qimode, gen_reg_rtx (qimode), sel, tmp, NULL);
       gcc_assert (sel != NULL);
 
       /* Add the byte offset to each byte element.  */
@@ -6689,8 +6689,8 @@ expand_vec_perm (enum machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
 	  gcc_assert (sel_qi != NULL);
 	}
 
-      tmp = expand_vec_perm_1 (icode, gen_lowpart (qimode, target),
-			       gen_lowpart (qimode, v0),
+      tmp = mode != qimode ? gen_reg_rtx (qimode) : target;
+      tmp = expand_vec_perm_1 (icode, tmp, gen_lowpart (qimode, v0),
 			       gen_lowpart (qimode, v1), sel_qi);
       if (tmp)
 	tmp = gen_lowpart (mode, tmp);