optabs.c (expand_vec_perm): Avoid vector mode punning SUBREGs in SET_DEST.

* optabs.c (expand_vec_perm): Avoid vector mode punning
	SUBREGs in SET_DEST.
	* expmed.c (store_bit_field_1): Likewise.
	* config/i386/sse.md (movdi_to_sse, vec_pack_sfix_trunc_v2df,
	vec_pack_sfix_v2df, vec_shl_<mode>, vec_shr_<mode>,
	vec_interleave_high<mode>, vec_interleave_low<mode>): Likewise.
	* config/i386/i386.c (ix86_expand_vector_move_misalign,
	ix86_expand_sse_movcc, ix86_expand_int_vcond, ix86_expand_vec_perm,
	ix86_expand_sse_unpack, ix86_expand_args_builtin,
	ix86_expand_vector_init_duplicate, ix86_expand_vector_set,
	emit_reduc_half, expand_vec_perm_blend, expand_vec_perm_pshufb,
	expand_vec_perm_interleave2, expand_vec_perm_pshufb2,
	expand_vec_perm_vpshufb2_vpermq,
	expand_vec_perm_vpshufb2_vpermq_even_odd, expand_vec_perm_even_odd_1,
	expand_vec_perm_broadcast_1, expand_vec_perm_vpshufb4_vpermq2,
	ix86_expand_sse2_mulv4si3, ix86_expand_pinsr): Likewise.
	(expand_vec_perm_palignr): Likewise.  Modify a copy of *d rather
	than *d itself.

From-SVN: r204274
Jakub Jelinek, 2013-10-31 20:06:49 +01:00 (committed by Jakub Jelinek)
parent 5a9785fb4c
commit d8c84975e6
5 changed files with 301 additions and 144 deletions
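Every hunk below applies the same transformation: instead of making a vector mode-punning SUBREG the SET_DEST of an insn (via gen_lowpart on the destination), the code now computes into a fresh pseudo in the mode the insn pattern wants and puns on the source side of a following move. A minimal before/after sketch of the idiom, for reference only (gen_foo and the modes are illustrative placeholders, not any specific hunk below):

  /* Before: SET_DEST is a mode-punning SUBREG of DEST.  */
  emit_insn (gen_foo (gen_lowpart (V4DImode, dest), op0, op1));

  /* After: compute into a fresh V4DImode pseudo, then lowpart-pun
     on the source side of a plain move instead.  */
  rtx tmp = gen_reg_rtx (V4DImode);
  emit_insn (gen_foo (tmp, op0, op1));
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp));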

gcc/ChangeLog

@@ -1,3 +1,24 @@
+2013-10-31  Jakub Jelinek  <jakub@redhat.com>
+
+	* optabs.c (expand_vec_perm): Avoid vector mode punning
+	SUBREGs in SET_DEST.
+	* expmed.c (store_bit_field_1): Likewise.
+	* config/i386/sse.md (movdi_to_sse, vec_pack_sfix_trunc_v2df,
+	vec_pack_sfix_v2df, vec_shl_<mode>, vec_shr_<mode>,
+	vec_interleave_high<mode>, vec_interleave_low<mode>): Likewise.
+	* config/i386/i386.c (ix86_expand_vector_move_misalign,
+	ix86_expand_sse_movcc, ix86_expand_int_vcond, ix86_expand_vec_perm,
+	ix86_expand_sse_unpack, ix86_expand_args_builtin,
+	ix86_expand_vector_init_duplicate, ix86_expand_vector_set,
+	emit_reduc_half, expand_vec_perm_blend, expand_vec_perm_pshufb,
+	expand_vec_perm_interleave2, expand_vec_perm_pshufb2,
+	expand_vec_perm_vpshufb2_vpermq,
+	expand_vec_perm_vpshufb2_vpermq_even_odd, expand_vec_perm_even_odd_1,
+	expand_vec_perm_broadcast_1, expand_vec_perm_vpshufb4_vpermq2,
+	ix86_expand_sse2_mulv4si3, ix86_expand_pinsr): Likewise.
+	(expand_vec_perm_palignr): Likewise.  Modify a copy of *d rather
+	than *d itself.
+
 2013-10-31  Uros Bizjak  <ubizjak@gmail.com>
 
 	* config/i386/i386.c (ix86_expand_sse2_abs): Rename function arguments.

gcc/config/i386/i386.c

@@ -16803,6 +16803,8 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
 	}
       else
 	{
+	  rtx t;
+
 	  if (TARGET_AVX
 	      || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
 	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
@@ -16821,18 +16823,22 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
 	      return;
 	    }
 
-	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
-	    emit_move_insn (op0, CONST0_RTX (mode));
-	  else
-	    emit_clobber (op0);
-
 	  if (mode != V4SFmode)
-	    op0 = gen_lowpart (V4SFmode, op0);
+	    t = gen_reg_rtx (V4SFmode);
+	  else
+	    t = op0;
+
+	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
+	    emit_move_insn (t, CONST0_RTX (V4SFmode));
+	  else
+	    emit_clobber (t);
+
 	  m = adjust_address (op1, V2SFmode, 0);
-	  emit_insn (gen_sse_loadlps (op0, op0, m));
+	  emit_insn (gen_sse_loadlps (t, t, m));
 	  m = adjust_address (op1, V2SFmode, 8);
-	  emit_insn (gen_sse_loadhps (op0, op0, m));
+	  emit_insn (gen_sse_loadhps (t, t, m));
+	  if (mode != V4SFmode)
+	    emit_move_insn (op0, gen_lowpart (mode, t));
 	}
     }
   else if (MEM_P (op0))
@@ -20473,6 +20479,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
   else
     {
       rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
+      rtx d = dest;
 
       if (!nonimmediate_operand (op_true, mode))
 	op_true = force_reg (mode, op_true);
@@ -20496,7 +20503,8 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
 	  if (TARGET_SSE4_1)
 	    {
 	      gen = gen_sse4_1_pblendvb;
-	      dest = gen_lowpart (V16QImode, dest);
+	      if (mode != V16QImode)
+		d = gen_reg_rtx (V16QImode);
 	      op_false = gen_lowpart (V16QImode, op_false);
 	      op_true = gen_lowpart (V16QImode, op_true);
 	      cmp = gen_lowpart (V16QImode, cmp);
@@ -20517,7 +20525,8 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
 	  if (TARGET_AVX2)
 	    {
 	      gen = gen_avx2_pblendvb;
-	      dest = gen_lowpart (V32QImode, dest);
+	      if (mode != V32QImode)
+		d = gen_reg_rtx (V32QImode);
 	      op_false = gen_lowpart (V32QImode, op_false);
 	      op_true = gen_lowpart (V32QImode, op_true);
 	      cmp = gen_lowpart (V32QImode, cmp);
@@ -20528,7 +20537,11 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
 	}
 
       if (gen != NULL)
-	emit_insn (gen (dest, op_false, op_true, cmp));
+	{
+	  emit_insn (gen (d, op_false, op_true, cmp));
+	  if (d != dest)
+	    emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
+	}
       else
 	{
 	  op_true = force_reg (mode, op_true);
@@ -20849,8 +20862,7 @@ ix86_expand_int_vcond (rtx operands[])
       else
 	{
 	  gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
-	  x = ix86_expand_sse_cmp (gen_lowpart (mode, operands[0]),
-				   code, cop0, cop1,
+	  x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
 				   operands[1+negate], operands[2-negate]);
 	  x = gen_lowpart (data_mode, x);
 	}
@@ -20869,7 +20881,7 @@ ix86_expand_vec_perm (rtx operands[])
   rtx op0 = operands[1];
   rtx op1 = operands[2];
   rtx mask = operands[3];
-  rtx t1, t2, t3, t4, vt, vt2, vec[32];
+  rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
   enum machine_mode mode = GET_MODE (op0);
   enum machine_mode maskmode = GET_MODE (mask);
   int w, e, i;
@@ -20937,7 +20949,7 @@ ix86_expand_vec_perm (rtx operands[])
 
 	  /* Continue as if V8SImode (resp. V32QImode) was used initially.  */
 	  operands[3] = mask = t1;
-	  target = gen_lowpart (mode, target);
+	  target = gen_reg_rtx (mode);
 	  op0 = gen_lowpart (mode, op0);
 	  op1 = gen_lowpart (mode, op1);
 	}
@@ -20949,7 +20961,12 @@ ix86_expand_vec_perm (rtx operands[])
 	     the high bits of the shuffle elements.  No need for us to
 	     perform an AND ourselves.  */
 	  if (one_operand_shuffle)
-	    emit_insn (gen_avx2_permvarv8si (target, op0, mask));
+	    {
+	      emit_insn (gen_avx2_permvarv8si (target, op0, mask));
+	      if (target != operands[0])
+		emit_move_insn (operands[0],
+				gen_lowpart (GET_MODE (operands[0]), target));
+	    }
 	  else
 	    {
 	      t1 = gen_reg_rtx (V8SImode);
@@ -21022,13 +21039,13 @@ ix86_expand_vec_perm (rtx operands[])
 	     stands for other 12 bytes.  */
 	  /* The bit whether element is from the same lane or the other
 	     lane is bit 4, so shift it up by 3 to the MSB position.  */
-	  emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode, t1),
-				    gen_lowpart (V4DImode, mask),
+	  t5 = gen_reg_rtx (V4DImode);
+	  emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
 				    GEN_INT (3)));
 	  /* Clear MSB bits from the mask just in case it had them set.  */
 	  emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
 	  /* After this t1 will have MSB set for elements from other lane.  */
-	  emit_insn (gen_xorv32qi3 (t1, t1, vt2));
+	  emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
 	  /* Clear bits other than MSB.  */
 	  emit_insn (gen_andv32qi3 (t1, t1, vt));
 	  /* Or in the lower bits from mask into t3.  */
@@ -21037,8 +21054,8 @@ ix86_expand_vec_perm (rtx operands[])
 	     lane.  */
 	  emit_insn (gen_xorv32qi3 (t1, t1, vt));
 	  /* Swap 128-bit lanes in t3.  */
-	  emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
-					  gen_lowpart (V4DImode, t3),
+	  t6 = gen_reg_rtx (V4DImode);
+	  emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
 					  const2_rtx, GEN_INT (3),
 					  const0_rtx, const1_rtx));
 	  /* And or in the lower bits from mask into t1.  */
@@ -21048,15 +21065,20 @@ ix86_expand_vec_perm (rtx operands[])
 	      /* Each of these shuffles will put 0s in places where
 		 element from the other 128-bit lane is needed, otherwise
 		 will shuffle in the requested value.  */
-	      emit_insn (gen_avx2_pshufbv32qi3 (t3, op0, t3));
+	      emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
+						gen_lowpart (V32QImode, t6)));
 	      emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
 	      /* For t3 the 128-bit lanes are swapped again.  */
-	      emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
-					      gen_lowpart (V4DImode, t3),
+	      t7 = gen_reg_rtx (V4DImode);
+	      emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
 					      const2_rtx, GEN_INT (3),
 					      const0_rtx, const1_rtx));
 	      /* And oring both together leads to the result.  */
-	      emit_insn (gen_iorv32qi3 (target, t1, t3));
+	      emit_insn (gen_iorv32qi3 (target, t1,
+					gen_lowpart (V32QImode, t7)));
+	      if (target != operands[0])
+		emit_move_insn (operands[0],
+				gen_lowpart (GET_MODE (operands[0]), target));
 	      return;
 	    }
@@ -21064,20 +21086,22 @@ ix86_expand_vec_perm (rtx operands[])
 	      /* Similarly to the above one_operand_shuffle code,
 		 just for repeated twice for each operand.  merge_two:
 		 code will merge the two results together.  */
-	      emit_insn (gen_avx2_pshufbv32qi3 (t4, op0, t3));
-	      emit_insn (gen_avx2_pshufbv32qi3 (t3, op1, t3));
+	      emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
+						gen_lowpart (V32QImode, t6)));
+	      emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
+						gen_lowpart (V32QImode, t6)));
 	      emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
 	      emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
-	      emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t4),
-					      gen_lowpart (V4DImode, t4),
+	      t7 = gen_reg_rtx (V4DImode);
+	      emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
 					      const2_rtx, GEN_INT (3),
 					      const0_rtx, const1_rtx));
-	      emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
-					      gen_lowpart (V4DImode, t3),
+	      t8 = gen_reg_rtx (V4DImode);
+	      emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
 					      const2_rtx, GEN_INT (3),
 					      const0_rtx, const1_rtx));
-	      emit_insn (gen_iorv32qi3 (t4, t2, t4));
-	      emit_insn (gen_iorv32qi3 (t3, t1, t3));
+	      emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
+	      emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
 	      t1 = t4;
 	      t2 = t3;
 	      goto merge_two;
@@ -21146,15 +21170,24 @@ ix86_expand_vec_perm (rtx operands[])
   /* The actual shuffle operations all operate on V16QImode.  */
   op0 = gen_lowpart (V16QImode, op0);
   op1 = gen_lowpart (V16QImode, op1);
-  target = gen_lowpart (V16QImode, target);
 
   if (TARGET_XOP)
     {
+      if (GET_MODE (target) != V16QImode)
+	target = gen_reg_rtx (V16QImode);
       emit_insn (gen_xop_pperm (target, op0, op1, mask));
+      if (target != operands[0])
+	emit_move_insn (operands[0],
+			gen_lowpart (GET_MODE (operands[0]), target));
     }
   else if (one_operand_shuffle)
    {
+      if (GET_MODE (target) != V16QImode)
+	target = gen_reg_rtx (V16QImode);
      emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
+      if (target != operands[0])
+	emit_move_insn (operands[0],
+			gen_lowpart (GET_MODE (operands[0]), target));
    }
  else
    {
@@ -21194,7 +21227,9 @@ ix86_expand_vec_perm (rtx operands[])
       mask = expand_simple_binop (maskmode, AND, mask, vt,
 				  NULL_RTX, 0, OPTAB_DIRECT);
 
-      xops[0] = gen_lowpart (mode, operands[0]);
+      if (GET_MODE (target) != mode)
+	target = gen_reg_rtx (mode);
+      xops[0] = target;
       xops[1] = gen_lowpart (mode, t2);
       xops[2] = gen_lowpart (mode, t1);
       xops[3] = gen_rtx_EQ (maskmode, mask, vt);
@@ -21202,6 +21237,9 @@ ix86_expand_vec_perm (rtx operands[])
       xops[5] = vt;
       ok = ix86_expand_int_vcond (xops);
       gcc_assert (ok);
+      if (target != operands[0])
+	emit_move_insn (operands[0],
+			gen_lowpart (GET_MODE (operands[0]), target));
     }
 }
@@ -21280,10 +21318,10 @@ ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
       else if (high_p)
 	{
 	  /* Shift higher 8 bytes to lower 8 bytes.  */
-	  tmp = gen_reg_rtx (imode);
-	  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
-					 gen_lowpart (V1TImode, src),
+	  tmp = gen_reg_rtx (V1TImode);
+	  emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
 					 GEN_INT (64)));
+	  tmp = gen_lowpart (imode, tmp);
 	}
       else
 	tmp = src;
@@ -21324,7 +21362,9 @@ ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
       tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
 				 src, pc_rtx, pc_rtx);
-      emit_insn (unpack (gen_lowpart (imode, dest), src, tmp));
+      rtx tmp2 = gen_reg_rtx (imode);
+      emit_insn (unpack (tmp2, src, tmp));
+      emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
     }
 }
@@ -31967,8 +32007,8 @@ ix86_expand_args_builtin (const struct builtin_description *d,
 	}
       else
 	{
-	  target = gen_reg_rtx (rmode);
-	  real_target = simplify_gen_subreg (tmode, target, rmode, 0);
+	  real_target = gen_reg_rtx (tmode);
+	  target = simplify_gen_subreg (rmode, real_target, tmode, 0);
 	}
 
       for (i = 0; i < nargs; i++)
@@ -36691,8 +36731,9 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
 	    emit_move_insn (tmp1, gen_lowpart (SImode, val));
 
 	    /* Insert the SImode value as low element of a V4SImode vector.  */
-	    tmp2 = gen_lowpart (V4SImode, dperm.op0);
+	    tmp2 = gen_reg_rtx (V4SImode);
 	    emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
+	    emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
 
 	    ok = (expand_vec_perm_1 (&dperm)
 		  || expand_vec_perm_broadcast_1 (&dperm));
@@ -36722,9 +36763,10 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
 				    NULL_RTX, 1, OPTAB_LIB_WIDEN);
 	val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
 
-	x = gen_lowpart (wvmode, target);
+	x = gen_reg_rtx (wvmode);
 	ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
 	gcc_assert (ok);
+	emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
 	return ok;
       }
@@ -37599,8 +37641,9 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
       else
 	{
 	  /* For SSE1, we have to reuse the V4SF code.  */
-	  ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
-				  gen_lowpart (SFmode, val), elt);
+	  rtx t = gen_reg_rtx (V4SFmode);
+	  ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
+	  emit_move_insn (target, gen_lowpart (mode, t));
 	}
       return;
@@ -37918,7 +37961,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
 static void
 emit_reduc_half (rtx dest, rtx src, int i)
 {
-  rtx tem;
+  rtx tem, d = dest;
   switch (GET_MODE (src))
     {
     case V4SFmode:
@@ -37935,8 +37978,8 @@ emit_reduc_half (rtx dest, rtx src, int i)
     case V8HImode:
     case V4SImode:
     case V2DImode:
-      tem = gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, dest),
-				gen_lowpart (V1TImode, src),
+      d = gen_reg_rtx (V1TImode);
+      tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
 				GEN_INT (i / 2));
       break;
     case V8SFmode:
@@ -37957,19 +38000,26 @@ emit_reduc_half (rtx dest, rtx src, int i)
     case V8SImode:
     case V4DImode:
       if (i == 256)
-	tem = gen_avx2_permv2ti (gen_lowpart (V4DImode, dest),
-				 gen_lowpart (V4DImode, src),
-				 gen_lowpart (V4DImode, src),
-				 const1_rtx);
+	{
+	  if (GET_MODE (dest) != V4DImode)
+	    d = gen_reg_rtx (V4DImode);
+	  tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
+				   gen_lowpart (V4DImode, src),
+				   const1_rtx);
+	}
       else
-	tem = gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, dest),
-				  gen_lowpart (V2TImode, src),
-				  GEN_INT (i / 2));
+	{
+	  d = gen_reg_rtx (V2TImode);
+	  tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
+				    GEN_INT (i / 2));
+	}
       break;
     default:
       gcc_unreachable ();
     }
   emit_insn (tem);
+  if (d != dest)
+    emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
 }
 
 /* Expand a vector reduction.  FN is the binary pattern to reduce;
@@ -39462,6 +39512,8 @@ expand_vec_perm_blend (struct expand_vec_perm_d *d)
 	emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
       else
 	emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
+      if (target != d->target)
+	emit_move_insn (d->target, gen_lowpart (d->vmode, target));
       return true;
     }
@@ -39471,7 +39523,7 @@ expand_vec_perm_blend (struct expand_vec_perm_d *d)
       /* FALLTHRU */
 
     do_subreg:
-      target = gen_lowpart (vmode, target);
+      target = gen_reg_rtx (vmode);
       op0 = gen_lowpart (vmode, op0);
       op1 = gen_lowpart (vmode, op1);
       break;
@@ -39525,7 +39577,7 @@ expand_vec_perm_blend (struct expand_vec_perm_d *d)
 	  vmode = V32QImode;
 	  nelt = 32;
-	  target = gen_lowpart (vmode, target);
+	  target = gen_reg_rtx (vmode);
 	  op0 = gen_lowpart (vmode, op0);
 	  op1 = gen_lowpart (vmode, op1);
 	  goto finish_pblendvb;
@@ -39558,6 +39610,8 @@ expand_vec_perm_blend (struct expand_vec_perm_d *d)
   x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
   x = gen_rtx_SET (VOIDmode, target, x);
   emit_insn (x);
+  if (target != d->target)
+    emit_move_insn (d->target, gen_lowpart (d->vmode, target));
 
   return true;
 }
@@ -39663,13 +39717,17 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
 	      /* Use vperm2i128 insn.  The pattern uses
 		 V4DImode instead of V2TImode.  */
-	      target = gen_lowpart (V4DImode, d->target);
+	      target = d->target;
+	      if (d->vmode != V4DImode)
+		target = gen_reg_rtx (V4DImode);
 	      op0 = gen_lowpart (V4DImode, d->op0);
 	      op1 = gen_lowpart (V4DImode, d->op1);
 	      rperm[0]
 		= GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0)
 			   || ((d->perm[nelt / 2] & (nelt / 2)) ? 2 : 0));
 	      emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
+	      if (target != d->target)
+		emit_move_insn (d->target, gen_lowpart (d->vmode, target));
 	      return true;
 	    }
 	  return false;
@@ -39704,9 +39762,15 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
 		perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
 	      if (d->testing_p)
 		return true;
-	      return expand_vselect (gen_lowpart (V4DImode, d->target),
-				     gen_lowpart (V4DImode, d->op0),
-				     perm, 4, false);
+	      target = gen_reg_rtx (V4DImode);
+	      if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
+				  perm, 4, false))
+		{
+		  emit_move_insn (d->target,
+				  gen_lowpart (d->vmode, target));
+		  return true;
+		}
+	      return false;
 	    }
 
 	  /* Next see if vpermd can be used.  */
@@ -39758,7 +39822,9 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
 				  gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
   vperm = force_reg (vmode, vperm);
 
-  target = gen_lowpart (vmode, d->target);
+  target = d->target;
+  if (d->vmode != vmode)
+    target = gen_reg_rtx (vmode);
   op0 = gen_lowpart (vmode, d->op0);
   if (d->one_operand_p)
     {
@@ -39776,6 +39842,8 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
       op1 = gen_lowpart (vmode, d->op1);
       emit_insn (gen_xop_pperm (target, op0, op1, vperm));
     }
+  if (target != d->target)
+    emit_move_insn (d->target, gen_lowpart (d->vmode, target));
 
   return true;
 }
@@ -39975,7 +40043,8 @@ expand_vec_perm_palignr (struct expand_vec_perm_d *d)
   unsigned i, nelt = d->nelt;
   unsigned min, max;
   bool in_order, ok;
-  rtx shift;
+  rtx shift, target;
+  struct expand_vec_perm_d dcopy;
 
   /* Even with AVX, palignr only operates on 128-bit vectors.  */
   if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
@@ -39998,29 +40067,33 @@ expand_vec_perm_palignr (struct expand_vec_perm_d *d)
   if (d->testing_p)
     return true;
 
+  dcopy = *d;
   shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
-  emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
-				  gen_lowpart (TImode, d->op1),
+  target = gen_reg_rtx (TImode);
+  emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
 				  gen_lowpart (TImode, d->op0), shift));
 
-  d->op0 = d->op1 = d->target;
-  d->one_operand_p = true;
+  dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
+  dcopy.one_operand_p = true;
 
   in_order = true;
   for (i = 0; i < nelt; ++i)
     {
-      unsigned e = d->perm[i] - min;
+      unsigned e = dcopy.perm[i] - min;
       if (e != i)
 	in_order = false;
-      d->perm[i] = e;
+      dcopy.perm[i] = e;
     }
 
   /* Test for the degenerate case where the alignment by itself
      produces the desired permutation.  */
   if (in_order)
-    return true;
+    {
+      emit_move_insn (d->target, dcopy.op0);
+      return true;
+    }
 
-  ok = expand_vec_perm_1 (d);
+  ok = expand_vec_perm_1 (&dcopy);
   gcc_assert (ok);
 
   return ok;
@@ -40274,10 +40347,10 @@ expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
       else
 	dfinal.perm[i] = e;
     }
-  dfinal.op0 = gen_reg_rtx (dfinal.vmode);
+  dremap.target = gen_reg_rtx (dremap.vmode);
+  dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
   dfinal.op1 = dfinal.op0;
   dfinal.one_operand_p = true;
-  dremap.target = dfinal.op0;
 
   /* Test if the final remap can be done with a single insn.  For V4SFmode or
      V4SImode this *will* succeed.  For V8HImode or V16QImode it may not.  */
@@ -40294,7 +40367,6 @@ expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
   if (dremap.vmode != dfinal.vmode)
     {
-      dremap.target = gen_lowpart (dremap.vmode, dremap.target);
       dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
       dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
     }
@@ -40745,8 +40817,12 @@ expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
   op = gen_lowpart (V16QImode, d->op1);
   emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
 
-  op = gen_lowpart (V16QImode, d->target);
+  op = d->target;
+  if (d->vmode != V16QImode)
+    op = gen_reg_rtx (V16QImode);
   emit_insn (gen_iorv16qi3 (op, l, h));
+  if (op != d->target)
+    emit_move_insn (d->target, gen_lowpart (d->vmode, op));
 
   return true;
 }
@@ -40812,8 +40888,12 @@ expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
   op = gen_lowpart (V32QImode, d->op0);
   emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
 
-  op = gen_lowpart (V32QImode, d->target);
+  op = d->target;
+  if (d->vmode != V32QImode)
+    op = gen_reg_rtx (V32QImode);
   emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
+  if (op != d->target)
+    emit_move_insn (d->target, gen_lowpart (d->vmode, op));
 
   return true;
 }
@@ -40889,10 +40969,11 @@ expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
   emit_insn (gen_iorv32qi3 (ior, l, h));
 
   /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation.  */
-  op = gen_lowpart (V4DImode, d->target);
+  op = gen_reg_rtx (V4DImode);
   ior = gen_lowpart (V4DImode, ior);
   emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
 				  const1_rtx, GEN_INT (3)));
+  emit_move_insn (d->target, gen_lowpart (d->vmode, op));
 
   return true;
 }
@@ -40903,7 +40984,7 @@ expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
 static bool
 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
 {
-  rtx t1, t2, t3;
+  rtx t1, t2, t3, t4, t5;
 
   switch (d->vmode)
     {
@@ -41015,10 +41096,17 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
 	{
 	  struct expand_vec_perm_d d_copy = *d;
 	  d_copy.vmode = V4DFmode;
-	  d_copy.target = gen_lowpart (V4DFmode, d->target);
+	  d_copy.target = gen_reg_rtx (V4DFmode);
 	  d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
 	  d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
-	  return expand_vec_perm_even_odd_1 (&d_copy, odd);
+	  if (expand_vec_perm_even_odd_1 (&d_copy, odd))
+	    {
+	      if (!d->testing_p)
+		emit_move_insn (d->target,
+				gen_lowpart (V4DImode, d_copy.target));
+	      return true;
+	    }
+	  return false;
 	}
 
       t1 = gen_reg_rtx (V4DImode);
@@ -41041,44 +41129,51 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
 	{
 	  struct expand_vec_perm_d d_copy = *d;
 	  d_copy.vmode = V8SFmode;
-	  d_copy.target = gen_lowpart (V8SFmode, d->target);
+	  d_copy.target = gen_reg_rtx (V8SFmode);
 	  d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
 	  d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
-	  return expand_vec_perm_even_odd_1 (&d_copy, odd);
+	  if (expand_vec_perm_even_odd_1 (&d_copy, odd))
+	    {
+	      if (!d->testing_p)
+		emit_move_insn (d->target,
+				gen_lowpart (V8SImode, d_copy.target));
+	      return true;
+	    }
+	  return false;
 	}
 
       t1 = gen_reg_rtx (V8SImode);
       t2 = gen_reg_rtx (V8SImode);
+      t3 = gen_reg_rtx (V4DImode);
+      t4 = gen_reg_rtx (V4DImode);
+      t5 = gen_reg_rtx (V4DImode);
 
       /* Shuffle the lanes around into
 	 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }.  */
-      emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t1),
-				    gen_lowpart (V4DImode, d->op0),
+      emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
 				    gen_lowpart (V4DImode, d->op1),
 				    GEN_INT (0x20)));
-      emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t2),
-				    gen_lowpart (V4DImode, d->op0),
+      emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
 				    gen_lowpart (V4DImode, d->op1),
 				    GEN_INT (0x31)));
 
       /* Swap the 2nd and 3rd position in each lane into
 	 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }.  */
-      emit_insn (gen_avx2_pshufdv3 (t1, t1,
+      emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
 				    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
-      emit_insn (gen_avx2_pshufdv3 (t2, t2,
+      emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
 				    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
 
       /* Now an vpunpck[lh]qdq will produce
 	 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }.  */
       if (odd)
-	t3 = gen_avx2_interleave_highv4di (gen_lowpart (V4DImode, d->target),
-					   gen_lowpart (V4DImode, t1),
+	t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
 					   gen_lowpart (V4DImode, t2));
       else
-	t3 = gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode, d->target),
-					  gen_lowpart (V4DImode, t1),
+	t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
 					  gen_lowpart (V4DImode, t2));
       emit_insn (t3);
+      emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
       break;
 
     default:
@@ -41116,7 +41211,7 @@ expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
   unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
   enum machine_mode vmode = d->vmode;
   unsigned char perm2[4];
-  rtx op0 = d->op0;
+  rtx op0 = d->op0, dest;
   bool ok;
 
   switch (vmode)
@@ -41162,9 +41257,11 @@ expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
       while (vmode != V4SImode);
 
       memset (perm2, elt, 4);
-      ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4,
-			   d->testing_p);
+      dest = gen_reg_rtx (V4SImode);
+      ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
       gcc_assert (ok);
+      if (!d->testing_p)
+	emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
       return true;
 
     case V32QImode:
@@ -41306,8 +41403,12 @@ expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
     }
 
   gcc_assert (l[0] && l[1]);
-  op = gen_lowpart (V32QImode, d->target);
+  op = d->target;
+  if (d->vmode != V32QImode)
+    op = gen_reg_rtx (V32QImode);
   emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
+  if (op != d->target)
+    emit_move_insn (d->target, gen_lowpart (d->vmode, op));
   return true;
 }
@@ -41875,7 +41976,9 @@ ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
 			      op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
       gcc_assert (t1 && t2);
 
-      ix86_expand_vec_interleave (gen_lowpart (mode, dest), t1, t2, high_p);
+      t3 = gen_reg_rtx (mode);
+      ix86_expand_vec_interleave (t3, t1, t2, high_p);
+      emit_move_insn (dest, gen_lowpart (wmode, t3));
       break;
 
     case V16QImode:
@@ -41896,14 +41999,14 @@ ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
 void
 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
 {
-  rtx res_1, res_2;
+  rtx res_1, res_2, res_3, res_4;
 
   res_1 = gen_reg_rtx (V4SImode);
   res_2 = gen_reg_rtx (V4SImode);
-  ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode, res_1),
-				 op1, op2, true, false);
-  ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode, res_2),
-				 op1, op2, true, true);
+  res_3 = gen_reg_rtx (V2DImode);
+  res_4 = gen_reg_rtx (V2DImode);
+  ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
+  ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
 
   /* Move the results in element 2 down to element 1; we don't care
      what goes in elements 2 and 3.  Then we can merge the parts
@@ -41917,9 +42020,11 @@ ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
      In both cases the cost of the reformatting stall was too high
      and the overall sequence slower.  */
 
-  emit_insn (gen_sse2_pshufd_1 (res_1, res_1, const0_rtx, const2_rtx,
+  emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
+				const0_rtx, const2_rtx,
 				const0_rtx, const0_rtx));
-  emit_insn (gen_sse2_pshufd_1 (res_2, res_2, const0_rtx, const2_rtx,
+  emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
+				const0_rtx, const2_rtx,
 				const0_rtx, const0_rtx));
 
   res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
@@ -42138,12 +42243,17 @@ ix86_expand_pinsr (rtx *operands)
 	  return false;
 	}
 
-      dst = gen_lowpart (dstmode, dst);
+      rtx d = dst;
+      if (GET_MODE (dst) != dstmode)
+	d = gen_reg_rtx (dstmode);
       src = gen_lowpart (srcmode, src);
 
       pos /= size;
 
-      emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
+      emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
+			GEN_INT (1 << pos)));
+      if (d != dst)
+	emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
       return true;
     }

gcc/config/i386/sse.md

@@ -800,10 +800,13 @@
 				  gen_rtx_SUBREG (SImode, operands[1], 4)));
       emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
 					     operands[2]));
     }
   else if (memory_operand (operands[1], DImode))
-    emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
-				   operands[1], const0_rtx));
+    {
+      rtx tmp = gen_reg_rtx (V2DImode);
+      emit_insn (gen_vec_concatv2di (tmp, operands[1], const0_rtx));
+      emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp));
+    }
   else
     gcc_unreachable ();
 })
@@ -4208,7 +4211,7 @@
    (match_operand:V2DF 2 "nonimmediate_operand")]
   "TARGET_SSE2"
 {
-  rtx tmp0, tmp1;
+  rtx tmp0, tmp1, tmp2;
 
   if (TARGET_AVX && !TARGET_PREFER_AVX128)
     {
@@ -4222,13 +4225,14 @@
     {
       tmp0 = gen_reg_rtx (V4SImode);
       tmp1 = gen_reg_rtx (V4SImode);
+      tmp2 = gen_reg_rtx (V2DImode);
 
       emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
       emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
-      emit_insn
-	(gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
-				     gen_lowpart (V2DImode, tmp0),
-				     gen_lowpart (V2DImode, tmp1)));
+      emit_insn (gen_vec_interleave_lowv2di (tmp2,
+					     gen_lowpart (V2DImode, tmp0),
+					     gen_lowpart (V2DImode, tmp1)));
+      emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
     }
   DONE;
 })
@@ -4289,7 +4293,7 @@
    (match_operand:V2DF 2 "nonimmediate_operand")]
   "TARGET_SSE2"
 {
-  rtx tmp0, tmp1;
+  rtx tmp0, tmp1, tmp2;
 
   if (TARGET_AVX && !TARGET_PREFER_AVX128)
     {
@@ -4303,13 +4307,14 @@
     {
       tmp0 = gen_reg_rtx (V4SImode);
      tmp1 = gen_reg_rtx (V4SImode);
+      tmp2 = gen_reg_rtx (V2DImode);
 
       emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
       emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
-      emit_insn
-	(gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
-				     gen_lowpart (V2DImode, tmp0),
-				     gen_lowpart (V2DImode, tmp1)));
+      emit_insn (gen_vec_interleave_lowv2di (tmp2,
+					     gen_lowpart (V2DImode, tmp0),
+					     gen_lowpart (V2DImode, tmp1)));
+      emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
     }
   DONE;
 })
@@ -7328,14 +7333,16 @@
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_expand "vec_shl_<mode>"
-  [(set (match_operand:VI_128 0 "register_operand")
+  [(set (match_dup 3)
 	(ashift:V1TI
 	 (match_operand:VI_128 1 "register_operand")
-	 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))]
+	 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
+   (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
   "TARGET_SSE2"
 {
-  operands[0] = gen_lowpart (V1TImode, operands[0]);
   operands[1] = gen_lowpart (V1TImode, operands[1]);
+  operands[3] = gen_reg_rtx (V1TImode);
+  operands[4] = gen_lowpart (<MODE>mode, operands[3]);
 })
 
 (define_insn "<sse2_avx2>_ashl<mode>3"
@@ -7365,14 +7372,16 @@
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_expand "vec_shr_<mode>"
-  [(set (match_operand:VI_128 0 "register_operand")
+  [(set (match_dup 3)
	(lshiftrt:V1TI
 	 (match_operand:VI_128 1 "register_operand")
-	 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))]
+	 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
+   (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
   "TARGET_SSE2"
 {
-  operands[0] = gen_lowpart (V1TImode, operands[0]);
   operands[1] = gen_lowpart (V1TImode, operands[1]);
+  operands[3] = gen_reg_rtx (V1TImode);
+  operands[4] = gen_lowpart (<MODE>mode, operands[3]);
 })
 
 (define_insn "<sse2_avx2>_lshr<mode>3"
@@ -8542,12 +8551,13 @@
 {
   rtx t1 = gen_reg_rtx (<MODE>mode);
   rtx t2 = gen_reg_rtx (<MODE>mode);
+  rtx t3 = gen_reg_rtx (V4DImode);
   emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
   emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
-  emit_insn (gen_avx2_permv2ti
-	     (gen_lowpart (V4DImode, operands[0]),
-	      gen_lowpart (V4DImode, t1),
-	      gen_lowpart (V4DImode, t2), GEN_INT (1 + (3 << 4))));
+  emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
+				gen_lowpart (V4DImode, t2),
+				GEN_INT (1 + (3 << 4))));
+  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
   DONE;
 })
@@ -8559,12 +8569,13 @@
 {
   rtx t1 = gen_reg_rtx (<MODE>mode);
   rtx t2 = gen_reg_rtx (<MODE>mode);
+  rtx t3 = gen_reg_rtx (V4DImode);
   emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
   emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
-  emit_insn (gen_avx2_permv2ti
-	     (gen_lowpart (V4DImode, operands[0]),
-	      gen_lowpart (V4DImode, t1),
-	      gen_lowpart (V4DImode, t2), GEN_INT (0 + (2 << 4))));
+  emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
+				gen_lowpart (V4DImode, t2),
+				GEN_INT (0 + (2 << 4))));
+  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
   DONE;
 })

gcc/expmed.c

@@ -624,13 +624,28 @@ store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 	  || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
     {
       /* Use the subreg machinery either to narrow OP0 to the required
-	 words or to cope with mode punning between equal-sized modes.  */
-      rtx sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
-				     bitnum / BITS_PER_UNIT);
-      if (sub)
+	 words or to cope with mode punning between equal-sized modes.
+	 In the latter case, use subreg on the rhs side, not lhs.  */
+      rtx sub;
+
+      if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 	{
-	  emit_move_insn (sub, value);
-	  return true;
+	  sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
+	  if (sub)
+	    {
+	      emit_move_insn (op0, sub);
+	      return true;
+	    }
+	}
+      else
+	{
+	  sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
+				     bitnum / BITS_PER_UNIT);
+	  if (sub)
+	    {
+	      emit_move_insn (sub, value);
+	      return true;
+	    }
 	}
     }

gcc/optabs.c

@@ -6624,8 +6624,8 @@ expand_vec_perm (enum machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
       icode = direct_optab_handler (vec_perm_const_optab, qimode);
       if (icode != CODE_FOR_nothing)
 	{
-	  tmp = expand_vec_perm_1 (icode, gen_lowpart (qimode, target),
-				   gen_lowpart (qimode, v0),
+	  tmp = mode != qimode ? gen_reg_rtx (qimode) : target;
+	  tmp = expand_vec_perm_1 (icode, tmp, gen_lowpart (qimode, v0),
 				   gen_lowpart (qimode, v1), sel_qi);
 	  if (tmp)
 	    return gen_lowpart (mode, tmp);
@@ -6674,7 +6674,7 @@ expand_vec_perm (enum machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
 	}
       tmp = gen_rtx_CONST_VECTOR (qimode, vec);
       sel = gen_lowpart (qimode, sel);
-      sel = expand_vec_perm (qimode, sel, sel, tmp, NULL);
+      sel = expand_vec_perm (qimode, gen_reg_rtx (qimode), sel, tmp, NULL);
       gcc_assert (sel != NULL);
 
       /* Add the byte offset to each byte element.  */
@@ -6689,8 +6689,8 @@ expand_vec_perm (enum machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
 	  gcc_assert (sel_qi != NULL);
 	}
 
-      tmp = expand_vec_perm_1 (icode, gen_lowpart (qimode, target),
-			       gen_lowpart (qimode, v0),
+      tmp = mode != qimode ? gen_reg_rtx (qimode) : target;
+      tmp = expand_vec_perm_1 (icode, tmp, gen_lowpart (qimode, v0),
 			       gen_lowpart (qimode, v1), sel_qi);
       if (tmp)
 	tmp = gen_lowpart (mode, tmp);