mirror of git://gcc.gnu.org/git/gcc.git
Add 256bit AVX vectorizer patterns.
2010-10-13  H.J. Lu  <hongjiu.lu@intel.com>

	* config/i386/i386.c (ix86_build_const_vector): Check vector
	mode instead of scalar mode.
	(ix86_build_signbit_mask): Likewise.
	(ix86_expand_fp_absneg_operator): Updated.
	(ix86_expand_copysign): Likewise.
	(ix86_expand_int_vcond): Likewise.
	(ix86_emit_swdivsf): Likewise.
	(ix86_sse_copysign_to_positive): Likewise.
	(ix86_expand_sse_fabs): Likewise.
	* config/i386/i386.md (fixuns_trunc<mode>si2): Likewise.
	* config/i386/sse.md (copysign<mode>3): Likewise.
	(sse2_cvtudq2ps): Likewise.
	(vec_unpacku_float_hi_v4si): Likewise.
	(vec_unpacku_float_lo_v4si): Likewise.

	* config/i386/i386.c (ix86_builtins): Add
	IX86_BUILTIN_CPYSGNPS256 and IX86_BUILTIN_CPYSGNPD256.
	(bdesc_args): Likewise.
	(ix86_builtin_vectorized_function): Support
	IX86_BUILTIN_CPYSGNPS256, IX86_BUILTIN_CPYSGNPD256,
	IX86_BUILTIN_SQRTPD256, IX86_BUILTIN_SQRTPS_NR256,
	and IX86_BUILTIN_CVTPS2DQ256.
	(ix86_builtin_reciprocal): Support IX86_BUILTIN_SQRTPS_NR256.

	* config/i386/sse.md (STORENT_MODE): New.
	(VEC_FLOAT_MODE): Likewise.
	(VEC_EXTRACT_MODE): Likewise.
	(*avx_cvtdq2pd256_2): Likewise.
	(vec_pack_trunc_v4df): Likewise.
	(vec_interleave_highv8sf): Likewise.
	(vec_interleave_lowv8sf): Likewise.
	(storent<mode>): Macroized.
	(<code><mode>2: absneg): Likewise.
	(copysign<mode>3): Likewise.
	(vec_extract<mode>): Likewise.

	PR target/44180
	* config/i386/i386.c (expand_vec_perm_even_odd_1): Rewritten
	for V8SFmode.

2010-10-13  Richard Guenther  <rguenther@suse.de>
	    H.J. Lu  <hongjiu.lu@intel.com>

	* config/i386/sse.md (reduc_splus_v8sf): Add.
	(reduc_splus_v4df): Likewise.
	(vec_unpacks_hi_v8sf): Likewise.
	(vec_unpacks_lo_v8sf): Likewise.
	(*avx_cvtps2pd256_2): Likewise.
	(vec_unpacks_float_hi_v8si): Likewise.
	(vec_unpacks_float_lo_v8si): Likewise.
	(vec_interleave_highv4df): Likewise.
	(vec_interleave_lowv4df): Likewise.

From-SVN: r165436
parent 0d8485e009
commit 1e27129f2e
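Not part of the commit: a minimal sketch of the kind of loop these new 256-bit patterns target, assuming roughly -O3 -mavx (plus -fno-math-errno so sqrt can be vectorized); the function and array names below are made up for illustration only.

    #include <math.h>

    /* With the 256-bit support added here, copysign () can be vectorized
       through IX86_BUILTIN_CPYSGNPD256 (copysignv4df3) and sqrt () through
       IX86_BUILTIN_SQRTPD256, i.e. four doubles per iteration.  */
    void
    scale_by_sign (double *restrict out, const double *restrict mag,
                   const double *restrict sgn, int n)
    {
      for (int i = 0; i < n; i++)
        out[i] = copysign (sqrt (mag[i]), sgn[i]);
    }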
gcc/ChangeLog
@@ -1,3 +1,58 @@
2010-10-13  H.J. Lu  <hongjiu.lu@intel.com>

	* config/i386/i386.c (ix86_build_const_vector): Check vector
	mode instead of scalar mode.
	(ix86_build_signbit_mask): Likewise.
	(ix86_expand_fp_absneg_operator): Updated.
	(ix86_expand_copysign): Likewise.
	(ix86_expand_int_vcond): Likewise.
	(ix86_emit_swdivsf): Likewise.
	(ix86_sse_copysign_to_positive): Likewise.
	(ix86_expand_sse_fabs): Likewise.
	* config/i386/i386.md (fixuns_trunc<mode>si2): Likewise.
	* config/i386/sse.md (copysign<mode>3): Likewise.
	(sse2_cvtudq2ps): Likewise.
	(vec_unpacku_float_hi_v4si): Likewise.
	(vec_unpacku_float_lo_v4si): Likewise.

	* config/i386/i386.c (ix86_builtins): Add
	IX86_BUILTIN_CPYSGNPS256 and IX86_BUILTIN_CPYSGNPD256.
	(bdesc_args): Likewise.
	(ix86_builtin_vectorized_function): Support
	IX86_BUILTIN_CPYSGNPS256, IX86_BUILTIN_CPYSGNPD256,
	IX86_BUILTIN_SQRTPD256, IX86_BUILTIN_SQRTPS_NR256,
	and IX86_BUILTIN_CVTPS2DQ256.
	(ix86_builtin_reciprocal): Support IX86_BUILTIN_SQRTPS_NR256.

	* config/i386/sse.md (STORENT_MODE): New.
	(VEC_FLOAT_MODE): Likewise.
	(VEC_EXTRACT_MODE): Likewise.
	(*avx_cvtdq2pd256_2): Likewise.
	(vec_pack_trunc_v4df): Likewise.
	(vec_interleave_highv8sf): Likewise.
	(vec_interleave_lowv8sf): Likewise.
	(storent<mode>): Macroized.
	(<code><mode>2: absneg): Likewise.
	(copysign<mode>3): Likewise.
	(vec_extract<mode>): Likewise.

	PR target/44180
	* config/i386/i386.c (expand_vec_perm_even_odd_1): Rewritten
	for V8SFmode.

2010-10-13  Richard Guenther  <rguenther@suse.de>
	    H.J. Lu  <hongjiu.lu@intel.com>

	* config/i386/sse.md (reduc_splus_v8sf): Add.
	(reduc_splus_v4df): Likewise.
	(vec_unpacks_hi_v8sf): Likewise.
	(vec_unpacks_lo_v8sf): Likewise.
	(*avx_cvtps2pd256_2): Likewise.
	(vec_unpacks_float_hi_v8si): Likewise.
	(vec_unpacks_float_lo_v8si): Likewise.
	(vec_interleave_highv4df): Likewise.
	(vec_interleave_lowv4df): Likewise.

2010-10-13  Richard Guenther  <rguenther@suse.de>

	PR objc/45878
gcc/config/i386/i386.c
@@ -15752,17 +15752,28 @@ ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
  rtvec v;
  switch (mode)
    {
    case SImode:
    case V4SImode:
      gcc_assert (vect);
      v = gen_rtvec (4, value, value, value, value);
      return gen_rtx_CONST_VECTOR (V4SImode, v);

    case DImode:
    case V2DImode:
      gcc_assert (vect);
      v = gen_rtvec (2, value, value);
      return gen_rtx_CONST_VECTOR (V2DImode, v);

    case SFmode:
    case V8SFmode:
      if (vect)
        v = gen_rtvec (8, value, value, value, value,
                       value, value, value, value);
      else
        v = gen_rtvec (8, value, CONST0_RTX (SFmode),
                       CONST0_RTX (SFmode), CONST0_RTX (SFmode),
                       CONST0_RTX (SFmode), CONST0_RTX (SFmode),
                       CONST0_RTX (SFmode), CONST0_RTX (SFmode));
      return gen_rtx_CONST_VECTOR (V8SFmode, v);

    case V4SFmode:
      if (vect)
        v = gen_rtvec (4, value, value, value, value);
      else

@@ -15770,7 +15781,15 @@ ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
                       CONST0_RTX (SFmode), CONST0_RTX (SFmode));
      return gen_rtx_CONST_VECTOR (V4SFmode, v);

    case DFmode:
    case V4DFmode:
      if (vect)
        v = gen_rtvec (4, value, value, value, value);
      else
        v = gen_rtvec (4, value, CONST0_RTX (DFmode),
                       CONST0_RTX (DFmode), CONST0_RTX (DFmode));
      return gen_rtx_CONST_VECTOR (V4DFmode, v);

    case V2DFmode:
      if (vect)
        v = gen_rtvec (2, value, value);
      else

@@ -15800,17 +15819,21 @@ ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
  /* Find the sign bit, sign extended to 2*HWI.  */
  switch (mode)
    {
    case SImode:
    case SFmode:
    case V4SImode:
    case V8SFmode:
    case V4SFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = SImode;
      vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
      lo = 0x80000000, hi = lo < 0;
      break;

    case DImode:
    case DFmode:
    case V2DImode:
    case V4DFmode:
    case V2DFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = DImode;
      vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
        lo = (HOST_WIDE_INT)1 << shift, hi = -1;
      else

@@ -15864,7 +15887,7 @@ ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
  if (vec_mode == VOIDmode)
    return force_reg (mode, mask);

  v = ix86_build_const_vector (mode, vect, mask);
  v = ix86_build_const_vector (vec_mode, vect, mask);
  return force_reg (vec_mode, v);
}

@@ -15877,22 +15900,25 @@ ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
  rtx mask, set, use, clob, dst, src;
  bool use_sse = false;
  bool vector_mode = VECTOR_MODE_P (mode);
  enum machine_mode elt_mode = mode;
  enum machine_mode vmode = mode;

  if (vector_mode)
    {
      elt_mode = GET_MODE_INNER (mode);
      use_sse = true;
    }
    use_sse = true;
  else if (mode == TFmode)
    use_sse = true;
  else if (TARGET_SSE_MATH)
    use_sse = SSE_FLOAT_MODE_P (mode);
    {
      use_sse = SSE_FLOAT_MODE_P (mode);
      if (mode == SFmode)
        vmode = V4SFmode;
      else if (mode == DFmode)
        vmode = V2DFmode;
    }

  /* NEG and ABS performed with SSE use bitwise mask operations.
     Create the appropriate mask now.  */
  if (use_sse)
    mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
    mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
  else
    mask = NULL_RTX;

@@ -15926,7 +15952,7 @@ ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
void
ix86_expand_copysign (rtx operands[])
{
  enum machine_mode mode;
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, nmask;

  dest = operands[0];

@@ -15935,6 +15961,13 @@ ix86_expand_copysign (rtx operands[])

  mode = GET_MODE (dest);

  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;

  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx);

@@ -15944,15 +15977,11 @@ ix86_expand_copysign (rtx operands[])

      if (mode == SFmode || mode == DFmode)
        {
          enum machine_mode vmode;

          vmode = mode == SFmode ? V4SFmode : V2DFmode;

          if (op0 == CONST0_RTX (mode))
            op0 = CONST0_RTX (vmode);
          else
            {
              rtx v = ix86_build_const_vector (mode, false, op0);
              rtx v = ix86_build_const_vector (vmode, false, op0);

              op0 = force_reg (vmode, v);
            }

@@ -15960,7 +15989,7 @@ ix86_expand_copysign (rtx operands[])
      else if (op0 != CONST0_RTX (mode))
        op0 = force_reg (mode, op0);

      mask = ix86_build_signbit_mask (mode, 0, 0);
      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
        copysign_insn = gen_copysignsf3_const;

@@ -15975,8 +16004,8 @@ ix86_expand_copysign (rtx operands[])
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);

      nmask = ix86_build_signbit_mask (mode, 0, 1);
      mask = ix86_build_signbit_mask (mode, 0, 0);
      nmask = ix86_build_signbit_mask (vmode, 0, 1);
      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
        copysign_insn = gen_copysignsf3_var;

@@ -17877,8 +17906,7 @@ ix86_expand_int_vcond (rtx operands[])

      /* Subtract (-(INT MAX) - 1) from both operands to make
         them signed.  */
      mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
                                      true, false);
      mask = ix86_build_signbit_mask (mode, true, false);
      gen_sub3 = (mode == V4SImode
                  ? gen_subv4si3 : gen_subv2di3);
      t1 = gen_reg_rtx (mode);

@@ -22713,6 +22741,8 @@ enum ix86_builtins
  /* Vectorizer support builtins.  */
  IX86_BUILTIN_CPYSGNPS,
  IX86_BUILTIN_CPYSGNPD,
  IX86_BUILTIN_CPYSGNPS256,
  IX86_BUILTIN_CPYSGNPD256,

  IX86_BUILTIN_CVTUDQ2PS,

@@ -23850,6 +23880,9 @@ static const struct builtin_description bdesc_args[] =
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* F16C */

@@ -26036,15 +26069,23 @@ ix86_builtin_vectorized_function (tree fndecl, tree type_out,
  switch (fn)
    {
    case BUILT_IN_SQRT:
      if (out_mode == DFmode && out_n == 2
          && in_mode == DFmode && in_n == 2)
        return ix86_builtins[IX86_BUILTIN_SQRTPD];
      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_SQRTPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_SQRTPD256];
        }
      break;

    case BUILT_IN_SQRTF:
      if (out_mode == SFmode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
        }
      break;

    case BUILT_IN_LRINT:

@@ -26054,21 +26095,33 @@ ix86_builtin_vectorized_function (tree fndecl, tree type_out,
      break;

    case BUILT_IN_LRINTF:
      if (out_mode == SImode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
      if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
        }
      break;

    case BUILT_IN_COPYSIGN:
      if (out_mode == DFmode && out_n == 2
          && in_mode == DFmode && in_n == 2)
        return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
        }
      break;

    case BUILT_IN_COPYSIGNF:
      if (out_mode == SFmode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
        }
      break;

    default:

@@ -26391,6 +26444,9 @@ ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
      case IX86_BUILTIN_SQRTPS_NR:
        return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];

      case IX86_BUILTIN_SQRTPS_NR256:
        return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];

      default:
        return NULL_TREE;
      }

@@ -30053,7 +30109,7 @@ void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
  two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);

  if (VECTOR_MODE_P (mode))
    two = ix86_build_const_vector (SFmode, true, two);
    two = ix86_build_const_vector (mode, true, two);

  two = force_reg (mode, two);

@@ -30100,8 +30156,8 @@ void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,

  if (VECTOR_MODE_P (mode))
    {
      mthree = ix86_build_const_vector (SFmode, true, mthree);
      mhalf = ix86_build_const_vector (SFmode, true, mhalf);
      mthree = ix86_build_const_vector (mode, true, mthree);
      mhalf = ix86_build_const_vector (mode, true, mhalf);
    }

  /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)

@@ -30246,7 +30302,16 @@ ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
  rtx sgn = gen_reg_rtx (mode);
  if (mask == NULL_RTX)
    {
      mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
      enum machine_mode vmode;

      if (mode == SFmode)
        vmode = V4SFmode;
      else if (mode == DFmode)
        vmode = V2DFmode;
      else
        vmode = mode;

      mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
      if (!VECTOR_MODE_P (mode))
        {
          /* We need to generate a scalar mode mask in this case.  */

@@ -30270,11 +30335,17 @@ ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
static rtx
ix86_expand_sse_fabs (rtx op0, rtx *smask)
{
  enum machine_mode mode = GET_MODE (op0);
  enum machine_mode vmode, mode = GET_MODE (op0);
  rtx xa, mask;

  xa = gen_reg_rtx (mode);
  mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;
  mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
  if (!VECTOR_MODE_P (mode))
    {
      /* We need to generate a scalar mode mask in this case.  */

@@ -31617,7 +31688,7 @@ expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
static bool
expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
{
  rtx t1, t2, t3, t4;
  rtx t1, t2, t3;

  switch (d->vmode)
    {

@@ -31639,34 +31710,34 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)

    case V8SFmode:
      {
        static const unsigned char perm1[8] = { 0, 2, 1, 3, 5, 6, 5, 7 };
        static const unsigned char perme[8] = { 0, 1, 8, 9, 4, 5, 12, 13 };
        static const unsigned char permo[8] = { 2, 3, 10, 11, 6, 7, 14, 15 };
        int mask = odd ? 0xdd : 0x88;

        t1 = gen_reg_rtx (V8SFmode);
        t2 = gen_reg_rtx (V8SFmode);
        t3 = gen_reg_rtx (V8SFmode);
        t4 = gen_reg_rtx (V8SFmode);

        /* Shuffle within the 128-bit lanes to produce:
           { 0 2 1 3 4 6 5 7 } and { 8 a 9 b c e d f }.  */
        expand_vselect (t1, d->op0, perm1, 8);
        expand_vselect (t2, d->op1, perm1, 8);
           { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }.  */
        emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
                                      GEN_INT (mask)));

        /* Shuffle the lanes around to produce:
           { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }.  */
        emit_insn (gen_avx_vperm2f128v8sf3 (t3, t1, t2, GEN_INT (0x20)));
        emit_insn (gen_avx_vperm2f128v8sf3 (t4, t1, t2, GEN_INT (0x31)));
           { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }.  */
        emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
                                            GEN_INT (0x3)));

        /* Now a vpermil2p will produce the result required.  */
        /* ??? The vpermil2p requires a vector constant.  Another option
           is a unpck[lh]ps to merge the two vectors to produce
           { 0 4 2 6 8 c a e } or { 1 5 3 7 9 d b f }.  Then use another
           vpermilps to get the elements into the final order.  */
        d->op0 = t3;
        d->op1 = t4;
        memcpy (d->perm, odd ? permo: perme, 8);
        expand_vec_perm_vpermil (d);
        /* Shuffle within the 128-bit lanes to produce:
           { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }.  */
        emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));

        /* Shuffle within the 128-bit lanes to produce:
           { 8 a c e c e 8 a } | { 9 b d f d f 9 b }.  */
        emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));

        /* Shuffle the lanes around to produce:
           { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }.  */
        emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
                                            GEN_INT (0x20)));
      }
      break;

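Not part of the commit: a rough intrinsics-level sketch of the even-element sequence that the rewritten expand_vec_perm_even_odd_1 V8SFmode case above emits (shufps mask 0x88; the odd case uses 0xdd instead). The helper name is made up, and it assumes AVX with <immintrin.h>.

    #include <immintrin.h>

    /* Gather the even-indexed floats of the 16-element concatenation of
       a and b, i.e. produce { 0 2 4 6 8 a c e }.  */
    static __m256
    even_elements_v8sf (__m256 a, __m256 b)
    {
      __m256 t1 = _mm256_shuffle_ps (a, b, 0x88);        /* { 0 2 8 a 4 6 c e } */
      __m256 t2 = _mm256_permute2f128_ps (t1, t1, 0x03); /* { 4 6 c e 0 2 8 a } */
      __m256 lo = _mm256_shuffle_ps (t1, t2, 0x44);      /* low lane { 0 2 4 6 } */
      __m256 hi = _mm256_shuffle_ps (t1, t2, 0xee);      /* low lane { 8 a c e } */
      return _mm256_permute2f128_ps (lo, hi, 0x20);      /* { 0 2 4 6 8 a c e } */
    }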
gcc/config/i386/i386.md
@@ -4509,7 +4509,7 @@

  real_ldexp (&TWO31r, &dconst1, 31);
  two31 = const_double_from_real_value (TWO31r, mode);
  two31 = ix86_build_const_vector (mode, true, two31);
  two31 = ix86_build_const_vector (vecmode, true, two31);
  operands[2] = force_reg (vecmode, two31);
})

gcc/config/i386/sse.md
@@ -86,6 +86,25 @@
   (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
   (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])

;; Modes handled by storent patterns.
(define_mode_iterator STORENT_MODE
  [(SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
   (SI "TARGET_SSE2") (V2DI "TARGET_SSE2") (V2DF "TARGET_SSE2")
   (V4SF "TARGET_SSE")
   (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])

;; Modes handled by vector float patterns.
(define_mode_iterator VEC_FLOAT_MODE
  [(V2DF "TARGET_SSE2") (V4SF "TARGET_SSE")
   (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])

;; Modes handled by vector extract patterns.
(define_mode_iterator VEC_EXTRACT_MODE
  [(V2DI "TARGET_SSE") (V4SI "TARGET_SSE")
   (V8HI "TARGET_SSE") (V16QI "TARGET_SSE")
   (V2DF "TARGET_SSE") (V4SF "TARGET_SSE")
   (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])

;; Mapping from float mode to required SSE level
(define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])

@@ -504,30 +523,10 @@
; define patterns for other modes that would expand to several insns.

(define_expand "storent<mode>"
  [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
        (unspec:SSEMODEF2P
          [(match_operand:SSEMODEF2P 1 "register_operand" "")]
          UNSPEC_MOVNT))]
  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)")

(define_expand "storent<mode>"
  [(set (match_operand:MODEF 0 "memory_operand" "")
        (unspec:MODEF
          [(match_operand:MODEF 1 "register_operand" "")]
          UNSPEC_MOVNT))]
  "TARGET_SSE4A")

(define_expand "storentv2di"
  [(set (match_operand:V2DI 0 "memory_operand" "")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
                     UNSPEC_MOVNT))]
  "TARGET_SSE2")

(define_expand "storentsi"
  [(set (match_operand:SI 0 "memory_operand" "")
        (unspec:SI [(match_operand:SI 1 "register_operand" "")]
                   UNSPEC_MOVNT))]
  "TARGET_SSE2")
  [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
        (unspec:STORENT_MODE
          [(match_operand:STORENT_MODE 1 "register_operand" "")]
          UNSPEC_MOVNT))])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;

@@ -536,10 +535,10 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(define_expand "<code><mode>2"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
        (absneg:SSEMODEF2P
          (match_operand:SSEMODEF2P 1 "register_operand" "")))]
  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
  [(set (match_operand:VEC_FLOAT_MODE 0 "register_operand" "")
        (absneg:VEC_FLOAT_MODE
          (match_operand:VEC_FLOAT_MODE 1 "register_operand" "")))]
  ""
  "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")

(define_expand "<plusminus_insn><mode>3"

@@ -1380,6 +1379,19 @@
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

(define_expand "reduc_splus_v8sf"
  [(match_operand:V8SF 0 "register_operand" "")
   (match_operand:V8SF 1 "register_operand" "")]
  "TARGET_AVX"
{
  rtx tmp = gen_reg_rtx (V8SFmode);
  rtx tmp2 = gen_reg_rtx (V8SFmode);
  emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
  emit_insn (gen_avx_haddv8sf3 (tmp2, operands[1], operands[1]));
  emit_insn (gen_avx_haddv8sf3 (operands[0], tmp2, tmp2));
  DONE;
})

(define_expand "reduc_splus_v4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")]

@@ -1396,6 +1408,17 @@
  DONE;
})

(define_expand "reduc_splus_v4df"
  [(match_operand:V4DF 0 "register_operand" "")
   (match_operand:V4DF 1 "register_operand" "")]
  "TARGET_AVX"
{
  rtx tmp = gen_reg_rtx (V4DFmode);
  emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
  emit_insn (gen_avx_haddv4df3 (operands[0], tmp, tmp));
  DONE;
})

(define_expand "reduc_splus_v2df"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand:V2DF 1 "register_operand" "")]

@@ -1650,17 +1673,17 @@

(define_expand "copysign<mode>3"
  [(set (match_dup 4)
        (and:SSEMODEF2P
          (not:SSEMODEF2P (match_dup 3))
          (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")))
        (and:VEC_FLOAT_MODE
          (not:VEC_FLOAT_MODE (match_dup 3))
          (match_operand:VEC_FLOAT_MODE 1 "nonimmediate_operand" "")))
   (set (match_dup 5)
        (and:SSEMODEF2P (match_dup 3)
                        (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))
   (set (match_operand:SSEMODEF2P 0 "register_operand" "")
        (ior:SSEMODEF2P (match_dup 4) (match_dup 5)))]
  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
        (and:VEC_FLOAT_MODE (match_dup 3)
                            (match_operand:VEC_FLOAT_MODE 2 "nonimmediate_operand" "")))
   (set (match_operand:VEC_FLOAT_MODE 0 "register_operand" "")
        (ior:VEC_FLOAT_MODE (match_dup 4) (match_dup 5)))]
  ""
{
  operands[3] = ix86_build_signbit_mask (<ssescalarmode>mode, 1, 0);
  operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);

  operands[4] = gen_reg_rtx (<MODE>mode);
  operands[5] = gen_reg_rtx (<MODE>mode);

@@ -2657,7 +2680,8 @@
  x = const_double_from_real_value (TWO32r, SFmode);

  operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
  operands[4] = force_reg (V4SFmode, ix86_build_const_vector (SFmode, 1, x));
  operands[4] = force_reg (V4SFmode,
                           ix86_build_const_vector (V4SFmode, 1, x));

  for (i = 5; i < 8; i++)
    operands[i] = gen_reg_rtx (V4SFmode);

@@ -2892,6 +2916,18 @@
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])

(define_insn "*avx_cvtdq2pd256_2"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (float:V4DF
          (vec_select:V4SI
            (match_operand:V8SI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
  "TARGET_AVX"
  "vcvtdq2pd\t{%x1, %0|%0, %x1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])

(define_insn "sse2_cvtdq2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (float:V2DF

@@ -3072,6 +3108,18 @@
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])

(define_insn "*avx_cvtps2pd256_2"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (float_extend:V4DF
          (vec_select:V4SF
            (match_operand:V8SF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
  "TARGET_AVX"
  "vcvtps2pd\t{%x1, %0|%0, %x1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])

(define_insn "sse2_cvtps2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (float_extend:V2DF

@@ -3104,6 +3152,22 @@
  "TARGET_SSE2"
  "operands[2] = gen_reg_rtx (V4SFmode);")

(define_expand "vec_unpacks_hi_v8sf"
  [(set (match_dup 2)
        (vec_select:V4SF
          (match_operand:V8SF 1 "nonimmediate_operand" "")
          (parallel [(const_int 4)
                     (const_int 5)
                     (const_int 6)
                     (const_int 7)])))
   (set (match_operand:V4DF 0 "register_operand" "")
        (float_extend:V4DF
          (match_dup 2)))]
  "TARGET_AVX"
{
  operands[2] = gen_reg_rtx (V4SFmode);
})

(define_expand "vec_unpacks_lo_v4sf"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (float_extend:V2DF

@@ -3112,6 +3176,14 @@
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2")

(define_expand "vec_unpacks_lo_v8sf"
  [(set (match_operand:V4DF 0 "register_operand" "")
        (float_extend:V4DF
          (vec_select:V4SF
            (match_operand:V8SF 1 "nonimmediate_operand" "")
            (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
  "TARGET_AVX")

(define_expand "vec_unpacks_float_hi_v8hi"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")]

@@ -3184,6 +3256,28 @@
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2")

(define_expand "vec_unpacks_float_hi_v8si"
  [(set (match_dup 2)
        (vec_select:V4SI
          (match_operand:V8SI 1 "nonimmediate_operand" "")
          (parallel [(const_int 4)
                     (const_int 5)
                     (const_int 6)
                     (const_int 7)])))
   (set (match_operand:V4DF 0 "register_operand" "")
        (float:V4DF
          (match_dup 2)))]
  "TARGET_AVX"
  "operands[2] = gen_reg_rtx (V4SImode);")

(define_expand "vec_unpacks_float_lo_v8si"
  [(set (match_operand:V4DF 0 "register_operand" "")
        (float:V4DF
          (vec_select:V4SI
            (match_operand:V8SI 1 "nonimmediate_operand" "")
            (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
  "TARGET_AVX")

(define_expand "vec_unpacku_float_hi_v4si"
  [(set (match_dup 5)
        (vec_select:V4SI

@@ -3213,7 +3307,8 @@
  x = const_double_from_real_value (TWO32r, DFmode);

  operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
  operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
  operands[4] = force_reg (V2DFmode,
                           ix86_build_const_vector (V2DFmode, 1, x));

  operands[5] = gen_reg_rtx (V4SImode);


@@ -3243,12 +3338,30 @@
  x = const_double_from_real_value (TWO32r, DFmode);

  operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
  operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
  operands[4] = force_reg (V2DFmode,
                           ix86_build_const_vector (V2DFmode, 1, x));

  for (i = 5; i < 8; i++)
    operands[i] = gen_reg_rtx (V2DFmode);
})

(define_expand "vec_pack_trunc_v4df"
  [(set (match_dup 3)
        (float_truncate:V4SF
          (match_operand:V4DF 1 "nonimmediate_operand" "")))
   (set (match_dup 4)
        (float_truncate:V4SF
          (match_operand:V4DF 2 "nonimmediate_operand" "")))
   (set (match_operand:V8SF 0 "register_operand" "")
        (vec_concat:V8SF
          (match_dup 3)
          (match_dup 4)))]
  "TARGET_AVX"
{
  operands[3] = gen_reg_rtx (V4SFmode);
  operands[4] = gen_reg_rtx (V4SFmode);
})

(define_expand "vec_pack_trunc_v2df"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V2DF 1 "nonimmediate_operand" "")

@@ -3441,6 +3554,41 @@
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4SF")])

(define_expand "vec_interleave_highv8sf"
  [(set (match_dup 3)
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "register_operand" "x")
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 8)
                     (const_int 1) (const_int 9)
                     (const_int 4) (const_int 12)
                     (const_int 5) (const_int 13)])))
   (set (match_dup 4)
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_dup 1)
            (match_dup 2))
          (parallel [(const_int 2) (const_int 10)
                     (const_int 3) (const_int 11)
                     (const_int 6) (const_int 14)
                     (const_int 7) (const_int 15)])))
   (set (match_operand:V8SF 0 "register_operand" "")
        (vec_concat:V8SF
          (vec_select:V4SF
            (match_dup 3)
            (parallel [(const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))
          (vec_select:V4SF
            (match_dup 4)
            (parallel [(const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_AVX"
{
  operands[3] = gen_reg_rtx (V8SFmode);
  operands[4] = gen_reg_rtx (V8SFmode);
})

(define_insn "vec_interleave_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF

@@ -3485,6 +3633,41 @@
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4SF")])

(define_expand "vec_interleave_lowv8sf"
  [(set (match_dup 3)
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "register_operand" "x")
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 8)
                     (const_int 1) (const_int 9)
                     (const_int 4) (const_int 12)
                     (const_int 5) (const_int 13)])))
   (set (match_dup 4)
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_dup 1)
            (match_dup 2))
          (parallel [(const_int 2) (const_int 10)
                     (const_int 3) (const_int 11)
                     (const_int 6) (const_int 14)
                     (const_int 7) (const_int 15)])))
   (set (match_operand:V8SF 0 "register_operand" "")
        (vec_concat:V8SF
          (vec_select:V4SF
            (match_dup 3)
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))
          (vec_select:V4SF
            (match_dup 4)
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX"
{
  operands[3] = gen_reg_rtx (V8SFmode);
  operands[4] = gen_reg_rtx (V8SFmode);
})

(define_insn "vec_interleave_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF

@@ -4353,8 +4536,8 @@
})

(define_expand "vec_extract<mode>"
  [(match_operand:<ssescalarmode> 0 "register_operand" "")
   (match_operand:SSEMODE 1 "register_operand" "")
  [(match_operand:<avxscalarmode> 0 "register_operand" "")
   (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{

@@ -4384,6 +4567,36 @@
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])

(define_expand "vec_interleave_highv4df"
  [(set (match_dup 3)
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "register_operand" "x")
            (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 2) (const_int 6)])))
   (set (match_dup 4)
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_dup 1)
            (match_dup 2))
          (parallel [(const_int 1) (const_int 5)
                     (const_int 3) (const_int 7)])))
   (set (match_operand:V4DF 0 "register_operand" "")
        (vec_concat:V4DF
          (vec_select:V2DF
            (match_dup 3)
            (parallel [(const_int 2) (const_int 3)]))
          (vec_select:V2DF
            (match_dup 4)
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_AVX"
{
  operands[3] = gen_reg_rtx (V4DFmode);
  operands[4] = gen_reg_rtx (V4DFmode);
})


(define_expand "vec_interleave_highv2df"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (vec_select:V2DF

@@ -4489,6 +4702,35 @@
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])

(define_expand "vec_interleave_lowv4df"
  [(set (match_dup 3)
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "register_operand" "x")
            (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 2) (const_int 6)])))
   (set (match_dup 4)
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_dup 1)
            (match_dup 2))
          (parallel [(const_int 1) (const_int 5)
                     (const_int 3) (const_int 7)])))
   (set (match_operand:V4DF 0 "register_operand" "")
        (vec_concat:V4DF
          (vec_select:V2DF
            (match_dup 3)
            (parallel [(const_int 0) (const_int 1)]))
          (vec_select:V2DF
            (match_dup 4)
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_AVX"
{
  operands[3] = gen_reg_rtx (V4DFmode);
  operands[4] = gen_reg_rtx (V4DFmode);
})

(define_expand "vec_interleave_lowv2df"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (vec_select:V2DF