mirror of git://gcc.gnu.org/git/gcc.git
i386.c (avx_vpermilp_parallel): New function.
* i386.c (avx_vpermilp_parallel): New function. * i386-protos.h: Declare it. * predicates.md (avx_vpermilp_v8sf_operand, avx_vpermilp_v4df_operand, avx_vpermilp_v4sf_operand, avx_vpermilp_v2df_operand): New. * sse.md (AVXMODEFDP, AVXMODEFSP): New iterators. (ssescalarnum, ssedoublesizemode): Add AVX modes. (vpermilbits): Remove. (avx_vpermil<mode>): Change insns to expanders. (*avx_vpermil<mode>): New. Use vec_select. From-SVN: r154427
This commit is contained in:
parent
d90a2c59eb
commit
8a67ca92a1
|
@ -1,3 +1,15 @@
|
|||
2009-11-22 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* i386.c (avx_vpermilp_parallel): New function.
|
||||
* i386-protos.h: Declare it.
|
||||
* predicates.md (avx_vpermilp_v8sf_operand, avx_vpermilp_v4df_operand,
|
||||
avx_vpermilp_v4sf_operand, avx_vpermilp_v2df_operand): New.
|
||||
* sse.md (AVXMODEFDP, AVXMODEFSP): New iterators.
|
||||
(ssescalarnum, ssedoublesizemode): Add AVX modes.
|
||||
(vpermilbits): Remove.
|
||||
(avx_vpermil<mode>): Change insns to expanders.
|
||||
(*avx_vpermil<mode>): New. Use vec_select.
|
||||
|
||||
2009-11-22 Richard Earnshaw <rearnsha@arm.com>
|
||||
|
||||
* opts.c (decode_options): Don't enable flag_schedule_insns
|
||||
|
|
|
@ -47,6 +47,8 @@ extern bool x86_extended_QIreg_mentioned_p (rtx);
|
|||
extern bool x86_extended_reg_mentioned_p (rtx);
|
||||
extern enum machine_mode ix86_cc_mode (enum rtx_code, rtx, rtx);
|
||||
|
||||
extern int avx_vpermilp_parallel (rtx par, enum machine_mode mode);
|
||||
|
||||
extern int ix86_expand_movmem (rtx, rtx, rtx, rtx, rtx, rtx);
|
||||
extern int ix86_expand_setmem (rtx, rtx, rtx, rtx, rtx, rtx);
|
||||
extern int ix86_expand_strlen (rtx, rtx, rtx, rtx);
|
||||
|
@ -275,3 +277,4 @@ extern int asm_preferred_eh_data_format (int, int);
|
|||
#ifdef HAVE_ATTR_cpu
|
||||
extern enum attr_cpu ix86_schedule;
|
||||
#endif
|
||||
|
||||
|
|
|
@ -24527,6 +24527,82 @@ ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
|
|||
return NULL_TREE;
|
||||
}
|
||||
}
|
||||
|
||||
/* Helper for avx_vpermilps256_operand et al. This is also used by
|
||||
the expansion functions to turn the parallel back into a mask.
|
||||
The return value is 0 for no match and the imm8+1 for a match. */
|
||||
|
||||
int
|
||||
avx_vpermilp_parallel (rtx par, enum machine_mode mode)
|
||||
{
|
||||
unsigned i, nelt = GET_MODE_NUNITS (mode);
|
||||
unsigned mask = 0;
|
||||
unsigned char ipar[8];
|
||||
|
||||
if (XVECLEN (par, 0) != nelt)
|
||||
return 0;
|
||||
|
||||
/* Validate that all of the elements are constants, and not totally
|
||||
out of range. Copy the data into an integral array to make the
|
||||
subsequent checks easier. */
|
||||
for (i = 0; i < nelt; ++i)
|
||||
{
|
||||
rtx er = XVECEXP (par, 0, i);
|
||||
unsigned HOST_WIDE_INT ei;
|
||||
|
||||
if (!CONST_INT_P (er))
|
||||
return 0;
|
||||
ei = INTVAL (er);
|
||||
if (ei >= nelt)
|
||||
return 0;
|
||||
ipar[i] = ei;
|
||||
}
|
||||
|
||||
switch (mode)
|
||||
{
|
||||
case V4DFmode:
|
||||
/* In the 256-bit DFmode case, we can only move elements within
|
||||
a 128-bit lane. */
|
||||
for (i = 0; i < 2; ++i)
|
||||
{
|
||||
if (ipar[i] >= 2)
|
||||
return 0;
|
||||
mask |= ipar[i] << i;
|
||||
}
|
||||
for (i = 2; i < 4; ++i)
|
||||
{
|
||||
if (ipar[i] < 2)
|
||||
return 0;
|
||||
mask |= (ipar[i] - 2) << i;
|
||||
}
|
||||
break;
|
||||
|
||||
case V8SFmode:
|
||||
/* In the 256-bit SFmode case, we have full freedom of movement
|
||||
within the low 128-bit lane, but the high 128-bit lane must
|
||||
mirror the exact same pattern. */
|
||||
for (i = 0; i < 4; ++i)
|
||||
if (ipar[i] + 4 != ipar[i + 4])
|
||||
return 0;
|
||||
nelt = 4;
|
||||
/* FALLTHRU */
|
||||
|
||||
case V2DFmode:
|
||||
case V4SFmode:
|
||||
/* In the 128-bit case, we've full freedom in the placement of
|
||||
the elements from the source operand. */
|
||||
for (i = 0; i < nelt; ++i)
|
||||
mask |= ipar[i] << (i * (nelt / 2));
|
||||
break;
|
||||
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
/* Make sure success has a non-zero value by adding one. */
|
||||
return mask + 1;
|
||||
}
|
||||
|
||||
|
||||
/* Store OPERAND to the memory after reload is completed. This means
|
||||
that we can't easily use assign_stack_local. */
|
||||
|
|
|
@ -1148,3 +1148,24 @@
|
|||
|
||||
return 1;
|
||||
})
|
||||
|
||||
;; Return 1 if OP is a parallel for a vpermilp[ds] permute.
|
||||
;; ??? It would be much easier if the PARALLEL for a VEC_SELECT
|
||||
;; had a mode, but it doesn't. So we have 4 copies and install
|
||||
;; the mode by hand.
|
||||
|
||||
;; V8SF variant: OP must be a PARALLEL for which
;; avx_vpermilp_parallel (op, V8SFmode) returns nonzero.
(define_predicate "avx_vpermilp_v8sf_operand"
|
||||
(and (match_code "parallel")
|
||||
(match_test "avx_vpermilp_parallel (op, V8SFmode)")))
|
||||
|
||||
;; V4DF variant: OP must be a PARALLEL for which
;; avx_vpermilp_parallel (op, V4DFmode) returns nonzero.
(define_predicate "avx_vpermilp_v4df_operand"
|
||||
(and (match_code "parallel")
|
||||
(match_test "avx_vpermilp_parallel (op, V4DFmode)")))
|
||||
|
||||
;; V4SF variant: OP must be a PARALLEL for which
;; avx_vpermilp_parallel (op, V4SFmode) returns nonzero.
(define_predicate "avx_vpermilp_v4sf_operand"
|
||||
(and (match_code "parallel")
|
||||
(match_test "avx_vpermilp_parallel (op, V4SFmode)")))
|
||||
|
||||
;; V2DF variant: OP must be a PARALLEL for which
;; avx_vpermilp_parallel (op, V2DFmode) returns nonzero.
(define_predicate "avx_vpermilp_v2df_operand"
|
||||
(and (match_code "parallel")
|
||||
(match_test "avx_vpermilp_parallel (op, V2DFmode)")))
|
||||
|
|
|
@ -58,6 +58,8 @@
|
|||
(define_mode_iterator AVX256MODE8P [V8SI V8SF])
|
||||
(define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
|
||||
(define_mode_iterator AVXMODEF4P [V4SF V4DF])
|
||||
(define_mode_iterator AVXMODEFDP [V2DF V4DF])
|
||||
(define_mode_iterator AVXMODEFSP [V4SF V8SF])
|
||||
(define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
|
||||
(define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
|
||||
|
||||
|
@ -95,13 +97,16 @@
|
|||
(V4SI "SI") (V2DI "DI")])
|
||||
|
||||
;; Mapping of vector modes to a vector mode of double size
|
||||
(define_mode_attr ssedoublesizemode [(V2DF "V4DF") (V2DI "V4DI")
|
||||
(V4SF "V8SF") (V4SI "V8SI")])
|
||||
(define_mode_attr ssedoublesizemode
|
||||
[(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI")
|
||||
(V8HI "V16HI") (V16QI "V32QI")
|
||||
(V4DF "V8DF") (V8SF "V16SF")
|
||||
(V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")])
|
||||
|
||||
;; Number of scalar elements in each vector type
|
||||
(define_mode_attr ssescalarnum [(V4SF "4") (V2DF "2")
|
||||
(V16QI "16") (V8HI "8")
|
||||
(V4SI "4") (V2DI "2")])
|
||||
(define_mode_attr ssescalarnum
|
||||
[(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
|
||||
(V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")])
|
||||
|
||||
;; Mapping for AVX
|
||||
(define_mode_attr avxvecmode
|
||||
|
@ -134,10 +139,6 @@
|
|||
(define_mode_attr blendbits
|
||||
[(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
|
||||
|
||||
;; Mapping of immediate bits for vpermil instructions
|
||||
(define_mode_attr vpermilbits
|
||||
[(V8SF "255") (V4SF "255") (V4DF "15") (V2DF "3")])
|
||||
|
||||
;; Mapping of immediate bits for pinsr instructions
|
||||
(define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
|
||||
|
||||
|
@ -12088,14 +12089,66 @@
|
|||
(set_attr "prefix" "vex")
|
||||
(set_attr "mode" "OI")])
|
||||
|
||||
(define_insn "avx_vpermil<mode>"
|
||||
[(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
|
||||
(unspec:AVXMODEF2P
|
||||
[(match_operand:AVXMODEF2P 1 "register_operand" "xm")
|
||||
(match_operand:SI 2 "const_0_to_<vpermilbits>_operand" "n")]
|
||||
UNSPEC_VPERMIL))]
|
||||
;; Expander for the AVXMODEFDP modes.  Operand 2 arrives as an immediate
;; accepted by const_0_to_255_operand and is rewritten by the preparation
;; code into a PARALLEL of element indices for the vec_select: index I is
;; bit I of the immediate, and for V4DFmode the indices of elements 2 and 3
;; additionally get 2 added.
;; NOTE(review): the "vpermilp<avxmodesuffixf2c>..." string that follows
;; "TARGET_AVX" looks like a removed line of the old define_insn that the
;; diff rendering kept without a '-' marker -- confirm against sse.md.
(define_expand "avx_vpermil<mode>"
|
||||
[(set (match_operand:AVXMODEFDP 0 "register_operand" "")
|
||||
(vec_select:AVXMODEFDP
|
||||
(match_operand:AVXMODEFDP 1 "nonimmediate_operand" "")
|
||||
(match_operand:SI 2 "const_0_to_255_operand" "")))]
|
||||
"TARGET_AVX"
|
||||
"vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
|
||||
{
|
||||
int mask = INTVAL (operands[2]);
|
||||
rtx perm[<ssescalarnum>];
|
||||
|
||||
perm[0] = GEN_INT (mask & 1);
|
||||
perm[1] = GEN_INT ((mask >> 1) & 1);
|
||||
if (<MODE>mode == V4DFmode)
|
||||
{
|
||||
perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
|
||||
perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
|
||||
}
|
||||
|
||||
operands[2]
|
||||
= gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
|
||||
})
|
||||
|
||||
;; Expander for the AVXMODEFSP modes.  Operand 2 arrives as an immediate
;; accepted by const_0_to_255_operand and is rewritten by the preparation
;; code into a PARALLEL of element indices for the vec_select: index I
;; (I = 0..3) is the 2-bit field at bits 2*I of the immediate, and for
;; V8SFmode indices 4..7 repeat the same four fields with 4 added.
(define_expand "avx_vpermil<mode>"
|
||||
[(set (match_operand:AVXMODEFSP 0 "register_operand" "")
|
||||
(vec_select:AVXMODEFSP
|
||||
(match_operand:AVXMODEFSP 1 "nonimmediate_operand" "")
|
||||
(match_operand:SI 2 "const_0_to_255_operand" "")))]
|
||||
"TARGET_AVX"
|
||||
{
|
||||
int mask = INTVAL (operands[2]);
|
||||
rtx perm[<ssescalarnum>];
|
||||
|
||||
perm[0] = GEN_INT (mask & 3);
|
||||
perm[1] = GEN_INT ((mask >> 2) & 3);
|
||||
perm[2] = GEN_INT ((mask >> 4) & 3);
|
||||
perm[3] = GEN_INT ((mask >> 6) & 3);
|
||||
if (<MODE>mode == V8SFmode)
|
||||
{
|
||||
perm[4] = GEN_INT ((mask & 3) + 4);
|
||||
perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
|
||||
perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
|
||||
perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
|
||||
}
|
||||
|
||||
operands[2]
|
||||
= gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
|
||||
})
|
||||
|
||||
(define_insn "*avx_vpermilp<mode>"
|
||||
[(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
|
||||
(vec_select:AVXMODEF2P
|
||||
(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
|
||||
(match_parallel 2 "avx_vpermilp_<mode>_operand"
|
||||
[(match_operand 3 "const_int_operand" "")])))]
|
||||
"TARGET_AVX"
|
||||
{
|
||||
int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
|
||||
operands[2] = GEN_INT (mask);
|
||||
return "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}";
|
||||
}
|
||||
[(set_attr "type" "sselog")
|
||||
(set_attr "prefix_extra" "1")
|
||||
(set_attr "length_immediate" "1")
|
||||
|
|
Loading…
Reference in New Issue