mirror of git://gcc.gnu.org/git/gcc.git
re PR target/52607 (v4df __builtin_shuffle with {0,2,1,3} or {1,3,0,2})
PR target/52607
* config/i386/i386.md ("isa" attribute): Add avx2 and noavx2.
("enabled" attribute): Handle avx2 and noavx2 isas.
* config/i386/sse.md (avx2_vec_dupv8sf_1, avx2_pbroadcast<mode>_1): New insns.
(vec_dup<mode>): Add avx2 =x,x alternative.
(vec_dup<mode> splitter): Don't split if TARGET_AVX2.
(*avx_vperm_broadcast_<mode>): Don't split V4DFmode if TARGET_AVX2. For TARGET_AVX2, V8SFmode and elt == 0 split into vbroadcastss.
* config/i386/i386.c (expand_vec_perm_pshufb): Emit also vpermps for V8SFmode.
(expand_vec_perm_1): For broadcasts, use avx2_pbroadcast<mode>_1 if possible, handle also V8SFmode.

From-SVN: r185577
This commit is contained in:
parent
681676df51
commit
6945a32ec3
|
@ -1,3 +1,19 @@
|
||||||
|
2012-03-20 Jakub Jelinek <jakub@redhat.com>
|
||||||
|
|
||||||
|
PR target/52607
|
||||||
|
* config/i386/i386.md ("isa" attribute): Add avx2 and noavx2.
|
||||||
|
("enabled" attribute): Handle avx2 and noavx2 isas.
|
||||||
|
* config/i386/sse.md (avx2_vec_dupv8sf_1, avx2_pbroadcast<mode>_1):
|
||||||
|
New insns.
|
||||||
|
(vec_dup<mode>): Add avx2 =x,x alternative.
|
||||||
|
(vec_dup<mode> splitter): Don't split if TARGET_AVX2.
|
||||||
|
(*avx_vperm_broadcast_<mode>): Don't split V4DFmode if TARGET_AVX2.
|
||||||
|
For TARGET_AVX2, V8SFmode and elt == 0 split into vbroadcastss.
|
||||||
|
* config/i386/i386.c (expand_vec_perm_pshufb): Emit also vpermps
|
||||||
|
for V8SFmode.
|
||||||
|
(expand_vec_perm_1): For broadcasts, use avx2_pbroadcast<mode>_1
|
||||||
|
if possible, handle also V8SFmode.
|
||||||
|
|
||||||
2012-03-20 Richard Earnshaw <rearnsha@arm.com>
|
2012-03-20 Richard Earnshaw <rearnsha@arm.com>
|
||||||
|
|
||||||
* arm/predicates.md (zero_operand, reg_or_zero_operand): New predicates.
|
* arm/predicates.md (zero_operand, reg_or_zero_operand): New predicates.
|
||||||
|
|
|
@ -35836,7 +35836,7 @@ valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
|
/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
|
||||||
in terms of pshufb, vpperm, vpermq, vpermd or vperm2i128. */
|
in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
|
expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
|
||||||
|
@ -35910,6 +35910,9 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
|
||||||
if (valid_perm_using_mode_p (V8SImode, d))
|
if (valid_perm_using_mode_p (V8SImode, d))
|
||||||
vmode = V8SImode;
|
vmode = V8SImode;
|
||||||
}
|
}
|
||||||
|
/* Or if vpermps can be used. */
|
||||||
|
else if (d->vmode == V8SFmode)
|
||||||
|
vmode = V8SImode;
|
||||||
|
|
||||||
if (vmode == V32QImode)
|
if (vmode == V32QImode)
|
||||||
{
|
{
|
||||||
|
@ -35952,6 +35955,12 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
|
||||||
gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
|
gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
|
||||||
vperm = force_reg (vmode, vperm);
|
vperm = force_reg (vmode, vperm);
|
||||||
|
|
||||||
|
if (vmode == V8SImode && d->vmode == V8SFmode)
|
||||||
|
{
|
||||||
|
vmode = V8SFmode;
|
||||||
|
vperm = gen_lowpart (vmode, vperm);
|
||||||
|
}
|
||||||
|
|
||||||
target = gen_lowpart (vmode, d->target);
|
target = gen_lowpart (vmode, d->target);
|
||||||
op0 = gen_lowpart (vmode, d->op0);
|
op0 = gen_lowpart (vmode, d->op0);
|
||||||
if (d->op0 == d->op1)
|
if (d->op0 == d->op1)
|
||||||
|
@ -35960,6 +35969,8 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
|
||||||
emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
|
emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
|
||||||
else if (vmode == V32QImode)
|
else if (vmode == V32QImode)
|
||||||
emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
|
emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
|
||||||
|
else if (vmode == V8SFmode)
|
||||||
|
emit_insn (gen_avx2_permvarv8sf (target, vperm, op0));
|
||||||
else
|
else
|
||||||
emit_insn (gen_avx2_permvarv8si (target, vperm, op0));
|
emit_insn (gen_avx2_permvarv8si (target, vperm, op0));
|
||||||
}
|
}
|
||||||
|
@ -36008,20 +36019,17 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
|
||||||
else if (broadcast_perm && TARGET_AVX2)
|
else if (broadcast_perm && TARGET_AVX2)
|
||||||
{
|
{
|
||||||
/* Use vpbroadcast{b,w,d}. */
|
/* Use vpbroadcast{b,w,d}. */
|
||||||
rtx op = d->op0, (*gen) (rtx, rtx) = NULL;
|
rtx (*gen) (rtx, rtx) = NULL;
|
||||||
switch (d->vmode)
|
switch (d->vmode)
|
||||||
{
|
{
|
||||||
case V32QImode:
|
case V32QImode:
|
||||||
op = gen_lowpart (V16QImode, op);
|
gen = gen_avx2_pbroadcastv32qi_1;
|
||||||
gen = gen_avx2_pbroadcastv32qi;
|
|
||||||
break;
|
break;
|
||||||
case V16HImode:
|
case V16HImode:
|
||||||
op = gen_lowpart (V8HImode, op);
|
gen = gen_avx2_pbroadcastv16hi_1;
|
||||||
gen = gen_avx2_pbroadcastv16hi;
|
|
||||||
break;
|
break;
|
||||||
case V8SImode:
|
case V8SImode:
|
||||||
op = gen_lowpart (V4SImode, op);
|
gen = gen_avx2_pbroadcastv8si_1;
|
||||||
gen = gen_avx2_pbroadcastv8si;
|
|
||||||
break;
|
break;
|
||||||
case V16QImode:
|
case V16QImode:
|
||||||
gen = gen_avx2_pbroadcastv16qi;
|
gen = gen_avx2_pbroadcastv16qi;
|
||||||
|
@ -36029,13 +36037,16 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
|
||||||
case V8HImode:
|
case V8HImode:
|
||||||
gen = gen_avx2_pbroadcastv8hi;
|
gen = gen_avx2_pbroadcastv8hi;
|
||||||
break;
|
break;
|
||||||
|
case V8SFmode:
|
||||||
|
gen = gen_avx2_vec_dupv8sf_1;
|
||||||
|
break;
|
||||||
/* For other modes prefer other shuffles this function creates. */
|
/* For other modes prefer other shuffles this function creates. */
|
||||||
default: break;
|
default: break;
|
||||||
}
|
}
|
||||||
if (gen != NULL)
|
if (gen != NULL)
|
||||||
{
|
{
|
||||||
if (!d->testing_p)
|
if (!d->testing_p)
|
||||||
emit_insn (gen (d->target, op));
|
emit_insn (gen (d->target, d->op0));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -36103,7 +36114,7 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
/* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
|
/* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
|
||||||
vpshufb, vpermd or vpermq variable permutation. */
|
vpshufb, vpermd, vpermps or vpermq variable permutation. */
|
||||||
if (expand_vec_perm_pshufb (d))
|
if (expand_vec_perm_pshufb (d))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
|
|
|
@ -639,7 +639,7 @@
|
||||||
(define_attr "movu" "0,1" (const_string "0"))
|
(define_attr "movu" "0,1" (const_string "0"))
|
||||||
|
|
||||||
;; Used to control the "enabled" attribute on a per-instruction basis.
|
;; Used to control the "enabled" attribute on a per-instruction basis.
|
||||||
(define_attr "isa" "base,sse2,sse2_noavx,sse3,sse4,sse4_noavx,noavx,avx,bmi2"
|
(define_attr "isa" "base,sse2,sse2_noavx,sse3,sse4,sse4_noavx,noavx,avx,avx2,noavx2,bmi2"
|
||||||
(const_string "base"))
|
(const_string "base"))
|
||||||
|
|
||||||
(define_attr "enabled" ""
|
(define_attr "enabled" ""
|
||||||
|
@ -652,6 +652,8 @@
|
||||||
(symbol_ref "TARGET_SSE4_1 && !TARGET_AVX")
|
(symbol_ref "TARGET_SSE4_1 && !TARGET_AVX")
|
||||||
(eq_attr "isa" "avx") (symbol_ref "TARGET_AVX")
|
(eq_attr "isa" "avx") (symbol_ref "TARGET_AVX")
|
||||||
(eq_attr "isa" "noavx") (symbol_ref "!TARGET_AVX")
|
(eq_attr "isa" "noavx") (symbol_ref "!TARGET_AVX")
|
||||||
|
(eq_attr "isa" "avx2") (symbol_ref "TARGET_AVX2")
|
||||||
|
(eq_attr "isa" "noavx2") (symbol_ref "!TARGET_AVX2")
|
||||||
(eq_attr "isa" "bmi2") (symbol_ref "TARGET_BMI2")
|
(eq_attr "isa" "bmi2") (symbol_ref "TARGET_BMI2")
|
||||||
]
|
]
|
||||||
(const_int 1)))
|
(const_int 1)))
|
||||||
|
|
|
@ -3808,6 +3808,18 @@
|
||||||
(set_attr "prefix" "vex")
|
(set_attr "prefix" "vex")
|
||||||
(set_attr "mode" "<MODE>")])
|
(set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
|
(define_insn "avx2_vec_dupv8sf_1"
|
||||||
|
[(set (match_operand:V8SF 0 "register_operand" "=x")
|
||||||
|
(vec_duplicate:V8SF
|
||||||
|
(vec_select:SF
|
||||||
|
(match_operand:V8SF 1 "register_operand" "x")
|
||||||
|
(parallel [(const_int 0)]))))]
|
||||||
|
"TARGET_AVX2"
|
||||||
|
"vbroadcastss\t{%x1, %0|%0, %x1}"
|
||||||
|
[(set_attr "type" "sselog1")
|
||||||
|
(set_attr "prefix" "vex")
|
||||||
|
(set_attr "mode" "V8SF")])
|
||||||
|
|
||||||
(define_insn "vec_dupv4sf"
|
(define_insn "vec_dupv4sf"
|
||||||
[(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
|
[(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
|
||||||
(vec_duplicate:V4SF
|
(vec_duplicate:V4SF
|
||||||
|
@ -11876,6 +11888,19 @@
|
||||||
(set_attr "prefix" "vex")
|
(set_attr "prefix" "vex")
|
||||||
(set_attr "mode" "<sseinsnmode>")])
|
(set_attr "mode" "<sseinsnmode>")])
|
||||||
|
|
||||||
|
(define_insn "avx2_pbroadcast<mode>_1"
|
||||||
|
[(set (match_operand:VI_256 0 "register_operand" "=x")
|
||||||
|
(vec_duplicate:VI_256
|
||||||
|
(vec_select:<ssescalarmode>
|
||||||
|
(match_operand:VI_256 1 "nonimmediate_operand" "xm")
|
||||||
|
(parallel [(const_int 0)]))))]
|
||||||
|
"TARGET_AVX2"
|
||||||
|
"vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
|
||||||
|
[(set_attr "type" "ssemov")
|
||||||
|
(set_attr "prefix_extra" "1")
|
||||||
|
(set_attr "prefix" "vex")
|
||||||
|
(set_attr "mode" "<sseinsnmode>")])
|
||||||
|
|
||||||
(define_insn "avx2_permvarv8si"
|
(define_insn "avx2_permvarv8si"
|
||||||
[(set (match_operand:V8SI 0 "register_operand" "=x")
|
[(set (match_operand:V8SI 0 "register_operand" "=x")
|
||||||
(unspec:V8SI
|
(unspec:V8SI
|
||||||
|
@ -11967,16 +11992,18 @@
|
||||||
[V8SI V8SF V4DI V4DF])
|
[V8SI V8SF V4DI V4DF])
|
||||||
|
|
||||||
(define_insn "vec_dup<mode>"
|
(define_insn "vec_dup<mode>"
|
||||||
[(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x")
|
[(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x")
|
||||||
(vec_duplicate:AVX_VEC_DUP_MODE
|
(vec_duplicate:AVX_VEC_DUP_MODE
|
||||||
(match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,?x")))]
|
(match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,?x")))]
|
||||||
"TARGET_AVX"
|
"TARGET_AVX"
|
||||||
"@
|
"@
|
||||||
vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
|
vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
|
||||||
|
vbroadcast<ssescalarmodesuffix>\t{%x1, %0|%0, %x1}
|
||||||
#"
|
#"
|
||||||
[(set_attr "type" "ssemov")
|
[(set_attr "type" "ssemov")
|
||||||
(set_attr "prefix_extra" "1")
|
(set_attr "prefix_extra" "1")
|
||||||
(set_attr "prefix" "vex")
|
(set_attr "prefix" "vex")
|
||||||
|
(set_attr "isa" "*,avx2,noavx2")
|
||||||
(set_attr "mode" "V8SF")])
|
(set_attr "mode" "V8SF")])
|
||||||
|
|
||||||
(define_insn "avx2_vbroadcasti128_<mode>"
|
(define_insn "avx2_vbroadcasti128_<mode>"
|
||||||
|
@ -11995,7 +12022,7 @@
|
||||||
[(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
|
[(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
|
||||||
(vec_duplicate:AVX_VEC_DUP_MODE
|
(vec_duplicate:AVX_VEC_DUP_MODE
|
||||||
(match_operand:<ssescalarmode> 1 "register_operand")))]
|
(match_operand:<ssescalarmode> 1 "register_operand")))]
|
||||||
"TARGET_AVX && reload_completed"
|
"TARGET_AVX && !TARGET_AVX2 && reload_completed"
|
||||||
[(set (match_dup 2)
|
[(set (match_dup 2)
|
||||||
(vec_duplicate:<ssehalfvecmode> (match_dup 1)))
|
(vec_duplicate:<ssehalfvecmode> (match_dup 1)))
|
||||||
(set (match_dup 0)
|
(set (match_dup 0)
|
||||||
|
@ -12057,7 +12084,7 @@
|
||||||
[(match_operand 3 "const_int_operand" "C,n,n")])))]
|
[(match_operand 3 "const_int_operand" "C,n,n")])))]
|
||||||
"TARGET_AVX"
|
"TARGET_AVX"
|
||||||
"#"
|
"#"
|
||||||
"&& reload_completed"
|
"&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
|
||||||
[(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
|
[(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
|
||||||
{
|
{
|
||||||
rtx op0 = operands[0], op1 = operands[1];
|
rtx op0 = operands[0], op1 = operands[1];
|
||||||
|
@ -12067,6 +12094,13 @@
|
||||||
{
|
{
|
||||||
int mask;
|
int mask;
|
||||||
|
|
||||||
|
if (TARGET_AVX2 && elt == 0)
|
||||||
|
{
|
||||||
|
emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
|
||||||
|
op1)));
|
||||||
|
DONE;
|
||||||
|
}
|
||||||
|
|
||||||
/* Shuffle element we care about into all elements of the 128-bit lane.
|
/* Shuffle element we care about into all elements of the 128-bit lane.
|
||||||
The other lane gets shuffled too, but we don't care. */
|
The other lane gets shuffled too, but we don't care. */
|
||||||
if (<MODE>mode == V4DFmode)
|
if (<MODE>mode == V4DFmode)
|
||||||
|
|
Loading…
Reference in New Issue