mirror of git://gcc.gnu.org/git/gcc.git
i386: Delete sse sdot_prod; add sdot_prodv4si for xop
Now that we support the mult_even/odd hooks, the vectorizer can generate exactly the same code for the plain SSE dot_prod by itself, and can do the same for reductions other than plus. From-SVN: r188960
This commit is contained in:
parent
93703e7981
commit
a2051b26af
|
|
@ -1,3 +1,9 @@
|
|||
2012-06-25 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* config/i386/sse.md (sse2_sse4_1): Remove code attr.
|
||||
(<s>dot_prodv4si, <s>dot_prodv8si): Remove
|
||||
(sdot_prodv4si): New; handle only XOP.
|
||||
|
||||
2012-06-25 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* config/i386/i386-builtin-types.def (V4UDI, V8USI): New.
|
||||
|
|
|
|||
|
|
@ -5737,64 +5737,18 @@
|
|||
DONE;
|
||||
})
|
||||
|
||||
(define_code_attr sse2_sse4_1
|
||||
[(zero_extend "sse2") (sign_extend "sse4_1")])
|
||||
|
||||
(define_expand "<s>dot_prodv4si"
|
||||
;; Normally we use widen_mul_even/odd, but combine can't quite get it all
|
||||
;; back together when madd is available.
|
||||
(define_expand "sdot_prodv4si"
|
||||
[(match_operand:V2DI 0 "register_operand")
|
||||
(any_extend:V2DI (match_operand:V4SI 1 "register_operand"))
|
||||
(match_operand:V4SI 1 "register_operand")
|
||||
(match_operand:V4SI 2 "register_operand")
|
||||
(match_operand:V2DI 3 "register_operand")]
|
||||
"<CODE> == ZERO_EXTEND ? TARGET_SSE2 : TARGET_SSE4_1"
|
||||
"TARGET_XOP"
|
||||
{
|
||||
rtx t1, t2, t3, t4;
|
||||
|
||||
t1 = gen_reg_rtx (V2DImode);
|
||||
emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t1, operands[1], operands[2]));
|
||||
emit_insn (gen_addv2di3 (t1, t1, operands[3]));
|
||||
|
||||
t2 = gen_reg_rtx (V4SImode);
|
||||
t3 = gen_reg_rtx (V4SImode);
|
||||
emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
|
||||
gen_lowpart (V1TImode, operands[1]),
|
||||
GEN_INT (32)));
|
||||
emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
|
||||
gen_lowpart (V1TImode, operands[2]),
|
||||
GEN_INT (32)));
|
||||
|
||||
t4 = gen_reg_rtx (V2DImode);
|
||||
emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t4, t2, t3));
|
||||
|
||||
emit_insn (gen_addv2di3 (operands[0], t1, t4));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "<s>dot_prodv8si"
|
||||
[(match_operand:V4DI 0 "register_operand")
|
||||
(any_extend:V4DI (match_operand:V8SI 1 "register_operand"))
|
||||
(match_operand:V8SI 2 "register_operand")
|
||||
(match_operand:V4DI 3 "register_operand")]
|
||||
"TARGET_AVX2"
|
||||
{
|
||||
rtx t1, t2, t3, t4;
|
||||
|
||||
t1 = gen_reg_rtx (V4DImode);
|
||||
emit_insn (gen_avx2_<u>mulv4siv4di3 (t1, operands[1], operands[2]));
|
||||
emit_insn (gen_addv4di3 (t1, t1, operands[3]));
|
||||
|
||||
t2 = gen_reg_rtx (V8SImode);
|
||||
t3 = gen_reg_rtx (V8SImode);
|
||||
emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t2),
|
||||
gen_lowpart (V2TImode, operands[1]),
|
||||
GEN_INT (32)));
|
||||
emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t3),
|
||||
gen_lowpart (V2TImode, operands[2]),
|
||||
GEN_INT (32)));
|
||||
|
||||
t4 = gen_reg_rtx (V4DImode);
|
||||
emit_insn (gen_avx2_<u>mulv4siv4di3 (t4, t2, t3));
|
||||
|
||||
emit_insn (gen_addv4di3 (operands[0], t1, t4));
|
||||
rtx t = gen_reg_rtx (V2DImode);
|
||||
emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
|
||||
emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
|
||||
DONE;
|
||||
})
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue