re PR tree-optimization/51581 (Integer division by constant is not vectorized)

PR tree-optimization/51581
	* tree-vect-stmts.c (permute_vec_elements): Add forward decl.
	(vectorizable_operation): Handle vectorization of MULT_HIGHPART_EXPR
	also using VEC_WIDEN_MULT_*_EXPR or builtin_mul_widen_* plus
	VEC_PERM_EXPR if vector MULT_HIGHPART_EXPR isn't supported.
	* tree-vect-patterns.c (vect_recog_divmod_pattern): Use
	MULT_HIGHPART_EXPR instead of VEC_WIDEN_MULT_*_EXPR and shifts.

	* gcc.dg/vect/pr51581-4.c: New test.

From-SVN: r189053
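The pattern recognizer rewrites a division by a suitable constant into a multiply that keeps only the high half of the double-width product, plus a few cheap shifts/adds; MULT_HIGHPART_EXPR expresses exactly that high-half multiply per vector element. A minimal scalar sketch of the idea follows (illustrative only, not part of the commit; 0xAAAB with a post-shift of 1 is the standard multiplier/shift pair for an unsigned 16-bit division by 3, matching the /3 loops in the new test):

/* Illustrative sketch only, not part of the commit: the scalar form of the
   strength reduction that vect_recog_divmod_pattern now expresses with a
   single MULT_HIGHPART_EXPR.  "h*" means the high 16 bits of the
   16x16->32 bit product, which is what MULT_HIGHPART_EXPR computes per
   vector element.  */
#include <assert.h>
#include <stdint.h>

static uint16_t
udiv3 (uint16_t x)
{
  uint16_t t1 = (uint16_t) (((uint32_t) x * 0xAAAB) >> 16); /* t1 = x h* ml */
  return t1 >> 1;					    /* q = t1 >> post_shift */
}

int
main (void)
{
  for (uint32_t x = 0; x <= 0xFFFF; x++)
    assert (udiv3 ((uint16_t) x) == x / 3);
  return 0;
}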
Jakub Jelinek <jakub@redhat.com>, 2012-06-28 19:53:58 +02:00 (committed by Jakub Jelinek)
commit 5deb57cb1b, parent c9ba330781
5 changed files with 399 additions and 143 deletions

gcc/ChangeLog

@@ -1,5 +1,13 @@
 2012-06-28  Jakub Jelinek  <jakub@redhat.com>
 
+	PR tree-optimization/51581
+	* tree-vect-stmts.c (permute_vec_elements): Add forward decl.
+	(vectorizable_operation): Handle vectorization of MULT_HIGHPART_EXPR
+	also using VEC_WIDEN_MULT_*_EXPR or builtin_mul_widen_* plus
+	VEC_PERM_EXPR if vector MULT_HIGHPART_EXPR isn't supported.
+	* tree-vect-patterns.c (vect_recog_divmod_pattern): Use
+	MULT_HIGHPART_EXPR instead of VEC_WIDEN_MULT_*_EXPR and shifts.
+
 	PR tree-optimization/53645
 	* tree-vect-generic.c (expand_vector_divmod): Use MULT_HIGHPART_EXPR
 	instead of VEC_WIDEN_MULT_{HI,LO}_EXPR followed by VEC_PERM_EXPR

gcc/testsuite/ChangeLog

@@ -1,5 +1,8 @@
 2012-06-28  Jakub Jelinek  <jakub@redhat.com>
 
+	PR tree-optimization/51581
+	* gcc.dg/vect/pr51581-4.c: New test.
+
 	PR tree-optimization/53645
 	* gcc.c-torture/execute/pr53645-2.c: New test.

gcc/testsuite/gcc.dg/vect/pr51581-4.c (new file)

@@ -0,0 +1,166 @@
/* PR tree-optimization/51581 */

#include "tree-vect.h"

short int a[16], b[16];
unsigned short int c[16], d[16];

void
f1 (void)
{
  a[0] = b[0] / 8;
  a[1] = b[1] / 8;
  a[2] = b[2] / 8;
  a[3] = b[3] / 8;
  a[4] = b[4] / 8;
  a[5] = b[5] / 8;
  a[6] = b[6] / 8;
  a[7] = b[7] / 8;
  a[8] = b[8] / 8;
  a[9] = b[9] / 8;
  a[10] = b[10] / 8;
  a[11] = b[11] / 8;
  a[12] = b[12] / 8;
  a[13] = b[13] / 8;
  a[14] = b[14] / 8;
  a[15] = b[15] / 8;
}

void
f2 (void)
{
  c[0] = d[0] / 3;
  c[1] = d[1] / 3;
  c[2] = d[2] / 3;
  c[3] = d[3] / 3;
  c[4] = d[4] / 3;
  c[5] = d[5] / 3;
  c[6] = d[6] / 3;
  c[7] = d[7] / 3;
  c[8] = d[8] / 3;
  c[9] = d[9] / 3;
  c[10] = d[10] / 3;
  c[11] = d[11] / 3;
  c[12] = d[12] / 3;
  c[13] = d[13] / 3;
  c[14] = d[14] / 3;
  c[15] = d[15] / 3;
}

void
f3 (void)
{
  a[0] = b[0] / 8;
  a[1] = b[1] / 4;
  a[2] = b[2] / 8;
  a[3] = b[3] / 4;
  a[4] = b[4] / 8;
  a[5] = b[5] / 4;
  a[6] = b[6] / 8;
  a[7] = b[7] / 4;
  a[8] = b[8] / 8;
  a[9] = b[9] / 4;
  a[10] = b[10] / 8;
  a[11] = b[11] / 4;
  a[12] = b[12] / 8;
  a[13] = b[13] / 4;
  a[14] = b[14] / 8;
  a[15] = b[15] / 4;
}

void
f4 (void)
{
  c[0] = d[0] / 3;
  c[1] = d[1] / 5;
  c[2] = d[2] / 3;
  c[3] = d[3] / 5;
  c[4] = d[4] / 3;
  c[5] = d[5] / 5;
  c[6] = d[6] / 3;
  c[7] = d[7] / 5;
  c[8] = d[8] / 3;
  c[9] = d[9] / 5;
  c[10] = d[10] / 3;
  c[11] = d[11] / 5;
  c[12] = d[12] / 3;
  c[13] = d[13] / 5;
  c[14] = d[14] / 3;
  c[15] = d[15] / 5;
}

void
f5 (void)
{
  a[0] = b[0] / 14;
  a[1] = b[1] / 15;
  a[2] = b[2] / 14;
  a[3] = b[3] / 15;
  a[4] = b[4] / 14;
  a[5] = b[5] / 15;
  a[6] = b[6] / 14;
  a[7] = b[7] / 15;
  a[8] = b[8] / 14;
  a[9] = b[9] / 15;
  a[10] = b[10] / 14;
  a[11] = b[11] / 15;
  a[12] = b[12] / 14;
  a[13] = b[13] / 15;
  a[14] = b[14] / 14;
  a[15] = b[15] / 15;
}

void
f6 (void)
{
  c[0] = d[0] / 6;
  c[1] = d[1] / 5;
  c[2] = d[2] / 6;
  c[3] = d[3] / 5;
  c[4] = d[4] / 6;
  c[5] = d[5] / 5;
  c[6] = d[6] / 13;
  c[7] = d[7] / 5;
  c[8] = d[8] / 6;
  c[9] = d[9] / 5;
  c[10] = d[10] / 6;
  c[11] = d[11] / 5;
  c[12] = d[12] / 6;
  c[13] = d[13] / 5;
  c[14] = d[14] / 13;
  c[15] = d[15] / 5;
}

int
main ()
{
  int i;
  check_vect ();
  asm ("");
  for (i = 0; i < 16; i++)
    {
      asm ("");
      b[i] = i - 8;
      d[i] = i - 8;
    }
  f1 ();
  f2 ();
  for (i = 0; i < 16; i++)
    if (a[i] != b[i] / 8 || c[i] != d[i] / 3)
      abort ();
  f3 ();
  f4 ();
  for (i = 0; i < 16; i += 2)
    if (a[i] != b[i] / 8 || a[i + 1] != b[i + 1] / 4
	|| c[i] != d[i] / 3 || c[i + 1] != d[i + 1] / 5)
      abort ();
  f5 ();
  f6 ();
  for (i = 0; i < 16; i += 2)
    if (a[i] != b[i] / 14 || a[i + 1] != b[i + 1] / 15
	|| c[i] != d[i] / ((i & 7) == 6 ? 13 : 6) || c[i + 1] != d[i + 1] / 5)
      abort ();
  return 0;
}

/* { dg-final { cleanup-tree-dump "vect" } } */

gcc/tree-vect-patterns.c

@@ -1635,7 +1635,7 @@ vect_recog_divmod_pattern (VEC (gimple, heap) **stmts,
 			   tree *type_in, tree *type_out)
 {
   gimple last_stmt = VEC_pop (gimple, *stmts);
-  tree oprnd0, oprnd1, vectype, itype, witype, vecwtype, cond;
+  tree oprnd0, oprnd1, vectype, itype, cond;
   gimple pattern_stmt, def_stmt;
   enum tree_code rhs_code;
   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
@@ -1814,17 +1814,23 @@ vect_recog_divmod_pattern (VEC (gimple, heap) **stmts,
       || prec > HOST_BITS_PER_WIDE_INT)
     return NULL;
 
-  witype = build_nonstandard_integer_type (prec * 2,
-					   TYPE_UNSIGNED (itype));
-  vecwtype = get_vectype_for_scalar_type (witype);
-  if (vecwtype == NULL_TREE)
-    return NULL;
-
-  if (!supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
-				       vecwtype, vectype,
-				       &dummy, &dummy, &dummy_code,
-				       &dummy_code, &dummy_int, &dummy_vec))
-    return NULL;
+  optab = optab_for_tree_code (MULT_HIGHPART_EXPR, vectype, optab_default);
+  if (optab == NULL
+      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
+    {
+      tree witype = build_nonstandard_integer_type (prec * 2,
+						    TYPE_UNSIGNED (itype));
+      tree vecwtype = get_vectype_for_scalar_type (witype);
+
+      if (vecwtype == NULL_TREE)
+	return NULL;
+      if (!supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
+					   vecwtype, vectype,
+					   &dummy, &dummy, &dummy_code,
+					   &dummy_code, &dummy_int,
+					   &dummy_vec))
+	return NULL;
+    }
 
   STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
@@ -1834,7 +1840,7 @@ vect_recog_divmod_pattern (VEC (gimple, heap) **stmts,
       int pre_shift, post_shift;
       unsigned HOST_WIDE_INT d = tree_low_cst (oprnd1, 1)
 				 & GET_MODE_MASK (TYPE_MODE (itype));
-      tree t1, t2, t3, t4, t5, t6;
+      tree t1, t2, t3, t4;
 
       if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
 	/* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
@@ -1861,65 +1867,46 @@ vect_recog_divmod_pattern (VEC (gimple, heap) **stmts,
 	  if (post_shift - 1 >= prec)
 	    return NULL;
 
-	  /* t1 = oprnd0 w* ml;
-	     t2 = t1 >> prec;
-	     t3 = (type) t2;
-	     t4 = oprnd0 - t3;
-	     t5 = t4 >> 1;
-	     t6 = t3 + t5;
-	     q = t6 >> (post_shift - 1);  */
-	  t1 = vect_recog_temp_ssa_var (witype, NULL);
+	  /* t1 = oprnd0 h* ml;
+	     t2 = oprnd0 - t1;
+	     t3 = t2 >> 1;
+	     t4 = t1 + t3;
+	     q = t4 >> (post_shift - 1);  */
+	  t1 = vect_recog_temp_ssa_var (itype, NULL);
 	  def_stmt
-	    = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, t1, oprnd0,
+	    = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
 					    build_int_cst (itype, ml));
 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
-	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
-	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
-	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecwtype;
 
-	  t2 = vect_recog_temp_ssa_var (witype, NULL);
+	  t2 = vect_recog_temp_ssa_var (itype, NULL);
 	  def_stmt
-	    = gimple_build_assign_with_ops (RSHIFT_EXPR, t2, t1,
-					    build_int_cst (itype, prec));
+	    = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1);
 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
-	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
-	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
-	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecwtype;
 
 	  t3 = vect_recog_temp_ssa_var (itype, NULL);
 	  def_stmt
-	    = gimple_build_assign_with_ops (NOP_EXPR, t3, t2, NULL_TREE);
+	    = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
+					    integer_one_node);
 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
 
 	  t4 = vect_recog_temp_ssa_var (itype, NULL);
 	  def_stmt
-	    = gimple_build_assign_with_ops (MINUS_EXPR, t4, oprnd0, t3);
-	  append_pattern_def_seq (stmt_vinfo, def_stmt);
-
-	  t5 = vect_recog_temp_ssa_var (itype, NULL);
-	  def_stmt
-	    = gimple_build_assign_with_ops (RSHIFT_EXPR, t5, t4,
-					    integer_one_node);
-	  append_pattern_def_seq (stmt_vinfo, def_stmt);
-
-	  t6 = vect_recog_temp_ssa_var (itype, NULL);
-	  def_stmt
-	    = gimple_build_assign_with_ops (PLUS_EXPR, t6, t3, t5);
+	    = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3);
 
 	  if (post_shift != 1)
 	    {
 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
 
-	      q = vect_recog_temp_ssa_var (witype, NULL);
+	      q = vect_recog_temp_ssa_var (itype, NULL);
 	      pattern_stmt
-		= gimple_build_assign_with_ops (RSHIFT_EXPR, q, t6,
+		= gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4,
 						build_int_cst (itype,
 							       post_shift
 							       - 1));
 	    }
 	  else
 	    {
-	      q = t6;
+	      q = t4;
 	      pattern_stmt = def_stmt;
 	    }
 	}
@@ -1929,9 +1916,8 @@ vect_recog_divmod_pattern (VEC (gimple, heap) **stmts,
 	    return NULL;
 
 	  /* t1 = oprnd0 >> pre_shift;
-	     t2 = t1 w* ml;
-	     t3 = t2 >> (prec + post_shift);
-	     q = (type) t3;  */
+	     t2 = t1 h* ml;
+	     q = t2 >> post_shift;  */
 	  if (pre_shift)
 	    {
 	      t1 = vect_recog_temp_ssa_var (itype, NULL);
@@ -1944,28 +1930,25 @@ vect_recog_divmod_pattern (VEC (gimple, heap) **stmts,
 	  else
 	    t1 = oprnd0;
 
-	  t2 = vect_recog_temp_ssa_var (witype, NULL);
+	  t2 = vect_recog_temp_ssa_var (itype, NULL);
 	  def_stmt
-	    = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, t2, t1,
+	    = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1,
 					    build_int_cst (itype, ml));
-	  append_pattern_def_seq (stmt_vinfo, def_stmt);
-	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
-	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
-	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecwtype;
 
-	  t3 = vect_recog_temp_ssa_var (witype, NULL);
-	  def_stmt
-	    = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
-					    build_int_cst (itype, post_shift
-							   + prec));
-	  append_pattern_def_seq (stmt_vinfo, def_stmt);
-	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
-	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
-	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecwtype;
+	  if (post_shift)
+	    {
+	      append_pattern_def_seq (stmt_vinfo, def_stmt);
 
-	  q = vect_recog_temp_ssa_var (itype, NULL);
-	  pattern_stmt
-	    = gimple_build_assign_with_ops (NOP_EXPR, q, t3, NULL_TREE);
+	      q = vect_recog_temp_ssa_var (itype, NULL);
+	      def_stmt
+		= gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2,
+						build_int_cst (itype,
+							       post_shift));
+	    }
+	  else
+	    q = t2;
+
+	  pattern_stmt = def_stmt;
 	}
     }
   else
@@ -1975,21 +1958,12 @@ vect_recog_divmod_pattern (VEC (gimple, heap) **stmts,
       HOST_WIDE_INT d = tree_low_cst (oprnd1, 0);
       unsigned HOST_WIDE_INT abs_d;
       bool add = false;
-      tree uwitype = NULL, vecuwtype = NULL;
-      tree t1, t2, t3, t4, t5, t6, t7;
+      tree t1, t2, t3, t4;
 
       /* Give up for -1.  */
       if (d == -1)
 	return NULL;
 
-      if (!vect_supportable_shift (RSHIFT_EXPR, witype))
-	{
-	  uwitype = build_nonstandard_integer_type (prec * 2, 1);
-	  vecuwtype = get_vectype_for_scalar_type (uwitype);
-	  if (vecuwtype == NULL_TREE)
-	    return NULL;
-	}
-
       /* Since d might be INT_MIN, we have to cast to
 	 unsigned HOST_WIDE_INT before negating to avoid
 	 undefined signed overflow.  */
@@ -2017,85 +1991,48 @@ vect_recog_divmod_pattern (VEC (gimple, heap) **stmts,
       if (post_shift >= prec)
 	return NULL;
 
-      /* t1 = oprnd1 w* ml;  */
-      t1 = vect_recog_temp_ssa_var (witype, NULL);
+      /* t1 = oprnd1 h* ml;  */
+      t1 = vect_recog_temp_ssa_var (itype, NULL);
       def_stmt
-	= gimple_build_assign_with_ops (WIDEN_MULT_EXPR, t1, oprnd0,
+	= gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
 					build_int_cst (itype, ml));
       append_pattern_def_seq (stmt_vinfo, def_stmt);
-      def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
-      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
-      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecwtype;
-
-      if (vecuwtype != NULL)
-	{
-	  /* t2 = (uwtype) t1;  */
-	  t2 = vect_recog_temp_ssa_var (uwitype, NULL);
-	  def_stmt
-	    = gimple_build_assign_with_ops (NOP_EXPR, t2, t1, NULL_TREE);
-	  append_pattern_def_seq (stmt_vinfo, def_stmt);
-	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
-	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
-	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecuwtype;
-	}
-      else
-	t2 = t1;
-
-      /* t3 = t2 >> prec;  or t3 = t2 >> (prec + post_shift);  */
-      t3 = vect_recog_temp_ssa_var (vecuwtype ? uwitype : witype, NULL);
-      def_stmt
-	= gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
-					build_int_cst (itype,
-						       prec
-						       + (!add
-							  && vecuwtype == NULL
-							  ? post_shift : 0)));
-      append_pattern_def_seq (stmt_vinfo, def_stmt);
-      def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
-      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
-      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecuwtype ? vecuwtype : vecwtype;
-
-      /* t4 = (type) t3;  */
-      t4 = vect_recog_temp_ssa_var (itype, NULL);
-      def_stmt
-	= gimple_build_assign_with_ops (NOP_EXPR, t4, t3, NULL_TREE);
-      append_pattern_def_seq (stmt_vinfo, def_stmt);
 
       if (add)
 	{
-	  /* t5 = t4 + oprnd0;  */
-	  t5 = vect_recog_temp_ssa_var (itype, NULL);
+	  /* t2 = t1 + oprnd0;  */
+	  t2 = vect_recog_temp_ssa_var (itype, NULL);
 	  def_stmt
-	    = gimple_build_assign_with_ops (PLUS_EXPR, t5, t4, oprnd0);
+	    = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
 	}
       else
-	t5 = t4;
+	t2 = t1;
 
-      if ((add || vecuwtype != NULL) && post_shift)
+      if (post_shift)
 	{
-	  /* t6 = t5 >> post_shift;  */
-	  t6 = vect_recog_temp_ssa_var (itype, NULL);
+	  /* t3 = t2 >> post_shift;  */
+	  t3 = vect_recog_temp_ssa_var (itype, NULL);
 	  def_stmt
-	    = gimple_build_assign_with_ops (RSHIFT_EXPR, t6, t5,
+	    = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
 					    build_int_cst (itype, post_shift));
 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
 	}
       else
-	t6 = t5;
+	t3 = t2;
 
-      /* t7 = oprnd0 >> (prec - 1);  */
-      t7 = vect_recog_temp_ssa_var (itype, NULL);
+      /* t4 = oprnd0 >> (prec - 1);  */
+      t4 = vect_recog_temp_ssa_var (itype, NULL);
       def_stmt
-	= gimple_build_assign_with_ops (RSHIFT_EXPR, t7, oprnd0,
+	= gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
 					build_int_cst (itype, prec - 1));
       append_pattern_def_seq (stmt_vinfo, def_stmt);
 
-      /* q = t6 - t7;  or q = t7 - t6;  */
+      /* q = t3 - t4;  or q = t4 - t3;  */
       q = vect_recog_temp_ssa_var (itype, NULL);
       pattern_stmt
-	= gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t7 : t6,
-					d < 0 ? t6 : t7);
+	= gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
+					d < 0 ? t3 : t4);
     }
 
   if (rhs_code == TRUNC_MOD_EXPR)

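For an unsigned divisor whose magic multiplier does not fit in the element precision, the comment in the hunk above documents the new five-statement sequence (t1 = oprnd0 h* ml; t2 = oprnd0 - t1; t3 = t2 >> 1; t4 = t1 + t3; q = t4 >> (post_shift - 1)). A scalar model of that sequence follows; it is illustrative only and not taken from the patch, with ml = 9363 and post_shift = 3 as the assumed constants for a 16-bit unsigned division by 7:

/* Illustrative sketch only: scalar model of the "add" variant emitted by
   vect_recog_divmod_pattern; "h*" is the high 16 bits of the 16x16->32
   product, i.e. what a vector MULT_HIGHPART_EXPR produces per element.  */
#include <assert.h>
#include <stdint.h>

static uint16_t
udiv7 (uint16_t x)
{
  uint16_t t1 = (uint16_t) (((uint32_t) x * 9363) >> 16); /* t1 = x h* ml */
  uint16_t t2 = x - t1;					  /* t2 = x - t1 */
  uint16_t t3 = t2 >> 1;				  /* t3 = t2 >> 1 */
  uint16_t t4 = t1 + t3;				  /* t4 = t1 + t3 */
  return t4 >> 2;					  /* q = t4 >> (post_shift - 1) */
}

int
main (void)
{
  for (uint32_t x = 0; x <= 0xFFFF; x++)
    assert (udiv7 ((uint16_t) x) == x / 7);
  return 0;
}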
gcc/tree-vect-stmts.c

@@ -3288,6 +3288,10 @@ vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
 }
 
+static tree permute_vec_elements (tree, tree, tree, gimple,
+				  gimple_stmt_iterator *);
+
 /* Function vectorizable_operation.
 
    Check if STMT performs a binary, unary or ternary operation that can
@@ -3300,17 +3304,18 @@ static bool
 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
 			gimple *vec_stmt, slp_tree slp_node)
 {
-  tree vec_dest;
+  tree vec_dest, vec_dest2 = NULL_TREE;
+  tree vec_dest3 = NULL_TREE, vec_dest4 = NULL_TREE;
   tree scalar_dest;
   tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-  tree vectype;
+  tree vectype, wide_vectype = NULL_TREE;
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
   enum tree_code code;
   enum machine_mode vec_mode;
   tree new_temp;
   int op_type;
-  optab optab;
+  optab optab, optab2 = NULL;
   int icode;
   tree def;
   gimple def_stmt;
@@ -3327,6 +3332,8 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
   tree vop0, vop1, vop2;
   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
   int vf;
+  unsigned char *sel = NULL;
+  tree decl1 = NULL_TREE, decl2 = NULL_TREE, perm_mask = NULL_TREE;
 
   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
     return false;
@@ -3451,31 +3458,97 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
   optab = optab_for_tree_code (code, vectype, optab_default);
 
   /* Supportable by target?  */
-  if (!optab)
+  if (!optab && code != MULT_HIGHPART_EXPR)
     {
       if (vect_print_dump_info (REPORT_DETAILS))
 	fprintf (vect_dump, "no optab.");
       return false;
     }
 
   vec_mode = TYPE_MODE (vectype);
-  icode = (int) optab_handler (optab, vec_mode);
+  icode = optab ? (int) optab_handler (optab, vec_mode) : CODE_FOR_nothing;
+
+  if (icode == CODE_FOR_nothing
+      && code == MULT_HIGHPART_EXPR
+      && VECTOR_MODE_P (vec_mode)
+      && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN)
+    {
+      /* If MULT_HIGHPART_EXPR isn't supported by the backend, see
+	 if we can emit VEC_WIDEN_MULT_{LO,HI}_EXPR followed by VEC_PERM_EXPR
+	 or builtin_mul_widen_{even,odd} followed by VEC_PERM_EXPR.  */
+      unsigned int prec = TYPE_PRECISION (TREE_TYPE (scalar_dest));
+      unsigned int unsignedp = TYPE_UNSIGNED (TREE_TYPE (scalar_dest));
+      tree wide_type
+	= build_nonstandard_integer_type (prec * 2, unsignedp);
+      wide_vectype
+	= get_same_sized_vectype (wide_type, vectype);
+
+      sel = XALLOCAVEC (unsigned char, nunits_in);
+      if (VECTOR_MODE_P (TYPE_MODE (wide_vectype))
+	  && GET_MODE_SIZE (TYPE_MODE (wide_vectype))
+	     == GET_MODE_SIZE (vec_mode))
+	{
+	  if (targetm.vectorize.builtin_mul_widen_even
+	      && (decl1 = targetm.vectorize.builtin_mul_widen_even (vectype))
+	      && targetm.vectorize.builtin_mul_widen_odd
+	      && (decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype))
+	      && TYPE_MODE (TREE_TYPE (TREE_TYPE (decl1)))
+		 == TYPE_MODE (wide_vectype))
+	    {
+	      for (i = 0; i < nunits_in; i++)
+		sel[i] = !BYTES_BIG_ENDIAN + (i & ~1)
+			 + ((i & 1) ? nunits_in : 0);
+	      if (can_vec_perm_p (vec_mode, false, sel))
+		icode = 0;
+	    }
+	  if (icode == CODE_FOR_nothing)
+	    {
+	      decl1 = NULL_TREE;
+	      decl2 = NULL_TREE;
+	      optab = optab_for_tree_code (VEC_WIDEN_MULT_HI_EXPR,
					   vectype, optab_default);
+	      optab2 = optab_for_tree_code (VEC_WIDEN_MULT_HI_EXPR,
					    vectype, optab_default);
+	      if (optab != NULL
+		  && optab2 != NULL
+		  && optab_handler (optab, vec_mode) != CODE_FOR_nothing
+		  && optab_handler (optab2, vec_mode) != CODE_FOR_nothing)
+		{
+		  for (i = 0; i < nunits_in; i++)
+		    sel[i] = !BYTES_BIG_ENDIAN + 2 * i;
+		  if (can_vec_perm_p (vec_mode, false, sel))
+		    icode = optab_handler (optab, vec_mode);
+		}
+	    }
+	}
+
+      if (icode == CODE_FOR_nothing)
+	{
+	  if (optab_for_tree_code (code, vectype, optab_default) == NULL)
+	    {
+	      if (vect_print_dump_info (REPORT_DETAILS))
+		fprintf (vect_dump, "no optab.");
+	      return false;
+	    }
+	  wide_vectype = NULL_TREE;
+	  optab2 = NULL;
+	}
+    }
+
   if (icode == CODE_FOR_nothing)
     {
       if (vect_print_dump_info (REPORT_DETAILS))
 	fprintf (vect_dump, "op not supported by target.");
       /* Check only during analysis.  */
       if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
-	  || (vf < vect_min_worthwhile_factor (code)
-	      && !vec_stmt))
+	  || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
 	return false;
       if (vect_print_dump_info (REPORT_DETAILS))
 	fprintf (vect_dump, "proceeding using word mode.");
     }
 
   /* Worthwhile without SIMD support?  Check only during analysis.  */
-  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
-      && vf < vect_min_worthwhile_factor (code)
-      && !vec_stmt)
+  if (!VECTOR_MODE_P (vec_mode)
+      && !vec_stmt
+      && vf < vect_min_worthwhile_factor (code))
     {
       if (vect_print_dump_info (REPORT_DETAILS))
 	fprintf (vect_dump, "not worthwhile without SIMD support.");
@@ -3497,7 +3570,16 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
     fprintf (vect_dump, "transform binary/unary operation.");
 
   /* Handle def.  */
-  vec_dest = vect_create_destination_var (scalar_dest, vectype);
+  if (wide_vectype)
+    {
+      vec_dest = vect_create_destination_var (scalar_dest, wide_vectype);
+      vec_dest2 = vect_create_destination_var (scalar_dest, wide_vectype);
+      vec_dest3 = vect_create_destination_var (scalar_dest, vectype);
+      vec_dest4 = vect_create_destination_var (scalar_dest, vectype);
+      perm_mask = vect_gen_perm_mask (vectype, sel);
+    }
+  else
+    vec_dest = vect_create_destination_var (scalar_dest, vectype);
 
   /* Allocate VECs for vector operands.  In case of SLP, vector operands are
      created in the previous stages of the recursion, so no allocation is
@@ -3606,6 +3688,66 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
 		  ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
 	  vop2 = ((op_type == ternary_op)
 		  ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
+	  if (wide_vectype)
+	    {
+	      tree new_temp2, vce;
+
+	      gcc_assert (code == MULT_HIGHPART_EXPR);
+	      if (decl1 != NULL_TREE)
+		{
+		  new_stmt = gimple_build_call (decl1, 2, vop0, vop1);
+		  new_temp = make_ssa_name (vec_dest, new_stmt);
+		  gimple_call_set_lhs (new_stmt, new_temp);
+		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
+
+		  new_stmt = gimple_build_call (decl2, 2, vop0, vop1);
+		  new_temp2 = make_ssa_name (vec_dest2, new_stmt);
+		  gimple_call_set_lhs (new_stmt, new_temp2);
+		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
+		}
+	      else
+		{
+		  new_temp = make_ssa_name (vec_dest, NULL);
+		  new_stmt
+		    = gimple_build_assign_with_ops (BYTES_BIG_ENDIAN
						    ? VEC_WIDEN_MULT_HI_EXPR
						    : VEC_WIDEN_MULT_LO_EXPR,
						    new_temp, vop0, vop1);
+		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
+
+		  new_temp2 = make_ssa_name (vec_dest2, NULL);
+		  new_stmt
+		    = gimple_build_assign_with_ops (BYTES_BIG_ENDIAN
						    ? VEC_WIDEN_MULT_LO_EXPR
						    : VEC_WIDEN_MULT_HI_EXPR,
						    new_temp2, vop0, vop1);
+		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
+		}
+
+	      vce = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
+	      new_stmt = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
						       vec_dest3, vce,
						       NULL_TREE);
+	      new_temp = make_ssa_name (vec_dest3, new_stmt);
+	      gimple_assign_set_lhs (new_stmt, new_temp);
+	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
+
+	      vce = build1 (VIEW_CONVERT_EXPR, vectype, new_temp2);
+	      new_stmt = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
						       vec_dest4, vce,
						       NULL_TREE);
+	      new_temp2 = make_ssa_name (vec_dest4, new_stmt);
+	      gimple_assign_set_lhs (new_stmt, new_temp2);
+	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
+
+	      new_temp = permute_vec_elements (new_temp, new_temp2,
					       perm_mask, stmt, gsi);
+	      new_stmt = SSA_NAME_DEF_STMT (new_temp);
+	      if (slp_node)
+		VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
				new_stmt);
+	      continue;
+	    }
+
 	  new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
						    vop0, vop1, vop2);
 	  new_temp = make_ssa_name (vec_dest, new_stmt);
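When the target has no vector high-part multiply, the code above falls back to widening multiplies of the two element halves (or the even/odd builtins), reinterprets each double-width result as narrow lanes, and applies a VEC_PERM_EXPR whose selector (sel[i] = !BYTES_BIG_ENDIAN + 2 * i in the widen-mult case) picks out the lanes holding the high halves. A scalar model of that dataflow follows; it is illustrative only, assumes a little-endian host (mirroring the !BYTES_BIG_ENDIAN selector), and the helper name mult_highpart and the 8-element width are made up for the example:

/* Illustrative sketch only: scalar model of the VEC_WIDEN_MULT +
   VIEW_CONVERT + VEC_PERM fallback for a missing vector high-part
   multiply.  Assumes a little-endian host.  */
#include <assert.h>
#include <stdint.h>
#include <string.h>

#define N 8  /* elements per "vector" of uint16_t, assumed for the model */

static void
mult_highpart (const uint16_t *a, const uint16_t *b, uint16_t *hi)
{
  uint32_t lo_prod[N / 2], hi_prod[N / 2];
  uint16_t lanes[2 * N];

  /* VEC_WIDEN_MULT_LO_EXPR / VEC_WIDEN_MULT_HI_EXPR: double-width products
     of the low and high element halves.  */
  for (int i = 0; i < N / 2; i++)
    {
      lo_prod[i] = (uint32_t) a[i] * b[i];
      hi_prod[i] = (uint32_t) a[N / 2 + i] * b[N / 2 + i];
    }

  /* VIEW_CONVERT_EXPR: reinterpret both results as 2*N narrow lanes.  */
  memcpy (lanes, lo_prod, sizeof lo_prod);
  memcpy (lanes + N, hi_prod, sizeof hi_prod);

  /* VEC_PERM_EXPR with sel[i] = 1 + 2 * i (little-endian layout): keep
     the lane holding the high half of each product.  */
  for (int i = 0; i < N; i++)
    hi[i] = lanes[1 + 2 * i];
}

int
main (void)
{
  uint16_t a[N], b[N], hi[N];
  for (int i = 0; i < N; i++)
    {
      a[i] = (uint16_t) (0x1234 * (i + 1));
      b[i] = (uint16_t) (0x00AB * (i + 3));
    }
  mult_highpart (a, b, hi);
  for (int i = 0; i < N; i++)
    assert (hi[i] == (uint16_t) (((uint32_t) a[i] * b[i]) >> 16));
  return 0;
}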