diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 86d0176d3993..4e327763ce82 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,13 @@ 2012-06-28 Jakub Jelinek + PR tree-optimization/51581 + * tree-vect-stmts.c (permute_vec_elements): Add forward decl. + (vectorizable_operation): Handle vectorization of MULT_HIGHPART_EXPR + also using VEC_WIDEN_MULT_*_EXPR or builtin_mul_widen_* plus + VEC_PERM_EXPR if vector MULT_HIGHPART_EXPR isn't supported. + * tree-vect-patterns.c (vect_recog_divmod_pattern): Use + MULT_HIGHPART_EXPR instead of VEC_WIDEN_MULT_*_EXPR and shifts. + PR tree-optimization/53645 * tree-vect-generic.c (expand_vector_divmod): Use MULT_HIGHPART_EXPR instead of VEC_WIDEN_MULT_{HI,LO}_EXPR followed by VEC_PERM_EXPR diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 20e38a0a99d0..d2ff7b93fe89 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,8 @@ 2012-06-28 Jakub Jelinek + PR tree-optimization/51581 + * gcc.dg/vect/pr51581-4.c: New test. + PR tree-optimization/53645 * gcc.c-torture/execute/pr53645-2.c: New test. 
diff --git a/gcc/testsuite/gcc.dg/vect/pr51581-4.c b/gcc/testsuite/gcc.dg/vect/pr51581-4.c new file mode 100644 index 000000000000..f48ec6b59378 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr51581-4.c @@ -0,0 +1,166 @@ +/* PR tree-optimization/51581 */ + +#include "tree-vect.h" + +short int a[16], b[16]; +unsigned short int c[16], d[16]; + +void +f1 (void) +{ + a[0] = b[0] / 8; + a[1] = b[1] / 8; + a[2] = b[2] / 8; + a[3] = b[3] / 8; + a[4] = b[4] / 8; + a[5] = b[5] / 8; + a[6] = b[6] / 8; + a[7] = b[7] / 8; + a[8] = b[8] / 8; + a[9] = b[9] / 8; + a[10] = b[10] / 8; + a[11] = b[11] / 8; + a[12] = b[12] / 8; + a[13] = b[13] / 8; + a[14] = b[14] / 8; + a[15] = b[15] / 8; +} + +void +f2 (void) +{ + c[0] = d[0] / 3; + c[1] = d[1] / 3; + c[2] = d[2] / 3; + c[3] = d[3] / 3; + c[4] = d[4] / 3; + c[5] = d[5] / 3; + c[6] = d[6] / 3; + c[7] = d[7] / 3; + c[8] = d[8] / 3; + c[9] = d[9] / 3; + c[10] = d[10] / 3; + c[11] = d[11] / 3; + c[12] = d[12] / 3; + c[13] = d[13] / 3; + c[14] = d[14] / 3; + c[15] = d[15] / 3; +} + +void +f3 (void) +{ + a[0] = b[0] / 8; + a[1] = b[1] / 4; + a[2] = b[2] / 8; + a[3] = b[3] / 4; + a[4] = b[4] / 8; + a[5] = b[5] / 4; + a[6] = b[6] / 8; + a[7] = b[7] / 4; + a[8] = b[8] / 8; + a[9] = b[9] / 4; + a[10] = b[10] / 8; + a[11] = b[11] / 4; + a[12] = b[12] / 8; + a[13] = b[13] / 4; + a[14] = b[14] / 8; + a[15] = b[15] / 4; +} + +void +f4 (void) +{ + c[0] = d[0] / 3; + c[1] = d[1] / 5; + c[2] = d[2] / 3; + c[3] = d[3] / 5; + c[4] = d[4] / 3; + c[5] = d[5] / 5; + c[6] = d[6] / 3; + c[7] = d[7] / 5; + c[8] = d[8] / 3; + c[9] = d[9] / 5; + c[10] = d[10] / 3; + c[11] = d[11] / 5; + c[12] = d[12] / 3; + c[13] = d[13] / 5; + c[14] = d[14] / 3; + c[15] = d[15] / 5; +} + +void +f5 (void) +{ + a[0] = b[0] / 14; + a[1] = b[1] / 15; + a[2] = b[2] / 14; + a[3] = b[3] / 15; + a[4] = b[4] / 14; + a[5] = b[5] / 15; + a[6] = b[6] / 14; + a[7] = b[7] / 15; + a[8] = b[8] / 14; + a[9] = b[9] / 15; + a[10] = b[10] / 14; + a[11] = b[11] / 15; + a[12] = b[12] / 14; + 
a[13] = b[13] / 15; + a[14] = b[14] / 14; + a[15] = b[15] / 15; +} + +void +f6 (void) +{ + c[0] = d[0] / 6; + c[1] = d[1] / 5; + c[2] = d[2] / 6; + c[3] = d[3] / 5; + c[4] = d[4] / 6; + c[5] = d[5] / 5; + c[6] = d[6] / 13; + c[7] = d[7] / 5; + c[8] = d[8] / 6; + c[9] = d[9] / 5; + c[10] = d[10] / 6; + c[11] = d[11] / 5; + c[12] = d[12] / 6; + c[13] = d[13] / 5; + c[14] = d[14] / 13; + c[15] = d[15] / 5; +} + +int +main () +{ + int i; + check_vect (); + asm (""); + for (i = 0; i < 16; i++) + { + asm (""); + b[i] = i - 8; + d[i] = i - 8; + } + f1 (); + f2 (); + for (i = 0; i < 16; i++) + if (a[i] != b[i] / 8 || c[i] != d[i] / 3) + abort (); + f3 (); + f4 (); + for (i = 0; i < 16; i+= 2) + if (a[i] != b[i] / 8 || a[i + 1] != b[i + 1] / 4 + || c[i] != d[i] / 3 || c[i + 1] != d[i + 1] / 5) + abort (); + f5 (); + f6 (); + for (i = 0; i < 16; i+= 2) + if (a[i] != b[i] / 14 || a[i + 1] != b[i + 1] / 15 + || c[i] != d[i] / ((i & 7) == 6 ? 13 : 6) || c[i + 1] != d[i + 1] / 5) + abort (); + return 0; +} + +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c index ff9f8e4ea825..3f57e5dcf8b1 100644 --- a/gcc/tree-vect-patterns.c +++ b/gcc/tree-vect-patterns.c @@ -1635,7 +1635,7 @@ vect_recog_divmod_pattern (VEC (gimple, heap) **stmts, tree *type_in, tree *type_out) { gimple last_stmt = VEC_pop (gimple, *stmts); - tree oprnd0, oprnd1, vectype, itype, witype, vecwtype, cond; + tree oprnd0, oprnd1, vectype, itype, cond; gimple pattern_stmt, def_stmt; enum tree_code rhs_code; stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); @@ -1814,17 +1814,23 @@ vect_recog_divmod_pattern (VEC (gimple, heap) **stmts, || prec > HOST_BITS_PER_WIDE_INT) return NULL; - witype = build_nonstandard_integer_type (prec * 2, - TYPE_UNSIGNED (itype)); - vecwtype = get_vectype_for_scalar_type (witype); - if (vecwtype == NULL_TREE) - return NULL; + optab = optab_for_tree_code (MULT_HIGHPART_EXPR, vectype, optab_default); + if (optab == NULL + 
|| optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing) + { + tree witype = build_nonstandard_integer_type (prec * 2, + TYPE_UNSIGNED (itype)); + tree vecwtype = get_vectype_for_scalar_type (witype); - if (!supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt, - vecwtype, vectype, - &dummy, &dummy, &dummy_code, - &dummy_code, &dummy_int, &dummy_vec)) - return NULL; + if (vecwtype == NULL_TREE) + return NULL; + if (!supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt, + vecwtype, vectype, + &dummy, &dummy, &dummy_code, + &dummy_code, &dummy_int, + &dummy_vec)) + return NULL; + } STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL; @@ -1834,7 +1840,7 @@ vect_recog_divmod_pattern (VEC (gimple, heap) **stmts, int pre_shift, post_shift; unsigned HOST_WIDE_INT d = tree_low_cst (oprnd1, 1) & GET_MODE_MASK (TYPE_MODE (itype)); - tree t1, t2, t3, t4, t5, t6; + tree t1, t2, t3, t4; if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1))) /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. 
*/ @@ -1861,65 +1867,46 @@ vect_recog_divmod_pattern (VEC (gimple, heap) **stmts, if (post_shift - 1 >= prec) return NULL; - /* t1 = oprnd0 w* ml; - t2 = t1 >> prec; - t3 = (type) t2; - t4 = oprnd0 - t3; - t5 = t4 >> 1; - t6 = t3 + t5; - q = t6 >> (post_shift - 1); */ - t1 = vect_recog_temp_ssa_var (witype, NULL); + /* t1 = oprnd0 h* ml; + t2 = oprnd0 - t1; + t3 = t2 >> 1; + t4 = t1 + t3; + q = t4 >> (post_shift - 1); */ + t1 = vect_recog_temp_ssa_var (itype, NULL); def_stmt - = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, t1, oprnd0, + = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0, build_int_cst (itype, ml)); append_pattern_def_seq (stmt_vinfo, def_stmt); - def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); - set_vinfo_for_stmt (def_stmt, def_stmt_vinfo); - STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecwtype; - t2 = vect_recog_temp_ssa_var (witype, NULL); + t2 = vect_recog_temp_ssa_var (itype, NULL); def_stmt - = gimple_build_assign_with_ops (RSHIFT_EXPR, t2, t1, - build_int_cst (itype, prec)); + = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1); append_pattern_def_seq (stmt_vinfo, def_stmt); - def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); - set_vinfo_for_stmt (def_stmt, def_stmt_vinfo); - STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecwtype; t3 = vect_recog_temp_ssa_var (itype, NULL); def_stmt - = gimple_build_assign_with_ops (NOP_EXPR, t3, t2, NULL_TREE); + = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2, + integer_one_node); append_pattern_def_seq (stmt_vinfo, def_stmt); t4 = vect_recog_temp_ssa_var (itype, NULL); def_stmt - = gimple_build_assign_with_ops (MINUS_EXPR, t4, oprnd0, t3); - append_pattern_def_seq (stmt_vinfo, def_stmt); - - t5 = vect_recog_temp_ssa_var (itype, NULL); - def_stmt - = gimple_build_assign_with_ops (RSHIFT_EXPR, t5, t4, - integer_one_node); - append_pattern_def_seq (stmt_vinfo, def_stmt); - - t6 = vect_recog_temp_ssa_var (itype, NULL); - def_stmt - = 
gimple_build_assign_with_ops (PLUS_EXPR, t6, t3, t5); + = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3); if (post_shift != 1) { append_pattern_def_seq (stmt_vinfo, def_stmt); - q = vect_recog_temp_ssa_var (witype, NULL); + q = vect_recog_temp_ssa_var (itype, NULL); pattern_stmt - = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t6, + = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4, build_int_cst (itype, post_shift - 1)); } else { - q = t6; + q = t4; pattern_stmt = def_stmt; } } @@ -1929,9 +1916,8 @@ vect_recog_divmod_pattern (VEC (gimple, heap) **stmts, return NULL; /* t1 = oprnd0 >> pre_shift; - t2 = t1 w* ml; - t3 = t2 >> (prec + post_shift); - q = (type) t3; */ + t2 = t1 h* ml; + q = t2 >> post_shift; */ if (pre_shift) { t1 = vect_recog_temp_ssa_var (itype, NULL); @@ -1944,28 +1930,25 @@ vect_recog_divmod_pattern (VEC (gimple, heap) **stmts, else t1 = oprnd0; - t2 = vect_recog_temp_ssa_var (witype, NULL); + t2 = vect_recog_temp_ssa_var (itype, NULL); def_stmt - = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, t2, t1, + = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1, build_int_cst (itype, ml)); - append_pattern_def_seq (stmt_vinfo, def_stmt); - def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); - set_vinfo_for_stmt (def_stmt, def_stmt_vinfo); - STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecwtype; - t3 = vect_recog_temp_ssa_var (witype, NULL); - def_stmt - = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2, - build_int_cst (itype, post_shift - + prec)); - append_pattern_def_seq (stmt_vinfo, def_stmt); - def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); - set_vinfo_for_stmt (def_stmt, def_stmt_vinfo); - STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecwtype; + if (post_shift) + { + append_pattern_def_seq (stmt_vinfo, def_stmt); - q = vect_recog_temp_ssa_var (itype, NULL); - pattern_stmt - = gimple_build_assign_with_ops (NOP_EXPR, q, t3, NULL_TREE); + q = vect_recog_temp_ssa_var (itype, NULL); + def_stmt + = 
gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2, + build_int_cst (itype, + post_shift)); + } + else + q = t2; + + pattern_stmt = def_stmt; } } else @@ -1975,21 +1958,12 @@ vect_recog_divmod_pattern (VEC (gimple, heap) **stmts, HOST_WIDE_INT d = tree_low_cst (oprnd1, 0); unsigned HOST_WIDE_INT abs_d; bool add = false; - tree uwitype = NULL, vecuwtype = NULL; - tree t1, t2, t3, t4, t5, t6, t7; + tree t1, t2, t3, t4; /* Give up for -1. */ if (d == -1) return NULL; - if (!vect_supportable_shift (RSHIFT_EXPR, witype)) - { - uwitype = build_nonstandard_integer_type (prec * 2, 1); - vecuwtype = get_vectype_for_scalar_type (uwitype); - if (vecuwtype == NULL_TREE) - return NULL; - } - /* Since d might be INT_MIN, we have to cast to unsigned HOST_WIDE_INT before negating to avoid undefined signed overflow. */ @@ -2017,85 +1991,48 @@ vect_recog_divmod_pattern (VEC (gimple, heap) **stmts, if (post_shift >= prec) return NULL; - /* t1 = oprnd1 w* ml; */ - t1 = vect_recog_temp_ssa_var (witype, NULL); + /* t1 = oprnd0 h* ml; */ + t1 = vect_recog_temp_ssa_var (itype, NULL); def_stmt - = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, t1, oprnd0, + = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0, build_int_cst (itype, ml)); append_pattern_def_seq (stmt_vinfo, def_stmt); - def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); - set_vinfo_for_stmt (def_stmt, def_stmt_vinfo); - STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecwtype; - - if (vecuwtype != NULL) - { - /* t2 = (uwtype) t1; */ - t2 = vect_recog_temp_ssa_var (uwitype, NULL); - def_stmt - = gimple_build_assign_with_ops (NOP_EXPR, t2, t1, NULL_TREE); - append_pattern_def_seq (stmt_vinfo, def_stmt); - def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); - set_vinfo_for_stmt (def_stmt, def_stmt_vinfo); - STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecuwtype; - } - else - t2 = t1; - - /* t3 = t2 >> prec; or t3 = t2 >> (prec + post_shift); */ - t3 = vect_recog_temp_ssa_var (vecuwtype ? 
uwitype : witype, NULL); - def_stmt - = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2, - build_int_cst (itype, - prec - + (!add - && vecuwtype == NULL - ? post_shift : 0))); - append_pattern_def_seq (stmt_vinfo, def_stmt); - def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo); - set_vinfo_for_stmt (def_stmt, def_stmt_vinfo); - STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecuwtype ? vecuwtype : vecwtype; - - /* t4 = (type) t3; */ - t4 = vect_recog_temp_ssa_var (itype, NULL); - def_stmt - = gimple_build_assign_with_ops (NOP_EXPR, t4, t3, NULL_TREE); - append_pattern_def_seq (stmt_vinfo, def_stmt); if (add) { - /* t5 = t4 + oprnd0; */ - t5 = vect_recog_temp_ssa_var (itype, NULL); + /* t2 = t1 + oprnd0; */ + t2 = vect_recog_temp_ssa_var (itype, NULL); def_stmt - = gimple_build_assign_with_ops (PLUS_EXPR, t5, t4, oprnd0); + = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0); append_pattern_def_seq (stmt_vinfo, def_stmt); } else - t5 = t4; + t2 = t1; - if ((add || vecuwtype != NULL) && post_shift) + if (post_shift) { - /* t6 = t5 >> post_shift; */ - t6 = vect_recog_temp_ssa_var (itype, NULL); + /* t3 = t2 >> post_shift; */ + t3 = vect_recog_temp_ssa_var (itype, NULL); def_stmt - = gimple_build_assign_with_ops (RSHIFT_EXPR, t6, t5, + = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2, build_int_cst (itype, post_shift)); append_pattern_def_seq (stmt_vinfo, def_stmt); } else - t6 = t5; + t3 = t2; - /* t7 = oprnd0 >> (prec - 1); */ - t7 = vect_recog_temp_ssa_var (itype, NULL); + /* t4 = oprnd0 >> (prec - 1); */ + t4 = vect_recog_temp_ssa_var (itype, NULL); def_stmt - = gimple_build_assign_with_ops (RSHIFT_EXPR, t7, oprnd0, + = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0, build_int_cst (itype, prec - 1)); append_pattern_def_seq (stmt_vinfo, def_stmt); - /* q = t6 - t7; or q = t7 - t6; */ + /* q = t3 - t4; or q = t4 - t3; */ q = vect_recog_temp_ssa_var (itype, NULL); pattern_stmt - = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? 
t7 : t6, - d < 0 ? t6 : t7); + = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3, + d < 0 ? t3 : t4); } if (rhs_code == TRUNC_MOD_EXPR) diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 010181c3124d..b40e68c4060f 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -3288,6 +3288,10 @@ vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi, } +static tree permute_vec_elements (tree, tree, tree, gimple, + gimple_stmt_iterator *); + + /* Function vectorizable_operation. Check if STMT performs a binary, unary or ternary operation that can @@ -3300,17 +3304,18 @@ static bool vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, slp_tree slp_node) { - tree vec_dest; + tree vec_dest, vec_dest2 = NULL_TREE; + tree vec_dest3 = NULL_TREE, vec_dest4 = NULL_TREE; tree scalar_dest; tree op0, op1 = NULL_TREE, op2 = NULL_TREE; stmt_vec_info stmt_info = vinfo_for_stmt (stmt); - tree vectype; + tree vectype, wide_vectype = NULL_TREE; loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); enum tree_code code; enum machine_mode vec_mode; tree new_temp; int op_type; - optab optab; + optab optab, optab2 = NULL; int icode; tree def; gimple def_stmt; @@ -3327,6 +3332,8 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, tree vop0, vop1, vop2; bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); int vf; + unsigned char *sel = NULL; + tree decl1 = NULL_TREE, decl2 = NULL_TREE, perm_mask = NULL_TREE; if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) return false; @@ -3451,31 +3458,97 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, optab = optab_for_tree_code (code, vectype, optab_default); /* Supportable by target? */ - if (!optab) + if (!optab && code != MULT_HIGHPART_EXPR) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "no optab."); return false; } vec_mode = TYPE_MODE (vectype); - icode = (int) optab_handler (optab, vec_mode); + icode = optab ? 
(int) optab_handler (optab, vec_mode) : CODE_FOR_nothing; + + if (icode == CODE_FOR_nothing + && code == MULT_HIGHPART_EXPR + && VECTOR_MODE_P (vec_mode) + && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN) + { + /* If MULT_HIGHPART_EXPR isn't supported by the backend, see + if we can emit VEC_WIDEN_MULT_{LO,HI}_EXPR followed by VEC_PERM_EXPR + or builtin_mul_widen_{even,odd} followed by VEC_PERM_EXPR. */ + unsigned int prec = TYPE_PRECISION (TREE_TYPE (scalar_dest)); + unsigned int unsignedp = TYPE_UNSIGNED (TREE_TYPE (scalar_dest)); + tree wide_type + = build_nonstandard_integer_type (prec * 2, unsignedp); + wide_vectype + = get_same_sized_vectype (wide_type, vectype); + + sel = XALLOCAVEC (unsigned char, nunits_in); + if (VECTOR_MODE_P (TYPE_MODE (wide_vectype)) + && GET_MODE_SIZE (TYPE_MODE (wide_vectype)) + == GET_MODE_SIZE (vec_mode)) + { + if (targetm.vectorize.builtin_mul_widen_even + && (decl1 = targetm.vectorize.builtin_mul_widen_even (vectype)) + && targetm.vectorize.builtin_mul_widen_odd + && (decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype)) + && TYPE_MODE (TREE_TYPE (TREE_TYPE (decl1))) + == TYPE_MODE (wide_vectype)) + { + for (i = 0; i < nunits_in; i++) + sel[i] = !BYTES_BIG_ENDIAN + (i & ~1) + + ((i & 1) ? 
nunits_in : 0); + if (can_vec_perm_p (vec_mode, false, sel)) + icode = 0; + } + if (icode == CODE_FOR_nothing) + { + decl1 = NULL_TREE; + decl2 = NULL_TREE; + optab = optab_for_tree_code (VEC_WIDEN_MULT_HI_EXPR, + vectype, optab_default); + optab2 = optab_for_tree_code (VEC_WIDEN_MULT_LO_EXPR, + vectype, optab_default); + if (optab != NULL + && optab2 != NULL + && optab_handler (optab, vec_mode) != CODE_FOR_nothing + && optab_handler (optab2, vec_mode) != CODE_FOR_nothing) + { + for (i = 0; i < nunits_in; i++) + sel[i] = !BYTES_BIG_ENDIAN + 2 * i; + if (can_vec_perm_p (vec_mode, false, sel)) + icode = optab_handler (optab, vec_mode); + } + } + } + if (icode == CODE_FOR_nothing) + { + if (optab_for_tree_code (code, vectype, optab_default) == NULL) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "no optab."); + return false; + } + wide_vectype = NULL_TREE; + optab2 = NULL; + } + } + if (icode == CODE_FOR_nothing) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "op not supported by target."); /* Check only during analysis. */ if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD - || (vf < vect_min_worthwhile_factor (code) - && !vec_stmt)) + || (!vec_stmt && vf < vect_min_worthwhile_factor (code))) return false; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "proceeding using word mode."); } /* Worthwhile without SIMD support? Check only during analysis. */ - if (!VECTOR_MODE_P (TYPE_MODE (vectype)) - && vf < vect_min_worthwhile_factor (code) - && !vec_stmt) + if (!VECTOR_MODE_P (vec_mode) + && !vec_stmt + && vf < vect_min_worthwhile_factor (code)) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "not worthwhile without SIMD support."); @@ -3497,7 +3570,16 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, fprintf (vect_dump, "transform binary/unary operation."); /* Handle def. 
*/ - vec_dest = vect_create_destination_var (scalar_dest, vectype); + if (wide_vectype) + { + vec_dest = vect_create_destination_var (scalar_dest, wide_vectype); + vec_dest2 = vect_create_destination_var (scalar_dest, wide_vectype); + vec_dest3 = vect_create_destination_var (scalar_dest, vectype); + vec_dest4 = vect_create_destination_var (scalar_dest, vectype); + perm_mask = vect_gen_perm_mask (vectype, sel); + } + else + vec_dest = vect_create_destination_var (scalar_dest, vectype); /* Allocate VECs for vector operands. In case of SLP, vector operands are created in the previous stages of the recursion, so no allocation is @@ -3606,6 +3688,66 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE); vop2 = ((op_type == ternary_op) ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE); + if (wide_vectype) + { + tree new_temp2, vce; + + gcc_assert (code == MULT_HIGHPART_EXPR); + if (decl1 != NULL_TREE) + { + new_stmt = gimple_build_call (decl1, 2, vop0, vop1); + new_temp = make_ssa_name (vec_dest, new_stmt); + gimple_call_set_lhs (new_stmt, new_temp); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + + new_stmt = gimple_build_call (decl2, 2, vop0, vop1); + new_temp2 = make_ssa_name (vec_dest2, new_stmt); + gimple_call_set_lhs (new_stmt, new_temp2); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + } + else + { + new_temp = make_ssa_name (vec_dest, NULL); + new_stmt + = gimple_build_assign_with_ops (BYTES_BIG_ENDIAN + ? VEC_WIDEN_MULT_HI_EXPR + : VEC_WIDEN_MULT_LO_EXPR, + new_temp, vop0, vop1); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + + new_temp2 = make_ssa_name (vec_dest2, NULL); + new_stmt + = gimple_build_assign_with_ops (BYTES_BIG_ENDIAN + ? 
VEC_WIDEN_MULT_LO_EXPR + : VEC_WIDEN_MULT_HI_EXPR, + new_temp2, vop0, vop1); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + } + + vce = build1 (VIEW_CONVERT_EXPR, vectype, new_temp); + new_stmt = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, + vec_dest3, vce, + NULL_TREE); + new_temp = make_ssa_name (vec_dest3, new_stmt); + gimple_assign_set_lhs (new_stmt, new_temp); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + + vce = build1 (VIEW_CONVERT_EXPR, vectype, new_temp2); + new_stmt = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, + vec_dest4, vce, + NULL_TREE); + new_temp2 = make_ssa_name (vec_dest4, new_stmt); + gimple_assign_set_lhs (new_stmt, new_temp2); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + + new_temp = permute_vec_elements (new_temp, new_temp2, + perm_mask, stmt, gsi); + new_stmt = SSA_NAME_DEF_STMT (new_temp); + if (slp_node) + VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), + new_stmt); + continue; + } new_stmt = gimple_build_assign_with_ops3 (code, vec_dest, vop0, vop1, vop2); new_temp = make_ssa_name (vec_dest, new_stmt);