mirror of git://gcc.gnu.org/git/gcc.git
md.texi (vec_load_lanes, [...]): Document.
gcc/
	* doc/md.texi (vec_load_lanes, vec_store_lanes): Document.
	* optabs.h (COI_vec_load_lanes, COI_vec_store_lanes): New
	convert_optab_index values.
	(vec_load_lanes_optab, vec_store_lanes_optab): New convert optabs.
	* genopinit.c (optabs): Initialize the new optabs.
	* internal-fn.def (LOAD_LANES, STORE_LANES): New internal functions.
	* internal-fn.c (get_multi_vector_move, expand_LOAD_LANES)
	(expand_STORE_LANES): New functions.
	* tree.h (build_array_type_nelts): Declare.
	* tree.c (build_array_type_nelts): New function.
	* tree-vectorizer.h (vect_model_store_cost): Add a bool argument.
	(vect_model_load_cost): Likewise.
	(vect_store_lanes_supported, vect_load_lanes_supported)
	(vect_record_strided_load_vectors): Declare.
	* tree-vect-data-refs.c (vect_lanes_optab_supported_p)
	(vect_store_lanes_supported, vect_load_lanes_supported): New functions.
	(vect_transform_strided_load): Split out statement recording into...
	(vect_record_strided_load_vectors): ...this new function.
	* tree-vect-stmts.c (create_vector_array, read_vector_array)
	(write_vector_array, create_array_ref): New functions.
	(vect_model_store_cost): Add store_lanes_p argument.
	(vect_model_load_cost): Add load_lanes_p argument.
	(vectorizable_store): Try to use store-lanes functions for
	interleaved stores.
	(vectorizable_load): Likewise load-lanes and loads.
	* tree-vect-slp.c (vect_get_and_check_slp_defs)
	(vect_build_slp_tree):

From-SVN: r172760
commit c2d7ab2aea (parent 1da0876c95)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,33 @@
+2011-04-20  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	* doc/md.texi (vec_load_lanes, vec_store_lanes): Document.
+	* optabs.h (COI_vec_load_lanes, COI_vec_store_lanes): New
+	convert_optab_index values.
+	(vec_load_lanes_optab, vec_store_lanes_optab): New convert optabs.
+	* genopinit.c (optabs): Initialize the new optabs.
+	* internal-fn.def (LOAD_LANES, STORE_LANES): New internal functions.
+	* internal-fn.c (get_multi_vector_move, expand_LOAD_LANES)
+	(expand_STORE_LANES): New functions.
+	* tree.h (build_array_type_nelts): Declare.
+	* tree.c (build_array_type_nelts): New function.
+	* tree-vectorizer.h (vect_model_store_cost): Add a bool argument.
+	(vect_model_load_cost): Likewise.
+	(vect_store_lanes_supported, vect_load_lanes_supported)
+	(vect_record_strided_load_vectors): Declare.
+	* tree-vect-data-refs.c (vect_lanes_optab_supported_p)
+	(vect_store_lanes_supported, vect_load_lanes_supported): New functions.
+	(vect_transform_strided_load): Split out statement recording into...
+	(vect_record_strided_load_vectors): ...this new function.
+	* tree-vect-stmts.c (create_vector_array, read_vector_array)
+	(write_vector_array, create_array_ref): New functions.
+	(vect_model_store_cost): Add store_lanes_p argument.
+	(vect_model_load_cost): Add load_lanes_p argument.
+	(vectorizable_store): Try to use store-lanes functions for
+	interleaved stores.
+	(vectorizable_load): Likewise load-lanes and loads.
+	* tree-vect-slp.c (vect_get_and_check_slp_defs)
+	(vect_build_slp_tree):
+
 2011-04-20  Richard Sandiford  <richard.sandiford@linaro.org>
 
 	* tree-vect-stmts.c (vectorizable_store): Only chain one related
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3846,6 +3846,48 @@ into consecutive memory locations.  Operand 0 is the first of the
 consecutive memory locations, operand 1 is the first register, and
 operand 2 is a constant: the number of consecutive registers.
 
+@cindex @code{vec_load_lanes@var{m}@var{n}} instruction pattern
+@item @samp{vec_load_lanes@var{m}@var{n}}
+Perform an interleaved load of several vectors from memory operand 1
+into register operand 0.  Both operands have mode @var{m}.  The register
+operand is viewed as holding consecutive vectors of mode @var{n},
+while the memory operand is a flat array that contains the same number
+of elements.  The operation is equivalent to:
+
+@smallexample
+int c = GET_MODE_SIZE (@var{m}) / GET_MODE_SIZE (@var{n});
+for (j = 0; j < GET_MODE_NUNITS (@var{n}); j++)
+  for (i = 0; i < c; i++)
+    operand0[i][j] = operand1[j * c + i];
+@end smallexample
+
+For example, @samp{vec_load_lanestiv4hi} loads 8 16-bit values
+from memory into a register of mode @samp{TI}@.  The register
+contains two consecutive vectors of mode @samp{V4HI}@.
+
+This pattern can only be used if:
+@smallexample
+TARGET_ARRAY_MODE_SUPPORTED_P (@var{n}, @var{c})
+@end smallexample
+is true.  GCC assumes that, if a target supports this kind of
+instruction for some mode @var{n}, it also supports unaligned
+loads for vectors of mode @var{n}.
+
+@cindex @code{vec_store_lanes@var{m}@var{n}} instruction pattern
+@item @samp{vec_store_lanes@var{m}@var{n}}
+Equivalent to @samp{vec_load_lanes@var{m}@var{n}}, with the memory
+and register operands reversed.  That is, the instruction is
+equivalent to:
+
+@smallexample
+int c = GET_MODE_SIZE (@var{m}) / GET_MODE_SIZE (@var{n});
+for (j = 0; j < GET_MODE_NUNITS (@var{n}); j++)
+  for (i = 0; i < c; i++)
+    operand0[j * c + i] = operand1[i][j];
+@end smallexample
+
+for a memory operand 0 and register operand 1.
+
 @cindex @code{vec_set@var{m}} instruction pattern
 @item @samp{vec_set@var{m}}
 Set given field in the vector value.  Operand 0 is the vector to modify,
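To make the @samp{vec_load_lanestiv4hi} example concrete: c = 2 vectors of
4 lanes each.  The following is a minimal standalone C sketch of the
documented pseudocode; the data values are made up for illustration and
nothing here is part of the patch itself.

#include <stdio.h>

/* Sketch of the vec_load_lanestiv4hi semantics documented above:
   c = 2 vectors (V4HI) of nunits = 4 lanes, de-interleaved from a
   flat 8-element array.  */
int
main (void)
{
  short mem[8] = { 10, 11, 20, 21, 30, 31, 40, 41 }; /* operand 1 */
  short vec[2][4];                                   /* operand 0 */
  int c = 2, nunits = 4, i, j;

  for (j = 0; j < nunits; j++)
    for (i = 0; i < c; i++)
      vec[i][j] = mem[j * c + i];

  /* Result: vec[0] = {10, 20, 30, 40} and vec[1] = {11, 21, 31, 41};
     each vector gathers every c-th element of memory.  */
  for (i = 0; i < c; i++)
    {
      for (j = 0; j < nunits; j++)
        printf ("%d ", vec[i][j]);
      printf ("\n");
    }
  return 0;
}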
--- a/gcc/genopinit.c
+++ b/gcc/genopinit.c
@@ -74,6 +74,8 @@ static const char * const optabs[] =
   "set_convert_optab_handler (fractuns_optab, $B, $A, CODE_FOR_$(fractuns$Q$a$I$b2$))",
   "set_convert_optab_handler (satfract_optab, $B, $A, CODE_FOR_$(satfract$a$Q$b2$))",
   "set_convert_optab_handler (satfractuns_optab, $B, $A, CODE_FOR_$(satfractuns$I$a$Q$b2$))",
+  "set_convert_optab_handler (vec_load_lanes_optab, $A, $B, CODE_FOR_$(vec_load_lanes$a$b$))",
+  "set_convert_optab_handler (vec_store_lanes_optab, $A, $B, CODE_FOR_$(vec_store_lanes$a$b$))",
   "set_optab_handler (add_optab, $A, CODE_FOR_$(add$P$a3$))",
   "set_optab_handler (addv_optab, $A, CODE_FOR_$(add$F$a3$)),\n\
    set_optab_handler (add_optab, $A, CODE_FOR_$(add$F$a3$))",
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -42,6 +42,73 @@ const int internal_fn_flags_array[] = {
   0
 };
 
+/* ARRAY_TYPE is an array of vector modes.  Return the associated insn
+   for load-lanes-style optab OPTAB.  The insn must exist.  */
+
+static enum insn_code
+get_multi_vector_move (tree array_type, convert_optab optab)
+{
+  enum insn_code icode;
+  enum machine_mode imode;
+  enum machine_mode vmode;
+
+  gcc_assert (TREE_CODE (array_type) == ARRAY_TYPE);
+  imode = TYPE_MODE (array_type);
+  vmode = TYPE_MODE (TREE_TYPE (array_type));
+
+  icode = convert_optab_handler (optab, imode, vmode);
+  gcc_assert (icode != CODE_FOR_nothing);
+  return icode;
+}
+
+/* Expand LOAD_LANES call STMT.  */
+
+static void
+expand_LOAD_LANES (gimple stmt)
+{
+  struct expand_operand ops[2];
+  tree type, lhs, rhs;
+  rtx target, mem;
+
+  lhs = gimple_call_lhs (stmt);
+  rhs = gimple_call_arg (stmt, 0);
+  type = TREE_TYPE (lhs);
+
+  target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  mem = expand_normal (rhs);
+
+  gcc_assert (MEM_P (mem));
+  PUT_MODE (mem, TYPE_MODE (type));
+
+  create_output_operand (&ops[0], target, TYPE_MODE (type));
+  create_fixed_operand (&ops[1], mem);
+  expand_insn (get_multi_vector_move (type, vec_load_lanes_optab), 2, ops);
+}
+
+/* Expand STORE_LANES call STMT.  */
+
+static void
+expand_STORE_LANES (gimple stmt)
+{
+  struct expand_operand ops[2];
+  tree type, lhs, rhs;
+  rtx target, reg;
+
+  lhs = gimple_call_lhs (stmt);
+  rhs = gimple_call_arg (stmt, 0);
+  type = TREE_TYPE (rhs);
+
+  target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  reg = expand_normal (rhs);
+
+  gcc_assert (MEM_P (target));
+  PUT_MODE (target, TYPE_MODE (type));
+
+  create_fixed_operand (&ops[0], target);
+  create_input_operand (&ops[1], reg, TYPE_MODE (type));
+  expand_insn (get_multi_vector_move (type, vec_store_lanes_optab), 2, ops);
+}
+
 /* Routines to expand each internal function, indexed by function number.
    Each routine has the prototype:
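For orientation (an assumption about typical usage, not something the patch
itself states): the loops these expanders ultimately serve are interleaved
group accesses such as the one below, where each iteration touches two
adjacent elements.  On a target whose vec_load_lanes/vec_store_lanes
patterns cover the mode pair, the vectorizer can now turn each group into a
single LOAD_LANES or STORE_LANES call.

/* Illustration only: an interleaved access group of size 2.  */
void
scale_complex (float *out, const float *in, int n)
{
  int i;
  for (i = 0; i < n; i++)
    {
      out[2 * i] = in[2 * i] * 2.0f;          /* lane 0 */
      out[2 * i + 1] = in[2 * i + 1] * 0.5f;  /* lane 1 */
    }
}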
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -37,3 +37,6 @@ along with GCC; see the file COPYING3.  If not see
      void expand_NAME (gimple stmt)
 
    where STMT is the statement that performs the call. */
+
+DEF_INTERNAL_FN (LOAD_LANES, ECF_CONST | ECF_LEAF)
+DEF_INTERNAL_FN (STORE_LANES, ECF_CONST | ECF_LEAF)
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -578,6 +578,9 @@ enum convert_optab_index
   COI_satfract,
   COI_satfractuns,
 
+  COI_vec_load_lanes,
+  COI_vec_store_lanes,
+
   COI_MAX
 };
 
@@ -598,6 +601,8 @@ enum convert_optab_index
 #define fractuns_optab (&convert_optab_table[COI_fractuns])
 #define satfract_optab (&convert_optab_table[COI_satfract])
 #define satfractuns_optab (&convert_optab_table[COI_satfractuns])
+#define vec_load_lanes_optab (&convert_optab_table[COI_vec_load_lanes])
+#define vec_store_lanes_optab (&convert_optab_table[COI_vec_store_lanes])
 
 /* Contains the optab used for each rtx code.  */
 extern optab code_to_optab[NUM_RTX_CODE + 1];
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -43,6 +43,45 @@ along with GCC; see the file COPYING3.  If not see
 #include "expr.h"
 #include "optabs.h"
 
+/* Return true if load- or store-lanes optab OPTAB is implemented for
+   COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */
+
+static bool
+vect_lanes_optab_supported_p (const char *name, convert_optab optab,
+			      tree vectype, unsigned HOST_WIDE_INT count)
+{
+  enum machine_mode mode, array_mode;
+  bool limit_p;
+
+  mode = TYPE_MODE (vectype);
+  limit_p = !targetm.array_mode_supported_p (mode, count);
+  array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
+			      MODE_INT, limit_p);
+
+  if (array_mode == BLKmode)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+	fprintf (vect_dump, "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]",
+		 GET_MODE_NAME (mode), count);
+      return false;
+    }
+
+  if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+	fprintf (vect_dump, "cannot use %s<%s><%s>",
+		 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
+      return false;
+    }
+
+  if (vect_print_dump_info (REPORT_DETAILS))
+    fprintf (vect_dump, "can use %s<%s><%s>",
+	     name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
+
+  return true;
+}
+
+
 /* Return the smallest scalar part of STMT.
    This is used to determine the vectype of the stmt.  We generally set the
    vectype according to the type of the result (lhs).  For stmts whose
@@ -3376,6 +3415,18 @@ vect_strided_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
 }
 
 
+/* Return TRUE if vec_store_lanes is available for COUNT vectors of
+   type VECTYPE.  */
+
+bool
+vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
+{
+  return vect_lanes_optab_supported_p ("vec_store_lanes",
+				       vec_store_lanes_optab,
+				       vectype, count);
+}
+
+
 /* Function vect_permute_store_chain.
 
    Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
@@ -3830,6 +3881,16 @@ vect_strided_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
   return true;
 }
 
+/* Return TRUE if vec_load_lanes is available for COUNT vectors of
+   type VECTYPE.  */
+
+bool
+vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
+{
+  return vect_lanes_optab_supported_p ("vec_load_lanes",
+				       vec_load_lanes_optab,
+				       vectype, count);
+}
+
 /* Function vect_permute_load_chain.
 
@@ -3977,19 +4038,28 @@ void
 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
 			     gimple_stmt_iterator *gsi)
 {
-  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-  gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
-  gimple next_stmt, new_stmt;
   VEC(tree,heap) *result_chain = NULL;
-  unsigned int i, gap_count;
-  tree tmp_data_ref;
 
   /* DR_CHAIN contains input data-refs that are a part of the interleaving.
      RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
     vectors, that are ready for vector computation.  */
   result_chain = VEC_alloc (tree, heap, size);
-  /* Permute.  */
   vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
+  vect_record_strided_load_vectors (stmt, result_chain);
+  VEC_free (tree, heap, result_chain);
+}
+
+/* RESULT_CHAIN contains the output of a group of strided loads that were
+   generated as part of the vectorization of STMT.  Assign the statement
+   for each vector to the associated scalar statement.  */
+
+void
+vect_record_strided_load_vectors (gimple stmt, VEC(tree,heap) *result_chain)
+{
+  gimple first_stmt = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
+  gimple next_stmt, new_stmt;
+  unsigned int i, gap_count;
+  tree tmp_data_ref;
 
   /* Put a permuted data-ref in the VECTORIZED_STMT field.
      Since we scan the chain starting from it's first node, their order
@@ -4051,8 +4121,6 @@ vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
 	  break;
 	}
     }
-
-  VEC_free (tree, heap, result_chain);
 }
 
 /* Function vect_force_dr_alignment_p.
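A worked instance of vect_lanes_optab_supported_p, matching the md.texi
example earlier in this patch: for VECTYPE V4HI and COUNT 2, MODE is the
64-bit V4HImode, so mode_for_size looks for a 128-bit MODE_INT and finds
TImode, and the optab query then asks whether the target defines
vec_load_lanestiv4hi.  If either step fails, the caller falls back to
testing the permute-based strided path instead.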
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -215,7 +215,8 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
 	vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
       else
 	/* Store.  */
-	vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
+	vect_model_store_cost (stmt_info, ncopies_for_cost, false,
+			       dt[0], slp_node);
     }
 
   else
@@ -579,7 +580,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
 
 	      /* Analyze costs (for the first stmt in the group).  */
 	      vect_model_load_cost (vinfo_for_stmt (stmt),
-				    ncopies_for_cost, *node);
+				    ncopies_for_cost, false, *node);
 	    }
 
 	  /* Store the place of this load in the interleaving chain.  In
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -42,6 +42,82 @@ along with GCC; see the file COPYING3.  If not see
 #include "langhooks.h"
 
 
+/* Return a variable of type ELEM_TYPE[NELEMS].  */
+
+static tree
+create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
+{
+  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
+			 "vect_array");
+}
+
+/* ARRAY is an array of vectors created by create_vector_array.
+   Return an SSA_NAME for the vector in index N.  The reference
+   is part of the vectorization of STMT and the vector is associated
+   with scalar destination SCALAR_DEST.  */
+
+static tree
+read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
+		   tree array, unsigned HOST_WIDE_INT n)
+{
+  tree vect_type, vect, vect_name, array_ref;
+  gimple new_stmt;
+
+  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
+  vect_type = TREE_TYPE (TREE_TYPE (array));
+  vect = vect_create_destination_var (scalar_dest, vect_type);
+  array_ref = build4 (ARRAY_REF, vect_type, array,
+		      build_int_cst (size_type_node, n),
+		      NULL_TREE, NULL_TREE);
+
+  new_stmt = gimple_build_assign (vect, array_ref);
+  vect_name = make_ssa_name (vect, new_stmt);
+  gimple_assign_set_lhs (new_stmt, vect_name);
+  vect_finish_stmt_generation (stmt, new_stmt, gsi);
+  mark_symbols_for_renaming (new_stmt);
+
+  return vect_name;
+}
+
+/* ARRAY is an array of vectors created by create_vector_array.
+   Emit code to store SSA_NAME VECT in index N of the array.
+   The store is part of the vectorization of STMT.  */
+
+static void
+write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
+		    tree array, unsigned HOST_WIDE_INT n)
+{
+  tree array_ref;
+  gimple new_stmt;
+
+  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
+		      build_int_cst (size_type_node, n),
+		      NULL_TREE, NULL_TREE);
+
+  new_stmt = gimple_build_assign (array_ref, vect);
+  vect_finish_stmt_generation (stmt, new_stmt, gsi);
+  mark_symbols_for_renaming (new_stmt);
+}
+
+/* PTR is a pointer to an array of type TYPE.  Return a representation
+   of *PTR.  The memory reference replaces those in FIRST_DR
+   (and its group).  */
+
+static tree
+create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
+{
+  struct ptr_info_def *pi;
+  tree mem_ref, alias_ptr_type;
+
+  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
+  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
+  /* Arrays have the same alignment as their type.  */
+  pi = get_ptr_info (ptr);
+  pi->align = TYPE_ALIGN_UNIT (type);
+  pi->misalign = 0;
+  return mem_ref;
+}
+
 /* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
 
 /* Function vect_mark_relevant.
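In GIMPLE terms: create_vector_array makes a local aggregate temporary
named "vect_array", read_vector_array emits an assignment of the form
vectX = vect_array[N] and returns the new SSA name, write_vector_array
emits the reverse store vect_array[N] = vectX, and create_array_ref builds
the *ptr MEM_REF through which the whole array is loaded or stored.  (The
vectX spelling here follows the destination-variable naming convention and
is illustrative; exact dump output may differ.)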
@@ -648,7 +724,8 @@ vect_cost_strided_group_size (stmt_vec_info stmt_info)
 
 void
 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
-		       enum vect_def_type dt, slp_tree slp_node)
+		       bool store_lanes_p, enum vect_def_type dt,
+		       slp_tree slp_node)
 {
   int group_size;
   unsigned int inside_cost = 0, outside_cost = 0;
@@ -685,9 +762,11 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
       first_dr = STMT_VINFO_DATA_REF (stmt_info);
     }
 
-  /* Is this an access in a group of stores, which provide strided access?
-     If so, add in the cost of the permutes.  */
-  if (group_size > 1)
+  /* We assume that the cost of a single store-lanes instruction is
+     equivalent to the cost of GROUP_SIZE separate stores.  If a strided
+     access is instead being provided by a permute-and-store operation,
+     include the cost of the permutes.  */
+  if (!store_lanes_p && group_size > 1)
     {
       /* Uses a high and low interleave operation for each needed permute.  */
       inside_cost = ncopies * exact_log2(group_size) * group_size
@@ -763,8 +842,8 @@ vect_get_store_cost (struct data_reference *dr, int ncopies,
    access scheme chosen.  */
 
 void
-vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
+vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
+		      slp_tree slp_node)
 {
   int group_size;
   gimple first_stmt;
@@ -789,9 +868,11 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
       first_dr = dr;
     }
 
-  /* Is this an access in a group of loads providing strided access?
-     If so, add in the cost of the permutes.  */
-  if (group_size > 1)
+  /* We assume that the cost of a single load-lanes instruction is
+     equivalent to the cost of GROUP_SIZE separate loads.  If a strided
+     access is instead being provided by a load-and-permute operation,
+     include the cost of the permutes.  */
+  if (!load_lanes_p && group_size > 1)
     {
       /* Uses an even and odd extract operations for each needed permute.  */
       inside_cost = ncopies * exact_log2(group_size) * group_size
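Worked example of the costing change: for a group of GROUP_SIZE = 4
accesses with NCOPIES = 1, the permute-based path charges an extra
ncopies * log2(group_size) * group_size = 1 * 2 * 4 = 8 interleave (or
extract) operations, scaled by the per-statement cost, on top of the
memory accesses themselves.  When store_lanes_p or load_lanes_p is set,
that permute term is skipped entirely, consistent with the stated
assumption that one lanes instruction costs about the same as GROUP_SIZE
separate accesses.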
@@ -3329,6 +3410,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+  tree elem_type;
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
   struct loop *loop = NULL;
   enum machine_mode vec_mode;
@@ -3344,6 +3426,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   int j;
   gimple next_stmt, first_stmt = NULL;
   bool strided_store = false;
+  bool store_lanes_p = false;
   unsigned int group_size, i;
   VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
   bool inv_p;
@@ -3351,6 +3434,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   bool slp = (slp_node != NULL);
   unsigned int vec_num;
   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+  tree aggr_type;
 
   if (loop_vinfo)
     loop = LOOP_VINFO_LOOP (loop_vinfo);
@@ -3404,7 +3488,8 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 
   /* The scalar rhs type needs to be trivially convertible to the vector
      component type.  This should always be the case.  */
-  if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (op)))
+  elem_type = TREE_TYPE (vectype);
+  if (!useless_type_conversion_p (elem_type, TREE_TYPE (op)))
     {
       if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "??? operands of different types");
@@ -3434,7 +3519,9 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
       if (!slp && !PURE_SLP_STMT (stmt_info))
 	{
 	  group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
-	  if (!vect_strided_store_supported (vectype, group_size))
+	  if (vect_store_lanes_supported (vectype, group_size))
+	    store_lanes_p = true;
+	  else if (!vect_strided_store_supported (vectype, group_size))
 	    return false;
 	}
 
@@ -3462,7 +3549,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   if (!vec_stmt) /* transformation not required.  */
     {
       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
-      vect_model_store_cost (stmt_info, ncopies, dt, NULL);
+      vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL);
       return true;
     }
 
@@ -3517,6 +3604,16 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 
   alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
   gcc_assert (alignment_support_scheme);
+  /* Targets with store-lane instructions must not require explicit
+     realignment.  */
+  gcc_assert (!store_lanes_p
+	      || alignment_support_scheme == dr_aligned
+	      || alignment_support_scheme == dr_unaligned_supported);
+
+  if (store_lanes_p)
+    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
+  else
+    aggr_type = vectype;
 
   /* In case the vectorization factor (VF) is bigger than the number
      of elements that we can fit in a vectype (nunits), we have to generate
@@ -3605,7 +3702,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 	  /* We should have catched mismatched types earlier.  */
 	  gcc_assert (useless_type_conversion_p (vectype,
 						 TREE_TYPE (vec_oprnd)));
-	  dataref_ptr = vect_create_data_ref_ptr (first_stmt, vectype, NULL,
+	  dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
 						  NULL_TREE, &dummy, gsi,
 						  &ptr_incr, false, &inv_p);
 	  gcc_assert (bb_vinfo || !inv_p);
@@ -3628,10 +3725,32 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 	      VEC_replace(tree, dr_chain, i, vec_oprnd);
 	      VEC_replace(tree, oprnds, i, vec_oprnd);
 	    }
-	  dataref_ptr =
-	    bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
+	  dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
+					 TYPE_SIZE_UNIT (aggr_type));
 	}
 
+      if (store_lanes_p)
+	{
+	  tree vec_array;
+
+	  /* Combine all the vectors into an array.  */
+	  vec_array = create_vector_array (vectype, vec_num);
+	  for (i = 0; i < vec_num; i++)
+	    {
+	      vec_oprnd = VEC_index (tree, dr_chain, i);
+	      write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
+	    }
+
+	  /* Emit:
+	       MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
+	  data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
+	  new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
+	  gimple_call_set_lhs (new_stmt, data_ref);
+	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
+	  mark_symbols_for_renaming (new_stmt);
+	}
+      else
+	{
 	  new_stmt = NULL;
 	  if (strided_store)
 	    {
@@ -3648,8 +3767,8 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 
 	  if (i > 0)
 	    /* Bump the vector pointer.  */
-	    dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
-					   NULL_TREE);
+	    dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
+					   stmt, NULL_TREE);
 
 	  if (slp)
 	    vec_oprnd = VEC_index (tree, vec_oprnds, i);
@@ -3669,15 +3788,15 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 	    {
 	      TREE_TYPE (data_ref)
 		= build_aligned_type (TREE_TYPE (data_ref),
-				      TYPE_ALIGN (TREE_TYPE (vectype)));
-	      pi->align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
+				      TYPE_ALIGN (elem_type));
+	      pi->align = TYPE_ALIGN_UNIT (elem_type);
 	      pi->misalign = 0;
 	    }
 	  else
 	    {
 	      TREE_TYPE (data_ref)
 		= build_aligned_type (TREE_TYPE (data_ref),
-				      TYPE_ALIGN (TREE_TYPE (vectype)));
+				      TYPE_ALIGN (elem_type));
 	      pi->misalign = DR_MISALIGNMENT (first_dr);
 	    }
 
@@ -3693,6 +3812,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 	      if (!next_stmt)
 		break;
 	    }
+	}
       if (!slp)
 	{
 	  if (j == 0)
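The store-side counterpart of the earlier illustration (again an
assumption about the typical source pattern, not taken from the patch): a
loop like the one below forms a strided store group of size 2, and with
this change vectorizable_store emits the single call shown in the
/* Emit */ comment above, MEM_REF[...] = STORE_LANES (vect_array), instead
of a chain of interleaving permutes.

/* Illustration only: a strided store group of size 2.  */
void
pack_pairs (short *out, const short *a, const short *b, int n)
{
  int i;
  for (i = 0; i < n; i++)
    {
      out[2 * i] = a[i];      /* lane 0 */
      out[2 * i + 1] = b[i];  /* lane 1 */
    }
}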
@@ -3810,6 +3930,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   bool nested_in_vect_loop = false;
   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+  tree elem_type;
   tree new_temp;
   enum machine_mode mode;
   gimple new_stmt = NULL;
@@ -3826,6 +3947,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   gimple phi = NULL;
   VEC(tree,heap) *dr_chain = NULL;
   bool strided_load = false;
+  bool load_lanes_p = false;
   gimple first_stmt;
   tree scalar_type;
   bool inv_p;
@@ -3838,6 +3960,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   enum tree_code code;
   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
   int vf;
+  tree aggr_type;
 
   if (loop_vinfo)
     {
@@ -3914,7 +4037,8 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 
   /* The vector component type needs to be trivially convertible to the
      scalar lhs.  This should always be the case.  */
-  if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), TREE_TYPE (vectype)))
+  elem_type = TREE_TYPE (vectype);
+  if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), elem_type))
     {
       if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "??? operands of different types");
@@ -3932,7 +4056,9 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
       if (!slp && !PURE_SLP_STMT (stmt_info))
 	{
 	  group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
-	  if (!vect_strided_load_supported (vectype, group_size))
+	  if (vect_load_lanes_supported (vectype, group_size))
+	    load_lanes_p = true;
+	  else if (!vect_strided_load_supported (vectype, group_size))
 	    return false;
 	}
     }
@@ -3959,7 +4085,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   if (!vec_stmt) /* transformation not required.  */
     {
       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
-      vect_model_load_cost (stmt_info, ncopies, NULL);
+      vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL);
       return true;
     }
 
@@ -4000,6 +4126,11 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 
   alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
   gcc_assert (alignment_support_scheme);
+  /* Targets with load-lane instructions must not require explicit
+     realignment.  */
+  gcc_assert (!load_lanes_p
+	      || alignment_support_scheme == dr_aligned
+	      || alignment_support_scheme == dr_unaligned_supported);
 
   /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
@@ -4131,26 +4262,58 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   if (negative)
     offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
 
+  if (load_lanes_p)
+    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
+  else
+    aggr_type = vectype;
+
   prev_stmt_info = NULL;
   for (j = 0; j < ncopies; j++)
     {
-      /* 1. Create the vector pointer update chain.  */
+      /* 1. Create the vector or array pointer update chain.  */
       if (j == 0)
-        dataref_ptr = vect_create_data_ref_ptr (first_stmt, vectype, at_loop,
+        dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
 						offset, &dummy, gsi,
 						&ptr_incr, false, &inv_p);
       else
-        dataref_ptr =
-          bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
+        dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
+				       TYPE_SIZE_UNIT (aggr_type));
 
       if (strided_load || slp_perm)
 	dr_chain = VEC_alloc (tree, heap, vec_num);
 
+      if (load_lanes_p)
+	{
+	  tree vec_array;
+
+	  vec_array = create_vector_array (vectype, vec_num);
+
+	  /* Emit:
+	       VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
+	  data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
+	  new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
+	  gimple_call_set_lhs (new_stmt, vec_array);
+	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
+	  mark_symbols_for_renaming (new_stmt);
+
+	  /* Extract each vector into an SSA_NAME.  */
+	  for (i = 0; i < vec_num; i++)
+	    {
+	      new_temp = read_vector_array (stmt, gsi, scalar_dest,
+					    vec_array, i);
+	      VEC_quick_push (tree, dr_chain, new_temp);
+	    }
+
+	  /* Record the mapping between SSA_NAMEs and statements.  */
+	  vect_record_strided_load_vectors (stmt, dr_chain);
+	}
+      else
+	{
 	  for (i = 0; i < vec_num; i++)
 	    {
 	      if (i > 0)
-		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
-					       NULL_TREE);
+		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
					       stmt, NULL_TREE);
 
 	      /* 2. Create the vector-load in the loop.  */
 	      switch (alignment_support_scheme)
@@ -4174,15 +4337,15 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 		    {
 		      TREE_TYPE (data_ref)
 			= build_aligned_type (TREE_TYPE (data_ref),
-					      TYPE_ALIGN (TREE_TYPE (vectype)));
-		      pi->align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
+					      TYPE_ALIGN (elem_type));
+		      pi->align = TYPE_ALIGN_UNIT (elem_type);
 		      pi->misalign = 0;
 		    }
 		  else
 		    {
 		      TREE_TYPE (data_ref)
 			= build_aligned_type (TREE_TYPE (data_ref),
-					      TYPE_ALIGN (TREE_TYPE (vectype)));
+					      TYPE_ALIGN (elem_type));
 		      pi->misalign = DR_MISALIGNMENT (first_dr);
 		    }
 		  break;
@@ -4190,7 +4353,9 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 		case dr_explicit_realign:
 		  {
 		    tree ptr, bump;
-		    tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
+		    tree vs_minus_1;
+
+		    vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
 
 		    if (compute_in_loop)
 		      msq = vect_setup_realignment (first_stmt, gsi,
@@ -4210,7 +4375,8 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 		      = build2 (MEM_REF, vectype, ptr,
 				build_int_cst (reference_alias_ptr_type
 					       (DR_REF (first_dr)), 0));
-		    vec_dest = vect_create_destination_var (scalar_dest, vectype);
+		    vec_dest = vect_create_destination_var (scalar_dest,
+							    vectype);
 		    new_stmt = gimple_build_assign (vec_dest, data_ref);
 		    new_temp = make_ssa_name (vec_dest, new_stmt);
 		    gimple_assign_set_lhs (new_stmt, new_temp);
@@ -4242,7 +4408,8 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 					    build_int_cst
 					    (TREE_TYPE (dataref_ptr),
 					     -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
-		    new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
+		    new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
+					      new_stmt);
 		    gimple_assign_set_lhs (new_stmt, new_temp);
 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
 		    data_ref
@@ -4260,8 +4427,9 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
 	      mark_symbols_for_renaming (new_stmt);
 
-	      /* 3. Handle explicit realignment if necessary/supported.  Create in
-		 loop: vec_dest = realign_load (msq, lsq, realignment_token) */
+	      /* 3. Handle explicit realignment if necessary/supported.
+		 Create in loop:
+		   vec_dest = realign_load (msq, lsq, realignment_token) */
 	      if (alignment_support_scheme == dr_explicit_realign_optimized
 		  || alignment_support_scheme == dr_explicit_realign)
 		{
@@ -4270,8 +4438,9 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 		    realignment_token = dataref_ptr;
 		  vec_dest = vect_create_destination_var (scalar_dest, vectype);
 		  new_stmt
-		    = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR, vec_dest,
-						     msq, lsq, realignment_token);
+		    = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
+						     vec_dest, msq, lsq,
+						     realignment_token);
 		  new_temp = make_ssa_name (vec_dest, new_stmt);
 		  gimple_assign_set_lhs (new_stmt, new_temp);
 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
@@ -4280,7 +4449,8 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 		    {
 		      gcc_assert (phi);
 		      if (i == vec_num - 1 && j == ncopies - 1)
-			add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
+			add_phi_arg (phi, lsq,
+				     loop_latch_edge (containing_loop),
 				     UNKNOWN_LOCATION);
 		      msq = lsq;
 		    }
@@ -4301,8 +4471,8 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 		      bitpos = bitsize_zero_node;
 		      vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
 					bitsize, bitpos);
-		      vec_dest =
-			vect_create_destination_var (scalar_dest, NULL_TREE);
+		      vec_dest = vect_create_destination_var (scalar_dest,
+							      NULL_TREE);
 		      new_stmt = gimple_build_assign (vec_dest, vec_inv);
 		      new_temp = make_ssa_name (vec_dest, new_stmt);
 		      gimple_assign_set_lhs (new_stmt, new_temp);
@@ -4312,7 +4482,8 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 			t = tree_cons (NULL_TREE, new_temp, t);
 		      /* FIXME: use build_constructor directly.  */
 		      vec_inv = build_constructor_from_list (vectype, t);
-		      new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
+		      new_temp = vect_init_vector (stmt, vec_inv,
+						   vectype, gsi);
 		      new_stmt = SSA_NAME_DEF_STMT (new_temp);
 		    }
 		  else
@@ -4332,7 +4503,9 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 
 	      /* Store vector loads in the corresponding SLP_NODE.  */
 	      if (slp && !slp_perm)
-		VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
+		VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
+				new_stmt);
+	    }
 	}
 
       if (slp && !slp_perm)
@@ -4351,6 +4524,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 	{
 	  if (strided_load)
 	    {
+	      if (!load_lanes_p)
 		vect_transform_strided_load (stmt, dr_chain, group_size, gsi);
 	      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
 	    }
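Net effect on the load side: when load_lanes_p is set, each copy now emits
one VEC_ARRAY = LOAD_LANES (MEM_REF[...]) call, extracts each vector with
read_vector_array, and records the results with
vect_record_strided_load_vectors, while the permute-based
vect_transform_strided_load is skipped for such groups.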
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -788,9 +788,9 @@ extern void free_stmt_vec_info (gimple stmt);
 extern tree vectorizable_function (gimple, tree, tree);
 extern void vect_model_simple_cost (stmt_vec_info, int, enum vect_def_type *,
 				    slp_tree);
-extern void vect_model_store_cost (stmt_vec_info, int, enum vect_def_type,
-				   slp_tree);
-extern void vect_model_load_cost (stmt_vec_info, int, slp_tree);
+extern void vect_model_store_cost (stmt_vec_info, int, bool,
+				   enum vect_def_type, slp_tree);
+extern void vect_model_load_cost (stmt_vec_info, int, bool, slp_tree);
 extern void vect_finish_stmt_generation (gimple, gimple,
 					 gimple_stmt_iterator *);
 extern bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
@@ -829,7 +829,9 @@ extern tree vect_create_data_ref_ptr (gimple, tree, struct loop *, tree,
 extern tree bump_vector_ptr (tree, gimple, gimple_stmt_iterator *, gimple, tree);
 extern tree vect_create_destination_var (tree, tree);
 extern bool vect_strided_store_supported (tree, unsigned HOST_WIDE_INT);
+extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT);
 extern bool vect_strided_load_supported (tree, unsigned HOST_WIDE_INT);
+extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT);
 extern void vect_permute_store_chain (VEC(tree,heap) *,unsigned int, gimple,
 				      gimple_stmt_iterator *, VEC(tree,heap) **);
 extern tree vect_setup_realignment (gimple, gimple_stmt_iterator *, tree *,
@@ -837,6 +839,7 @@ extern tree vect_setup_realignment (gimple, gimple_stmt_iterator *, tree *,
 				    struct loop **);
 extern void vect_transform_strided_load (gimple, VEC(tree,heap) *, int,
 					 gimple_stmt_iterator *);
+extern void vect_record_strided_load_vectors (gimple, VEC(tree,heap) *);
 extern int vect_get_place_in_interleaving_chain (gimple, gimple);
 extern tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
 extern tree vect_create_addr_base_for_vector_ref (gimple, gimple_seq *,
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -7340,6 +7340,15 @@ build_nonshared_array_type (tree elt_type, tree index_type)
   return build_array_type_1 (elt_type, index_type, false);
 }
 
+/* Return a representation of ELT_TYPE[NELTS], using indices of type
+   sizetype.  */
+
+tree
+build_array_type_nelts (tree elt_type, unsigned HOST_WIDE_INT nelts)
+{
+  return build_array_type (elt_type, build_index_type (size_int (nelts - 1)));
+}
+
 /* Recursively examines the array elements of TYPE, until a non-array
    element type is found.  */
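For example, build_array_type_nelts (elt_type, 3) yields the type
elt_type[3], with sizetype index range [0, 2]; the vectorizer uses it both
here and via create_vector_array above to build the vector-array types
handed to LOAD_LANES and STORE_LANES.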
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -4247,6 +4247,7 @@ extern tree build_type_no_quals (tree);
 extern tree build_index_type (tree);
 extern tree build_array_type (tree, tree);
 extern tree build_nonshared_array_type (tree, tree);
+extern tree build_array_type_nelts (tree, unsigned HOST_WIDE_INT);
 extern tree build_function_type (tree, tree);
 extern tree build_function_type_list (tree, ...);
 extern tree build_function_type_skip_args (tree, bitmap);