mirror of git://gcc.gnu.org/git/gcc.git
Compare commits
2 Commits
d6986e06db
...
239535e9b0
Author | SHA1 | Date |
---|---|---|
![]() |
239535e9b0 | |
![]() |
d1965b1fd8 |
|
@ -21,7 +21,10 @@
|
|||
#include "system.h"
|
||||
#include "coretypes.h"
|
||||
#include "tm.h"
|
||||
#include "basic-block.h"
|
||||
#include "tree.h"
|
||||
#include "function.h"
|
||||
#include "gimple.h"
|
||||
#include "rtl.h"
|
||||
#include "tm_p.h"
|
||||
#include "memmodel.h"
|
||||
|
@ -68,23 +71,36 @@ za_group_is_pure_overload (const function_group_info &group)
|
|||
types in ARGUMENT_TYPES. RETURN_TYPE is the type returned by the
|
||||
function. */
|
||||
static void
|
||||
apply_predication (const function_instance &instance, tree return_type,
|
||||
apply_predication (function_instance &instance, tree return_type,
|
||||
vec<tree> &argument_types)
|
||||
{
|
||||
/* Initially mark the function as not being predicated. */
|
||||
instance.gp_index = -1;
|
||||
|
||||
/* There are currently no SME ZA instructions that have both merging and
|
||||
unpredicated forms, so for simplicity, the predicates are always included
|
||||
in the original format string. */
|
||||
if (instance.pred != PRED_none && instance.pred != PRED_za_m)
|
||||
{
|
||||
argument_types.quick_insert (0, instance.gp_type ());
|
||||
instance.gp_index = 0;
|
||||
/* For unary merge operations, the first argument is a vector with
|
||||
the same type as the result. For unary_convert_narrowt it also
|
||||
provides the "bottom" half of active elements, and is present
|
||||
for all types of predication. */
|
||||
auto nargs = argument_types.length () - 1;
|
||||
if (instance.shape->has_merge_argument_p (instance, nargs))
|
||||
argument_types.quick_insert (0, return_type);
|
||||
{
|
||||
argument_types.quick_insert (0, return_type);
|
||||
instance.gp_index = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* In this case the predicate type we added above is a non-governing
|
||||
predicate operand (and there is no GP), so update the gp_index value
|
||||
accordingly. */
|
||||
if (!instance.shape->has_gp_argument_p (instance))
|
||||
instance.gp_index = -1;
|
||||
}
|
||||
|
||||
/* Parse and move past an element type in FORMAT and return it as a type
|
||||
|
@ -3332,6 +3348,14 @@ struct pmov_to_vector_lane_def : public overloaded_base<0>
|
|||
but it doesn't currently have the necessary information. */
|
||||
return c.require_immediate_range (1, 1, bytes - 1);
|
||||
}
|
||||
|
||||
/* This function has a predicate argument, and is a merging instruction, but
|
||||
the predicate is not a GP. */
|
||||
bool
|
||||
has_gp_argument_p (const function_instance &) const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
};
|
||||
SHAPE (pmov_to_vector_lane)
|
||||
|
||||
|
|
|
@ -3632,24 +3632,22 @@ gimple_folder::redirect_pred_x ()
|
|||
gimple *
|
||||
gimple_folder::fold_pfalse ()
|
||||
{
|
||||
if (pred == PRED_none)
|
||||
tree gp = gp_value (call);
|
||||
/* If there isn't a GP then we can't do any folding as the instruction isn't
|
||||
predicated. */
|
||||
if (!gp)
|
||||
return nullptr;
|
||||
tree arg0 = gimple_call_arg (call, 0);
|
||||
|
||||
if (pred == PRED_m)
|
||||
{
|
||||
/* Unary function shapes with _m predication are folded to the
|
||||
inactive vector (arg0), while other function shapes are folded
|
||||
to op1 (arg1). */
|
||||
tree arg1 = gimple_call_arg (call, 1);
|
||||
if (is_pfalse (arg1))
|
||||
return fold_call_to (arg0);
|
||||
if (is_pfalse (arg0))
|
||||
return fold_call_to (arg1);
|
||||
tree val = inactive_values (call);
|
||||
if (is_pfalse (gp))
|
||||
return fold_call_to (val);
|
||||
return nullptr;
|
||||
}
|
||||
if ((pred == PRED_x || pred == PRED_z) && is_pfalse (arg0))
|
||||
if ((pred == PRED_x || pred == PRED_z) && is_pfalse (gp))
|
||||
return fold_call_to (build_zero_cst (TREE_TYPE (lhs)));
|
||||
if (pred == PRED_implicit && is_pfalse (arg0))
|
||||
if (pred == PRED_implicit && is_pfalse (gp))
|
||||
{
|
||||
unsigned int flags = call_properties ();
|
||||
/* Folding to lhs = {0, ...} is not appropriate for intrinsics with
|
||||
|
|
|
@ -403,6 +403,8 @@ public:
|
|||
bool could_trap_p () const;
|
||||
|
||||
vector_type_index gp_type_index () const;
|
||||
tree gp_value (gcall *) const;
|
||||
tree inactive_values (gcall *) const;
|
||||
tree gp_type () const;
|
||||
|
||||
unsigned int vectors_per_tuple () const;
|
||||
|
@ -436,6 +438,7 @@ public:
|
|||
group_suffix_index group_suffix_id;
|
||||
predication_index pred;
|
||||
fpm_mode_index fpm_mode;
|
||||
int gp_index;
|
||||
};
|
||||
|
||||
class registered_function;
|
||||
|
@ -801,6 +804,8 @@ public:
|
|||
virtual bool has_merge_argument_p (const function_instance &,
|
||||
unsigned int) const;
|
||||
|
||||
virtual bool has_gp_argument_p (const function_instance &) const;
|
||||
|
||||
virtual bool explicit_type_suffix_p (unsigned int) const = 0;
|
||||
|
||||
/* True if the group suffix is present in overloaded names.
|
||||
|
@ -949,6 +954,33 @@ function_instance::gp_type () const
|
|||
return acle_vector_types[0][gp_type_index ()];
|
||||
}
|
||||
|
||||
/* Return the tree value that should be used as the governing predicate of
|
||||
this function. If none then return NULL_TREE. */
|
||||
inline tree
|
||||
function_instance::gp_value (gcall *call) const
|
||||
{
|
||||
if (gp_index < 0)
|
||||
return NULL_TREE;
|
||||
|
||||
return gimple_call_arg (call, gp_index);
|
||||
}
|
||||
|
||||
/* Return the tree value that should be used for the inactive lanes should this
|
||||
function be a predicated function with a gp. Otherwise return NULL_TREE. */
|
||||
inline tree
|
||||
function_instance::inactive_values (gcall *call) const
|
||||
{
|
||||
if (gp_index < 0)
|
||||
return NULL_TREE;
|
||||
|
||||
/* Function is unary with m predicate. */
|
||||
if (gp_index == 1)
|
||||
return gimple_call_arg (call, 0);
|
||||
|
||||
/* Else the inactive values are the next element. */
|
||||
return gimple_call_arg (call, 1);
|
||||
}
|
||||
|
||||
/* If the function operates on tuples of vectors, return the number
|
||||
of vectors in the tuples, otherwise return 1. */
|
||||
inline unsigned int
|
||||
|
@ -1123,6 +1155,14 @@ function_shape::has_merge_argument_p (const function_instance &instance,
|
|||
return nargs == 1 && instance.pred == PRED_m;
|
||||
}
|
||||
|
||||
/* Return true if INSTANCE has an predicate argument that can be used as the global
|
||||
predicate. */
|
||||
inline bool
|
||||
function_shape::has_gp_argument_p (const function_instance &instance) const
|
||||
{
|
||||
return instance.pred != PRED_none;
|
||||
}
|
||||
|
||||
/* Return the mode of the result of a call. */
|
||||
inline machine_mode
|
||||
function_expander::result_mode () const
|
||||
|
|
|
@ -1427,25 +1427,82 @@ bpf_expand_setmem (rtx *operands)
|
|||
unsigned inc = GET_MODE_SIZE (mode);
|
||||
unsigned offset = 0;
|
||||
|
||||
/* If val is a constant, then build a new constant value duplicating
|
||||
the byte across to the size of stores we might do.
|
||||
e.g. if val is 0xab and we can store in 4-byte chunks, build
|
||||
0xabababab and use that to do the memset.
|
||||
If val is not a constant, then by constraint it is a QImode register
|
||||
and we similarly duplicate the byte across. */
|
||||
rtx src;
|
||||
if (CONST_INT_P (val))
|
||||
{
|
||||
unsigned HOST_WIDE_INT tmp = UINTVAL (val) & 0xff;
|
||||
/* Need src in the proper mode. */
|
||||
switch (mode)
|
||||
{
|
||||
case DImode:
|
||||
src = gen_rtx_CONST_INT (DImode, tmp * 0x0101010101010101);
|
||||
break;
|
||||
case SImode:
|
||||
src = gen_rtx_CONST_INT (SImode, tmp * 0x01010101);
|
||||
break;
|
||||
case HImode:
|
||||
src = gen_rtx_CONST_INT (HImode, tmp * 0x0101);
|
||||
break;
|
||||
default:
|
||||
src = val;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* VAL is a subreg:QI (reg:DI N).
|
||||
Copy that byte to fill the whole register. */
|
||||
src = gen_reg_rtx (mode);
|
||||
emit_move_insn (src, gen_rtx_ZERO_EXTEND (mode, val));
|
||||
|
||||
/* We can fill the whole register with copies of the byte by multiplying
|
||||
by 0x010101...
|
||||
For DImode this requires a tmp reg with lldw, but only if we will
|
||||
actually do nonzero iterations of stxdw. */
|
||||
if (mode < DImode || iters == 0)
|
||||
emit_move_insn (src, gen_rtx_MULT (mode, src, GEN_INT (0x01010101)));
|
||||
else
|
||||
{
|
||||
rtx tmp = gen_reg_rtx (mode);
|
||||
emit_move_insn (tmp, GEN_INT (0x0101010101010101));
|
||||
emit_move_insn (src, gen_rtx_MULT (mode, src, tmp));
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned int i = 0; i < iters; i++)
|
||||
{
|
||||
emit_move_insn (adjust_address (dst, mode, offset), val);
|
||||
emit_move_insn (adjust_address (dst, mode, offset), src);
|
||||
offset += inc;
|
||||
}
|
||||
if (remainder & 4)
|
||||
{
|
||||
emit_move_insn (adjust_address (dst, SImode, offset), val);
|
||||
emit_move_insn (adjust_address (dst, SImode, offset),
|
||||
REG_P (src)
|
||||
? simplify_gen_subreg (SImode, src, mode, 0)
|
||||
: src);
|
||||
offset += 4;
|
||||
remainder -= 4;
|
||||
}
|
||||
if (remainder & 2)
|
||||
{
|
||||
emit_move_insn (adjust_address (dst, HImode, offset), val);
|
||||
emit_move_insn (adjust_address (dst, HImode, offset),
|
||||
REG_P (src)
|
||||
? simplify_gen_subreg (HImode, src, mode, 0)
|
||||
: src);
|
||||
offset += 2;
|
||||
remainder -= 2;
|
||||
}
|
||||
if (remainder & 1)
|
||||
emit_move_insn (adjust_address (dst, QImode, offset), val);
|
||||
emit_move_insn (adjust_address (dst, QImode, offset),
|
||||
REG_P (src)
|
||||
? simplify_gen_subreg (QImode, src, mode, 0)
|
||||
: src);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2" } */
|
||||
/* { dg-final { check-function-bodies "**" "" "" } } */
|
||||
|
||||
#include <arm_sve.h>
|
||||
|
||||
/*
|
||||
** foo:
|
||||
** ptrue p0\.b, all
|
||||
** brkb p0\.b, p0/z, p0\.b
|
||||
** ret
|
||||
*/
|
||||
svbool_t foo () {
|
||||
return svbrkb_b_m (svpfalse (), svptrue_b8 (), svptrue_b8 ());
|
||||
}
|
||||
|
||||
/*
|
||||
** bar:
|
||||
** ptrue p0\.b, all
|
||||
** brka p0\.b, p0/z, p0\.b
|
||||
** ret
|
||||
*/
|
||||
svbool_t bar () {
|
||||
return svbrka_b_m (svpfalse (), svptrue_b8 (), svptrue_b8 ());
|
||||
}
|
|
@ -0,0 +1,16 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -march=armv8.2-a+sve2p1" } */
|
||||
/* { dg-final { check-function-bodies "**" "" "" } } */
|
||||
|
||||
#include <arm_sve.h>
|
||||
|
||||
/*
|
||||
** f:
|
||||
** pfalse p([0-7]+)\.b
|
||||
** mov z0\.b, #-1
|
||||
** pmov z0\[1\], p\1\.d
|
||||
** ret
|
||||
*/
|
||||
svuint64_t f () {
|
||||
return svpmov_lane_u64_m (svdup_u64 (~0UL), svpfalse (), 1);
|
||||
}
|
|
@ -0,0 +1,56 @@
|
|||
/* Test that inline memset expansion properly duplicates the byte value
|
||||
across the bytes to fill. PR target/122139. */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O1 -masm=normal" } */
|
||||
|
||||
#define SIZE 63
|
||||
|
||||
unsigned char cdata[SIZE];
|
||||
unsigned short sdata[SIZE / 2 + 1];
|
||||
unsigned int idata[SIZE / 4 + 1];
|
||||
unsigned long ldata[SIZE / 8 + 1];
|
||||
|
||||
void
|
||||
a (void)
|
||||
{
|
||||
__builtin_memset (cdata, 0x54, SIZE);
|
||||
}
|
||||
/* 0x54=84 */
|
||||
/* { dg-final { scan-assembler-times "\[\t \]stb\[^\r\n\]+,84" 63 } } */
|
||||
|
||||
void
|
||||
b (void)
|
||||
{
|
||||
__builtin_memset (sdata, 0x7a, SIZE);
|
||||
}
|
||||
|
||||
/* 0x7a=122, 0x7a7a=31354 */
|
||||
/* { dg-final { scan-assembler-times "\[\t \]sth\[^\r\n\]+,31354" 31 } } */
|
||||
/* { dg-final { scan-assembler-times "\[\t \]stb\[^\r\n\]+,122" 1 } } */
|
||||
|
||||
void
|
||||
c (void)
|
||||
{
|
||||
__builtin_memset (idata, 0x23, SIZE);
|
||||
}
|
||||
|
||||
/* 0x23=35, 0x2323=8995, 0x23232323=589505315 */
|
||||
/* { dg-final { scan-assembler-times "\[\t \]stw\[^\r\n\]+,589505315" 15 } } */
|
||||
/* { dg-final { scan-assembler-times "\[\t \]sth\[^\r\n\]+,8995" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "\[\t \]stb\[^\r\n\]+,35" 1 } } */
|
||||
|
||||
void
|
||||
d (void)
|
||||
{
|
||||
__builtin_memset (ldata, 0xcb, SIZE);
|
||||
}
|
||||
|
||||
/* 0xcbcbcbcb_cbcbcbcb = -3761688987579986997,
|
||||
0xcbcbcbcb = -875836469
|
||||
0xcbcb = -13365
|
||||
0xcb = -53 */
|
||||
/* { dg-final { scan-assembler-times "lddw\t%r.,-3761688987579986997"}} */
|
||||
/* { dg-final { scan-assembler-times "stxdw" 7 } } */
|
||||
/* { dg-final { scan-assembler-times "\[\t \]stw\[^\r\n\]+,-875836469" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "\[\t \]sth\[^\r\n\]+,-13365" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "\[\t \]stb\[^\r\n\]+,-53" 1 } } */
|
|
@ -0,0 +1,24 @@
|
|||
/* Test that inline memset expansion properly duplicates the byte value
|
||||
across the bytes to fill for non-const value. PR target/122139. */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O1 -masm=normal" } */
|
||||
|
||||
#define SIZE 63
|
||||
|
||||
unsigned char cdata[SIZE];
|
||||
unsigned short sdata[SIZE / 2 + 1];
|
||||
unsigned int idata[SIZE / 4 + 1];
|
||||
unsigned long ldata[SIZE / 8 + 1];
|
||||
|
||||
void
|
||||
c (unsigned char byte)
|
||||
{
|
||||
__builtin_memset (idata, byte, SIZE);
|
||||
}
|
||||
|
||||
/* Hard to verify for non-const value. Look for the mul by 0x01010101
|
||||
and the proper number of stores... */
|
||||
/* { dg-final { scan-assembler "mul32\[\t \]%r.,16843009" } } */
|
||||
/* { dg-final { scan-assembler-times "stxw" 15 } } */
|
||||
/* { dg-final { scan-assembler-times "stxh" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "stxb" 1 } } */
|
Loading…
Reference in New Issue