/* Subroutines used for LoongArch code generation.
   Copyright (C) 2021-2025 Free Software Foundation, Inc.
   Contributed by Loongson Ltd.
   Based on MIPS and RISC-V target for GNU compiler.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "insn-attr.h"
#include "output.h"
#include "alias.h"
#include "fold-const.h"
#include "varasm.h"
#include "stor-layout.h"
#include "calls.h"
#include "explow.h"
#include "expr.h"
#include "libfuncs.h"
#include "reload.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "sched-int.h"
#include "gimplify.h"
#include "target-globals.h"
#include "tree-pass.h"
#include "context.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "opts.h"
#include "function-abi.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"

/* This file should be included last.  */
#include "target-def.h"

/* True if X is an UNSPEC wrapper around a SYMBOL_REF or LABEL_REF.  */
#define UNSPEC_ADDRESS_P(X) \
  (GET_CODE (X) == UNSPEC \
   && XINT (X, 1) >= UNSPEC_ADDRESS_FIRST \
   && XINT (X, 1) < UNSPEC_ADDRESS_FIRST + NUM_SYMBOL_TYPES)

/* Extract the symbol or label from UNSPEC wrapper X.  */
#define UNSPEC_ADDRESS(X) XVECEXP (X, 0, 0)

/* Extract the symbol type from UNSPEC wrapper X.  */
#define UNSPEC_ADDRESS_TYPE(X) \
  ((enum loongarch_symbol_type) (XINT (X, 1) - UNSPEC_ADDRESS_FIRST))
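
/* Illustrative note (not from the original source): such a wrapper has
   the RTL shape

     (unspec [(symbol_ref ("sym"))] UNSPEC_ADDRESS_FIRST + <symbol_type>)

   so UNSPEC_ADDRESS recovers the wrapped symbol_ref and
   UNSPEC_ADDRESS_TYPE recovers the loongarch_symbol_type encoded in the
   unspec number.  */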

/* True if INSN is a loongarch.md pattern or asm statement.  */
/* ??? This test exists throughout the compiler; perhaps it should be
   moved to rtl.h.  */
#define USEFUL_INSN_P(INSN) \
  (NONDEBUG_INSN_P (INSN) \
   && GET_CODE (PATTERN (INSN)) != USE \
   && GET_CODE (PATTERN (INSN)) != CLOBBER)

/* True if bit BIT is set in VALUE.  */
#define BITSET_P(VALUE, BIT) (((VALUE) & (1 << (BIT))) != 0)

/* Classifies an address.

   ADDRESS_REG
       A natural register + offset address.  The register satisfies
       loongarch_valid_base_register_p and the offset is a
       const_arith_operand.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_LO_SUM
       A LO_SUM rtx.  The first operand is a valid base register and the
       second operand is a symbolic address.

   ADDRESS_CONST_INT
       A signed 16-bit constant address.

   ADDRESS_SYMBOLIC
       A constant symbolic address.  */
enum loongarch_address_type
{
  ADDRESS_REG,
  ADDRESS_REG_REG,
  ADDRESS_LO_SUM,
  ADDRESS_CONST_INT,
  ADDRESS_SYMBOLIC
};

/* Information about an address described by loongarch_address_type.  */
struct loongarch_address_info
{
  enum loongarch_address_type type;
  rtx reg;
  rtx offset;
  enum loongarch_symbol_type symbol_type;
};

/* Methods of loading an immediate value:

   METHOD_NORMAL:
     Load bits 0-31 of the immediate value.

   METHOD_LU32I:
     Load bits 32-51 of the immediate value.

   METHOD_LU52I:
     Load bits 52-63 of the immediate value.

   METHOD_MIRROR:
     Copy bits 0-31 of the immediate value to bits 32-63.  */
enum loongarch_load_imm_method
{
  METHOD_NORMAL,
  METHOD_LU32I,
  METHOD_LU52I,
  METHOD_MIRROR
};
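
/* Illustrative mapping (an assumption based on the LoongArch ISA, not
   stated in the original comment): METHOD_NORMAL corresponds to
   ADDI/ORI/LU12I.W sequences for the low 32 bits (LU12I.W sets bits
   12-31 and a following ORI fills bits 0-11), METHOD_LU32I to LU32I.D,
   METHOD_LU52I to LU52I.D, and METHOD_MIRROR to copying the low 32 bits
   into the high 32 bits.  */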

struct loongarch_integer_op
{
  enum rtx_code code;
  HOST_WIDE_INT value;
  /* The cumulative value produced by the load sequence after this
     step.  */
  HOST_WIDE_INT curr_value;
  enum loongarch_load_imm_method method;
};

/* The largest number of operations needed to load an integer constant.
   The worst accepted case for 64-bit constants is LU12I.W,LU32I.D,LU52I.D,ORI
   or LU12I.W,LU32I.D,LU52I.D,ADDI.D.  */
#define LARCH_MAX_INTEGER_OPS 4

/* Arrays that map GCC register numbers to debugger register numbers.  */
int loongarch_dwarf_regno[FIRST_PSEUDO_REGISTER];

/* Index [M][R] is true if register R is allowed to hold a value of mode M.  */
static bool loongarch_hard_regno_mode_ok_p[MAX_MACHINE_MODE]
					  [FIRST_PSEUDO_REGISTER];

/* Index C is true if character C is a valid PRINT_OPERAND punctuation
   character.  */
static bool loongarch_print_operand_punct[256];

/* Cached value of can_issue_more.  This is cached in loongarch_variable_issue
   hook and returned from loongarch_sched_reorder2.  */
static int cached_can_issue_more;

/* Index R is the smallest register class that contains register R.  */
const enum reg_class loongarch_regno_to_class[FIRST_PSEUDO_REGISTER] = {
  GR_REGS, GR_REGS, GR_REGS, GR_REGS,
  JIRL_REGS, JIRL_REGS, JIRL_REGS, JIRL_REGS,
  JIRL_REGS, JIRL_REGS, JIRL_REGS, JIRL_REGS,
  SIBCALL_REGS, JIRL_REGS, SIBCALL_REGS, SIBCALL_REGS,
  SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS,
  SIBCALL_REGS, GR_REGS, GR_REGS, JIRL_REGS,
  JIRL_REGS, JIRL_REGS, JIRL_REGS, JIRL_REGS,
  JIRL_REGS, JIRL_REGS, JIRL_REGS, JIRL_REGS,

  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FCC_REGS, FCC_REGS, FCC_REGS, FCC_REGS,
  FCC_REGS, FCC_REGS, FCC_REGS, FCC_REGS,
  FRAME_REGS, FRAME_REGS
};

/* Information about a single argument.  */
struct loongarch_arg_info
{
  /* True if the argument is at least partially passed on the stack.  */
  bool stack_p;

  /* The number of integer registers allocated to this argument.  */
  unsigned int num_gprs;

  /* The offset of the first register used, provided num_gprs is nonzero.
     If passed entirely on the stack, the value is MAX_ARGS_IN_REGISTERS.  */
  unsigned int gpr_offset;

  /* The number of floating-point registers allocated to this argument.  */
  unsigned int num_fprs;

  /* The offset of the first register used, provided num_fprs is nonzero.  */
  unsigned int fpr_offset;
};

/* Invoke MACRO (COND) for each fcmp.cond.{s/d} condition.  */
#define LARCH_FP_CONDITIONS(MACRO) \
  MACRO (f), \
  MACRO (un), \
  MACRO (eq), \
  MACRO (ueq), \
  MACRO (olt), \
  MACRO (ult), \
  MACRO (ole), \
  MACRO (ule), \
  MACRO (sf), \
  MACRO (ngle), \
  MACRO (seq), \
  MACRO (ngl), \
  MACRO (lt), \
  MACRO (nge), \
  MACRO (le), \
  MACRO (ngt)

/* Enumerates the codes above as LARCH_FP_COND_<X>.  */
#define DECLARE_LARCH_COND(X) LARCH_FP_COND_##X
enum loongarch_fp_condition
{
  LARCH_FP_CONDITIONS (DECLARE_LARCH_COND)
};
#undef DECLARE_LARCH_COND

/* Index X provides the string representation of LARCH_FP_COND_<X>.  */
#define STRINGIFY(X) #X
const char *const
loongarch_fp_conditions[16] = {LARCH_FP_CONDITIONS (STRINGIFY)};
#undef STRINGIFY

/* Size of guard page.  */
#define STACK_CLASH_PROTECTION_GUARD_SIZE \
  (1 << param_stack_clash_protection_guard_size)

/* Implement TARGET_FUNCTION_ARG_BOUNDARY.  Every parameter gets at
   least PARM_BOUNDARY bits of alignment, but will be given anything up
   to PREFERRED_STACK_BOUNDARY bits if the type requires it.  */

static unsigned int
loongarch_function_arg_boundary (machine_mode mode, const_tree type)
{
  unsigned int alignment;

  /* Use natural alignment if the type is not aggregate data.  */
  if (type && !AGGREGATE_TYPE_P (type))
    alignment = TYPE_ALIGN (TYPE_MAIN_VARIANT (type));
  else
    alignment = type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode);

  return MIN (PREFERRED_STACK_BOUNDARY, MAX (PARM_BOUNDARY, alignment));
}

/* If MODE represents an argument that can be passed or returned in
   floating-point registers, return the number of registers, else 0.  */

static unsigned
loongarch_pass_mode_in_fpr_p (machine_mode mode)
{
  if (GET_MODE_UNIT_SIZE (mode) <= UNITS_PER_FP_ARG)
    {
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	return 1;

      if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
	return 2;
    }

  return 0;
}

typedef struct
{
  const_tree type;
  HOST_WIDE_INT offset;
} loongarch_aggregate_field;

/* Identify subfields of aggregates that are candidates for passing in
   floating-point registers.  */

static int
loongarch_flatten_aggregate_field (const_tree type,
				   loongarch_aggregate_field fields[2], int n,
				   HOST_WIDE_INT offset)
{
  switch (TREE_CODE (type))
    {
    case RECORD_TYPE:
      /* Can't handle incomplete types nor sizes that are not fixed.  */
      if (!COMPLETE_TYPE_P (type)
	  || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
	  || !tree_fits_uhwi_p (TYPE_SIZE (type)))
	return -1;

      for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f))
	if (TREE_CODE (f) == FIELD_DECL)
	  {
	    if (!TYPE_P (TREE_TYPE (f)))
	      return -1;

	    if (DECL_SIZE (f) && integer_zerop (DECL_SIZE (f)))
	      continue;

	    HOST_WIDE_INT pos = offset + int_byte_position (f);
	    n = loongarch_flatten_aggregate_field (TREE_TYPE (f), fields, n,
						   pos);
	    if (n < 0)
	      return -1;
	  }
      return n;

    case ARRAY_TYPE:
      {
	HOST_WIDE_INT n_elts;
	loongarch_aggregate_field subfields[2];
	tree index = TYPE_DOMAIN (type);
	tree elt_size = TYPE_SIZE_UNIT (TREE_TYPE (type));
	int n_subfields = loongarch_flatten_aggregate_field (TREE_TYPE (type),
							     subfields, 0,
							     offset);

	/* Can't handle incomplete types nor sizes that are not fixed.  */
	if (n_subfields <= 0
	    || !COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
	    || !TYPE_MIN_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
	    || !tree_fits_uhwi_p (elt_size))
	  return -1;

	n_elts = 1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
		 - tree_to_uhwi (TYPE_MIN_VALUE (index));
	gcc_assert (n_elts >= 0);

	for (HOST_WIDE_INT i = 0; i < n_elts; i++)
	  for (int j = 0; j < n_subfields; j++)
	    {
	      if (n >= 2)
		return -1;

	      fields[n] = subfields[j];
	      fields[n++].offset += i * tree_to_uhwi (elt_size);
	    }

	return n;
      }

    case COMPLEX_TYPE:
      {
	/* A complex type needs to consume both fields, so n must be 0.  */
	if (n != 0)
	  return -1;

	HOST_WIDE_INT elt_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (type)));

	if (elt_size <= UNITS_PER_FP_ARG)
	  {
	    fields[0].type = TREE_TYPE (type);
	    fields[0].offset = offset;
	    fields[1].type = TREE_TYPE (type);
	    fields[1].offset = offset + elt_size;

	    return 2;
	  }

	return -1;
      }

    default:
      if (n < 2
	  && ((SCALAR_FLOAT_TYPE_P (type)
	       && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_FP_ARG)
	      || (INTEGRAL_TYPE_P (type)
		  && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_WORD)))
	{
	  fields[n].type = type;
	  fields[n].offset = offset;
	  return n + 1;
	}
      else
	return -1;
    }
}
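
/* Illustrative examples (not from the original source): for

     struct s1 { float a; float b; };

   the walk above returns 2 with fields[0] = {float, 0} and
   fields[1] = {float, 4}, and a double _Complex likewise yields two
   double fields.  A struct with three floats returns -1, so it falls
   back to the integer calling convention.  */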

/* Identify candidate aggregates for passing in floating-point registers.
   Candidates have at most two fields after flattening.  */

static int
loongarch_flatten_aggregate_argument (const_tree type,
				      loongarch_aggregate_field fields[2])
{
  if (!type || TREE_CODE (type) != RECORD_TYPE)
    return -1;

  return loongarch_flatten_aggregate_field (type, fields, 0, 0);
}

/* See whether TYPE is a record whose fields should be returned in one or
   two floating-point registers.  If so, populate FIELDS accordingly.  */

static unsigned
loongarch_pass_aggregate_num_fpr (const_tree type,
				  loongarch_aggregate_field fields[2])
{
  int n = loongarch_flatten_aggregate_argument (type, fields);

  for (int i = 0; i < n; i++)
    if (!SCALAR_FLOAT_TYPE_P (fields[i].type))
      return 0;

  return n > 0 ? n : 0;
}

/* See whether TYPE is a record whose fields should be returned in one
   floating-point register and one integer register.  If so, populate
   FIELDS accordingly.  */

static bool
loongarch_pass_aggregate_in_fpr_and_gpr_p (const_tree type,
					   loongarch_aggregate_field fields[2])
{
  unsigned num_int = 0, num_float = 0;
  int n = loongarch_flatten_aggregate_argument (type, fields);

  for (int i = 0; i < n; i++)
    {
      num_float += SCALAR_FLOAT_TYPE_P (fields[i].type);
      num_int += INTEGRAL_TYPE_P (fields[i].type);
    }

  return num_int == 1 && num_float == 1;
}
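
/* Illustrative example (not from the original source):

     struct s2 { float f; int i; };

   flattens to one scalar-float field and one integral field, so the
   predicate above holds and the value travels in an FPR/GPR pair,
   whereas struct s1 above takes two FPRs via
   loongarch_pass_aggregate_num_fpr.  */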

/* Return the representation of an argument passed or returned in an FPR
   when the value has mode VALUE_MODE and the type has TYPE_MODE.  The
   two modes may be different for structures like:

     struct __attribute__((packed)) foo { float f; }

   where the SFmode value "f" is passed in REGNO but the struct itself
   has mode BLKmode.  */

static rtx
loongarch_pass_fpr_single (machine_mode type_mode, unsigned regno,
			   machine_mode value_mode,
			   HOST_WIDE_INT offset)
{
  rtx x = gen_rtx_REG (value_mode, regno);

  if (type_mode != value_mode)
    {
      x = gen_rtx_EXPR_LIST (VOIDmode, x, GEN_INT (offset));
      x = gen_rtx_PARALLEL (type_mode, gen_rtvec (1, x));
    }
  return x;
}

/* Pass or return a composite value in the FPR pair REGNO and REGNO + 1.
   MODE is the mode of the composite.  MODE1 and OFFSET1 are the mode and
   byte offset for the first value, likewise MODE2 and OFFSET2 for the
   second value.  */

static rtx
loongarch_pass_fpr_pair (machine_mode mode, unsigned regno1,
			 machine_mode mode1, HOST_WIDE_INT offset1,
			 unsigned regno2, machine_mode mode2,
			 HOST_WIDE_INT offset2)
{
  return gen_rtx_PARALLEL (
    mode, gen_rtvec (2,
		     gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode1, regno1),
					GEN_INT (offset1)),
		     gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode2, regno2),
					GEN_INT (offset2))));
}
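
/* Illustrative note (not from the original source): for struct s1
   above, the rtx built here has the shape

     (parallel [(expr_list (reg:SF $f0) (const_int 0))
		(expr_list (reg:SF $f1) (const_int 4))])

   where each expr_list pairs a hard register with the byte offset of
   the field it carries.  */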

/* Fill INFO with information about a single argument, and return an
   RTL pattern to pass or return the argument.  CUM is the cumulative
   state for earlier arguments.  MODE is the mode of this argument and
   TYPE is its type (if known).  NAMED is true if this is a named
   (fixed) argument rather than a variable one.  RETURN_P is true if
   returning the argument, or false if passing the argument.  */

static rtx
loongarch_get_arg_info (struct loongarch_arg_info *info,
			const CUMULATIVE_ARGS *cum, machine_mode mode,
			const_tree type, bool named, bool return_p)
{
  unsigned num_bytes, num_words;
  unsigned fpr_base = return_p ? FP_RETURN : FP_ARG_FIRST;
  unsigned gpr_base = return_p ? GP_RETURN : GP_ARG_FIRST;
  unsigned alignment = loongarch_function_arg_boundary (mode, type);

  memset (info, 0, sizeof (*info));
  info->gpr_offset = cum->num_gprs;
  info->fpr_offset = cum->num_fprs;

  if (named)
    {
      loongarch_aggregate_field fields[2];
      unsigned fregno = fpr_base + info->fpr_offset;
      unsigned gregno = gpr_base + info->gpr_offset;

      /* Pass one- or two-element floating-point aggregates in FPRs.  */
      if ((info->num_fprs
	   = loongarch_pass_aggregate_num_fpr (type, fields))
	  && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS)
	switch (info->num_fprs)
	  {
	  case 1:
	    return loongarch_pass_fpr_single (mode, fregno,
					      TYPE_MODE (fields[0].type),
					      fields[0].offset);

	  case 2:
	    return loongarch_pass_fpr_pair (mode, fregno,
					    TYPE_MODE (fields[0].type),
					    fields[0].offset,
					    fregno + 1,
					    TYPE_MODE (fields[1].type),
					    fields[1].offset);

	  default:
	    gcc_unreachable ();
	  }

      /* Pass real and complex floating-point numbers in FPRs.  */
      if ((info->num_fprs = loongarch_pass_mode_in_fpr_p (mode))
	  && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS)
	switch (GET_MODE_CLASS (mode))
	  {
	  case MODE_FLOAT:
	    return gen_rtx_REG (mode, fregno);

	  case MODE_COMPLEX_FLOAT:
	    return loongarch_pass_fpr_pair (mode, fregno,
					    GET_MODE_INNER (mode), 0,
					    fregno + 1, GET_MODE_INNER (mode),
					    GET_MODE_UNIT_SIZE (mode));

	  default:
	    gcc_unreachable ();
	  }

      /* Pass structs with one float and one integer in an FPR and a GPR.  */
      if (loongarch_pass_aggregate_in_fpr_and_gpr_p (type, fields)
	  && info->gpr_offset < MAX_ARGS_IN_REGISTERS
	  && info->fpr_offset < MAX_ARGS_IN_REGISTERS)
	{
	  info->num_gprs = 1;
	  info->num_fprs = 1;

	  if (!SCALAR_FLOAT_TYPE_P (fields[0].type))
	    std::swap (fregno, gregno);

	  return loongarch_pass_fpr_pair (mode, fregno,
					  TYPE_MODE (fields[0].type),
					  fields[0].offset, gregno,
					  TYPE_MODE (fields[1].type),
					  fields[1].offset);
	}
    }

  /* Work out the size of the argument.  */
  num_bytes = type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
  num_words = (num_bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Doubleword-aligned varargs start on an even register boundary.  */
  if (!named && num_bytes != 0 && alignment > BITS_PER_WORD)
    info->gpr_offset += info->gpr_offset & 1;

  /* Partition the argument between registers and stack.  */
  info->num_fprs = 0;
  info->num_gprs = MIN (num_words, MAX_ARGS_IN_REGISTERS - info->gpr_offset);
  info->stack_p = (num_words - info->num_gprs) != 0;

  if (info->num_gprs || return_p)
    return gen_rtx_REG (mode, gpr_base + info->gpr_offset);

  return NULL_RTX;
}

/* Implement TARGET_FUNCTION_ARG.  */

static rtx
loongarch_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  struct loongarch_arg_info info;

  if (arg.end_marker_p ())
    return NULL;

  return loongarch_get_arg_info (&info, cum, arg.mode, arg.type, arg.named,
				 false);
}

/* Implement TARGET_FUNCTION_ARG_ADVANCE.  */

static void
loongarch_function_arg_advance (cumulative_args_t cum_v,
				const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  struct loongarch_arg_info info;

  loongarch_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false);

  /* Advance the register count.  This has the effect of setting
     num_gprs to MAX_ARGS_IN_REGISTERS if a doubleword-aligned
     argument required us to skip the final GPR and pass the whole
     argument on the stack.  */
  cum->num_fprs = info.fpr_offset + info.num_fprs;
  cum->num_gprs = info.gpr_offset + info.num_gprs;
}

/* Implement TARGET_ARG_PARTIAL_BYTES.  */

static int
loongarch_arg_partial_bytes (cumulative_args_t cum,
			     const function_arg_info &generic_arg)
{
  struct loongarch_arg_info arg;

  loongarch_get_arg_info (&arg, get_cumulative_args (cum), generic_arg.mode,
			  generic_arg.type, generic_arg.named, false);
  return arg.stack_p ? arg.num_gprs * UNITS_PER_WORD : 0;
}

/* Implement FUNCTION_VALUE and LIBCALL_VALUE.  For normal calls,
   VALTYPE is the return type and MODE is VOIDmode.  For libcalls,
   VALTYPE is null and MODE is the mode of the return value.  */

static rtx
loongarch_function_value_1 (const_tree type, const_tree func,
			    machine_mode mode)
{
  struct loongarch_arg_info info;
  CUMULATIVE_ARGS args;

  if (type)
    {
      int unsigned_p = TYPE_UNSIGNED (type);

      mode = TYPE_MODE (type);

      /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes
	 return values, promote the mode here too.  */
      mode = promote_function_mode (type, mode, &unsigned_p, func, 1);
    }

  memset (&args, 0, sizeof (args));
  return loongarch_get_arg_info (&info, &args, mode, type, true, true);
}

/* Implement TARGET_FUNCTION_VALUE.  */

static rtx
loongarch_function_value (const_tree valtype, const_tree fn_decl_or_type,
			  bool outgoing ATTRIBUTE_UNUSED)
{
  return loongarch_function_value_1 (valtype, fn_decl_or_type, VOIDmode);
}

/* Implement TARGET_LIBCALL_VALUE.  */

static rtx
loongarch_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
{
  return loongarch_function_value_1 (NULL_TREE, NULL_TREE, mode);
}

/* Implement TARGET_PASS_BY_REFERENCE.  */

static bool
loongarch_pass_by_reference (cumulative_args_t cum_v,
			     const function_arg_info &arg)
{
  HOST_WIDE_INT size = arg.type_size_in_bytes ();
  struct loongarch_arg_info info;
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  /* ??? std_gimplify_va_arg_expr passes NULL for cum.  Fortunately, we
     never pass variadic arguments in floating-point registers, so we can
     avoid the call to loongarch_get_arg_info in this case.  */
  if (cum != NULL)
    {
      /* Don't pass by reference if we can use a floating-point register.  */
      loongarch_get_arg_info (&info, cum, arg.mode, arg.type, arg.named,
			      false);
      if (info.num_fprs)
	return false;
    }

  /* Pass by reference if the data do not fit in two integer registers.  */
  return !IN_RANGE (size, 0, 2 * UNITS_PER_WORD);
}
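
/* Illustrative example (not from the original source): on LA64,
   UNITS_PER_WORD is 8, so a 24-byte struct fails the IN_RANGE check
   above and is passed by reference, while a 16-byte struct still goes
   in two GPRs.  The same rule decides return-in-memory below.  */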

/* Implement TARGET_RETURN_IN_MEMORY.  */

static bool
loongarch_return_in_memory (const_tree type,
			    const_tree fndecl ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS args;
  cumulative_args_t cum = pack_cumulative_args (&args);

  /* The rules for returning in memory are the same as for passing the
     first named argument by reference.  */
  memset (&args, 0, sizeof (args));
  function_arg_info arg (const_cast<tree> (type), /*named=*/true);
  return loongarch_pass_by_reference (cum, arg);
}

/* Implement TARGET_SETUP_INCOMING_VARARGS.  */

static void
loongarch_setup_incoming_varargs (cumulative_args_t cum,
				  const function_arg_info &arg,
				  int *pretend_size ATTRIBUTE_UNUSED,
				  int no_rtl)
{
  CUMULATIVE_ARGS local_cum;
  int gp_saved;

  /* The caller has advanced CUM up to, but not beyond, the last named
     argument.  Advance a local copy of CUM past the last "real" named
     argument, to find out how many registers are left over.  */
  local_cum = *get_cumulative_args (cum);

  /* For a C23 variadic function w/o any named argument, and w/o an
     artificial argument for a large return value, skip advancing args.
     There is such an artificial argument iff arg.type is non-NULL
     (PR 114175).  */
  if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
      || arg.type != NULL_TREE)
    loongarch_function_arg_advance (pack_cumulative_args (&local_cum), arg);

  /* Find out how many registers we need to save.  */
  gp_saved = cfun->va_list_gpr_size / UNITS_PER_WORD;
  if (gp_saved > (int) (MAX_ARGS_IN_REGISTERS - local_cum.num_gprs))
    gp_saved = MAX_ARGS_IN_REGISTERS - local_cum.num_gprs;

  if (!no_rtl && gp_saved > 0)
    {
      rtx ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
			       REG_PARM_STACK_SPACE (cfun->decl)
			       - gp_saved * UNITS_PER_WORD);
      rtx mem = gen_frame_mem (BLKmode, ptr);
      set_mem_alias_set (mem, get_varargs_alias_set ());

      move_block_from_reg (local_cum.num_gprs + GP_ARG_FIRST, mem, gp_saved);
    }
  if (REG_PARM_STACK_SPACE (cfun->decl) == 0)
    cfun->machine->varargs_size = gp_saved * UNITS_PER_WORD;
}

/* Make the last instruction frame-related and note that it performs
   the operation described by FRAME_PATTERN.  */

static void
loongarch_set_frame_expr (rtx frame_pattern)
{
  rtx insn;

  insn = get_last_insn ();
  RTX_FRAME_RELATED_P (insn) = 1;
  REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR, frame_pattern,
				      REG_NOTES (insn));
}

/* Return a frame-related rtx that stores REG at MEM.
   REG must be a single register.  */

static rtx
loongarch_frame_set (rtx mem, rtx reg)
{
  rtx set = gen_rtx_SET (mem, reg);
  RTX_FRAME_RELATED_P (set) = 1;
  return set;
}

/* Return true if the current function must save register REGNO.  */

static bool
loongarch_save_reg_p (unsigned int regno)
{
  bool call_saved = !global_regs[regno] && !call_used_regs[regno];
  bool might_clobber
    = crtl->saves_all_registers || df_regs_ever_live_p (regno);

  if (call_saved && might_clobber)
    return true;

  if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
    return true;

  if (regno == RETURN_ADDR_REGNUM && crtl->calls_eh_return)
    return true;

  return false;
}

/* Determine which GPR save/restore routine to call.  */

static unsigned
loongarch_save_libcall_count (unsigned mask)
{
  for (unsigned n = GP_REG_LAST; n > GP_REG_FIRST; n--)
    if (BITSET_P (mask, n))
      return CALLEE_SAVED_REG_NUMBER (n) + 1;
  abort ();
}

/* Populate the current function's loongarch_frame_info structure.

   LoongArch stack frames grow downward.  High addresses are at the top.

	+-------------------------------+
	|                               |
	|  incoming stack arguments     |
	|                               |
	+-------------------------------+ <-- incoming stack pointer
	|                               |
	|  callee-allocated save area   |
	|  for arguments that are       |
	|  split between registers and  |
	|  the stack                    |
	|                               |
	+-------------------------------+ <-- arg_pointer_rtx (virtual)
	|                               |
	|  callee-allocated save area   |
	|  for register varargs         |
	|                               |
	+-------------------------------+ <-- hard_frame_pointer_rtx;
	|                               |     stack_pointer_rtx + gp_sp_offset
	|  GPR save area                |       + UNITS_PER_WORD
	|                               |
	+-------------------------------+ <-- stack_pointer_rtx + fp_sp_offset
	|                               |       + UNITS_PER_HWFPVALUE
	|  FPR save area                |
	|                               |
	+-------------------------------+ <-- frame_pointer_rtx (virtual)
	|                               |
	|  local variables              |
	|                               |
      P +-------------------------------+
	|                               |
	|  outgoing stack arguments     |
	|                               |
	+-------------------------------+ <-- stack_pointer_rtx

   Dynamic stack allocations such as alloca insert data at point P.
   They decrease stack_pointer_rtx but leave frame_pointer_rtx and
   hard_frame_pointer_rtx unchanged.  */

static void
loongarch_compute_frame_info (void)
{
  struct loongarch_frame_info *frame;
  HOST_WIDE_INT offset;
  unsigned int regno, i, num_x_saved = 0, num_f_saved = 0;

  frame = &cfun->machine->frame;
  memset (frame, 0, sizeof (*frame));

  /* Find out which GPRs we need to save.  */
  for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
    if (loongarch_save_reg_p (regno))
      frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++;

  /* If this function calls eh_return, we must also save and restore the
     EH data registers.  */
  if (crtl->calls_eh_return)
    for (i = 0; (regno = EH_RETURN_DATA_REGNO (i)) != INVALID_REGNUM; i++)
      frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++;

  /* Find out which FPRs we need to save.  This loop must iterate over
     the same space as its companion in loongarch_for_each_saved_reg.  */
  if (TARGET_HARD_FLOAT)
    for (regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
      if (loongarch_save_reg_p (regno))
	frame->fmask |= 1 << (regno - FP_REG_FIRST), num_f_saved++;

  /* At the bottom of the frame are any outgoing stack arguments.  */
  offset = LARCH_STACK_ALIGN (crtl->outgoing_args_size);
  /* Next are local stack variables.  */
  offset += LARCH_STACK_ALIGN (get_frame_size ());
  /* The virtual frame pointer points above the local variables.  */
  frame->frame_pointer_offset = offset;
  /* Next are the callee-saved FPRs.  */
  if (frame->fmask)
    {
      offset += LARCH_STACK_ALIGN (num_f_saved * UNITS_PER_FP_REG);
      frame->fp_sp_offset = offset - UNITS_PER_FP_REG;
    }
  else
    frame->fp_sp_offset = offset;
  /* Next are the callee-saved GPRs.  */
  if (frame->mask)
    {
      unsigned x_save_size = LARCH_STACK_ALIGN (num_x_saved * UNITS_PER_WORD);
      unsigned num_save_restore
	= 1 + loongarch_save_libcall_count (frame->mask);

      /* Only use save/restore routines if they don't alter the stack size.  */
      if (LARCH_STACK_ALIGN (num_save_restore * UNITS_PER_WORD) == x_save_size)
	frame->save_libcall_adjustment = x_save_size;

      offset += x_save_size;
      frame->gp_sp_offset = offset - UNITS_PER_WORD;
    }
  else
    frame->gp_sp_offset = offset;
  /* The hard frame pointer points above the callee-saved GPRs.  */
  frame->hard_frame_pointer_offset = offset;
  /* Above the hard frame pointer is the callee-allocated varargs save
     area.  */
  offset += LARCH_STACK_ALIGN (cfun->machine->varargs_size);
  /* Next is the callee-allocated area for pretend stack arguments.  */
  offset += LARCH_STACK_ALIGN (crtl->args.pretend_args_size);
  /* Arg pointer must be below pretend args, but must be above alignment
     padding.  */
  frame->arg_pointer_offset = offset - crtl->args.pretend_args_size;
  frame->total_size = offset;
  /* Above that are the incoming stack pointer and any incoming
     arguments.  */

  /* Only use save/restore routines when the GPRs are atop the frame.  */
  if (frame->hard_frame_pointer_offset != frame->total_size)
    frame->save_libcall_adjustment = 0;
}
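
/* Worked example (illustrative, assuming LA64 with a 16-byte stack
   alignment): a function with 8 bytes of locals and no outgoing
   arguments that saves $ra and $fp.  The locals round up to 16, so
   frame_pointer_offset = 16; no FPRs are saved, so fp_sp_offset = 16;
   the two 8-byte GPR slots round up to 16, giving
   gp_sp_offset = 32 - 8 = 24, hard_frame_pointer_offset = 32 and
   total_size = 32.  */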

/* Implement INITIAL_ELIMINATION_OFFSET.  FROM is either the frame pointer
   or argument pointer.  TO is either the stack pointer or hard frame
   pointer.  */

HOST_WIDE_INT
loongarch_initial_elimination_offset (int from, int to)
{
  HOST_WIDE_INT src, dest;

  loongarch_compute_frame_info ();

  if (to == HARD_FRAME_POINTER_REGNUM)
    dest = cfun->machine->frame.hard_frame_pointer_offset;
  else if (to == STACK_POINTER_REGNUM)
    dest = 0; /* The stack pointer is the base of all offsets, hence 0.  */
  else
    gcc_unreachable ();

  if (from == FRAME_POINTER_REGNUM)
    src = cfun->machine->frame.frame_pointer_offset;
  else if (from == ARG_POINTER_REGNUM)
    src = cfun->machine->frame.arg_pointer_offset;
  else
    gcc_unreachable ();

  return src - dest;
}

/* A function to save or store a register.  The first argument is the
   register and the second is the stack slot.  */
typedef void (*loongarch_save_restore_fn) (rtx, rtx);

/* Use FN to save or restore register REGNO.  MODE is the register's
   mode and OFFSET is the offset of its save slot from the current
   stack pointer.  */

static void
loongarch_save_restore_reg (machine_mode mode, int regno, HOST_WIDE_INT offset,
			    loongarch_save_restore_fn fn)
{
  rtx mem;

  mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, offset));
  fn (gen_rtx_REG (mode, regno), mem);
}

/* Call FN for each register that is saved by the current function.
   SP_OFFSET is the offset of the current stack pointer from the start
   of the frame.  */

static void
loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset,
			      loongarch_save_restore_fn fn,
			      bool skip_eh_data_regs_p)
{
  HOST_WIDE_INT offset;

  /* Save the link register and s-registers.  */
  offset = cfun->machine->frame.gp_sp_offset - sp_offset;
  for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
    if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
      {
	/* Special care needs to be taken for $r4-$r7 (EH_RETURN_DATA_REGNO)
	   when returning normally from a function that calls
	   __builtin_eh_return.  In this case, these registers are saved but
	   should not be restored, or the return value may be clobbered.  */

	if (!(cfun->machine->reg_is_wrapped_separately[regno]
	      || (skip_eh_data_regs_p
		  && GP_ARG_FIRST <= regno && regno < GP_ARG_FIRST + 4)))
	  loongarch_save_restore_reg (word_mode, regno, offset, fn);

	offset -= UNITS_PER_WORD;
      }

  /* This loop must iterate over the same space as its companion in
     loongarch_compute_frame_info.  */
  offset = cfun->machine->frame.fp_sp_offset - sp_offset;
  machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;

  for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
    if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
      {
	if (!cfun->machine->reg_is_wrapped_separately[regno])
	  /* Use the FPR save mode computed above, not word_mode, so the
	     slot size matches the GET_MODE_SIZE step below.  */
	  loongarch_save_restore_reg (mode, regno, offset, fn);

	offset -= GET_MODE_SIZE (mode);
      }
}

/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */

rtx
loongarch_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p () ? emit_move_insn (dest, src)
				 : emit_move_insn_1 (dest, src));
}

/* Save register REG to MEM.  Make the instruction frame-related.  */

static void
loongarch_save_reg (rtx reg, rtx mem)
{
  loongarch_emit_move (mem, reg);
  loongarch_set_frame_expr (loongarch_frame_set (mem, reg));
}

/* Restore register REG from MEM.  */

static void
loongarch_restore_reg (rtx reg, rtx mem)
{
  rtx insn = loongarch_emit_move (reg, mem);
  rtx dwarf = NULL_RTX;
  dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
  REG_NOTES (insn) = dwarf;

  RTX_FRAME_RELATED_P (insn) = 1;
}

/* For stack frames that can't be allocated with a single ADDI instruction,
   compute the best value to initially allocate.  It must at a minimum
   allocate enough space to spill the callee-saved registers.  */

static HOST_WIDE_INT
loongarch_first_stack_step (struct loongarch_frame_info *frame)
{
  HOST_WIDE_INT min_first_step
    = LARCH_STACK_ALIGN (frame->total_size - frame->fp_sp_offset);

  /* When stack checking is required, if the sum of frame->total_size
     and stack_check_protect is greater than the stack clash protection
     guard size, then return min_first_step.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
      || (flag_stack_clash_protection
	  && frame->total_size > STACK_CLASH_PROTECTION_GUARD_SIZE))
    return min_first_step;

  if (IMM12_OPERAND (frame->total_size))
    return frame->total_size;

  HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8;
  HOST_WIDE_INT min_second_step = frame->total_size - max_first_step;
  gcc_assert (min_first_step <= max_first_step);

  /* As an optimization, use the least-significant bits of the total frame
     size, so that the second adjustment step is just LU12I + ADD.  */
  if (!IMM12_OPERAND (min_second_step)
      && frame->total_size % IMM_REACH < IMM_REACH / 2
      && frame->total_size % IMM_REACH >= min_first_step)
    return frame->total_size % IMM_REACH;

  return max_first_step;
}
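
/* Worked example (illustrative, assuming IMM_REACH == 4096 and a
   16-byte preferred stack boundary, so max_first_step == 2032): for a
   33000-byte frame, 33000 % 4096 == 232, which is below IMM_REACH / 2,
   so provided the callee-save area fits in those 232 bytes the first
   ADDI drops the stack by 232 and the remaining 32768 bytes of the
   second step can be built with a single LU12I.W.  */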

static void
loongarch_emit_stack_tie (void)
{
  emit_insn (gen_stack_tie (Pmode, stack_pointer_rtx,
			    frame_pointer_needed ? hard_frame_pointer_rtx
						 : stack_pointer_rtx));
}

#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

#if PROBE_INTERVAL > 16384
#error Cannot use indexed addressing mode for stack probing
#endif

/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  */

static void
loongarch_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
{
  HOST_WIDE_INT rounded_size;
  HOST_WIDE_INT interval;

  if (flag_stack_clash_protection)
    interval = STACK_CLASH_PROTECTION_GUARD_SIZE;
  else
    interval = PROBE_INTERVAL;

  rtx r12 = LARCH_PROLOGUE_TEMP2 (Pmode);
  rtx r14 = LARCH_PROLOGUE_TEMP3 (Pmode);

  size = size + first;

  /* Sanity check for the addressing mode we're going to use.  */
  gcc_assert (first <= 16384);

  /* Step 1: round SIZE to the previous multiple of the interval.  */

  rounded_size = ROUND_DOWN (size, interval);

  /* Step 2: compute initial and final value of the loop counter.  */

  emit_move_insn (r14, GEN_INT (interval));

  /* If rounded_size is zero, the space requested by the local variables
     is smaller than the probe interval, so no probe loop is needed.  */
  if (rounded_size != 0)
    {
      /* Step 3: the loop

	 do
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }
	 while (TEST_ADDR != LAST_ADDR)

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      if (rounded_size <= STACK_CLASH_MAX_UNROLL_PAGES * interval)
	{
	  for (HOST_WIDE_INT i = 0; i < rounded_size; i += interval)
	    {
	      emit_insn (gen_rtx_SET (stack_pointer_rtx,
				      gen_rtx_MINUS (Pmode,
						     stack_pointer_rtx,
						     r14)));
	      emit_move_insn (gen_rtx_MEM (Pmode,
					   gen_rtx_PLUS (Pmode,
							 stack_pointer_rtx,
							 const0_rtx)),
			      const0_rtx);
	      emit_insn (gen_blockage ());
	    }
	  dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size);
	}
      else
	{
	  emit_move_insn (r12, GEN_INT (rounded_size));
	  emit_insn (gen_rtx_SET (r12,
				  gen_rtx_MINUS (Pmode,
						 stack_pointer_rtx,
						 r12)));

	  emit_insn (gen_probe_stack_range (Pmode, stack_pointer_rtx,
					    stack_pointer_rtx, r12, r14));
	  emit_insn (gen_blockage ());
	  dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
	}
    }
  else
    dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);

  /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
     that SIZE is equal to ROUNDED_SIZE.  */

  if (size != rounded_size)
    {
      if (size - rounded_size >= 2048)
	{
	  emit_move_insn (r14, GEN_INT (size - rounded_size));
	  emit_insn (gen_rtx_SET (stack_pointer_rtx,
				  gen_rtx_MINUS (Pmode,
						 stack_pointer_rtx,
						 r14)));
	}
      else
	emit_insn (gen_rtx_SET (stack_pointer_rtx,
				gen_rtx_PLUS (Pmode,
					      stack_pointer_rtx,
					      GEN_INT (rounded_size - size))));
    }

  if (first)
    {
      emit_move_insn (r12, GEN_INT (first));
      emit_insn (gen_rtx_SET (stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode,
					    stack_pointer_rtx, r12)));
    }
  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}

/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
   absolute addresses.  */

const char *
loongarch_output_probe_stack_range (rtx reg1, rtx reg2, rtx reg3)
{
  static int labelno = 0;
  char loop_lab[32], tmp[64];
  rtx xops[3];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[0] = reg1;
  xops[2] = reg3;
  if (TARGET_64BIT)
    output_asm_insn ("sub.d\t%0,%0,%2", xops);
  else
    output_asm_insn ("sub.w\t%0,%0,%2", xops);

  /* Probe at TEST_ADDR, test if TEST_ADDR == LAST_ADDR and branch.  */
  xops[1] = reg2;
  strcpy (tmp, "bne\t%0,%1,");
  if (TARGET_64BIT)
    output_asm_insn ("st.d\t$r0,%0,0", xops);
  else
    output_asm_insn ("st.w\t$r0,%0,0", xops);
  output_asm_insn (strcat (tmp, &loop_lab[1]), xops);

  return "";
}
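
/* Illustrative note (not from the original source): with REG1 = $r12,
   REG2 = $r13 and REG3 = $r14 holding the probe interval, the routine
   above emits a loop of the form

     .LPSRL0:
	sub.d	$r12,$r12,$r14
	st.d	$r0,$r12,0
	bne	$r12,$r13,.LPSRL0

   walking the test address down one interval at a time and storing
   zero as the probe until it reaches the last address.  */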

/* Expand the "prologue" pattern.  */

void
loongarch_expand_prologue (void)
{
  struct loongarch_frame_info *frame = &cfun->machine->frame;
  HOST_WIDE_INT size = frame->total_size;
  rtx insn;

  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  /* Save the registers.  */
  if ((frame->mask | frame->fmask) != 0)
    {
      HOST_WIDE_INT step1 = MIN (size, loongarch_first_stack_step (frame));

      insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
			    GEN_INT (-step1));
      RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
      size -= step1;
      loongarch_for_each_saved_reg (size, loongarch_save_reg, false);
    }

  /* Set up the frame pointer, if we're using one.  */
  if (frame_pointer_needed)
    {
      insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
			    GEN_INT (frame->hard_frame_pointer_offset - size));
      RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;

      loongarch_emit_stack_tie ();
    }

  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
      || flag_stack_clash_protection)
    {
      HOST_WIDE_INT first = get_stack_check_protect ();

      if (frame->total_size == 0)
	{
	  /* do nothing.  */
	  dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
	  return;
	}

      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  HOST_WIDE_INT interval;

	  if (flag_stack_clash_protection)
	    interval = STACK_CLASH_PROTECTION_GUARD_SIZE;
	  else
	    interval = PROBE_INTERVAL;

	  if (size > interval && size > first)
	    loongarch_emit_probe_stack_range (first, size - first);
	  else
	    loongarch_emit_probe_stack_range (first, size);
	}
      else
	loongarch_emit_probe_stack_range (first, size);

      if (size > 0)
	{
	  /* Describe the effect of the previous instructions.  */
	  insn = plus_constant (Pmode, stack_pointer_rtx, -size);
	  insn = gen_rtx_SET (stack_pointer_rtx, insn);
	  loongarch_set_frame_expr (insn);
	}
      return;
    }

  if (size > 0)
    {
      if (IMM12_OPERAND (-size))
	{
	  insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
				GEN_INT (-size));
	  RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
	}
      else
	{
	  loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode),
			       GEN_INT (-size));
	  emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
				    LARCH_PROLOGUE_TEMP (Pmode)));

	  /* Describe the effect of the previous instructions.  */
	  insn = plus_constant (Pmode, stack_pointer_rtx, -size);
	  insn = gen_rtx_SET (stack_pointer_rtx, insn);
	  loongarch_set_frame_expr (insn);
	}
    }
}

/* Return nonzero if this function is known to have a null epilogue.
   This allows the optimizer to omit jumps to jumps if no stack
   was created.  */

bool
loongarch_can_use_return_insn (void)
{
  return reload_completed && cfun->machine->frame.total_size == 0;
}

/* Expand function epilogue using the following insn patterns:
   "epilogue" (style == NORMAL_RETURN)
   "sibcall_epilogue" (style == SIBCALL_RETURN)
   "eh_return" (style == EXCEPTION_RETURN)  */

void
loongarch_expand_epilogue (int style)
{
  /* Split the frame into two.  STEP1 is the amount of stack we should
     deallocate before restoring the registers.  STEP2 is the amount we
     should deallocate afterwards.

     Start off by assuming that no registers need to be restored.  */
  struct loongarch_frame_info *frame = &cfun->machine->frame;
  HOST_WIDE_INT step1 = frame->total_size;
  HOST_WIDE_INT step2 = 0;
  rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
  rtx insn;

  /* We need a memory barrier to prevent reads from the deallocated
     stack.  */
  bool need_barrier_p
    = (get_frame_size () + cfun->machine->frame.arg_pointer_offset) != 0;

  /* Handle simple returns.  */
  if (style == NORMAL_RETURN && loongarch_can_use_return_insn ())
    {
      emit_jump_insn (gen_return ());
      return;
    }

  /* Move past any dynamic stack allocations.  */
  if (cfun->calls_alloca)
    {
      /* Emit a barrier to prevent loads from a deallocated stack.  */
      loongarch_emit_stack_tie ();
      need_barrier_p = false;

      rtx adjust = GEN_INT (-frame->hard_frame_pointer_offset);
      if (!IMM12_OPERAND (INTVAL (adjust)))
	{
	  loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode), adjust);
	  adjust = LARCH_PROLOGUE_TEMP (Pmode);
	}

      insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
				       hard_frame_pointer_rtx,
				       adjust));

      rtx dwarf = NULL_RTX;
      rtx minus_offset = GEN_INT (-frame->hard_frame_pointer_offset);
      rtx cfa_adjust_value = gen_rtx_PLUS (Pmode,
					   hard_frame_pointer_rtx,
					   minus_offset);

      rtx cfa_adjust_rtx = gen_rtx_SET (stack_pointer_rtx, cfa_adjust_value);
      dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, cfa_adjust_rtx, dwarf);
      RTX_FRAME_RELATED_P (insn) = 1;

      REG_NOTES (insn) = dwarf;
    }

  /* If we need to restore registers, deallocate as much stack as
     possible in the second step without going out of range.  */
  if ((frame->mask | frame->fmask) != 0)
    {
      step2 = loongarch_first_stack_step (frame);
      step1 -= step2;
    }

  /* Set TARGET to BASE + STEP1.  */
  if (step1 > 0)
    {
      /* Emit a barrier to prevent loads from a deallocated stack.  */
      loongarch_emit_stack_tie ();
      need_barrier_p = false;

      /* Get an rtx for STEP1 that we can add to BASE.  */
      rtx adjust = GEN_INT (step1);
      if (!IMM12_OPERAND (step1))
	{
	  loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode), adjust);
	  adjust = LARCH_PROLOGUE_TEMP (Pmode);
	}

      insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
				       stack_pointer_rtx,
				       adjust));

      rtx dwarf = NULL_RTX;
      rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					 GEN_INT (step2));

      dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
      RTX_FRAME_RELATED_P (insn) = 1;

      REG_NOTES (insn) = dwarf;
    }

  /* Restore the registers.  */
  loongarch_for_each_saved_reg (frame->total_size - step2,
				loongarch_restore_reg,
				crtl->calls_eh_return
				&& style != EXCEPTION_RETURN);

  if (need_barrier_p)
    loongarch_emit_stack_tie ();

  /* Deallocate the final bit of the frame.  */
  if (step2 > 0)
    {
      insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
				       stack_pointer_rtx,
				       GEN_INT (step2)));

      rtx dwarf = NULL_RTX;
      rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx, const0_rtx);
      dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
      RTX_FRAME_RELATED_P (insn) = 1;

      REG_NOTES (insn) = dwarf;
    }

  /* Add in the __builtin_eh_return stack adjustment.  */
  if (crtl->calls_eh_return && style == EXCEPTION_RETURN)
    emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
			      EH_RETURN_STACKADJ_RTX));

  /* Emit return unless doing sibcall.  */
  if (style != SIBCALL_RETURN)
    emit_jump_insn (gen_simple_return_internal (ra));
}

#define LU32I_B (0xfffffULL << 32)
#define LU52I_B (0xfffULL << 52)

/* Fill CODES with a sequence of rtl operations to load VALUE.
   Return the number of operations needed.  */

static unsigned int
loongarch_build_integer (struct loongarch_integer_op *codes,
			 HOST_WIDE_INT value)
{
  unsigned int cost = 0;

  /* Get the lower 32 bits of the value.  */
  HOST_WIDE_INT low_part = (int32_t)value;

  if (IMM12_OPERAND (low_part) || IMM12_OPERAND_UNSIGNED (low_part))
    {
      /* The lower 32 bits can be loaded with a single instruction.  */
      codes[cost].code = UNKNOWN;
      codes[cost].method = METHOD_NORMAL;
      codes[cost].value = low_part;
      codes[cost].curr_value = low_part;
      cost++;
    }
  else
    {
      /* lu12i.w + ior.  */
      codes[cost].code = UNKNOWN;
      codes[cost].method = METHOD_NORMAL;
      codes[cost].value = low_part & ~(IMM_REACH - 1);
      codes[cost].curr_value = codes[cost].value;
      cost++;
      HOST_WIDE_INT iorv = low_part & (IMM_REACH - 1);
      if (iorv != 0)
	{
	  codes[cost].code = IOR;
	  codes[cost].method = METHOD_NORMAL;
	  codes[cost].value = iorv;
	  codes[cost].curr_value = low_part;
	  cost++;
	}
    }

  if (TARGET_64BIT)
    {
      bool lu32i[2] = {(value & LU32I_B) == 0, (value & LU32I_B) == LU32I_B};
      bool lu52i[2] = {(value & LU52I_B) == 0, (value & LU52I_B) == LU52I_B};

      int sign31 = (value & (HOST_WIDE_INT_1U << 31)) >> 31;
      int sign51 = (value & (HOST_WIDE_INT_1U << 51)) >> 51;

      uint32_t hival = (uint32_t) (value >> 32);
      uint32_t loval = (uint32_t) value;

      /* Determine whether the upper 32 bits are sign-extended from the
	 lower 32 bits.  If so, the instructions that load the high part
	 can be omitted.  */
      if (lu32i[sign31] && lu52i[sign31])
	return cost;
      /* If the lower 32 bits are the same as the upper 32 bits, just copy
	 the lower 32 bits to the upper 32 bits.  */
      else if (loval == hival)
	{
	  codes[cost].method = METHOD_MIRROR;
	  codes[cost].curr_value = value;
	  return cost + 1;
	}
      /* Determine whether bits 32-51 are sign-extended from the lower 32
	 bits.  If so, directly load bits 52-63.  */
      else if (lu32i[sign31])
	{
	  codes[cost].method = METHOD_LU52I;
	  codes[cost].value = value & LU52I_B;
	  codes[cost].curr_value = value;
	  return cost + 1;
	}

      codes[cost].method = METHOD_LU32I;
      codes[cost].value = (value & LU32I_B) | (sign51 ? LU52I_B : 0);
      codes[cost].curr_value = (value & 0xfffffffffffff)
			       | (sign51 ? LU52I_B : 0);
      cost++;

      /* Determine whether bits 52-63 are sign-extended from bit 51, and
	 if not, load bits 52-63.  */
      if (!lu52i[(value & (HOST_WIDE_INT_1U << 51)) >> 51])
	{
	  codes[cost].method = METHOD_LU52I;
	  codes[cost].value = value & LU52I_B;
	  codes[cost].curr_value = value;
	  cost++;
	}
    }

  gcc_assert (cost <= LARCH_MAX_INTEGER_OPS);

  return cost;
}
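
/* Worked example (illustrative): loading 0x0123456789abcdef needs the
   worst-case four operations:

     lu12i.w  $t0, 0x89abc	# bits 12-31, sign-extended to 64 bits
     ori      $t0, $t0, 0xdef	# bits 0-11
     lu32i.d  $t0, 0x34567	# bits 32-51
     lu52i.d  $t0, $t0, 0x012	# bits 52-63

   whereas a value such as 0x7fffffff, whose upper 32 bits are just the
   sign-extension of bit 31, stops after the first two steps.  */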

/* Return the number of operations needed to load VALUE; the actual
   splitting of the integer is done in loongarch_output_move.  */

static unsigned int
loongarch_integer_cost (HOST_WIDE_INT value)
{
  struct loongarch_integer_op codes[LARCH_MAX_INTEGER_OPS];
  return loongarch_build_integer (codes, value);
}

/* Implement TARGET_LEGITIMATE_CONSTANT_P.  */

static bool
loongarch_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return loongarch_const_insns (x) > 0;
}

/* Return true if X is a thread-local symbol.  */

static bool
loongarch_tls_symbol_p (rtx x)
{
  return SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0;
}

/* Return true if SYMBOL_REF X is associated with a global symbol
   (in the STB_GLOBAL sense).  */

bool
loongarch_global_symbol_p (const_rtx x)
{
  if (LABEL_REF_P (x))
    return false;

  const_tree decl = SYMBOL_REF_DECL (x);

  if (!decl)
    return !SYMBOL_REF_LOCAL_P (x) || SYMBOL_REF_EXTERNAL_P (x);

  /* Weakref symbols are not TREE_PUBLIC, but their targets are global
     or weak symbols.  Relocations in the object file will be against
     the target symbol, so it's that symbol's binding that matters here.  */
  return DECL_P (decl) && (TREE_PUBLIC (decl) || DECL_WEAK (decl));
}

bool
loongarch_global_symbol_noweak_p (const_rtx x)
{
  if (LABEL_REF_P (x))
    return false;

  const_tree decl = SYMBOL_REF_DECL (x);

  if (!decl)
    return !SYMBOL_REF_LOCAL_P (x) || SYMBOL_REF_EXTERNAL_P (x);

  return DECL_P (decl) && TREE_PUBLIC (decl);
}

bool
loongarch_weak_symbol_p (const_rtx x)
{
  const_tree decl;
  if (LABEL_REF_P (x) || !(decl = SYMBOL_REF_DECL (x)))
    return false;
  return DECL_P (decl) && DECL_WEAK (decl);
}

/* Return true if SYMBOL_REF X binds locally.  */

bool
loongarch_symbol_binds_local_p (const_rtx x)
{
  if (TARGET_DIRECT_EXTERN_ACCESS)
    return true;

  if (SYMBOL_REF_P (x))
    return (SYMBOL_REF_DECL (x)
	    ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
	    : SYMBOL_REF_LOCAL_P (x));
  else
    return false;
}

/* Return true if OP is a constant vector with the number of units in MODE,
   and each unit has the same bit set.  */

bool
loongarch_const_vector_bitimm_set_p (rtx op, machine_mode mode)
{
  if (GET_CODE (op) == CONST_VECTOR && op != CONST0_RTX (mode))
    {
      unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (op, 0));
      int vlog2 = exact_log2 (val & GET_MODE_MASK (GET_MODE_INNER (mode)));

      if (vlog2 != -1)
	{
	  gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
	  gcc_assert (vlog2 >= 0 && vlog2 <= GET_MODE_UNIT_BITSIZE (mode) - 1);
	  return loongarch_const_vector_same_val_p (op, mode);
	}
    }

  return false;
}
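
/* Illustrative example (not from the original source): a V4SI vector
   of four 8s satisfies the test above (8 == 1 << 3, identical in every
   lane), so it can serve as the immediate of a bit-set operation such
   as vbitseti.w, while a vector of 6s fails because 6 is not a power
   of two.  */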

/* Return true if OP is a constant vector with the number of units in MODE,
   and each unit has the same bit clear.  */

bool
loongarch_const_vector_bitimm_clr_p (rtx op, machine_mode mode)
{
  if (GET_CODE (op) == CONST_VECTOR && op != CONSTM1_RTX (mode))
    {
      unsigned HOST_WIDE_INT val = ~UINTVAL (CONST_VECTOR_ELT (op, 0));
      int vlog2 = exact_log2 (val & GET_MODE_MASK (GET_MODE_INNER (mode)));

      if (vlog2 != -1)
	{
	  gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
	  gcc_assert (vlog2 >= 0 && vlog2 <= GET_MODE_UNIT_BITSIZE (mode) - 1);
	  return loongarch_const_vector_same_val_p (op, mode);
	}
    }

  return false;
}

/* Return true if OP is a constant vector with the number of units in MODE,
   and each unit has the same value.  */

bool
loongarch_const_vector_same_val_p (rtx op, machine_mode mode)
{
  int i, nunits = GET_MODE_NUNITS (mode);
  rtx first;

  if (GET_CODE (op) != CONST_VECTOR || GET_MODE (op) != mode)
    return false;

  first = CONST_VECTOR_ELT (op, 0);
  for (i = 1; i < nunits; i++)
    if (!rtx_equal_p (first, CONST_VECTOR_ELT (op, i)))
      return false;

  return true;
}
|
||
|
||
/* Return true if OP is a constant vector with the number of units in MODE,
|
||
and each unit has the same value as well as replicated bytes in the value.
|
||
*/
|
||
|
||
bool
|
||
loongarch_const_vector_same_bytes_p (rtx op, machine_mode mode)
|
||
{
|
||
int i, bytes;
|
||
HOST_WIDE_INT val, first_byte;
|
||
rtx first;
|
||
|
||
if (!loongarch_const_vector_same_val_p (op, mode))
|
||
return false;
|
||
|
||
first = CONST_VECTOR_ELT (op, 0);
|
||
bytes = GET_MODE_UNIT_SIZE (mode);
|
||
val = INTVAL (first);
|
||
first_byte = val & 0xff;
|
||
for (i = 1; i < bytes; i++)
|
||
{
|
||
val >>= 8;
|
||
if ((val & 0xff) != first_byte)
|
||
return false;
|
||
}
|
||
|
||
return true;
|
||
}
|
||
|
||
/* Return true if OP is a constant vector with the number of units in MODE,
|
||
and each unit has the same integer value in the range [LOW, HIGH]. */
|
||
|
||
bool
|
||
loongarch_const_vector_same_int_p (rtx op, machine_mode mode, HOST_WIDE_INT low,
|
||
HOST_WIDE_INT high)
|
||
{
|
||
HOST_WIDE_INT value;
|
||
rtx elem0;
|
||
|
||
if (!loongarch_const_vector_same_val_p (op, mode))
|
||
return false;
|
||
|
||
elem0 = CONST_VECTOR_ELT (op, 0);
|
||
if (!CONST_INT_P (elem0))
|
||
return false;
|
||
|
||
value = INTVAL (elem0);
|
||
return (value >= low && value <= high);
|
||
}
|
||
|
||
/* Return true if OP is a constant vector with repeated 4-element sets
|
||
in mode MODE. */
|
||
|
||
bool
|
||
loongarch_const_vector_shuffle_set_p (rtx op, machine_mode mode)
|
||
{
|
||
int nunits = GET_MODE_NUNITS (mode);
|
||
int nsets = nunits / 4;
|
||
int set = 0;
|
||
int i, j;
|
||
|
||
/* Check if we have the same 4-element sets. */
|
||
for (j = 0; j < nsets; j++, set = 4 * j)
|
||
for (i = 0; i < 4; i++)
|
||
if ((INTVAL (XVECEXP (op, 0, i))
|
||
!= (INTVAL (XVECEXP (op, 0, set + i)) - set))
|
||
|| !IN_RANGE (INTVAL (XVECEXP (op, 0, set + i)), 0, set + 3))
|
||
return false;
|
||
return true;
|
||
}
|
||
|
||
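/* If the constant vector X can be materialized with a single LSX/LASX
   [x]vrepli.{b/h/w/d} instruction, return the equivalent CONST_VECTOR
   (possibly reinterpreted in a wider element mode) to use as its operand;
   otherwise return NULL_RTX.  The [-512, 511] check corresponds to the
   instruction's signed 10-bit immediate field.  */
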
rtx
loongarch_const_vector_vrepli (rtx x, machine_mode mode)
{
  int size = GET_MODE_SIZE (mode);

  if (GET_CODE (x) != CONST_VECTOR
      || GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
    return NULL_RTX;

  for (scalar_int_mode elem_mode: {QImode, HImode, SImode, DImode})
    {
      machine_mode new_mode =
        mode_for_vector (elem_mode, size / GET_MODE_SIZE (elem_mode))
        .require ();
      rtx op = lowpart_subreg (new_mode, x, mode);
      if (loongarch_const_vector_same_int_p (op, new_mode, -512, 511))
        return op;
    }

  return NULL_RTX;
}

/* Return true if rtx constants of mode MODE should be put into a small
   data section.  */

static bool
loongarch_rtx_constant_in_small_data_p (machine_mode mode)
{
  return (GET_MODE_SIZE (mode) <= g_switch_value);
}

/* Return the method that should be used to access SYMBOL_REF or
   LABEL_REF X.  */

static enum loongarch_symbol_type
loongarch_classify_symbol (const_rtx x)
{
  enum loongarch_symbol_type pcrel =
    TARGET_CMODEL_EXTREME ? SYMBOL_PCREL64 : SYMBOL_PCREL;

  if (!SYMBOL_REF_P (x))
    return pcrel;

  if (SYMBOL_REF_TLS_MODEL (x))
    return SYMBOL_TLS;

  if (!loongarch_symbol_binds_local_p (x))
    return SYMBOL_GOT_DISP;

  tree t = SYMBOL_REF_DECL (x);
  if (!t)
    return pcrel;

  t = lookup_attribute ("model", DECL_ATTRIBUTES (t));
  if (!t)
    return pcrel;

  t = TREE_VALUE (TREE_VALUE (t));

  /* loongarch_handle_model_attribute should reject other values.  */
  gcc_assert (TREE_CODE (t) == STRING_CST);

  const char *model = TREE_STRING_POINTER (t);
  if (strcmp (model, "normal") == 0)
    return SYMBOL_PCREL;
  if (strcmp (model, "extreme") == 0)
    return SYMBOL_PCREL64;

  /* loongarch_handle_model_attribute should reject unknown model
     names.  */
  gcc_unreachable ();
}

/* Classify the base of symbolic expression X.  */

static enum loongarch_symbol_type
loongarch_classify_symbolic_expression (rtx x)
{
  rtx offset;

  split_const (x, &x, &offset);
  if (UNSPEC_ADDRESS_P (x))
    return UNSPEC_ADDRESS_TYPE (x);

  return loongarch_classify_symbol (x);
}

/* Return true if X is a symbolic constant.  If it is, store the type of
   the symbol in *SYMBOL_TYPE.  */

bool
loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_type *symbol_type)
{
  rtx offset;

  split_const (x, &x, &offset);
  if (UNSPEC_ADDRESS_P (x))
    {
      *symbol_type = UNSPEC_ADDRESS_TYPE (x);
      x = UNSPEC_ADDRESS (x);
    }
  else if (SYMBOL_REF_P (x) || LABEL_REF_P (x))
    *symbol_type = loongarch_classify_symbol (x);
  else
    return false;

  if (offset == const0_rtx)
    return true;

  /* Check whether a nonzero offset is valid for the underlying
     relocations.  */
  switch (*symbol_type)
    {
    case SYMBOL_PCREL64:
      /* When the code model is extreme, the non-zero offset case is not
         handled well, so it is disabled here for now.  */
      if (!loongarch_explicit_relocs_p (SYMBOL_PCREL64))
        return false;
      /* fall through */
    case SYMBOL_PCREL:
      /* GAS rejects offsets outside the range [-2^31, 2^31-1].  */
      return sext_hwi (INTVAL (offset), 32) == INTVAL (offset);

    /* The following symbol types do not allow non-zero offsets.  */
    case SYMBOL_GOT_DISP:
    case SYMBOL_TLS_IE:
    case SYMBOL_TLSGD:
    case SYMBOL_TLSLDM:
    case SYMBOL_TLS:
    /* From an implementation perspective, tls_le symbols are allowed to
       have non-zero offsets, but binutils has not added support for that
       yet, so the generation of non-zero offsets is prohibited here.  */
    case SYMBOL_TLS_LE:
      return false;
    }
  gcc_unreachable ();
}

/* If -mexplicit-relocs=auto, we use machine operations with reloc hints
   for cases where the linker is unable to relax, so that we can schedule
   the machine operations; otherwise use an assembler pseudo-op so the
   assembler will generate R_LARCH_RELAX.  */

bool
loongarch_explicit_relocs_p (enum loongarch_symbol_type type)
{
  if (la_opt_explicit_relocs != EXPLICIT_RELOCS_AUTO)
    return la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS;

  /* The linker doesn't know how to relax accesses in the extreme code
     model.  */
  if (loongarch_symbol_extreme_p (type))
    return true;

  switch (type)
    {
    case SYMBOL_TLS_IE:
    case SYMBOL_TLS_LE:
    case SYMBOL_PCREL64:
      /* TLS IE cannot be relaxed.  TLS LE relaxation is different from
         the normal R_LARCH_RELAX-based relaxation and it **requires**
         using the explicit %le_{lo12,hi20,add}_r relocs.  The linker
         does not relax 64-bit pc-relative accesses as of now.  */
      return true;
    case SYMBOL_GOT_DISP:
      /* If we are performing LTO for a final link, and we have the
         linker plugin so we know the resolution of the symbols, then
         all GOT references are binding to external symbols or
         preemptable symbols.  So the linker cannot relax them.  */
      return (in_lto_p
              && !flag_incremental_link
              && HAVE_LTO_PLUGIN == 2
              && (!global_options_set.x_flag_use_linker_plugin
                  || global_options.x_flag_use_linker_plugin));
    default:
      return false;
    }
}

/* Returns the number of instructions necessary to reference a symbol.  */

static int
loongarch_symbol_insns (enum loongarch_symbol_type type, machine_mode mode)
{
  /* LSX LD.* and ST.* cannot support loading symbols via an immediate
     operand.  */
  if (mode != MAX_MACHINE_MODE
      && (LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode)))
    return 0;

  switch (type)
    {
    case SYMBOL_GOT_DISP:
      /* The constant will have to be loaded from the GOT before it
         is used in an address.  */
      if (!loongarch_explicit_relocs_p (type) && mode != MAX_MACHINE_MODE)
        return 0;

      return 3;

    case SYMBOL_PCREL:
    case SYMBOL_TLS_IE:
    case SYMBOL_TLS_LE:
      return 2;

    case SYMBOL_TLSGD:
    case SYMBOL_TLSLDM:
      return TARGET_TLS_DESC ? 4 : 3;

    case SYMBOL_PCREL64:
      return 5;

    case SYMBOL_TLS:
      /* We don't treat a bare TLS symbol as a constant.  */
      return 0;
    }
  gcc_unreachable ();
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
loongarch_cannot_force_const_mem (machine_mode mode, rtx x)
{
  enum loongarch_symbol_type type;
  rtx base, offset;

  /* As an optimization, reject constants that loongarch_legitimize_move
     can expand inline.

     Suppose we have a multi-instruction sequence that loads constant C
     into register R.  If R does not get allocated a hard register, and
     R is used in an operand that allows both registers and memory
     references, reload will consider forcing C into memory and using
     one of the instruction's memory alternatives.  Returning false
     here will force it to use an input reload instead.  */
  if ((CONST_INT_P (x) || GET_CODE (x) == CONST_VECTOR)
      && loongarch_legitimate_constant_p (mode, x))
    return true;

  split_const (x, &base, &offset);
  if (loongarch_symbolic_constant_p (base, &type))
    {
      /* The same optimization as for CONST_INT.  */
      if (IMM12_INT (offset)
          && loongarch_symbol_insns (type, MAX_MACHINE_MODE) > 0)
        return true;
    }

  /* TLS symbols must be computed by loongarch_legitimize_move.  */
  if (tls_referenced_p (x))
    return true;

  return false;
}

/* Return true if register REGNO is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

int
loongarch_regno_mode_ok_for_base_p (int regno,
                                    machine_mode mode ATTRIBUTE_UNUSED,
                                    bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
        return true;
      regno = reg_renumber[regno];
    }

  /* These fake registers will be eliminated to either the stack or
     hard frame pointer, both of which are usually valid base registers.
     Reload deals with the cases where the eliminated form isn't valid.  */
  if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM)
    return true;

  return GP_REG_P (regno);
}

/* Return true if X is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

static bool
loongarch_valid_base_register_p (rtx x, machine_mode mode, bool strict_p)
{
  if (!strict_p && SUBREG_P (x))
    x = SUBREG_REG (x);

  return (REG_P (x)
          && loongarch_regno_mode_ok_for_base_p (REGNO (x), mode, strict_p));
}

/* Return true if, for every base register BASE_REG, (plus BASE_REG X)
   can address a value of mode MODE.  */

static bool
loongarch_valid_offset_p (rtx x, machine_mode mode)
{
  /* Check that X is a signed 12-bit number, or, for SImode and DImode
     accesses, a signed 16-bit number whose low two bits are zero
     (i.e. a 4-byte-aligned offset).  */
  if (!(const_arith_operand (x, Pmode)
        || ((mode == E_SImode || mode == E_DImode)
            && const_imm16_operand (x, Pmode)
            && (loongarch_signed_immediate_p (INTVAL (x), 14, 2)))))
    return false;

  /* We may need to split multiword moves, so make sure that every word
     is accessible.  */
  if (!(LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode))
      && GET_MODE_SIZE (mode) > UNITS_PER_WORD
      && !IMM12_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode) - UNITS_PER_WORD))
    return false;

  return true;
}

/* Should a symbol of type SYMBOL_TYPE be split in two or more?  */

bool
loongarch_split_symbol_type (enum loongarch_symbol_type symbol_type)
{
  switch (symbol_type)
    {
    case SYMBOL_PCREL:
    case SYMBOL_PCREL64:
    case SYMBOL_GOT_DISP:
    case SYMBOL_TLS_IE:
    case SYMBOL_TLS_LE:
    case SYMBOL_TLSGD:
    case SYMBOL_TLSLDM:
      return true;

    case SYMBOL_TLS:
      return false;

    default:
      gcc_unreachable ();
    }
}

/* Return true if a LO_SUM can address a value of mode MODE when the
   LO_SUM symbol has type SYMBOL_TYPE.  */

static bool
loongarch_valid_lo_sum_p (enum loongarch_symbol_type symbol_type,
                          machine_mode mode, rtx x)
{
  int align, size;

  /* Check that symbols of type SYMBOL_TYPE can be used to access values
     of mode MODE.  */
  if (loongarch_symbol_insns (symbol_type, mode) == 0)
    return false;

  /* Check that there is a known low-part relocation.  */
  if (!loongarch_split_symbol_type (symbol_type))
    return false;

  /* We can't tell size or alignment when we have BLKmode, so try
     extracting a decl from the symbol if possible.  */
  if (mode == BLKmode)
    {
      rtx offset;

      /* Extract the symbol from the LO_SUM operand, if any.  */
      split_const (x, &x, &offset);

      /* Might be a CODE_LABEL.  We can compute align but not size for
         that, so don't bother trying to handle it.  */
      if (!SYMBOL_REF_P (x))
        return false;

      /* Use worst case assumptions if we don't have a SYMBOL_REF_DECL.  */
      align = (SYMBOL_REF_DECL (x)
               ? DECL_ALIGN (SYMBOL_REF_DECL (x))
               : 1);
      size = (SYMBOL_REF_DECL (x) && DECL_SIZE (SYMBOL_REF_DECL (x))
              ? tree_to_uhwi (DECL_SIZE (SYMBOL_REF_DECL (x)))
              : 2 * BITS_PER_WORD);
    }
  else
    {
      align = GET_MODE_ALIGNMENT (mode);
      size = GET_MODE_BITSIZE (mode);
    }

  /* We may need to split multiword moves, so make sure that each word
     can be accessed without inducing a carry.  */
  if (size > BITS_PER_WORD
      && (!TARGET_STRICT_ALIGN || size > align))
    return false;

  return true;
}

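/* Check whether X is a valid index register for mode MODE.  If it is,
   record it in INFO as the index of an ADDRESS_REG_REG address and
   return true.  STRICT_P is true if REG_OK_STRICT is in effect.  */
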
static bool
loongarch_valid_index_p (struct loongarch_address_info *info, rtx x,
                         machine_mode mode, bool strict_p)
{
  rtx index;

  if ((REG_P (x) || SUBREG_P (x))
      && GET_MODE (x) == Pmode)
    {
      index = x;
    }
  else
    return false;

  if (!strict_p
      && SUBREG_P (index)
      && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (index))])
    index = SUBREG_REG (index);

  if (loongarch_valid_base_register_p (index, mode, strict_p))
    {
      info->type = ADDRESS_REG_REG;
      info->offset = index;
      return true;
    }

  return false;
}

/* Return true if X is a valid address for machine mode MODE.  If it is,
   fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
   effect.  */

static bool
loongarch_classify_address (struct loongarch_address_info *info, rtx x,
                            machine_mode mode, bool strict_p)
{
  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
      info->type = ADDRESS_REG;
      info->reg = x;
      info->offset = const0_rtx;
      return loongarch_valid_base_register_p (info->reg, mode, strict_p);

    case PLUS:
      if (loongarch_valid_base_register_p (XEXP (x, 0), mode, strict_p)
          && loongarch_valid_index_p (info, XEXP (x, 1), mode, strict_p))
        {
          info->reg = XEXP (x, 0);
          return true;
        }

      if (loongarch_valid_base_register_p (XEXP (x, 1), mode, strict_p)
          && loongarch_valid_index_p (info, XEXP (x, 0), mode, strict_p))
        {
          info->reg = XEXP (x, 1);
          return true;
        }

      info->type = ADDRESS_REG;
      info->reg = XEXP (x, 0);
      info->offset = XEXP (x, 1);
      return (loongarch_valid_base_register_p (info->reg, mode, strict_p)
              && loongarch_valid_offset_p (info->offset, mode));

    case LO_SUM:
      info->type = ADDRESS_LO_SUM;
      info->reg = XEXP (x, 0);
      info->offset = XEXP (x, 1);
      /* We have to trust the creator of the LO_SUM to do something
         vaguely sane.  Target-independent code that creates a LO_SUM
         should also create and verify the matching HIGH.
         Target-independent code that adds an offset to a LO_SUM must
         prove that the offset will not induce a carry.  Failure to do
         either of these things would be a bug, and we are not required
         to check for it here.  This backend itself should only create
         LO_SUMs for valid symbolic constants, with the high part being
         either a HIGH or a copy of _gp.  */
      info->symbol_type
        = loongarch_classify_symbolic_expression (info->offset);
      return (loongarch_valid_base_register_p (info->reg, mode, strict_p)
              && loongarch_valid_lo_sum_p (info->symbol_type, mode,
                                           info->offset));
    case CONST_INT:
      /* Small-integer addresses don't occur very often, but they
         are legitimate if $r0 is a valid base register.  */
      info->type = ADDRESS_CONST_INT;
      return IMM12_OPERAND (INTVAL (x));

    default:
      return false;
    }
}

/* Implement TARGET_LEGITIMATE_ADDRESS_P.  */

static bool
loongarch_legitimate_address_p (machine_mode mode, rtx x, bool strict_p,
                                code_helper = ERROR_MARK)
{
  struct loongarch_address_info addr;

  return loongarch_classify_address (&addr, x, mode, strict_p);
}

/* Return true if ADDR matches the pattern for the indexed address
   instruction.  */

static bool
loongarch_index_address_p (rtx addr, machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (addr) != PLUS
      || !REG_P (XEXP (addr, 0))
      || !REG_P (XEXP (addr, 1)))
    return false;
  return true;
}

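/* Worker for loongarch_address_insns and loongarch_address_cost: return
   the number of instructions needed for address X in mode MODE, counting
   each ADDRESS_REG_REG address as REG_REG_COST instructions.  Return 0
   if X isn't valid for MODE.  */
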
static int
loongarch_address_insns_1 (rtx x, machine_mode mode, bool might_split_p,
                           int reg_reg_cost)
{
  struct loongarch_address_info addr;
  int factor;
  bool lsx_p = (!might_split_p
                && (LSX_SUPPORTED_MODE_P (mode)
                    || LASX_SUPPORTED_MODE_P (mode)));

  if (!loongarch_classify_address (&addr, x, mode, false))
    return 0;

  /* BLKmode is used for single unaligned loads and stores and should
     not count as a multiword mode.  (GET_MODE_SIZE (BLKmode) is pretty
     meaningless, so we have to single it out as a special case one way
     or the other.)  */
  if (mode != BLKmode && might_split_p)
    factor = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  else
    factor = 1;

  if (loongarch_classify_address (&addr, x, mode, false))
    switch (addr.type)
      {
      case ADDRESS_REG:
        if (lsx_p)
          {
            /* LSX LD.* and ST.* support 12-bit signed offsets.  */
            if (IMM12_OPERAND (INTVAL (addr.offset)))
              return 1;
            else
              return 0;
          }
        return factor;

      case ADDRESS_REG_REG:
        return factor * reg_reg_cost;

      case ADDRESS_CONST_INT:
        return lsx_p ? 0 : factor;

      case ADDRESS_LO_SUM:
        return factor + 1;

      case ADDRESS_SYMBOLIC:
        return lsx_p ? 0
          : factor * loongarch_symbol_insns (addr.symbol_type, mode);
      }
  return 0;
}

/* Return the number of instructions needed to load or store a value
   of mode MODE at address X.  Return 0 if X isn't valid for MODE.
   Assume that multiword moves may need to be split into word moves
   if MIGHT_SPLIT_P, otherwise assume that a single load or store is
   enough.  */

int
loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p)
{
  return loongarch_address_insns_1 (x, mode, might_split_p, 1);
}

/* Return true if X fits within an unsigned field of BITS bits that is
   shifted left SHIFT bits before being used.  */

bool
loongarch_unsigned_immediate_p (unsigned HOST_WIDE_INT x, int bits,
                                int shift = 0)
{
  return (x & ((1 << shift) - 1)) == 0 && x < ((unsigned) 1 << (shift + bits));
}

/* Return true if X fits within a signed field of BITS bits that is
   shifted left SHIFT bits before being used.  */

bool
loongarch_signed_immediate_p (unsigned HOST_WIDE_INT x, int bits,
                              int shift = 0)
{
  x += 1 << (bits + shift - 1);
  return loongarch_unsigned_immediate_p (x, bits, shift);
}

/* Return the scale shift that is applied to the LSX LD/ST address
   offset.  */

int
loongarch_ldst_scaled_shift (machine_mode mode)
{
  int shift = exact_log2 (GET_MODE_UNIT_SIZE (mode));

  if (shift < 0 || shift > 8)
    gcc_unreachable ();

  return shift;
}

/* Return true if X is a legitimate address with a 12-bit offset
   or addr.type is ADDRESS_LO_SUM.
   MODE is the mode of the value being accessed.  */

bool
loongarch_12bit_offset_address_p (rtx x, machine_mode mode)
{
  struct loongarch_address_info addr;

  return (loongarch_classify_address (&addr, x, mode, false)
          && ((addr.type == ADDRESS_REG
               && CONST_INT_P (addr.offset)
               && LARCH_12BIT_OFFSET_P (INTVAL (addr.offset)))
              || addr.type == ADDRESS_LO_SUM));
}

/* Return true if X is a legitimate address with a 14-bit offset shifted 2.
   MODE is the mode of the value being accessed.  */

bool
loongarch_14bit_shifted_offset_address_p (rtx x, machine_mode mode)
{
  struct loongarch_address_info addr;

  return (loongarch_classify_address (&addr, x, mode, false)
          && addr.type == ADDRESS_REG
          && CONST_INT_P (addr.offset)
          && LARCH_16BIT_OFFSET_P (INTVAL (addr.offset))
          && LARCH_SHIFT_2_OFFSET_P (INTVAL (addr.offset)));
}

/* Return true if X is a legitimate address with base and index.
   MODE is the mode of the value being accessed.  */

bool
loongarch_base_index_address_p (rtx x, machine_mode mode)
{
  struct loongarch_address_info addr;

  return (loongarch_classify_address (&addr, x, mode, false)
          && addr.type == ADDRESS_REG_REG
          && REG_P (addr.offset));
}

/* Return the number of instructions needed to load constant X.
   Return 0 if X isn't a valid constant.  */

int
loongarch_const_insns (rtx x)
{
  enum loongarch_symbol_type symbol_type;
  rtx offset;

  switch (GET_CODE (x))
    {
    case HIGH:
      if (!loongarch_symbolic_constant_p (XEXP (x, 0), &symbol_type)
          || !loongarch_split_symbol_type (symbol_type))
        return 0;

      /* This is simply a PCALAU12I.  */
      return 1;

    case CONST_INT:
      return loongarch_integer_cost (INTVAL (x));

    case CONST_VECTOR:
      if ((LSX_SUPPORTED_MODE_P (GET_MODE (x))
           || LASX_SUPPORTED_MODE_P (GET_MODE (x)))
          && loongarch_const_vector_vrepli (x, GET_MODE (x)))
        return 1;
      /* Fall through.  */
    case CONST_DOUBLE:
      return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;

    case CONST:
      /* See if we can refer to X directly.  */
      if (loongarch_symbolic_constant_p (x, &symbol_type))
        return loongarch_symbol_insns (symbol_type, MAX_MACHINE_MODE);

      /* Otherwise try splitting the constant into a base and offset.
         If the offset is a 12-bit value, we can load the base address
         into a register and then use ADDI.{W/D} to add in the offset.
         If the offset is larger, we can load the base and offset
         into separate registers and add them together with ADD.{W/D}.
         However, the latter is only possible before reload; during
         and after reload, we must have the option of forcing the
         constant into the pool instead.  */
      split_const (x, &x, &offset);
      if (offset != 0)
        {
          int n = loongarch_const_insns (x);
          if (n != 0)
            {
              if (IMM12_INT (offset))
                return n + 1;
              else if (!targetm.cannot_force_const_mem (GET_MODE (x), x))
                return n + 1 + loongarch_integer_cost (INTVAL (offset));
            }
        }
      return 0;

    case SYMBOL_REF:
    case LABEL_REF:
      return loongarch_symbol_insns (
        loongarch_classify_symbol (x), MAX_MACHINE_MODE);

    default:
      return 0;
    }
}

/* X is a doubleword constant that can be handled by splitting it into
   two words and loading each word separately.  Return the number of
   instructions required to do this.  */

int
loongarch_split_const_insns (rtx x)
{
  unsigned int low, high;

  low = loongarch_const_insns (loongarch_subword (x, false));
  high = loongarch_const_insns (loongarch_subword (x, true));
  gcc_assert (low > 0 && high > 0);
  return low + high;
}

/* Return one word of 128-bit value OP, taking into account the fixed
   endianness of certain registers.  BYTE selects from the byte address.  */

rtx
loongarch_subword_at_byte (rtx op, unsigned int byte)
{
  machine_mode mode;

  mode = GET_MODE (op);
  if (mode == VOIDmode)
    mode = TImode;

  gcc_assert (!FP_REG_RTX_P (op));

  if (MEM_P (op))
    return loongarch_rewrite_small_data (adjust_address (op, word_mode, byte));

  return simplify_gen_subreg (word_mode, op, mode, byte);
}

/* Return the number of instructions needed to implement INSN,
   given that it loads from or stores to MEM.  */

int
loongarch_load_store_insns (rtx mem, rtx_insn *insn)
{
  machine_mode mode;
  bool might_split_p;
  rtx set;

  gcc_assert (MEM_P (mem));
  mode = GET_MODE (mem);

  /* Try to prove that INSN does not need to be split.  */
  might_split_p = GET_MODE_SIZE (mode) > UNITS_PER_WORD;
  if (might_split_p)
    {
      set = single_set (insn);
      if (set
          && !loongarch_split_move_p (SET_DEST (set), SET_SRC (set)))
        might_split_p = false;
    }

  return loongarch_address_insns (XEXP (mem, 0), mode, might_split_p);
}

/* Return true if we need to trap on division by zero.  */

bool
loongarch_check_zero_div_p (void)
{
  /* If -m[no-]check-zero-division is given explicitly, respect it.  */
  if (target_flags_explicit & MASK_CHECK_ZERO_DIV)
    return TARGET_CHECK_ZERO_DIV;

  /* If not, don't trap for optimized code except at -Og.  */
  return !optimize || optimize_debug;
}

/* Return the number of instructions needed for an integer division.  */

int
loongarch_idiv_insns (machine_mode mode ATTRIBUTE_UNUSED)
{
  int count;

  count = 1;
  if (loongarch_check_zero_div_p ())
    count += 2;

  return count;
}

/* Emit an instruction of the form (set TARGET (CODE OP0 OP1)).  */

void
loongarch_emit_binary (enum rtx_code code, rtx target, rtx op0, rtx op1)
{
  emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (code, GET_MODE (target),
                                                  op0, op1)));
}

/* Compute (CODE OP0 OP1) and store the result in a new register
   of mode MODE.  Return that new register.  */

static rtx
loongarch_force_binary (machine_mode mode, enum rtx_code code, rtx op0,
                        rtx op1)
{
  rtx reg;

  reg = gen_reg_rtx (mode);
  loongarch_emit_binary (code, reg, op0, op1);
  return reg;
}

/* Copy VALUE to a register and return that register.  If new pseudos
   are allowed, copy it into a new register, otherwise use DEST.  */

static rtx
loongarch_force_temporary (rtx dest, rtx value)
{
  if (can_create_pseudo_p ())
    return force_reg (Pmode, value);
  else
    {
      loongarch_emit_move (dest, value);
      return dest;
    }
}

/* Wrap symbol or label BASE in an UNSPEC address of type SYMBOL_TYPE,
   then add CONST_INT OFFSET to the result.  */

static rtx
loongarch_unspec_address_offset (rtx base, rtx offset,
                                 enum loongarch_symbol_type symbol_type)
{
  base = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, base),
                         UNSPEC_ADDRESS_FIRST + symbol_type);
  if (offset != const0_rtx)
    base = gen_rtx_PLUS (Pmode, base, offset);
  return gen_rtx_CONST (Pmode, base);
}

/* Return an UNSPEC address with underlying address ADDRESS and symbol
   type SYMBOL_TYPE.  */

rtx
loongarch_unspec_address (rtx address, enum loongarch_symbol_type symbol_type)
{
  rtx base, offset;

  split_const (address, &base, &offset);
  return loongarch_unspec_address_offset (base, offset, symbol_type);
}

/* Emit an instruction of the form (set TARGET SRC).  */

static rtx
loongarch_emit_set (rtx target, rtx src)
{
  emit_insn (gen_rtx_SET (target, src));
  return target;
}

/* If OP is an UNSPEC address, return the address to which it refers,
   otherwise return OP itself.  */

rtx
loongarch_strip_unspec_address (rtx op)
{
  rtx base, offset;

  split_const (op, &base, &offset);
  if (UNSPEC_ADDRESS_P (base))
    op = plus_constant (Pmode, UNSPEC_ADDRESS (base), INTVAL (offset));
  return op;
}

/* Return a legitimate address for REG + OFFSET.  TEMP is as for
   loongarch_force_temporary; it is only needed when OFFSET is not an
   IMM12_OPERAND.  */

static rtx
loongarch_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset)
{
  if (!IMM12_OPERAND (offset))
    {
      rtx high;

      /* Leave OFFSET as a 12-bit offset and put the excess in HIGH.
         The addition inside the macro CONST_HIGH_PART may cause an
         overflow, so we need to force a sign-extension check.  */
      high = gen_int_mode (CONST_HIGH_PART (offset), Pmode);
      offset = CONST_LOW_PART (offset);
      high = loongarch_force_temporary (temp, high);
      reg = loongarch_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg));
    }
  return plus_constant (Pmode, reg, offset);
}

/* The __tls_get_addr symbol.  */
static GTY (()) rtx loongarch_tls_symbol;

/* Load an entry for a TLS access.  */

static rtx
loongarch_load_tls (rtx dest, rtx sym, enum loongarch_symbol_type type)
{
  /* TLS LE gets a 32- or 64-bit offset here, so one register can do it.  */
  if (type == SYMBOL_TLS_LE)
    return gen_load_tls (Pmode, dest, sym);

  return loongarch_symbol_extreme_p (type)
    ? gen_movdi_symbolic_off64 (dest, sym, gen_reg_rtx (DImode))
    : gen_load_tls (Pmode, dest, sym);
}

/* Return an instruction sequence that calls __tls_get_addr.  SYM is
   the TLS symbol we are referencing and TYPE is the symbol type to use
   (either global dynamic or local dynamic).  V0 is an RTX for the
   return value location.  */

static rtx_insn *
loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0)
{
  rtx loc, a0;
  rtx_insn *insn;
  rtx tmp = gen_reg_rtx (Pmode);

  a0 = gen_rtx_REG (Pmode, GP_ARG_FIRST);

  if (!loongarch_tls_symbol)
    loongarch_tls_symbol = init_one_libfunc ("__tls_get_addr");

  loc = loongarch_unspec_address (sym, type);

  start_sequence ();

  if (loongarch_explicit_relocs_p (type))
    {
      if (TARGET_CMODEL_EXTREME)
        {
          rtx part1 = gen_reg_rtx (Pmode);
          rtx part2 = gen_reg_rtx (Pmode);

          emit_insn (gen_la_pcrel64_two_parts (part1, part2, loc));
          emit_move_insn (a0, gen_rtx_PLUS (Pmode, part1, part2));
        }
      else
        {
          /* Split the TLS symbol into high and low parts.  */
          rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc));

          high = loongarch_force_temporary (tmp, high);
          emit_insn (gen_tls_low (Pmode, a0, high, loc));
        }
    }
  else
    emit_insn (loongarch_load_tls (a0, loc, type));

  if (flag_plt)
    {
      switch (la_target.cmodel)
        {
        case CMODEL_NORMAL:
          insn = emit_call_insn (gen_call_value_internal (v0,
                                                          loongarch_tls_symbol,
                                                          const0_rtx));
          break;

        case CMODEL_MEDIUM:
          {
            if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
              {
                rtx call;

                if (HAVE_AS_SUPPORT_CALL36)
                  call = gen_call_value_internal (v0, loongarch_tls_symbol,
                                                  const0_rtx);
                else
                  {
                    rtx reg = gen_reg_rtx (Pmode);
                    emit_insn (gen_pcalau12i (Pmode, reg,
                                              loongarch_tls_symbol));
                    call = gen_call_value_internal_1 (Pmode, v0, reg,
                                                      loongarch_tls_symbol,
                                                      const0_rtx);
                  }
                insn = emit_call_insn (call);
              }
            else
              {
                rtx reg = gen_reg_rtx (Pmode);
                emit_move_insn (reg, loongarch_tls_symbol);
                insn = emit_call_insn (gen_call_value_internal (v0,
                                                                reg,
                                                                const0_rtx));
              }
            break;
          }

        /* The extreme code model does not support PLT.  */
        case CMODEL_EXTREME:
        case CMODEL_LARGE:
        case CMODEL_TINY:
        case CMODEL_TINY_STATIC:
        default:
          gcc_unreachable ();
        }
    }
  else
    {
      rtx dest = gen_reg_rtx (Pmode);

      switch (la_target.cmodel)
        {
        case CMODEL_NORMAL:
        case CMODEL_MEDIUM:
          {
            if (loongarch_explicit_relocs_p (SYMBOL_GOT_DISP))
              {
                rtx high = gen_reg_rtx (Pmode);
                loongarch_emit_move (high,
                                     gen_rtx_HIGH (Pmode,
                                                   loongarch_tls_symbol));
                emit_insn (gen_ld_from_got (Pmode, dest, high,
                                            loongarch_tls_symbol));
              }
            else
              loongarch_emit_move (dest, loongarch_tls_symbol);
            break;
          }

        case CMODEL_EXTREME:
          {
            if (loongarch_explicit_relocs_p (SYMBOL_GOT_DISP))
              {
                gcc_assert (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE);

                rtx part1 = gen_reg_rtx (Pmode);
                rtx part2 = gen_reg_rtx (Pmode);

                emit_insn (gen_la_pcrel64_two_parts (part1, part2,
                                                     loongarch_tls_symbol));
                loongarch_emit_move (
                  dest,
                  gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode,
                                                    part1,
                                                    part2)));

                /* Put a REG_EQUAL note here to allow CSE (storing
                   part1 + part2, i.e. the address of tls_get_addr, into
                   a saved register so it can be reused for multiple TLS
                   accesses).  */
                rtx sum = gen_rtx_UNSPEC (
                  Pmode, gen_rtvec (1, loongarch_tls_symbol),
                  UNSPEC_ADDRESS_FIRST
                  + loongarch_classify_symbol (loongarch_tls_symbol));
                set_unique_reg_note (get_last_insn (), REG_EQUAL, sum);
              }
            else
              emit_insn (gen_movdi_symbolic_off64 (dest, loongarch_tls_symbol,
                                                   gen_reg_rtx (DImode)));
          }
          break;

        case CMODEL_LARGE:
        case CMODEL_TINY:
        case CMODEL_TINY_STATIC:
        default:
          gcc_unreachable ();
        }

      insn = emit_call_insn (gen_call_value_internal (v0, dest, const0_rtx));
    }

  RTL_CONST_CALL_P (insn) = 1;
  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), a0);
  insn = get_insns ();

  end_sequence ();

  return insn;
}

/* Generate the code to access LOC, a thread-local SYMBOL_REF, and return
   its address.  The return value will be both a valid address and a valid
   SET_SRC (either a REG or a LO_SUM).  */

static rtx
loongarch_legitimize_tls_address (rtx loc)
{
  rtx dest, tp, tmp, tmp1, tmp2, tmp3, a0;
  enum tls_model model = SYMBOL_REF_TLS_MODEL (loc);
  rtx_insn *insn;

  switch (model)
    {
    case TLS_MODEL_LOCAL_DYNAMIC:
      if (!TARGET_TLS_DESC)
        {
          tmp = gen_rtx_REG (Pmode, GP_RETURN);
          dest = gen_reg_rtx (Pmode);
          insn = loongarch_call_tls_get_addr (loc, SYMBOL_TLSLDM, tmp);
          emit_libcall_block (insn, dest, tmp, loc);
          break;
        }
      /* Fall through.  */
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (TARGET_TLS_DESC)
        {
          a0 = gen_rtx_REG (Pmode, GP_ARG_FIRST);
          dest = gen_reg_rtx (Pmode);
          tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);

          if (TARGET_CMODEL_EXTREME)
            emit_insn (gen_got_load_tls_desc_off64 (loc, gen_reg_rtx (DImode)));
          else
            emit_insn (gen_got_load_tls_desc (Pmode, loc));

          emit_insn (gen_add3_insn (dest, a0, tp));
        }
      else
        {
          tmp = gen_rtx_REG (Pmode, GP_RETURN);
          dest = gen_reg_rtx (Pmode);
          insn = loongarch_call_tls_get_addr (loc, SYMBOL_TLSGD, tmp);
          emit_libcall_block (insn, dest, tmp, loc);
        }
      break;

    case TLS_MODEL_INITIAL_EXEC:
      {
        /* la.tls.ie; tp-relative add.  */
        tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
        tmp1 = gen_reg_rtx (Pmode);
        tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_IE);
        dest = gen_reg_rtx (Pmode);
        if (loongarch_explicit_relocs_p (SYMBOL_TLS_IE))
          {
            if (TARGET_CMODEL_EXTREME)
              {
                gcc_assert (la_opt_explicit_relocs
                            != EXPLICIT_RELOCS_NONE);

                rtx part1 = gen_reg_rtx (Pmode);
                rtx part2 = gen_reg_rtx (Pmode);

                emit_insn (gen_la_pcrel64_two_parts (part1, part2,
                                                     tmp2));
                emit_move_insn (tmp1,
                                gen_rtx_MEM (Pmode,
                                             gen_rtx_PLUS (Pmode,
                                                           part1,
                                                           part2)));
              }
            else
              {
                tmp3 = gen_reg_rtx (Pmode);
                rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2));

                high = loongarch_force_temporary (tmp3, high);
                emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2));
              }
          }
        else
          emit_insn (loongarch_load_tls (tmp1, tmp2, SYMBOL_TLS_IE));
        emit_insn (gen_add3_insn (dest, tmp1, tp));
      }
      break;

    case TLS_MODEL_LOCAL_EXEC:
      {
        /* la.tls.le; tp-relative add.

           normal:
             lu12i.w $rd, %le_hi20(sym)
             ori $rd, $rd, %le_lo12(sym)
             add.{w/d} $rd, $rd, $tp
             (st.{w/d}/ld.{w/d} $rs, $rd, 0)

           tls le relax:
             lu12i.w $rd, %le_hi20_r(sym)
             add.{w/d} $rd,$rd,$tp
             addi.{w/d} $rd,$rd,%le_lo12_r(sym)
             (st.{w/d}/ld.{w/d} $rs, $rd, 0)

           extreme (when the code model is extreme, the TLS LE relax
           instruction sequence is not generated):
             lu12i.w $rd, %le_hi20(sym)
             ori $rd, $rd, %le_lo12(sym)
             lu32i.d $rd, %le64_lo20(sym)
             lu52i.d $rd, $rd, %le64_hi12(sym)
             add.d $rd, $rd, $tp
             (st.{w/d}/ld.{w/d} $rs, $rd, 0)  */

        tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
        tmp1 = gen_reg_rtx (Pmode);
        tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_LE);
        dest = gen_reg_rtx (Pmode);

        if (loongarch_explicit_relocs_p (SYMBOL_TLS_LE))
          {
            tmp3 = gen_reg_rtx (Pmode);
            rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2));
            high = loongarch_force_temporary (tmp3, high);

            /* The assembler does not implement TLS LE relax support when
               the code model is extreme, so in that case the old symbol
               address acquisition method is still used.  */
            if (HAVE_AS_TLS_LE_RELAXATION && !TARGET_CMODEL_EXTREME)
              {
                emit_insn (gen_add_tls_le_relax (Pmode, dest, high,
                                                 tp, loc));
                loongarch_emit_move (dest,
                                     gen_rtx_LO_SUM (Pmode, dest, tmp2));
                return dest;
              }
            else
              emit_insn (gen_ori_l_lo12 (Pmode, tmp1, high, tmp2));

            if (TARGET_CMODEL_EXTREME)
              {
                emit_insn (gen_lui_h_lo20 (tmp1, tmp1, tmp2));
                emit_insn (gen_lui_h_hi12 (tmp1, tmp1, tmp2));
              }
          }
        else
          emit_insn (loongarch_load_tls (tmp1, tmp2, SYMBOL_TLS_LE));
        emit_insn (gen_add3_insn (dest, tmp1, tp));
      }
      break;

    default:
      gcc_unreachable ();
    }
  return dest;
}

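/* Legitimize ADDR for use as a call target: force it into a register if
   it is not a valid call_insn_operand, and split it into a high part and
   a LO_SUM for the medium code model when the assembler lacks call36
   support.  */
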
rtx
loongarch_legitimize_call_address (rtx addr)
{
  if (!call_insn_operand (addr, VOIDmode))
    {
      rtx reg = gen_reg_rtx (Pmode);
      loongarch_emit_move (reg, addr);
      return reg;
    }

  enum loongarch_symbol_type symbol_type = loongarch_classify_symbol (addr);

  /* If the compilation option '-mcmodel=medium' is given and the
     assembler does not support call36, the following sequence of
     instructions will be used for the function call:
       pcalau12i $rd, %pc_hi20(sym)
       jr $rd, %pc_lo12(sym)  */

  if (TARGET_CMODEL_MEDIUM
      && !HAVE_AS_SUPPORT_CALL36
      && (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
      && (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
      && (symbol_type == SYMBOL_PCREL
          || (symbol_type == SYMBOL_GOT_DISP && flag_plt)))
    {
      rtx reg = gen_reg_rtx (Pmode);
      emit_insn (gen_pcalau12i (Pmode, reg, addr));
      return gen_rtx_LO_SUM (Pmode, reg, addr);
    }

  return addr;
}

/* If X is a PLUS of a CONST_INT, return the two terms in *BASE_PTR
   and *OFFSET_PTR.  Return X in *BASE_PTR and 0 in *OFFSET_PTR
   otherwise.  */

static void
loongarch_split_plus (rtx x, rtx *base_ptr, HOST_WIDE_INT *offset_ptr)
{
  if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
    {
      *base_ptr = XEXP (x, 0);
      *offset_ptr = INTVAL (XEXP (x, 1));
    }
  else
    {
      *base_ptr = x;
      *offset_ptr = 0;
    }
}

/* If X is not a valid address for mode MODE, force it into a register.  */

static rtx
loongarch_force_address (rtx x, machine_mode mode)
{
  if (!loongarch_legitimate_address_p (mode, x, false))
    x = force_reg (Pmode, x);
  return x;
}

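/* Return true if accessing a symbol of type TYPE requires the 64-bit
   "extreme" code sequence.  */
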
bool
loongarch_symbol_extreme_p (enum loongarch_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_PCREL:
      return false;
    case SYMBOL_PCREL64:
      return true;
    default:
      return TARGET_CMODEL_EXTREME;
    }
}

/* If MODE is MAX_MACHINE_MODE, ADDR appears as a move operand, otherwise
   it appears in a MEM of that mode.  Return true if ADDR is a legitimate
   constant in that context and can be split into high and low parts.
   If so, and if LOW_OUT is nonnull, emit the high part and store the
   low part in *LOW_OUT.  Leave *LOW_OUT unchanged otherwise.

   Return false if built with '-mexplicit-relocs=none'.

   TEMP is as for loongarch_force_temporary and is used to load the high
   part into a register.

   When MODE is MAX_MACHINE_MODE, the low part is guaranteed to be
   a legitimate SET_SRC for an .md pattern, otherwise the low part
   is guaranteed to be a legitimate address for mode MODE.  */

bool
loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
{
  enum loongarch_symbol_type symbol_type;

  if ((GET_CODE (addr) == HIGH && mode == MAX_MACHINE_MODE)
      || !loongarch_symbolic_constant_p (addr, &symbol_type)
      || !loongarch_explicit_relocs_p (symbol_type)
      || loongarch_symbol_insns (symbol_type, mode) == 0
      || !loongarch_split_symbol_type (symbol_type))
    return false;

  rtx high;

  if (temp == NULL)
    temp = gen_reg_rtx (Pmode);

  if (loongarch_symbol_extreme_p (symbol_type) && can_create_pseudo_p ())
    {
      gcc_assert (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE);

      high = gen_reg_rtx (Pmode);
      emit_insn (gen_la_pcrel64_two_parts (high, temp, addr));
    }
  else
    {
      /* Get the 12-31 bits of the address.  */
      high = gen_rtx_HIGH (Pmode, copy_rtx (addr));
      high = loongarch_force_temporary (temp, high);
    }

  if (low_out)
    switch (symbol_type)
      {
      case SYMBOL_PCREL64:
        if (can_create_pseudo_p ())
          {
            *low_out = gen_rtx_PLUS (Pmode, high, temp);
            break;
          }
        /* fall through */
      case SYMBOL_PCREL:
        *low_out = gen_rtx_LO_SUM (Pmode, high, addr);
        break;

      case SYMBOL_GOT_DISP:
        /* SYMBOL_GOT_DISP symbols are loaded from the GOT.  */
        {
          if (TARGET_CMODEL_EXTREME && can_create_pseudo_p ())
            *low_out = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, high,
                                                         temp));
          else
            {
              rtx low = gen_rtx_LO_SUM (Pmode, high, addr);
              rtx mem = gen_rtx_MEM (Pmode, low);
              *low_out = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, mem),
                                         UNSPEC_LOAD_FROM_GOT);

              /* Nonzero in a mem, if the memory is statically allocated
                 and read-only.  A common example of the latter is a
                 shared library's global offset table.  */
              MEM_READONLY_P (mem) = 1;
            }

          break;
        }

      default:
        gcc_unreachable ();
      }

  return true;
}

/* Helper for loongarch_legitimize_address.  Given X, return true if it
   is a left shift by 1, 2 or 3 positions, or a multiply by 2, 4 or 8.

   These respectively represent canonical shift-add rtxs or scaled
   memory addresses.  */
static bool
mem_shadd_or_shadd_rtx_p (rtx x)
{
  return ((GET_CODE (x) == ASHIFT
           || GET_CODE (x) == MULT)
          && CONST_INT_P (XEXP (x, 1))
          && ((GET_CODE (x) == ASHIFT && IN_RANGE (INTVAL (XEXP (x, 1)), 1, 3))
              || (GET_CODE (x) == MULT
                  && IN_RANGE (exact_log2 (INTVAL (XEXP (x, 1))), 1, 3))));
}

/* This function is used to implement LEGITIMIZE_ADDRESS.  If X can
   be legitimized in a way that the generic machinery might not expect,
   return a new address, otherwise return NULL.  MODE is the mode of
   the memory being accessed.  */

static rtx
loongarch_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
                              machine_mode mode)
{
  rtx base, addr;
  HOST_WIDE_INT offset;

  if (loongarch_tls_symbol_p (x))
    return loongarch_legitimize_tls_address (x);

  /* See if the address can be split into a high part and a LO_SUM.  */
  if (loongarch_split_symbol (NULL, x, mode, &addr))
    return loongarch_force_address (addr, mode);

  /* Handle BASE + OFFSET using loongarch_add_offset.  */
  loongarch_split_plus (x, &base, &offset);
  if (offset != 0)
    {
      /* Handle (plus (plus (mult (a) (mem_shadd_constant)) (fp)) (C)) case.  */
      if (GET_CODE (base) == PLUS && mem_shadd_or_shadd_rtx_p (XEXP (base, 0))
          && IMM12_OPERAND (offset))
        {
          rtx index = XEXP (base, 0);
          rtx fp = XEXP (base, 1);

          if (REG_P (fp) && REGNO (fp) == VIRTUAL_STACK_VARS_REGNUM)
            {
              /* If we were given a MULT, we must fix the constant
                 as we're going to create the ASHIFT form.  */
              int shift_val = INTVAL (XEXP (index, 1));
              if (GET_CODE (index) == MULT)
                shift_val = exact_log2 (shift_val);

              rtx reg1 = gen_reg_rtx (Pmode);
              rtx reg3 = gen_reg_rtx (Pmode);
              loongarch_emit_binary (PLUS, reg1, fp, GEN_INT (offset));
              loongarch_emit_binary (PLUS, reg3,
                                     gen_rtx_ASHIFT (Pmode, XEXP (index, 0),
                                                     GEN_INT (shift_val)),
                                     reg1);

              return reg3;
            }
        }

      if (!loongarch_valid_base_register_p (base, mode, false))
        base = copy_to_mode_reg (Pmode, base);
      addr = loongarch_add_offset (NULL, base, offset);
      return loongarch_force_address (addr, mode);
    }

  return x;
}

/* Load VALUE into DEST.  TEMP is as for loongarch_force_temporary.  */

void
loongarch_move_integer (rtx temp, rtx dest, unsigned HOST_WIDE_INT value)
{
  struct loongarch_integer_op codes[LARCH_MAX_INTEGER_OPS];
  machine_mode mode;
  unsigned int i, num_ops;
  rtx x;

  mode = GET_MODE (dest);
  num_ops = loongarch_build_integer (codes, value);

  /* Apply each binary operation to X.  Invariant: X is a legitimate
     source operand for a SET pattern.  */
  x = GEN_INT (codes[0].value);
  for (i = 1; i < num_ops; i++)
    {
      if (!can_create_pseudo_p ())
        {
          emit_insn (gen_rtx_SET (temp, x));
          x = temp;
        }
      else
        x = force_reg (mode, x);

      set_unique_reg_note (get_last_insn (), REG_EQUAL,
                           GEN_INT (codes[i-1].curr_value));

      switch (codes[i].method)
        {
        case METHOD_NORMAL:
          x = gen_rtx_fmt_ee (codes[i].code, mode, x,
                              GEN_INT (codes[i].value));
          break;
        case METHOD_LU32I:
          gcc_assert (mode == DImode);
          x = gen_rtx_IOR (DImode,
                           gen_rtx_ZERO_EXTEND (DImode,
                                                gen_rtx_SUBREG (SImode, x, 0)),
                           GEN_INT (codes[i].value));
          break;
        case METHOD_LU52I:
          gcc_assert (mode == DImode);
          x = gen_rtx_IOR (DImode,
                           gen_rtx_AND (DImode, x, GEN_INT (0xfffffffffffff)),
                           GEN_INT (codes[i].value));
          break;
        case METHOD_MIRROR:
          gcc_assert (mode == DImode);
          emit_insn (gen_insvdi (x, GEN_INT (32), GEN_INT (32), x));
          break;
        default:
          gcc_unreachable ();
        }
    }

  emit_insn (gen_rtx_SET (dest, x));
}

/* Subroutine of loongarch_legitimize_move.  Move constant SRC into register
   DEST given that SRC satisfies immediate_operand but doesn't satisfy
   move_operand.  */

static void
loongarch_legitimize_const_move (machine_mode mode, rtx dest, rtx src)
{
  rtx base, offset;

  /* Split moves of big integers into smaller pieces.  */
  if (splittable_const_int_operand (src, mode))
    {
      loongarch_move_integer (dest, dest, INTVAL (src));
      return;
    }

  /* Split moves of symbolic constants into high and low.  */
  if (loongarch_split_symbol (dest, src, MAX_MACHINE_MODE, &src))
    {
      loongarch_emit_set (dest, src);
      return;
    }

  /* Generate the appropriate access sequences for TLS symbols.  */
  if (loongarch_tls_symbol_p (src))
    {
      loongarch_emit_move (dest, loongarch_legitimize_tls_address (src));
      return;
    }

  /* If we have (const (plus symbol offset)), and that expression cannot
     be forced into memory, load the symbol first and add in the offset.
     Prefer to do this even if the constant _can_ be forced into memory,
     as it usually produces better code.  */
  split_const (src, &base, &offset);
  if (offset != const0_rtx
      && (targetm.cannot_force_const_mem (mode, src)
          || (can_create_pseudo_p ())))
    {
      base = loongarch_force_temporary (dest, base);
      loongarch_emit_move (dest,
                           loongarch_add_offset (NULL, base, INTVAL (offset)));
      return;
    }

  src = force_const_mem (mode, src);

  loongarch_emit_move (dest, src);
}

/* If (set DEST SRC) is not a valid move instruction, emit an equivalent
   sequence that is valid.  */

bool
loongarch_legitimize_move (machine_mode mode, rtx dest, rtx src)
{
  if (!register_operand (dest, mode) && !reg_or_0_operand (src, mode))
    {
      loongarch_emit_move (dest, force_reg (mode, src));
      return true;
    }

  /* Both src and dest are non-registers; one special case is supported
     where the source is (const_int 0) and the store can source the zero
     register.  LSX and LASX are never able to source the zero register
     directly in memory operations.  */
  if (!register_operand (dest, mode) && !register_operand (src, mode)
      && (!const_0_operand (src, mode)
          || LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode)))
    {
      loongarch_emit_move (dest, force_reg (mode, src));
      return true;
    }

  /* We need to deal with constants that would be legitimate
     immediate_operands but aren't legitimate move_operands.  */
  if (CONSTANT_P (src) && !move_operand (src, mode))
    {
      loongarch_legitimize_const_move (mode, dest, src);
      set_unique_reg_note (get_last_insn (), REG_EQUAL, copy_rtx (src));
      return true;
    }

  /* Obtain the address of the symbol with a macro instruction that
     needs two registers.  */
  enum loongarch_symbol_type symbol_type;
  if (TARGET_64BIT && register_operand (dest, mode)
      && loongarch_symbolic_constant_p (src, &symbol_type)
      && loongarch_symbol_extreme_p (symbol_type))
    {
      gcc_assert (can_create_pseudo_p ());
      rtx tmp_reg = gen_reg_rtx (DImode);
      emit_insn (gen_movdi_symbolic_off64 (dest, src, tmp_reg));
      set_unique_reg_note (get_last_insn (), REG_UNUSED, tmp_reg);
      set_unique_reg_note (get_last_insn (), REG_EQUAL, src);
      return true;
    }

  return false;
}

/* Return true if OP refers to small data symbols directly.  */

static int
loongarch_small_data_pattern_1 (rtx x)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, x, ALL)
    {
      rtx x = *iter;

      /* We make no particular guarantee about which symbolic constants are
         acceptable as asm operands versus which must be forced into a GPR.  */
      if (GET_CODE (x) == ASM_OPERANDS)
        iter.skip_subrtxes ();
      else if (MEM_P (x))
        {
          if (loongarch_small_data_pattern_1 (XEXP (x, 0)))
            return true;
          iter.skip_subrtxes ();
        }
    }
  return false;
}

/* Return true if OP refers to small data symbols directly.  */

bool
loongarch_small_data_pattern_p (rtx op)
{
  return loongarch_small_data_pattern_1 (op);
}

/* Rewrite *LOC so that it refers to small data using explicit
   relocations.  */

static void
loongarch_rewrite_small_data_1 (rtx *loc)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
    {
      rtx *loc = *iter;
      if (MEM_P (*loc))
        {
          loongarch_rewrite_small_data_1 (&XEXP (*loc, 0));
          iter.skip_subrtxes ();
        }
    }
}

/* Rewrite instruction pattern PATTERN so that it refers to small data
   using explicit relocations.  */

rtx
loongarch_rewrite_small_data (rtx pattern)
{
  pattern = copy_insn (pattern);
  loongarch_rewrite_small_data_1 (&pattern);
  return pattern;
}

/* The cost of loading values from the constant pool.  It should be
   larger than the cost of any constant we want to synthesize inline.  */
#define CONSTANT_POOL_COST COSTS_N_INSNS (8)

/* Return true if there is an instruction that implements CODE
   and if that instruction accepts X as an immediate operand.  */

static int
loongarch_immediate_operand_p (int code, HOST_WIDE_INT x)
{
  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      /* All shift counts are truncated to a valid constant.  */
      return true;

    case ROTATE:
    case ROTATERT:
      return true;

    case AND:
    case IOR:
    case XOR:
      /* These instructions take 12-bit unsigned immediates.  */
      return IMM12_OPERAND_UNSIGNED (x);

    case PLUS:
    case LT:
    case LTU:
      /* These instructions take 12-bit signed immediates.  */
      return IMM12_OPERAND (x);

    case EQ:
    case NE:
    case GT:
    case GTU:
      /* The "immediate" forms of these instructions are really
         implemented as comparisons with register 0.  */
      return x == 0;

    case GE:
    case GEU:
      /* Likewise, meaning that the only valid immediate operand is 1.  */
      return x == 1;

    case LE:
      /* We add 1 to the immediate and use SLT.  */
      return IMM12_OPERAND (x + 1);

    case LEU:
      /* Likewise SLTU, but reject the always-true case.  */
      return IMM12_OPERAND (x + 1) && x + 1 != 0;

    case SIGN_EXTRACT:
    case ZERO_EXTRACT:
      /* The bit position and size are immediate operands.  */
      return 1;

    default:
      /* By default assume that $0 can be used for 0.  */
      return x == 0;
    }
}

/* Return the cost of binary operation X, given that the instruction
   sequence for a word-sized or smaller operation has cost SINGLE_COST
   and that the sequence of a double-word operation has cost DOUBLE_COST.
   If SPEED is true, optimize for speed otherwise optimize for size.  */

static int
loongarch_binary_cost (rtx x, int single_cost, int double_cost, bool speed)
{
  int cost;

  if (GET_MODE_SIZE (GET_MODE (x)) == UNITS_PER_WORD * 2)
    cost = double_cost;
  else
    cost = single_cost;
  return (cost
          + set_src_cost (XEXP (x, 0), GET_MODE (x), speed)
          + rtx_cost (XEXP (x, 1), GET_MODE (x), GET_CODE (x), 1, speed));
}

/* Return the cost of floating-point multiplications of mode MODE.  */

static int
loongarch_fp_mult_cost (machine_mode mode)
{
  return mode == DFmode ? loongarch_cost->fp_mult_df
                        : loongarch_cost->fp_mult_sf;
}

/* Return the cost of floating-point divisions of mode MODE.  */

static int
loongarch_fp_div_cost (machine_mode mode)
{
  return mode == DFmode ? loongarch_cost->fp_div_df
                        : loongarch_cost->fp_div_sf;
}

/* Return the cost of sign-extending OP to mode MODE, not including the
   cost of OP itself.  */

static int
loongarch_sign_extend_cost (rtx op)
{
  if (MEM_P (op))
    /* Extended loads are as cheap as unextended ones.  */
    return 0;

  return COSTS_N_INSNS (1);
}

/* Return the cost of zero-extending OP to mode MODE, not including the
   cost of OP itself.  */

static int
loongarch_zero_extend_cost (rtx op)
{
  if (MEM_P (op))
    /* Extended loads are as cheap as unextended ones.  */
    return 0;

  /* We can use ANDI.  */
  return COSTS_N_INSNS (1);
}

/* Return the cost of moving between two registers of mode MODE,
   assuming that the move will be in pieces of at most UNITS bytes.  */

static int
loongarch_set_reg_reg_piece_cost (machine_mode mode, unsigned int units)
{
  return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
}

static int
|
||
loongarch_use_bstrins_for_ior_with_mask_1 (machine_mode mode,
|
||
unsigned HOST_WIDE_INT mask1,
|
||
unsigned HOST_WIDE_INT mask2)
|
||
{
|
||
if (mask1 != ~mask2 || !mask1 || !mask2)
|
||
return 0;
|
||
|
||
/* Try to avoid a right-shift. */
|
||
if (low_bitmask_len (mode, mask1) != -1)
|
||
return -1;
|
||
|
||
if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1)
|
||
return 1;
|
||
|
||
if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1)
|
||
return -1;
|
||
|
||
return 0;
|
||
}
|
||
|
||
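/* Illustrative note (added; not in the upstream source): a nonzero result
   means (x & mask1) | (y & mask2) can be implemented with a single
   bstrins, and the sign says which operand supplies the inserted field.
   E.g. mask1 = 0xff (a low bitmask) with mask2 = ~0xff returns -1, a
   case where no right-shift of the inserted field is needed.  */
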
/* Return the cost of moving between two registers of mode MODE.  */

static int
loongarch_set_reg_reg_cost (machine_mode mode)
{
  switch (GET_MODE_CLASS (mode))
    {
    case MODE_CC:
      return loongarch_set_reg_reg_piece_cost (mode, GET_MODE_SIZE (CCmode));

    case MODE_FLOAT:
    case MODE_COMPLEX_FLOAT:
    case MODE_VECTOR_FLOAT:
      if (TARGET_HARD_FLOAT)
        return loongarch_set_reg_reg_piece_cost (mode, UNITS_PER_HWFPVALUE);
      /* Fall through.  */

    default:
      return loongarch_set_reg_reg_piece_cost (mode, UNITS_PER_WORD);
    }
}

/* Implement TARGET_ADDRESS_COST.  */

static int
loongarch_address_cost (rtx addr, machine_mode mode,
                        addr_space_t as ATTRIBUTE_UNUSED,
                        bool speed ATTRIBUTE_UNUSED)
{
  return loongarch_address_insns_1 (addr, mode, false,
                                    la_addr_reg_reg_cost);
}

/* Implement TARGET_RTX_COSTS.  */

static bool
loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
                     int opno ATTRIBUTE_UNUSED, int *total, bool speed)
{
  int code = GET_CODE (x);
  bool float_mode_p = FLOAT_MODE_P (mode);
  int cost;
  rtx addr;

  if (outer_code == COMPARE)
    {
      gcc_assert (CONSTANT_P (x));
      *total = 0;
      return true;
    }

  switch (code)
    {
    case CONST_INT:
      if (TARGET_64BIT && outer_code == AND && UINTVAL (x) == 0xffffffff)
        {
          *total = 0;
          return true;
        }

      /* When not optimizing for size, we care more about the cost
         of hot code, and hot code is often in a loop.  If a constant
         operand needs to be forced into a register, we will often be
         able to hoist the constant load out of the loop, so the load
         should not contribute to the cost.  */
      if (speed || loongarch_immediate_operand_p (outer_code, INTVAL (x)))
        {
          *total = 0;
          return true;
        }
      /* Fall through.  */

    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
    case CONST_DOUBLE:
      cost = loongarch_const_insns (x);
      if (cost > 0)
        {
          if (cost == 1 && outer_code == SET
              && !(float_mode_p && TARGET_HARD_FLOAT))
            cost = 0;
          else if ((outer_code == SET || GET_MODE (x) == VOIDmode))
            cost = 1;
          *total = COSTS_N_INSNS (cost);
          return true;
        }
      /* The value will need to be fetched from the constant pool.  */
      *total = CONSTANT_POOL_COST;
      return true;

    case MEM:
      /* If the address is legitimate, return the number of
         instructions it needs.  */
      addr = XEXP (x, 0);
      /* Check for a scaled indexed address.  */
      if (loongarch_index_address_p (addr, mode))
        {
          *total = COSTS_N_INSNS (2);
          return true;
        }
      cost = loongarch_address_cost (addr, mode, true, speed);
      if (cost > 0)
        {
          *total = COSTS_N_INSNS (cost + 1);
          return true;
        }
      /* Otherwise use the default handling.  */
      return false;

    case FFS:
      *total = COSTS_N_INSNS (6);
      return false;

    case NOT:
      *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 2 : 1);
      return false;

    case AND:
      /* Check for a *clear_upper32 pattern and treat it like a zero
         extension.  See the pattern's comment for details.  */
      if (TARGET_64BIT && mode == DImode && CONST_INT_P (XEXP (x, 1))
          && UINTVAL (XEXP (x, 1)) == 0xffffffff)
        {
          *total = (loongarch_zero_extend_cost (XEXP (x, 0))
                    + set_src_cost (XEXP (x, 0), mode, speed));
          return true;
        }
      /* (AND (NOT op0) (NOT op1)) is a nor operation that can be done in
         a single instruction.  */
      if (GET_CODE (XEXP (x, 0)) == NOT && GET_CODE (XEXP (x, 1)) == NOT)
        {
          cost = GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 2 : 1;
          *total = (COSTS_N_INSNS (cost)
                    + set_src_cost (XEXP (XEXP (x, 0), 0), mode, speed)
                    + set_src_cost (XEXP (XEXP (x, 1), 0), mode, speed));
          return true;
        }

      /* Fall through.  */

    case IOR:
      {
        rtx op[2] = {XEXP (x, 0), XEXP (x, 1)};
        if (GET_CODE (op[0]) == AND && GET_CODE (op[1]) == AND
            && (mode == SImode || (TARGET_64BIT && mode == DImode)))
          {
            rtx rtx_mask0 = XEXP (op[0], 1), rtx_mask1 = XEXP (op[1], 1);
            if (CONST_INT_P (rtx_mask0) && CONST_INT_P (rtx_mask1))
              {
                unsigned HOST_WIDE_INT mask0 = UINTVAL (rtx_mask0);
                unsigned HOST_WIDE_INT mask1 = UINTVAL (rtx_mask1);
                if (loongarch_use_bstrins_for_ior_with_mask_1 (mode,
                                                               mask0,
                                                               mask1))
                  {
                    /* A bstrins instruction.  */
                    *total = COSTS_N_INSNS (1);

                    /* A srai instruction.  */
                    if (low_bitmask_len (mode, mask0) == -1
                        && low_bitmask_len (mode, mask1) == -1)
                      *total += COSTS_N_INSNS (1);

                    for (int i = 0; i < 2; i++)
                      *total += set_src_cost (XEXP (op[i], 0), mode, speed);

                    return true;
                  }
              }
          }
      }

      /* Fall through.  */
    case XOR:
      /* Double-word operations use two single-word operations.  */
      *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (2),
                                      speed);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATE:
    case ROTATERT:
      if (CONSTANT_P (XEXP (x, 1)))
        *total = loongarch_binary_cost (x, COSTS_N_INSNS (1),
                                        COSTS_N_INSNS (4), speed);
      else
        *total = loongarch_binary_cost (x, COSTS_N_INSNS (1),
                                        COSTS_N_INSNS (12), speed);
      return true;

    case ABS:
      if (float_mode_p)
        *total = loongarch_cost->fp_add;
      else
        *total = COSTS_N_INSNS (4);
      return false;

    case LT:
    case LTU:
    case LE:
    case LEU:
    case GT:
    case GTU:
    case GE:
    case GEU:
    case EQ:
    case NE:
    case UNORDERED:
    case LTGT:
    case UNGE:
    case UNGT:
    case UNLE:
    case UNLT:
      /* Branch comparisons have VOIDmode, so use the first operand's
         mode instead.  */
      mode = GET_MODE (XEXP (x, 0));
      if (FLOAT_MODE_P (mode))
        {
          *total = loongarch_cost->fp_add;
          return false;
        }
      *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (4),
                                      speed);
      return true;

    case MINUS:
    case PLUS:
      if (float_mode_p)
        {
          *total = loongarch_cost->fp_add;
          return false;
        }

      /* An add combined with a shift left (or an equivalent mult by a
         power of 2) whose immediate shift amount satisfies the
         const_immalsl_operand predicate is a single instruction.  */
      if (code == PLUS
          && (mode == SImode || (TARGET_64BIT && mode == DImode)))
        {
          HOST_WIDE_INT shamt = -1;
          rtx lhs = XEXP (x, 0);
          rtx_code code_lhs = GET_CODE (lhs);

          switch (code_lhs)
            {
            case ASHIFT:
              if (CONST_INT_P (XEXP (lhs, 1)))
                shamt = INTVAL (XEXP (lhs, 1));
              break;
            case MULT:
              if (CONST_INT_P (XEXP (lhs, 1)))
                shamt = exact_log2 (INTVAL (XEXP (lhs, 1)));
              break;
            default:
              break;
            }

          if (IN_RANGE (shamt, 1, 4))
            {
              *total = (COSTS_N_INSNS (1)
                        + set_src_cost (XEXP (lhs, 0), mode, speed)
                        + set_src_cost (XEXP (x, 1), mode, speed));
              return true;
            }
        }

      /* Double-word operations require three single-word operations and
         an SLTU.  */
      *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (4),
                                      speed);
      return true;

    case NEG:
      if (float_mode_p)
        *total = loongarch_cost->fp_add;
      else
        *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 4 : 1);
      return false;

    case FMA:
      *total = loongarch_fp_mult_cost (mode);
      return false;

    case MULT:
      if (float_mode_p)
        *total = loongarch_fp_mult_cost (mode);
      else if (mode == DImode && !TARGET_64BIT)
        *total = (speed
                  ? loongarch_cost->int_mult_si * 3 + 6
                  : COSTS_N_INSNS (7));
      else if (mode == DImode)
        *total = loongarch_cost->int_mult_di;
      else
        *total = loongarch_cost->int_mult_si;
      return false;

    case DIV:
      /* Check for a reciprocal.  */
      if (float_mode_p
          && flag_unsafe_math_optimizations
          && XEXP (x, 0) == CONST1_RTX (mode))
        {
          if (outer_code == SQRT || GET_CODE (XEXP (x, 1)) == SQRT)
            /* An rsqrt<mode>a or rsqrt<mode>b pattern.  Count the
               division as being free.  */
            *total = set_src_cost (XEXP (x, 1), mode, speed);
          else
            *total = (loongarch_fp_div_cost (mode)
                      + set_src_cost (XEXP (x, 1), mode, speed));
          return true;
        }
      /* Fall through.  */

    case SQRT:
    case MOD:
      if (float_mode_p)
        {
          *total = loongarch_fp_div_cost (mode);
          return false;
        }
      /* Fall through.  */

    case UDIV:
    case UMOD:
      if (mode == DImode)
        *total = loongarch_cost->int_div_di;
      else
        {
          *total = loongarch_cost->int_div_si;
          if (TARGET_64BIT && !ISA_HAS_DIV32)
            *total += COSTS_N_INSNS (2);
        }

      if (TARGET_CHECK_ZERO_DIV)
        *total += COSTS_N_INSNS (2);

      return false;

    case SIGN_EXTEND:
      *total = loongarch_sign_extend_cost (XEXP (x, 0));
      return false;

    case ZERO_EXTEND:
      *total = loongarch_zero_extend_cost (XEXP (x, 0));
      return false;
    case TRUNCATE:
      /* Costings for highpart multiplies.  Matching patterns of the form:

           (lshiftrt:DI (mult:DI (sign_extend:DI (...))
                                 (sign_extend:DI (...)))
                        (const_int 32))  */
      if ((GET_CODE (XEXP (x, 0)) == ASHIFTRT
           || GET_CODE (XEXP (x, 0)) == LSHIFTRT)
          && CONST_INT_P (XEXP (XEXP (x, 0), 1))
          && ((INTVAL (XEXP (XEXP (x, 0), 1)) == 32
               && GET_MODE (XEXP (x, 0)) == DImode)
              || (TARGET_64BIT
                  && INTVAL (XEXP (XEXP (x, 0), 1)) == 64
                  && GET_MODE (XEXP (x, 0)) == TImode))
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
          && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
               && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
              || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
                  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
                      == ZERO_EXTEND))))
        {
          if (mode == DImode)
            *total = loongarch_cost->int_mult_di;
          else
            *total = loongarch_cost->int_mult_si;

          /* Sign extension is free; zero extension of a DImode operand
             costs extra on a 64-bit core / when DMUL is present.  */
          for (int i = 0; i < 2; ++i)
            {
              rtx op = XEXP (XEXP (XEXP (x, 0), 0), i);
              if (TARGET_64BIT
                  && GET_CODE (op) == ZERO_EXTEND
                  && GET_MODE (op) == DImode)
                *total += rtx_cost (op, DImode, MULT, i, speed);
              else
                *total += rtx_cost (XEXP (op, 0), VOIDmode, GET_CODE (op), 0,
                                    speed);
            }

          return true;
        }
      return false;

    case FLOAT:
    case UNSIGNED_FLOAT:
    case FIX:
    case FLOAT_EXTEND:
    case FLOAT_TRUNCATE:
      *total = loongarch_cost->fp_add;
      return false;

    case SET:
      if (register_operand (SET_DEST (x), VOIDmode)
          && reg_or_0_operand (SET_SRC (x), VOIDmode))
        {
          *total = loongarch_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
          return true;
        }
      return false;

    default:
      return false;
    }
}

/* Implement targetm.vectorize.builtin_vectorization_cost.  */

static int
loongarch_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                      tree vectype,
                                      int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;
  machine_mode mode = vectype != NULL ? TYPE_MODE (vectype) : DImode;

  switch (type_of_cost)
    {
    case scalar_stmt:
    case scalar_load:
    case vector_stmt:
    case vec_to_scalar:
    case scalar_to_vec:
    case scalar_store:
      return 1;

    case vec_promote_demote:
    case vec_perm:
      return LASX_SUPPORTED_MODE_P (mode)
        && !LSX_SUPPORTED_MODE_P (mode) ? 2 : 1;

    case vector_load:
    case vector_store:
    case unaligned_load:
    case unaligned_store:
      return 2;

    case cond_branch_taken:
      return 4;

    case cond_branch_not_taken:
      return 2;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      if (LASX_SUPPORTED_MODE_P (mode) && !LSX_SUPPORTED_MODE_P (mode))
        return elements / 2 + 3;
      else
        return elements / 2 + 1;

    default:
      gcc_unreachable ();
    }
}

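/* Worked example (added for illustration, not in the upstream source):
   constructing a V4SI vector (4 elements, an LSX mode) is costed as
   4 / 2 + 1 = 3, while a V8SI vector (8 elements, LASX-only) is costed
   as 8 / 2 + 3 = 7, the higher constant accounting for the extra
   cross-lane element inserts.  */
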
class loongarch_vector_costs : public vector_costs
{
public:
  using vector_costs::vector_costs;

  unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
                              stmt_vec_info stmt_info, slp_tree, tree vectype,
                              int misalign,
                              vect_cost_model_location where) override;
  void finish_cost (const vector_costs *) override;

protected:
  void count_operations (vect_cost_for_stmt, stmt_vec_info,
                         vect_cost_model_location, unsigned int);
  unsigned int determine_suggested_unroll_factor (loop_vec_info);
  /* The number of vectorized stmts in the loop.  */
  unsigned m_stmts = 0;
  /* The number of load and store operations in the loop.  */
  unsigned m_loads = 0;
  unsigned m_stores = 0;
  /* Reduction factor for suggesting the unroll factor.  */
  unsigned m_reduc_factor = 0;
  /* True if the loop contains an average operation.  */
  bool m_has_avg = false;
  /* True if the loop uses an approximate instruction sequence.  */
  bool m_has_recip = false;
};

/* Implement TARGET_VECTORIZE_CREATE_COSTS.  */
static vector_costs *
loongarch_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
{
  return new loongarch_vector_costs (vinfo, costing_for_scalar);
}

void
loongarch_vector_costs::count_operations (vect_cost_for_stmt kind,
                                          stmt_vec_info stmt_info,
                                          vect_cost_model_location where,
                                          unsigned int count)
{
  if (!m_costing_for_scalar
      && is_a<loop_vec_info> (m_vinfo)
      && where == vect_body)
    {
      m_stmts += count;

      if (kind == scalar_load
          || kind == vector_load
          || kind == unaligned_load)
        m_loads += count;
      else if (kind == scalar_store
               || kind == vector_store
               || kind == unaligned_store)
        m_stores += count;
      else if ((kind == scalar_stmt
                || kind == vector_stmt
                || kind == vec_to_scalar)
               && stmt_info && vect_is_reduction (stmt_info))
        {
          tree lhs = gimple_get_lhs (stmt_info->stmt);
          unsigned int base = FLOAT_TYPE_P (TREE_TYPE (lhs)) ? 2 : 1;
          m_reduc_factor = MAX (base * count, m_reduc_factor);
        }
    }
}

unsigned int
loongarch_vector_costs::determine_suggested_unroll_factor (loop_vec_info loop_vinfo)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (m_has_avg || m_has_recip)
    return 1;

  /* Don't unroll if it's specified explicitly not to be unrolled.  */
  if (loop->unroll == 1
      || (OPTION_SET_P (flag_unroll_loops) && !flag_unroll_loops)
      || (OPTION_SET_P (flag_unroll_all_loops) && !flag_unroll_all_loops))
    return 1;

  unsigned int nstmts_nonldst = m_stmts - m_loads - m_stores;
  /* Don't unroll if there are no vector instructions apart from memory
     accesses.  */
  if (nstmts_nonldst == 0)
    return 1;

  /* Use a simple hardware resource model: how many non-vld/vst vector
     instructions can be issued per cycle.  */
  unsigned int issue_info = la_vect_issue_info;
  unsigned int reduc_factor = m_reduc_factor > 1 ? m_reduc_factor : 1;
  unsigned int uf = CEIL (reduc_factor * issue_info, nstmts_nonldst);
  uf = MIN ((unsigned int) la_vect_unroll_limit, uf);

  return 1 << ceil_log2 (uf);
}

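/* Worked example (added for illustration, not in the upstream source):
   with m_stmts = 10, m_loads = 4, m_stores = 2 and a float reduction
   giving m_reduc_factor = 4, an issue width of 2 yields
   uf = CEIL (4 * 2, 10 - 4 - 2) = 2, so the suggested unroll factor is
   1 << ceil_log2 (2) = 2 (assuming la_vect_unroll_limit >= 2).  */
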
/* Check if an assign stmt's rhs operand comes from a multiply-add
   operation.  */
static bool
loongarch_multiply_add_p (vec_info *vinfo, stmt_vec_info stmt_info)
{
  gassign *assign = dyn_cast<gassign *> (stmt_info->stmt);
  if (!assign)
    return false;
  tree_code code = gimple_assign_rhs_code (assign);
  if (code != PLUS_EXPR && code != MINUS_EXPR)
    return false;

  auto is_mul_result = [&](int i)
    {
      tree rhs = gimple_op (assign, i);
      if (TREE_CODE (rhs) != SSA_NAME)
        return false;

      stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
      if (!def_stmt_info
          || STMT_VINFO_DEF_TYPE (def_stmt_info) != vect_internal_def)
        return false;
      gassign *rhs_assign = dyn_cast<gassign *> (def_stmt_info->stmt);
      if (!rhs_assign || gimple_assign_rhs_code (rhs_assign) != MULT_EXPR)
        return false;

      return true;
    };

  return is_mul_result (1) || is_mul_result (2);
}

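/* Illustrative note (added; not in the upstream source): for a loop body
   computing t = b[i] * c[i]; a[i] = t + d[i]; the PLUS_EXPR statement is
   recognized here because one of its operands is the result of a
   MULT_EXPR, so add_stmt_cost below can treat the pair as a fused
   multiply-add candidate and zero out the add's cost.  */
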
unsigned
loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
                                       stmt_vec_info stmt_info, slp_tree,
                                       tree vectype, int misalign,
                                       vect_cost_model_location where)
{
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      int stmt_cost = loongarch_builtin_vectorization_cost (kind, vectype,
                                                            misalign);
      if (vectype && stmt_info)
        {
          gassign *assign = dyn_cast<gassign *> (STMT_VINFO_STMT (stmt_info));
          machine_mode mode = TYPE_MODE (vectype);

          /* We found through testing that this strategy (zeroing the cost
             of a stmt that matches the multiply-add pattern) has positive
             returns only when applied to 128-bit vector stmts, so this
             restriction is currently made.  */
          if (kind == vector_stmt && GET_MODE_SIZE (mode) == 16 && assign)
            {
              if (!vect_is_reduction (stmt_info)
                  && loongarch_multiply_add_p (m_vinfo, stmt_info))
                stmt_cost = 0;
            }
        }

      retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
      m_costs[where] += retval;

      count_operations (kind, stmt_info, where, count);
    }

  if (stmt_info)
    {
      /* Detect the use of an averaging operation.  */
      gimple *stmt = stmt_info->stmt;
      if (is_gimple_call (stmt)
          && gimple_call_internal_p (stmt))
        {
          switch (gimple_call_internal_fn (stmt))
            {
            case IFN_AVG_FLOOR:
            case IFN_AVG_CEIL:
              m_has_avg = true;
            default:
              break;
            }
        }
    }

  combined_fn cfn;
  if (kind == vector_stmt
      && stmt_info
      && stmt_info->stmt)
    {
      /* Detect the use of an approximate instruction sequence.  */
      if ((TARGET_RECIP_VEC_SQRT || TARGET_RECIP_VEC_RSQRT)
          && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
        switch (cfn)
          {
          case CFN_BUILT_IN_SQRTF:
            m_has_recip = true;
          default:
            break;
          }
      else if (TARGET_RECIP_VEC_DIV
               && vectype
               && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
        {
          machine_mode mode = TYPE_MODE (vectype);
          switch (gimple_assign_rhs_code (stmt_info->stmt))
            {
            case RDIV_EXPR:
              if (GET_MODE_INNER (mode) == SFmode)
                m_has_recip = true;
            default:
              break;
            }
        }
    }

  return retval;
}

void
loongarch_vector_costs::finish_cost (const vector_costs *scalar_costs)
{
  loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);

  if (loop_vinfo)
    m_suggested_unroll_factor
      = determine_suggested_unroll_factor (loop_vinfo);

  vector_costs::finish_cost (scalar_costs);
}

/* Implement TARGET_INSN_COST.  */

static int
loongarch_insn_cost (rtx_insn *insn, bool speed)
{
  rtx x = PATTERN (insn);
  int cost = pattern_cost (x, speed);

  /* On LA464, prevent movcf2fr and movfr2gr from merging into movcf2gr.  */
  if (GET_CODE (x) == SET
      && GET_MODE (XEXP (x, 0)) == FCCmode)
    {
      rtx dest, src;
      dest = XEXP (x, 0);
      src = XEXP (x, 1);

      if (REG_P (dest) && REG_P (src))
        {
          if (GP_REG_P (REGNO (dest)) && FCC_REG_P (REGNO (src)))
            cost = loongarch_cost->movcf2gr;
          else if (FCC_REG_P (REGNO (dest)) && GP_REG_P (REGNO (src)))
            cost = loongarch_cost->movgr2cf;
        }
    }
  return cost;
}

/* Return one word of double-word value OP, taking into account the fixed
   endianness of certain registers.  HIGH_P is true to select the high part,
   false to select the low part.  */

rtx
loongarch_subword (rtx op, bool high_p)
{
  unsigned int byte;
  machine_mode mode;

  byte = high_p ? UNITS_PER_WORD : 0;
  mode = GET_MODE (op);
  if (mode == VOIDmode)
    mode = TARGET_64BIT ? TImode : DImode;

  if (FP_REG_RTX_P (op))
    return gen_rtx_REG (word_mode, REGNO (op) + high_p);

  if (MEM_P (op))
    return loongarch_rewrite_small_data (adjust_address (op, word_mode, byte));

  return simplify_gen_subreg (word_mode, op, mode, byte);
}

static bool loongarch_split_vector_move_p (rtx dest, rtx src);

/* Return true if a move from SRC to DEST should be split into two.  */

bool
loongarch_split_move_p (rtx dest, rtx src)
{
  /* FPR-to-FPR moves can be done in a single instruction, if they're
     allowed at all.  */
  unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
  if (size == 8 && FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
    return false;

  /* Check for floating-point loads and stores.  */
  if (size == 8)
    {
      if (FP_REG_RTX_P (dest) && MEM_P (src))
        return false;
      if (FP_REG_RTX_P (src) && MEM_P (dest))
        return false;
    }


  /* Check if vector moves need splitting.  */
  if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))
      || LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
    return loongarch_split_vector_move_p (dest, src);

  /* Otherwise split all multiword moves.  */
  return size > UNITS_PER_WORD;
}

/* Split a move from SRC to DEST, given that loongarch_split_move_p
   holds.  */

void
loongarch_split_move (rtx dest, rtx src)
{
  gcc_checking_assert (loongarch_split_move_p (dest, src));
  if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))
      || LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
    loongarch_split_vector_move (dest, src);
  else
    gcc_unreachable ();
}

/* Check if adding an integer constant value for a specific mode can be
   performed with an addu16i.d instruction and an addi.{w/d}
   instruction.  */

bool
loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT value, machine_mode mode)
{
  /* Not necessary, but avoid unnecessary calculation if !TARGET_64BIT.  */
  if (!TARGET_64BIT)
    return false;

  if ((value & 0xffff) == 0)
    return false;

  if (IMM12_OPERAND (value))
    return false;

  value = (value & ~HWIT_UC_0xFFF) + ((value & 0x800) << 1);
  return ADDU16I_OPERAND (trunc_int_for_mode (value, mode));
}

/* Split one integer constant op[0] into two (op[1] and op[2]) for a
   constant plus operation in a specific mode.  The split constants can
   be added onto a register with a single instruction (addi.{d/w} or
   addu16i.d).  */

void
loongarch_split_plus_constant (rtx *op, machine_mode mode)
{
  HOST_WIDE_INT v = INTVAL (op[0]), a;

  if (DUAL_IMM12_OPERAND (v))
    a = (v > 0 ? 2047 : -2048);
  else if (loongarch_addu16i_imm12_operand_p (v, mode))
    a = (v & ~HWIT_UC_0xFFF) + ((v & 0x800) << 1);
  else if (mode == DImode && DUAL_ADDU16I_OPERAND (v))
    a = (v > 0 ? 0x7fff0000 : ~0x7fffffff);
  else
    gcc_unreachable ();

  op[1] = gen_int_mode (a, mode);
  v = v - (unsigned HOST_WIDE_INT) a;
  op[2] = gen_int_mode (v, mode);
}

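/* Worked example (added for illustration, not in the upstream source):
   v = 0x2ff05 has nonzero low 16 bits and is not a 12-bit immediate.
   Rounding to the nearest multiple of 0x1000 as in the addu16i.d test
   gives a = 0x2f000 + 0x1000 = 0x30000 (addu16i.d with immediate 3),
   leaving v - a = -0xfb = -251, which fits the signed 12-bit immediate
   of addi.d.  */
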
/* Test if reassociating (a << shamt) [&|^] mask into
   (a [&|^] (mask >> shamt)) << shamt is possible and beneficial.
   If true, return (mask >> shamt).  Return NULL_RTX otherwise.  */

rtx
loongarch_reassoc_shift_bitwise (bool is_and, rtx shamt, rtx mask,
                                 machine_mode mode)
{
  gcc_checking_assert (CONST_INT_P (shamt));
  gcc_checking_assert (CONST_INT_P (mask));
  gcc_checking_assert (mode == SImode || mode == DImode);

  if (ctz_hwi (INTVAL (mask)) < INTVAL (shamt))
    return NULL_RTX;

  /* When trying alsl.w, deliberately ignore the high bits.  */
  mask = gen_int_mode (UINTVAL (mask), mode);

  rtx new_mask = simplify_const_binary_operation (LSHIFTRT, mode, mask,
                                                  shamt);

  /* Do an arithmetic shift for checking ins_zero_bitmask_operand or -1:
     ashiftrt (0xffffffff00000000, 2) is 0xffffffffc0000000, which is an
     ins_zero_bitmask_operand, but lshiftrt will produce
     0x3fffffffc0000000.  */
  rtx new_mask_1 = simplify_const_binary_operation (ASHIFTRT, mode, mask,
                                                    shamt);

  if (is_and && const_m1_operand (new_mask_1, mode))
    return new_mask_1;

  if (const_uns_arith_operand (new_mask, mode))
    return new_mask;

  if (!is_and)
    return NULL_RTX;

  if (low_bitmask_operand (new_mask, mode))
    return new_mask;

  return ins_zero_bitmask_operand (new_mask_1, mode) ? new_mask_1 : NULL_RTX;
}

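/* Worked example (added for illustration, not in the upstream source):
   (a << 8) & 0xff00 becomes (a & 0xff) << 8, since 0xff00 >> 8 = 0xff
   fits the 12-bit unsigned immediate of andi; the shifted-out low bits
   of the mask are all zero (ctz (0xff00) = 8 >= shamt), so no
   information is lost by the reassociation.  */
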
/* Implement TARGET_CONSTANT_ALIGNMENT.  */

static HOST_WIDE_INT
loongarch_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  if (TREE_CODE (exp) == STRING_CST || TREE_CODE (exp) == CONSTRUCTOR)
    return MAX (align, BITS_PER_WORD);
  return align;
}

const char *
loongarch_output_move_index (rtx x, machine_mode mode, bool ldr)
{
  int index = exact_log2 (GET_MODE_SIZE (mode));
  if (!IN_RANGE (index, 0, 3))
    return NULL;

  struct loongarch_address_info info;
  if ((loongarch_classify_address (&info, x, mode, false)
       && !(info.type == ADDRESS_REG_REG))
      || !loongarch_legitimate_address_p (mode, x, false))
    return NULL;

  const char *const insn[][4] =
    {
      {
        "stx.b\t%z1,%0",
        "stx.h\t%z1,%0",
        "stx.w\t%z1,%0",
        "stx.d\t%z1,%0",
      },
      {
        "ldx.bu\t%0,%1",
        "ldx.hu\t%0,%1",
        "ldx.w\t%0,%1",
        "ldx.d\t%0,%1",
      }
    };

  return insn[ldr][index];
}

const char *
loongarch_output_move_index_float (rtx x, machine_mode mode, bool ldr)
{
  int index = exact_log2 (GET_MODE_SIZE (mode));
  if (!IN_RANGE (index, 2, 5))
    return NULL;

  struct loongarch_address_info info;
  if ((loongarch_classify_address (&info, x, mode, false)
       && !(info.type == ADDRESS_REG_REG))
      || !loongarch_legitimate_address_p (mode, x, false))
    return NULL;

  const char *const insn[][4] =
    {
      {
        "fstx.s\t%1,%0",
        "fstx.d\t%1,%0",
        "vstx\t%w1,%0",
        "xvstx\t%u1,%0"
      },
      {
        "fldx.s\t%0,%1",
        "fldx.d\t%0,%1",
        "vldx\t%w0,%1",
        "xvldx\t%u0,%1"
      }
    };

  return insn[ldr][index - 2];
}

/* Return true if a vector move from SRC to DEST should be split.  */

static bool
loongarch_split_vector_move_p (rtx dest, rtx src)
{
  /* Vector moves can be done in a single instruction.  */
  if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
    return false;

  /* Check for vector loads and stores.  */
  if (FP_REG_RTX_P (dest) && MEM_P (src))
    return false;
  if (FP_REG_RTX_P (src) && MEM_P (dest))
    return false;

  /* Check for a vector set to an immediate const vector with a valid
     replicated element.  */
  if (FP_REG_RTX_P (dest)
      && loongarch_const_vector_vrepli (src, GET_MODE (src)))
    return false;

  /* Check for a vector load of the zero immediate.  */
  if (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src)))
    return false;

  return true;
}

/* Split a vector move from SRC to DEST.  */

void
loongarch_split_vector_move (rtx dest, rtx src)
{
  int byte, index;
  rtx s, d;
  machine_mode mode = GET_MODE (dest);
  bool lsx_p = LSX_SUPPORTED_MODE_P (mode);

  if (FP_REG_RTX_P (dest))
    {
      gcc_assert (!MEM_P (src));

      rtx (*gen_vinsgr2vr_d) (rtx, rtx, rtx, rtx);

      if (lsx_p)
        {
          mode = V2DImode;
          gen_vinsgr2vr_d = gen_lsx_vinsgr2vr_d;
        }
      else
        {
          mode = V4DImode;
          gen_vinsgr2vr_d = gen_lasx_xvinsgr2vr_d;
        }

      rtx new_dest = dest;

      if (GET_MODE (dest) != mode)
        new_dest = simplify_gen_subreg (mode, dest, GET_MODE (dest), 0);

      for (byte = 0, index = 0; byte < GET_MODE_SIZE (GET_MODE (dest));
           byte += UNITS_PER_WORD, index++)
        {
          s = loongarch_subword_at_byte (src, byte);
          emit_insn (gen_vinsgr2vr_d (new_dest, s, new_dest,
                                      GEN_INT (1 << index)));
        }
    }
  else if (FP_REG_RTX_P (src))
    {
      gcc_assert (!MEM_P (dest));

      rtx (*gen_vpickve2gr_d) (rtx, rtx, rtx);

      if (lsx_p)
        {
          mode = V2DImode;
          gen_vpickve2gr_d = gen_lsx_vpickve2gr_d;
        }
      else
        {
          mode = V4DImode;
          gen_vpickve2gr_d = gen_lasx_xvpickve2gr_d;
        }

      rtx new_src = src;
      if (GET_MODE (src) != mode)
        new_src = simplify_gen_subreg (mode, src, GET_MODE (src), 0);

      for (byte = 0, index = 0; byte < GET_MODE_SIZE (GET_MODE (src));
           byte += UNITS_PER_WORD, index++)
        {
          d = loongarch_subword_at_byte (dest, byte);
          emit_insn (gen_vpickve2gr_d (d, new_src, GEN_INT (index)));
        }
    }
  else
    {
      /* This part of the code is designed to handle the following
         situations:
           (set (reg:V2DI 4 $r4)
                (reg:V2DI 6 $r6))
         The trigger test case is lsx-mov-1.c.  */
      rtx low_dest, low_src;

      low_dest = loongarch_subword_at_byte (dest, 0);
      low_src = loongarch_subword_at_byte (src, 0);
      gcc_assert (REG_P (low_dest) && REG_P (low_src));
      /* Make sure the source register is not written before being read.  */
      if (REGNO (low_dest) <= REGNO (low_src))
        {
          for (byte = 0; byte < GET_MODE_SIZE (GET_MODE (dest));
               byte += UNITS_PER_WORD)
            {
              d = loongarch_subword_at_byte (dest, byte);
              s = loongarch_subword_at_byte (src, byte);
              loongarch_emit_move (d, s);
            }
        }
      else
        {
          for (byte = GET_MODE_SIZE (GET_MODE (dest)) - UNITS_PER_WORD;
               byte >= 0; byte -= UNITS_PER_WORD)
            {
              d = loongarch_subword_at_byte (dest, byte);
              s = loongarch_subword_at_byte (src, byte);
              loongarch_emit_move (d, s);
            }
        }
    }
}

/* Return the appropriate instructions to move SRC into DEST.  Assume
   that SRC is operand 1 and DEST is operand 0.  */

const char *
loongarch_output_move (rtx *operands)
{
  rtx src = operands[1];
  rtx dest = operands[0];
  enum rtx_code dest_code = GET_CODE (dest);
  enum rtx_code src_code = GET_CODE (src);
  machine_mode mode = GET_MODE (dest);
  bool dbl_p = (GET_MODE_SIZE (mode) == 8);
  bool lsx_p = LSX_SUPPORTED_MODE_P (mode);
  bool lasx_p = LASX_SUPPORTED_MODE_P (mode);

  if (loongarch_split_move_p (dest, src))
    return "#";

  if ((lsx_p || lasx_p)
      && dest_code == REG && FP_REG_P (REGNO (dest))
      && src_code == CONST_VECTOR
      && CONST_INT_P (CONST_VECTOR_ELT (src, 0)))
    {
      operands[1] = loongarch_const_vector_vrepli (src, mode);
      gcc_assert (operands[1]);

      switch (GET_MODE_SIZE (mode))
        {
        case 16:
          return "vrepli.%v1\t%w0,%E1";
        case 32:
          return "xvrepli.%v1\t%u0,%E1";
        default:
          gcc_unreachable ();
        }
    }

  if ((src_code == REG && GP_REG_P (REGNO (src)))
      || (src == CONST0_RTX (mode)))
    {
      if (dest_code == REG)
        {
          if (GP_REG_P (REGNO (dest)))
            return "or\t%0,%z1,$r0";

          if (FP_REG_P (REGNO (dest)))
            {
              if (lsx_p || lasx_p)
                {
                  gcc_assert (src == CONST0_RTX (GET_MODE (src)));
                  switch (GET_MODE_SIZE (mode))
                    {
                    case 16:
                      return "vrepli.b\t%w0,0";
                    case 32:
                      return "xvrepli.b\t%u0,0";
                    default:
                      gcc_unreachable ();
                    }
                }
              if (ISA_HAS_LSX && src == CONST0_RTX (GET_MODE (src)))
                return "vxor.v\t%w0,%w0,%w0";

              return dbl_p ? "movgr2fr.d\t%0,%z1" : "movgr2fr.w\t%0,%z1";
            }
        }
      if (dest_code == MEM)
        {
          const char *insn = NULL;
          insn = loongarch_output_move_index (XEXP (dest, 0), GET_MODE (dest),
                                              false);
          if (insn)
            return insn;

          rtx offset = XEXP (dest, 0);
          if (GET_CODE (offset) == PLUS)
            offset = XEXP (offset, 1);
          switch (GET_MODE_SIZE (mode))
            {
            case 1:
              return "st.b\t%z1,%0";
            case 2:
              return "st.h\t%z1,%0";
            case 4:
              /* Matching an address with a 12-bit offset or
                 ADDRESS_LO_SUM.  */
              if (const_arith_operand (offset, Pmode)
                  || GET_CODE (offset) == LO_SUM)
                return "st.w\t%z1,%0";
              else
                return "stptr.w\t%z1,%0";
            case 8:
              if (const_arith_operand (offset, Pmode)
                  || GET_CODE (offset) == LO_SUM)
                return "st.d\t%z1,%0";
              else
                return "stptr.d\t%z1,%0";
            default:
              gcc_unreachable ();
            }
        }
    }
  if (dest_code == REG && GP_REG_P (REGNO (dest)))
    {
      if (src_code == REG)
        if (FP_REG_P (REGNO (src)))
          {
            gcc_assert (!lsx_p);
            return dbl_p ? "movfr2gr.d\t%0,%1" : "movfr2gr.s\t%0,%1";
          }

      if (src_code == MEM)
        {
          const char *insn = NULL;
          insn = loongarch_output_move_index (XEXP (src, 0), GET_MODE (src),
                                              true);
          if (insn)
            return insn;

          rtx offset = XEXP (src, 0);
          if (GET_CODE (offset) == PLUS)
            offset = XEXP (offset, 1);
          switch (GET_MODE_SIZE (mode))
            {
            case 1:
              return "ld.bu\t%0,%1";
            case 2:
              return "ld.hu\t%0,%1";
            case 4:
              /* Matching an address with a 12-bit offset or
                 ADDRESS_LO_SUM.  */
              if (const_arith_operand (offset, Pmode)
                  || GET_CODE (offset) == LO_SUM)
                return "ld.w\t%0,%1";
              else
                return "ldptr.w\t%0,%1";
            case 8:
              if (const_arith_operand (offset, Pmode)
                  || GET_CODE (offset) == LO_SUM)
                return "ld.d\t%0,%1";
              else
                return "ldptr.d\t%0,%1";
            default:
              gcc_unreachable ();
            }
        }

      if (src_code == HIGH)
        {
          rtx offset, x;
          split_const (XEXP (src, 0), &x, &offset);
          enum loongarch_symbol_type type = SYMBOL_PCREL;

          if (UNSPEC_ADDRESS_P (x))
            type = UNSPEC_ADDRESS_TYPE (x);

          if (type == SYMBOL_TLS_LE)
            return "lu12i.w\t%0,%h1";
          else
            return "%Q1pcalau12i\t%0,%h1";
        }

      if (src_code == CONST_INT)
        {
          if (LU12I_INT (src))
            {
              operands[1] = GEN_INT (INTVAL (operands[1]) >> 12);
              return "lu12i.w\t%0,%1\t\t\t# %X1";
            }
          else if (IMM12_INT (src))
            return "addi.w\t%0,$r0,%1\t\t\t# %X1";
          else if (IMM12_INT_UNSIGNED (src))
            return "ori\t%0,$r0,%1\t\t\t# %X1";
          else if (LU52I_INT (src))
            {
              operands[1] = GEN_INT (INTVAL (operands[1]) >> 52);
              return "lu52i.d\t%0,$r0,%X1\t\t\t# %1";
            }
          else
            gcc_unreachable ();
        }
    }

  if (!loongarch_explicit_relocs_p (loongarch_classify_symbol (src))
      && dest_code == REG && symbolic_operand (src, VOIDmode))
    {
      if (loongarch_classify_symbol (src) == SYMBOL_PCREL)
        return "la.local\t%0,%1";
      else
        return "la.global\t%0,%1";
    }

  if (src_code == REG && FP_REG_P (REGNO (src)))
    {
      if (dest_code == REG && FP_REG_P (REGNO (dest)))
        {
          if (lsx_p || lasx_p)
            {
              switch (GET_MODE_SIZE (mode))
                {
                case 16:
                  return "vori.b\t%w0,%w1,0";
                case 32:
                  return "xvori.b\t%u0,%u1,0";
                default:
                  gcc_unreachable ();
                }
            }

          return dbl_p ? "fmov.d\t%0,%1" : "fmov.s\t%0,%1";
        }

      if (dest_code == MEM)
        {
          const char *insn = NULL;
          insn = loongarch_output_move_index_float (XEXP (dest, 0),
                                                    GET_MODE (dest),
                                                    false);
          if (insn)
            return insn;

          if (lsx_p || lasx_p)
            {
              switch (GET_MODE_SIZE (mode))
                {
                case 16:
                  return "vst\t%w1,%0";
                case 32:
                  return "xvst\t%u1,%0";
                default:
                  gcc_unreachable ();
                }
            }

          return dbl_p ? "fst.d\t%1,%0" : "fst.s\t%1,%0";
        }
    }

  if (dest_code == REG && FP_REG_P (REGNO (dest)))
    {
      if (src_code == MEM)
        {
          const char *insn = NULL;
          insn = loongarch_output_move_index_float (XEXP (src, 0),
                                                    GET_MODE (src),
                                                    true);
          if (insn)
            return insn;

          if (lsx_p || lasx_p)
            {
              switch (GET_MODE_SIZE (mode))
                {
                case 16:
                  return "vld\t%w0,%1";
                case 32:
                  return "xvld\t%u0,%1";
                default:
                  gcc_unreachable ();
                }
            }
          return dbl_p ? "fld.d\t%0,%1" : "fld.s\t%0,%1";
        }
    }

  gcc_unreachable ();
}

/* Return true if CMP1 is a suitable second operand for integer ordering
   test CODE.  */

static bool
loongarch_int_order_operand_ok_p (enum rtx_code code, rtx cmp1)
{
  switch (code)
    {
    case GT:
    case GTU:
      return reg_or_0_operand (cmp1, VOIDmode);

    case GE:
    case GEU:
      return cmp1 == const1_rtx;

    case LT:
    case LTU:
      return arith_operand (cmp1, VOIDmode);

    case LE:
      return sle_operand (cmp1, VOIDmode);

    case LEU:
      return sleu_operand (cmp1, VOIDmode);

    default:
      gcc_unreachable ();
    }
}

/* Return true if *CMP1 (of mode MODE) is a valid second operand for
   integer ordering test *CODE, or if an equivalent combination can
   be formed by adjusting *CODE and *CMP1.  When returning true, update
   *CODE and *CMP1 with the chosen code and operand, otherwise leave
   them alone.  */

static bool
loongarch_canonicalize_int_order_test (enum rtx_code *code, rtx *cmp1,
                                       machine_mode mode)
{
  HOST_WIDE_INT plus_one;

  if (loongarch_int_order_operand_ok_p (*code, *cmp1))
    return true;

  if (CONST_INT_P (*cmp1))
    switch (*code)
      {
      case LE:
        plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode);
        if (INTVAL (*cmp1) < plus_one)
          {
            *code = LT;
            *cmp1 = force_reg (mode, GEN_INT (plus_one));
            return true;
          }
        break;

      case LEU:
        plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode);
        if (plus_one != 0)
          {
            *code = LTU;
            *cmp1 = force_reg (mode, GEN_INT (plus_one));
            return true;
          }
        break;

      default:
        break;
      }
  return false;
}

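/* Worked example (added for illustration, not in the upstream source):
   if "x <= C" cannot use slti directly (C + 1 is out of immediate
   range), it is rewritten as "x < C + 1" with C + 1 forced into a
   register.  The guard INTVAL (*cmp1) < plus_one rejects the wrap-around
   case C == INT_MAX, where C + 1 would truncate to INT_MIN.  */
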
/* Compare CMP0 and CMP1 using ordering test CODE and store the result
   in TARGET.  CMP0 and TARGET are register_operands.  If INVERT_PTR
   is nonnull, it's OK to set TARGET to the inverse of the result and
   flip *INVERT_PTR instead.  */

static void
loongarch_emit_int_order_test (enum rtx_code code, bool *invert_ptr,
                               rtx target, rtx cmp0, rtx cmp1)
{
  machine_mode mode;

  /* First see if there is a LoongArch instruction that can do this
     operation.  If not, try doing the same for the inverse operation.
     If that also fails, force CMP1 into a register and try again.  */
  mode = GET_MODE (cmp0);
  if (loongarch_canonicalize_int_order_test (&code, &cmp1, mode))
    loongarch_emit_binary (code, target, cmp0, cmp1);
  else
    {
      enum rtx_code inv_code = reverse_condition (code);
      if (!loongarch_canonicalize_int_order_test (&inv_code, &cmp1, mode))
        {
          cmp1 = force_reg (mode, cmp1);
          loongarch_emit_int_order_test (code, invert_ptr, target, cmp0, cmp1);
        }
      else if (invert_ptr == 0)
        {
          rtx inv_target;

          inv_target = loongarch_force_binary (GET_MODE (target),
                                               inv_code, cmp0, cmp1);
          loongarch_emit_binary (XOR, target, inv_target, const1_rtx);
        }
      else
        {
          *invert_ptr = !*invert_ptr;
          loongarch_emit_binary (inv_code, target, cmp0, cmp1);
        }
    }
}

/* Return a register that is zero if CMP0 and CMP1 are equal.
   The register will have the same mode as CMP0.  */

static rtx
loongarch_zero_if_equal (rtx cmp0, rtx cmp1)
{
  if (cmp1 == const0_rtx)
    return cmp0;

  if (uns_arith_operand (cmp1, VOIDmode))
    return expand_binop (GET_MODE (cmp0), xor_optab, cmp0, cmp1, 0, 0,
                         OPTAB_DIRECT);

  return expand_binop (GET_MODE (cmp0), sub_optab, cmp0, cmp1, 0, 0,
                       OPTAB_DIRECT);
}

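/* Illustrative note (added; not in the upstream source): for cmp1 = 5
   this emits "xori tmp, cmp0, 5", which is zero exactly when cmp0 == 5;
   for constants outside the unsigned 12-bit range it falls back to a
   subtraction, which is likewise zero only on equality.  */
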
/* Sign- or zero-extend OP0 and OP1 for integer comparisons.  */

static void
loongarch_extend_comparands (rtx_code code, rtx *op0, rtx *op1)
{
  /* Comparisons consider all GRLEN bits, so extend sub-GRLEN values.  */
  if (GET_MODE_SIZE (word_mode) > GET_MODE_SIZE (GET_MODE (*op0)))
    {
      /* It is more profitable to zero-extend QImode values.  But not if the
         first operand has already been sign-extended, and the second one is
         a constant or has already been sign-extended also.  */
      if (unsigned_condition (code) == code
          && (GET_MODE (*op0) == QImode
              && ! (GET_CODE (*op0) == SUBREG
                    && SUBREG_PROMOTED_VAR_P (*op0)
                    && SUBREG_PROMOTED_SIGNED_P (*op0)
                    && (CONST_INT_P (*op1)
                        || (GET_CODE (*op1) == SUBREG
                            && SUBREG_PROMOTED_VAR_P (*op1)
                            && SUBREG_PROMOTED_SIGNED_P (*op1))))))
        {
          *op0 = gen_rtx_ZERO_EXTEND (word_mode, *op0);
          if (CONST_INT_P (*op1))
            *op1 = GEN_INT ((uint8_t) INTVAL (*op1));
          else
            *op1 = gen_rtx_ZERO_EXTEND (word_mode, *op1);
        }
      else
        {
          *op0 = gen_rtx_SIGN_EXTEND (word_mode, *op0);
          if (*op1 != const0_rtx)
            *op1 = gen_rtx_SIGN_EXTEND (word_mode, *op1);
        }
    }
}


/* Convert a comparison into something that can be used in a branch.  On
   entry, *OP0 and *OP1 are the values being compared and *CODE is the code
   used to compare them.  Update them to describe the final comparison.  */

static void
loongarch_emit_int_compare (enum rtx_code *code, rtx *op0, rtx *op1)
{
  static const enum rtx_code
    mag_comparisons[][2] = {{LEU, LTU}, {GTU, GEU}, {LE, LT}, {GT, GE}};

  if (splittable_const_int_operand (*op1, VOIDmode))
    {
      HOST_WIDE_INT rhs = INTVAL (*op1);

      if (*code == EQ || *code == NE)
        {
          /* Convert e.g. OP0 == 2048 into OP0 - 2048 == 0.  */
          if (IMM12_OPERAND (-rhs))
            {
              *op0 = loongarch_force_binary (GET_MODE (*op0), PLUS, *op0,
                                             GEN_INT (-rhs));
              *op1 = const0_rtx;
            }
        }
      else
        {
          /* Convert e.g. (OP0 <= 0xFFF) into (OP0 < 0x1000).  */
          for (size_t i = 0; i < ARRAY_SIZE (mag_comparisons); i++)
            {
              HOST_WIDE_INT new_rhs;
              bool increment = *code == mag_comparisons[i][0];
              bool decrement = *code == mag_comparisons[i][1];
              if (!increment && !decrement)
                continue;

              if ((increment && rhs == HOST_WIDE_INT_MAX)
                  || (decrement && rhs == HOST_WIDE_INT_MIN))
                break;

              new_rhs = rhs + (increment ? 1 : -1);
              if (loongarch_integer_cost (new_rhs)
                  < loongarch_integer_cost (rhs))
                {
                  *op1 = GEN_INT (new_rhs);
                  *code = mag_comparisons[i][increment];
                }
              break;
            }
        }
    }

  loongarch_extend_comparands (*code, op0, op1);

  *op0 = force_reg (word_mode, *op0);
  if (*op1 != const0_rtx)
    *op1 = force_reg (word_mode, *op1);
}

/* Like loongarch_emit_int_compare, but for floating-point comparisons.  */

static void
loongarch_emit_float_compare (enum rtx_code *code, rtx *op0, rtx *op1)
{
  rtx cmp_op0 = *op0;
  rtx cmp_op1 = *op1;

  /* Floating-point tests use a separate FCMP.cond.fmt
     comparison to set a register.  The branch or conditional move will
     then compare that register against zero.

     Set CMP_CODE to the code of the comparison instruction and
     *CODE to the code that the branch or move should use.  */
  enum rtx_code cmp_code = *code;
  /* Three FP conditions cannot be implemented by reversing the
     operands for FCMP.cond.fmt, instead a reversed condition code is
     required and a test for false.  */
  *code = NE;
  *op0 = gen_reg_rtx (FCCmode);

  *op1 = const0_rtx;
  loongarch_emit_binary (cmp_code, *op0, cmp_op0, cmp_op1);
}

/* Try performing the comparison in OPERANDS[1], whose arms are OPERANDS[2]
   and OPERANDS[3].  Store the result in OPERANDS[0].

   On 64-bit targets, the mode of the comparison and target will always be
   SImode, thus possibly narrower than that of the comparison's operands.  */

void
loongarch_expand_scc (rtx operands[])
{
  rtx target = operands[0];
  enum rtx_code code = GET_CODE (operands[1]);
  rtx op0 = operands[2];
  rtx op1 = operands[3];

  loongarch_extend_comparands (code, &op0, &op1);
  op0 = force_reg (word_mode, op0);

  gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT);

  if (code == EQ || code == NE)
    {
      rtx zie = loongarch_zero_if_equal (op0, op1);
      loongarch_emit_binary (code, target, zie, const0_rtx);
    }
  else
    loongarch_emit_int_order_test (code, 0, target, op0, op1);
}

/* Compare OPERANDS[1] with OPERANDS[2] using comparison code
   CODE and jump to OPERANDS[3] if the condition holds.  */

void
loongarch_expand_conditional_branch (rtx *operands)
{
  enum rtx_code code = GET_CODE (operands[0]);
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  rtx condition;

  if (FLOAT_MODE_P (GET_MODE (op1)))
    loongarch_emit_float_compare (&code, &op0, &op1);
  else
    loongarch_emit_int_compare (&code, &op0, &op1);

  condition = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
  emit_jump_insn (gen_condjump (condition, operands[3]));
}

/* Perform the comparison in OPERANDS[1].  Move OPERANDS[2] into OPERANDS[0]
   if the condition holds, otherwise move OPERANDS[3] into OPERANDS[0].  */

void
loongarch_expand_conditional_move (rtx *operands)
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);
  rtx op0_extend = op0;
  rtx op1_extend = op1;

  /* Record whether operands[2] and operands[3] modes are promoted to
     word_mode.  */
  bool promote_op[2] = {false, false};
  bool promote_p = false;
  machine_mode mode = GET_MODE (operands[0]);

  if (FLOAT_MODE_P (GET_MODE (op1)))
    loongarch_emit_float_compare (&code, &op0, &op1);
  else
    {
      /* Optimize to reduce the number of instructions for ternary
         operations.  Mainly implemented based on noce_try_cmove_arith.
         For dest = (condition) ? value_if_true : value_if_false;
         the optimization requires:
           a. value_if_false = var;
           b. value_if_true = var OP C (a positive integer power of 2).

         Situations similar to the following:
           if (condition)
             dest += 1 << imm;
         become:
           dest += (condition ? 1 : 0) << imm;  */

      rtx_insn *insn;
      HOST_WIDE_INT val = 0; /* The value of rtx C.  */
      /* INSN with operands[2] as the output.  */
      rtx_insn *value_if_true_insn = NULL;
      /* INSN with operands[3] as the output.  */
      rtx_insn *value_if_false_insn = NULL;
      rtx value_if_true_insn_src = NULL_RTX;
      /* Common operand var in value_if_true and value_if_false.  */
      rtx comm_var = NULL_RTX;
      bool can_be_optimized = false;

      /* Search for value_if_true_insn and value_if_false_insn.  */
      struct sequence_stack *seq = get_current_sequence ()->next;
      for (insn = seq->last; insn; insn = PREV_INSN (insn))
        {
          if (single_set (insn))
            {
              rtx set_dest = SET_DEST (single_set (insn));
              if (rtx_equal_p (set_dest, operands[2]))
                value_if_true_insn = insn;
              else if (rtx_equal_p (set_dest, operands[3]))
                value_if_false_insn = insn;
              if (value_if_true_insn && value_if_false_insn)
                break;
            }
        }

      auto is_binary_op_0_keep_orig = [](enum rtx_code code)
        {
          switch (code)
            {
            case PLUS:
            case MINUS:
            case IOR:
            case XOR:
            case ROTATE:
            case ROTATERT:
            case ASHIFT:
            case ASHIFTRT:
            case LSHIFTRT:
              return true;
            default:
              return false;
            }
        };

      /* Check if the optimization conditions are met.  */
      if (value_if_true_insn
          && value_if_false_insn
          /* Make sure that VAR OP 0 leaves VAR unchanged.  */
          && (value_if_true_insn_src
              = SET_SRC (single_set (value_if_true_insn)))
          && is_binary_op_0_keep_orig (GET_CODE (value_if_true_insn_src))
          /* Make sure that both value_if_true and value_if_false
             have the same var.  */
          && rtx_equal_p (XEXP (value_if_true_insn_src, 0),
                          SET_SRC (single_set (value_if_false_insn))))
        {
          comm_var = SET_SRC (single_set (value_if_false_insn));
          rtx src = XEXP (value_if_true_insn_src, 1);
          rtx imm = NULL_RTX;
          if (CONST_INT_P (src))
            imm = src;
          else
            for (insn = seq->last; insn; insn = PREV_INSN (insn))
              {
                rtx set = single_set (insn);
                if (set && rtx_equal_p (SET_DEST (set), src))
                  {
                    imm = SET_SRC (set);
                    break;
                  }
              }
          if (imm && CONST_INT_P (imm))
            {
              val = INTVAL (imm);
              /* Make sure that imm is a positive integer power of 2.  */
              if (val > 0 && !(val & (val - 1)))
                can_be_optimized = true;
            }
        }

      if (GET_MODE_SIZE (GET_MODE (op0)) < UNITS_PER_WORD)
        {
          promote_op[0] = (REG_P (op0) && REG_P (operands[2])
                           && REGNO (op0) == REGNO (operands[2]));
          promote_op[1] = (REG_P (op1) && REG_P (operands[3])
                           && REGNO (op1) == REGNO (operands[3]));
        }

      if (promote_op[0] || promote_op[1])
        {
          mode = word_mode;
          promote_p = true;
        }

      loongarch_extend_comparands (code, &op0, &op1);

      op0 = force_reg (word_mode, op0);
      op0_extend = op0;
      op1_extend = force_reg (word_mode, op1);

      rtx target = gen_reg_rtx (GET_MODE (op0));

      if (code == EQ || code == NE)
        {
          op0 = loongarch_zero_if_equal (op0, op1);
          op1 = const0_rtx;
          /* For EQ, set target to 1 if op0 and op1 are the same,
             otherwise set it to 0.
             For NE, set target to 0 if op0 and op1 are the same,
             otherwise set it to 1.  */
          if (can_be_optimized)
            loongarch_emit_binary (code, target, op0, const0_rtx);
        }
      else
        {
          /* The comparison needs a separate scc instruction.  Store the
             result of the scc in *OP0 and compare it against zero.  */
          bool invert = false;
          loongarch_emit_int_order_test (code, &invert, target, op0, op1);
          if (can_be_optimized && invert)
            loongarch_emit_binary (EQ, target, target, const0_rtx);
          code = invert ? EQ : NE;
          op0 = target;
          op1 = const0_rtx;
        }

      if (can_be_optimized)
        {
          /* Perform (condition ? 1 : 0) << log2 (C).  */
          loongarch_emit_binary (ASHIFT, target, target,
                                 GEN_INT (exact_log2 (val)));
          /* Shift-related insn patterns only support SImode operands[2].  */
          enum rtx_code opcode = GET_CODE (value_if_true_insn_src);
          if (opcode == ASHIFT || opcode == ASHIFTRT || opcode == LSHIFTRT
              || opcode == ROTATE || opcode == ROTATERT)
            target = gen_lowpart (SImode, target);
          /* Perform dest = var OP ((condition ? 1 : 0) << log2 (C)).  */
          loongarch_emit_binary (opcode, operands[0],
                                 force_reg (GET_MODE (operands[3]), comm_var),
                                 target);
          return;
        }
    }

  rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
  /* There is no direct support for general conditional GP moves involving
     two registers using SEL.  */
  if (INTEGRAL_MODE_P (GET_MODE (operands[2]))
      && register_operand (operands[2], VOIDmode)
      && register_operand (operands[3], VOIDmode))
    {
      rtx op2 = operands[2];
      rtx op3 = operands[3];

      if (promote_p)
        {
          if (promote_op[0])
            op2 = op0_extend;
          else
            {
              loongarch_extend_comparands (code, &op2, &const0_rtx);
              op2 = force_reg (mode, op2);
            }

          if (promote_op[1])
            op3 = op1_extend;
          else
            {
              loongarch_extend_comparands (code, &op3, &const0_rtx);
              op3 = force_reg (mode, op3);
            }
        }

      rtx temp = gen_reg_rtx (mode);
      rtx temp2 = gen_reg_rtx (mode);

      emit_insn (gen_rtx_SET (temp,
                              gen_rtx_IF_THEN_ELSE (mode, cond,
                                                    op2, const0_rtx)));

      /* Flip the test for the second operand.  */
      cond = gen_rtx_fmt_ee ((code == EQ) ? NE : EQ, GET_MODE (op0), op0, op1);

      emit_insn (gen_rtx_SET (temp2,
                              gen_rtx_IF_THEN_ELSE (mode, cond,
                                                    op3, const0_rtx)));

      /* Merge the two results, at least one of which is guaranteed to be
         zero.  */
      if (promote_p)
        {
          rtx temp3 = gen_reg_rtx (mode);
          emit_insn (gen_rtx_SET (temp3, gen_rtx_IOR (mode, temp, temp2)));
          temp3 = gen_lowpart (GET_MODE (operands[0]), temp3);
          /* Nonzero in a subreg if it was made when accessing an object that
             was promoted to a wider mode in accord with the PROMOTED_MODE
             machine description macro.  */
          SUBREG_PROMOTED_VAR_P (temp3) = 1;
          /* Set the promoted mode for SUBREG_PROMOTED_VAR_P.  */
          SUBREG_PROMOTED_SET (temp3, SRP_SIGNED);
          loongarch_emit_move (operands[0], temp3);
        }
      else
        emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, temp2)));
    }
  else
    emit_insn (gen_rtx_SET (operands[0],
                            gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]), cond,
                                                  operands[2], operands[3])));
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */

static void
loongarch_va_start (tree valist, rtx nextarg)
{
  nextarg = plus_constant (Pmode, nextarg, -cfun->machine->varargs_size);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL.  */

static bool
loongarch_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
                                   tree exp ATTRIBUTE_UNUSED)
{
  /* Always OK.  */
  return true;
}

static machine_mode
loongarch_mode_for_move_size (HOST_WIDE_INT size)
{
  switch (size)
    {
    case 32:
      return V32QImode;
    case 16:
      return V16QImode;
    }

  return int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
}

/* Emit straight-line code to move LENGTH bytes from SRC to DEST.
   Assume that the areas do not overlap.  */

static void
loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length,
                               HOST_WIDE_INT delta)
{
  HOST_WIDE_INT offs, delta_cur;
  int i;
  machine_mode mode;
  rtx *regs;

  /* Calculate how many registers we'll need for the block move.
     We'll emit length / delta move operations with delta as the size
     first.  Then we may still have length % delta bytes not copied.
     We handle these remaining bytes by move operations with smaller
     (halved) sizes.  For example, if length = 21 and delta = 8, we'll
     emit two ld.d/st.d pairs, one ld.w/st.w pair, and one ld.b/st.b
     pair.  For each load/store pair we use a dedicated register to keep
     the pipeline as populated as possible.  */
  gcc_assert (pow2p_hwi (delta));
  HOST_WIDE_INT num_reg = length / delta + popcount_hwi (length % delta);

  /* Allocate a buffer for the temporary registers.  */
  regs = XALLOCAVEC (rtx, num_reg);

  for (delta_cur = delta, i = 0, offs = 0; offs < length; delta_cur /= 2)
    {
      mode = loongarch_mode_for_move_size (delta_cur);

      for (; offs + delta_cur <= length; offs += delta_cur, i++)
        {
          regs[i] = gen_reg_rtx (mode);
          loongarch_emit_move (regs[i], adjust_address (src, mode, offs));
        }
    }

  for (delta_cur = delta, i = 0, offs = 0; offs < length; delta_cur /= 2)
    {
      mode = loongarch_mode_for_move_size (delta_cur);

      for (; offs + delta_cur <= length; offs += delta_cur, i++)
        loongarch_emit_move (adjust_address (dest, mode, offs), regs[i]);
    }
}

/* Helper function for doing a loop-based block operation on memory
|
||
reference MEM. Each iteration of the loop will operate on LENGTH
|
||
bytes of MEM.
|
||
|
||
Create a new base register for use within the loop and point it to
|
||
the start of MEM. Create a new memory reference that uses this
|
||
register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
|
||
|
||
static void
|
||
loongarch_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
|
||
rtx *loop_mem)
|
||
{
|
||
*loop_reg = copy_addr_to_reg (XEXP (mem, 0));
|
||
|
||
/* Although the new mem does not refer to a known location,
|
||
it does keep up to LENGTH bytes of alignment. */
|
||
*loop_mem = change_address (mem, BLKmode, *loop_reg);
|
||
set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
|
||
}
|
||
|
||
/* Move LENGTH bytes from SRC to DEST using a loop that moves
   ALIGN * LARCH_MAX_MOVE_OPS_PER_LOOP_ITER bytes at a time.  LENGTH
   must be at least that many bytes.  Assume that the memory regions
   do not overlap.  */
|
||
|
||
static void
|
||
loongarch_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length,
|
||
HOST_WIDE_INT align)
|
||
{
|
||
rtx_code_label *label;
|
||
rtx src_reg, dest_reg, final_src, test;
|
||
HOST_WIDE_INT bytes_per_iter = align * LARCH_MAX_MOVE_OPS_PER_LOOP_ITER;
|
||
HOST_WIDE_INT leftover;
|
||
|
||
leftover = length % bytes_per_iter;
|
||
length -= leftover;
|
||
|
||
/* Create registers and memory references for use within the loop. */
|
||
loongarch_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
|
||
loongarch_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
|
||
|
||
/* Calculate the value that SRC_REG should have after the last iteration
|
||
of the loop. */
|
||
final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length), 0,
|
||
0, OPTAB_WIDEN);
|
||
|
||
/* Emit the start of the loop. */
|
||
label = gen_label_rtx ();
|
||
emit_label (label);
|
||
|
||
/* Emit the loop body. */
|
||
loongarch_block_move_straight (dest, src, bytes_per_iter, align);
|
||
|
||
/* Move on to the next block. */
|
||
loongarch_emit_move (src_reg,
|
||
plus_constant (Pmode, src_reg, bytes_per_iter));
|
||
loongarch_emit_move (dest_reg,
|
||
plus_constant (Pmode, dest_reg, bytes_per_iter));
|
||
|
||
/* Emit the loop condition. */
|
||
test = gen_rtx_NE (VOIDmode, src_reg, final_src);
|
||
if (Pmode == DImode)
|
||
emit_jump_insn (gen_cbranchdi4 (test, src_reg, final_src, label));
|
||
else
|
||
emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
|
||
|
||
/* Mop up any left-over bytes. */
|
||
if (leftover)
|
||
loongarch_block_move_straight (dest, src, leftover, align);
|
||
else
|
||
/* Temporary fix for PR79150. */
|
||
emit_insn (gen_nop ());
|
||
}
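
/* Illustrative trace (hypothetical numbers): with align = 8 and
   LARCH_MAX_MOVE_OPS_PER_LOOP_ITER = 4, bytes_per_iter is 32, so a
   100-byte copy runs the loop three times (4 ld.d/st.d pairs per
   iteration) and then mops up the remaining 4 bytes straight-line.  */
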
/* Expand a cpymemsi instruction, which copies LENGTH bytes from
|
||
memory reference SRC to memory reference DEST. */
|
||
|
||
bool
|
||
loongarch_expand_block_move (rtx dest, rtx src, rtx r_length, rtx r_align)
|
||
{
|
||
if (!CONST_INT_P (r_length))
|
||
return false;
|
||
|
||
HOST_WIDE_INT length = INTVAL (r_length);
|
||
if (length > la_max_inline_memcpy_size)
|
||
return false;
|
||
|
||
HOST_WIDE_INT align = INTVAL (r_align);
|
||
|
||
if (!TARGET_STRICT_ALIGN || align > LARCH_MAX_MOVE_PER_INSN)
|
||
align = LARCH_MAX_MOVE_PER_INSN;
|
||
|
||
if (length <= align * LARCH_MAX_MOVE_OPS_STRAIGHT)
|
||
{
|
||
loongarch_block_move_straight (dest, src, length, align);
|
||
return true;
|
||
}
|
||
|
||
if (optimize)
|
||
{
|
||
loongarch_block_move_loop (dest, src, length, align);
|
||
return true;
|
||
}
|
||
|
||
return false;
|
||
}
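
/* Decision summary (illustrative): constant lengths up to
   align * LARCH_MAX_MOVE_OPS_STRAIGHT are fully unrolled; longer ones
   become a loop when optimizing; everything else (including
   non-constant or over-limit lengths) falls back to the memcpy call by
   returning false.  */
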
/* Return true if loongarch_expand_block_move is the preferred
|
||
implementation of the 'cpymemsi' template. */
|
||
|
||
bool
|
||
loongarch_do_optimize_block_move_p (void)
|
||
{
|
||
  /* If -m[no-]memcpy is given explicitly.  */
  if (target_flags_explicit & MASK_MEMCPY)
    return !TARGET_MEMCPY;

  /* If not, don't optimize under -Os.  */
  return !optimize_size;
|
||
}
|
||
|
||
/* Expand a QI or HI mode atomic memory operation.
|
||
|
||
GENERATOR contains a pointer to the gen_* function that generates
|
||
the SI mode underlying atomic operation using masks that we
|
||
calculate.
|
||
|
||
RESULT is the return register for the operation. Its value is NULL
|
||
if unused.
|
||
|
||
MEM is the location of the atomic access.
|
||
|
||
OLDVAL is the first operand for the operation.
|
||
|
||
NEWVAL is the optional second operand for the operation. Its value
|
||
is NULL if unused. */
|
||
|
||
void
|
||
loongarch_expand_atomic_qihi (union loongarch_gen_fn_ptrs generator,
|
||
rtx result, rtx mem, rtx oldval, rtx newval,
|
||
rtx model)
|
||
{
|
||
rtx orig_addr, memsi_addr, memsi, shift, shiftsi, unshifted_mask;
|
||
rtx unshifted_mask_reg, mask, inverted_mask, si_op;
|
||
rtx res = NULL;
|
||
machine_mode mode;
|
||
|
||
mode = GET_MODE (mem);
|
||
|
||
/* Compute the address of the containing SImode value. */
|
||
orig_addr = force_reg (Pmode, XEXP (mem, 0));
|
||
memsi_addr = loongarch_force_binary (Pmode, AND, orig_addr,
|
||
force_reg (Pmode, GEN_INT (-4)));
|
||
|
||
/* Create a memory reference for it. */
|
||
memsi = gen_rtx_MEM (SImode, memsi_addr);
|
||
set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
|
||
MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
|
||
|
||
/* Work out the byte offset of the QImode or HImode value,
|
||
counting from the least significant byte. */
|
||
shift = loongarch_force_binary (Pmode, AND, orig_addr, GEN_INT (3));
|
||
/* Multiply by eight to convert the shift value from bytes to bits. */
|
||
loongarch_emit_binary (ASHIFT, shift, shift, GEN_INT (3));
|
||
|
||
/* Make the final shift an SImode value, so that it can be used in
|
||
SImode operations. */
|
||
shiftsi = force_reg (SImode, gen_lowpart (SImode, shift));
|
||
|
||
/* Set MASK to an inclusive mask of the QImode or HImode value. */
|
||
unshifted_mask = GEN_INT (GET_MODE_MASK (mode));
|
||
unshifted_mask_reg = force_reg (SImode, unshifted_mask);
|
||
mask = loongarch_force_binary (SImode, ASHIFT, unshifted_mask_reg, shiftsi);
|
||
|
||
/* Compute the equivalent exclusive mask. */
|
||
inverted_mask = gen_reg_rtx (SImode);
|
||
emit_insn (gen_rtx_SET (inverted_mask, gen_rtx_NOT (SImode, mask)));
|
||
|
||
/* Shift the old value into place. */
|
||
if (oldval != const0_rtx)
|
||
{
|
||
oldval = convert_modes (SImode, mode, oldval, true);
|
||
oldval = force_reg (SImode, oldval);
|
||
oldval = loongarch_force_binary (SImode, ASHIFT, oldval, shiftsi);
|
||
}
|
||
|
||
/* Do the same for the new value. */
|
||
if (newval && newval != const0_rtx)
|
||
{
|
||
newval = convert_modes (SImode, mode, newval, true);
|
||
newval = force_reg (SImode, newval);
|
||
newval = loongarch_force_binary (SImode, ASHIFT, newval, shiftsi);
|
||
}
|
||
|
||
/* Do the SImode atomic access. */
|
||
if (result)
|
||
res = gen_reg_rtx (SImode);
|
||
|
||
if (newval)
|
||
si_op = generator.fn_7 (res, memsi, mask, inverted_mask, oldval, newval,
|
||
model);
|
||
else if (result)
|
||
si_op = generator.fn_6 (res, memsi, mask, inverted_mask, oldval, model);
|
||
else
|
||
si_op = generator.fn_5 (memsi, mask, inverted_mask, oldval, model);
|
||
|
||
emit_insn (si_op);
|
||
|
||
if (result)
|
||
{
|
||
/* Shift and convert the result. */
|
||
loongarch_emit_binary (AND, res, res, mask);
|
||
loongarch_emit_binary (LSHIFTRT, res, res, shiftsi);
|
||
loongarch_emit_move (result, gen_lowpart (GET_MODE (result), res));
|
||
}
|
||
}
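
/* A worked example of the masking above (hypothetical address): for an
   HImode access at address 0x1006, memsi_addr is 0x1004, the byte
   offset is 0x1006 & 3 = 2, so shiftsi = 16 and mask = 0xffff << 16;
   the underlying SImode operation then only affects bits 16-31 of the
   containing word, and the result is masked and shifted back down.  */
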
/* Return true if (zero_extract OP WIDTH BITPOS) can be used as the
|
||
source of an "ext" instruction or the destination of an "ins"
|
||
instruction. OP must be a register operand and the following
|
||
conditions must hold:
|
||
|
||
0 <= BITPOS < GET_MODE_BITSIZE (GET_MODE (op))
|
||
0 < WIDTH <= GET_MODE_BITSIZE (GET_MODE (op))
|
||
0 < BITPOS + WIDTH <= GET_MODE_BITSIZE (GET_MODE (op))
|
||
|
||
Also reject lengths equal to a word as they are better handled
|
||
by the move patterns. */
|
||
|
||
bool
|
||
loongarch_use_ins_ext_p (rtx op, HOST_WIDE_INT width, HOST_WIDE_INT bitpos)
|
||
{
|
||
if (!register_operand (op, VOIDmode)
|
||
|| GET_MODE_BITSIZE (GET_MODE (op)) > BITS_PER_WORD)
|
||
return false;
|
||
|
||
if (!IN_RANGE (width, 1, GET_MODE_BITSIZE (GET_MODE (op)) - 1))
|
||
return false;
|
||
|
||
if (bitpos < 0 || bitpos + width > GET_MODE_BITSIZE (GET_MODE (op)))
|
||
return false;
|
||
|
||
return true;
|
||
}
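
/* For example (illustrative): in DImode, width = 16 and bitpos = 8
   satisfy all of the conditions above, while width = 64 is rejected so
   that full-word accesses go through the move patterns instead.  */
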
/* Predicate for pre-reload splitters with associated instructions,
|
||
which can match any time before the split1 pass (usually combine),
|
||
then are unconditionally split in that pass and should not be
|
||
matched again afterwards. */
|
||
|
||
bool
loongarch_pre_reload_split (void)
|
||
{
|
||
return (can_create_pseudo_p ()
|
||
&& !(cfun->curr_properties & PROP_rtl_split_insns));
|
||
}
|
||
|
||
/* Check if we can use bstrins.<d> for
|
||
op0 = (op1 & op2) | (op3 & op4)
|
||
where op0, op1, op3 are regs, and op2, op4 are integer constants. */
|
||
int
|
||
loongarch_use_bstrins_for_ior_with_mask (machine_mode mode, rtx *op)
|
||
{
|
||
return loongarch_use_bstrins_for_ior_with_mask_1 (mode,
|
||
UINTVAL (op[2]),
|
||
UINTVAL (op[4]));
|
||
}
|
||
|
||
/* Rewrite a MEM for simple load/store under -mexplicit-relocs=auto
|
||
-mcmodel={normal/medium}. */
|
||
rtx
|
||
loongarch_rewrite_mem_for_simple_ldst (rtx mem)
|
||
{
|
||
rtx addr = XEXP (mem, 0);
|
||
rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
|
||
UNSPEC_PCALAU12I_GR);
|
||
rtx new_mem;
|
||
|
||
addr = gen_rtx_LO_SUM (Pmode, force_reg (Pmode, hi), addr);
|
||
new_mem = gen_rtx_MEM (GET_MODE (mem), addr);
|
||
MEM_COPY_ATTRIBUTES (new_mem, mem);
|
||
return new_mem;
|
||
}
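
/* Illustrative effect (assuming a symbolic address SYM): (mem SYM)
   becomes (mem (lo_sum TMP SYM)) where TMP is a new pseudo holding the
   UNSPEC_PCALAU12I_GR high part, i.e. the pcalau12i + ld/st split.  */
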
/* Print the text for PRINT_OPERAND punctuation character CH to FILE.
   The punctuation characters are:

   '.'	Print the name of the register with a hard-wired zero (zero or $r0).
   '$'	Print the name of the stack pointer register (sp or $r3).

   See also loongarch_init_print_operand_punct.  */
|
||
|
||
static void
|
||
loongarch_print_operand_punctuation (FILE *file, int ch)
|
||
{
|
||
switch (ch)
|
||
{
|
||
case '.':
|
||
fputs (reg_names[GP_REG_FIRST + 0], file);
|
||
break;
|
||
|
||
case '$':
|
||
fputs (reg_names[STACK_POINTER_REGNUM], file);
|
||
break;
|
||
|
||
default:
|
||
gcc_unreachable ();
|
||
break;
|
||
}
|
||
}
|
||
|
||
/* PRINT_OPERAND prefix LETTER refers to the integer branch instruction
|
||
associated with condition CODE. Print the condition part of the
|
||
opcode to FILE. */
|
||
|
||
static void
|
||
loongarch_print_int_branch_condition (FILE *file, enum rtx_code code,
|
||
int letter)
|
||
{
|
||
switch (code)
|
||
{
|
||
case EQ:
|
||
case NE:
|
||
case GT:
|
||
case GE:
|
||
case LT:
|
||
case LE:
|
||
case GTU:
|
||
case GEU:
|
||
case LTU:
|
||
case LEU:
|
||
/* Conveniently, the LoongArch names for these conditions are the same
|
||
as their RTL equivalents. */
|
||
fputs (GET_RTX_NAME (code), file);
|
||
break;
|
||
|
||
default:
|
||
output_operand_lossage ("'%%%c' is not a valid operand prefix", letter);
|
||
break;
|
||
}
|
||
}
|
||
|
||
/* Likewise floating-point branches. */
|
||
|
||
static void
|
||
loongarch_print_float_branch_condition (FILE *file, enum rtx_code code,
|
||
int letter)
|
||
{
|
||
switch (code)
|
||
{
|
||
case EQ:
|
||
fputs ("ceqz", file);
|
||
break;
|
||
|
||
case NE:
|
||
fputs ("cnez", file);
|
||
break;
|
||
|
||
default:
|
||
output_operand_lossage ("'%%%c' is not a valid operand prefix", letter);
|
||
break;
|
||
}
|
||
}
|
||
|
||
/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
|
||
|
||
static bool
|
||
loongarch_print_operand_punct_valid_p (unsigned char code)
|
||
{
|
||
return loongarch_print_operand_punct[code];
|
||
}
|
||
|
||
/* Return true if a FENCE should be emitted before a memory access to
   implement the release portion of memory model MODEL.  */
|
||
|
||
static bool
|
||
loongarch_memmodel_needs_rel_acq_fence (enum memmodel model)
|
||
{
|
||
switch (memmodel_base (model))
|
||
{
|
||
case MEMMODEL_ACQ_REL:
|
||
case MEMMODEL_SEQ_CST:
|
||
case MEMMODEL_RELEASE:
|
||
case MEMMODEL_ACQUIRE:
|
||
return true;
|
||
|
||
case MEMMODEL_RELAXED:
|
||
return false;
|
||
|
||
default:
|
||
gcc_unreachable ();
|
||
}
|
||
}
|
||
|
||
/* Return true if a FENCE should be emitted after a failed CAS to
|
||
implement the acquire semantic of failure_memorder. */
|
||
|
||
static bool
|
||
loongarch_cas_failure_memorder_needs_acquire (enum memmodel model)
|
||
{
|
||
switch (memmodel_base (model))
|
||
{
|
||
case MEMMODEL_ACQUIRE:
|
||
case MEMMODEL_ACQ_REL:
|
||
case MEMMODEL_SEQ_CST:
|
||
return true;
|
||
|
||
case MEMMODEL_RELAXED:
|
||
case MEMMODEL_RELEASE:
|
||
return false;
|
||
|
||
    /* MEMMODEL_CONSUME is deliberately not handled because it's always
       replaced by MEMMODEL_ACQUIRE as of now.  If you see an ICE caused
       by MEMMODEL_CONSUME, read the change (re)introducing it carefully
       and decide what to do.  See PR 59448 and get_memmodel in
       builtins.cc.  */
|
||
default:
|
||
gcc_unreachable ();
|
||
}
|
||
}
|
||
|
||
/* Print symbolic operand OP, which is part of a HIGH or LO_SUM.
   HI64_PART selects the relocations for bits 32 and above of a 64-bit
   address; HI_RELOC indicates a high-part reloc.  */
|
||
|
||
static void
|
||
loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part,
|
||
bool hi_reloc)
|
||
{
|
||
const char *reloc;
|
||
enum loongarch_symbol_type symbol_type =
|
||
loongarch_classify_symbolic_expression (op);
|
||
|
||
if (loongarch_symbol_extreme_p (symbol_type))
|
||
gcc_assert (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE);
|
||
|
||
switch (symbol_type)
|
||
{
|
||
case SYMBOL_PCREL64:
|
||
if (hi64_part)
|
||
{
|
||
reloc = hi_reloc ? "%pc64_hi12" : "%pc64_lo20";
|
||
break;
|
||
}
|
||
/* fall through */
|
||
case SYMBOL_PCREL:
|
||
reloc = hi_reloc ? "%pc_hi20" : "%pc_lo12";
|
||
break;
|
||
|
||
case SYMBOL_GOT_DISP:
|
||
if (hi64_part)
|
||
{
|
||
if (TARGET_CMODEL_EXTREME)
|
||
reloc = hi_reloc ? "%got64_pc_hi12" : "%got64_pc_lo20";
|
||
else
|
||
gcc_unreachable ();
|
||
}
|
||
else
|
||
reloc = hi_reloc ? "%got_pc_hi20" : "%got_pc_lo12";
|
||
break;
|
||
|
||
case SYMBOL_TLS_IE:
|
||
if (hi64_part)
|
||
{
|
||
if (TARGET_CMODEL_EXTREME)
|
||
reloc = hi_reloc ? "%ie64_pc_hi12" : "%ie64_pc_lo20";
|
||
else
|
||
gcc_unreachable ();
|
||
}
|
||
else
|
||
reloc = hi_reloc ? "%ie_pc_hi20" : "%ie_pc_lo12";
|
||
break;
|
||
|
||
case SYMBOL_TLS_LE:
|
||
if (hi64_part)
|
||
{
|
||
if (TARGET_CMODEL_EXTREME)
|
||
reloc = hi_reloc ? "%le64_hi12" : "%le64_lo20";
|
||
else
|
||
gcc_unreachable ();
|
||
}
|
||
else
|
||
{
|
||
if (HAVE_AS_TLS_LE_RELAXATION && !TARGET_CMODEL_EXTREME)
|
||
reloc = hi_reloc ? "%le_hi20_r" : "%le_lo12_r";
|
||
else
|
||
reloc = hi_reloc ? "%le_hi20" : "%le_lo12";
|
||
}
|
||
break;
|
||
|
||
case SYMBOL_TLSGD:
|
||
if (hi64_part)
|
||
{
|
||
if (TARGET_CMODEL_EXTREME)
|
||
reloc = hi_reloc ? "%got64_pc_hi12" : "%got64_pc_lo20";
|
||
else
|
||
gcc_unreachable ();
|
||
}
|
||
else
|
||
reloc = hi_reloc ? "%gd_pc_hi20" : "%got_pc_lo12";
|
||
break;
|
||
|
||
case SYMBOL_TLSLDM:
|
||
if (hi64_part)
|
||
{
|
||
if (TARGET_CMODEL_EXTREME)
|
||
reloc = hi_reloc ? "%got64_pc_hi12" : "%got64_pc_lo20";
|
||
else
|
||
gcc_unreachable ();
|
||
}
|
||
else
|
||
reloc = hi_reloc ? "%ld_pc_hi20" : "%got_pc_lo12";
|
||
break;
|
||
|
||
default:
|
||
gcc_unreachable ();
|
||
}
|
||
|
||
fprintf (file, "%s(", reloc);
|
||
output_addr_const (file, loongarch_strip_unspec_address (op));
|
||
fputc (')', file);
|
||
}
|
||
|
||
/* Implement TARGET_PRINT_OPERAND.  The LoongArch-specific operand codes are:

   'A'	Print a _DB suffix if the memory model requires a release.
   'b'	Print the address of a memory operand, without offset.
   'B'	Print CONST_INT OP element 0 of a replicated CONST_VECTOR
	as an unsigned byte [0..255].
   'c'	Print an integer.
   'C'	Print the integer branch condition for comparison OP.
   'd'	Print CONST_INT OP in decimal.
   'D'	Print the address of memory operand OP plus 4 (the second word
	of a double-word access).
   'E'	Print CONST_INT OP element 0 of a replicated CONST_VECTOR in decimal.
   'F'	Print the FPU branch condition for comparison OP.
   'G'	Print a DBAR insn for CAS failure (with an acquire semantic if
	needed, otherwise a simple load-load barrier).
   'H'	Print the address bits 52-63 relocation associated with OP.
   'h'	Print the high-part relocation associated with OP.
   'i'	Print i if the operand is not a register.
   'L'	Print the low-part relocation associated with OP.
   'm'	Print one less than CONST_INT OP in decimal.
   'M'	Print the indices of the highest and the lowest enabled bit in
	a mask (for bstr* instructions).
   'N'	Print the inverse of the integer branch condition for comparison OP.
   'O'	Print "od" or "ev" for an odd or even UNSPEC selector in OP.
   'Q'	Print R_LARCH_RELAX for TLS IE.
   'r'	Print the address bits 12-31 relocation associated with OP.
   'R'	Print the address bits 32-51 relocation associated with OP.
   'T'	Print 'f' for (eq:CC ...), 't' for (ne:CC ...),
	'z' for (eq:?I ...), 'n' for (ne:?I ...).
   't'	Like 'T', but with the EQ/NE cases reversed.
   'u'	Print a LASX register.
   'v'	Print the insn size suffix b, h, w or d for vector modes V16QI, V8HI,
	V4SI, V2DI, and w, d for vector modes V4SF, V2DF respectively.
   'V'	Print exact log2 of CONST_INT OP element 0 of a replicated
	CONST_VECTOR in decimal.
   'W'	Print the inverse of the FPU branch condition for comparison OP.
   'w'	Print a LSX register.
   'X'	Print CONST_INT OP in hexadecimal format.
   'x'	Print the low 16 bits of CONST_INT OP in hexadecimal format.
   'Y'	Print loongarch_fp_conditions[INTVAL (OP)].
   'y'	Print exact log2 of CONST_INT OP in decimal.
   'Z'	Print OP and a comma for 8CC, otherwise print nothing.
   'z'	Print $0 if OP is zero, otherwise print OP normally.  */
|
||
|
||
static void
|
||
loongarch_print_operand (FILE *file, rtx op, int letter)
|
||
{
|
||
enum rtx_code code;
|
||
|
||
if (loongarch_print_operand_punct_valid_p (letter))
|
||
{
|
||
loongarch_print_operand_punctuation (file, letter);
|
||
return;
|
||
}
|
||
|
||
gcc_assert (op);
|
||
code = GET_CODE (op);
|
||
|
||
switch (letter)
|
||
{
|
||
case 'A':
|
||
if (loongarch_memmodel_needs_rel_acq_fence ((enum memmodel) INTVAL (op)))
|
||
fputs ("_db", file);
|
||
break;
|
||
case 'E':
|
||
if (GET_CODE (op) == CONST_VECTOR)
|
||
{
|
||
gcc_assert (loongarch_const_vector_same_val_p (op, GET_MODE (op)));
|
||
op = CONST_VECTOR_ELT (op, 0);
|
||
gcc_assert (CONST_INT_P (op));
|
||
fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op));
|
||
}
|
||
else
|
||
output_operand_lossage ("invalid use of '%%%c'", letter);
|
||
break;
|
||
|
||
|
||
case 'c':
|
||
if (CONST_INT_P (op))
|
||
fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op));
|
||
else
|
||
output_operand_lossage ("unsupported operand for code '%c'", letter);
|
||
|
||
break;
|
||
|
||
case 'C':
|
||
loongarch_print_int_branch_condition (file, code, letter);
|
||
break;
|
||
|
||
case 'd':
|
||
if (CONST_INT_P (op))
|
||
fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op));
|
||
else
|
||
output_operand_lossage ("invalid use of '%%%c'", letter);
|
||
break;
|
||
|
||
case 'O':
|
||
fprintf (file, "%s", INTVAL (XVECEXP (op, 0, 0)) ? "od" : "ev");
|
||
break;
|
||
|
||
case 'F':
|
||
loongarch_print_float_branch_condition (file, code, letter);
|
||
break;
|
||
|
||
case 'G':
|
||
if (loongarch_cas_failure_memorder_needs_acquire (
|
||
memmodel_from_int (INTVAL (op))))
|
||
fputs ("dbar\t0b10100", file);
|
||
else if (!ISA_HAS_LD_SEQ_SA)
|
||
fputs ("dbar\t0x700", file);
|
||
break;
|
||
|
||
case 'h':
|
||
if (code == HIGH)
|
||
op = XEXP (op, 0);
|
||
loongarch_print_operand_reloc (file, op, false /* hi64_part */,
|
||
true /* hi_reloc */);
|
||
break;
|
||
|
||
case 'H':
|
||
loongarch_print_operand_reloc (file, op, true /* hi64_part */,
|
||
true /* hi_reloc */);
|
||
break;
|
||
|
||
case 'i':
|
||
if (code != REG)
|
||
fputs ("i", file);
|
||
break;
|
||
|
||
    case 'L':
      loongarch_print_operand_reloc (file, op, false /* hi64_part */,
				     false /* hi_reloc */);
      break;
|
||
case 'B':
|
||
if (GET_CODE (op) == CONST_VECTOR)
|
||
{
|
||
gcc_assert (loongarch_const_vector_same_val_p (op, GET_MODE (op)));
|
||
op = CONST_VECTOR_ELT (op, 0);
|
||
gcc_assert (CONST_INT_P (op));
|
||
unsigned HOST_WIDE_INT val8 = UINTVAL (op) & GET_MODE_MASK (QImode);
|
||
fprintf (file, HOST_WIDE_INT_PRINT_UNSIGNED, val8);
|
||
}
|
||
else
|
||
output_operand_lossage ("invalid use of '%%%c'", letter);
|
||
break;
|
||
|
||
case 'm':
|
||
if (CONST_INT_P (op))
|
||
fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op) - 1);
|
||
else
|
||
output_operand_lossage ("invalid use of '%%%c'", letter);
|
||
break;
|
||
|
||
case 'M':
|
||
if (CONST_INT_P (op))
|
||
{
|
||
HOST_WIDE_INT mask = INTVAL (op);
|
||
fprintf (file, "%d,%d", floor_log2 (mask), ctz_hwi (mask));
|
||
}
|
||
else
|
||
output_operand_lossage ("invalid use of '%%%c'", letter);
|
||
break;
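
      /* The 'M' output above (illustrative): a mask of 0x0ff0 prints
	 "11,4", since floor_log2 gives the index of the highest enabled
	 bit and ctz_hwi the lowest, matching the msbd,lsbd operand
	 order of the bstr* instructions.  */
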
case 'N':
|
||
loongarch_print_int_branch_condition (file, reverse_condition (code),
|
||
letter);
|
||
break;
|
||
|
||
case 'Q':
|
||
if (!TARGET_LINKER_RELAXATION)
|
||
break;
|
||
|
||
if (code == HIGH)
|
||
op = XEXP (op, 0);
|
||
|
||
if (loongarch_classify_symbolic_expression (op) == SYMBOL_TLS_IE)
|
||
fprintf (file, ".reloc\t.,R_LARCH_RELAX\n\t");
|
||
|
||
break;
|
||
|
||
    case 'r':
      loongarch_print_operand_reloc (file, op, false /* hi64_part */,
				     true /* hi_reloc */);
      break;

    case 'R':
      loongarch_print_operand_reloc (file, op, true /* hi64_part */,
				     false /* hi_reloc */);
      break;
|
||
|
||
case 't':
|
||
case 'T':
|
||
{
|
||
int truth = (code == NE) == (letter == 'T');
|
||
fputc ("zfnt"[truth * 2 + FCC_REG_P (REGNO (XEXP (op, 0)))], file);
|
||
}
|
||
break;
|
||
|
||
case 'V':
|
||
if (CONST_VECTOR_P (op))
|
||
{
|
||
machine_mode mode = GET_MODE_INNER (GET_MODE (op));
|
||
unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (op, 0));
|
||
int vlog2 = exact_log2 (val & GET_MODE_MASK (mode));
|
||
if (vlog2 != -1)
|
||
fprintf (file, "%d", vlog2);
|
||
else
|
||
output_operand_lossage ("invalid use of '%%%c'", letter);
|
||
}
|
||
else
|
||
output_operand_lossage ("invalid use of '%%%c'", letter);
|
||
break;
|
||
|
||
case 'W':
|
||
loongarch_print_float_branch_condition (file, reverse_condition (code),
|
||
letter);
|
||
break;
|
||
|
||
case 'x':
|
||
if (CONST_INT_P (op))
|
||
fprintf (file, HOST_WIDE_INT_PRINT_HEX, INTVAL (op) & 0xffff);
|
||
else
|
||
output_operand_lossage ("invalid use of '%%%c'", letter);
|
||
break;
|
||
|
||
case 'X':
|
||
if (CONST_INT_P (op))
|
||
fprintf (file, HOST_WIDE_INT_PRINT_HEX, INTVAL (op));
|
||
else
|
||
output_operand_lossage ("invalid use of '%%%c'", letter);
|
||
break;
|
||
|
||
case 'y':
|
||
if (CONST_INT_P (op))
|
||
{
|
||
int val = exact_log2 (INTVAL (op));
|
||
if (val != -1)
|
||
fprintf (file, "%d", val);
|
||
else
|
||
output_operand_lossage ("invalid use of '%%%c'", letter);
|
||
}
|
||
else
|
||
output_operand_lossage ("invalid use of '%%%c'", letter);
|
||
break;
|
||
|
||
case 'Y':
|
||
if (code == CONST_INT
|
||
&& UINTVAL (op) < ARRAY_SIZE (loongarch_fp_conditions))
|
||
fputs (loongarch_fp_conditions[UINTVAL (op)], file);
|
||
else
|
||
output_operand_lossage ("'%%%c' is not a valid operand prefix",
|
||
letter);
|
||
break;
|
||
|
||
case 'Z':
|
||
loongarch_print_operand (file, op, 0);
|
||
fputc (',', file);
|
||
break;
|
||
|
||
    case 'w':
      if (code == REG && LSX_REG_P (REGNO (op)))
	fprintf (file, "$vr%s", &reg_names[REGNO (op)][2]);
      else
	output_operand_lossage ("invalid use of '%%%c'", letter);
      break;

    case 'u':
      if (code == REG && LASX_REG_P (REGNO (op)))
	fprintf (file, "$xr%s", &reg_names[REGNO (op)][2]);
      else
	output_operand_lossage ("invalid use of '%%%c'", letter);
      break;
|
||
|
||
case 'v':
|
||
switch (GET_MODE (op))
|
||
{
|
||
case E_V16QImode:
|
||
case E_V32QImode:
|
||
fprintf (file, "b");
|
||
break;
|
||
case E_V8HImode:
|
||
case E_V16HImode:
|
||
fprintf (file, "h");
|
||
break;
|
||
case E_V4SImode:
|
||
case E_V4SFmode:
|
||
case E_V8SImode:
|
||
case E_V8SFmode:
|
||
fprintf (file, "w");
|
||
break;
|
||
case E_V2DImode:
|
||
case E_V2DFmode:
|
||
case E_V4DImode:
|
||
case E_V4DFmode:
|
||
fprintf (file, "d");
|
||
break;
|
||
default:
|
||
output_operand_lossage ("invalid use of '%%%c'", letter);
|
||
}
|
||
break;
|
||
|
||
default:
|
||
switch (code)
|
||
{
|
||
case REG:
|
||
{
|
||
unsigned int regno = REGNO (op);
|
||
if (letter && letter != 'z')
|
||
output_operand_lossage ("invalid use of '%%%c'", letter);
|
||
fprintf (file, "%s", reg_names[regno]);
|
||
}
|
||
break;
|
||
|
||
case MEM:
|
||
if (letter == 'D')
|
||
output_address (GET_MODE (op),
|
||
plus_constant (Pmode, XEXP (op, 0), 4));
|
||
else if (letter == 'b')
|
||
{
|
||
gcc_assert (REG_P (XEXP (op, 0)));
|
||
loongarch_print_operand (file, XEXP (op, 0), 0);
|
||
}
|
||
else if (letter && letter != 'z')
|
||
output_operand_lossage ("invalid use of '%%%c'", letter);
|
||
else
|
||
output_address (GET_MODE (op), XEXP (op, 0));
|
||
break;
|
||
|
||
default:
|
||
if (letter == 'z' && op == CONST0_RTX (GET_MODE (op)))
|
||
fputs (reg_names[GP_REG_FIRST], file);
|
||
else if (letter && letter != 'z')
|
||
output_operand_lossage ("invalid use of '%%%c'", letter);
|
||
else
|
||
output_addr_const (file, loongarch_strip_unspec_address (op));
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
|
||
/* Implement TARGET_PRINT_OPERAND_ADDRESS. */
|
||
|
||
static void
|
||
loongarch_print_operand_address (FILE *file, machine_mode /* mode */, rtx x)
|
||
{
|
||
struct loongarch_address_info addr;
|
||
|
||
if (loongarch_classify_address (&addr, x, word_mode, true))
|
||
switch (addr.type)
|
||
{
|
||
case ADDRESS_REG:
|
||
fprintf (file, "%s,", reg_names[REGNO (addr.reg)]);
|
||
loongarch_print_operand (file, addr.offset, 0);
|
||
return;
|
||
|
||
case ADDRESS_REG_REG:
|
||
fprintf (file, "%s,%s", reg_names[REGNO (addr.reg)],
|
||
reg_names[REGNO (addr.offset)]);
|
||
return;
|
||
|
||
case ADDRESS_LO_SUM:
|
||
fprintf (file, "%s,", reg_names[REGNO (addr.reg)]);
|
||
loongarch_print_operand_reloc (file, addr.offset, false /* hi64_part */,
|
||
false /* hi_reloc */);
|
||
return;
|
||
|
||
case ADDRESS_CONST_INT:
|
||
fprintf (file, "%s,", reg_names[GP_REG_FIRST]);
|
||
output_addr_const (file, x);
|
||
return;
|
||
|
||
case ADDRESS_SYMBOLIC:
|
||
output_addr_const (file, loongarch_strip_unspec_address (x));
|
||
return;
|
||
}
|
||
if (CONST_INT_P (x))
|
||
output_addr_const (file, x);
|
||
else
|
||
gcc_unreachable ();
|
||
}
|
||
|
||
/* Implement TARGET_ASM_SELECT_RTX_SECTION. */
|
||
|
||
static section *
|
||
loongarch_select_rtx_section (machine_mode mode, rtx x,
|
||
unsigned HOST_WIDE_INT align)
|
||
{
|
||
/* ??? Consider using mergeable small data sections. */
|
||
if (loongarch_rtx_constant_in_small_data_p (mode))
|
||
return get_named_section (NULL, ".sdata", 0);
|
||
|
||
return default_elf_select_rtx_section (mode, x, align);
|
||
}
|
||
|
||
/* Implement TARGET_ASM_FUNCTION_RODATA_SECTION.
|
||
|
||
The complication here is that jump tables will use absolute addresses,
|
||
and should therefore not be included in the read-only part of a DSO.
|
||
Handle such cases by selecting a normal data section instead of a
|
||
read-only one. The logic apes that in default_function_rodata_section. */
|
||
|
||
static section *
|
||
loongarch_function_rodata_section (tree decl, bool)
|
||
{
|
||
return default_function_rodata_section (decl, false);
|
||
}
|
||
|
||
/* Implement TARGET_IN_SMALL_DATA_P. */
|
||
|
||
static bool
|
||
loongarch_in_small_data_p (const_tree decl)
|
||
{
|
||
int size;
|
||
|
||
if (TREE_CODE (decl) == STRING_CST || TREE_CODE (decl) == FUNCTION_DECL)
|
||
return false;
|
||
|
||
if (VAR_P (decl) && DECL_SECTION_NAME (decl) != 0)
|
||
{
|
||
const char *name;
|
||
|
||
/* Reject anything that isn't in a known small-data section. */
|
||
name = DECL_SECTION_NAME (decl);
|
||
if (strcmp (name, ".sdata") != 0 && strcmp (name, ".sbss") != 0)
|
||
return false;
|
||
|
||
/* If a symbol is defined externally, the assembler will use the
|
||
usual -G rules when deciding how to implement macros. */
|
||
if (!DECL_EXTERNAL (decl))
|
||
return true;
|
||
}
|
||
|
||
/* We have traditionally not treated zero-sized objects as small data,
|
||
so this is now effectively part of the ABI. */
|
||
size = int_size_in_bytes (TREE_TYPE (decl));
|
||
return size > 0 && size <= g_switch_value;
|
||
}
|
||
|
||
/* The LoongArch debug format wants all automatic variables and arguments
|
||
to be in terms of the virtual frame pointer (stack pointer before
|
||
any adjustment in the function), while the LoongArch linker wants
|
||
the frame pointer to be the stack pointer after the initial
|
||
adjustment. So, we do the adjustment here. The arg pointer (which
|
||
is eliminated) points to the virtual frame pointer, while the frame
|
||
pointer (which may be eliminated) points to the stack pointer after
|
||
the initial adjustments. */
|
||
|
||
HOST_WIDE_INT
|
||
loongarch_debugger_offset (rtx addr, HOST_WIDE_INT offset)
|
||
{
|
||
rtx offset2 = const0_rtx;
|
||
rtx reg = eliminate_constant_term (addr, &offset2);
|
||
|
||
if (offset == 0)
|
||
offset = INTVAL (offset2);
|
||
|
||
if (reg == stack_pointer_rtx
|
||
|| reg == frame_pointer_rtx
|
||
|| reg == hard_frame_pointer_rtx)
|
||
{
|
||
offset -= cfun->machine->frame.total_size;
|
||
if (reg == hard_frame_pointer_rtx)
|
||
offset += cfun->machine->frame.hard_frame_pointer_offset;
|
||
}
|
||
|
||
return offset;
|
||
}
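
/* For example (hypothetical frame): with frame.total_size = 48, a debug
   reference at sp + 8 is rewritten to 8 - 48 = -40 relative to the
   virtual frame pointer; hard-frame-pointer references are further
   adjusted by hard_frame_pointer_offset.  */
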
/* Implement ASM_OUTPUT_EXTERNAL. */
|
||
|
||
void
|
||
loongarch_output_external (FILE *file, tree decl, const char *name)
|
||
{
|
||
default_elf_asm_output_external (file, decl, name);
|
||
|
||
/* We output the name if and only if TREE_SYMBOL_REFERENCED is
|
||
set in order to avoid putting out names that are never really
|
||
used. */
|
||
if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
|
||
{
|
||
if (loongarch_in_small_data_p (decl))
|
||
{
|
||
/* When using assembler macros, emit .extern directives for
|
||
all small-data externs so that the assembler knows how
|
||
big they are.
|
||
|
||
In most cases it would be safe (though pointless) to emit
|
||
.externs for other symbols too. One exception is when an
|
||
object is within the -G limit but declared by the user to
|
||
be in a section other than .sbss or .sdata. */
|
||
fputs ("\t.extern\t", file);
|
||
assemble_name (file, name);
|
||
fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC "\n",
|
||
int_size_in_bytes (TREE_TYPE (decl)));
|
||
}
|
||
}
|
||
}
|
||
|
||
/* Implement TARGET_ASM_OUTPUT_DWARF_DTPREL. */
|
||
|
||
static void ATTRIBUTE_UNUSED
|
||
loongarch_output_dwarf_dtprel (FILE *file, int size, rtx x)
|
||
{
|
||
switch (size)
|
||
{
|
||
case 4:
|
||
fputs ("\t.dtprelword\t", file);
|
||
break;
|
||
|
||
case 8:
|
||
fputs ("\t.dtpreldword\t", file);
|
||
break;
|
||
|
||
default:
|
||
gcc_unreachable ();
|
||
}
|
||
output_addr_const (file, x);
|
||
fputs ("+0x8000", file);
|
||
}
|
||
|
||
/* Implement ASM_OUTPUT_ASCII. */
|
||
|
||
void
|
||
loongarch_output_ascii (FILE *stream, const char *string, size_t len)
|
||
{
|
||
size_t i;
|
||
int cur_pos;
|
||
|
||
cur_pos = 17;
|
||
fprintf (stream, "\t.ascii\t\"");
|
||
for (i = 0; i < len; i++)
|
||
{
|
||
int c;
|
||
|
||
c = (unsigned char) string[i];
|
||
if (ISPRINT (c))
|
||
{
|
||
if (c == '\\' || c == '\"')
|
||
{
|
||
putc ('\\', stream);
|
||
cur_pos++;
|
||
}
|
||
putc (c, stream);
|
||
cur_pos++;
|
||
}
|
||
else
|
||
{
|
||
fprintf (stream, "\\%03o", c);
|
||
cur_pos += 4;
|
||
}
|
||
|
||
if (cur_pos > 72 && i + 1 < len)
|
||
{
|
||
cur_pos = 17;
|
||
fprintf (stream, "\"\n\t.ascii\t\"");
|
||
}
|
||
}
|
||
fprintf (stream, "\"\n");
|
||
}
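
/* Example output (illustrative): for the three bytes 'a', '"' and
   newline this emits .ascii "a\"\012" -- printable characters are
   copied verbatim (escaping backslash and double quote), anything else
   becomes a three-digit octal escape, and a new .ascii directive is
   started once the column passes 72.  */
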
/* Implement TARGET_FRAME_POINTER_REQUIRED. */
|
||
|
||
static bool
|
||
loongarch_frame_pointer_required (void)
|
||
{
|
||
/* If the function contains dynamic stack allocations, we need to
|
||
use the frame pointer to access the static parts of the frame. */
|
||
if (cfun->calls_alloca)
|
||
return true;
|
||
|
||
return false;
|
||
}
|
||
|
||
/* Implement TARGET_CAN_ELIMINATE. Make sure that we're not trying
|
||
to eliminate to the wrong hard frame pointer. */
|
||
|
||
static bool
|
||
loongarch_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
|
||
{
|
||
return (to == HARD_FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM);
|
||
}
|
||
|
||
/* Implement RETURN_ADDR_RTX. We do not support moving back to a
|
||
previous frame. */
|
||
|
||
rtx
|
||
loongarch_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
|
||
{
|
||
if (count != 0)
|
||
return const0_rtx;
|
||
|
||
return get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNUM);
|
||
}
|
||
|
||
/* Emit code to change the current function's return address to
|
||
ADDRESS. SCRATCH is available as a scratch register, if needed.
|
||
ADDRESS and SCRATCH are both word-mode GPRs. */
|
||
|
||
void
|
||
loongarch_set_return_address (rtx address, rtx scratch)
|
||
{
|
||
rtx slot_address;
|
||
|
||
gcc_assert (BITSET_P (cfun->machine->frame.mask, RETURN_ADDR_REGNUM));
|
||
|
||
if (frame_pointer_needed)
|
||
slot_address = loongarch_add_offset (scratch, hard_frame_pointer_rtx,
|
||
-UNITS_PER_WORD);
|
||
else
|
||
slot_address = loongarch_add_offset (scratch, stack_pointer_rtx,
|
||
cfun->machine->frame.gp_sp_offset);
|
||
|
||
loongarch_emit_move (gen_frame_mem (GET_MODE (address), slot_address),
|
||
address);
|
||
}
|
||
|
||
/* Return true if register REGNO can store a value of mode MODE.
   The result of this function is cached in
   loongarch_hard_regno_mode_ok_p.  */
|
||
|
||
static bool
|
||
loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode)
|
||
{
|
||
unsigned int size;
|
||
enum mode_class mclass;
|
||
|
||
if (mode == FCCmode)
|
||
return FCC_REG_P (regno) || GP_REG_P (regno) || FP_REG_P (regno);
|
||
|
||
size = GET_MODE_SIZE (mode);
|
||
mclass = GET_MODE_CLASS (mode);
|
||
|
||
if (GP_REG_P (regno)
|
||
&& !LSX_SUPPORTED_MODE_P (mode)
|
||
&& !LASX_SUPPORTED_MODE_P (mode))
|
||
return ((regno - GP_REG_FIRST) & 1) == 0 || size <= UNITS_PER_WORD;
|
||
|
||
if (FP_REG_P (regno))
|
||
{
|
||
      /* Allow 128-bit or 256-bit vector modes in all FPRs.  */
|
||
if (LSX_SUPPORTED_MODE_P (mode)
|
||
|| LASX_SUPPORTED_MODE_P (mode))
|
||
return true;
|
||
|
||
if (mclass == MODE_FLOAT
|
||
|| mclass == MODE_COMPLEX_FLOAT
|
||
|| mclass == MODE_VECTOR_FLOAT)
|
||
return size <= UNITS_PER_HWFPVALUE;
|
||
|
||
/* Allow integer modes that fit into a single register. We need
|
||
to put integers into FPRs when using instructions like CVT
|
||
and TRUNC. There's no point allowing sizes smaller than a word,
|
||
because the FPU has no appropriate load/store instructions. */
|
||
if (mclass == MODE_INT)
|
||
return size >= MIN_UNITS_PER_WORD && size <= UNITS_PER_FP_REG;
|
||
}
|
||
|
||
return false;
|
||
}
|
||
|
||
/* Implement TARGET_HARD_REGNO_MODE_OK. */
|
||
|
||
static bool
|
||
loongarch_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
|
||
{
|
||
return loongarch_hard_regno_mode_ok_p[mode][regno];
|
||
}
|
||
|
||
|
||
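/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  Only the low 64
   bits of an FPR are preserved across calls when LSX/LASX is enabled,
   so wider vector modes are partially clobbered.  */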
static bool
|
||
loongarch_hard_regno_call_part_clobbered (unsigned int,
|
||
unsigned int regno, machine_mode mode)
|
||
{
|
||
if (ISA_HAS_LSX && FP_REG_P (regno) && GET_MODE_SIZE (mode) > 8)
|
||
return true;
|
||
|
||
return false;
|
||
}
|
||
|
||
/* Implement TARGET_HARD_REGNO_NREGS. */
|
||
|
||
static unsigned int
|
||
loongarch_hard_regno_nregs (unsigned int regno, machine_mode mode)
|
||
{
|
||
if (FCC_REG_P (regno))
|
||
/* The size of FP status registers is always 4, because they only hold
|
||
FCCmode values, and FCCmode is always considered to be 4 bytes wide. */
|
||
return (GET_MODE_SIZE (mode) + 3) / 4;
|
||
|
||
if (FP_REG_P (regno))
|
||
{
|
||
if (LSX_SUPPORTED_MODE_P (mode))
|
||
return 1;
|
||
|
||
if (LASX_SUPPORTED_MODE_P (mode))
|
||
return 1;
|
||
|
||
return (GET_MODE_SIZE (mode) + UNITS_PER_FP_REG - 1) / UNITS_PER_FP_REG;
|
||
}
|
||
|
||
/* All other registers are word-sized. */
|
||
return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
|
||
}
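
/* For example (illustrative): a 32-byte V4DImode value occupies a
   single FPR when LASX is available, while the generic word-size
   formula above would spread it over 32 / UNITS_PER_WORD = 4 GPRs on a
   64-bit target.  */
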
/* Implement CLASS_MAX_NREGS, taking the maximum of the cases
|
||
in loongarch_hard_regno_nregs. */
|
||
|
||
int
|
||
loongarch_class_max_nregs (enum reg_class rclass, machine_mode mode)
|
||
{
|
||
int size;
|
||
HARD_REG_SET left;
|
||
|
||
size = 0x8000;
|
||
left = reg_class_contents[rclass];
|
||
if (hard_reg_set_intersect_p (left, reg_class_contents[(int) FCC_REGS]))
|
||
{
|
||
if (loongarch_hard_regno_mode_ok (FCC_REG_FIRST, mode))
|
||
size = MIN (size, 4);
|
||
|
||
left &= ~reg_class_contents[FCC_REGS];
|
||
}
|
||
if (hard_reg_set_intersect_p (left, reg_class_contents[(int) FP_REGS]))
|
||
{
|
||
if (loongarch_hard_regno_mode_ok (FP_REG_FIRST, mode))
|
||
{
|
||
	  /* FIXME.  */
|
||
if (LASX_SUPPORTED_MODE_P (mode))
|
||
size = MIN (size, UNITS_PER_LASX_REG);
|
||
else if (LSX_SUPPORTED_MODE_P (mode))
|
||
size = MIN (size, UNITS_PER_LSX_REG);
|
||
else
|
||
size = MIN (size, UNITS_PER_FP_REG);
|
||
}
|
||
left &= ~reg_class_contents[FP_REGS];
|
||
}
|
||
if (!hard_reg_set_empty_p (left))
|
||
size = MIN (size, UNITS_PER_WORD);
|
||
return (GET_MODE_SIZE (mode) + size - 1) / size;
|
||
}
|
||
|
||
/* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
|
||
|
||
static bool
|
||
loongarch_can_change_mode_class (machine_mode from, machine_mode to,
|
||
reg_class_t rclass)
|
||
{
|
||
/* Allow conversions between different LSX/LASX vector modes. */
|
||
if (LASX_SUPPORTED_MODE_P (from) && LASX_SUPPORTED_MODE_P (to))
|
||
return true;
|
||
|
||
/* Allow conversions between different LSX vector modes. */
|
||
if (LSX_SUPPORTED_MODE_P (from) && LSX_SUPPORTED_MODE_P (to))
|
||
return true;
|
||
|
||
/* Allow conversion between LSX vector mode and scalar fp mode. */
|
||
if ((LSX_SUPPORTED_MODE_P (from) && SCALAR_FLOAT_MODE_P (to))
|
||
|| ((SCALAR_FLOAT_MODE_P (from) && LSX_SUPPORTED_MODE_P (to))))
|
||
return true;
|
||
|
||
return !reg_classes_intersect_p (FP_REGS, rclass);
|
||
}
|
||
|
||
/* Return true if moves in mode MODE can use the FPU's fmov.fmt
   instruction.  */
|
||
|
||
static bool
|
||
loongarch_mode_ok_for_mov_fmt_p (machine_mode mode)
|
||
{
|
||
switch (mode)
|
||
{
|
||
case E_FCCmode:
|
||
case E_SFmode:
|
||
return TARGET_HARD_FLOAT;
|
||
|
||
case E_DFmode:
|
||
return TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT;
|
||
|
||
default:
|
||
return ISA_HAS_LASX ? LASX_SUPPORTED_MODE_P (mode)
|
||
: LSX_SUPPORTED_MODE_P (mode);
|
||
}
|
||
}
|
||
|
||
/* Implement TARGET_MODES_TIEABLE_P. */
|
||
|
||
static bool
|
||
loongarch_modes_tieable_p (machine_mode mode1, machine_mode mode2)
|
||
{
|
||
/* FPRs allow no mode punning, so it's not worth tying modes if we'd
|
||
prefer to put one of them in FPRs. */
|
||
return (mode1 == mode2
|
||
|| (!loongarch_mode_ok_for_mov_fmt_p (mode1)
|
||
&& !loongarch_mode_ok_for_mov_fmt_p (mode2)));
|
||
}
|
||
|
||
/* Implement TARGET_PREFERRED_RELOAD_CLASS. */
|
||
|
||
static reg_class_t
|
||
loongarch_preferred_reload_class (rtx x, reg_class_t rclass)
|
||
{
|
||
if (reg_class_subset_p (FP_REGS, rclass)
|
||
&& loongarch_mode_ok_for_mov_fmt_p (GET_MODE (x)))
|
||
return FP_REGS;
|
||
|
||
if (reg_class_subset_p (GR_REGS, rclass))
|
||
rclass = GR_REGS;
|
||
|
||
return rclass;
|
||
}
|
||
|
||
/* RCLASS is a class involved in a REGISTER_MOVE_COST calculation.
|
||
Return a "canonical" class to represent it in later calculations. */
|
||
|
||
static reg_class_t
|
||
loongarch_canonicalize_move_class (reg_class_t rclass)
|
||
{
|
||
if (reg_class_subset_p (rclass, GENERAL_REGS))
|
||
rclass = GENERAL_REGS;
|
||
|
||
return rclass;
|
||
}
|
||
|
||
/* Return the cost of moving a value from a register of class FROM to a GPR.
|
||
Return 0 for classes that are unions of other classes handled by this
|
||
function. */
|
||
|
||
static int
|
||
loongarch_move_to_gpr_cost (reg_class_t from)
|
||
{
|
||
switch (from)
|
||
{
|
||
case GENERAL_REGS:
|
||
/* MOVE macro. */
|
||
return 2;
|
||
|
||
case FP_REGS:
|
||
/* MOVFR2GR, etc. */
|
||
return 4;
|
||
|
||
case FCC_REGS:
|
||
return loongarch_cost->movcf2gr;
|
||
|
||
default:
|
||
return 0;
|
||
}
|
||
}
|
||
|
||
/* Return the cost of moving a value from a GPR to a register of class TO.
|
||
Return 0 for classes that are unions of other classes handled by this
|
||
function. */
|
||
|
||
static int
|
||
loongarch_move_from_gpr_cost (reg_class_t to)
|
||
{
|
||
switch (to)
|
||
{
|
||
case GENERAL_REGS:
|
||
      /* MOVE macro.  */
|
||
return 2;
|
||
|
||
case FP_REGS:
|
||
/* MOVGR2FR, etc. */
|
||
return 4;
|
||
|
||
case FCC_REGS:
|
||
return loongarch_cost->movgr2cf;
|
||
|
||
default:
|
||
return 0;
|
||
}
|
||
}
|
||
|
||
/* Implement TARGET_REGISTER_MOVE_COST. Return 0 for classes that are the
|
||
maximum of the move costs for subclasses; regclass will work out
|
||
the maximum for us. */
|
||
|
||
static int
|
||
loongarch_register_move_cost (machine_mode mode, reg_class_t from,
|
||
reg_class_t to)
|
||
{
|
||
reg_class_t dregs;
|
||
int cost1, cost2;
|
||
|
||
from = loongarch_canonicalize_move_class (from);
|
||
to = loongarch_canonicalize_move_class (to);
|
||
|
||
/* Handle moves that can be done without using general-purpose registers. */
|
||
if (from == FP_REGS)
|
||
{
|
||
if (to == FP_REGS && loongarch_mode_ok_for_mov_fmt_p (mode))
|
||
/* FMOV.FMT. */
|
||
return 4;
|
||
}
|
||
|
||
/* Handle cases in which only one class deviates from the ideal. */
|
||
dregs = GENERAL_REGS;
|
||
if (from == dregs)
|
||
return loongarch_move_from_gpr_cost (to);
|
||
if (to == dregs)
|
||
return loongarch_move_to_gpr_cost (from);
|
||
|
||
/* fcc -> fcc, fcc -> fpr, or fpr -> fcc. */
|
||
if (from == FCC_REGS || to == FCC_REGS)
|
||
return COSTS_N_INSNS (from == to ? 2 : 1);
|
||
|
||
  /* Handle cases that require a GPR temporary.  */
|
||
cost1 = loongarch_move_to_gpr_cost (from);
|
||
if (cost1 != 0)
|
||
{
|
||
cost2 = loongarch_move_from_gpr_cost (to);
|
||
if (cost2 != 0)
|
||
return cost1 + cost2;
|
||
}
|
||
|
||
return 0;
|
||
}
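
/* Cost summary (illustrative): an FPR->FPR fmov.fmt costs 4; moves
   between FCC_REGS and FP_REGS cost COSTS_N_INSNS (1) and fcc->fcc
   COSTS_N_INSNS (2); GPR<->FCC moves take their cost from the tuning
   tables; everything that needs a GPR temporary pays the sum of the
   to-GPR and from-GPR costs.  */
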
/* Implement TARGET_MEMORY_MOVE_COST. */
|
||
|
||
static int
|
||
loongarch_memory_move_cost (machine_mode mode, reg_class_t rclass, bool in)
|
||
{
|
||
return (loongarch_cost->memory_latency
|
||
+ memory_move_secondary_cost (mode, rclass, in));
|
||
}
|
||
|
||
/* Return the register class required for a secondary register when
|
||
copying between one of the registers in RCLASS and value X, which
|
||
has mode MODE. X is the source of the move if IN_P, otherwise it
|
||
is the destination. Return NO_REGS if no secondary register is
|
||
needed. */
|
||
|
||
static reg_class_t
|
||
loongarch_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
|
||
reg_class_t rclass, machine_mode mode,
|
||
secondary_reload_info *sri ATTRIBUTE_UNUSED)
|
||
{
|
||
int regno;
|
||
|
||
regno = true_regnum (x);
|
||
|
||
if (mode == FCCmode)
|
||
{
|
||
if (reg_class_subset_p (rclass, FCC_REGS) && !FP_REG_P (regno))
|
||
{
|
||
if (FCC_REG_P (regno))
|
||
return FP_REGS;
|
||
|
||
auto fn = in_p ? loongarch_move_from_gpr_cost
|
||
: loongarch_move_to_gpr_cost;
|
||
|
||
if (fn (FCC_REGS) > fn (FP_REGS) + COSTS_N_INSNS (1))
|
||
return FP_REGS;
|
||
|
||
return GP_REG_P (regno) ? NO_REGS : GR_REGS;
|
||
}
|
||
|
||
if (reg_class_subset_p (rclass, GR_REGS) && FCC_REG_P (regno))
|
||
{
|
||
auto fn = in_p ? loongarch_move_to_gpr_cost
|
||
: loongarch_move_from_gpr_cost;
|
||
|
||
if (fn (FCC_REGS) > fn (FP_REGS) + COSTS_N_INSNS (1))
|
||
return FP_REGS;
|
||
|
||
return NO_REGS;
|
||
}
|
||
|
||
if (reg_class_subset_p (rclass, FP_REGS)
|
||
&& (regno == -1 || MEM_P (x)))
|
||
return GR_REGS;
|
||
|
||
return NO_REGS;
|
||
}
|
||
|
||
if (reg_class_subset_p (rclass, FP_REGS))
|
||
{
|
||
if (regno < 0
|
||
|| (MEM_P (x)
|
||
&& (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)))
|
||
	/* In this case we can use fld.s, fst.s, fld.d or fst.d.  */
|
||
return NO_REGS;
|
||
|
||
if (MEM_P (x) && LSX_SUPPORTED_MODE_P (mode))
|
||
/* In this case we can use LSX LD.* and ST.*. */
|
||
return NO_REGS;
|
||
|
||
if (GP_REG_P (regno) || x == CONST0_RTX (mode))
|
||
	/* In this case we can use movgr2fr.s, movfr2gr.s, movgr2fr.d or
	   movfr2gr.d.  */
|
||
return NO_REGS;
|
||
|
||
if (CONSTANT_P (x) && !targetm.cannot_force_const_mem (mode, x))
|
||
	/* We can force the constant to memory and load it back with
	   fld.s or fld.d.  */
|
||
return NO_REGS;
|
||
|
||
if (FP_REG_P (regno) && loongarch_mode_ok_for_mov_fmt_p (mode))
|
||
/* In this case we can use fmov.{s/d}. */
|
||
return NO_REGS;
|
||
|
||
/* Otherwise, we need to reload through an integer register. */
|
||
return GR_REGS;
|
||
}
|
||
if (FP_REG_P (regno))
|
||
return reg_class_subset_p (rclass, GR_REGS) ? NO_REGS : GR_REGS;
|
||
|
||
return NO_REGS;
|
||
}
|
||
|
||
/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
|
||
|
||
The register allocator chooses ALL_REGS if FP_REGS and GR_REGS have the
|
||
same cost - even if ALL_REGS has a much higher cost. ALL_REGS is also used
|
||
if the cost of both FP_REGS and GR_REGS is lower than the memory cost (in
|
||
   this case the best class is the lowest cost one).  Using ALL_REGS
   irrespective of its own cost results in bad allocations with many
   redundant int<->FP moves which are expensive on various cores.

   To avoid this we don't allow ALL_REGS as the allocno class, but force
   a decision between FP_REGS and GR_REGS.  We use the allocno class if
   it isn't ALL_REGS.  Similarly, use the best class if it isn't
   ALL_REGS.  Otherwise set the allocno class depending on the mode.
|
||
|
||
This change has a similar effect to increasing the cost of FPR->GPR register
|
||
moves for integer modes so that they are higher than the cost of memory but
|
||
changing the allocno class is more reliable. */
|
||
|
||
static reg_class_t
|
||
loongarch_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class,
|
||
reg_class_t best_class)
|
||
{
|
||
enum machine_mode mode;
|
||
|
||
if (allocno_class != ALL_REGS)
|
||
return allocno_class;
|
||
|
||
if (best_class != ALL_REGS)
|
||
return best_class;
|
||
|
||
mode = PSEUDO_REGNO_MODE (regno);
|
||
return FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode) ? FP_REGS : GR_REGS;
|
||
}
|
||
|
||
/* Implement TARGET_VALID_POINTER_MODE. */
|
||
|
||
static bool
|
||
loongarch_valid_pointer_mode (scalar_int_mode mode)
|
||
{
|
||
return mode == SImode || (TARGET_64BIT && mode == DImode);
|
||
}
|
||
|
||
/* Implement TARGET_VECTOR_MODE_SUPPORTED_P. */
|
||
|
||
static bool
|
||
loongarch_vector_mode_supported_p (machine_mode mode)
|
||
{
|
||
return ISA_HAS_LASX ? LASX_SUPPORTED_MODE_P (mode)
|
||
: LSX_SUPPORTED_MODE_P (mode);
|
||
}
|
||
|
||
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P. */
|
||
|
||
static bool
|
||
loongarch_scalar_mode_supported_p (scalar_mode mode)
|
||
{
|
||
if (ALL_FIXED_POINT_MODE_P (mode)
|
||
&& GET_MODE_PRECISION (mode) <= 2 * BITS_PER_WORD)
|
||
return true;
|
||
|
||
return default_scalar_mode_supported_p (mode);
|
||
}
|
||
|
||
/* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE. */
|
||
|
||
static machine_mode
|
||
loongarch_preferred_simd_mode (scalar_mode mode)
|
||
{
|
||
if (!ISA_HAS_LSX)
|
||
return word_mode;
|
||
|
||
switch (mode)
|
||
{
|
||
case E_QImode:
|
||
return ISA_HAS_LASX ? E_V32QImode : E_V16QImode;
|
||
case E_HImode:
|
||
return ISA_HAS_LASX ? E_V16HImode : E_V8HImode;
|
||
case E_SImode:
|
||
return ISA_HAS_LASX ? E_V8SImode : E_V4SImode;
|
||
case E_DImode:
|
||
return ISA_HAS_LASX ? E_V4DImode : E_V2DImode;
|
||
|
||
case E_SFmode:
|
||
return ISA_HAS_LASX ? E_V8SFmode : E_V4SFmode;
|
||
|
||
case E_DFmode:
|
||
return ISA_HAS_LASX ? E_V4DFmode : E_V2DFmode;
|
||
|
||
default:
|
||
break;
|
||
}
|
||
return word_mode;
|
||
}
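
/* For example (illustrative): with LASX enabled, SImode data vectorizes
   as V8SImode (256 bits); with only LSX it is V4SImode; without LSX the
   function falls back to word_mode, i.e. no vectorization.  */

/* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES.  */
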
static unsigned int
|
||
loongarch_autovectorize_vector_modes (vector_modes *modes, bool)
|
||
{
|
||
if (ISA_HAS_LASX)
|
||
{
|
||
modes->safe_push (V32QImode);
|
||
modes->safe_push (V16QImode);
|
||
}
|
||
else if (ISA_HAS_LSX)
|
||
{
|
||
modes->safe_push (V16QImode);
|
||
}
|
||
|
||
return 0;
|
||
}
|
||
|
||
/* Return the assembly code for INSN, which has the operands given by
|
||
OPERANDS, and which branches to OPERANDS[0] if some condition is true.
|
||
BRANCH_IF_TRUE is the asm template that should be used if OPERANDS[0]
|
||
is in range of a direct branch. BRANCH_IF_FALSE is an inverted
|
||
version of BRANCH_IF_TRUE. */
|
||
|
||
const char *
|
||
loongarch_output_conditional_branch (rtx_insn *insn, rtx *operands,
|
||
const char *branch_if_true,
|
||
const char *branch_if_false)
|
||
{
|
||
unsigned int length;
|
||
rtx taken;
|
||
|
||
gcc_assert (LABEL_P (operands[0]));
|
||
|
||
length = get_attr_length (insn);
|
||
if (length <= 4)
|
||
{
|
||
return branch_if_true;
|
||
}
|
||
|
||
/* Generate a reversed branch around a direct jump. */
|
||
rtx_code_label *not_taken = gen_label_rtx ();
|
||
taken = operands[0];
|
||
|
||
/* Generate the reversed branch to NOT_TAKEN. */
|
||
operands[0] = not_taken;
|
||
output_asm_insn (branch_if_false, operands);
|
||
|
||
output_asm_insn ("b\t%0", &taken);
|
||
|
||
/* Output NOT_TAKEN. */
|
||
targetm.asm_out.internal_label (asm_out_file, "L",
|
||
CODE_LABEL_NUMBER (not_taken));
|
||
return "";
|
||
}
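
/* For example (illustrative): when the target of "beq\t%2,%3,%0" is out
   of range, the insn is emitted instead as a "bne" to a fresh local
   label followed by an unconditional "b" to the real target, with the
   label placed straight after.  */
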
/* Return the assembly code for INSN, which branches to OPERANDS[0]
|
||
if some equality condition is true. The condition is given by
|
||
OPERANDS[1] if !INVERTED_P, otherwise it is the inverse of
|
||
OPERANDS[1]. OPERANDS[2] is the comparison's first operand;
|
||
OPERANDS[3] is the second operand and may be zero or a register. */
|
||
|
||
const char *
|
||
loongarch_output_equal_conditional_branch (rtx_insn *insn, rtx *operands,
|
||
bool inverted_p)
|
||
{
|
||
const char *branch[2];
|
||
if (operands[3] == const0_rtx)
|
||
{
|
||
branch[!inverted_p] = LARCH_BRANCH ("b%C1z", "%2,%0");
|
||
branch[inverted_p] = LARCH_BRANCH ("b%N1z", "%2,%0");
|
||
}
|
||
else
|
||
{
|
||
branch[!inverted_p] = LARCH_BRANCH ("b%C1", "%2,%z3,%0");
|
||
branch[inverted_p] = LARCH_BRANCH ("b%N1", "%2,%z3,%0");
|
||
}
|
||
|
||
return loongarch_output_conditional_branch (insn, operands, branch[1],
|
||
branch[0]);
|
||
}
|
||
|
||
/* Return the assembly code for INSN, which branches to OPERANDS[0]
|
||
if some ordering condition is true. The condition is given by
|
||
OPERANDS[1] if !INVERTED_P, otherwise it is the inverse of
|
||
OPERANDS[1]. OPERANDS[2] is the comparison's first operand;
|
||
OPERANDS[3] is the second operand and may be zero or a register. */
|
||
|
||
const char *
|
||
loongarch_output_order_conditional_branch (rtx_insn *insn, rtx *operands,
|
||
bool inverted_p)
|
||
{
|
||
const char *branch[2];
|
||
|
||
/* Make BRANCH[1] branch to OPERANDS[0] when the condition is true.
|
||
Make BRANCH[0] branch on the inverse condition. */
|
||
if (operands[3] != const0_rtx)
|
||
{
|
||
/* Handle degenerate cases that should not, but do, occur. */
|
||
if (REGNO (operands[2]) == REGNO (operands[3]))
|
||
{
|
||
switch (GET_CODE (operands[1]))
|
||
{
|
||
case LT:
|
||
case LTU:
|
||
case GT:
|
||
case GTU:
|
||
inverted_p = !inverted_p;
|
||
/* Fall through. */
|
||
case LE:
|
||
case LEU:
|
||
case GE:
|
||
case GEU:
|
||
branch[!inverted_p] = LARCH_BRANCH ("b", "%0");
|
||
branch[inverted_p] = "\t# branch never";
|
||
break;
|
||
default:
|
||
gcc_unreachable ();
|
||
}
|
||
}
|
||
else
|
||
{
|
||
switch (GET_CODE (operands[1]))
|
||
{
|
||
case LE:
|
||
case LEU:
|
||
case GT:
|
||
case GTU:
|
||
case LT:
|
||
case LTU:
|
||
case GE:
|
||
case GEU:
|
||
branch[!inverted_p] = LARCH_BRANCH ("b%C1", "%2,%3,%0");
|
||
branch[inverted_p] = LARCH_BRANCH ("b%N1", "%2,%3,%0");
|
||
break;
|
||
default:
|
||
gcc_unreachable ();
|
||
}
|
||
}
|
||
}
|
||
else
|
||
{
|
||
switch (GET_CODE (operands[1]))
|
||
{
|
||
/* These cases are equivalent to comparisons against zero. */
|
||
case LEU:
|
||
case GTU:
|
||
case LTU:
|
||
case GEU:
|
||
case LE:
|
||
case GT:
|
||
case LT:
|
||
case GE:
|
||
branch[!inverted_p] = LARCH_BRANCH ("b%C1", "%2,$r0,%0");
|
||
branch[inverted_p] = LARCH_BRANCH ("b%N1", "%2,$r0,%0");
|
||
break;
|
||
default:
|
||
gcc_unreachable ();
|
||
}
|
||
}
|
||
return loongarch_output_conditional_branch (insn, operands, branch[1],
|
||
branch[0]);
|
||
}
|
||
|
||
/* Return the assembly code for DIV.{W/D} instruction DIVISION, which has
   the operands given by OPERANDS.  Add in a divide-by-zero check if
   needed.  */
|
||
|
||
const char *
|
||
loongarch_output_division (const char *division, rtx *operands)
|
||
{
|
||
const char *s;
|
||
|
||
s = division;
|
||
if (loongarch_check_zero_div_p ())
|
||
{
|
||
output_asm_insn (s, operands);
|
||
s = "bne\t%2,%.,1f\n\tbreak\t7\n1:";
|
||
}
|
||
return s;
|
||
}
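
/* For example (illustrative): with divide-by-zero checking enabled,
   "div.d\t%0,%1,%2" is emitted first and the returned template appends
   "bne\t%2,%.,1f\n\tbreak\t7\n1:", i.e. trap with break 7 when the
   divisor is zero.  */
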
/* Return the assembly code for LSX DIV_{S,U}.DF or MOD_{S,U}.DF instructions,
|
||
which has the operands given by OPERANDS. Add in a divide-by-zero check
|
||
if needed. */
|
||
|
||
const char *
|
||
loongarch_lsx_output_division (const char *division, rtx *operands)
|
||
{
|
||
const char *s;
|
||
machine_mode mode = GET_MODE (*operands);
|
||
|
||
s = division;
|
||
if (TARGET_CHECK_ZERO_DIV)
|
||
{
|
||
if (ISA_HAS_LASX && GET_MODE_SIZE (mode) == 32)
|
||
{
|
||
output_asm_insn ("xvsetallnez.%v0\t$fcc7,%u2",operands);
|
||
output_asm_insn (s, operands);
|
||
output_asm_insn ("bcnez\t$fcc7,1f", operands);
|
||
}
|
||
else if (ISA_HAS_LSX)
|
||
{
|
||
output_asm_insn ("vsetallnez.%v0\t$fcc7,%w2",operands);
|
||
output_asm_insn (s, operands);
|
||
output_asm_insn ("bcnez\t$fcc7,1f", operands);
|
||
}
|
||
s = "break\t7\n1:";
|
||
}
|
||
return s;
|
||
}
|
||
|
||
/* Implement TARGET_SCHED_ADJUST_COST.  Anti dependencies are treated as
   having no cost; true and output dependencies keep COST.  */
|
||
|
||
static int
|
||
loongarch_adjust_cost (rtx_insn *, int dep_type, rtx_insn *, int cost,
|
||
unsigned int)
|
||
{
|
||
if (dep_type != 0 && (dep_type != REG_DEP_OUTPUT))
|
||
return 0;
|
||
return cost;
|
||
}
|
||
|
||
/* Return the number of instructions that can be issued per cycle. */
|
||
|
||
static int
|
||
loongarch_issue_rate (void)
|
||
{
|
||
if ((unsigned long) la_target.cpu_tune < N_TUNE_TYPES)
|
||
return loongarch_cpu_issue_rate[la_target.cpu_tune];
|
||
else
|
||
return 1;
|
||
}
|
||
|
||
/* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD. This should
|
||
be as wide as the scheduling freedom in the DFA. */
|
||
|
||
static int
|
||
loongarch_multipass_dfa_lookahead (void)
|
||
{
|
||
  if ((unsigned long) la_target.cpu_tune < N_TUNE_TYPES)
|
||
return loongarch_cpu_multipass_dfa_lookahead[la_target.cpu_tune];
|
||
else
|
||
return 0;
|
||
}
|
||
|
||
/* Implement TARGET_SCHED_REORDER. */
|
||
|
||
static int
|
||
loongarch_sched_reorder (FILE *file ATTRIBUTE_UNUSED,
|
||
int verbose ATTRIBUTE_UNUSED,
|
||
rtx_insn **ready ATTRIBUTE_UNUSED,
|
||
int *nreadyp ATTRIBUTE_UNUSED,
|
||
int cycle ATTRIBUTE_UNUSED)
|
||
{
|
||
return loongarch_issue_rate ();
|
||
}
|
||
|
||
/* Implement TARGET_SCHED_REORDER2. */
|
||
|
||
static int
|
||
loongarch_sched_reorder2 (FILE *file ATTRIBUTE_UNUSED,
|
||
int verbose ATTRIBUTE_UNUSED,
|
||
rtx_insn **ready ATTRIBUTE_UNUSED,
|
||
int *nreadyp ATTRIBUTE_UNUSED,
|
||
int cycle ATTRIBUTE_UNUSED)
|
||
{
|
||
return cached_can_issue_more;
|
||
}
|
||
|
||
/* Implement TARGET_SCHED_INIT. */
|
||
|
||
static void
|
||
loongarch_sched_init (FILE *file ATTRIBUTE_UNUSED,
|
||
int verbose ATTRIBUTE_UNUSED,
|
||
int max_ready ATTRIBUTE_UNUSED)
|
||
{}
|
||
|
||
/* Implement TARGET_SCHED_VARIABLE_ISSUE. */
|
||
|
||
static int
|
||
loongarch_variable_issue (FILE *file ATTRIBUTE_UNUSED,
|
||
int verbose ATTRIBUTE_UNUSED, rtx_insn *insn,
|
||
int more)
|
||
{
|
||
/* Ignore USEs and CLOBBERs; don't count them against the issue rate. */
|
||
if (USEFUL_INSN_P (insn))
|
||
{
|
||
if (get_attr_type (insn) != TYPE_GHOST)
|
||
more--;
|
||
}
|
||
|
||
/* Instructions of type 'multi' should all be split before
|
||
the second scheduling pass. */
|
||
gcc_assert (!reload_completed
|
||
|| recog_memoized (insn) < 0
|
||
|| get_attr_type (insn) != TYPE_MULTI);
|
||
|
||
cached_can_issue_more = more;
|
||
return more;
|
||
}
|
||
|
||
/* Given that we have an rtx of the form (prefetch ... WRITE LOCALITY),
|
||
return the first operand of the associated PREF or PREFX insn. */
|
||
|
||
rtx
|
||
loongarch_prefetch_cookie (rtx write, rtx locality)
|
||
{
|
||
/* store_streamed / load_streamed. */
|
||
if (INTVAL (locality) <= 0)
|
||
return GEN_INT (INTVAL (write) + 4);
|
||
|
||
/* store / load. */
|
||
if (INTVAL (locality) <= 2)
|
||
return write;
|
||
|
||
/* store_retained / load_retained. */
|
||
return GEN_INT (INTVAL (write) + 6);
|
||
}

/* Implement TARGET_ASM_OUTPUT_MI_THUNK.  Generate rtl rather than asm text
   in order to avoid duplicating too much logic from elsewhere.  */

static void
loongarch_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
			   HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
			   tree function)
{
  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
  rtx this_rtx, temp1, temp2, fnaddr;
  rtx_insn *insn;
  bool use_sibcall_p;

  /* Pretend to be a post-reload pass while generating rtl.  */
  reload_completed = 1;

  /* Mark the end of the (empty) prologue.  */
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Determine if we can use a sibcall to call FUNCTION directly.  */
  fnaddr = XEXP (DECL_RTL (function), 0);
  use_sibcall_p = const_call_insn_operand (fnaddr, Pmode);

  /* We need two temporary registers in some cases.  */
  temp1 = gen_rtx_REG (Pmode, 12);
  temp2 = gen_rtx_REG (Pmode, 13);

  /* Find out which register contains the "this" pointer.  */
  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
    this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST + 1);
  else
    this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST);

  /* Add DELTA to THIS_RTX.  */
  if (delta != 0)
    {
      rtx offset = GEN_INT (delta);
      if (!IMM12_OPERAND (delta))
	{
	  loongarch_emit_move (temp1, offset);
	  offset = temp1;
	}
      emit_insn (gen_add3_insn (this_rtx, this_rtx, offset));
    }

  /* If needed, add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX.  */
  if (vcall_offset != 0)
    {
      rtx addr;

      /* Set TEMP1 to *THIS_RTX.  */
      loongarch_emit_move (temp1, gen_rtx_MEM (Pmode, this_rtx));

      /* Set ADDR to a legitimate address for *THIS_RTX + VCALL_OFFSET.  */
      addr = loongarch_add_offset (temp2, temp1, vcall_offset);

      /* Load the offset and add it to THIS_RTX.  */
      loongarch_emit_move (temp1, gen_rtx_MEM (Pmode, addr));
      emit_insn (gen_add3_insn (this_rtx, this_rtx, temp1));
    }

  /* Jump to the target function.  Use a sibcall if direct jumps are
     allowed, otherwise load the address into a register first.  */
  if (use_sibcall_p)
    {
      /* If TARGET_CMODEL_EXTREME, we cannot do a direct jump at all
	 and const_call_insn_operand should have returned false.  */
      gcc_assert (!TARGET_CMODEL_EXTREME);

      insn = emit_call_insn (gen_sibcall_internal (fnaddr, const0_rtx));
      SIBLING_CALL_P (insn) = 1;
    }
  else
    {
      if (!TARGET_CMODEL_EXTREME)
	loongarch_emit_move (temp1, fnaddr);
      else if (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE)
	emit_insn (gen_movdi_symbolic_off64 (temp1, fnaddr, temp2));
      else
	{
	  emit_insn (gen_la_pcrel64_two_parts (temp1, temp2, fnaddr));
	  emit_move_insn (temp1, gen_rtx_PLUS (Pmode, temp1, temp2));
	}

      emit_jump_insn (gen_indirect_jump (temp1));
    }

  /* Run just enough of rest_of_compilation.  This sequence was
     "borrowed" from alpha.c.  */
  insn = get_insns ();
  split_all_insns_noflow ();
  shorten_branches (insn);
  assemble_start_function (thunk_fndecl, fnname);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();
  assemble_end_function (thunk_fndecl, fnname);

  /* Stop pretending to be a post-reload pass.  */
  reload_completed = 0;
}

/* Allocate a chunk of memory for per-function machine-dependent data.  */

static struct machine_function *
loongarch_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

static void
loongarch_global_init (void)
{
  /* Initialize loongarch_print_operand_punct.  */
  for (const char *p = ".$"; *p; p++)
    loongarch_print_operand_punct[(unsigned char) *p] = true;

  /* Set up array to map GCC register number to debug register number.
     Ignore the special purpose register numbers.  */
  for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      if (GP_REG_P (i) || FP_REG_P (i))
	loongarch_dwarf_regno[i] = i;
      else
	loongarch_dwarf_regno[i] = INVALID_REGNUM;
    }

  /* Function to allocate machine-dependent function status.  */
  init_machine_status = &loongarch_init_machine_status;
}

static void
loongarch_reg_init (void)
{
  /* Set up loongarch_hard_regno_mode_ok.  */
  for (int mode = 0; mode < MAX_MACHINE_MODE; mode++)
    for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
      loongarch_hard_regno_mode_ok_p[mode][regno]
	= loongarch_hard_regno_mode_ok_uncached (regno, (machine_mode) mode);
}

void
loongarch_option_override_internal (struct loongarch_target *target,
				    struct gcc_options *opts,
				    struct gcc_options *opts_set)
{
  /* Handle options not covered by struct loongarch_target.  */
  loongarch_init_misc_options (opts, opts_set);

  /* Resolve the target struct.  */
  loongarch_init_target (target,
			 opts->x_la_opt_cpu_arch,
			 opts->x_la_opt_cpu_tune,
			 opts->x_la_opt_fpu,
			 opts->x_la_opt_simd,
			 opts->x_la_opt_abi_base,
			 opts->x_la_opt_abi_ext,
			 opts->x_la_opt_cmodel,
			 opts->x_la_opt_tls_dialect,
			 opts->x_la_isa_evolution,
			 opts_set->x_la_isa_evolution);

  loongarch_config_target (target, NULL, 0);

  /* Override some options according to the resolved target.  */
  loongarch_target_option_override (target, opts, opts_set);

  loongarch_reg_init ();
}

/* Remember the last target of loongarch_set_current_function.  */

static GTY(()) tree loongarch_previous_fndecl;

void
loongarch_reset_previous_fndecl (void)
{
  loongarch_previous_fndecl = NULL;
}

/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.  Used by
   loongarch_set_current_function to make sure optab availability
   predicates are recomputed when necessary.  */

void
loongarch_save_restore_target_globals (tree new_tree)
{
  if (TREE_TARGET_GLOBALS (new_tree))
    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
  else if (new_tree == target_option_default_node)
    restore_target_globals (&default_target_globals);
  else
    TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
}

/* Implement TARGET_SET_CURRENT_FUNCTION.  */

static void
loongarch_set_current_function (tree fndecl)
{
  if (fndecl == loongarch_previous_fndecl)
    return;

  tree old_tree;
  if (loongarch_previous_fndecl == NULL_TREE)
    old_tree = target_option_current_node;
  else if (DECL_FUNCTION_SPECIFIC_TARGET (loongarch_previous_fndecl))
    old_tree = DECL_FUNCTION_SPECIFIC_TARGET (loongarch_previous_fndecl);
  else
    old_tree = target_option_default_node;

  /* When the function being optimized is popped (pop_cfun), FNDECL is
     NULL.  */
  if (fndecl == NULL_TREE)
    {
      if (old_tree != target_option_current_node)
	{
	  /* If this function was compiled with special options, restore
	     the original global optimization options now that we are done
	     with it.  */
	  loongarch_previous_fndecl = NULL_TREE;
	  cl_target_option_restore (&global_options, &global_options_set,
				    TREE_TARGET_OPTION
				    (target_option_current_node));
	}
      return;
    }

  tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  /* If no separate compilation parameters are set for the function,
     NEW_TREE is NULL.  */
  if (new_tree == NULL_TREE)
    new_tree = target_option_default_node;

  loongarch_previous_fndecl = fndecl;

  if (new_tree != old_tree)
    /* Correct the options according to the function's attribute and
       pragma settings.  */
    cl_target_option_restore (&global_options, &global_options_set,
			      TREE_TARGET_OPTION (new_tree));

  /* After correcting the options, update the rules for using the hard
     registers so that they correspond to the options.  */
  loongarch_reg_init ();

  loongarch_save_restore_target_globals (new_tree);
}

/* Implement TARGET_OPTION_OVERRIDE.  */

static void
loongarch_option_override (void)
{
  /* Global initializations.  */
  loongarch_global_init ();

  /* Set up the target configuration.  */
  loongarch_option_override_internal (&la_target,
				      &global_options,
				      &global_options_set);

  /* Save the initial options so that we can restore the initial option
     settings later when processing attributes and pragmas.  */
  target_option_default_node = target_option_current_node
    = build_target_option_node (&global_options, &global_options_set);
}

/* Implement TARGET_OPTION_SAVE.  */
static void
loongarch_option_save (struct cl_target_option *,
		       struct gcc_options *opts,
		       struct gcc_options *opts_set)
{
  loongarch_update_gcc_opt_status (&la_target, opts, opts_set);
}

/* Implement TARGET_OPTION_RESTORE.  */
static void
loongarch_option_restore (struct gcc_options *,
			  struct gcc_options *,
			  struct cl_target_option *ptr)
{
  la_target.cpu_arch = ptr->x_la_opt_cpu_arch;
  la_target.cpu_tune = ptr->x_la_opt_cpu_tune;

  la_target.isa.fpu = ptr->x_la_opt_fpu;
  la_target.isa.simd = ptr->x_la_opt_simd;
  la_target.isa.evolution = ptr->x_la_isa_evolution;

  la_target.cmodel = ptr->x_la_opt_cmodel;
  la_target.tls_dialect = ptr->x_la_opt_tls_dialect;
}

/* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  */

static void
loongarch_conditional_register_usage (void)
{
  if (!TARGET_HARD_FLOAT)
    accessible_reg_set &= ~(reg_class_contents[FP_REGS]
			    | reg_class_contents[FCC_REGS]);
}

/* Implement EH_USES.  */

bool
loongarch_eh_uses (unsigned int regno ATTRIBUTE_UNUSED)
{
  return false;
}

/* Implement EPILOGUE_USES.  */

bool
loongarch_epilogue_uses (unsigned int regno)
{
  /* Say that the epilogue uses the return address register.  Note that
     in the case of sibcalls, the values "used by the epilogue" are
     considered live at the start of the called function.  */
  if (regno == RETURN_ADDR_REGNUM)
    return true;

  return false;
}

bool
loongarch_load_store_bonding_p (rtx *operands, machine_mode mode, bool load_p)
{
  rtx reg1, reg2, mem1, mem2, base1, base2;
  enum reg_class rc1, rc2;
  HOST_WIDE_INT offset1, offset2;

  if (load_p)
    {
      reg1 = operands[0];
      reg2 = operands[2];
      mem1 = operands[1];
      mem2 = operands[3];
    }
  else
    {
      reg1 = operands[1];
      reg2 = operands[3];
      mem1 = operands[0];
      mem2 = operands[2];
    }

  if (loongarch_address_insns (XEXP (mem1, 0), mode, false) == 0
      || loongarch_address_insns (XEXP (mem2, 0), mode, false) == 0)
    return false;

  loongarch_split_plus (XEXP (mem1, 0), &base1, &offset1);
  loongarch_split_plus (XEXP (mem2, 0), &base2, &offset2);

  /* Base regs do not match.  */
  if (!REG_P (base1) || !rtx_equal_p (base1, base2))
    return false;

  /* One of the loads clobbers the base register.  Bonding would be
     legitimate if only the second load clobbered the base register, but
     the hardware does not support such bonding.  */
  if (load_p
      && (REGNO (reg1) == REGNO (base1) || REGNO (reg2) == REGNO (base1)))
    return false;

  /* Loading into the same register.  */
  if (load_p && REGNO (reg1) == REGNO (reg2))
    return false;

  /* The loads/stores are not of the same type.  */
  rc1 = REGNO_REG_CLASS (REGNO (reg1));
  rc2 = REGNO_REG_CLASS (REGNO (reg2));
  if (rc1 != rc2 && !reg_class_subset_p (rc1, rc2)
      && !reg_class_subset_p (rc2, rc1))
    return false;

  if (abs (offset1 - offset2) != GET_MODE_SIZE (mode))
    return false;

  return true;
}

/* Implement TARGET_TRAMPOLINE_INIT.  */

static void
loongarch_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx addr, end_addr, mem;
  rtx trampoline[8];
  unsigned int i, j;
  HOST_WIDE_INT end_addr_offset, static_chain_offset, target_function_offset;

  /* Work out the offsets of the pointers from the start of the
     trampoline code.  */
  end_addr_offset = TRAMPOLINE_CODE_SIZE;
  static_chain_offset = end_addr_offset;
  target_function_offset = static_chain_offset + GET_MODE_SIZE (ptr_mode);

  /* Get pointers to the beginning and end of the code block.  */
  addr = force_reg (Pmode, XEXP (m_tramp, 0));
  end_addr
    = loongarch_force_binary (Pmode, PLUS, addr, GEN_INT (end_addr_offset));

#define OP(X) gen_int_mode (X, SImode)

  /* Build up the code in TRAMPOLINE.  */
  i = 0;
  /* pcaddi $static_chain,0
     ld.[dw] $tmp,$static_chain,target_function_offset
     ld.[dw] $static_chain,$static_chain,static_chain_offset
     jirl $r0,$tmp,0  */
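  /* Each OP () below assembles one fixed 32-bit instruction.  In the
     LoongArch 2RI12 load format used here, rd occupies bits [4:0], rj
     bits [9:5] and the 12-bit immediate bits [21:10]; 0x28c00000 is the
     ld.d opcode and 0x28800000 is ld.w.  0x18000000 is pcaddi (rd plus a
     20-bit immediate, zero here), and 0x4c000000 | (19 << 5) encodes
     "jirl $r0,$t7,0".  */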
  trampoline[i++] = OP (0x18000000 | (STATIC_CHAIN_REGNUM - GP_REG_FIRST));
  trampoline[i++] = OP ((ptr_mode == DImode ? 0x28c00000 : 0x28800000)
			| 19 /* $t7  */
			| ((STATIC_CHAIN_REGNUM - GP_REG_FIRST) << 5)
			| ((target_function_offset & 0xfff) << 10));
  trampoline[i++] = OP ((ptr_mode == DImode ? 0x28c00000 : 0x28800000)
			| (STATIC_CHAIN_REGNUM - GP_REG_FIRST)
			| ((STATIC_CHAIN_REGNUM - GP_REG_FIRST) << 5)
			| ((static_chain_offset & 0xfff) << 10));
  trampoline[i++] = OP (0x4c000000 | (19 << 5));
#undef OP

  for (j = 0; j < i; j++)
    {
      mem = adjust_address (m_tramp, SImode, j * GET_MODE_SIZE (SImode));
      loongarch_emit_move (mem, trampoline[j]);
    }

  /* Set up the static chain pointer field.  */
  mem = adjust_address (m_tramp, ptr_mode, static_chain_offset);
  loongarch_emit_move (mem, chain_value);

  /* Set up the target function field.  */
  mem = adjust_address (m_tramp, ptr_mode, target_function_offset);
  loongarch_emit_move (mem, XEXP (DECL_RTL (fndecl), 0));

  /* Flush the code part of the trampoline.  */
  emit_insn (gen_add3_insn (end_addr, addr, GEN_INT (TRAMPOLINE_SIZE)));
  emit_insn (gen_clear_cache (addr, end_addr));
}

/* Generate or test for an insn that supports a constant permutation.  */

#define MAX_VECT_LEN 32

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  machine_mode vmode;
  unsigned char nelt;
  bool one_vector_p;
  bool testing_p;
};

/* Construct (set target (vec_select op0 (parallel perm))) and
   return true if that's a valid instruction in the active ISA.  */

static bool
loongarch_expand_vselect (rtx target, rtx op0,
			  const unsigned char *perm, unsigned nelt,
			  bool testing_p)
{
  rtx rperm[MAX_VECT_LEN], x;
  rtx_insn *insn;
  unsigned i;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (perm[i]);

  x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
  x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
  x = gen_rtx_SET (target, x);

  insn = emit_insn (x);
  if (recog_memoized (insn) < 0)
    {
      remove_insn (insn);
      return false;
    }

  if (testing_p)
    remove_insn (insn);
  return true;
}

/* Similar, but generate a vec_concat from op0 and op1 as well.  */

static bool
loongarch_expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
				  const unsigned char *perm, unsigned nelt,
				  bool testing_p)
{
  machine_mode v2mode;
  rtx x;

  if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode))
    return false;
  x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
  return loongarch_expand_vselect (target, x, perm, nelt, testing_p);
}

static tree
loongarch_handle_model_attribute (tree *node, tree name, tree arg, int,
				  bool *no_add_attrs)
{
  tree decl = *node;
  if (VAR_P (decl))
    {
      if (DECL_THREAD_LOCAL_P (decl))
	{
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "%qE attribute cannot be specified for thread-local "
		    "variables", name);
	  *no_add_attrs = true;
	  return NULL_TREE;
	}
      if (DECL_CONTEXT (decl)
	  && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
	  && !TREE_STATIC (decl))
	{
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "%qE attribute cannot be specified for local "
		    "variables", name);
	  *no_add_attrs = true;
	  return NULL_TREE;
	}
      if (DECL_REGISTER (decl))
	{
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "%qE attribute cannot be specified for register "
		    "variables", name);
	  *no_add_attrs = true;
	  return NULL_TREE;
	}

      arg = TREE_VALUE (arg);
      if (TREE_CODE (arg) != STRING_CST)
	{
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "invalid argument of %qE attribute", name);
	  *no_add_attrs = true;
	  return NULL_TREE;
	}

      const char *model = TREE_STRING_POINTER (arg);
      if (strcmp (model, "normal") != 0
	  && strcmp (model, "extreme") != 0)
	{
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "invalid argument of %qE attribute", name);
	  *no_add_attrs = true;
	  return NULL_TREE;
	}

      if (lookup_attribute ("model", DECL_ATTRIBUTES (decl)))
	{
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "multiple %qE attribute", name);
	  *no_add_attrs = true;
	  return NULL_TREE;
	}
    }
  else
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}

TARGET_GNU_ATTRIBUTES (loongarch_attribute_table,
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "model", 1, 1, true, false, false, false,
    loongarch_handle_model_attribute, NULL }
});

bool
loongarch_use_anchors_for_symbol_p (const_rtx symbol)
{
  tree decl = SYMBOL_REF_DECL (symbol);

  /* The section anchor optimization may break the custom address model.  */
  if (decl && lookup_attribute ("model", DECL_ATTRIBUTES (decl)))
    return false;

  return default_use_anchors_for_symbol_p (symbol);
}

/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
loongarch_asan_shadow_offset (void)
{
  /* We only have libsanitizer support for LOONGARCH64 at present.
     This value is taken from the file libsanitizer/asan/asan_mapping.h.  */
  return TARGET_64BIT ? (HOST_WIDE_INT_1 << 46) : 0;
}

static sbitmap
loongarch_get_separate_components (void)
{
  HOST_WIDE_INT offset;
  sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
  bitmap_clear (components);
  offset = cfun->machine->frame.gp_sp_offset;

  /* The stack should be aligned to a 16-byte boundary, so that we can
     make use of the ldptr instructions.  */
  gcc_assert (offset % UNITS_PER_WORD == 0);

  for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
    if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
      {
	/* We can wrap general registers saved at [sp, sp + 32768) using
	   the ldptr/stptr instructions.  For larger offsets a pseudo
	   register might be needed, which cannot be created during the
	   shrink-wrapping pass.

	   TODO: This may need revising when we add LA32, as ldptr.w is
	   not guaranteed available by the manual.  */
	if (offset < 32768)
	  bitmap_set_bit (components, regno);

	offset -= UNITS_PER_WORD;
      }

  offset = cfun->machine->frame.fp_sp_offset;
  for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
    if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
      {
	/* We can only wrap FP registers with imm12 offsets.  For larger
	   offsets a pseudo register might be needed, which cannot be
	   created during the shrink-wrapping pass.  */
	if (IMM12_OPERAND (offset))
	  bitmap_set_bit (components, regno);

	offset -= UNITS_PER_FP_REG;
      }

  /* Don't mess with the hard frame pointer.  */
  if (frame_pointer_needed)
    bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);

  bitmap_clear_bit (components, RETURN_ADDR_REGNUM);

  return components;
}

static sbitmap
loongarch_components_for_bb (basic_block bb)
{
  /* Registers are used in a bb if they are in the IN, GEN, or KILL sets.  */
  auto_bitmap used;
  bitmap_copy (used, DF_LIVE_IN (bb));
  bitmap_ior_into (used, &DF_LIVE_BB_INFO (bb)->gen);
  bitmap_ior_into (used, &DF_LIVE_BB_INFO (bb)->kill);

  sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
  bitmap_clear (components);

  function_abi_aggregator callee_abis;
  rtx_insn *insn;
  FOR_BB_INSNS (bb, insn)
    if (CALL_P (insn))
      callee_abis.note_callee_abi (insn_callee_abi (insn));

  HARD_REG_SET extra_caller_saves
    = callee_abis.caller_save_regs (*crtl->abi);

  for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
    if (!fixed_regs[regno]
	&& !crtl->abi->clobbers_full_reg_p (regno)
	&& (TEST_HARD_REG_BIT (extra_caller_saves, regno)
	    || bitmap_bit_p (used, regno)))
      bitmap_set_bit (components, regno);

  for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
    if (!fixed_regs[regno]
	&& !crtl->abi->clobbers_full_reg_p (regno)
	&& (TEST_HARD_REG_BIT (extra_caller_saves, regno)
	    || bitmap_bit_p (used, regno)))
      bitmap_set_bit (components, regno);

  return components;
}

static void
loongarch_disqualify_components (sbitmap, edge, sbitmap, bool)
{
  /* Do nothing.  */
}

static void
loongarch_process_components (sbitmap components, loongarch_save_restore_fn fn)
{
  HOST_WIDE_INT offset = cfun->machine->frame.gp_sp_offset;

  for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
    if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
      {
	if (bitmap_bit_p (components, regno))
	  loongarch_save_restore_reg (word_mode, regno, offset, fn);

	offset -= UNITS_PER_WORD;
      }

  offset = cfun->machine->frame.fp_sp_offset;
  machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;

  for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
    if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
      {
	if (bitmap_bit_p (components, regno))
	  loongarch_save_restore_reg (mode, regno, offset, fn);

	offset -= UNITS_PER_FP_REG;
      }
}

static void
loongarch_emit_prologue_components (sbitmap components)
{
  loongarch_process_components (components, loongarch_save_reg);
}

static void
loongarch_emit_epilogue_components (sbitmap components)
{
  loongarch_process_components (components, loongarch_restore_reg);
}

static void
loongarch_set_handled_components (sbitmap components)
{
  for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
    if (bitmap_bit_p (components, regno))
      cfun->machine->reg_is_wrapped_separately[regno] = true;

  for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
    if (bitmap_bit_p (components, regno))
      cfun->machine->reg_is_wrapped_separately[regno] = true;
}

/* Use the vshuf instruction to implement all 128-bit constant vector
   permutations.  */

static bool
loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d)
{
  int i;
  rtx target, op0, op1;
  rtx rperm[MAX_VECT_LEN];

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      target = d->target;
      op0 = d->op0;
      op1 = d->one_vector_p ? d->op0 : d->op1;

      if (GET_MODE (op0) != GET_MODE (op1)
	  || GET_MODE (op0) != GET_MODE (target))
	return false;

      if (d->testing_p)
	return true;

      for (i = 0; i < d->nelt; i += 1)
	rperm[i] = GEN_INT (d->perm[i]);

      machine_mode sel_mode = related_int_vector_mode (d->vmode).require ();
      rtvec sel_v = gen_rtvec_v (d->nelt, rperm);

      /* Although vshuf.* (except vshuf.b) needs sel == target, we cannot
	 load sel into target right now: here we are dealing with
	 pseudo regs, and target may be the same pseudo as one of op0
	 or op1.  Then we'd clobber the input.  Instead, we use a new
	 pseudo reg here.  The reload pass will look at the constraint
	 of vshuf.* and move sel into target first if needed.  */
      rtx sel = force_reg (sel_mode,
			   gen_rtx_CONST_VECTOR (sel_mode, sel_v));

      if (d->vmode == E_V16QImode)
	emit_insn (gen_lsx_vshuf_b (target, op1, op0, sel));
      else
	emit_insn (gen_lsx_vshuf (d->vmode, target, sel, op1, op0));

      return true;
    }
  return false;
}

/* Construct (set target (vec_select op0 (parallel selector))) and
   return true if that's a valid instruction in the active ISA.
   This matches the special constant vectors built from repeated
   4-element sets.  */

static bool
loongarch_is_imm_set_shuffle (struct expand_vec_perm_d *d)
{
  rtx x, elts[MAX_VECT_LEN];
  rtvec v;
  rtx_insn *insn;
  unsigned i;

  if (!ISA_HAS_LSX && !ISA_HAS_LASX)
    return false;

  for (i = 0; i < d->nelt; i++)
    elts[i] = GEN_INT (d->perm[i]);

  v = gen_rtvec_v (d->nelt, elts);
  x = gen_rtx_PARALLEL (VOIDmode, v);

  if (!loongarch_const_vector_shuffle_set_p (x, d->vmode))
    return false;

  if (d->testing_p)
    return true;

  x = gen_rtx_VEC_SELECT (d->vmode, d->op0, x);
  x = gen_rtx_SET (d->target, x);

  insn = emit_insn (x);
  if (recog_memoized (insn) < 0)
    {
      remove_insn (insn);
      return false;
    }
  return true;
}

static bool
loongarch_expand_vec_perm_even_odd (struct expand_vec_perm_d *);

/* Try to match and expand all kinds of 128-bit const vector permutation
   cases.  */

static bool
loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d)
{
  if (!ISA_HAS_LSX || GET_MODE_SIZE (d->vmode) != 16)
    return false;

  if (loongarch_is_imm_set_shuffle (d))
    return true;

  if (loongarch_expand_vec_perm_even_odd (d))
    return true;

  return loongarch_try_expand_lsx_vshuf_const (d);
}

/* Try to simplify a two-vector permutation using two intra-lane interleave
   insns and a cross-lane shuffle for 32-byte vectors.  */

static bool
loongarch_expand_vec_perm_interleave (struct expand_vec_perm_d *d)
{
  unsigned i, nelt;
  rtx t1, t2, t3;
  rtx (*gen_high) (rtx, rtx, rtx);
  rtx (*gen_low) (rtx, rtx, rtx);
  machine_mode mode = GET_MODE (d->target);

  if (d->one_vector_p)
    return false;
  if (!ISA_HAS_LASX || GET_MODE_SIZE (d->vmode) != 32)
    return false;

  nelt = d->nelt;
  if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
    return false;
  for (i = 0; i < nelt; i += 2)
    if (d->perm[i] != d->perm[0] + i / 2
	|| d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
      return false;

  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case E_V32QImode:
      gen_high = gen_lasx_xvilvh_b;
      gen_low = gen_lasx_xvilvl_b;
      break;
    case E_V16HImode:
      gen_high = gen_lasx_xvilvh_h;
      gen_low = gen_lasx_xvilvl_h;
      break;
    case E_V8SImode:
      gen_high = gen_lasx_xvilvh_w;
      gen_low = gen_lasx_xvilvl_w;
      break;
    case E_V4DImode:
      gen_high = gen_lasx_xvilvh_d;
      gen_low = gen_lasx_xvilvl_d;
      break;
    case E_V8SFmode:
      gen_high = gen_lasx_xvilvh_w_f;
      gen_low = gen_lasx_xvilvl_w_f;
      break;
    case E_V4DFmode:
      gen_high = gen_lasx_xvilvh_d_f;
      gen_low = gen_lasx_xvilvl_d_f;
      break;
    default:
      gcc_unreachable ();
    }

  t1 = gen_reg_rtx (mode);
  t2 = gen_reg_rtx (mode);
  emit_insn (gen_high (t1, d->op0, d->op1));
  emit_insn (gen_low (t2, d->op0, d->op1));
  if (mode == V4DFmode || mode == V8SFmode)
    {
      t3 = gen_reg_rtx (V4DFmode);
      if (d->perm[0])
	emit_insn (gen_lasx_xvpermi_q_v4df (t3, gen_lowpart (V4DFmode, t1),
					    gen_lowpart (V4DFmode, t2),
					    GEN_INT (0x31)));
      else
	emit_insn (gen_lasx_xvpermi_q_v4df (t3, gen_lowpart (V4DFmode, t1),
					    gen_lowpart (V4DFmode, t2),
					    GEN_INT (0x20)));
    }
  else
    {
      t3 = gen_reg_rtx (V4DImode);
      if (d->perm[0])
	emit_insn (gen_lasx_xvpermi_q_v4di (t3, gen_lowpart (V4DImode, t1),
					    gen_lowpart (V4DImode, t2),
					    GEN_INT (0x31)));
      else
	emit_insn (gen_lasx_xvpermi_q_v4di (t3, gen_lowpart (V4DImode, t1),
					    gen_lowpart (V4DImode, t2),
					    GEN_INT (0x20)));
    }
  emit_move_insn (d->target, gen_lowpart (mode, t3));
  return true;
}

/* Implement 128-bit and 256-bit extract-even and extract-odd permutations.  */

static bool
loongarch_expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
{
  rtx t1;
  machine_mode mode = GET_MODE (d->target);

  if (d->testing_p)
    return true;

  t1 = gen_reg_rtx (mode);

  switch (d->vmode)
    {
    /* 128 bit.  */
    case E_V2DFmode:
      if (odd)
	emit_insn (gen_lsx_vilvh_d_f (d->target, d->op0, d->op1));
      else
	emit_insn (gen_lsx_vilvl_d_f (d->target, d->op0, d->op1));
      break;

    case E_V2DImode:
      if (odd)
	emit_insn (gen_lsx_vilvh_d (d->target, d->op0, d->op1));
      else
	emit_insn (gen_lsx_vilvl_d (d->target, d->op0, d->op1));
      break;

    case E_V4SFmode:
      if (odd)
	emit_insn (gen_lsx_vpickod_w_f (d->target, d->op0, d->op1));
      else
	emit_insn (gen_lsx_vpickev_w_f (d->target, d->op0, d->op1));
      break;

    case E_V4SImode:
      if (odd)
	emit_insn (gen_lsx_vpickod_w (d->target, d->op0, d->op1));
      else
	emit_insn (gen_lsx_vpickev_w (d->target, d->op0, d->op1));
      break;

    case E_V8HImode:
      if (odd)
	emit_insn (gen_lsx_vpickod_h (d->target, d->op0, d->op1));
      else
	emit_insn (gen_lsx_vpickev_h (d->target, d->op0, d->op1));
      break;

    case E_V16QImode:
      if (odd)
	emit_insn (gen_lsx_vpickod_b (d->target, d->op0, d->op1));
      else
	emit_insn (gen_lsx_vpickev_b (d->target, d->op0, d->op1));
      break;

    /* 256 bit.  */
    case E_V4DFmode:
      /* Shuffle the lanes around into { 0 4 2 6 } and { 1 5 3 7 }.  */
      if (odd)
	emit_insn (gen_lasx_xvilvh_d_f (t1, d->op0, d->op1));
      else
	emit_insn (gen_lasx_xvilvl_d_f (t1, d->op0, d->op1));

      /* Shuffle within the 256-bit lanes to produce the result required.
	 { 0 2 4 6 } | { 1 3 5 7 }.  */
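      /* 0xd8 == 0b11011000: xvpermi.d writes source doublewords
	 { 0, 2, 1, 3 } to the destination, i.e. the two even-position
	 doublewords followed by the two odd-position ones.  */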
      emit_insn (gen_lasx_xvpermi_d_v4df (d->target, t1, GEN_INT (0xd8)));
      break;

    case E_V4DImode:
      if (odd)
	emit_insn (gen_lasx_xvilvh_d (t1, d->op0, d->op1));
      else
	emit_insn (gen_lasx_xvilvl_d (t1, d->op0, d->op1));

      emit_insn (gen_lasx_xvpermi_d_v4di (d->target, t1, GEN_INT (0xd8)));
      break;

    case E_V8SFmode:
      /* Shuffle the lanes around into:
	 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }.  */
      if (odd)
	emit_insn (gen_lasx_xvpickod_w_f (t1, d->op0, d->op1));
      else
	emit_insn (gen_lasx_xvpickev_w_f (t1, d->op0, d->op1));

      /* Shuffle within the 256-bit lanes to produce the result required.
	 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }.  */
      emit_insn (gen_lasx_xvpermi_d_v8sf (d->target, t1, GEN_INT (0xd8)));
      break;

    case E_V8SImode:
      if (odd)
	emit_insn (gen_lasx_xvpickod_w (t1, d->op0, d->op1));
      else
	emit_insn (gen_lasx_xvpickev_w (t1, d->op0, d->op1));

      emit_insn (gen_lasx_xvpermi_d_v8si (d->target, t1, GEN_INT (0xd8)));
      break;

    case E_V16HImode:
      if (odd)
	emit_insn (gen_lasx_xvpickod_h (t1, d->op0, d->op1));
      else
	emit_insn (gen_lasx_xvpickev_h (t1, d->op0, d->op1));

      emit_insn (gen_lasx_xvpermi_d_v16hi (d->target, t1, GEN_INT (0xd8)));
      break;

    case E_V32QImode:
      if (odd)
	emit_insn (gen_lasx_xvpickod_b (t1, d->op0, d->op1));
      else
	emit_insn (gen_lasx_xvpickev_b (t1, d->op0, d->op1));

      emit_insn (gen_lasx_xvpermi_d_v32qi (d->target, t1, GEN_INT (0xd8)));
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}

/* Pattern match extract-even and extract-odd permutations.  */

static bool
loongarch_expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
{
  unsigned i, odd, nelt = d->nelt;

  if (!ISA_HAS_LASX && !ISA_HAS_LSX)
    return false;

  odd = d->perm[0];
  if (odd != 0 && odd != 1)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != 2 * i + odd)
      return false;

  return loongarch_expand_vec_perm_even_odd_1 (d, odd);
}

static void
loongarch_expand_vec_interleave (rtx target, rtx op0, rtx op1, bool high_p)
{
  struct expand_vec_perm_d d;
  unsigned i, nelt, base;
  bool ok;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = GET_MODE (target);
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.one_vector_p = false;
  d.testing_p = false;

  base = high_p ? nelt / 2 : 0;
  for (i = 0; i < nelt / 2; ++i)
    {
      d.perm[i * 2] = i + base;
      d.perm[i * 2 + 1] = i + base + nelt;
    }

  ok = loongarch_expand_vec_perm_interleave (&d);
  gcc_assert (ok);
}

/* The LoongArch LASX instructions xvmulwev and xvmulwod return the even or
   odd parts of the double-sized result elements in the corresponding
   elements of the target register.  That's NOT what the
   vec_widen_umult_lo/hi patterns are expected to do.  We emulate the
   widening lo/hi multiplies with the even/odd versions followed by a
   vector merge.  */

void
loongarch_expand_vec_widen_hilo (rtx dest, rtx op1, rtx op2,
				 bool uns_p, bool high_p, const char *optab)
{
  machine_mode wmode = GET_MODE (dest);
  machine_mode mode = GET_MODE (op1);
  rtx t1, t2, t3;

  t1 = gen_reg_rtx (wmode);
  t2 = gen_reg_rtx (wmode);
  t3 = gen_reg_rtx (wmode);
  switch (mode)
    {
    case V16HImode:
      if (!strcmp (optab, "add"))
	{
	  if (!uns_p)
	    {
	      emit_insn (gen_lasx_xvaddwev_w_h (t1, op1, op2));
	      emit_insn (gen_lasx_xvaddwod_w_h (t2, op1, op2));
	    }
	  else
	    {
	      emit_insn (gen_lasx_xvaddwev_w_hu (t1, op1, op2));
	      emit_insn (gen_lasx_xvaddwod_w_hu (t2, op1, op2));
	    }
	}
      else if (!strcmp (optab, "mult"))
	{
	  if (!uns_p)
	    {
	      emit_insn (gen_lasx_xvmulwev_w_h (t1, op1, op2));
	      emit_insn (gen_lasx_xvmulwod_w_h (t2, op1, op2));
	    }
	  else
	    {
	      emit_insn (gen_lasx_xvmulwev_w_hu (t1, op1, op2));
	      emit_insn (gen_lasx_xvmulwod_w_hu (t2, op1, op2));
	    }
	}
      else if (!strcmp (optab, "sub"))
	{
	  if (!uns_p)
	    {
	      emit_insn (gen_lasx_xvsubwev_w_h (t1, op1, op2));
	      emit_insn (gen_lasx_xvsubwod_w_h (t2, op1, op2));
	    }
	  else
	    {
	      emit_insn (gen_lasx_xvsubwev_w_hu (t1, op1, op2));
	      emit_insn (gen_lasx_xvsubwod_w_hu (t2, op1, op2));
	    }
	}
      break;

    case V32QImode:
      if (!strcmp (optab, "add"))
	{
	  if (!uns_p)
	    {
	      emit_insn (gen_lasx_xvaddwev_h_b (t1, op1, op2));
	      emit_insn (gen_lasx_xvaddwod_h_b (t2, op1, op2));
	    }
	  else
	    {
	      emit_insn (gen_lasx_xvaddwev_h_bu (t1, op1, op2));
	      emit_insn (gen_lasx_xvaddwod_h_bu (t2, op1, op2));
	    }
	}
      else if (!strcmp (optab, "mult"))
	{
	  if (!uns_p)
	    {
	      emit_insn (gen_lasx_xvmulwev_h_b (t1, op1, op2));
	      emit_insn (gen_lasx_xvmulwod_h_b (t2, op1, op2));
	    }
	  else
	    {
	      emit_insn (gen_lasx_xvmulwev_h_bu (t1, op1, op2));
	      emit_insn (gen_lasx_xvmulwod_h_bu (t2, op1, op2));
	    }
	}
      else if (!strcmp (optab, "sub"))
	{
	  if (!uns_p)
	    {
	      emit_insn (gen_lasx_xvsubwev_h_b (t1, op1, op2));
	      emit_insn (gen_lasx_xvsubwod_h_b (t2, op1, op2));
	    }
	  else
	    {
	      emit_insn (gen_lasx_xvsubwev_h_bu (t1, op1, op2));
	      emit_insn (gen_lasx_xvsubwod_h_bu (t2, op1, op2));
	    }
	}
      break;

    default:
      gcc_unreachable ();
    }

  loongarch_expand_vec_interleave (t3, t1, t2, high_p);
  emit_move_insn (dest, gen_lowpart (wmode, t3));
}

/* Expand a variable vector permutation for LASX.  */

void
loongarch_expand_vec_perm_1 (rtx operands[])
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  rtx mask = operands[3];

  bool one_operand_shuffle = rtx_equal_p (op0, op1);
  rtx t1 = NULL;
  rtx t2 = NULL;
  rtx t3, t4, t5, t6, vt = NULL;
  rtx vec[32] = {NULL};
  machine_mode mode = GET_MODE (op0);
  machine_mode maskmode = GET_MODE (mask);
  int w, i;

  /* Number of elements in the vector.  */
  w = GET_MODE_NUNITS (mode);

  rtx round_data[MAX_VECT_LEN];
  rtx round_reg, round_data_rtx;

  if (mode != E_V32QImode)
    {
      for (int i = 0; i < w; i += 1)
	round_data[i] = GEN_INT (0x1f);

      if (mode == E_V4DFmode)
	{
	  round_data_rtx = gen_rtx_CONST_VECTOR (E_V4DImode,
						 gen_rtvec_v (w, round_data));
	  round_reg = gen_reg_rtx (E_V4DImode);
	}
      else if (mode == E_V8SFmode)
	{
	  round_data_rtx = gen_rtx_CONST_VECTOR (E_V8SImode,
						 gen_rtvec_v (w, round_data));
	  round_reg = gen_reg_rtx (E_V8SImode);
	}
      else
	{
	  round_data_rtx = gen_rtx_CONST_VECTOR (mode,
						 gen_rtvec_v (w, round_data));
	  round_reg = gen_reg_rtx (mode);
	}

      emit_move_insn (round_reg, round_data_rtx);
      switch (mode)
	{
	case E_V32QImode:
	  emit_insn (gen_andv32qi3 (mask, mask, round_reg));
	  break;
	case E_V16HImode:
	  emit_insn (gen_andv16hi3 (mask, mask, round_reg));
	  break;
	case E_V8SImode:
	case E_V8SFmode:
	  emit_insn (gen_andv8si3 (mask, mask, round_reg));
	  break;
	case E_V4DImode:
	case E_V4DFmode:
	  emit_insn (gen_andv4di3 (mask, mask, round_reg));
	  break;
	default:
	  gcc_unreachable ();
	  break;
	}
    }

  if (mode == V4DImode || mode == V4DFmode)
    {
      maskmode = mode = V8SImode;
      w = 8;
      t1 = gen_reg_rtx (maskmode);

      /* Replicate the low bits of the V4DImode mask into V8SImode:
	 mask = { A B C D }
	 t1 = { A A B B C C D D }.  */
      for (i = 0; i < w / 2; ++i)
	vec[i * 2 + 1] = vec[i * 2] = GEN_INT (i * 2);
      vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
      vt = force_reg (maskmode, vt);
      mask = gen_lowpart (maskmode, mask);
      emit_insn (gen_lasx_xvperm_w (t1, mask, vt));

      /* Multiply the shuffle indices by two.  */
      t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
				OPTAB_DIRECT);

      /* Add one to the odd shuffle indices:
	 t1 = { A*2, A*2+1, B*2, B*2+1, ... }.  */
      for (i = 0; i < w / 2; ++i)
	{
	  vec[i * 2] = const0_rtx;
	  vec[i * 2 + 1] = const1_rtx;
	}
      vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
      vt = validize_mem (force_const_mem (maskmode, vt));
      t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
				OPTAB_DIRECT);

      /* Continue as if V8SImode (resp. V32QImode) was used initially.  */
      operands[3] = mask = t1;
      target = gen_reg_rtx (mode);
      op0 = gen_lowpart (mode, op0);
      op1 = gen_lowpart (mode, op1);
    }

  switch (mode)
    {
    case E_V8SImode:
      if (one_operand_shuffle)
	{
	  emit_insn (gen_lasx_xvperm_w (target, op0, mask));
	  if (target != operands[0])
	    emit_move_insn (operands[0],
			    gen_lowpart (GET_MODE (operands[0]), target));
	}
      else
	{
	  t1 = gen_reg_rtx (V8SImode);
	  t2 = gen_reg_rtx (V8SImode);
	  emit_insn (gen_lasx_xvperm_w (t1, op0, mask));
	  emit_insn (gen_lasx_xvperm_w (t2, op1, mask));
	  goto merge_two;
	}
      return;

    case E_V8SFmode:
      mask = gen_lowpart (V8SImode, mask);
      if (one_operand_shuffle)
	emit_insn (gen_lasx_xvperm_w_f (target, op0, mask));
      else
	{
	  t1 = gen_reg_rtx (V8SFmode);
	  t2 = gen_reg_rtx (V8SFmode);
	  emit_insn (gen_lasx_xvperm_w_f (t1, op0, mask));
	  emit_insn (gen_lasx_xvperm_w_f (t2, op1, mask));
	  goto merge_two;
	}
      return;

    case E_V16HImode:
      if (one_operand_shuffle)
	{
	  t1 = gen_reg_rtx (V16HImode);
	  t2 = gen_reg_rtx (V16HImode);
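	  /* xvpermi.d selector 0x44 = { 0, 1, 0, 1 } copies op0's low
	     128 bits into both halves of t1, and 0xee = { 2, 3, 2, 3 }
	     copies op0's high 128 bits into both halves of t2, so the
	     following xvshuf can index either half uniformly.  */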
	  emit_insn (gen_lasx_xvpermi_d_v16hi (t1, op0, GEN_INT (0x44)));
	  emit_insn (gen_lasx_xvpermi_d_v16hi (t2, op0, GEN_INT (0xee)));
	  emit_insn (gen_lasx_xvshuf_h (target, mask, t2, t1));
	}
      else
	{
	  t1 = gen_reg_rtx (V16HImode);
	  t2 = gen_reg_rtx (V16HImode);
	  t3 = gen_reg_rtx (V16HImode);
	  t4 = gen_reg_rtx (V16HImode);
	  t5 = gen_reg_rtx (V16HImode);
	  t6 = gen_reg_rtx (V16HImode);
	  emit_insn (gen_lasx_xvpermi_d_v16hi (t3, op0, GEN_INT (0x44)));
	  emit_insn (gen_lasx_xvpermi_d_v16hi (t4, op0, GEN_INT (0xee)));
	  emit_insn (gen_lasx_xvshuf_h (t1, mask, t4, t3));
	  emit_insn (gen_lasx_xvpermi_d_v16hi (t5, op1, GEN_INT (0x44)));
	  emit_insn (gen_lasx_xvpermi_d_v16hi (t6, op1, GEN_INT (0xee)));
	  emit_insn (gen_lasx_xvshuf_h (t2, mask, t6, t5));
	  goto merge_two;
	}
      return;

    case E_V32QImode:
      if (one_operand_shuffle)
	{
	  t1 = gen_reg_rtx (V32QImode);
	  t2 = gen_reg_rtx (V32QImode);
	  emit_insn (gen_lasx_xvpermi_d_v32qi (t1, op0, GEN_INT (0x44)));
	  emit_insn (gen_lasx_xvpermi_d_v32qi (t2, op0, GEN_INT (0xee)));
	  emit_insn (gen_lasx_xvshuf_b (target, t2, t1, mask));
	}
      else
	{
	  t1 = gen_reg_rtx (V32QImode);
	  t2 = gen_reg_rtx (V32QImode);
	  t3 = gen_reg_rtx (V32QImode);
	  t4 = gen_reg_rtx (V32QImode);
	  t5 = gen_reg_rtx (V32QImode);
	  t6 = gen_reg_rtx (V32QImode);
	  emit_insn (gen_lasx_xvpermi_d_v32qi (t3, op0, GEN_INT (0x44)));
	  emit_insn (gen_lasx_xvpermi_d_v32qi (t4, op0, GEN_INT (0xee)));
	  emit_insn (gen_lasx_xvshuf_b (t1, t4, t3, mask));
	  emit_insn (gen_lasx_xvpermi_d_v32qi (t5, op1, GEN_INT (0x44)));
	  emit_insn (gen_lasx_xvpermi_d_v32qi (t6, op1, GEN_INT (0xee)));
	  emit_insn (gen_lasx_xvshuf_b (t2, t6, t5, mask));
	  goto merge_two;
	}
      return;

    default:
      gcc_assert (GET_MODE_SIZE (mode) == 32);
      break;
    }

 merge_two:
  /* Then merge them together.  The key is whether any given control
     element contained a bit set that indicates the second word.  */
  rtx xops[6];
  mask = operands[3];
  vt = GEN_INT (w);
  vt = gen_const_vec_duplicate (maskmode, vt);
  vt = force_reg (maskmode, vt);
  mask = expand_simple_binop (maskmode, AND, mask, vt,
			      NULL_RTX, 0, OPTAB_DIRECT);
  if (GET_MODE (target) != mode)
    target = gen_reg_rtx (mode);
  xops[0] = target;
  xops[1] = gen_lowpart (mode, t2);
  xops[2] = gen_lowpart (mode, t1);
  xops[3] = gen_rtx_EQ (maskmode, mask, vt);
  xops[4] = mask;
  xops[5] = vt;

  loongarch_expand_vec_cond_expr (mode, maskmode, xops);
  if (target != operands[0])
    emit_move_insn (operands[0],
		    gen_lowpart (GET_MODE (operands[0]), target));
}
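
/* Expand a variable vector permutation for LSX: AND each selector element
   with 0x1f so it stays a valid index into the concatenated { op0, op1 }
   pair, then dispatch to the vshuf pattern for the mode.  */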
void
loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  machine_mode vimode = GET_MODE (sel);
  auto nelt = GET_MODE_NUNITS (vmode);
  auto round_reg = gen_reg_rtx (vimode);
  rtx round_data[MAX_VECT_LEN];

  for (int i = 0; i < nelt; i += 1)
    round_data[i] = GEN_INT (0x1f);

  rtx round_data_rtx = gen_rtx_CONST_VECTOR (vimode,
					     gen_rtvec_v (nelt, round_data));
  emit_move_insn (round_reg, round_data_rtx);

  if (vmode != vimode)
    {
      target = lowpart_subreg (vimode, target, vmode);
      op0 = lowpart_subreg (vimode, op0, vmode);
      op1 = lowpart_subreg (vimode, op1, vmode);
    }

  switch (vmode)
    {
    case E_V16QImode:
      emit_insn (gen_andv16qi3 (sel, sel, round_reg));
      emit_insn (gen_lsx_vshuf_b (target, op1, op0, sel));
      break;
    case E_V2DFmode:
    case E_V2DImode:
      emit_insn (gen_andv2di3 (sel, sel, round_reg));
      emit_insn (gen_lsx_vshuf_d (target, sel, op1, op0));
      break;
    case E_V4SFmode:
    case E_V4SImode:
      emit_insn (gen_andv4si3 (sel, sel, round_reg));
      emit_insn (gen_lsx_vshuf_w (target, sel, op1, op0));
      break;
    case E_V8HImode:
      emit_insn (gen_andv8hi3 (sel, sel, round_reg));
      emit_insn (gen_lsx_vshuf_h (target, sel, op1, op0));
      break;
    default:
      break;
    }
}

/* The following are assist functions for const vector permutation
   support.  */
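
/* Match a selector that repeats PERM[0] across the low half of the vector
   and PERM[0] + nelt / 2 across the high half, e.g. for V8SImode
   { 1, 1, 1, 1, 5, 5, 5, 5 }.  */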
static bool
loongarch_is_quad_duplicate (struct expand_vec_perm_d *d)
{
  if (d->perm[0] >= d->nelt / 2)
    return false;

  bool result = true;
  unsigned char lhs = d->perm[0];
  unsigned char rhs = d->perm[d->nelt / 2];

  if ((rhs - lhs) != d->nelt / 2)
    return false;

  for (int i = 1; i < d->nelt; i += 1)
    {
      if ((i < d->nelt / 2) && (d->perm[i] != lhs))
	{
	  result = false;
	  break;
	}
      if ((i > d->nelt / 2) && (d->perm[i] != rhs))
	{
	  result = false;
	  break;
	}
    }

  return result;
}
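
/* Match a selector that extracts one operand wholesale: nelt consecutive
   indices starting at 0 (all of op0) or at nelt (all of op1), e.g. for
   V8SImode { 0, 1, 2, 3, 4, 5, 6, 7 } or
   { 8, 9, 10, 11, 12, 13, 14, 15 }.  */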
static bool
loongarch_is_extraction_permutation (struct expand_vec_perm_d *d)
{
  bool result = true;
  unsigned char buf = d->perm[0];

  if (buf != 0 && buf != d->nelt)
    return false;

  for (int i = 0; i < d->nelt; i += 1)
    {
      if (buf != d->perm[i])
	{
	  result = false;
	  break;
	}
      buf += 1;
    }

  return result;
}
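
/* Match the selector that interleaves the low halves of the two operands:
   { 0, nelt, 1, nelt + 1, ... }.  */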
static bool
loongarch_is_lasx_lowpart_interleave (struct expand_vec_perm_d *d)
{
  bool result = true;
  unsigned char buf = 0;

  for (int i = 0; i < d->nelt; i += 2)
    {
      if (buf != d->perm[i])
	{
	  result = false;
	  break;
	}
      buf += 1;
    }

  if (result)
    {
      buf = d->nelt;
      for (int i = 1; i < d->nelt; i += 2)
	{
	  if (buf != d->perm[i])
	    {
	      result = false;
	      break;
	    }
	  buf += 1;
	}
    }

  return result;
}
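
/* Match the V32QImode selector { 0..7, 32..39, 8..15, 40..47 }: the
   low-part interleave of op0 and op1 taken eight bytes at a time.  */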
static bool
loongarch_is_lasx_lowpart_interleave_2 (struct expand_vec_perm_d *d)
{
  if (d->vmode != E_V32QImode)
    return false;

  bool result = true;
  unsigned char buf = 0;

#define COMPARE_SELECTOR(INIT, BEGIN, END) \
  buf = INIT; \
  for (int i = BEGIN; i < END && result; i += 1) \
    { \
      if (buf != d->perm[i]) \
	{ \
	  result = false; \
	  break; \
	} \
      buf += 1; \
    }

  COMPARE_SELECTOR (0, 0, 8);
  COMPARE_SELECTOR (32, 8, 16);
  COMPARE_SELECTOR (8, 16, 24);
  COMPARE_SELECTOR (40, 24, 32);

#undef COMPARE_SELECTOR
  return result;
}
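
/* Match the selector that interleaves the high halves of the two operands:
   { nelt / 2, nelt + nelt / 2, nelt / 2 + 1, ... }.  */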
static bool
loongarch_is_lasx_highpart_interleave (expand_vec_perm_d *d)
{
  bool result = true;
  unsigned char buf = d->nelt / 2;

  for (int i = 0; i < d->nelt; i += 2)
    {
      if (buf != d->perm[i])
	{
	  result = false;
	  break;
	}
      buf += 1;
    }

  if (result)
    {
      buf = d->nelt + d->nelt / 2;
      for (int i = 1; i < d->nelt; i += 2)
	{
	  if (buf != d->perm[i])
	    {
	      result = false;
	      break;
	    }
	  buf += 1;
	}
    }

  return result;
}
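
/* Match the V32QImode selector { 16..23, 48..55, 24..31, 56..63 }: the
   high-part interleave of op0 and op1 taken eight bytes at a time.  */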
static bool
loongarch_is_lasx_highpart_interleave_2 (struct expand_vec_perm_d *d)
{
  if (d->vmode != E_V32QImode)
    return false;

  bool result = true;
  unsigned char buf = 0;

#define COMPARE_SELECTOR(INIT, BEGIN, END) \
  buf = INIT; \
  for (int i = BEGIN; i < END && result; i += 1) \
    { \
      if (buf != d->perm[i]) \
	{ \
	  result = false; \
	  break; \
	} \
      buf += 1; \
    }

  COMPARE_SELECTOR (16, 0, 8);
  COMPARE_SELECTOR (48, 8, 16);
  COMPARE_SELECTOR (24, 16, 24);
  COMPARE_SELECTOR (56, 24, 32);

#undef COMPARE_SELECTOR
  return result;
}
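
/* Match a selector whose elements are all equal to PERM[0], i.e. a
   broadcast of a single element, e.g. for V8SImode
   { 2, 2, 2, 2, 2, 2, 2, 2 }.  */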
static bool
loongarch_is_elem_duplicate (struct expand_vec_perm_d *d)
{
  bool result = true;
  unsigned char buf = d->perm[0];

  for (int i = 0; i < d->nelt; i += 1)
    {
      if (buf != d->perm[i])
	{
	  result = false;
	  break;
	}
    }

  return result;
}

/* In LASX, some permutation insns do not have the behavior that GCC expects
   when the compiler wants to emit a vector permutation.

   1. What GCC provides via vectorize_vec_perm_const ()'s parameters:
   When GCC wants to perform a vector permutation, it provides two op
   registers, one target register, and a selector.
   In the const vector permutation case, GCC provides the selector as a char
   array that contains the original values; in the variable vector
   permutation case (performed via the vec_perm<mode> insn template), it
   provides a vector register.  We assume that nelt is the number of
   elements inside a single vector in the current 256-bit vector mode.

   2. What GCC expects to be performed:
   The two op registers (op0, op1) "combine" into a 512-bit temporary vector
   storage that holds 2 * nelt elements; the low 256 bits are op0 and the
   high 256 bits are op1, so the elements are indexed as below:
	    0 ~ nelt - 1		nelt ~ 2 * nelt - 1
	  |-------------------------|-------------------------|
		Low 256bit (op0)	High 256bit (op1)
   For example, the second element in op1 (V8SImode) is indexed with 9.
   The selector is a vector that has the same mode and number of elements as
   op0, op1 and target; it looks like this:
	    0 ~ nelt - 1
	  |-------------------------|
		 256bit (selector)
   It describes which element from the 512-bit temporary vector storage
   will fit into each element slot of the target.
   GCC expects that every element in the selector can be ANY index into the
   512-bit vector storage (the selector can pick literally any element from
   op0 and op1, and then fit it into any place of the target register).
   This is also what the LSX 128-bit vshuf.* instructions do, so we can
   handle a 128-bit vector permutation with a single instruction easily.

   3. What the LASX permutation instructions do:
   In short, they execute two independent 128-bit vector permutations, and
   that is the reason we need to do the jobs below.  We will explain it.
   op0, op1, target, and selector are each separated into a high 128-bit
   part and a low 128-bit part, and the permutation works as described
   below:

   a) op0's low 128 bits and op1's low 128 bits "combine" into a 256-bit
   temporary vector storage (TVS1), whose elements are indexed as below:
	    0 ~ nelt / 2 - 1	  nelt / 2 ~ nelt - 1
	  |---------------------|---------------------| TVS1
	      op0's low 128bit	    op1's low 128bit
   op0's high 128 bits and op1's high 128 bits are "combined" into TVS2 in
   the same way.
	    0 ~ nelt / 2 - 1	  nelt / 2 ~ nelt - 1
	  |---------------------|---------------------| TVS2
	      op0's high 128bit	    op1's high 128bit
   b) The selector's low 128 bits describe which elements from TVS1 will
   fit into the target vector's low 128 bits.  No TVS2 elements are allowed.
   c) The selector's high 128 bits describe which elements from TVS2 will
   fit into the target vector's high 128 bits.  No TVS1 elements are
   allowed.

   As we can see, if we want to handle a vector permutation correctly, we
   can achieve it in three ways:
   a) Modify the selector's elements, to make sure that every element
   informs the correct value that will be put into the target vector.
   b) Generate extra instructions before/after the permutation instruction,
   to adjust the op vectors or the target vector, making sure the target
   vector's value is what GCC expects.
   c) Use other instructions to process the ops and put the correct result
   into the target.  */

/* Implementation of constant vector permutation.  This function identifies
   recognized patterns of the permutation selector argument, and uses one
   or more instructions to finish the permutation job correctly.  For
   unsupported patterns, it returns false.  */

static bool
loongarch_expand_vec_perm_const (struct expand_vec_perm_d *d)
{
  bool flag = false;
  unsigned int i;
  unsigned char idx;
  rtx target, op0, op1;
  rtx rperm[MAX_VECT_LEN];
  unsigned int remapped[MAX_VECT_LEN];
  unsigned char perm2[MAX_VECT_LEN];

  if (GET_MODE_SIZE (d->vmode) == 16)
    return loongarch_expand_lsx_shuffle (d);
  else
    {
      if (d->one_vector_p)
	{
	  /* Try interleave with alternating operands.  */
	  memcpy (perm2, d->perm, sizeof (perm2));
	  for (i = 1; i < d->nelt; i += 2)
	    perm2[i] += d->nelt;
	  if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1,
						perm2, d->nelt, d->testing_p))
	    return true;
	}
      else
	{
	  if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1,
						d->perm, d->nelt,
						d->testing_p))
	    return true;

	  /* Try again with swapped operands.  */
	  for (i = 0; i < d->nelt; ++i)
	    perm2[i] = (d->perm[i] + d->nelt) & (2 * d->nelt - 1);
	  if (loongarch_expand_vselect_vconcat (d->target, d->op1, d->op0,
						perm2, d->nelt, d->testing_p))
	    return true;
	}

      if (loongarch_is_imm_set_shuffle (d))
	return true;

      if (loongarch_expand_vec_perm_even_odd (d))
	return true;

      if (loongarch_is_lasx_lowpart_interleave (d)
	  || loongarch_is_lasx_lowpart_interleave_2 (d)
	  || loongarch_is_lasx_highpart_interleave (d)
	  || loongarch_is_lasx_highpart_interleave_2 (d))
	{
	  if (loongarch_expand_vec_perm_interleave (d))
	    return true;
	}

      if (loongarch_is_quad_duplicate (d))
	{
	  if (d->testing_p)
	    return true;
	  /* Selector example: E_V8SImode, { 0, 0, 0, 0, 4, 4, 4, 4 }.  */
	  for (i = 0; i < d->nelt; i += 1)
	    rperm[i] = GEN_INT (d->perm[0]);
	  /* Selector after: { 0, 0, 0, 0, 0, 0, 0, 0 }.  */
	  flag = true;
	  goto expand_perm_const_end;
	}

      if (loongarch_is_extraction_permutation (d))
	{
	  if (d->testing_p)
	    return true;
	  /* Selector sample: E_V8SImode, { 0, 1, 2, 3, 4, 5, 6, 7 }.  */
	  if (d->perm[0] == 0)
	    {
	      for (i = 0; i < d->nelt / 2; i += 1)
		{
		  remapped[i] = i;
		  remapped[i + d->nelt / 2] = i;
		}
	    }
	  else
	    {
	      /* { 8, 9, 10, 11, 12, 13, 14, 15 }.  */
	      for (i = 0; i < d->nelt / 2; i += 1)
		{
		  idx = i + d->nelt / 2;
		  remapped[i] = idx;
		  remapped[i + d->nelt / 2] = idx;
		}
	    }
	  /* Selector after: { 0, 1, 2, 3, 0, 1, 2, 3 }
	     { 8, 9, 10, 11, 8, 9, 10, 11 }.  */

	  /* Convert the remapped selector array to an RTL array.  */
	  for (i = 0; i < d->nelt; i += 1)
	    rperm[i] = GEN_INT (remapped[i]);

	  flag = true;
	  goto expand_perm_const_end;
	}

      if (loongarch_is_elem_duplicate (d))
	{
	  if (d->testing_p)
	    return true;
	  /* Broadcast a single element (from op0 or op1) to every slot
	     of the target register.
	     Selector sample: E_V8SImode, { 2, 2, 2, 2, 2, 2, 2, 2 }.  */
	  rtx conv_op1 = simplify_gen_subreg (E_V4DImode, d->op1, d->vmode, 0);
	  rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0);
	  rtx temp_reg = gen_reg_rtx (d->vmode);
	  rtx conv_temp = simplify_gen_subreg (E_V4DImode, temp_reg,
					       d->vmode, 0);

	  emit_move_insn (temp_reg, d->op0);

	  idx = d->perm[0];
	  /* We will use the xvrepl128vei.* insn to achieve the result,
	     but we need the high and low 128 bits to hold the same
	     contents, namely the value we want to broadcast, because
	     xvrepl128vei broadcasts from each 128-bit half of the source
	     register to the corresponding half of the target register.  */
	  if (idx < d->nelt / 2)
	    {
	      emit_insn (gen_lasx_xvpermi_q_v4di (conv_temp, conv_temp,
						  conv_op0, GEN_INT (0x0)));
	    }
	  else if (idx >= d->nelt / 2 && idx < d->nelt)
	    {
	      emit_insn (gen_lasx_xvpermi_q_v4di (conv_temp, conv_temp,
						  conv_op0, GEN_INT (0x11)));
	      idx -= d->nelt / 2;
	    }
	  else if (idx >= d->nelt && idx < (d->nelt + d->nelt / 2))
	    {
	      emit_insn (gen_lasx_xvpermi_q_v4di (conv_temp, conv_temp,
						  conv_op1, GEN_INT (0x0)));
	    }
	  else if (idx >= (d->nelt + d->nelt / 2) && idx < d->nelt * 2)
	    {
	      emit_insn (gen_lasx_xvpermi_q_v4di (conv_temp, conv_temp,
						  conv_op1, GEN_INT (0x11)));
	      idx -= d->nelt / 2;
	    }

	  /* Then we can finally generate this insn.  */
	  switch (d->vmode)
	    {
	    case E_V4DImode:
	      emit_insn (gen_lasx_xvrepl128vei_d (d->target, temp_reg,
						  GEN_INT (idx)));
	      break;
	    case E_V4DFmode:
	      emit_insn (gen_lasx_xvrepl128vei_d_f (d->target, temp_reg,
						    GEN_INT (idx)));
	      break;
	    case E_V8SImode:
	      emit_insn (gen_lasx_xvrepl128vei_w (d->target, temp_reg,
						  GEN_INT (idx)));
	      break;
	    case E_V8SFmode:
	      emit_insn (gen_lasx_xvrepl128vei_w_f (d->target, temp_reg,
						    GEN_INT (idx)));
	      break;
	    case E_V16HImode:
	      emit_insn (gen_lasx_xvrepl128vei_h (d->target, temp_reg,
						  GEN_INT (idx)));
	      break;
	    case E_V32QImode:
	      emit_insn (gen_lasx_xvrepl128vei_b (d->target, temp_reg,
						  GEN_INT (idx)));
	      break;
	    default:
	      gcc_unreachable ();
	      break;
	    }

	  return true;
	}

expand_perm_const_end:
      if (flag)
	{
	  target = d->target;
	  op0 = d->op0;
	  op1 = d->one_vector_p ? d->op0 : d->op1;

	  machine_mode sel_mode
	    = related_int_vector_mode (d->vmode).require ();
	  rtvec sel_v = gen_rtvec_v (d->nelt, rperm);

	  /* See the comment in loongarch_expand_lsx_shuffle for why
	     we don't simply use a SUBREG to pun target.  */
	  rtx sel = force_reg (sel_mode,
			       gen_rtx_CONST_VECTOR (sel_mode, sel_v));

	  if (d->vmode == E_V32QImode)
	    emit_insn (gen_lasx_xvshuf_b (target, op1, op0, sel));
	  else
	    emit_insn (gen_lasx_xvshuf (d->vmode, target, sel, op1, op0));

	  return true;
	}
    }

  return false;
}
|
||
|
||
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  */

static bool
loongarch_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
				    rtx target, rtx op0, rtx op1,
				    const vec_perm_indices &sel)
{
  if (vmode != op_mode)
    return false;

  struct expand_vec_perm_d d;
  int i, nelt, which;
  unsigned char orig_perm[MAX_VECT_LEN];
  bool ok;

  d.target = target;
  if (op0)
    {
      rtx nop0 = force_reg (vmode, op0);
      if (op0 == op1)
	op1 = nop0;
      op0 = nop0;
    }
  if (op1)
    op1 = force_reg (vmode, op1);
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = vmode;
  gcc_assert (VECTOR_MODE_P (vmode));
  d.nelt = nelt = GET_MODE_NUNITS (vmode);
  d.testing_p = !target;

  /* This is overly conservative, but ensures we don't get an
     uninitialized warning on ORIG_PERM.  */
  memset (orig_perm, 0, MAX_VECT_LEN);
  for (i = which = 0; i < nelt; ++i)
    {
      int ei = sel[i] & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      orig_perm[i] = ei;
    }
  memcpy (d.perm, orig_perm, MAX_VECT_LEN);

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (d.testing_p || !rtx_equal_p (d.op0, d.op1))
	break;
      /* FALLTHRU */

    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] &= nelt - 1;
      d.op0 = d.op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = d.op0;
      d.one_vector_p = true;
      break;
    }

  /* Reduce the selector modulo 2 * nelt to avoid vshuf undefined
     behavior.  */
  for (i = 0; i < d.nelt; i += 1)
    {
      d.perm[i] %= (d.nelt * 2);
    }

  if (d.testing_p)
    {
      d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
      d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
      if (!d.one_vector_p)
	d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

      start_sequence ();
      ok = loongarch_expand_vec_perm_const (&d);
      end_sequence ();
      return ok;
    }

  ok = loongarch_expand_vec_perm_const (&d);

  /* If we were given a two-vector permutation which just happened to
     have both input vectors equal, we folded this into a one-vector
     permutation.  There are several loongson patterns that are matched
     via direct vec_select+vec_concat expansion, but we do not have
     support in loongarch_expand_vec_perm_const to guess the adjustment
     that should be made for a single operand.  Just try again with
     the original permutation.  */
  if (!ok && which == 3)
    {
      d.op0 = op0;
      d.op1 = op1;
      d.one_vector_p = false;
      memcpy (d.perm, orig_perm, MAX_VECT_LEN);
      ok = loongarch_expand_vec_perm_const (&d);
    }

  return ok;
}

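/* A worked example of the WHICH classification above (illustrative only):
   for E_V4SImode with sel = { 0, 4, 1, 5 }, elements 0 and 1 come from
   op0 (ei < nelt) and elements 4 and 5 from op1 (ei >= nelt), so WHICH
   ends up as 3 and both operands are kept.  A selector such as
   { 4, 5, 6, 7 } yields WHICH == 2 and is rewritten to { 0, 1, 2, 3 }
   acting on op1 alone.  */
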
static int
loongarch_cpu_sched_reassociation_width (struct loongarch_target *target,
					 unsigned int opc, machine_mode mode)
{
  /* OPC is referenced only on some paths; the cast silences
     unused-parameter warnings on the others.  */
  (void) opc;

  switch (target->cpu_tune)
    {
    case TUNE_GENERIC:
    case TUNE_LOONGARCH64:
    case TUNE_LA464:
    case TUNE_LA664:
      /* Vector part.  */
      if (LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode))
	{
	  /* Integer vector instructions execute in the FP unit.
	     The width of integer/floating-point vector instructions is 3.  */
	  return 3;
	}

      /* Scalar part.  */
      else if (INTEGRAL_MODE_P (mode))
	return 1;
      else if (FLOAT_MODE_P (mode))
	{
	  if (opc == PLUS_EXPR)
	    {
	      return 2;
	    }
	  return 4;
	}
      break;
    default:
      break;
    }

  /* Default is 1.  */
  return 1;
}

/* Implement TARGET_SCHED_REASSOCIATION_WIDTH.  */

static int
loongarch_sched_reassociation_width (unsigned int opc, machine_mode mode)
{
  return loongarch_cpu_sched_reassociation_width (&la_target, opc, mode);
}

/* Implement extraction of a scalar element from a vector register.  */

void
loongarch_expand_vector_extract (rtx target, rtx vec, int elt)
{
  machine_mode mode = GET_MODE (vec);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  rtx tmp;

  switch (mode)
    {
    case E_V8HImode:
    case E_V16QImode:
      break;

    case E_V32QImode:
      if (ISA_HAS_LASX)
	{
	  if (elt >= 16)
	    {
	      tmp = gen_reg_rtx (V32QImode);
	      emit_insn (gen_lasx_xvpermi_d_v32qi (tmp, vec, GEN_INT (0xe)));
	      loongarch_expand_vector_extract (target,
					       gen_lowpart (V16QImode, tmp),
					       elt & 15);
	    }
	  else
	    loongarch_expand_vector_extract (target,
					     gen_lowpart (V16QImode, vec),
					     elt & 15);
	  return;
	}
      break;

    case E_V16HImode:
      if (ISA_HAS_LASX)
	{
	  if (elt >= 8)
	    {
	      tmp = gen_reg_rtx (V16HImode);
	      emit_insn (gen_lasx_xvpermi_d_v16hi (tmp, vec, GEN_INT (0xe)));
	      loongarch_expand_vector_extract (target,
					       gen_lowpart (V8HImode, tmp),
					       elt & 7);
	    }
	  else
	    loongarch_expand_vector_extract (target,
					     gen_lowpart (V8HImode, vec),
					     elt & 7);
	  return;
	}
      break;

    default:
      break;
    }

  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
  tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

  /* Let the rtl optimizers know about the zero extension performed.  */
  if (inner_mode == QImode || inner_mode == HImode)
    {
      tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
      target = gen_lowpart (SImode, target);
    }
  if (inner_mode == SImode || inner_mode == DImode)
    {
      tmp = gen_rtx_SIGN_EXTEND (inner_mode, tmp);
    }

  emit_insn (gen_rtx_SET (target, tmp));
}

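/* For illustration: extracting element 20 of a V32QI vector takes the
   high-lane path above; xvpermi.d with control 0xe copies the high 128
   bits of VEC into the low half of TMP, after which the extract recurses
   on the V16QI lowpart with index 20 & 15 == 4.  */
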
/* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
   to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
   The upper bits of DEST are undefined, though they shouldn't cause
   exceptions (some bits from src or all zeros are ok).  */

static void
emit_reduc_half (rtx dest, rtx src, int i)
{
  rtx tem, d = dest;
  switch (GET_MODE (src))
    {
    case E_V4SFmode:
      tem = gen_lsx_vbsrl_w_f (dest, src, GEN_INT (i == 128 ? 8 : 4));
      break;
    case E_V2DFmode:
      tem = gen_lsx_vbsrl_d_f (dest, src, GEN_INT (8));
      break;
    case E_V8SFmode:
      if (i == 256)
	tem = gen_lasx_xvpermi_d_v8sf (dest, src, GEN_INT (0xe));
      else
	tem = gen_lasx_xvshuf4i_w_f (dest, src,
				     GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
      break;
    case E_V4DFmode:
      if (i == 256)
	tem = gen_lasx_xvpermi_d_v4df (dest, src, GEN_INT (0xe));
      else
	tem = gen_lasx_xvpermi_d_v4df (dest, src, const1_rtx);
      break;
    case E_V32QImode:
    case E_V16HImode:
    case E_V8SImode:
    case E_V4DImode:
      d = gen_reg_rtx (V4DImode);
      if (i == 256)
	tem = gen_lasx_xvpermi_d_v4di (d, gen_lowpart (V4DImode, src),
				       GEN_INT (0xe));
      else
	tem = gen_lasx_xvbsrl_d (d, gen_lowpart (V4DImode, src),
				 GEN_INT (i / 16));
      break;
    case E_V16QImode:
    case E_V8HImode:
    case E_V4SImode:
    case E_V2DImode:
      d = gen_reg_rtx (V2DImode);
      tem = gen_lsx_vbsrl_d (d, gen_lowpart (V2DImode, src), GEN_INT (i / 16));
      break;
    default:
      gcc_unreachable ();
    }
  emit_insn (tem);
  if (d != dest)
    emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
}

/* Expand a vector reduction.  FN is the binary pattern to reduce;
   DEST is the destination; IN is the input vector.  */

void
loongarch_expand_vector_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx half, dst, vec = in;
  machine_mode mode = GET_MODE (in);
  int i;

  for (i = GET_MODE_BITSIZE (mode);
       i > GET_MODE_UNIT_BITSIZE (mode);
       i >>= 1)
    {
      half = gen_reg_rtx (mode);
      emit_reduc_half (half, vec, i);
      if (i == GET_MODE_UNIT_BITSIZE (mode) * 2)
	dst = dest;
      else
	dst = gen_reg_rtx (mode);
      emit_insn (fn (dst, half, vec));
      vec = dst;
    }
}

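/* A worked example of the halving ladder above (illustrative): reducing
   a V8SI vector (256 bits, 32-bit units) runs the loop with i = 256, 128
   and 64.  Each step combines VEC with its shifted half, so the partial
   results collapse 8 -> 4 -> 2 -> 1 live elements, and the final FN call
   leaves the reduction in element 0 of DEST.  */
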
/* Expand an integral vector unpack operation.  */

void
loongarch_expand_vec_unpack (rtx operands[2], bool unsigned_p)
{
  machine_mode imode = GET_MODE (operands[1]);
  rtx (*unpack) (rtx, rtx, rtx);
  rtx (*extend) (rtx, rtx);
  rtx (*cmpFunc) (rtx, rtx, rtx);
  rtx (*swap_hi_lo) (rtx, rtx, rtx, rtx);
  rtx tmp, dest;

  /* In LASX, only vec_unpacks_hi_<mode> requires an expander.  */
  if (ISA_HAS_LASX && GET_MODE_SIZE (imode) == 32)
    {
      switch (imode)
	{
	case E_V8SImode:
	  if (unsigned_p)
	    extend = gen_vec_unpacku_lo_v8si;
	  else
	    extend = gen_vec_unpacks_lo_v8si;
	  swap_hi_lo = gen_lasx_xvpermi_q_v8si;
	  break;

	case E_V16HImode:
	  if (unsigned_p)
	    extend = gen_vec_unpacku_lo_v16hi;
	  else
	    extend = gen_vec_unpacks_lo_v16hi;
	  swap_hi_lo = gen_lasx_xvpermi_q_v16hi;
	  break;

	case E_V32QImode:
	  if (unsigned_p)
	    extend = gen_vec_unpacku_lo_v32qi;
	  else
	    extend = gen_vec_unpacks_lo_v32qi;
	  swap_hi_lo = gen_lasx_xvpermi_q_v32qi;
	  break;

	default:
	  gcc_unreachable ();
	  break;
	}

      tmp = gen_reg_rtx (imode);
      emit_insn (swap_hi_lo (tmp, tmp, operands[1], const1_rtx));
      emit_insn (extend (operands[0], tmp));
      return;
    }
  /* In LSX, only vec_unpacks_lo_<mode> requires an expander.  */
  else if (ISA_HAS_LSX && !ISA_HAS_LASX)
    {
      switch (imode)
	{
	case E_V4SImode:
	  unpack = gen_lsx_vilvl_w;
	  cmpFunc = gen_lsx_vslt_w;
	  break;

	case E_V8HImode:
	  unpack = gen_lsx_vilvl_h;
	  cmpFunc = gen_lsx_vslt_h;
	  break;

	case E_V16QImode:
	  unpack = gen_lsx_vilvl_b;
	  cmpFunc = gen_lsx_vslt_b;
	  break;

	default:
	  gcc_unreachable ();
	  break;
	}

      if (!unsigned_p)
	{
	  /* Extract the sign extension for each element by comparing each
	     element with immediate zero.  */
	  tmp = gen_reg_rtx (imode);
	  emit_insn (cmpFunc (tmp, operands[1], CONST0_RTX (imode)));
	}
      else
	tmp = force_reg (imode, CONST0_RTX (imode));

      dest = gen_reg_rtx (imode);

      emit_insn (unpack (dest, operands[1], tmp));
      emit_move_insn (operands[0], gen_lowpart (GET_MODE (operands[0]), dest));
      return;
    }
  gcc_unreachable ();
}

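/* Rough intuition for the signed LSX path above (illustrative, not a
   precise ISA description): vslt computes a per-element mask of
   (elem < 0) ? -1 : 0, i.e. sixteen copies of the sign bit for V8HImode,
   and vilvl then interleaves the low-half data elements with those mask
   elements, so each narrow element ends up paired with its sign bits --
   exactly a sign extension to the wider mode.  The unsigned variant
   pairs the data with zeros instead.  */
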
/* Construct and return a PARALLEL RTX with CONST_INTs for the HIGH
   (high_p == TRUE) or LOW (high_p == FALSE) half of a vector for mode
   MODE.  */

rtx
loongarch_lsx_vec_parallel_const_half (machine_mode mode, bool high_p)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits / 2);
  int base;
  int i;

  base = high_p ? nunits / 2 : 0;

  for (i = 0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  return gen_rtx_PARALLEL (VOIDmode, v);
}

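/* Example (illustrative): for V8HImode and high_p == true this returns
   (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)]),
   i.e. the element indices of the high half.  */
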
/* A subroutine of loongarch_expand_vector_init, match constant vector
   elements.  */

static inline bool
loongarch_constant_elt_p (rtx x)
{
  return CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE;
}

rtx
loongarch_gen_const_int_vector_shuffle (machine_mode mode, int val)
{
  int nunits = GET_MODE_NUNITS (mode);
  int nsets = nunits / 4;
  rtx elts[MAX_VECT_LEN];
  int set = 0;
  int i, j;

  /* Generate a const_int vector replicating the same 4-element set
     from an immediate.  */
  for (j = 0; j < nsets; j++, set = 4 * j)
    for (i = 0; i < 4; i++)
      elts[set + i] = GEN_INT (set + ((val >> (2 * i)) & 0x3));

  return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nunits, elts));
}


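/* A worked example of the immediate decoding above (illustrative): for
   V8HImode and val == 0x1b (0b00011011), the 2-bit fields decode to
   { 3, 2, 1, 0 }, so the PARALLEL is { 3, 2, 1, 0, 7, 6, 5, 4 } -- the
   same in-set reversal replicated in every 4-element group, matching
   the vshuf4i-style immediate encoding.  */
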
/* Expand a vector group initialization.  */

void
loongarch_expand_vector_group_init (rtx target, rtx vals)
{
  machine_mode vmode = GET_MODE (target);
  machine_mode half_mode = VOIDmode;
  rtx low = XVECEXP (vals, 0, 0);
  rtx high = XVECEXP (vals, 0, 1);

  switch (vmode)
    {
    case E_V32QImode:
      half_mode = V16QImode;
      break;
    case E_V16HImode:
      half_mode = V8HImode;
      break;
    case E_V8SImode:
      half_mode = V4SImode;
      break;
    case E_V4DImode:
      half_mode = V2DImode;
      break;
    case E_V8SFmode:
      half_mode = V4SFmode;
      break;
    case E_V4DFmode:
      half_mode = V2DFmode;
      break;
    default:
      gcc_unreachable ();
    }

  if (!register_operand (low, half_mode))
    low = force_reg (half_mode, low);
  if (!register_operand (high, half_mode))
    high = force_reg (half_mode, high);
  emit_insn (gen_rtx_SET (target,
			  gen_rtx_VEC_CONCAT (vmode, low, high)));
}

/* Expand the initialization of a vector whose elements are all the same.  */

void
loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar)
{
  machine_mode vmode = GET_MODE (target);
  machine_mode imode = GET_MODE_INNER (vmode);
  rtx same = XVECEXP (vals, 0, 0);
  rtx temp;

  if (CONST_INT_P (same) && nvar == 0
      && loongarch_signed_immediate_p (INTVAL (same), 10, 0))
    {
      switch (vmode)
	{
	case E_V32QImode:
	case E_V16HImode:
	case E_V8SImode:
	case E_V4DImode:
	case E_V16QImode:
	case E_V8HImode:
	case E_V4SImode:
	case E_V2DImode:
	  temp = gen_rtx_CONST_VECTOR (vmode, XVEC (vals, 0));
	  emit_move_insn (target, temp);
	  return;
	default:
	  gcc_unreachable ();
	}
    }

  if (imode == GET_MODE (same))
    temp = same;
  else if (GET_MODE_SIZE (imode) >= UNITS_PER_WORD)
    {
      if (GET_CODE (same) == MEM)
	{
	  rtx reg_tmp = gen_reg_rtx (GET_MODE (same));
	  loongarch_emit_move (reg_tmp, same);
	  temp = simplify_gen_subreg (imode, reg_tmp, GET_MODE (reg_tmp), 0);
	}
      else
	temp = simplify_gen_subreg (imode, same, GET_MODE (same), 0);
    }
  else
    {
      if (GET_CODE (same) == MEM)
	{
	  rtx reg_tmp = gen_reg_rtx (GET_MODE (same));
	  loongarch_emit_move (reg_tmp, same);
	  temp = lowpart_subreg (imode, reg_tmp, GET_MODE (reg_tmp));
	}
      else
	temp = lowpart_subreg (imode, same, GET_MODE (same));
    }

  temp = force_reg (imode, temp);

  switch (vmode)
    {
    case E_V32QImode:
    case E_V16HImode:
    case E_V8SImode:
    case E_V4DImode:
    case E_V16QImode:
    case E_V8HImode:
    case E_V4SImode:
    case E_V2DImode:
      loongarch_emit_move (target, gen_rtx_VEC_DUPLICATE (vmode, temp));
      break;

    case E_V8SFmode:
      emit_insn (gen_lasx_xvreplve0_w_f_scalar (target, temp));
      break;

    case E_V4DFmode:
      emit_insn (gen_lasx_xvreplve0_d_f_scalar (target, temp));
      break;

    case E_V4SFmode:
      emit_insn (gen_lsx_vreplvei_w_f_scalar (target, temp));
      break;

    case E_V2DFmode:
      emit_insn (gen_lsx_vreplvei_d_f_scalar (target, temp));
      break;

    default:
      gcc_unreachable ();
    }
}

/* Expand a vector initialization.  */

void
loongarch_expand_vector_init (rtx target, rtx vals)
{
  machine_mode vmode = GET_MODE (target);
  machine_mode imode = GET_MODE_INNER (vmode);
  unsigned i, nelt = GET_MODE_NUNITS (vmode);
  /* VALS is divided into a high and a low half-part.  */
  /* Number of non-constant elements in the corresponding parts of VALS.  */
  unsigned nvar = 0, hi_nvar = 0, lo_nvar = 0;
  /* all_same : true if all elements of VALS are the same.
     hi_same : true if all elements of the high half-part are the same.
     lo_same : true if all elements of the low half-part are the same.
     half_same : true if the high half-part is the same as the low one.  */
  bool all_same = false, hi_same = true, lo_same = true, half_same = true;
  rtx val[32], val_hi[32], val_lo[16];
  rtx x, op0, op1;
  /* Copy one element of VALS to each element of the target vector.  */
  typedef rtx (*loongarch_vec_repl1_fn) (rtx, rtx);
  /* Copy two elements of VALS to the target vector.  */
  typedef rtx (*loongarch_vec_repl2_fn) (rtx, rtx, rtx);
  /* Insert a scalar operand into the specified position of the vector.  */
  typedef rtx (*loongarch_vec_set_fn) (rtx, rtx, rtx);
  /* Copy the 64-bit lowpart to the highpart.  */
  typedef rtx (*loongarch_vec_mirror_fn) (rtx, rtx, rtx);
  /* Merge the lowpart and highpart into the target.  */
  typedef rtx (*loongarch_vec_merge_fn) (rtx, rtx, rtx, rtx);

  loongarch_vec_repl1_fn loongarch_vec_repl1_128 = NULL,
			 loongarch_vec_repl1_256 = NULL;
  loongarch_vec_repl2_fn loongarch_vec_repl2_128 = NULL,
			 loongarch_vec_repl2_256 = NULL;
  loongarch_vec_set_fn loongarch_vec_set128 = NULL, loongarch_vec_set256 = NULL;
  loongarch_vec_mirror_fn loongarch_vec_mirror = NULL;
  loongarch_vec_merge_fn loongarch_lasx_vecinit_merge = NULL;
  machine_mode half_mode = VOIDmode;

  /* Check whether the elements of each part are the same.  */
  for (i = 0; i < nelt / 2; ++i)
    {
      val_hi[i] = val_hi[i + nelt / 2] = val[i + nelt / 2]
	= XVECEXP (vals, 0, i + nelt / 2);
      val_lo[i] = val[i] = XVECEXP (vals, 0, i);
      if (!loongarch_constant_elt_p (val_hi[i]))
	hi_nvar++;
      if (!loongarch_constant_elt_p (val_lo[i]))
	lo_nvar++;
      if (i > 0 && !rtx_equal_p (val_hi[i], val_hi[0]))
	hi_same = false;
      if (i > 0 && !rtx_equal_p (val_lo[i], val_lo[0]))
	lo_same = false;
      if (!rtx_equal_p (val_hi[i], val_lo[i]))
	half_same = false;
    }

  /* If all elements are the same, set all_same true.  */
  if (hi_same && lo_same && half_same)
    all_same = true;

  nvar = hi_nvar + lo_nvar;

  switch (vmode)
    {
    case E_V32QImode:
      half_mode = E_V16QImode;
      loongarch_vec_set256 = gen_vec_setv32qi_internal;
      loongarch_vec_repl1_256 = gen_lasx_xvreplgr2vr_b;
      loongarch_lasx_vecinit_merge
	= half_same ? gen_lasx_xvpermi_q_v32qi : gen_lasx_vecinit_merge_v32qi;
      /* FALLTHRU.  */
    case E_V16QImode:
      loongarch_vec_set128 = gen_vec_setv16qi;
      loongarch_vec_repl1_128 = gen_lsx_vreplgr2vr_b;
      loongarch_vec_mirror = gen_lsx_vreplvei_mirror_b;
      break;

    case E_V16HImode:
      half_mode = E_V8HImode;
      loongarch_vec_set256 = gen_vec_setv16hi_internal;
      loongarch_vec_repl1_256 = gen_lasx_xvreplgr2vr_h;
      loongarch_lasx_vecinit_merge
	= half_same ? gen_lasx_xvpermi_q_v16hi : gen_lasx_vecinit_merge_v16hi;
      /* FALLTHRU.  */
    case E_V8HImode:
      loongarch_vec_set128 = gen_vec_setv8hi;
      loongarch_vec_repl1_128 = gen_lsx_vreplgr2vr_h;
      loongarch_vec_mirror = gen_lsx_vreplvei_mirror_h;
      break;

    case E_V8SImode:
      half_mode = V4SImode;
      loongarch_vec_set256 = gen_vec_setv8si;
      loongarch_vec_repl1_256 = gen_lasx_xvreplgr2vr_w;
      loongarch_lasx_vecinit_merge
	= half_same ? gen_lasx_xvpermi_q_v8si : gen_lasx_vecinit_merge_v8si;
      /* FALLTHRU.  */
    case E_V4SImode:
      loongarch_vec_set128 = gen_vec_setv4si;
      loongarch_vec_repl1_128 = gen_lsx_vreplgr2vr_w;
      loongarch_vec_mirror = gen_lsx_vreplvei_mirror_w;
      break;

    case E_V4DImode:
      half_mode = E_V2DImode;
      loongarch_vec_set256 = gen_vec_setv4di;
      loongarch_vec_repl1_256 = gen_lasx_xvreplgr2vr_d;
      loongarch_lasx_vecinit_merge
	= half_same ? gen_lasx_xvpermi_q_v4di : gen_lasx_vecinit_merge_v4di;
      /* FALLTHRU.  */
    case E_V2DImode:
      loongarch_vec_set128 = gen_vec_setv2di;
      loongarch_vec_repl1_128 = gen_lsx_vreplgr2vr_d;
      loongarch_vec_mirror = gen_lsx_vreplvei_mirror_d;
      break;

    case E_V8SFmode:
      half_mode = E_V4SFmode;
      loongarch_vec_set256 = gen_vec_setv8sf;
      loongarch_vec_repl1_128 = gen_lsx_vreplvei_w_f_scalar;
      loongarch_vec_repl2_256 = gen_lasx_xvilvl_w_f_internal;
      loongarch_lasx_vecinit_merge
	= half_same ? gen_lasx_xvpermi_q_v8sf : gen_lasx_vecinit_merge_v8sf;
      /* FALLTHRU.  */
    case E_V4SFmode:
      loongarch_vec_set128 = gen_vec_setv4sf;
      loongarch_vec_repl2_128 = gen_lsx_vilvl_w_f_internal;
      loongarch_vec_mirror = gen_lsx_vreplvei_mirror_w_f;
      break;

    case E_V4DFmode:
      half_mode = E_V2DFmode;
      loongarch_vec_set256 = gen_vec_setv4df;
      loongarch_vec_repl1_128 = gen_lsx_vreplvei_d_f_scalar;
      loongarch_vec_repl2_256 = gen_lasx_xvilvl_d_f_internal;
      loongarch_lasx_vecinit_merge
	= half_same ? gen_lasx_xvpermi_q_v4df : gen_lasx_vecinit_merge_v4df;
      /* FALLTHRU.  */
    case E_V2DFmode:
      loongarch_vec_set128 = gen_vec_setv2df;
      loongarch_vec_repl2_128 = gen_lsx_vilvl_d_f_internal;
      loongarch_vec_mirror = gen_lsx_vreplvei_mirror_d_f;
      break;

    default:
      gcc_unreachable ();
    }

  if (ISA_HAS_LASX && GET_MODE_SIZE (vmode) == 32)
    {
      /* If all elements are the same, just do a broadcast.  */
      if (all_same)
	loongarch_expand_vector_init_same (target, vals, nvar);
      else
	{
	  gcc_assert (nelt >= 4);

	  rtx target_hi, target_lo;
	  /* Write the elements of the high half-part into target
	     directly.  */
	  target_hi = target;
	  target_lo = gen_reg_rtx (half_mode);

	  /* If all elements of the high half-part are the same, just do a
	     broadcast.  Also applicable to the low half-part.  */
	  if (hi_same)
	    {
	      rtx vtmp = gen_rtx_PARALLEL (vmode, gen_rtvec_v (nelt, val_hi));
	      loongarch_expand_vector_init_same (target_hi, vtmp, hi_nvar);
	    }
	  if (lo_same)
	    {
	      rtx vtmp
		= gen_rtx_PARALLEL (half_mode, gen_rtvec_v (nelt / 2, val_lo));
	      loongarch_expand_vector_init_same (target_lo, vtmp, lo_nvar);
	    }

	  for (i = 0; i < nelt / 2; ++i)
	    {
	      if (!hi_same)
		{
		  if (vmode == E_V8SFmode || vmode == E_V4DFmode)
		    {
		      /* Use xvilvl to load the lowest 2 elements
			 simultaneously, to reduce the number of
			 instructions.  */
		      if (i == 1)
			{
			  op0 = force_reg (imode, val_hi[0]);
			  op1 = force_reg (imode, val_hi[1]);
			  emit_insn (
			    loongarch_vec_repl2_256 (target_hi, op0, op1));
			}
		      else if (i > 1)
			{
			  op0 = force_reg (imode, val_hi[i]);
			  emit_insn (
			    loongarch_vec_set256 (target_hi, op0,
						  GEN_INT (i)));
			}
		    }
		  else
		    {
		      op0 = force_reg (imode, val_hi[i]);
		      /* Assign the lowest element of val_hi to all elements
			 of target_hi.  */
		      if (i == 0)
			{
			  emit_insn (loongarch_vec_repl1_256 (target_hi,
							      op0));
			}
		      else if (!rtx_equal_p (val_hi[i], val_hi[0]))
			{
			  emit_insn (
			    loongarch_vec_set256 (target_hi, op0,
						  GEN_INT (i)));
			}
		    }
		}
	      if (!lo_same && !half_same)
		{
		  op0 = force_reg (imode, val_lo[i]);
		  /* Assign the lowest element of val_lo to all elements
		     of target_lo.  */
		  if (i == 0)
		    {
		      emit_insn (loongarch_vec_repl1_128 (target_lo, op0));
		    }
		  else if (!rtx_equal_p (val_lo[i], val_lo[0]))
		    {
		      emit_insn (
			loongarch_vec_set128 (target_lo, op0, GEN_INT (i)));
		    }
		}
	    }
	  if (half_same)
	    {
	      emit_insn (loongarch_lasx_vecinit_merge (target, target_hi,
						       target_hi, const0_rtx));
	      return;
	    }
	  emit_insn (loongarch_lasx_vecinit_merge (target, target_hi,
						   target_lo,
						   GEN_INT (0x20)));
	}
      return;
    }

  if (ISA_HAS_LSX)
    {
      if (all_same)
	loongarch_expand_vector_init_same (target, vals, nvar);
      else
	{
	  for (i = 0; i < nelt; ++i)
	    {
	      if (vmode == E_V4SFmode || vmode == E_V2DFmode)
		{
		  /* Use vilvl to load the lowest 2 elements simultaneously,
		     to reduce the number of instructions.  */
		  if (i == 1)
		    {
		      op0 = force_reg (imode, val[0]);
		      op1 = force_reg (imode, val[1]);
		      emit_insn (loongarch_vec_repl2_128 (target, op0, op1));
		    }
		  else if (i > 1)
		    {
		      op0 = force_reg (imode, val[i]);
		      emit_insn (
			loongarch_vec_set128 (target, op0, GEN_INT (i)));
		    }
		}
	      else
		{
		  if (half_same && i == nelt / 2)
		    {
		      emit_insn (
			loongarch_vec_mirror (target, target, const0_rtx));
		      return;
		    }
		  op0 = force_reg (imode, val[i]);
		  /* Assign the lowest element of val to all elements of
		     target.  */
		  if (i == 0)
		    {
		      emit_insn (loongarch_vec_repl1_128 (target, op0));
		    }
		  else if (!rtx_equal_p (val[i], val[0]))
		    {
		      emit_insn (
			loongarch_vec_set128 (target, op0, GEN_INT (i)));
		    }
		}
	    }
	}
      return;
    }

  /* Load constants from the pool, or whatever's handy.  */
  if (nvar == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (vmode, XVEC (vals, 0)));
      return;
    }

  /* For two-part initialization, always use CONCAT.  */
  if (nelt == 2)
    {
      rtx op0 = force_reg (imode, val[0]);
      rtx op1 = force_reg (imode, val[1]);
      x = gen_rtx_VEC_CONCAT (vmode, op0, op1);
      emit_insn (gen_rtx_SET (target, x));
      return;
    }

  /* No LoongArch CPU supports vectors with more elements for now.  */
  gcc_unreachable ();
}

/* Implement HARD_REGNO_CALLER_SAVE_MODE.  */

machine_mode
loongarch_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs,
				       machine_mode mode)
{
  /* For performance, avoid saving/restoring upper parts of a register
     by returning MODE as save mode when the mode is known.  */
  if (mode == VOIDmode)
    return choose_hard_reg_mode (regno, nregs, NULL);
  else
    return mode;
}

/* Generate RTL for comparing CMP_OP0 and CMP_OP1 using condition COND and
   store the result -1 or 0 in DEST.  */

static void
loongarch_expand_lsx_cmp (rtx dest, enum rtx_code cond, rtx op0, rtx op1)
{
  machine_mode cmp_mode = GET_MODE (op0);
  bool negate = false;

  switch (cmp_mode)
    {
    case E_V16QImode:
    case E_V32QImode:
    case E_V8HImode:
    case E_V16HImode:
    case E_V4SImode:
    case E_V8SImode:
    case E_V2DImode:
    case E_V4DImode:
      switch (cond)
	{
	case NE:
	  if (!loongarch_const_vector_same_int_p (op1, cmp_mode, -16, 15))
	    op1 = force_reg (cmp_mode, op1);
	  cond = reverse_condition (cond);
	  negate = true;
	  break;
	case EQ:
	case LT:
	case LE:
	  if (!loongarch_const_vector_same_int_p (op1, cmp_mode, -16, 15))
	    op1 = force_reg (cmp_mode, op1);
	  break;
	case LTU:
	case LEU:
	  if (!loongarch_const_vector_same_int_p (op1, cmp_mode, 0, 31))
	    op1 = force_reg (cmp_mode, op1);
	  break;
	case GE:
	case GT:
	case GEU:
	case GTU:
	  /* Only reg-reg comparison is supported.  */
	  if (!register_operand (op1, cmp_mode))
	    op1 = force_reg (cmp_mode, op1);
	  std::swap (op0, op1);
	  cond = swap_condition (cond);
	  break;
	default:
	  gcc_unreachable ();
	}
      loongarch_emit_binary (cond, dest, op0, op1);
      if (negate)
	emit_move_insn (dest, gen_rtx_NOT (GET_MODE (dest), dest));
      break;

    case E_V4SFmode:
    case E_V2DFmode:
    case E_V8SFmode:
    case E_V4DFmode:
      if (!register_operand (op1, cmp_mode))
	op1 = force_reg (cmp_mode, op1);
      loongarch_emit_binary (cond, dest, op0, op1);
      break;

    default:
      gcc_unreachable ();
      break;
    }
}

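/* A note on the canonicalizations above (for illustration): NE has no
   direct vector comparison, so A != B is emitted as NOT (A == B) via
   the NEGATE flag, and GT/GE/GTU/GEU are turned into LT/LE/LTU/LEU by
   swapping the operands, e.g. A > B becomes B < A.  */
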
/* Expand VEC_COND_EXPR, where:
   MODE is mode of the result
   VIMODE equivalent integer mode
   OPERANDS operands of VEC_COND_EXPR.  */

void
loongarch_expand_vec_cond_expr (machine_mode mode, machine_mode vimode,
				rtx *operands)
{
  rtx cond = operands[3];
  rtx cmp_op0 = operands[4];
  rtx cmp_op1 = operands[5];
  rtx cmp_res = gen_reg_rtx (vimode);

  loongarch_expand_lsx_cmp (cmp_res, GET_CODE (cond), cmp_op0, cmp_op1);

  /* We handle the following cases:
     1) r = a CMP b ? -1 : 0
     2) r = a CMP b ? -1 : v
     3) r = a CMP b ? v : 0
     4) r = a CMP b ? v1 : v2  */

  /* Case (1) above.  We only move the results.  */
  if (operands[1] == CONSTM1_RTX (vimode)
      && operands[2] == CONST0_RTX (vimode))
    emit_move_insn (operands[0], cmp_res);
  else
    {
      rtx src1 = gen_reg_rtx (vimode);
      rtx src2 = gen_reg_rtx (vimode);
      rtx mask = gen_reg_rtx (vimode);
      rtx bsel;

      /* Move the vector result to use it as a mask.  */
      emit_move_insn (mask, cmp_res);

      if (register_operand (operands[1], mode))
	{
	  rtx xop1 = operands[1];
	  if (mode != vimode)
	    {
	      xop1 = gen_reg_rtx (vimode);
	      emit_move_insn (xop1,
			      simplify_gen_subreg (vimode, operands[1],
						   mode, 0));
	    }
	  emit_move_insn (src1, xop1);
	}
      else
	{
	  gcc_assert (operands[1] == CONSTM1_RTX (vimode));
	  /* Case (2) if the below doesn't move the mask to src2.  */
	  emit_move_insn (src1, mask);
	}

      if (register_operand (operands[2], mode))
	{
	  rtx xop2 = operands[2];
	  if (mode != vimode)
	    {
	      xop2 = gen_reg_rtx (vimode);
	      emit_move_insn (xop2,
			      simplify_gen_subreg (vimode, operands[2],
						   mode, 0));
	    }
	  emit_move_insn (src2, xop2);
	}
      else
	{
	  gcc_assert (operands[2] == CONST0_RTX (mode));
	  /* Case (3) if the above didn't move the mask to src1.  */
	  emit_move_insn (src2, mask);
	}

      /* We deal with case (4) if the mask wasn't moved to either src1 or
	 src2.  In any case, we eventually do vector mask-based copy.  */
      bsel = gen_rtx_IOR (vimode,
			  gen_rtx_AND (vimode,
				       gen_rtx_NOT (vimode, mask), src2),
			  gen_rtx_AND (vimode, mask, src1));
      /* The result is placed back to a register with the mask.  */
      emit_insn (gen_rtx_SET (mask, bsel));
      emit_move_insn (operands[0],
		      simplify_gen_subreg (mode, mask, vimode, 0));
    }
}

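/* The blend at the end of the function above computes, element-wise,
   r = (mask & src1) | (~mask & src2); since the comparison result is
   all-ones or all-zeros per element, this selects src1 where the
   condition held and src2 elsewhere -- a classic branchless select.  */
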
void
loongarch_expand_vec_cond_mask_expr (machine_mode mode, machine_mode vimode,
				     rtx *operands)
{
  rtx cmp_res = operands[3];

  /* We handle the following cases:
     1) r = a CMP b ? -1 : 0
     2) r = a CMP b ? -1 : v
     3) r = a CMP b ? v : 0
     4) r = a CMP b ? v1 : v2  */

  /* Case (1) above.  We only move the results.  */
  if (operands[1] == CONSTM1_RTX (vimode)
      && operands[2] == CONST0_RTX (vimode))
    emit_move_insn (operands[0], cmp_res);
  else
    {
      rtx src1 = gen_reg_rtx (vimode);
      rtx src2 = gen_reg_rtx (vimode);
      rtx mask = gen_reg_rtx (vimode);
      rtx bsel;

      /* Move the vector result to use it as a mask.  */
      emit_move_insn (mask, cmp_res);

      if (register_operand (operands[1], mode))
	{
	  rtx xop1 = operands[1];
	  if (mode != vimode)
	    {
	      xop1 = gen_reg_rtx (vimode);
	      emit_move_insn (xop1,
			      simplify_gen_subreg (vimode, operands[1],
						   mode, 0));
	    }
	  emit_move_insn (src1, xop1);
	}
      else
	{
	  gcc_assert (operands[1] == CONSTM1_RTX (vimode));
	  /* Case (2) if the below doesn't move the mask to src2.  */
	  emit_move_insn (src1, mask);
	}

      if (register_operand (operands[2], mode))
	{
	  rtx xop2 = operands[2];
	  if (mode != vimode)
	    {
	      xop2 = gen_reg_rtx (vimode);
	      emit_move_insn (xop2,
			      simplify_gen_subreg (vimode, operands[2],
						   mode, 0));
	    }
	  emit_move_insn (src2, xop2);
	}
      else
	{
	  gcc_assert (operands[2] == CONST0_RTX (mode));
	  /* Case (3) if the above didn't move the mask to src1.  */
	  emit_move_insn (src2, mask);
	}

      /* We deal with case (4) if the mask wasn't moved to either src1 or
	 src2.  In any case, we eventually do vector mask-based copy.  */
      bsel = gen_rtx_IOR (vimode,
			  gen_rtx_AND (vimode,
				       gen_rtx_NOT (vimode, mask), src2),
			  gen_rtx_AND (vimode, mask, src1));
      /* The result is placed back to a register with the mask.  */
      emit_insn (gen_rtx_SET (mask, bsel));
      emit_move_insn (operands[0], simplify_gen_subreg (mode, mask,
							vimode, 0));
    }
}

/* Expand an integer vector comparison.  */

void
loongarch_expand_vec_cmp (rtx operands[])
{
  rtx_code code = GET_CODE (operands[1]);
  loongarch_expand_lsx_cmp (operands[0], code, operands[2], operands[3]);
}

/* Implement TARGET_PROMOTE_FUNCTION_MODE.  */

/* This function is equivalent to default_promote_function_mode_always_promote
   except that it returns a promoted mode even if type is NULL_TREE.  This is
   needed by libcalls which have no type (only a mode) such as fixed conversion
   routines that take a signed or unsigned char/short argument and convert it
   to a fixed type.  */

static machine_mode
loongarch_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
				 machine_mode mode,
				 int *punsignedp ATTRIBUTE_UNUSED,
				 const_tree fntype ATTRIBUTE_UNUSED,
				 int for_return ATTRIBUTE_UNUSED)
{
  int unsignedp;

  if (type != NULL_TREE)
    return promote_mode (type, mode, punsignedp);

  unsignedp = *punsignedp;
  PROMOTE_MODE (mode, unsignedp, type);
  *punsignedp = unsignedp;
  return mode;
}

/* Implement TARGET_STARTING_FRAME_OFFSET.  See loongarch_compute_frame_info
   for details about the frame layout.  */

static HOST_WIDE_INT
loongarch_starting_frame_offset (void)
{
  if (FRAME_GROWS_DOWNWARD)
    return 0;
  return crtl->outgoing_args_size;
}

/* A subroutine of loongarch_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  */

rtx
loongarch_build_const_vector (machine_mode mode, bool vect, rtx value)
{
  int i, n_elt;
  rtvec v;
  machine_mode scalar_mode;

  switch (mode)
    {
    case E_V32QImode:
    case E_V16QImode:
    case E_V32HImode:
    case E_V16HImode:
    case E_V8HImode:
    case E_V8SImode:
    case E_V4SImode:
    case E_V8DImode:
    case E_V4DImode:
    case E_V2DImode:
      gcc_assert (vect);
      /* FALLTHRU */
    case E_V8SFmode:
    case E_V4SFmode:
    case E_V8DFmode:
    case E_V4DFmode:
    case E_V2DFmode:
      n_elt = GET_MODE_NUNITS (mode);
      v = rtvec_alloc (n_elt);
      scalar_mode = GET_MODE_INNER (mode);

      RTVEC_ELT (v, 0) = value;

      for (i = 1; i < n_elt; ++i)
	RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);

      return gen_rtx_CONST_VECTOR (mode, v);

    default:
      gcc_unreachable ();
    }
}

/* Create a mask for the sign bit in MODE for a register.  If VECT is true,
   then replicate the mask for all elements of the vector register.  If
   INVERT is true, then create a mask excluding the sign bit.  */

rtx
loongarch_build_signbit_mask (machine_mode mode, bool vect, bool invert)
{
  machine_mode vec_mode, imode;
  wide_int w;
  rtx mask, v;

  switch (mode)
    {
    case E_V16SImode:
    case E_V16SFmode:
    case E_V8SImode:
    case E_V4SImode:
    case E_V8SFmode:
    case E_V4SFmode:
      vec_mode = mode;
      imode = SImode;
      break;

    case E_V8DImode:
    case E_V4DImode:
    case E_V2DImode:
    case E_V8DFmode:
    case E_V4DFmode:
    case E_V2DFmode:
      vec_mode = mode;
      imode = DImode;
      break;

    case E_TImode:
    case E_TFmode:
      vec_mode = VOIDmode;
      imode = TImode;
      break;

    default:
      gcc_unreachable ();
    }

  machine_mode inner_mode = GET_MODE_INNER (mode);
  w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
			   GET_MODE_BITSIZE (inner_mode));
  if (invert)
    w = wi::bit_not (w);

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_wide_int_const (w, imode);
  mask = gen_lowpart (inner_mode, mask);

  if (vec_mode == VOIDmode)
    return force_reg (inner_mode, mask);

  v = loongarch_build_const_vector (vec_mode, vect, mask);
  return force_reg (vec_mode, v);
}

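/* For example (illustrative): for E_V4SFmode this builds the per-element
   bit pattern 0x80000000, or 0x7fffffff when INVERT is set.  ANDing with
   the inverted mask clears the sign bit (fabs), while XORing with the
   plain mask flips it (negation) -- the usual uses of sign-bit masks.  */
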
/* Use the rsqrte instruction and Newton-Raphson to compute the approximation
   of a single-precision floating-point [reciprocal] square root.  */

void loongarch_emit_swrsqrtsf (rtx res, rtx a, machine_mode mode, bool recip)
{
  rtx x0, e0, e1, e2, mhalf, monehalf;
  REAL_VALUE_TYPE r;
  int unspec;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  e2 = gen_reg_rtx (mode);

  real_arithmetic (&r, ABS_EXPR, &dconsthalf, NULL);
  mhalf = const_double_from_real_value (r, SFmode);

  real_arithmetic (&r, PLUS_EXPR, &dconsthalf, &dconst1);
  monehalf = const_double_from_real_value (r, SFmode);
  unspec = UNSPEC_RSQRTE;

  if (VECTOR_MODE_P (mode))
    {
      mhalf = loongarch_build_const_vector (mode, true, mhalf);
      monehalf = loongarch_build_const_vector (mode, true, monehalf);
      unspec = GET_MODE_SIZE (mode) == 32 ? UNSPEC_LASX_XVFRSQRTE
					  : UNSPEC_LSX_VFRSQRTE;
    }

  /* rsqrt(a) = rsqrte(a) * (1.5 - 0.5 * a * rsqrte(a) * rsqrte(a))
     sqrt(a) = a * rsqrte(a) * (1.5 - 0.5 * a * rsqrte(a) * rsqrte(a))  */

  a = force_reg (mode, a);

  /* x0 = rsqrt(a) estimate.  */
  emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
					      unspec)));

  /* If (a == 0.0) filter out infinity to prevent NaN for sqrt(0.0).  */
  if (!recip)
    {
      rtx zero = force_reg (mode, CONST0_RTX (mode));

      if (VECTOR_MODE_P (mode))
	{
	  machine_mode imode = related_int_vector_mode (mode).require ();
	  rtx mask = gen_reg_rtx (imode);
	  emit_insn (gen_rtx_SET (mask, gen_rtx_NE (imode, a, zero)));
	  emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0,
						   gen_lowpart (mode, mask))));
	}
      else
	{
	  rtx target = emit_conditional_move (x0, { GT, a, zero, mode },
					      x0, zero, mode, 0);
	  if (target != x0)
	    emit_move_insn (x0, target);
	}
    }

  /* e0 = x0 * a  */
  emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
  /* e1 = e0 * x0  */
  emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));

  /* e2 = 1.5 - e1 * 0.5  */
  mhalf = force_reg (mode, mhalf);
  monehalf = force_reg (mode, monehalf);
  emit_insn (gen_rtx_SET (e2, gen_rtx_FMA (mode,
					   gen_rtx_NEG (mode, e1),
					   mhalf, monehalf)));

  if (recip)
    /* res = e2 * x0  */
    emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, x0, e2)));
  else
    /* res = e2 * e0  */
    emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e0)));
}

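/* The update above is one Newton-Raphson step for f(x) = 1/x^2 - a,
   whose root is x = 1/sqrt(a): x1 = x0 * (1.5 - 0.5 * a * x0 * x0).
   Each step roughly doubles the number of correct bits of the hardware
   estimate, so a single iteration suffices for single precision.  */
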
/* Use the recipe instruction and Newton-Raphson to compute the approximation
   of a single-precision floating-point divide.  */

void loongarch_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
{
  rtx x0, e0, mtwo;
  REAL_VALUE_TYPE r;
  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  int unspec = UNSPEC_RECIPE;

  real_arithmetic (&r, ABS_EXPR, &dconst2, NULL);
  mtwo = const_double_from_real_value (r, SFmode);

  if (VECTOR_MODE_P (mode))
    {
      mtwo = loongarch_build_const_vector (mode, true, mtwo);
      unspec = GET_MODE_SIZE (mode) == 32 ? UNSPEC_LASX_XVFRECIPE
					  : UNSPEC_LSX_VFRECIPE;
    }

  mtwo = force_reg (mode, mtwo);

  /* a / b = a * recipe(b) * (2.0 - b * recipe(b))  */

  /* x0 = 1./b estimate.  */
  emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
					      unspec)));
  /* e0 = 2.0 - b * x0.  */
  emit_insn (gen_rtx_SET (e0, gen_rtx_FMA (mode,
					   gen_rtx_NEG (mode, b), x0, mtwo)));

  if (a != CONST1_RTX (mode))
    {
      rtx e1 = gen_reg_rtx (mode);
      /* e1 = a * x0.  */
      emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, a, x0)));
      /* res = e0 * e1.  */
      emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e0, e1)));
    }
  else
    {
      /* res = e0 * x0.  */
      emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e0, x0)));
    }
}

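/* Likewise, this is one Newton-Raphson step for f(x) = 1/x - b, whose
   root is x = 1/b: x1 = x0 * (2.0 - b * x0).  When A is the constant
   1.0 the multiply by A is skipped, turning the division into a plain
   reciprocal.  */
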
static bool
loongarch_builtin_support_vector_misalignment (machine_mode mode,
					       const_tree type,
					       int misalignment,
					       bool is_packed)
{
  if ((ISA_HAS_LSX || ISA_HAS_LASX) && STRICT_ALIGNMENT)
    {
      if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
	return false;
      if (misalignment == -1)
	return false;
    }
  return default_builtin_support_vector_misalignment (mode, type, misalignment,
						      is_packed);
}

/* Return a PARALLEL containing NELTS elements, with element I equal
   to BASE + I * STEP.  */
rtx
loongarch_gen_stepped_int_parallel (unsigned int nelts, int base,
				    int step)
{
  rtvec vec = rtvec_alloc (nelts);
  for (unsigned int i = 0; i < nelts; i++)
    RTVEC_ELT (vec, i) = GEN_INT (base + i * step);
  return gen_rtx_PARALLEL (VOIDmode, vec);
}

/* Implement TARGET_C_MODE_FOR_FLOATING_TYPE.  Return TFmode or DFmode
   for TI_LONG_DOUBLE_TYPE, which is for the long double type; go with
   the default for the others.  */

static machine_mode
loongarch_c_mode_for_floating_type (enum tree_index ti)
{
  if (ti == TI_LONG_DOUBLE_TYPE)
    return TARGET_64BIT ? TFmode : DFmode;
  return default_mode_for_floating_type (ti);
}

static bool
use_rsqrt_p (void)
{
  return (flag_finite_math_only
	  && !flag_trapping_math
	  && flag_unsafe_math_optimizations);
}

/* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */

static bool
loongarch_optab_supported_p (int op, machine_mode, machine_mode,
			     optimization_type opt_type)
{
  switch (op)
    {
    case rsqrt_optab:
      return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();

    default:
      return true;
    }
}

/* If -fverbose-asm, dump some info for debugging.  */
static void
loongarch_asm_code_end (void)
{
#define DUMP_FEATURE(PRED) \
  fprintf (asm_out_file, "%s %s: %s\n", ASM_COMMENT_START, #PRED, \
	   (PRED) ? "enabled" : "disabled")

  if (flag_verbose_asm)
    {
      fprintf (asm_out_file, "\n%s CPU: %s\n", ASM_COMMENT_START,
	       loongarch_arch_strings[la_target.cpu_arch]);
      fprintf (asm_out_file, "%s Tune: %s\n", ASM_COMMENT_START,
	       loongarch_tune_strings[la_target.cpu_tune]);
      fprintf (asm_out_file, "%s Base ISA: %s\n", ASM_COMMENT_START,
	       loongarch_isa_base_strings[la_target.isa.base]);
      DUMP_FEATURE (ISA_HAS_FRECIPE);
      DUMP_FEATURE (ISA_HAS_DIV32);
      DUMP_FEATURE (ISA_HAS_LAM_BH);
      DUMP_FEATURE (ISA_HAS_LAMCAS);
      DUMP_FEATURE (ISA_HAS_LD_SEQ_SA);
    }

  fputs ("\n\n", asm_out_file);
#undef DUMP_FEATURE
}

/* Target hook for c_mode_for_suffix.  */
static machine_mode
loongarch_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;

  return VOIDmode;
}

/* Implement TARGET_COMPUTE_PRESSURE_CLASSES.  */

static int
loongarch_compute_pressure_classes (reg_class *classes)
{
  int i = 0;
  classes[i++] = GENERAL_REGS;
  classes[i++] = FP_REGS;
  classes[i++] = FCC_REGS;
  return i;
}

/* Implement TARGET_CAN_INLINE_P.  Determine whether inlining the function
   CALLEE into the function CALLER is safe.  Inlining should be rejected if
   there is no always_inline attribute and the target options differ except
   for differences in ISA extensions or performance tuning options like the
   code model, TLS dialect, etc.  */

static bool
loongarch_can_inline_p (tree caller, tree callee)
{
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);

  if (!callee_tree)
    callee_tree = target_option_default_node;

  if (!caller_tree)
    caller_tree = target_option_default_node;

  /* If both caller and callee have attributes, assume that if the
     pointer is different, the two functions have different target
     options since build_target_option_node uses a hash table for the
     options.  */
  if (callee_tree == caller_tree)
    return true;

  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);

  /* Callee and caller should have the same target options.  */
  int callee_target_flags = callee_opts->x_target_flags;
  int caller_target_flags = caller_opts->x_target_flags;

  if (callee_target_flags != caller_target_flags)
    return false;

  /* If the callee enables an ISA extension that the caller does not
     enable, inlining is disabled.  */
  if (~caller_opts->x_la_isa_evolution
      & callee_opts->x_la_isa_evolution)
    return false;

  /* If SIMD extensions are enabled for the callee but not for the caller,
     inlining is disabled.  */
  if ((caller_opts->x_la_opt_simd == ISA_EXT_NONE
       && callee_opts->x_la_opt_simd != ISA_EXT_NONE)
      || (caller_opts->x_la_opt_simd == ISA_EXT_SIMD_LSX
	  && callee_opts->x_la_opt_simd == ISA_EXT_SIMD_LASX))
    return false;

  bool always_inline
    = lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee));

  /* If the architectural features match up and the callee is always_inline
     then the other attributes don't matter.  */
  if (always_inline)
    return true;

  if (caller_opts->x_la_opt_cmodel != callee_opts->x_la_opt_cmodel)
    return false;

  return true;
}

/* Initialize the GCC target structure. */
|
||
#undef TARGET_ASM_ALIGNED_HI_OP
|
||
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
|
||
#undef TARGET_ASM_ALIGNED_SI_OP
|
||
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
|
||
#undef TARGET_ASM_ALIGNED_DI_OP
|
||
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
|
||
|
||
#undef TARGET_OPTION_OVERRIDE
|
||
#define TARGET_OPTION_OVERRIDE loongarch_option_override
|
||
#undef TARGET_OPTION_SAVE
|
||
#define TARGET_OPTION_SAVE loongarch_option_save
|
||
#undef TARGET_OPTION_RESTORE
|
||
#define TARGET_OPTION_RESTORE loongarch_option_restore
|
||
|
||
#undef TARGET_SET_CURRENT_FUNCTION
|
||
#define TARGET_SET_CURRENT_FUNCTION loongarch_set_current_function
|
||
|
||
#undef TARGET_LEGITIMIZE_ADDRESS
|
||
#define TARGET_LEGITIMIZE_ADDRESS loongarch_legitimize_address
|
||
|
||
#undef TARGET_ASM_SELECT_RTX_SECTION
|
||
#define TARGET_ASM_SELECT_RTX_SECTION loongarch_select_rtx_section
|
||
#undef TARGET_ASM_FUNCTION_RODATA_SECTION
|
||
#define TARGET_ASM_FUNCTION_RODATA_SECTION loongarch_function_rodata_section
|
||
|
||
#undef TARGET_ASM_CODE_END
|
||
#define TARGET_ASM_CODE_END loongarch_asm_code_end
|
||
|
||
#undef TARGET_SCHED_INIT
|
||
#define TARGET_SCHED_INIT loongarch_sched_init
|
||
#undef TARGET_SCHED_REORDER
|
||
#define TARGET_SCHED_REORDER loongarch_sched_reorder
|
||
#undef TARGET_SCHED_REORDER2
|
||
#define TARGET_SCHED_REORDER2 loongarch_sched_reorder2
|
||
#undef TARGET_SCHED_VARIABLE_ISSUE
|
||
#define TARGET_SCHED_VARIABLE_ISSUE loongarch_variable_issue
|
||
#undef TARGET_SCHED_ADJUST_COST
|
||
#define TARGET_SCHED_ADJUST_COST loongarch_adjust_cost
|
||
#undef TARGET_SCHED_ISSUE_RATE
|
||
#define TARGET_SCHED_ISSUE_RATE loongarch_issue_rate
|
||
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
|
||
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
|
||
loongarch_multipass_dfa_lookahead
|
||
|
||
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
|
||
#define TARGET_FUNCTION_OK_FOR_SIBCALL loongarch_function_ok_for_sibcall
|
||
|
||
#undef TARGET_VALID_POINTER_MODE
|
||
#define TARGET_VALID_POINTER_MODE loongarch_valid_pointer_mode
|
||
#undef TARGET_REGISTER_MOVE_COST
|
||
#define TARGET_REGISTER_MOVE_COST loongarch_register_move_cost
|
||
#undef TARGET_MEMORY_MOVE_COST
|
||
#define TARGET_MEMORY_MOVE_COST loongarch_memory_move_cost
|
||
#undef TARGET_RTX_COSTS
|
||
#define TARGET_RTX_COSTS loongarch_rtx_costs
|
||
#undef TARGET_ADDRESS_COST
|
||
#define TARGET_ADDRESS_COST loongarch_address_cost
|
||
#undef TARGET_INSN_COST
|
||
#define TARGET_INSN_COST loongarch_insn_cost
|
||
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
|
||
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
|
||
loongarch_builtin_vectorization_cost
|
||
#undef TARGET_VECTORIZE_CREATE_COSTS
|
||
#define TARGET_VECTORIZE_CREATE_COSTS loongarch_vectorize_create_costs
|
||
|
||
|
||
#undef TARGET_IN_SMALL_DATA_P
|
||
#define TARGET_IN_SMALL_DATA_P loongarch_in_small_data_p
|
||
|
||
#undef TARGET_PREFERRED_RELOAD_CLASS
|
||
#define TARGET_PREFERRED_RELOAD_CLASS loongarch_preferred_reload_class
|
||
|
||
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
|
||
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
|
||
|
||
#undef TARGET_EXPAND_BUILTIN_VA_START
|
||
#define TARGET_EXPAND_BUILTIN_VA_START loongarch_va_start
|
||
|
||
#undef TARGET_PROMOTE_FUNCTION_MODE
|
||
#define TARGET_PROMOTE_FUNCTION_MODE loongarch_promote_function_mode
|
||
#undef TARGET_RETURN_IN_MEMORY
|
||
#define TARGET_RETURN_IN_MEMORY loongarch_return_in_memory
|
||
|
||
#undef TARGET_FUNCTION_VALUE
|
||
#define TARGET_FUNCTION_VALUE loongarch_function_value
|
||
#undef TARGET_LIBCALL_VALUE
|
||
#define TARGET_LIBCALL_VALUE loongarch_libcall_value
|
||
|
||
#undef TARGET_ASM_OUTPUT_MI_THUNK
|
||
#define TARGET_ASM_OUTPUT_MI_THUNK loongarch_output_mi_thunk
|
||
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
|
||
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
|
||
hook_bool_const_tree_hwi_hwi_const_tree_true
|
||
|
||
#undef TARGET_PRINT_OPERAND
|
||
#define TARGET_PRINT_OPERAND loongarch_print_operand
|
||
#undef TARGET_PRINT_OPERAND_ADDRESS
|
||
#define TARGET_PRINT_OPERAND_ADDRESS loongarch_print_operand_address
|
||
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
|
||
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P \
|
||
loongarch_print_operand_punct_valid_p
|
||
|
||
#undef TARGET_SETUP_INCOMING_VARARGS
|
||
#define TARGET_SETUP_INCOMING_VARARGS loongarch_setup_incoming_varargs
|
||
#undef TARGET_STRICT_ARGUMENT_NAMING
|
||
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
|
||
#undef TARGET_MUST_PASS_IN_STACK
|
||
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
|
||
#undef TARGET_PASS_BY_REFERENCE
|
||
#define TARGET_PASS_BY_REFERENCE loongarch_pass_by_reference
|
||
#undef TARGET_ARG_PARTIAL_BYTES
|
||
#define TARGET_ARG_PARTIAL_BYTES loongarch_arg_partial_bytes
|
||
#undef TARGET_FUNCTION_ARG
|
||
#define TARGET_FUNCTION_ARG loongarch_function_arg
|
||
#undef TARGET_FUNCTION_ARG_ADVANCE
|
||
#define TARGET_FUNCTION_ARG_ADVANCE loongarch_function_arg_advance
|
||
#undef TARGET_FUNCTION_ARG_BOUNDARY
|
||
#define TARGET_FUNCTION_ARG_BOUNDARY loongarch_function_arg_boundary
|
||
|
||
#undef TARGET_VECTOR_MODE_SUPPORTED_P
|
||
#define TARGET_VECTOR_MODE_SUPPORTED_P loongarch_vector_mode_supported_p
|
||
|
||
#undef TARGET_SCALAR_MODE_SUPPORTED_P
|
||
#define TARGET_SCALAR_MODE_SUPPORTED_P loongarch_scalar_mode_supported_p
|
||
|
||
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
|
||
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE loongarch_preferred_simd_mode
|
||
|
||
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
|
||
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
|
||
loongarch_autovectorize_vector_modes
|
||
|
||
#undef TARGET_OPTAB_SUPPORTED_P
|
||
#define TARGET_OPTAB_SUPPORTED_P loongarch_optab_supported_p
|
||
|
||
#undef TARGET_INIT_BUILTINS
|
||
#define TARGET_INIT_BUILTINS loongarch_init_builtins
|
||
#undef TARGET_BUILTIN_DECL
|
||
#define TARGET_BUILTIN_DECL loongarch_builtin_decl
|
||
#undef TARGET_EXPAND_BUILTIN
|
||
#define TARGET_EXPAND_BUILTIN loongarch_expand_builtin
|
||
|
||
/* The generic ELF target does not always have TLS support. */
|
||
#ifdef HAVE_AS_TLS
|
||
#undef TARGET_HAVE_TLS
|
||
#define TARGET_HAVE_TLS HAVE_AS_TLS
|
||
#endif
|
||
|
||
#undef TARGET_CANNOT_FORCE_CONST_MEM
|
||
#define TARGET_CANNOT_FORCE_CONST_MEM loongarch_cannot_force_const_mem
|
||
|
||
#undef TARGET_LEGITIMATE_CONSTANT_P
|
||
#define TARGET_LEGITIMATE_CONSTANT_P loongarch_legitimate_constant_p
|
||
|
||
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
|
||
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
|
||
|
||
#ifdef HAVE_AS_DTPRELWORD
|
||
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
|
||
#define TARGET_ASM_OUTPUT_DWARF_DTPREL loongarch_output_dwarf_dtprel
|
||
#endif
|
||
|
||
#undef TARGET_LEGITIMATE_ADDRESS_P
|
||
#define TARGET_LEGITIMATE_ADDRESS_P loongarch_legitimate_address_p
|
||
|
||
#undef TARGET_FRAME_POINTER_REQUIRED
|
||
#define TARGET_FRAME_POINTER_REQUIRED loongarch_frame_pointer_required
|
||
|
||
#undef TARGET_CAN_ELIMINATE
|
||
#define TARGET_CAN_ELIMINATE loongarch_can_eliminate
|
||
|
||
#undef TARGET_CONDITIONAL_REGISTER_USAGE
|
||
#define TARGET_CONDITIONAL_REGISTER_USAGE loongarch_conditional_register_usage
|
||
|
||
#undef TARGET_TRAMPOLINE_INIT
|
||
#define TARGET_TRAMPOLINE_INIT loongarch_trampoline_init
|
||
|
||
#undef TARGET_MIN_ANCHOR_OFFSET
|
||
#define TARGET_MIN_ANCHOR_OFFSET (-IMM_REACH/2)
|
||
|
||
#undef TARGET_MAX_ANCHOR_OFFSET
|
||
#define TARGET_MAX_ANCHOR_OFFSET (IMM_REACH/2-1)
|
||
#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST loongarch_vectorize_vec_perm_const

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH loongarch_sched_reassociation_width

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV loongarch_atomic_assign_expand_fenv

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

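/* Register and mode hooks.  For example, the register allocator asks
   targetm.hard_regno_nregs (regno, mode) how many consecutive hard
   registers a value of MODE needs starting at hard register REGNO.  */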
#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS loongarch_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK loongarch_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P loongarch_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  loongarch_hard_regno_call_part_clobbered

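/* Use GCC's generic descriptors instead of executable-stack trampolines
   for nested functions.  Per the internals manual, the value is a
   power of two marking an unused low bit in function pointers (code
   being at least 4-byte aligned here) that distinguishes descriptors
   at run time.  */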
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS loongarch_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT loongarch_constant_alignment

#undef TARGET_STARTING_FRAME_OFFSET
#define TARGET_STARTING_FRAME_OFFSET loongarch_starting_frame_offset

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD loongarch_secondary_reload

#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
  loongarch_ira_change_pseudo_allocno_class

#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE loongarch_attribute_table

#undef TARGET_USE_ANCHORS_FOR_SYMBOL_P
#define TARGET_USE_ANCHORS_FOR_SYMBOL_P loongarch_use_anchors_for_symbol_p

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET loongarch_asan_shadow_offset

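/* Separate shrink-wrapping.  These hooks expose each callee-saved
   register's save/restore as an independent "component", letting the
   shrink-wrap pass sink saves into only the blocks that actually need
   them rather than saving everything in the prologue.  */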
#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
  loongarch_get_separate_components

#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB loongarch_components_for_bb

#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
  loongarch_disqualify_components

#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
  loongarch_emit_prologue_components

#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
  loongarch_emit_epilogue_components

#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
  loongarch_set_handled_components

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  loongarch_builtin_support_vector_misalignment

#undef TARGET_C_MODE_FOR_FLOATING_TYPE
#define TARGET_C_MODE_FOR_FLOATING_TYPE loongarch_c_mode_for_floating_type

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P loongarch_option_valid_attribute_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX loongarch_c_mode_for_suffix

#undef TARGET_COMPUTE_PRESSURE_CLASSES
#define TARGET_COMPUTE_PRESSURE_CLASSES loongarch_compute_pressure_classes

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P loongarch_can_inline_p

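/* TARGET_INITIALIZER (from target-def.h) expands to an aggregate
   initializer that collects every TARGET_* hook defined above; the
   rest of the compiler calls back into this file through the
   resulting targetm structure.  */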
struct gcc_target targetm = TARGET_INITIALIZER;

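/* Garbage-collector root tables for this file, generated by gengtype.  */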
#include "gt-loongarch.h"