mirror of git://gcc.gnu.org/git/gcc.git
aarch64-protos.h (aarch64_use_simple_return_insn_p): New prototype.
2018-12-17 Steve Ellcey <sellcey@cavium.com> * config/aarch64/aarch64-protos.h (aarch64_use_simple_return_insn_p): New prototype. (aarch64_epilogue_uses): Ditto. * config/aarch64/aarch64.c (aarch64_attribute_table): New array. (aarch64_simd_decl_p): New function. (aarch64_reg_save_mode): New function. (aarch64_function_ok_for_sibcall): Check for simd calls. (aarch64_layout_frame): Check for simd function. (aarch64_gen_storewb_pair): Handle E_TFmode. (aarch64_push_regs): Use aarch64_reg_save_mode to get mode. (aarch64_gen_loadwb_pair): Handle E_TFmode. (aarch64_pop_regs): Use aarch64_reg_save_mode to get mode. (aarch64_gen_store_pair): Handle E_TFmode. (aarch64_gen_load_pair): Ditto. (aarch64_save_callee_saves): Handle different mode sizes. (aarch64_restore_callee_saves): Ditto. (aarch64_components_for_bb): Check for simd function. (aarch64_epilogue_uses): New function. (aarch64_process_components): Check for simd function. (aarch64_expand_prologue): Ditto. (aarch64_expand_epilogue): Ditto. (aarch64_expand_call): Ditto. (aarch64_use_simple_return_insn_p): New function. (TARGET_ATTRIBUTE_TABLE): New define. * config/aarch64/aarch64.h (EPILOGUE_USES): Redefine. (FP_SIMD_SAVED_REGNUM_P): New macro. * config/aarch64/aarch64.md (simple_return): New define_expand. (load_pair_dw_tftf): New instruction. (store_pair_dw_tftf): Ditto. (loadwb_pair<TX:mode>_<P:mode>): Ditto. (storewb_pair<TX:mode>_<P:mode>): Ditto. From-SVN: r267208
This commit is contained in:
parent
4d814b6989
commit
a0d0b980f1
|
|
@ -1,3 +1,37 @@
|
||||||
|
2018-12-17 Steve Ellcey <sellcey@cavium.com>
|
||||||
|
|
||||||
|
* config/aarch64/aarch64-protos.h (aarch64_use_simple_return_insn_p):
|
||||||
|
New prototype.
|
||||||
|
(aarch64_epilogue_uses): Ditto.
|
||||||
|
* config/aarch64/aarch64.c (aarch64_attribute_table): New array.
|
||||||
|
(aarch64_simd_decl_p): New function.
|
||||||
|
(aarch64_reg_save_mode): New function.
|
||||||
|
(aarch64_function_ok_for_sibcall): Check for simd calls.
|
||||||
|
(aarch64_layout_frame): Check for simd function.
|
||||||
|
(aarch64_gen_storewb_pair): Handle E_TFmode.
|
||||||
|
(aarch64_push_regs): Use aarch64_reg_save_mode to get mode.
|
||||||
|
(aarch64_gen_loadwb_pair): Handle E_TFmode.
|
||||||
|
(aarch64_pop_regs): Use aarch64_reg_save_mode to get mode.
|
||||||
|
(aarch64_gen_store_pair): Handle E_TFmode.
|
||||||
|
(aarch64_gen_load_pair): Ditto.
|
||||||
|
(aarch64_save_callee_saves): Handle different mode sizes.
|
||||||
|
(aarch64_restore_callee_saves): Ditto.
|
||||||
|
(aarch64_components_for_bb): Check for simd function.
|
||||||
|
(aarch64_epilogue_uses): New function.
|
||||||
|
(aarch64_process_components): Check for simd function.
|
||||||
|
(aarch64_expand_prologue): Ditto.
|
||||||
|
(aarch64_expand_epilogue): Ditto.
|
||||||
|
(aarch64_expand_call): Ditto.
|
||||||
|
(aarch64_use_simple_return_insn_p): New function.
|
||||||
|
(TARGET_ATTRIBUTE_TABLE): New define.
|
||||||
|
* config/aarch64/aarch64.h (EPILOGUE_USES): Redefine.
|
||||||
|
(FP_SIMD_SAVED_REGNUM_P): New macro.
|
||||||
|
* config/aarch64/aarch64.md (simple_return): New define_expand.
|
||||||
|
(load_pair_dw_tftf): New instruction.
|
||||||
|
(store_pair_dw_tftf): Ditto.
|
||||||
|
(loadwb_pair<TX:mode>_<P:mode>): Ditto.
|
||||||
|
(storewb_pair<TX:mode>_<P:mode>): Ditto.
|
||||||
|
|
||||||
2018-12-17 Uros Bizjak <ubizjak@gmail.com>
|
2018-12-17 Uros Bizjak <ubizjak@gmail.com>
|
||||||
|
|
||||||
PR target/88502
|
PR target/88502
|
||||||
|
|
|
||||||
|
|
@ -471,6 +471,7 @@ bool aarch64_split_dimode_const_store (rtx, rtx);
|
||||||
bool aarch64_symbolic_address_p (rtx);
|
bool aarch64_symbolic_address_p (rtx);
|
||||||
bool aarch64_uimm12_shift (HOST_WIDE_INT);
|
bool aarch64_uimm12_shift (HOST_WIDE_INT);
|
||||||
bool aarch64_use_return_insn_p (void);
|
bool aarch64_use_return_insn_p (void);
|
||||||
|
bool aarch64_use_simple_return_insn_p (void);
|
||||||
const char *aarch64_mangle_builtin_type (const_tree);
|
const char *aarch64_mangle_builtin_type (const_tree);
|
||||||
const char *aarch64_output_casesi (rtx *);
|
const char *aarch64_output_casesi (rtx *);
|
||||||
|
|
||||||
|
|
@ -556,6 +557,8 @@ void aarch64_split_simd_move (rtx, rtx);
|
||||||
/* Check for a legitimate floating point constant for FMOV. */
|
/* Check for a legitimate floating point constant for FMOV. */
|
||||||
bool aarch64_float_const_representable_p (rtx);
|
bool aarch64_float_const_representable_p (rtx);
|
||||||
|
|
||||||
|
extern int aarch64_epilogue_uses (int);
|
||||||
|
|
||||||
#if defined (RTX_CODE)
|
#if defined (RTX_CODE)
|
||||||
void aarch64_gen_unlikely_cbranch (enum rtx_code, machine_mode cc_mode,
|
void aarch64_gen_unlikely_cbranch (enum rtx_code, machine_mode cc_mode,
|
||||||
rtx label_ref);
|
rtx label_ref);
|
||||||
|
|
|
||||||
|
|
@ -1141,6 +1141,15 @@ static const struct processor *selected_tune;
|
||||||
/* The current tuning set. */
|
/* The current tuning set. */
|
||||||
struct tune_params aarch64_tune_params = generic_tunings;
|
struct tune_params aarch64_tune_params = generic_tunings;
|
||||||
|
|
||||||
|
/* Table of machine attributes. */
|
||||||
|
static const struct attribute_spec aarch64_attribute_table[] =
|
||||||
|
{
|
||||||
|
/* { name, min_len, max_len, decl_req, type_req, fn_type_req,
|
||||||
|
affects_type_identity, handler, exclude } */
|
||||||
|
{ "aarch64_vector_pcs", 0, 0, false, true, true, false, NULL, NULL },
|
||||||
|
{ NULL, 0, 0, false, false, false, false, NULL, NULL }
|
||||||
|
};
|
||||||
|
|
||||||
#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
|
#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
|
||||||
|
|
||||||
/* An ISA extension in the co-processor and main instruction set space. */
|
/* An ISA extension in the co-processor and main instruction set space. */
|
||||||
|
|
@ -1523,6 +1532,39 @@ aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Return true if this is a definition of a vectorized simd function. */
|
||||||
|
|
||||||
|
static bool
|
||||||
|
aarch64_simd_decl_p (tree fndecl)
|
||||||
|
{
|
||||||
|
tree fntype;
|
||||||
|
|
||||||
|
if (fndecl == NULL)
|
||||||
|
return false;
|
||||||
|
fntype = TREE_TYPE (fndecl);
|
||||||
|
if (fntype == NULL)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* Functions with the aarch64_vector_pcs attribute use the simd ABI. */
|
||||||
|
if (lookup_attribute ("aarch64_vector_pcs", TYPE_ATTRIBUTES (fntype)) != NULL)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Return the mode a register save/restore should use. DImode for integer
|
||||||
|
registers, DFmode for FP registers in non-SIMD functions (they only save
|
||||||
|
the bottom half of a 128 bit register), or TFmode for FP registers in
|
||||||
|
SIMD functions. */
|
||||||
|
|
||||||
|
static machine_mode
|
||||||
|
aarch64_reg_save_mode (tree fndecl, unsigned regno)
|
||||||
|
{
|
||||||
|
return GP_REGNUM_P (regno)
|
||||||
|
? E_DImode
|
||||||
|
: (aarch64_simd_decl_p (fndecl) ? E_TFmode : E_DFmode);
|
||||||
|
}
|
||||||
|
|
||||||
/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The callee only saves
|
/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The callee only saves
|
||||||
the lower 64 bits of a 128-bit register. Tell the compiler the callee
|
the lower 64 bits of a 128-bit register. Tell the compiler the callee
|
||||||
clobbers the top 64 bits when restoring the bottom 64 bits. */
|
clobbers the top 64 bits when restoring the bottom 64 bits. */
|
||||||
|
|
@ -3349,7 +3391,9 @@ static bool
|
||||||
aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
|
aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
|
||||||
tree exp ATTRIBUTE_UNUSED)
|
tree exp ATTRIBUTE_UNUSED)
|
||||||
{
|
{
|
||||||
/* Currently, always true. */
|
if (aarch64_simd_decl_p (cfun->decl) != aarch64_simd_decl_p (decl))
|
||||||
|
return false;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -4210,6 +4254,7 @@ aarch64_layout_frame (void)
|
||||||
{
|
{
|
||||||
HOST_WIDE_INT offset = 0;
|
HOST_WIDE_INT offset = 0;
|
||||||
int regno, last_fp_reg = INVALID_REGNUM;
|
int regno, last_fp_reg = INVALID_REGNUM;
|
||||||
|
bool simd_function = aarch64_simd_decl_p (cfun->decl);
|
||||||
|
|
||||||
cfun->machine->frame.emit_frame_chain = aarch64_needs_frame_chain ();
|
cfun->machine->frame.emit_frame_chain = aarch64_needs_frame_chain ();
|
||||||
|
|
||||||
|
|
@ -4223,6 +4268,17 @@ aarch64_layout_frame (void)
|
||||||
cfun->machine->frame.wb_candidate1 = INVALID_REGNUM;
|
cfun->machine->frame.wb_candidate1 = INVALID_REGNUM;
|
||||||
cfun->machine->frame.wb_candidate2 = INVALID_REGNUM;
|
cfun->machine->frame.wb_candidate2 = INVALID_REGNUM;
|
||||||
|
|
||||||
|
/* If this is a non-leaf simd function with calls we assume that
|
||||||
|
at least one of those calls is to a non-simd function and thus
|
||||||
|
we must save V8 to V23 in the prologue. */
|
||||||
|
|
||||||
|
if (simd_function && !crtl->is_leaf)
|
||||||
|
{
|
||||||
|
for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
|
||||||
|
if (FP_SIMD_SAVED_REGNUM_P (regno))
|
||||||
|
df_set_regs_ever_live (regno, true);
|
||||||
|
}
|
||||||
|
|
||||||
/* First mark all the registers that really need to be saved... */
|
/* First mark all the registers that really need to be saved... */
|
||||||
for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
|
for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
|
||||||
cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
|
cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
|
||||||
|
|
@ -4245,7 +4301,8 @@ aarch64_layout_frame (void)
|
||||||
|
|
||||||
for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
|
for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
|
||||||
if (df_regs_ever_live_p (regno)
|
if (df_regs_ever_live_p (regno)
|
||||||
&& !call_used_regs[regno])
|
&& (!call_used_regs[regno]
|
||||||
|
|| (simd_function && FP_SIMD_SAVED_REGNUM_P (regno))))
|
||||||
{
|
{
|
||||||
cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
|
cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
|
||||||
last_fp_reg = regno;
|
last_fp_reg = regno;
|
||||||
|
|
@ -4287,7 +4344,10 @@ aarch64_layout_frame (void)
|
||||||
{
|
{
|
||||||
/* If there is an alignment gap between integer and fp callee-saves,
|
/* If there is an alignment gap between integer and fp callee-saves,
|
||||||
allocate the last fp register to it if possible. */
|
allocate the last fp register to it if possible. */
|
||||||
if (regno == last_fp_reg && has_align_gap && (offset & 8) == 0)
|
if (regno == last_fp_reg
|
||||||
|
&& has_align_gap
|
||||||
|
&& !simd_function
|
||||||
|
&& (offset & 8) == 0)
|
||||||
{
|
{
|
||||||
cfun->machine->frame.reg_offset[regno] = max_int_offset;
|
cfun->machine->frame.reg_offset[regno] = max_int_offset;
|
||||||
break;
|
break;
|
||||||
|
|
@ -4299,7 +4359,7 @@ aarch64_layout_frame (void)
|
||||||
else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM
|
else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM
|
||||||
&& cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
|
&& cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
|
||||||
cfun->machine->frame.wb_candidate2 = regno;
|
cfun->machine->frame.wb_candidate2 = regno;
|
||||||
offset += UNITS_PER_WORD;
|
offset += simd_function ? UNITS_PER_VREG : UNITS_PER_WORD;
|
||||||
}
|
}
|
||||||
|
|
||||||
offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
|
offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
|
||||||
|
|
@ -4442,6 +4502,10 @@ aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
|
||||||
return gen_storewb_pairdf_di (base, base, reg, reg2,
|
return gen_storewb_pairdf_di (base, base, reg, reg2,
|
||||||
GEN_INT (-adjustment),
|
GEN_INT (-adjustment),
|
||||||
GEN_INT (UNITS_PER_WORD - adjustment));
|
GEN_INT (UNITS_PER_WORD - adjustment));
|
||||||
|
case E_TFmode:
|
||||||
|
return gen_storewb_pairtf_di (base, base, reg, reg2,
|
||||||
|
GEN_INT (-adjustment),
|
||||||
|
GEN_INT (UNITS_PER_VREG - adjustment));
|
||||||
default:
|
default:
|
||||||
gcc_unreachable ();
|
gcc_unreachable ();
|
||||||
}
|
}
|
||||||
|
|
@ -4454,7 +4518,7 @@ static void
|
||||||
aarch64_push_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment)
|
aarch64_push_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment)
|
||||||
{
|
{
|
||||||
rtx_insn *insn;
|
rtx_insn *insn;
|
||||||
machine_mode mode = (regno1 <= R30_REGNUM) ? E_DImode : E_DFmode;
|
machine_mode mode = aarch64_reg_save_mode (cfun->decl, regno1);
|
||||||
|
|
||||||
if (regno2 == INVALID_REGNUM)
|
if (regno2 == INVALID_REGNUM)
|
||||||
return aarch64_pushwb_single_reg (mode, regno1, adjustment);
|
return aarch64_pushwb_single_reg (mode, regno1, adjustment);
|
||||||
|
|
@ -4484,6 +4548,9 @@ aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
|
||||||
case E_DFmode:
|
case E_DFmode:
|
||||||
return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
|
return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
|
||||||
GEN_INT (UNITS_PER_WORD));
|
GEN_INT (UNITS_PER_WORD));
|
||||||
|
case E_TFmode:
|
||||||
|
return gen_loadwb_pairtf_di (base, base, reg, reg2, GEN_INT (adjustment),
|
||||||
|
GEN_INT (UNITS_PER_VREG));
|
||||||
default:
|
default:
|
||||||
gcc_unreachable ();
|
gcc_unreachable ();
|
||||||
}
|
}
|
||||||
|
|
@ -4497,7 +4564,7 @@ static void
|
||||||
aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment,
|
aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment,
|
||||||
rtx *cfi_ops)
|
rtx *cfi_ops)
|
||||||
{
|
{
|
||||||
machine_mode mode = (regno1 <= R30_REGNUM) ? E_DImode : E_DFmode;
|
machine_mode mode = aarch64_reg_save_mode (cfun->decl, regno1);
|
||||||
rtx reg1 = gen_rtx_REG (mode, regno1);
|
rtx reg1 = gen_rtx_REG (mode, regno1);
|
||||||
|
|
||||||
*cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops);
|
*cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops);
|
||||||
|
|
@ -4532,6 +4599,9 @@ aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
|
||||||
case E_DFmode:
|
case E_DFmode:
|
||||||
return gen_store_pair_dw_dfdf (mem1, reg1, mem2, reg2);
|
return gen_store_pair_dw_dfdf (mem1, reg1, mem2, reg2);
|
||||||
|
|
||||||
|
case E_TFmode:
|
||||||
|
return gen_store_pair_dw_tftf (mem1, reg1, mem2, reg2);
|
||||||
|
|
||||||
default:
|
default:
|
||||||
gcc_unreachable ();
|
gcc_unreachable ();
|
||||||
}
|
}
|
||||||
|
|
@ -4552,6 +4622,9 @@ aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
|
||||||
case E_DFmode:
|
case E_DFmode:
|
||||||
return gen_load_pair_dw_dfdf (reg1, mem1, reg2, mem2);
|
return gen_load_pair_dw_dfdf (reg1, mem1, reg2, mem2);
|
||||||
|
|
||||||
|
case E_TFmode:
|
||||||
|
return gen_load_pair_dw_tftf (reg1, mem1, reg2, mem2);
|
||||||
|
|
||||||
default:
|
default:
|
||||||
gcc_unreachable ();
|
gcc_unreachable ();
|
||||||
}
|
}
|
||||||
|
|
@ -4591,6 +4664,7 @@ aarch64_save_callee_saves (machine_mode mode, poly_int64 start_offset,
|
||||||
{
|
{
|
||||||
rtx reg, mem;
|
rtx reg, mem;
|
||||||
poly_int64 offset;
|
poly_int64 offset;
|
||||||
|
int offset_diff;
|
||||||
|
|
||||||
if (skip_wb
|
if (skip_wb
|
||||||
&& (regno == cfun->machine->frame.wb_candidate1
|
&& (regno == cfun->machine->frame.wb_candidate1
|
||||||
|
|
@ -4606,12 +4680,12 @@ aarch64_save_callee_saves (machine_mode mode, poly_int64 start_offset,
|
||||||
offset));
|
offset));
|
||||||
|
|
||||||
regno2 = aarch64_next_callee_save (regno + 1, limit);
|
regno2 = aarch64_next_callee_save (regno + 1, limit);
|
||||||
|
offset_diff = cfun->machine->frame.reg_offset[regno2]
|
||||||
|
- cfun->machine->frame.reg_offset[regno];
|
||||||
|
|
||||||
if (regno2 <= limit
|
if (regno2 <= limit
|
||||||
&& !cfun->machine->reg_is_wrapped_separately[regno2]
|
&& !cfun->machine->reg_is_wrapped_separately[regno2]
|
||||||
&& ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
|
&& known_eq (GET_MODE_SIZE (mode), offset_diff))
|
||||||
== cfun->machine->frame.reg_offset[regno2]))
|
|
||||||
|
|
||||||
{
|
{
|
||||||
rtx reg2 = gen_rtx_REG (mode, regno2);
|
rtx reg2 = gen_rtx_REG (mode, regno2);
|
||||||
rtx mem2;
|
rtx mem2;
|
||||||
|
|
@ -4659,6 +4733,7 @@ aarch64_restore_callee_saves (machine_mode mode,
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
rtx reg, mem;
|
rtx reg, mem;
|
||||||
|
int offset_diff;
|
||||||
|
|
||||||
if (skip_wb
|
if (skip_wb
|
||||||
&& (regno == cfun->machine->frame.wb_candidate1
|
&& (regno == cfun->machine->frame.wb_candidate1
|
||||||
|
|
@ -4670,11 +4745,12 @@ aarch64_restore_callee_saves (machine_mode mode,
|
||||||
mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
|
mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
|
||||||
|
|
||||||
regno2 = aarch64_next_callee_save (regno + 1, limit);
|
regno2 = aarch64_next_callee_save (regno + 1, limit);
|
||||||
|
offset_diff = cfun->machine->frame.reg_offset[regno2]
|
||||||
|
- cfun->machine->frame.reg_offset[regno];
|
||||||
|
|
||||||
if (regno2 <= limit
|
if (regno2 <= limit
|
||||||
&& !cfun->machine->reg_is_wrapped_separately[regno2]
|
&& !cfun->machine->reg_is_wrapped_separately[regno2]
|
||||||
&& ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
|
&& known_eq (GET_MODE_SIZE (mode), offset_diff))
|
||||||
== cfun->machine->frame.reg_offset[regno2]))
|
|
||||||
{
|
{
|
||||||
rtx reg2 = gen_rtx_REG (mode, regno2);
|
rtx reg2 = gen_rtx_REG (mode, regno2);
|
||||||
rtx mem2;
|
rtx mem2;
|
||||||
|
|
@ -4808,13 +4884,15 @@ aarch64_components_for_bb (basic_block bb)
|
||||||
bitmap in = DF_LIVE_IN (bb);
|
bitmap in = DF_LIVE_IN (bb);
|
||||||
bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
|
bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
|
||||||
bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
|
bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
|
||||||
|
bool simd_function = aarch64_simd_decl_p (cfun->decl);
|
||||||
|
|
||||||
sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
|
sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
|
||||||
bitmap_clear (components);
|
bitmap_clear (components);
|
||||||
|
|
||||||
/* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
|
/* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
|
||||||
for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
|
for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
|
||||||
if ((!call_used_regs[regno])
|
if ((!call_used_regs[regno]
|
||||||
|
|| (simd_function && FP_SIMD_SAVED_REGNUM_P (regno)))
|
||||||
&& (bitmap_bit_p (in, regno)
|
&& (bitmap_bit_p (in, regno)
|
||||||
|| bitmap_bit_p (gen, regno)
|
|| bitmap_bit_p (gen, regno)
|
||||||
|| bitmap_bit_p (kill, regno)))
|
|| bitmap_bit_p (kill, regno)))
|
||||||
|
|
@ -4885,9 +4963,11 @@ aarch64_process_components (sbitmap components, bool prologue_p)
|
||||||
|
|
||||||
while (regno != last_regno)
|
while (regno != last_regno)
|
||||||
{
|
{
|
||||||
/* AAPCS64 section 5.1.2 requires only the bottom 64 bits to be saved
|
/* AAPCS64 section 5.1.2 requires only the low 64 bits to be saved
|
||||||
so DFmode for the vector registers is enough. */
|
so DFmode for the vector registers is enough. For simd functions
|
||||||
machine_mode mode = GP_REGNUM_P (regno) ? E_DImode : E_DFmode;
|
we want to save the low 128 bits. */
|
||||||
|
machine_mode mode = aarch64_reg_save_mode (cfun->decl, regno);
|
||||||
|
|
||||||
rtx reg = gen_rtx_REG (mode, regno);
|
rtx reg = gen_rtx_REG (mode, regno);
|
||||||
poly_int64 offset = cfun->machine->frame.reg_offset[regno];
|
poly_int64 offset = cfun->machine->frame.reg_offset[regno];
|
||||||
if (!frame_pointer_needed)
|
if (!frame_pointer_needed)
|
||||||
|
|
@ -4916,6 +4996,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
|
||||||
mergeable with the current one into a pair. */
|
mergeable with the current one into a pair. */
|
||||||
if (!satisfies_constraint_Ump (mem)
|
if (!satisfies_constraint_Ump (mem)
|
||||||
|| GP_REGNUM_P (regno) != GP_REGNUM_P (regno2)
|
|| GP_REGNUM_P (regno) != GP_REGNUM_P (regno2)
|
||||||
|
|| (aarch64_simd_decl_p (cfun->decl) && FP_REGNUM_P (regno))
|
||||||
|| maybe_ne ((offset2 - cfun->machine->frame.reg_offset[regno]),
|
|| maybe_ne ((offset2 - cfun->machine->frame.reg_offset[regno]),
|
||||||
GET_MODE_SIZE (mode)))
|
GET_MODE_SIZE (mode)))
|
||||||
{
|
{
|
||||||
|
|
@ -5231,6 +5312,28 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Return 1 if the register is used by the epilogue. We need to say the
|
||||||
|
return register is used, but only after epilogue generation is complete.
|
||||||
|
Note that in the case of sibcalls, the values "used by the epilogue" are
|
||||||
|
considered live at the start of the called function.
|
||||||
|
|
||||||
|
For SIMD functions we need to return 1 for FP registers that are saved and
|
||||||
|
restored by a function but are not zero in call_used_regs. If we do not do
|
||||||
|
this, optimizations may remove the restore of the register. */
|
||||||
|
|
||||||
|
int
|
||||||
|
aarch64_epilogue_uses (int regno)
|
||||||
|
{
|
||||||
|
if (epilogue_completed)
|
||||||
|
{
|
||||||
|
if (regno == LR_REGNUM)
|
||||||
|
return 1;
|
||||||
|
if (aarch64_simd_decl_p (cfun->decl) && FP_SIMD_SAVED_REGNUM_P (regno))
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/* Add a REG_CFA_EXPRESSION note to INSN to say that register REG
|
/* Add a REG_CFA_EXPRESSION note to INSN to say that register REG
|
||||||
is saved at BASE + OFFSET. */
|
is saved at BASE + OFFSET. */
|
||||||
|
|
||||||
|
|
@ -5405,8 +5508,12 @@ aarch64_expand_prologue (void)
|
||||||
|
|
||||||
aarch64_save_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
|
aarch64_save_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
|
||||||
callee_adjust != 0 || emit_frame_chain);
|
callee_adjust != 0 || emit_frame_chain);
|
||||||
aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
|
if (aarch64_simd_decl_p (cfun->decl))
|
||||||
callee_adjust != 0 || emit_frame_chain);
|
aarch64_save_callee_saves (TFmode, callee_offset, V0_REGNUM, V31_REGNUM,
|
||||||
|
callee_adjust != 0 || emit_frame_chain);
|
||||||
|
else
|
||||||
|
aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
|
||||||
|
callee_adjust != 0 || emit_frame_chain);
|
||||||
|
|
||||||
/* We may need to probe the final adjustment if it is larger than the guard
|
/* We may need to probe the final adjustment if it is larger than the guard
|
||||||
that is assumed by the callee. */
|
that is assumed by the callee. */
|
||||||
|
|
@ -5432,6 +5539,19 @@ aarch64_use_return_insn_p (void)
|
||||||
return known_eq (cfun->machine->frame.frame_size, 0);
|
return known_eq (cfun->machine->frame.frame_size, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Return false for non-leaf SIMD functions in order to avoid
|
||||||
|
shrink-wrapping them. Shrink-wrapping would lose the necessary
|
||||||
|
save/restore of FP registers. */
|
||||||
|
|
||||||
|
bool
|
||||||
|
aarch64_use_simple_return_insn_p (void)
|
||||||
|
{
|
||||||
|
if (aarch64_simd_decl_p (cfun->decl) && !crtl->is_leaf)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/* Generate the epilogue instructions for returning from a function.
|
/* Generate the epilogue instructions for returning from a function.
|
||||||
This is almost exactly the reverse of the prolog sequence, except
|
This is almost exactly the reverse of the prolog sequence, except
|
||||||
that we need to insert barriers to avoid scheduling loads that read
|
that we need to insert barriers to avoid scheduling loads that read
|
||||||
|
|
@ -5500,8 +5620,12 @@ aarch64_expand_epilogue (bool for_sibcall)
|
||||||
|
|
||||||
aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
|
aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
|
||||||
callee_adjust != 0, &cfi_ops);
|
callee_adjust != 0, &cfi_ops);
|
||||||
aarch64_restore_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
|
if (aarch64_simd_decl_p (cfun->decl))
|
||||||
callee_adjust != 0, &cfi_ops);
|
aarch64_restore_callee_saves (TFmode, callee_offset, V0_REGNUM, V31_REGNUM,
|
||||||
|
callee_adjust != 0, &cfi_ops);
|
||||||
|
else
|
||||||
|
aarch64_restore_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
|
||||||
|
callee_adjust != 0, &cfi_ops);
|
||||||
|
|
||||||
if (need_barrier_p)
|
if (need_barrier_p)
|
||||||
emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
|
emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
|
||||||
|
|
@ -18422,6 +18546,9 @@ aarch64_libgcc_floating_mode_supported_p
|
||||||
#undef TARGET_ESTIMATED_POLY_VALUE
|
#undef TARGET_ESTIMATED_POLY_VALUE
|
||||||
#define TARGET_ESTIMATED_POLY_VALUE aarch64_estimated_poly_value
|
#define TARGET_ESTIMATED_POLY_VALUE aarch64_estimated_poly_value
|
||||||
|
|
||||||
|
#undef TARGET_ATTRIBUTE_TABLE
|
||||||
|
#define TARGET_ATTRIBUTE_TABLE aarch64_attribute_table
|
||||||
|
|
||||||
#if CHECKING_P
|
#if CHECKING_P
|
||||||
#undef TARGET_RUN_TARGET_SELFTESTS
|
#undef TARGET_RUN_TARGET_SELFTESTS
|
||||||
#define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests
|
#define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests
|
||||||
|
|
|
||||||
|
|
@ -409,13 +409,7 @@ extern unsigned aarch64_architecture_version;
|
||||||
V_ALIASES(28), V_ALIASES(29), V_ALIASES(30), V_ALIASES(31) \
|
V_ALIASES(28), V_ALIASES(29), V_ALIASES(30), V_ALIASES(31) \
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Say that the return address register is used by the epilogue, but only after
|
#define EPILOGUE_USES(REGNO) (aarch64_epilogue_uses (REGNO))
|
||||||
epilogue generation is complete. Note that in the case of sibcalls, the
|
|
||||||
values "used by the epilogue" are considered live at the start of the called
|
|
||||||
function. */
|
|
||||||
|
|
||||||
#define EPILOGUE_USES(REGNO) \
|
|
||||||
(epilogue_completed && (REGNO) == LR_REGNUM)
|
|
||||||
|
|
||||||
/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,
|
/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,
|
||||||
the stack pointer does not matter. This is only true if the function
|
the stack pointer does not matter. This is only true if the function
|
||||||
|
|
@ -523,6 +517,8 @@ extern unsigned aarch64_architecture_version;
|
||||||
#define PR_LO_REGNUM_P(REGNO)\
|
#define PR_LO_REGNUM_P(REGNO)\
|
||||||
(((unsigned) (REGNO - P0_REGNUM)) <= (P7_REGNUM - P0_REGNUM))
|
(((unsigned) (REGNO - P0_REGNUM)) <= (P7_REGNUM - P0_REGNUM))
|
||||||
|
|
||||||
|
#define FP_SIMD_SAVED_REGNUM_P(REGNO) \
|
||||||
|
(((unsigned) (REGNO - V8_REGNUM)) <= (V23_REGNUM - V8_REGNUM))
|
||||||
|
|
||||||
/* Register and constant classes. */
|
/* Register and constant classes. */
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -727,7 +727,7 @@
|
||||||
|
|
||||||
(define_insn "simple_return"
|
(define_insn "simple_return"
|
||||||
[(simple_return)]
|
[(simple_return)]
|
||||||
""
|
"aarch64_use_simple_return_insn_p ()"
|
||||||
"ret"
|
"ret"
|
||||||
[(set_attr "type" "branch")]
|
[(set_attr "type" "branch")]
|
||||||
)
|
)
|
||||||
|
|
@ -1387,6 +1387,21 @@
|
||||||
(set_attr "arch" "*,fp")]
|
(set_attr "arch" "*,fp")]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
(define_insn "load_pair_dw_tftf"
|
||||||
|
[(set (match_operand:TF 0 "register_operand" "=w")
|
||||||
|
(match_operand:TF 1 "aarch64_mem_pair_operand" "Ump"))
|
||||||
|
(set (match_operand:TF 2 "register_operand" "=w")
|
||||||
|
(match_operand:TF 3 "memory_operand" "m"))]
|
||||||
|
"TARGET_SIMD
|
||||||
|
&& rtx_equal_p (XEXP (operands[3], 0),
|
||||||
|
plus_constant (Pmode,
|
||||||
|
XEXP (operands[1], 0),
|
||||||
|
GET_MODE_SIZE (TFmode)))"
|
||||||
|
"ldp\\t%q0, %q2, %1"
|
||||||
|
[(set_attr "type" "neon_ldp_q")
|
||||||
|
(set_attr "fp" "yes")]
|
||||||
|
)
|
||||||
|
|
||||||
;; Operands 0 and 2 are tied together by the final condition; so we allow
|
;; Operands 0 and 2 are tied together by the final condition; so we allow
|
||||||
;; fairly lax checking on the second memory operation.
|
;; fairly lax checking on the second memory operation.
|
||||||
(define_insn "store_pair_sw_<SX:mode><SX2:mode>"
|
(define_insn "store_pair_sw_<SX:mode><SX2:mode>"
|
||||||
|
|
@ -1422,6 +1437,21 @@
|
||||||
(set_attr "arch" "*,fp")]
|
(set_attr "arch" "*,fp")]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
(define_insn "store_pair_dw_tftf"
|
||||||
|
[(set (match_operand:TF 0 "aarch64_mem_pair_operand" "=Ump")
|
||||||
|
(match_operand:TF 1 "register_operand" "w"))
|
||||||
|
(set (match_operand:TF 2 "memory_operand" "=m")
|
||||||
|
(match_operand:TF 3 "register_operand" "w"))]
|
||||||
|
"TARGET_SIMD &&
|
||||||
|
rtx_equal_p (XEXP (operands[2], 0),
|
||||||
|
plus_constant (Pmode,
|
||||||
|
XEXP (operands[0], 0),
|
||||||
|
GET_MODE_SIZE (TFmode)))"
|
||||||
|
"stp\\t%q1, %q3, %0"
|
||||||
|
[(set_attr "type" "neon_stp_q")
|
||||||
|
(set_attr "fp" "yes")]
|
||||||
|
)
|
||||||
|
|
||||||
;; Load pair with post-index writeback. This is primarily used in function
|
;; Load pair with post-index writeback. This is primarily used in function
|
||||||
;; epilogues.
|
;; epilogues.
|
||||||
(define_insn "loadwb_pair<GPI:mode>_<P:mode>"
|
(define_insn "loadwb_pair<GPI:mode>_<P:mode>"
|
||||||
|
|
@ -1454,6 +1484,21 @@
|
||||||
[(set_attr "type" "neon_load1_2reg")]
|
[(set_attr "type" "neon_load1_2reg")]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
(define_insn "loadwb_pair<TX:mode>_<P:mode>"
|
||||||
|
[(parallel
|
||||||
|
[(set (match_operand:P 0 "register_operand" "=k")
|
||||||
|
(plus:P (match_operand:P 1 "register_operand" "0")
|
||||||
|
(match_operand:P 4 "aarch64_mem_pair_offset" "n")))
|
||||||
|
(set (match_operand:TX 2 "register_operand" "=w")
|
||||||
|
(mem:TX (match_dup 1)))
|
||||||
|
(set (match_operand:TX 3 "register_operand" "=w")
|
||||||
|
(mem:TX (plus:P (match_dup 1)
|
||||||
|
(match_operand:P 5 "const_int_operand" "n"))))])]
|
||||||
|
"TARGET_SIMD && INTVAL (operands[5]) == GET_MODE_SIZE (<TX:MODE>mode)"
|
||||||
|
"ldp\\t%q2, %q3, [%1], %4"
|
||||||
|
[(set_attr "type" "neon_ldp_q")]
|
||||||
|
)
|
||||||
|
|
||||||
;; Store pair with pre-index writeback. This is primarily used in function
|
;; Store pair with pre-index writeback. This is primarily used in function
|
||||||
;; prologues.
|
;; prologues.
|
||||||
(define_insn "storewb_pair<GPI:mode>_<P:mode>"
|
(define_insn "storewb_pair<GPI:mode>_<P:mode>"
|
||||||
|
|
@ -1488,6 +1533,24 @@
|
||||||
[(set_attr "type" "neon_store1_2reg<q>")]
|
[(set_attr "type" "neon_store1_2reg<q>")]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
(define_insn "storewb_pair<TX:mode>_<P:mode>"
|
||||||
|
[(parallel
|
||||||
|
[(set (match_operand:P 0 "register_operand" "=&k")
|
||||||
|
(plus:P (match_operand:P 1 "register_operand" "0")
|
||||||
|
(match_operand:P 4 "aarch64_mem_pair_offset" "n")))
|
||||||
|
(set (mem:TX (plus:P (match_dup 0)
|
||||||
|
(match_dup 4)))
|
||||||
|
(match_operand:TX 2 "register_operand" "w"))
|
||||||
|
(set (mem:TX (plus:P (match_dup 0)
|
||||||
|
(match_operand:P 5 "const_int_operand" "n")))
|
||||||
|
(match_operand:TX 3 "register_operand" "w"))])]
|
||||||
|
"TARGET_SIMD
|
||||||
|
&& INTVAL (operands[5])
|
||||||
|
== INTVAL (operands[4]) + GET_MODE_SIZE (<TX:MODE>mode)"
|
||||||
|
"stp\\t%q2, %q3, [%0, %4]!"
|
||||||
|
[(set_attr "type" "neon_stp_q")]
|
||||||
|
)
|
||||||
|
|
||||||
;; -------------------------------------------------------------------
|
;; -------------------------------------------------------------------
|
||||||
;; Sign/Zero extension
|
;; Sign/Zero extension
|
||||||
;; -------------------------------------------------------------------
|
;; -------------------------------------------------------------------
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue