aarch64-protos.h (aarch64_use_simple_return_insn_p): New prototype.

2018-12-17  Steve Ellcey  <sellcey@cavium.com>

	* config/aarch64/aarch64-protos.h (aarch64_use_simple_return_insn_p):
	New prototype.
	(aarch64_epilogue_uses): Ditto.
	* config/aarch64/aarch64.c (aarch64_attribute_table): New array.
	(aarch64_simd_decl_p): New function.
	(aarch64_reg_save_mode): New function.
	(aarch64_function_ok_for_sibcall): Check for simd calls.
	(aarch64_layout_frame): Check for simd function.
	(aarch64_gen_storewb_pair): Handle E_TFmode.
	(aarch64_push_regs): Use aarch64_reg_save_mode to get mode.
	(aarch64_gen_loadwb_pair): Handle E_TFmode.
	(aarch64_pop_regs): Use aarch64_reg_save_mode to get mode.
	(aarch64_gen_store_pair): Handle E_TFmode.
	(aarch64_gen_load_pair): Ditto.
	(aarch64_save_callee_saves): Handle different mode sizes.
	(aarch64_restore_callee_saves): Ditto.
	(aarch64_components_for_bb): Check for simd function.
	(aarch64_epilogue_uses): New function.
	(aarch64_process_components): Check for simd function.
	(aarch64_expand_prologue): Ditto.
	(aarch64_expand_epilogue): Ditto.
	(aarch64_expand_call): Ditto.
	(aarch64_use_simple_return_insn_p): New function.
	(TARGET_ATTRIBUTE_TABLE): New define.
	* config/aarch64/aarch64.h (EPILOGUE_USES): Redefine.
	(FP_SIMD_SAVED_REGNUM_P): New macro.
	* config/aarch64/aarch64.md (simple_return): Use
	aarch64_use_simple_return_insn_p as the insn condition.
	(load_pair_dw_tftf): New instruction.
	(store_pair_dw_tftf): Ditto.
	(loadwb_pair<TX:mode>_<P:mode>): Ditto.
	(storewb_pair<TX:mode>_<P:mode>): Ditto.

From-SVN: r267208
This commit is contained in:
Steve Ellcey 2018-12-17 19:14:04 +00:00 committed by Steve Ellcey
parent 4d814b6989
commit a0d0b980f1
5 changed files with 250 additions and 27 deletions

View File

@ -1,3 +1,37 @@
2018-12-17 Steve Ellcey <sellcey@cavium.com>
* config/aarch64/aarch64-protos.h (aarch64_use_simple_return_insn_p):
New prototype.
(aarch64_epilogue_uses): Ditto.
* config/aarch64/aarch64.c (aarch64_attribute_table): New array.
(aarch64_simd_decl_p): New function.
(aarch64_reg_save_mode): New function.
(aarch64_function_ok_for_sibcall): Check for simd calls.
(aarch64_layout_frame): Check for simd function.
(aarch64_gen_storewb_pair): Handle E_TFmode.
(aarch64_push_regs): Use aarch64_reg_save_mode to get mode.
(aarch64_gen_loadwb_pair): Handle E_TFmode.
(aarch64_pop_regs): Use aarch64_reg_save_mode to get mode.
(aarch64_gen_store_pair): Handle E_TFmode.
(aarch64_gen_load_pair): Ditto.
(aarch64_save_callee_saves): Handle different mode sizes.
(aarch64_restore_callee_saves): Ditto.
(aarch64_components_for_bb): Check for simd function.
(aarch64_epilogue_uses): New function.
(aarch64_process_components): Check for simd function.
(aarch64_expand_prologue): Ditto.
(aarch64_expand_epilogue): Ditto.
(aarch64_expand_call): Ditto.
(aarch64_use_simple_return_insn_p): New function.
(TARGET_ATTRIBUTE_TABLE): New define.
* config/aarch64/aarch64.h (EPILOGUE_USES): Redefine.
(FP_SIMD_SAVED_REGNUM_P): New macro.
* config/aarch64/aarch64.md (simple_return): Use
aarch64_use_simple_return_insn_p as the insn condition.
(load_pair_dw_tftf): New instruction.
(store_pair_dw_tftf): Ditto.
(loadwb_pair<TX:mode>_<P:mode>): Ditto.
(storewb_pair<TX:mode>_<P:mode>): Ditto.
2018-12-17 Uros Bizjak <ubizjak@gmail.com>
PR target/88502

View File

@ -471,6 +471,7 @@ bool aarch64_split_dimode_const_store (rtx, rtx);
bool aarch64_symbolic_address_p (rtx);
bool aarch64_uimm12_shift (HOST_WIDE_INT);
bool aarch64_use_return_insn_p (void);
bool aarch64_use_simple_return_insn_p (void);
const char *aarch64_mangle_builtin_type (const_tree);
const char *aarch64_output_casesi (rtx *);
@ -556,6 +557,8 @@ void aarch64_split_simd_move (rtx, rtx);
/* Check for a legitimate floating point constant for FMOV. */
bool aarch64_float_const_representable_p (rtx);
extern int aarch64_epilogue_uses (int);
#if defined (RTX_CODE)
void aarch64_gen_unlikely_cbranch (enum rtx_code, machine_mode cc_mode,
rtx label_ref);

View File

@ -1141,6 +1141,15 @@ static const struct processor *selected_tune;
/* The current tuning set. */
struct tune_params aarch64_tune_params = generic_tunings;
/* Table of machine attributes. This array is what TARGET_ATTRIBUTE_TABLE
   is defined to further down in this file. */
static const struct attribute_spec aarch64_attribute_table[] =
{
/* { name, min_len, max_len, decl_req, type_req, fn_type_req,
affects_type_identity, handler, exclude } */
/* "aarch64_vector_pcs": min_len and max_len are both 0, type_req and
   fn_type_req are set while decl_req is not, and there is no handler.
   aarch64_simd_decl_p looks this name up in the attributes of a
   function's type. */
{ "aarch64_vector_pcs", 0, 0, false, true, true, false, NULL, NULL },
/* All-NULL/false entry; presumably the end-of-table sentinel. */
{ NULL, 0, 0, false, false, false, false, NULL, NULL }
};
#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
/* An ISA extension in the co-processor and main instruction set space. */
@ -1523,6 +1532,39 @@ aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
return false;
}
/* Return true if FNDECL has a function type carrying the
   "aarch64_vector_pcs" attribute, i.e. it uses the simd ABI.
   A null FNDECL, or one whose TREE_TYPE is null, yields false.  */

static bool
aarch64_simd_decl_p (tree fndecl)
{
  if (!fndecl)
    return false;

  tree fn_type = TREE_TYPE (fndecl);
  if (!fn_type)
    return false;

  /* The attribute is looked up on the function type, not on the decl.  */
  return lookup_attribute ("aarch64_vector_pcs",
                           TYPE_ATTRIBUTES (fn_type)) != NULL;
}
/* Return the mode in which register REGNO is saved and restored by
   function FNDECL:
     - DImode for general-purpose registers;
     - TFmode for the remaining (FP) registers when FNDECL is a SIMD
       function, so that the full 128 bits are preserved;
     - DFmode for FP registers otherwise, since non-SIMD functions only
       save the bottom half of a 128-bit register.  */

static machine_mode
aarch64_reg_save_mode (tree fndecl, unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return E_DImode;

  if (aarch64_simd_decl_p (fndecl))
    return E_TFmode;

  return E_DFmode;
}
/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The callee only saves
the lower 64 bits of a 128-bit register. Tell the compiler the callee
clobbers the top 64 bits when restoring the bottom 64 bits. */
@ -3349,7 +3391,9 @@ static bool
aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
tree exp ATTRIBUTE_UNUSED)
{
/* Currently, always true. */
if (aarch64_simd_decl_p (cfun->decl) != aarch64_simd_decl_p (decl))
return false;
return true;
}
@ -4210,6 +4254,7 @@ aarch64_layout_frame (void)
{
HOST_WIDE_INT offset = 0;
int regno, last_fp_reg = INVALID_REGNUM;
bool simd_function = aarch64_simd_decl_p (cfun->decl);
cfun->machine->frame.emit_frame_chain = aarch64_needs_frame_chain ();
@ -4223,6 +4268,17 @@ aarch64_layout_frame (void)
cfun->machine->frame.wb_candidate1 = INVALID_REGNUM;
cfun->machine->frame.wb_candidate2 = INVALID_REGNUM;
/* If this is a non-leaf simd function with calls we assume that
at least one of those calls is to a non-simd function and thus
we must save V8 to V23 in the prologue. */
if (simd_function && !crtl->is_leaf)
{
for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
if (FP_SIMD_SAVED_REGNUM_P (regno))
df_set_regs_ever_live (regno, true);
}
/* First mark all the registers that really need to be saved... */
for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
@ -4245,7 +4301,8 @@ aarch64_layout_frame (void)
for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
if (df_regs_ever_live_p (regno)
&& !call_used_regs[regno])
&& (!call_used_regs[regno]
|| (simd_function && FP_SIMD_SAVED_REGNUM_P (regno))))
{
cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
last_fp_reg = regno;
@ -4287,7 +4344,10 @@ aarch64_layout_frame (void)
{
/* If there is an alignment gap between integer and fp callee-saves,
allocate the last fp register to it if possible. */
if (regno == last_fp_reg && has_align_gap && (offset & 8) == 0)
if (regno == last_fp_reg
&& has_align_gap
&& !simd_function
&& (offset & 8) == 0)
{
cfun->machine->frame.reg_offset[regno] = max_int_offset;
break;
@ -4299,7 +4359,7 @@ aarch64_layout_frame (void)
else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM
&& cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
cfun->machine->frame.wb_candidate2 = regno;
offset += UNITS_PER_WORD;
offset += simd_function ? UNITS_PER_VREG : UNITS_PER_WORD;
}
offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
@ -4442,6 +4502,10 @@ aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
return gen_storewb_pairdf_di (base, base, reg, reg2,
GEN_INT (-adjustment),
GEN_INT (UNITS_PER_WORD - adjustment));
case E_TFmode:
return gen_storewb_pairtf_di (base, base, reg, reg2,
GEN_INT (-adjustment),
GEN_INT (UNITS_PER_VREG - adjustment));
default:
gcc_unreachable ();
}
@ -4454,7 +4518,7 @@ static void
aarch64_push_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment)
{
rtx_insn *insn;
machine_mode mode = (regno1 <= R30_REGNUM) ? E_DImode : E_DFmode;
machine_mode mode = aarch64_reg_save_mode (cfun->decl, regno1);
if (regno2 == INVALID_REGNUM)
return aarch64_pushwb_single_reg (mode, regno1, adjustment);
@ -4484,6 +4548,9 @@ aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
case E_DFmode:
return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
GEN_INT (UNITS_PER_WORD));
case E_TFmode:
return gen_loadwb_pairtf_di (base, base, reg, reg2, GEN_INT (adjustment),
GEN_INT (UNITS_PER_VREG));
default:
gcc_unreachable ();
}
@ -4497,7 +4564,7 @@ static void
aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment,
rtx *cfi_ops)
{
machine_mode mode = (regno1 <= R30_REGNUM) ? E_DImode : E_DFmode;
machine_mode mode = aarch64_reg_save_mode (cfun->decl, regno1);
rtx reg1 = gen_rtx_REG (mode, regno1);
*cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops);
@ -4532,6 +4599,9 @@ aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
case E_DFmode:
return gen_store_pair_dw_dfdf (mem1, reg1, mem2, reg2);
case E_TFmode:
return gen_store_pair_dw_tftf (mem1, reg1, mem2, reg2);
default:
gcc_unreachable ();
}
@ -4552,6 +4622,9 @@ aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
case E_DFmode:
return gen_load_pair_dw_dfdf (reg1, mem1, reg2, mem2);
case E_TFmode:
return gen_load_pair_dw_tftf (reg1, mem1, reg2, mem2);
default:
gcc_unreachable ();
}
@ -4591,6 +4664,7 @@ aarch64_save_callee_saves (machine_mode mode, poly_int64 start_offset,
{
rtx reg, mem;
poly_int64 offset;
int offset_diff;
if (skip_wb
&& (regno == cfun->machine->frame.wb_candidate1
@ -4606,12 +4680,12 @@ aarch64_save_callee_saves (machine_mode mode, poly_int64 start_offset,
offset));
regno2 = aarch64_next_callee_save (regno + 1, limit);
offset_diff = cfun->machine->frame.reg_offset[regno2]
- cfun->machine->frame.reg_offset[regno];
if (regno2 <= limit
&& !cfun->machine->reg_is_wrapped_separately[regno2]
&& ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
== cfun->machine->frame.reg_offset[regno2]))
&& known_eq (GET_MODE_SIZE (mode), offset_diff))
{
rtx reg2 = gen_rtx_REG (mode, regno2);
rtx mem2;
@ -4659,6 +4733,7 @@ aarch64_restore_callee_saves (machine_mode mode,
continue;
rtx reg, mem;
int offset_diff;
if (skip_wb
&& (regno == cfun->machine->frame.wb_candidate1
@ -4670,11 +4745,12 @@ aarch64_restore_callee_saves (machine_mode mode,
mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
regno2 = aarch64_next_callee_save (regno + 1, limit);
offset_diff = cfun->machine->frame.reg_offset[regno2]
- cfun->machine->frame.reg_offset[regno];
if (regno2 <= limit
&& !cfun->machine->reg_is_wrapped_separately[regno2]
&& ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
== cfun->machine->frame.reg_offset[regno2]))
&& known_eq (GET_MODE_SIZE (mode), offset_diff))
{
rtx reg2 = gen_rtx_REG (mode, regno2);
rtx mem2;
@ -4808,13 +4884,15 @@ aarch64_components_for_bb (basic_block bb)
bitmap in = DF_LIVE_IN (bb);
bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
bool simd_function = aarch64_simd_decl_p (cfun->decl);
sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
bitmap_clear (components);
/* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
if ((!call_used_regs[regno])
if ((!call_used_regs[regno]
|| (simd_function && FP_SIMD_SAVED_REGNUM_P (regno)))
&& (bitmap_bit_p (in, regno)
|| bitmap_bit_p (gen, regno)
|| bitmap_bit_p (kill, regno)))
@ -4885,9 +4963,11 @@ aarch64_process_components (sbitmap components, bool prologue_p)
while (regno != last_regno)
{
/* AAPCS64 section 5.1.2 requires only the bottom 64 bits to be saved
so DFmode for the vector registers is enough. */
machine_mode mode = GP_REGNUM_P (regno) ? E_DImode : E_DFmode;
/* AAPCS64 section 5.1.2 requires only the low 64 bits to be saved
so DFmode for the vector registers is enough. For simd functions
we want to save the low 128 bits. */
machine_mode mode = aarch64_reg_save_mode (cfun->decl, regno);
rtx reg = gen_rtx_REG (mode, regno);
poly_int64 offset = cfun->machine->frame.reg_offset[regno];
if (!frame_pointer_needed)
@ -4916,6 +4996,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
mergeable with the current one into a pair. */
if (!satisfies_constraint_Ump (mem)
|| GP_REGNUM_P (regno) != GP_REGNUM_P (regno2)
|| (aarch64_simd_decl_p (cfun->decl) && FP_REGNUM_P (regno))
|| maybe_ne ((offset2 - cfun->machine->frame.reg_offset[regno]),
GET_MODE_SIZE (mode)))
{
@ -5231,6 +5312,28 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
}
}
/* Return 1 if the register is used by the epilogue. We need to say the
return register is used, but only after epilogue generation is complete.
Note that in the case of sibcalls, the values "used by the epilogue" are
considered live at the start of the called function.
For SIMD functions we need to return 1 for FP registers that are saved and
restored by a function but are not zero in call_used_regs. If we do not do
this optimizations may remove the restore of the register. */
int
aarch64_epilogue_uses (int regno)
{
if (epilogue_completed)
{
if (regno == LR_REGNUM)
return 1;
if (aarch64_simd_decl_p (cfun->decl) && FP_SIMD_SAVED_REGNUM_P (regno))
return 1;
}
return 0;
}
/* Add a REG_CFA_EXPRESSION note to INSN to say that register REG
is saved at BASE + OFFSET. */
@ -5405,8 +5508,12 @@ aarch64_expand_prologue (void)
aarch64_save_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
callee_adjust != 0 || emit_frame_chain);
aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
callee_adjust != 0 || emit_frame_chain);
if (aarch64_simd_decl_p (cfun->decl))
aarch64_save_callee_saves (TFmode, callee_offset, V0_REGNUM, V31_REGNUM,
callee_adjust != 0 || emit_frame_chain);
else
aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
callee_adjust != 0 || emit_frame_chain);
/* We may need to probe the final adjustment if it is larger than the guard
that is assumed by the called. */
@ -5432,6 +5539,19 @@ aarch64_use_return_insn_p (void)
return known_eq (cfun->machine->frame.frame_size, 0);
}
/* Return true if a simple_return insn may be used in the current
   function.  The answer is false only for a SIMD function that is not a
   leaf: that keeps such functions from being shrink-wrapped, which would
   lose the necessary save/restore of FP registers.  */

bool
aarch64_use_simple_return_insn_p (void)
{
  return !aarch64_simd_decl_p (cfun->decl) || crtl->is_leaf;
}
/* Generate the epilogue instructions for returning from a function.
This is almost exactly the reverse of the prolog sequence, except
that we need to insert barriers to avoid scheduling loads that read
@ -5500,8 +5620,12 @@ aarch64_expand_epilogue (bool for_sibcall)
aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
callee_adjust != 0, &cfi_ops);
aarch64_restore_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
callee_adjust != 0, &cfi_ops);
if (aarch64_simd_decl_p (cfun->decl))
aarch64_restore_callee_saves (TFmode, callee_offset, V0_REGNUM, V31_REGNUM,
callee_adjust != 0, &cfi_ops);
else
aarch64_restore_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
callee_adjust != 0, &cfi_ops);
if (need_barrier_p)
emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
@ -18422,6 +18546,9 @@ aarch64_libgcc_floating_mode_supported_p
#undef TARGET_ESTIMATED_POLY_VALUE
#define TARGET_ESTIMATED_POLY_VALUE aarch64_estimated_poly_value
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE aarch64_attribute_table
#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests

View File

@ -409,13 +409,7 @@ extern unsigned aarch64_architecture_version;
V_ALIASES(28), V_ALIASES(29), V_ALIASES(30), V_ALIASES(31) \
}
/* Say that the return address register is used by the epilogue, but only after
epilogue generation is complete. Note that in the case of sibcalls, the
values "used by the epilogue" are considered live at the start of the called
function. */
#define EPILOGUE_USES(REGNO) \
(epilogue_completed && (REGNO) == LR_REGNUM)
#define EPILOGUE_USES(REGNO) (aarch64_epilogue_uses (REGNO))
/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,
the stack pointer does not matter. This is only true if the function
@ -523,6 +517,8 @@ extern unsigned aarch64_architecture_version;
#define PR_LO_REGNUM_P(REGNO)\
(((unsigned) (REGNO - P0_REGNUM)) <= (P7_REGNUM - P0_REGNUM))
/* Nonzero if REGNO lies in the range V8_REGNUM .. V23_REGNUM inclusive,
   the vector registers that a SIMD function saves and restores.  The
   subtraction is done in unsigned arithmetic so that one comparison
   checks both bounds: a REGNO below V8_REGNUM wraps to a large value.
   REGNO is parenthesized so that an argument containing a low-precedence
   operator (for example a conditional expression) is evaluated as a
   whole before V8_REGNUM is subtracted.  */
#define FP_SIMD_SAVED_REGNUM_P(REGNO) \
  (((unsigned) ((REGNO) - V8_REGNUM)) <= (V23_REGNUM - V8_REGNUM))
/* Register and constant classes. */

View File

@ -727,7 +727,7 @@
(define_insn "simple_return"
[(simple_return)]
""
"aarch64_use_simple_return_insn_p ()"
"ret"
[(set_attr "type" "branch")]
)
@ -1387,6 +1387,21 @@
(set_attr "arch" "*,fp")]
)
;; Load two TFmode values from memory into two registers with one LDP,
;; printing both destinations with the %q operand modifier.
;; Operand 1 must satisfy aarch64_mem_pair_operand.  Operand 3 is only
;; checked as a general memory_operand; the insn condition ties it to
;; operand 1 by requiring its address to be exactly operand 1's address
;; plus GET_MODE_SIZE (TFmode).  The pattern is only enabled for TARGET_SIMD.
;; NOTE(review): this pattern sets the "fp" attribute whereas the
;; neighbouring pair patterns set "arch" -- confirm "fp" is still a
;; defined attribute.
(define_insn "load_pair_dw_tftf"
[(set (match_operand:TF 0 "register_operand" "=w")
(match_operand:TF 1 "aarch64_mem_pair_operand" "Ump"))
(set (match_operand:TF 2 "register_operand" "=w")
(match_operand:TF 3 "memory_operand" "m"))]
"TARGET_SIMD
&& rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode,
XEXP (operands[1], 0),
GET_MODE_SIZE (TFmode)))"
"ldp\\t%q0, %q2, %1"
[(set_attr "type" "neon_ldp_q")
(set_attr "fp" "yes")]
)
;; Operands 0 and 2 are tied together by the final condition; so we allow
;; fairly lax checking on the second memory operation.
(define_insn "store_pair_sw_<SX:mode><SX2:mode>"
@ -1422,6 +1437,21 @@
(set_attr "arch" "*,fp")]
)
;; Store two TFmode registers to memory with one STP, printing both
;; sources with the %q operand modifier.
;; Operand 0 must satisfy aarch64_mem_pair_operand.  Operand 2 is only
;; checked as a general memory_operand; the insn condition ties it to
;; operand 0 by requiring its address to be exactly operand 0's address
;; plus GET_MODE_SIZE (TFmode).  The pattern is only enabled for TARGET_SIMD.
;; NOTE(review): this pattern sets the "fp" attribute whereas the
;; neighbouring pair patterns set "arch" -- confirm "fp" is still a
;; defined attribute.
(define_insn "store_pair_dw_tftf"
[(set (match_operand:TF 0 "aarch64_mem_pair_operand" "=Ump")
(match_operand:TF 1 "register_operand" "w"))
(set (match_operand:TF 2 "memory_operand" "=m")
(match_operand:TF 3 "register_operand" "w"))]
"TARGET_SIMD &&
rtx_equal_p (XEXP (operands[2], 0),
plus_constant (Pmode,
XEXP (operands[0], 0),
GET_MODE_SIZE (TFmode)))"
"stp\\t%q1, %q3, [%0]"
[(set_attr "type" "neon_stp_q")
(set_attr "fp" "yes")]
)
;; Load pair with post-index writeback. This is primarily used in function
;; epilogues.
(define_insn "loadwb_pair<GPI:mode>_<P:mode>"
@ -1454,6 +1484,21 @@
[(set_attr "type" "neon_load1_2reg")]
)
;; Load pair with post-index writeback for the modes of the TX iterator.
;; In one parallel: operand 0 receives base operand 1 plus the writeback
;; offset operand 4, operand 2 is loaded from the address in operand 1,
;; and operand 3 is loaded from that address plus operand 5.
;; The insn condition requires operand 5 to equal the size of the TX mode,
;; i.e. the two loaded slots are adjacent.  Only enabled for TARGET_SIMD.
(define_insn "loadwb_pair<TX:mode>_<P:mode>"
[(parallel
[(set (match_operand:P 0 "register_operand" "=k")
(plus:P (match_operand:P 1 "register_operand" "0")
(match_operand:P 4 "aarch64_mem_pair_offset" "n")))
(set (match_operand:TX 2 "register_operand" "=w")
(mem:TX (match_dup 1)))
(set (match_operand:TX 3 "register_operand" "=w")
(mem:TX (plus:P (match_dup 1)
(match_operand:P 5 "const_int_operand" "n"))))])]
"TARGET_SIMD && INTVAL (operands[5]) == GET_MODE_SIZE (<TX:MODE>mode)"
"ldp\\t%q2, %q3, [%1], %4"
[(set_attr "type" "neon_ldp_q")]
)
;; Store pair with pre-index writeback. This is primarily used in function
;; prologues.
(define_insn "storewb_pair<GPI:mode>_<P:mode>"
@ -1488,6 +1533,24 @@
[(set_attr "type" "neon_store1_2reg<q>")]
)
;; Store pair with pre-index writeback for the modes of the TX iterator.
;; In one parallel: operand 0 receives base operand 1 plus the writeback
;; offset operand 4, operand 2 is stored at operand 0 plus operand 4, and
;; operand 3 is stored at operand 0 plus operand 5.
;; The insn condition requires operand 5 to equal operand 4 plus the size
;; of the TX mode, i.e. the two stored slots are adjacent.  Only enabled
;; for TARGET_SIMD.
(define_insn "storewb_pair<TX:mode>_<P:mode>"
[(parallel
[(set (match_operand:P 0 "register_operand" "=&k")
(plus:P (match_operand:P 1 "register_operand" "0")
(match_operand:P 4 "aarch64_mem_pair_offset" "n")))
(set (mem:TX (plus:P (match_dup 0)
(match_dup 4)))
(match_operand:TX 2 "register_operand" "w"))
(set (mem:TX (plus:P (match_dup 0)
(match_operand:P 5 "const_int_operand" "n")))
(match_operand:TX 3 "register_operand" "w"))])]
"TARGET_SIMD
&& INTVAL (operands[5])
== INTVAL (operands[4]) + GET_MODE_SIZE (<TX:MODE>mode)"
"stp\\t%q2, %q3, [%0, %4]!"
[(set_attr "type" "neon_stp_q")]
)
;; -------------------------------------------------------------------
;; Sign/Zero extension
;; -------------------------------------------------------------------