mirror of git://gcc.gnu.org/git/gcc.git
				
				
				
			[AArch64, 3/6] Restrict indirect tail calls to x16 and x17
This patch is part of a series that enables ARMv8.5-A in GCC and adds the Branch Target Identification Mechanism. This patch changes the registers that are allowed for indirect tail calls. We are choosing to restrict these to only x16 or x17. Indirect tail calls are special in that they convert a call statement (BLR instruction) to a jump statement (BR instruction). For the best possible use of the Branch Target Identification Mechanism, we would like to place a "BTI C" (call) at the beginning of the function, which is only compatible with BLRs and BR X16/X17. In order to make indirect tail calls compatible with this scenario, we are restricting the TAILCALL_ADDR_REGS. In order to use x16/x17 for this purpose, we also had to change the use of these registers in the epilogue/prologue handling. For this purpose we are now using x12 and x13, named EP0_REGNUM and EP1_REGNUM, as scratch registers for epilogue and prologue. *** gcc/ChangeLog *** 2018-01-09 Sudakshina Das <sudi.das@arm.com> * config/aarch64/aarch64.c (aarch64_expand_prologue): Use new epilogue/prologue scratch registers EP0_REGNUM and EP1_REGNUM. (aarch64_expand_epilogue): Likewise. (aarch64_output_mi_thunk): Likewise. * config/aarch64/aarch64.h (REG_CLASS_CONTENTS): Change TAILCALL_ADDR_REGS to x16 and x17. * config/aarch64/aarch64.md: Define EP0_REGNUM and EP1_REGNUM. *** gcc/testsuite/ChangeLog *** 2018-01-09 Sudakshina Das <sudi.das@arm.com> * gcc.target/aarch64/test_frame_17.c: Update to check for EP0_REGNUM instead of IP0_REGNUM and add test case. From-SVN: r267767
This commit is contained in:
		
							parent
							
								
									9b4247de4f
								
							
						
					
					
						commit
						901e66e03e
					
				|  | @ -1,3 +1,13 @@ | ||||||
|  | 2018-01-09  Sudakshina Das  <sudi.das@arm.com> | ||||||
|  | 
 | ||||||
|  | 	* config/aarch64/aarch64.c (aarch64_expand_prologue): Use new | ||||||
|  | 	epilogue/prologue scratch registers EP0_REGNUM and EP1_REGNUM. | ||||||
|  | 	(aarch64_expand_epilogue): Likewise. | ||||||
|  | 	(aarch64_output_mi_thunk): Likewise. | ||||||
|  | 	* config/aarch64/aarch64.h (REG_CLASS_CONTENTS): Change | ||||||
|  | 	TAILCALL_ADDR_REGS to x16 and x17. | ||||||
|  | 	* config/aarch64/aarch64.md: Define EP0_REGNUM and EP1_REGNUM. | ||||||
|  | 
 | ||||||
| 2018-01-09  Sudakshina Das  <sudi.das@arm.com> | 2018-01-09  Sudakshina Das  <sudi.das@arm.com> | ||||||
| 
 | 
 | ||||||
| 	* config/aarch64/aarch64-option-extensions.def: Define | 	* config/aarch64/aarch64-option-extensions.def: Define | ||||||
|  |  | ||||||
|  | @ -5521,8 +5521,8 @@ aarch64_expand_prologue (void) | ||||||
| 	aarch64_emit_probe_stack_range (get_stack_check_protect (), frame_size); | 	aarch64_emit_probe_stack_range (get_stack_check_protect (), frame_size); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|   rtx ip0_rtx = gen_rtx_REG (Pmode, IP0_REGNUM); |   rtx tmp0_rtx = gen_rtx_REG (Pmode, EP0_REGNUM); | ||||||
|   rtx ip1_rtx = gen_rtx_REG (Pmode, IP1_REGNUM); |   rtx tmp1_rtx = gen_rtx_REG (Pmode, EP1_REGNUM); | ||||||
| 
 | 
 | ||||||
|   /* In theory we should never have both an initial adjustment
 |   /* In theory we should never have both an initial adjustment
 | ||||||
|      and a callee save adjustment.  Verify that is the case since the |      and a callee save adjustment.  Verify that is the case since the | ||||||
|  | @ -5532,7 +5532,7 @@ aarch64_expand_prologue (void) | ||||||
|   /* Will only probe if the initial adjustment is larger than the guard
 |   /* Will only probe if the initial adjustment is larger than the guard
 | ||||||
|      less the amount of the guard reserved for use by the caller's |      less the amount of the guard reserved for use by the caller's | ||||||
|      outgoing args.  */ |      outgoing args.  */ | ||||||
|   aarch64_allocate_and_probe_stack_space (ip0_rtx, ip1_rtx, initial_adjust, |   aarch64_allocate_and_probe_stack_space (tmp0_rtx, tmp1_rtx, initial_adjust, | ||||||
| 					  true, false); | 					  true, false); | ||||||
| 
 | 
 | ||||||
|   if (callee_adjust != 0) |   if (callee_adjust != 0) | ||||||
|  | @ -5550,7 +5550,7 @@ aarch64_expand_prologue (void) | ||||||
| 	} | 	} | ||||||
|       aarch64_add_offset (Pmode, hard_frame_pointer_rtx, |       aarch64_add_offset (Pmode, hard_frame_pointer_rtx, | ||||||
| 			  stack_pointer_rtx, callee_offset, | 			  stack_pointer_rtx, callee_offset, | ||||||
| 			  ip1_rtx, ip0_rtx, frame_pointer_needed); | 			  tmp1_rtx, tmp0_rtx, frame_pointer_needed); | ||||||
|       if (frame_pointer_needed && !frame_size.is_constant ()) |       if (frame_pointer_needed && !frame_size.is_constant ()) | ||||||
| 	{ | 	{ | ||||||
| 	  /* Variable-sized frames need to describe the save slot
 | 	  /* Variable-sized frames need to describe the save slot
 | ||||||
|  | @ -5596,7 +5596,7 @@ aarch64_expand_prologue (void) | ||||||
| 
 | 
 | ||||||
|   /* We may need to probe the final adjustment if it is larger than the guard
 |   /* We may need to probe the final adjustment if it is larger than the guard
 | ||||||
|      that is assumed by the callee.  */ |      that is assumed by the callee.  */ | ||||||
|   aarch64_allocate_and_probe_stack_space (ip1_rtx, ip0_rtx, final_adjust, |   aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust, | ||||||
| 					  !frame_pointer_needed, true); | 					  !frame_pointer_needed, true); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -5647,8 +5647,8 @@ aarch64_expand_epilogue (bool for_sibcall) | ||||||
|   unsigned reg2 = cfun->machine->frame.wb_candidate2; |   unsigned reg2 = cfun->machine->frame.wb_candidate2; | ||||||
|   rtx cfi_ops = NULL; |   rtx cfi_ops = NULL; | ||||||
|   rtx_insn *insn; |   rtx_insn *insn; | ||||||
|   /* A stack clash protection prologue may not have left IP0_REGNUM or
 |   /* A stack clash protection prologue may not have left EP0_REGNUM or
 | ||||||
|      IP1_REGNUM in a usable state.  The same is true for allocations |      EP1_REGNUM in a usable state.  The same is true for allocations | ||||||
|      with an SVE component, since we then need both temporary registers |      with an SVE component, since we then need both temporary registers | ||||||
|      for each allocation.  For stack clash we are in a usable state if |      for each allocation.  For stack clash we are in a usable state if | ||||||
|      the adjustment is less than GUARD_SIZE - GUARD_USED_BY_CALLER.  */ |      the adjustment is less than GUARD_SIZE - GUARD_USED_BY_CALLER.  */ | ||||||
|  | @ -5682,20 +5682,20 @@ aarch64_expand_epilogue (bool for_sibcall) | ||||||
| 
 | 
 | ||||||
|   /* Restore the stack pointer from the frame pointer if it may not
 |   /* Restore the stack pointer from the frame pointer if it may not
 | ||||||
|      be the same as the stack pointer.  */ |      be the same as the stack pointer.  */ | ||||||
|   rtx ip0_rtx = gen_rtx_REG (Pmode, IP0_REGNUM); |   rtx tmp0_rtx = gen_rtx_REG (Pmode, EP0_REGNUM); | ||||||
|   rtx ip1_rtx = gen_rtx_REG (Pmode, IP1_REGNUM); |   rtx tmp1_rtx = gen_rtx_REG (Pmode, EP1_REGNUM); | ||||||
|   if (frame_pointer_needed |   if (frame_pointer_needed | ||||||
|       && (maybe_ne (final_adjust, 0) || cfun->calls_alloca)) |       && (maybe_ne (final_adjust, 0) || cfun->calls_alloca)) | ||||||
|     /* If writeback is used when restoring callee-saves, the CFA
 |     /* If writeback is used when restoring callee-saves, the CFA
 | ||||||
|        is restored on the instruction doing the writeback.  */ |        is restored on the instruction doing the writeback.  */ | ||||||
|     aarch64_add_offset (Pmode, stack_pointer_rtx, |     aarch64_add_offset (Pmode, stack_pointer_rtx, | ||||||
| 			hard_frame_pointer_rtx, -callee_offset, | 			hard_frame_pointer_rtx, -callee_offset, | ||||||
| 			ip1_rtx, ip0_rtx, callee_adjust == 0); | 			tmp1_rtx, tmp0_rtx, callee_adjust == 0); | ||||||
|   else |   else | ||||||
|      /* The case where we need to re-use the register here is very rare, so
 |      /* The case where we need to re-use the register here is very rare, so
 | ||||||
| 	avoid the complicated condition and just always emit a move if the | 	avoid the complicated condition and just always emit a move if the | ||||||
| 	immediate doesn't fit.  */ | 	immediate doesn't fit.  */ | ||||||
|      aarch64_add_sp (ip1_rtx, ip0_rtx, final_adjust, true); |      aarch64_add_sp (tmp1_rtx, tmp0_rtx, final_adjust, true); | ||||||
| 
 | 
 | ||||||
|   aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM, |   aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM, | ||||||
| 				callee_adjust != 0, &cfi_ops); | 				callee_adjust != 0, &cfi_ops); | ||||||
|  | @ -5722,8 +5722,11 @@ aarch64_expand_epilogue (bool for_sibcall) | ||||||
|       cfi_ops = NULL; |       cfi_ops = NULL; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|   aarch64_add_sp (ip0_rtx, ip1_rtx, initial_adjust, |   /* Liveness of EP0_REGNUM cannot be trusted across function calls either, so
 | ||||||
| 		  !can_inherit_p || df_regs_ever_live_p (IP0_REGNUM)); |      restrict the emit_move optimization to leaf functions.  */ | ||||||
|  |   aarch64_add_sp (tmp0_rtx, tmp1_rtx, initial_adjust, | ||||||
|  | 		  (!can_inherit_p || !crtl->is_leaf | ||||||
|  | 		   || df_regs_ever_live_p (EP0_REGNUM))); | ||||||
| 
 | 
 | ||||||
|   if (cfi_ops) |   if (cfi_ops) | ||||||
|     { |     { | ||||||
|  | @ -5829,8 +5832,8 @@ aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, | ||||||
|   emit_note (NOTE_INSN_PROLOGUE_END); |   emit_note (NOTE_INSN_PROLOGUE_END); | ||||||
| 
 | 
 | ||||||
|   this_rtx = gen_rtx_REG (Pmode, this_regno); |   this_rtx = gen_rtx_REG (Pmode, this_regno); | ||||||
|   temp0 = gen_rtx_REG (Pmode, IP0_REGNUM); |   temp0 = gen_rtx_REG (Pmode, EP0_REGNUM); | ||||||
|   temp1 = gen_rtx_REG (Pmode, IP1_REGNUM); |   temp1 = gen_rtx_REG (Pmode, EP1_REGNUM); | ||||||
| 
 | 
 | ||||||
|   if (vcall_offset == 0) |   if (vcall_offset == 0) | ||||||
|     aarch64_add_offset (Pmode, this_rtx, this_rtx, delta, temp1, temp0, false); |     aarch64_add_offset (Pmode, this_rtx, this_rtx, delta, temp1, temp0, false); | ||||||
|  |  | ||||||
|  | @ -579,7 +579,7 @@ enum reg_class | ||||||
| #define REG_CLASS_CONTENTS						\ | #define REG_CLASS_CONTENTS						\ | ||||||
| {									\ | {									\ | ||||||
|   { 0x00000000, 0x00000000, 0x00000000 },	/* NO_REGS */		\ |   { 0x00000000, 0x00000000, 0x00000000 },	/* NO_REGS */		\ | ||||||
|   { 0x0004ffff, 0x00000000, 0x00000000 },	/* TAILCALL_ADDR_REGS */\ |   { 0x00030000, 0x00000000, 0x00000000 },	/* TAILCALL_ADDR_REGS */\ | ||||||
|   { 0x7fffffff, 0x00000000, 0x00000003 },	/* GENERAL_REGS */	\ |   { 0x7fffffff, 0x00000000, 0x00000003 },	/* GENERAL_REGS */	\ | ||||||
|   { 0x80000000, 0x00000000, 0x00000000 },	/* STACK_REG */		\ |   { 0x80000000, 0x00000000, 0x00000000 },	/* STACK_REG */		\ | ||||||
|   { 0xffffffff, 0x00000000, 0x00000003 },	/* POINTER_REGS */	\ |   { 0xffffffff, 0x00000000, 0x00000003 },	/* POINTER_REGS */	\ | ||||||
|  |  | ||||||
|  | @ -35,6 +35,9 @@ | ||||||
|     (R11_REGNUM		11) |     (R11_REGNUM		11) | ||||||
|     (R12_REGNUM		12) |     (R12_REGNUM		12) | ||||||
|     (R13_REGNUM		13) |     (R13_REGNUM		13) | ||||||
|  |     ;; Scratch registers for prologue/epilogue use. | ||||||
|  |     (EP0_REGNUM		12) | ||||||
|  |     (EP1_REGNUM		13) | ||||||
|     (R14_REGNUM		14) |     (R14_REGNUM		14) | ||||||
|     (R15_REGNUM		15) |     (R15_REGNUM		15) | ||||||
|     (R16_REGNUM		16) |     (R16_REGNUM		16) | ||||||
|  |  | ||||||
|  | @ -1,3 +1,8 @@ | ||||||
|  | 2018-01-09  Sudakshina Das  <sudi.das@arm.com> | ||||||
|  | 
 | ||||||
|  | 	* gcc.target/aarch64/test_frame_17.c: Update to check for EP0_REGNUM | ||||||
|  | 	instead of IP0_REGNUM and add test case. | ||||||
|  | 
 | ||||||
| 2019-01-09  Alejandro Martinez  <alejandro.martinezvicente@arm.com> | 2019-01-09  Alejandro Martinez  <alejandro.martinezvicente@arm.com> | ||||||
| 
 | 
 | ||||||
| 	* gcc.target/aarch64/sve/copysign_1.c: New test for SVE vectorized | 	* gcc.target/aarch64/sve/copysign_1.c: New test for SVE vectorized | ||||||
|  |  | ||||||
|  | @ -1,16 +1,27 @@ | ||||||
| /* { dg-do compile } */ | /* { dg-do compile } */ | ||||||
| /* { dg-options "-O2 --save-temps" } */ | /* { dg-options "-O2" } */ | ||||||
| 
 | 
 | ||||||
| /* Test reuse of stack adjustment temporaries.  */ | /* Test reuse of stack adjustment temporaries.  */ | ||||||
| 
 | 
 | ||||||
| void foo (); | void foo (); | ||||||
| 
 | 
 | ||||||
|  | /* Should only use 1 mov and re-use it.  */ | ||||||
| int reuse_mov (int i) | int reuse_mov (int i) | ||||||
| { | { | ||||||
|   int arr[1025]; |   int arr[1025]; | ||||||
|   return arr[i]; |   return arr[i]; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /* Should use 2 movs because x12 is live.  */ | ||||||
|  | int no_reuse_mov_live (int i) | ||||||
|  | { | ||||||
|  |   int arr[1025]; | ||||||
|  |   register long long a __asm("x12"); | ||||||
|  |   a = a+1; | ||||||
|  |   return arr[i] + a; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /* Should use 2 movs because it's not a leaf function.  */ | ||||||
| int no_reuse_mov (int i) | int no_reuse_mov (int i) | ||||||
| { | { | ||||||
|   int arr[1025]; |   int arr[1025]; | ||||||
|  | @ -18,4 +29,4 @@ int no_reuse_mov (int i) | ||||||
|   return arr[i]; |   return arr[i]; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /* { dg-final { scan-assembler-times "mov\tx16, \[0-9\]+" 3 } } */ | /* { dg-final { scan-assembler-times "mov\tx12, \[0-9\]+" 5 } } */ | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	 Sudakshina Das
						Sudakshina Das