mirror of git://gcc.gnu.org/git/gcc.git
				
				
				
			aarch64: Improve cas generation
Do not zero-extend the input to the cas for subword operations;
instead, use the appropriate zero-extending compare insns.
Correct the predicates and constraints for the immediate expected operand.

	* config/aarch64/aarch64.c (aarch64_gen_compare_reg_maybe_ze): New.
	(aarch64_split_compare_and_swap): Use it.
	(aarch64_expand_compare_and_swap): Likewise.  Remove convert_modes;
	test oldval against the proper predicate.
	* config/aarch64/atomics.md (@atomic_compare_and_swap<ALLI>):
	Use nonmemory_operand for expected.
	(cas_short_expected_pred): New.
	(@aarch64_compare_and_swap<SHORT>): Use it; use "rn" not "rI" to match.
	(@aarch64_compare_and_swap<GPI>): Use "rn" not "rI" for expected.
	* config/aarch64/predicates.md (aarch64_plushi_immediate): New.
	(aarch64_plushi_operand): New.

From-SVN: r265657
This commit is contained in:
		
							parent
							
								
									77f33f44ba
								
							
						
					
					
						commit
						d400fda3a8
					
				|  | @ -1,5 +1,17 @@ | |||
| 2018-10-31  Richard Henderson  <richard.henderson@linaro.org> | ||||
| 
 | ||||
| 	* config/aarch64/aarch64.c (aarch64_gen_compare_reg_maybe_ze): New. | ||||
| 	(aarch64_split_compare_and_swap): Use it. | ||||
| 	(aarch64_expand_compare_and_swap): Likewise.  Remove convert_modes; | ||||
| 	test oldval against the proper predicate. | ||||
| 	* config/aarch64/atomics.md (@atomic_compare_and_swap<ALLI>): | ||||
| 	Use nonmemory_operand for expected. | ||||
| 	(cas_short_expected_pred): New. | ||||
| 	(@aarch64_compare_and_swap<SHORT>): Use it; use "rn" not "rI" to match. | ||||
| 	(@aarch64_compare_and_swap<GPI>): Use "rn" not "rI" for expected. | ||||
| 	* config/aarch64/predicates.md (aarch64_plushi_immediate): New. | ||||
| 	(aarch64_plushi_operand): New. | ||||
| 
 | ||||
| 	* config/aarch64/aarch64.c (aarch64_expand_compare_and_swap): | ||||
| 	Force oldval into the rval register for TARGET_LSE; emit the compare | ||||
| 	during initial expansion so that it may be deleted if unused. | ||||
|  |  | |||
|  | @ -1614,6 +1614,33 @@ aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y) | |||
|   return cc_reg; | ||||
| } | ||||
| 
 | ||||
| /* Similar to aarch64_gen_compare_reg, but cope with a Y narrower than | ||||
|    SImode.  Y_MODE is the mode of Y.  When Y_MODE is QImode or HImode: | ||||
|    a CONST_INT Y is masked with GET_MODE_MASK (Y_MODE) and then compared | ||||
|    via aarch64_gen_compare_reg; any other Y is wrapped in a ZERO_EXTEND to | ||||
|    SImode and compared here as (compare (zero_extend Y) X) in CC_SWPmode, | ||||
|    i.e. with Y as the first operand and X as the second, and CODE is not | ||||
|    consulted.  For every other Y_MODE this simply defers to | ||||
|    aarch64_gen_compare_reg.  Returns the condition-code register rtx that | ||||
|    was set.  */ | ||||
| 
 | ||||
| static rtx | ||||
| aarch64_gen_compare_reg_maybe_ze (RTX_CODE code, rtx x, rtx y, | ||||
|                                   machine_mode y_mode) | ||||
| { | ||||
|   if (y_mode == E_QImode || y_mode == E_HImode) | ||||
|     { | ||||
|       if (CONST_INT_P (y)) | ||||
| 	y = GEN_INT (INTVAL (y) & GET_MODE_MASK (y_mode)); | ||||
|       else | ||||
| 	{ | ||||
| 	  rtx t, cc_reg; | ||||
| 	  machine_mode cc_mode; | ||||
| 
 | ||||
| 	  t = gen_rtx_ZERO_EXTEND (SImode, y); | ||||
| 	  t = gen_rtx_COMPARE (CC_SWPmode, t, x); | ||||
| 	  cc_mode = CC_SWPmode; | ||||
| 	  cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM); | ||||
| 	  emit_set_insn (cc_reg, t); | ||||
| 	  return cc_reg; | ||||
| 	} | ||||
|     } | ||||
| 
 | ||||
|   return aarch64_gen_compare_reg (code, x, y); | ||||
| } | ||||
| 
 | ||||
| /* Build the SYMBOL_REF for __tls_get_addr.  */ | ||||
| 
 | ||||
| static GTY(()) rtx tls_get_addr_libfunc; | ||||
|  | @ -14575,8 +14602,8 @@ aarch64_emit_unlikely_jump (rtx insn) | |||
| void | ||||
| aarch64_expand_compare_and_swap (rtx operands[]) | ||||
| { | ||||
|   rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x; | ||||
|   machine_mode mode, cmp_mode; | ||||
|   rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x, cc_reg; | ||||
|   machine_mode mode, r_mode; | ||||
| 
 | ||||
|   bval = operands[0]; | ||||
|   rval = operands[1]; | ||||
|  | @ -14587,36 +14614,19 @@ aarch64_expand_compare_and_swap (rtx operands[]) | |||
|   mod_s = operands[6]; | ||||
|   mod_f = operands[7]; | ||||
|   mode = GET_MODE (mem); | ||||
|   cmp_mode = mode; | ||||
| 
 | ||||
|   /* Normally the succ memory model must be stronger than fail, but in the
 | ||||
|      unlikely event of fail being ACQUIRE and succ being RELEASE we need to | ||||
|      promote succ to ACQ_REL so that we don't lose the acquire semantics.  */ | ||||
| 
 | ||||
|   if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f))) | ||||
|       && is_mm_release (memmodel_from_int (INTVAL (mod_s)))) | ||||
|     mod_s = GEN_INT (MEMMODEL_ACQ_REL); | ||||
| 
 | ||||
|   switch (mode) | ||||
|   r_mode = mode; | ||||
|   if (mode == QImode || mode == HImode) | ||||
|     { | ||||
|     case E_QImode: | ||||
|     case E_HImode: | ||||
|       /* For short modes, we're going to perform the comparison in SImode,
 | ||||
| 	 so do the zero-extension now.  */ | ||||
|       cmp_mode = SImode; | ||||
|       rval = gen_reg_rtx (SImode); | ||||
|       oldval = convert_modes (SImode, mode, oldval, true); | ||||
|       /* Fall through.  */ | ||||
| 
 | ||||
|     case E_SImode: | ||||
|     case E_DImode: | ||||
|       /* Force the value into a register if needed.  */ | ||||
|       if (!aarch64_plus_operand (oldval, mode)) | ||||
| 	oldval = force_reg (cmp_mode, oldval); | ||||
|       break; | ||||
| 
 | ||||
|     default: | ||||
|       gcc_unreachable (); | ||||
|       r_mode = SImode; | ||||
|       rval = gen_reg_rtx (r_mode); | ||||
|     } | ||||
| 
 | ||||
|   if (TARGET_LSE) | ||||
|  | @ -14624,26 +14634,32 @@ aarch64_expand_compare_and_swap (rtx operands[]) | |||
|       /* The CAS insn requires oldval and rval overlap, but we need to
 | ||||
| 	 have a copy of oldval saved across the operation to tell if | ||||
| 	 the operation is successful.  */ | ||||
|       if (mode == QImode || mode == HImode) | ||||
| 	rval = copy_to_mode_reg (SImode, gen_lowpart (SImode, oldval)); | ||||
|       else if (reg_overlap_mentioned_p (rval, oldval)) | ||||
|         rval = copy_to_mode_reg (mode, oldval); | ||||
|       if (reg_overlap_mentioned_p (rval, oldval)) | ||||
|         rval = copy_to_mode_reg (r_mode, oldval); | ||||
|       else | ||||
| 	emit_move_insn (rval, oldval); | ||||
| 	emit_move_insn (rval, gen_lowpart (r_mode, oldval)); | ||||
| 
 | ||||
|       emit_insn (gen_aarch64_compare_and_swap_lse (mode, rval, mem, | ||||
| 						   newval, mod_s)); | ||||
|       aarch64_gen_compare_reg (EQ, rval, oldval); | ||||
|       cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode); | ||||
|     } | ||||
|   else | ||||
|     emit_insn (gen_aarch64_compare_and_swap (mode, rval, mem, oldval, newval, | ||||
| 					     is_weak, mod_s, mod_f)); | ||||
|     { | ||||
|       /* The oldval predicate varies by mode.  Test it and force to reg.  */ | ||||
|       insn_code code = code_for_aarch64_compare_and_swap (mode); | ||||
|       if (!insn_data[code].operand[2].predicate (oldval, mode)) | ||||
| 	oldval = force_reg (mode, oldval); | ||||
| 
 | ||||
|   if (mode == QImode || mode == HImode) | ||||
|       emit_insn (GEN_FCN (code) (rval, mem, oldval, newval, | ||||
| 				 is_weak, mod_s, mod_f)); | ||||
|       cc_reg = gen_rtx_REG (CCmode, CC_REGNUM); | ||||
|     } | ||||
| 
 | ||||
|   if (r_mode != mode) | ||||
|     rval = gen_lowpart (mode, rval); | ||||
|   emit_move_insn (operands[1], rval); | ||||
| 
 | ||||
|   x = gen_rtx_REG (CCmode, CC_REGNUM); | ||||
|   x = gen_rtx_EQ (SImode, x, const0_rtx); | ||||
|   x = gen_rtx_EQ (SImode, cc_reg, const0_rtx); | ||||
|   emit_insn (gen_rtx_SET (bval, x)); | ||||
| } | ||||
| 
 | ||||
|  | @ -14758,10 +14774,10 @@ aarch64_split_compare_and_swap (rtx operands[]) | |||
|     } | ||||
|   else | ||||
|     { | ||||
|       cond = aarch64_gen_compare_reg (NE, rval, oldval); | ||||
|       cond = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode); | ||||
|       x = gen_rtx_NE (VOIDmode, cond, const0_rtx); | ||||
|       x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, | ||||
| 				 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx); | ||||
| 				gen_rtx_LABEL_REF (Pmode, label2), pc_rtx); | ||||
|       aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); | ||||
|     } | ||||
| 
 | ||||
|  |  | |||
|  | @ -24,8 +24,8 @@ | |||
|   [(match_operand:SI 0 "register_operand" "")			;; bool out | ||||
|    (match_operand:ALLI 1 "register_operand" "")			;; val out | ||||
|    (match_operand:ALLI 2 "aarch64_sync_memory_operand" "")	;; memory | ||||
|    (match_operand:ALLI 3 "general_operand" "")			;; expected | ||||
|    (match_operand:ALLI 4 "aarch64_reg_or_zero" "")			;; desired | ||||
|    (match_operand:ALLI 3 "nonmemory_operand" "")		;; expected | ||||
|    (match_operand:ALLI 4 "aarch64_reg_or_zero" "")		;; desired | ||||
|    (match_operand:SI 5 "const_int_operand")			;; is_weak | ||||
|    (match_operand:SI 6 "const_int_operand")			;; mod_s | ||||
|    (match_operand:SI 7 "const_int_operand")]			;; mod_f | ||||
|  | @ -36,19 +36,22 @@ | |||
|   } | ||||
| ) | ||||
| 
 | ||||
| (define_mode_attr cas_short_expected_pred | ||||
|   [(QI "aarch64_reg_or_imm") (HI "aarch64_plushi_operand")]) | ||||
| 
 | ||||
| (define_insn_and_split "@aarch64_compare_and_swap<mode>" | ||||
|   [(set (reg:CC CC_REGNUM)					;; bool out | ||||
|     (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW)) | ||||
|    (set (match_operand:SI 0 "register_operand" "=&r")	   ;; val out | ||||
|    (set (match_operand:SI 0 "register_operand" "=&r")		;; val out | ||||
|     (zero_extend:SI | ||||
|       (match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))) ;; memory | ||||
|    (set (match_dup 1) | ||||
|     (unspec_volatile:SHORT | ||||
|       [(match_operand:SI 2 "aarch64_plus_operand" "rI")	;; expected | ||||
|       [(match_operand:SHORT 2 "<cas_short_expected_pred>" "rn")	;; expected | ||||
|        (match_operand:SHORT 3 "aarch64_reg_or_zero" "rZ")	;; desired | ||||
|        (match_operand:SI 4 "const_int_operand")		;; is_weak | ||||
|        (match_operand:SI 5 "const_int_operand")		;; mod_s | ||||
|        (match_operand:SI 6 "const_int_operand")]	;; mod_f | ||||
|        (match_operand:SI 4 "const_int_operand")			;; is_weak | ||||
|        (match_operand:SI 5 "const_int_operand")			;; mod_s | ||||
|        (match_operand:SI 6 "const_int_operand")]		;; mod_f | ||||
|       UNSPECV_ATOMIC_CMPSW)) | ||||
|    (clobber (match_scratch:SI 7 "=&r"))] | ||||
|   "" | ||||
|  | @ -68,7 +71,7 @@ | |||
|     (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q"))   ;; memory | ||||
|    (set (match_dup 1) | ||||
|     (unspec_volatile:GPI | ||||
|       [(match_operand:GPI 2 "aarch64_plus_operand" "rI")	;; expect | ||||
|       [(match_operand:GPI 2 "aarch64_plus_operand" "rn")	;; expect | ||||
|        (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")		;; desired | ||||
|        (match_operand:SI 4 "const_int_operand")			;; is_weak | ||||
|        (match_operand:SI 5 "const_int_operand")			;; mod_s | ||||
|  |  | |||
|  | @ -114,6 +114,18 @@ | |||
|   (ior (match_operand 0 "register_operand") | ||||
|        (match_operand 0 "aarch64_plus_immediate"))) | ||||
| 
 | ||||
| (define_predicate "aarch64_plushi_immediate" | ||||
|   (match_code "const_int") | ||||
| { | ||||
|   HOST_WIDE_INT val = INTVAL (op); | ||||
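|   /* The HImode value must be zero-extendable to an SImode plus_operand: either it fits in the low 12 bits, or it is the 16-bit sign-extension of a value with only bits 12-15 set.  */ | ||||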
|   return ((val & 0xfff) == val || sext_hwi (val & 0xf000, 16) == val); | ||||
| }) | ||||
| 
 | ||||
| (define_predicate "aarch64_plushi_operand" | ||||
|   (ior (match_operand 0 "register_operand") | ||||
|        (match_operand 0 "aarch64_plushi_immediate"))) | ||||
| 
 | ||||
| (define_predicate "aarch64_pluslong_immediate" | ||||
|   (and (match_code "const_int") | ||||
|        (match_test "(INTVAL (op) < 0xffffff && INTVAL (op) > -0xffffff)"))) | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	 Richard Henderson
						Richard Henderson