mirror of git://gcc.gnu.org/git/gcc.git
				
				
				
			[NDS32] Optimize movmem and setmem operations.
gcc/ * config/nds32/nds32-intrinsic.md (unaligned_storedi): Modify patterns implementation. (unaligned_store_dw): Ditto. * config/nds32/nds32-memory-manipulation.c (nds32_expand_movmemsi_loop_known_size): Refactoring implementation. (nds32_gen_dup_4_byte_to_word_value): Rename to ... (nds32_gen_dup_4_byte_to_word_value_aux): ... this. (emit_setmem_word_loop): Rename to ... (emit_setmem_doubleword_loop): ... this. (nds32_gen_dup_4_byte_to_word_value): New function. (nds32_gen_dup_8_byte_to_double_word_value): New function. (nds32_expand_setmem_loop): Refine implementation. (nds32_expand_setmem_loop_v3m): Ditto. * config/nds32/nds32-multiple.md (unaligned_store_update_base_dw): New pattern. Co-Authored-By: Chung-Ju Wu <jasonwucj@gmail.com> From-SVN: r260805
This commit is contained in:
		
							parent
							
								
									0be3bad705
								
							
						
					
					
						commit
						8889fbe542
					
				|  | @ -1,3 +1,22 @@ | |||
| 2018-05-27  Monk Chiang  <sh.chiang04@gmail.com> | ||||
| 	    Chung-Ju Wu  <jasonwucj@gmail.com> | ||||
| 
 | ||||
| 	* config/nds32/nds32-intrinsic.md (unaligned_storedi): Modify patterns | ||||
| 	implementation. | ||||
| 	(unaligned_store_dw): Ditto. | ||||
| 	* config/nds32/nds32-memory-manipulation.c | ||||
| 	(nds32_expand_movmemsi_loop_known_size): Refactoring implementation. | ||||
| 	(nds32_gen_dup_4_byte_to_word_value): Rename to ... | ||||
| 	(nds32_gen_dup_4_byte_to_word_value_aux): ... this. | ||||
| 	(emit_setmem_word_loop): Rename to ... | ||||
| 	(emit_setmem_doubleword_loop): ... this. | ||||
| 	(nds32_gen_dup_4_byte_to_word_value): New function. | ||||
| 	(nds32_gen_dup_8_byte_to_double_word_value): New function. | ||||
| 	(nds32_expand_setmem_loop): Refine implementation. | ||||
| 	(nds32_expand_setmem_loop_v3m): Ditto. | ||||
| 	* config/nds32/nds32-multiple.md (unaligned_store_update_base_dw): New | ||||
| 	pattern. | ||||
| 
 | ||||
| 2018-05-27  Chung-Ju Wu  <jasonwucj@gmail.com> | ||||
| 
 | ||||
| 	* config/nds32/nds32.md (bswapsi2, bswaphi2): New patterns. | ||||
|  |  | |||
|  | @ -1596,22 +1596,17 @@ | |||
|   if (TARGET_ISA_V3M) | ||||
|     nds32_expand_unaligned_store (operands, DImode); | ||||
|   else | ||||
|     emit_insn (gen_unaligned_store_dw (operands[0], operands[1])); | ||||
|     emit_insn (gen_unaligned_store_dw (gen_rtx_MEM (DImode, operands[0]), | ||||
| 				       operands[1])); | ||||
|   DONE; | ||||
| }) | ||||
| 
 | ||||
| (define_insn "unaligned_store_dw" | ||||
|   [(set (mem:DI (match_operand:SI 0 "register_operand" "r")) | ||||
| 	(unspec:DI [(match_operand:DI 1 "register_operand" "r")] UNSPEC_UASTORE_DW))] | ||||
|   [(set (match_operand:DI 0 "nds32_lmw_smw_base_operand"   "=Umw") | ||||
| 	(unspec:DI [(match_operand:DI 1 "register_operand" "   r")] UNSPEC_UASTORE_DW))] | ||||
|   "" | ||||
| { | ||||
|   rtx otherops[3]; | ||||
|   otherops[0] = gen_rtx_REG (SImode, REGNO (operands[1])); | ||||
|   otherops[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1); | ||||
|   otherops[2] = operands[0]; | ||||
| 
 | ||||
|   output_asm_insn ("smw.bi\t%0, [%2], %1, 0", otherops); | ||||
|   return ""; | ||||
|   return nds32_output_smw_double_word (operands); | ||||
| } | ||||
|   [(set_attr "type"   "store") | ||||
|    (set_attr "length"     "4")] | ||||
|  |  | |||
|  | @ -257,8 +257,124 @@ static bool | |||
| nds32_expand_movmemsi_loop_known_size (rtx dstmem, rtx srcmem, | ||||
| 				       rtx size, rtx alignment) | ||||
| { | ||||
|   return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem, | ||||
| 						  size, alignment); | ||||
|   rtx dst_base_reg, src_base_reg; | ||||
|   rtx dst_itr, src_itr; | ||||
|   rtx dstmem_m, srcmem_m, dst_itr_m, src_itr_m; | ||||
|   rtx dst_end; | ||||
|   rtx double_word_mode_loop, byte_mode_loop; | ||||
|   rtx tmp; | ||||
|   int start_regno; | ||||
|   bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0; | ||||
|   unsigned HOST_WIDE_INT total_bytes = UINTVAL (size); | ||||
| 
 | ||||
|   if (TARGET_ISA_V3M && !align_to_4_bytes) | ||||
|     return 0; | ||||
| 
 | ||||
|   if (TARGET_REDUCED_REGS) | ||||
|     start_regno = 2; | ||||
|   else | ||||
|     start_regno = 16; | ||||
| 
 | ||||
|   dst_itr = gen_reg_rtx (Pmode); | ||||
|   src_itr = gen_reg_rtx (Pmode); | ||||
|   dst_end = gen_reg_rtx (Pmode); | ||||
|   tmp = gen_reg_rtx (QImode); | ||||
| 
 | ||||
|   double_word_mode_loop = gen_label_rtx (); | ||||
|   byte_mode_loop = gen_label_rtx (); | ||||
| 
 | ||||
|   dst_base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0)); | ||||
|   src_base_reg = copy_to_mode_reg (Pmode, XEXP (srcmem, 0)); | ||||
| 
 | ||||
|   if (total_bytes < 8) | ||||
|     { | ||||
|       /* Emit total_bytes less than 8 loop version of movmem.
 | ||||
| 	add     $dst_end, $dst, $size | ||||
| 	move    $dst_itr, $dst | ||||
| 	move    $src_itr, $src | ||||
| 	.Lbyte_mode_loop: | ||||
| 	lbi.bi  $tmp, [$src_itr], #1 | ||||
| 	sbi.bi  $tmp, [$dst_itr], #1 | ||||
| 	! Upper bound not reached yet. Loop. | ||||
| 	bne     $dst_itr, $dst_end, .Lbyte_mode_loop */ | ||||
| 
 | ||||
|       /* add     $dst_end, $dst, $size */ | ||||
|       dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size, | ||||
| 			      NULL_RTX, 0, OPTAB_WIDEN); | ||||
|       /* move    $dst_itr, $dst
 | ||||
| 	 move    $src_itr, $src */ | ||||
|       emit_move_insn (dst_itr, dst_base_reg); | ||||
|       emit_move_insn (src_itr, src_base_reg); | ||||
| 
 | ||||
|       /* .Lbyte_mode_loop: */ | ||||
|       emit_label (byte_mode_loop); | ||||
| 
 | ||||
|       /* lbi.bi  $tmp, [$src_itr], #1 */ | ||||
|       nds32_emit_post_inc_load_store (tmp, src_itr, QImode, true); | ||||
| 
 | ||||
|       /* sbi.bi  $tmp, [$dst_itr], #1 */ | ||||
|       nds32_emit_post_inc_load_store (tmp, dst_itr, QImode, false); | ||||
|       /* ! Upper bound not reached yet. Loop.
 | ||||
| 	 bne     $dst_itr, $dst_end, .Lbyte_mode_loop */ | ||||
|       emit_cmp_and_jump_insns (dst_itr, dst_end, NE, NULL, | ||||
| 			       SImode, 1, byte_mode_loop); | ||||
|       return true; | ||||
|     } | ||||
|   else if (total_bytes % 8 == 0) | ||||
|     { | ||||
|       /* Emit multiple of 8 loop version of movmem.
 | ||||
| 
 | ||||
| 	 add     $dst_end, $dst, $size | ||||
| 	 move    $dst_itr, $dst | ||||
| 	 move    $src_itr, $src | ||||
| 
 | ||||
| 	.Ldouble_word_mode_loop: | ||||
| 	lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr | ||||
| 	smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr | ||||
| 	! move will delete after register allocation | ||||
| 	move    $src_itr, $src_itr' | ||||
| 	move    $dst_itr, $dst_itr' | ||||
| 	! Upper bound not reached yet. Loop. | ||||
| 	bne     $dst_end, $dst_itr, .Ldouble_word_mode_loop */ | ||||
| 
 | ||||
|       /* add     $dst_end, $dst, $size */ | ||||
|       dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size, | ||||
| 			      NULL_RTX, 0, OPTAB_WIDEN); | ||||
| 
 | ||||
|       /* move    $dst_itr, $dst
 | ||||
| 	 move    $src_itr, $src */ | ||||
|       emit_move_insn (dst_itr, dst_base_reg); | ||||
|       emit_move_insn (src_itr, src_base_reg); | ||||
| 
 | ||||
|       /* .Ldouble_word_mode_loop: */ | ||||
|       emit_label (double_word_mode_loop); | ||||
|       /* lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
 | ||||
| 	 smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr */ | ||||
|       src_itr_m = src_itr; | ||||
|       dst_itr_m = dst_itr; | ||||
|       srcmem_m = srcmem; | ||||
|       dstmem_m = dstmem; | ||||
|       nds32_emit_mem_move_block (start_regno, 2, | ||||
| 				 &dst_itr_m, &dstmem_m, | ||||
| 				 &src_itr_m, &srcmem_m, | ||||
| 				 true); | ||||
|       /* move    $src_itr, $src_itr'
 | ||||
| 	 move    $dst_itr, $dst_itr' */ | ||||
|       emit_move_insn (dst_itr, dst_itr_m); | ||||
|       emit_move_insn (src_itr, src_itr_m); | ||||
| 
 | ||||
|       /* ! Upper bound not reached yet. Loop.
 | ||||
| 	 bne     $dst_end, $dst_itr, .Ldouble_word_mode_loop */ | ||||
|       emit_cmp_and_jump_insns (dst_end, dst_itr, NE, NULL, | ||||
| 			       Pmode, 1, double_word_mode_loop); | ||||
|     } | ||||
|   else | ||||
|     { | ||||
|       /* Handle size greater than 8, and not a multiple of 8.  */ | ||||
|       return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem, | ||||
| 						      size, alignment); | ||||
|     } | ||||
| 
 | ||||
|   return true; | ||||
| } | ||||
| 
 | ||||
| static bool | ||||
|  | @ -433,10 +549,8 @@ nds32_expand_movmemsi (rtx dstmem, rtx srcmem, rtx total_bytes, rtx alignment) | |||
| /* Auxiliary function for expand setmem pattern.  */ | ||||
| 
 | ||||
| static rtx | ||||
| nds32_gen_dup_4_byte_to_word_value (rtx value) | ||||
| nds32_gen_dup_4_byte_to_word_value_aux (rtx value, rtx value4word) | ||||
| { | ||||
|   rtx value4word = gen_reg_rtx (SImode); | ||||
| 
 | ||||
|   gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value)); | ||||
| 
 | ||||
|   if (CONST_INT_P (value)) | ||||
|  | @ -493,7 +607,30 @@ nds32_gen_dup_4_byte_to_word_value (rtx value) | |||
| } | ||||
| 
 | ||||
| static rtx | ||||
| emit_setmem_word_loop (rtx itr, rtx size, rtx value) | ||||
| nds32_gen_dup_4_byte_to_word_value (rtx value) | ||||
| { | ||||
|   rtx value4word = gen_reg_rtx (SImode); | ||||
|   nds32_gen_dup_4_byte_to_word_value_aux (value, value4word); | ||||
| 
 | ||||
|   return value4word; | ||||
| } | ||||
| 
 | ||||
| static rtx | ||||
| nds32_gen_dup_8_byte_to_double_word_value (rtx value) | ||||
| { | ||||
|   rtx value4doubleword = gen_reg_rtx (DImode); | ||||
| 
 | ||||
|   nds32_gen_dup_4_byte_to_word_value_aux ( | ||||
|     value, nds32_di_low_part_subreg(value4doubleword)); | ||||
| 
 | ||||
|   emit_move_insn (nds32_di_high_part_subreg(value4doubleword), | ||||
| 		  nds32_di_low_part_subreg(value4doubleword)); | ||||
|   return value4doubleword; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| static rtx | ||||
| emit_setmem_doubleword_loop (rtx itr, rtx size, rtx value) | ||||
| { | ||||
|   rtx word_mode_label = gen_label_rtx (); | ||||
|   rtx word_mode_end_label = gen_label_rtx (); | ||||
|  | @ -502,9 +639,9 @@ emit_setmem_word_loop (rtx itr, rtx size, rtx value) | |||
|   rtx word_mode_end = gen_reg_rtx (SImode); | ||||
|   rtx size_for_word = gen_reg_rtx (SImode); | ||||
| 
 | ||||
|   /* and     $size_for_word, $size, #~3  */ | ||||
|   /* and     $size_for_word, $size, #~0x7  */ | ||||
|   size_for_word = expand_binop (SImode, and_optab, size, | ||||
| 				gen_int_mode (~3, SImode), | ||||
| 				gen_int_mode (~0x7, SImode), | ||||
| 				NULL_RTX, 0, OPTAB_WIDEN); | ||||
| 
 | ||||
|   emit_move_insn (byte_mode_size, size); | ||||
|  | @ -516,8 +653,8 @@ emit_setmem_word_loop (rtx itr, rtx size, rtx value) | |||
|   word_mode_end = expand_binop (Pmode, add_optab, itr, size_for_word, | ||||
| 				NULL_RTX, 0, OPTAB_WIDEN); | ||||
| 
 | ||||
|   /* andi    $byte_mode_size, $size, 3  */ | ||||
|   byte_mode_size_tmp = expand_binop (SImode, and_optab, size, GEN_INT (3), | ||||
|   /* andi    $byte_mode_size, $size, 0x7  */ | ||||
|   byte_mode_size_tmp = expand_binop (SImode, and_optab, size, GEN_INT (0x7), | ||||
| 				     NULL_RTX, 0, OPTAB_WIDEN); | ||||
| 
 | ||||
|   emit_move_insn (byte_mode_size, byte_mode_size_tmp); | ||||
|  | @ -527,9 +664,9 @@ emit_setmem_word_loop (rtx itr, rtx size, rtx value) | |||
|   /*   ! word-mode set loop
 | ||||
|        smw.bim $value4word, [$dst_itr], $value4word, 0 | ||||
|        bne     $word_mode_end, $dst_itr, .Lword_mode  */ | ||||
|   emit_insn (gen_unaligned_store_update_base_w (itr, | ||||
| 						itr, | ||||
| 						value)); | ||||
|   emit_insn (gen_unaligned_store_update_base_dw (itr, | ||||
| 						 itr, | ||||
| 						 value)); | ||||
|   emit_cmp_and_jump_insns (word_mode_end, itr, NE, NULL, | ||||
| 			   Pmode, 1, word_mode_label); | ||||
| 
 | ||||
|  | @ -581,7 +718,7 @@ emit_setmem_byte_loop (rtx itr, rtx size, rtx value, bool need_end) | |||
| static bool | ||||
| nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value) | ||||
| { | ||||
|   rtx value4word; | ||||
|   rtx value4doubleword; | ||||
|   rtx value4byte; | ||||
|   rtx dst; | ||||
|   rtx byte_mode_size; | ||||
|  | @ -624,7 +761,7 @@ nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value) | |||
|      or      $tmp3, $tmp1, $tmp2             ! $tmp3  <- 0x0000abab | ||||
|      slli    $tmp4, $tmp3, 16                ! $tmp4  <- 0xabab0000 | ||||
|      or      $val4word, $tmp3, $tmp4         ! $value4word  <- 0xabababab  */ | ||||
|   value4word = nds32_gen_dup_4_byte_to_word_value (value); | ||||
|   value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value); | ||||
| 
 | ||||
|   /*   and     $size_for_word, $size, #-4
 | ||||
|        beqz    $size_for_word, .Lword_mode_end | ||||
|  | @ -637,7 +774,7 @@ nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value) | |||
|        smw.bim $value4word, [$dst], $value4word, 0 | ||||
|        bne     $word_mode_end, $dst, .Lword_mode | ||||
|      .Lword_mode_end:  */ | ||||
|   byte_mode_size = emit_setmem_word_loop (dst, size, value4word); | ||||
|   byte_mode_size = emit_setmem_doubleword_loop (dst, size, value4doubleword); | ||||
| 
 | ||||
|   /*   beqz    $byte_mode_size, .Lend
 | ||||
|        add     $byte_mode_end, $dst, $byte_mode_size | ||||
|  | @ -648,8 +785,8 @@ nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value) | |||
|        bne     $byte_mode_end, $dst, .Lbyte_mode | ||||
|      .Lend: */ | ||||
| 
 | ||||
|   value4byte = simplify_gen_subreg (QImode, value4word, SImode, | ||||
| 				    subreg_lowpart_offset (QImode, SImode)); | ||||
|   value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode, | ||||
| 				    subreg_lowpart_offset (QImode, DImode)); | ||||
| 
 | ||||
|   emit_setmem_byte_loop (dst, byte_mode_size, value4byte, false); | ||||
| 
 | ||||
|  | @ -666,14 +803,15 @@ nds32_expand_setmem_loop_v3m (rtx dstmem, rtx size, rtx value) | |||
|   rtx byte_loop_size = gen_reg_rtx (SImode); | ||||
|   rtx remain_size = gen_reg_rtx (SImode); | ||||
|   rtx new_base_reg; | ||||
|   rtx value4byte, value4word; | ||||
|   rtx value4byte, value4doubleword; | ||||
|   rtx byte_mode_size; | ||||
|   rtx last_byte_loop_label = gen_label_rtx (); | ||||
| 
 | ||||
|   size = force_reg (SImode, size); | ||||
| 
 | ||||
|   value4word = nds32_gen_dup_4_byte_to_word_value (value); | ||||
|   value4byte = simplify_gen_subreg (QImode, value4word, SImode, 0); | ||||
|   value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value); | ||||
|   value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode, | ||||
| 				    subreg_lowpart_offset (QImode, DImode)); | ||||
| 
 | ||||
|   emit_move_insn (byte_loop_size, size); | ||||
|   emit_move_insn (byte_loop_base, base_reg); | ||||
|  | @ -701,9 +839,9 @@ nds32_expand_setmem_loop_v3m (rtx dstmem, rtx size, rtx value) | |||
|   emit_insn (gen_subsi3 (remain_size, size, need_align_bytes)); | ||||
| 
 | ||||
|   /* Set memory word by word. */ | ||||
|   byte_mode_size = emit_setmem_word_loop (new_base_reg, | ||||
| 					  remain_size, | ||||
| 					  value4word); | ||||
|   byte_mode_size = emit_setmem_doubleword_loop (new_base_reg, | ||||
| 						remain_size, | ||||
| 						value4doubleword); | ||||
| 
 | ||||
|   emit_move_insn (byte_loop_base, new_base_reg); | ||||
|   emit_move_insn (byte_loop_size, byte_mode_size); | ||||
|  |  | |||
|  | @ -2854,6 +2854,25 @@ | |||
|    (set_attr "length"              "4")] | ||||
| ) | ||||
| 
 | ||||
| (define_expand "unaligned_store_update_base_dw" | ||||
|   [(parallel [(set (match_operand:SI 0 "register_operand" "=r") | ||||
| 		   (plus:SI (match_operand:SI 1 "register_operand" "0") (const_int 8))) | ||||
| 	      (set (mem:DI (match_dup 1)) | ||||
| 		   (unspec:DI [(match_operand:DI 2 "register_operand" "r")] UNSPEC_UASTORE_DW))])] | ||||
|   "" | ||||
| { | ||||
|   /* DO NOT emit unaligned_store_w_m immediately since the web pass doesn't | ||||
|      recognize post_inc; try it again after GCC 5.0. | ||||
|      REF: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63156  */ | ||||
|   emit_insn (gen_unaligned_store_dw (gen_rtx_MEM (DImode, operands[1]), operands[2])); | ||||
|   emit_insn (gen_addsi3 (operands[0], operands[1], gen_int_mode (8, Pmode))); | ||||
|   DONE; | ||||
| } | ||||
|   [(set_attr "type"   "store_multiple") | ||||
|    (set_attr "combo"               "2") | ||||
|    (set_attr "length"              "4")] | ||||
| ) | ||||
| 
 | ||||
| (define_insn "*stmsi25" | ||||
|   [(match_parallel 0 "nds32_store_multiple_operation" | ||||
|     [(set (mem:SI (match_operand:SI 1 "register_operand" "r")) | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	 Monk Chiang
						Monk Chiang