mirror of git://gcc.gnu.org/git/gcc.git
				
				
				
			
		
			
				
	
	
		
			125 lines
		
	
	
		
			3.9 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
			
		
		
	
	
			125 lines
		
	
	
		
			3.9 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
| /* Copyright (C) 2011-2019 Free Software Foundation, Inc.
 | |
|    Contributed by Embecosm on behalf of Adapteva, Inc.
 | |
| 
 | |
| This file is part of GCC.
 | |
| 
 | |
| GCC is free software; you can redistribute it and/or modify it under
 | |
| the terms of the GNU General Public License as published by the Free
 | |
| Software Foundation; either version 3, or (at your option) any later
 | |
| version.
 | |
| 
 | |
| GCC is distributed in the hope that it will be useful, but WITHOUT ANY
 | |
| WARRANTY; without even the implied warranty of MERCHANTABILITY or
 | |
| FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 | |
| for more details.
 | |
| 
 | |
| Under Section 7 of GPL version 3, you are granted additional
 | |
| permissions described in the GCC Runtime Library Exception, version
 | |
| 3.1, as published by the Free Software Foundation.
 | |
| 
 | |
| You should have received a copy of the GNU General Public License and
 | |
| a copy of the GCC Runtime Library Exception along with this program;
 | |
| see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 | |
| <http://www.gnu.org/licenses/>.  */
 | |
| 
 | |
| #include "../epiphany-asm.h"
 | |
| 
 | |
| .section _fast_div_text,"a",@progbits;
 | |
|   .balign 8;
 | |
| _fast_div_table:
 | |
| .word 0x007fffff//   mantissa mask
 | |
| .word 0x40257ebb//   hold constant a = 2.58586
 | |
| 
 | |
| .word 0x3f000000//   hold constant 126 shifted to bits [30:23]
 | |
| .word 0xc0ba2e88//   hold constant b = -5.81818
 | |
| 
 | |
| .word 0x4087c1e8//   hold constant c = 4.24242
 | |
| .word 0x40000000//  to hold constant 2 for Newton-Raphson iterations
 | |
| 
 | |
|  .global SYM(__fast_recipsf2)
 | |
|  FUNC(__fast_recipsf2)
 | |
| SYM(__fast_recipsf2):
 | |
| 
 | |
| //###################
 | |
| //# input operands:
 | |
| //###################
 | |
| // Divisor
 | |
| //R0
 | |
| // Function address (used with negative offsets to read _fast_div_table)
 | |
| //R1
 | |
| /* Scratch registers:  two single (TMP0/TMP5) and two pairs.  */
 | |
| #define P0L TMP1
 | |
| #define P0H TMP2
 | |
| #define P1L TMP3
 | |
| #define P1H TMP4
 | |
| 
 | |
| //#########################################
 | |
| //# Constants to be used in the algorithm
 | |
| //#########################################
 | |
| ldrd P0L , [ R1 , -3 ]
 | |
| 
 | |
| ldrd P1L , [ R1 , -2 ]
 | |
| 
 | |
| 
 | |
| 
 | |
| //#############################################################################
 | |
| //#                       The Algorithm
 | |
| //#
 | |
| //# Operation: C=A/B
 | |
| //# stage 1 - find the reciprocal 1/B according to the following scheme:
 | |
| //#  B = (2^E)*m                                (1<m<2, E=e-127)
 | |
| //#  1/B = 1/((2^E)*m) = 1/((2^(E+1))*m1)          (0.5<m1<1)
 | |
| //#      = (2^-(E+1))*(1/m1) = (2^E1)*(1/m1)
 | |
| //#
 | |
| //# Now we can find the new exponent:
 | |
| //# e1 = E1+127 = -E-1+127 = -e+127-1+127 = 253-e **
 | |
| //# 1/m1 alreadt has the exponent 127, so we have to add 126-e.
 | |
| //# the exponent might underflow, which we can detect as a sign change.
 | |
| //# Since the architeture uses flush-to-zero for subnormals, we can
 | |
| //# give the result 0. then.
 | |
| //#
 | |
| //# The 1/m1 term with 0.5<m1<1 is approximated with the Chebyshev polynomial
 | |
| //# 1/m1 = 2.58586*(m1^2) - 5.81818*m1 + 4.24242
 | |
| //#
 | |
| //# Next step is to use two iterations of Newton-Raphson algorithm to complete
 | |
| //# the reciprocal calculation.
 | |
| //#
 | |
| //# Final result is achieved by multiplying A with 1/B
 | |
| //#############################################################################
 | |
| 
 | |
| 
 | |
| 
 | |
| // R0 exponent and sign "replacement" into TMP0
 | |
| AND TMP0,R0,P0L		 ;
 | |
| ORR TMP0,TMP0,P1L
 | |
| SUB TMP5,R0,TMP0 // R0 sign/exponent extraction into TMP5
 | |
| // Calculate new mantissa
 | |
| FMADD P1H,TMP0,P0H	         ;
 | |
| 		// Calculate new exponent offset 126 - "old exponent"
 | |
| 		SUB P1L,P1L,TMP5
 | |
| 	ldrd P0L , [ R1 , -1 ]
 | |
| FMADD P0L,TMP0,P1H	         ;
 | |
| 		eor P1H,r0,P1L // check for overflow (N-BIT).
 | |
| 		blt .Lret_0
 | |
| // P0L exponent and sign "replacement"
 | |
| sub P0L,P0L,TMP5
 | |
| 
 | |
| // Newton-Raphson iteration #1
 | |
| MOV TMP0,P0H	         ;
 | |
| FMSUB P0H,R0,P0L	 ;
 | |
| FMUL  P0L,P0H,P0L	 ;
 | |
| // Newton-Raphson iteration #2
 | |
| FMSUB TMP0,R0,P0L	;
 | |
| FMUL  R0,TMP0,P0L	         ;
 | |
| jr lr
 | |
| .Lret_0:ldrd P0L , [ R1 , -3 ]
 | |
| 	lsr TMP0,r0,31 ; extract sign
 | |
| 	lsl TMP0,TMP0,31
 | |
| 	add P0L,P0L,r0 ; check for NaN input
 | |
| 	eor P0L,P0L,r0
 | |
| 	movgte r0,TMP0
 | |
| 	jr lr
 | |
| // Quotient calculation is expected by the caller: FMUL quotient,divident,R0
 | |
|         ;
 | |
| 	ENDFUNC(__fast_recipsf2)
 |