mirror of git://gcc.gnu.org/git/gcc.git
config.host (arc*-*-elf*, [...]): New configurations.
2013-10-01 Joern Rennecke <joern.rennecke@embecosm.com>
Brendan Kehoe <brendan@zen.org>
Simon Cook <simon.cook@embecosm.com>
* config.host (arc*-*-elf*, arc*-*-linux-uclibc*): New configurations.
* config/arc: New directory.
* longlong.h [__arc__] (umul_ppmm): Remove.
[__arc__] (__umulsidi3): Define.
[__arc__ && __ARC_NORM__] (count_leading_zeros): Define.
[__arc__ && __ARC_NORM__] (COUNT_LEADING_ZEROS_0): Likewise.
Co-Authored-By: Brendan Kehoe <brendan@zen.org>
Co-Authored-By: Simon Cook <simon.cook@embecosm.com>
From-SVN: r203073
This commit is contained in:
parent
526b7aee8f
commit
d38a64b4e0
|
|
@ -1,3 +1,14 @@
|
|||
2013-10-01 Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
Brendan Kehoe <brendan@zen.org>
|
||||
Simon Cook <simon.cook@embecosm.com>
|
||||
|
||||
* config.host (arc*-*-elf*, arc*-*-linux-uclibc*): New configurations.
|
||||
* config/arc: New directory.
|
||||
* longlong.h [__arc__] (umul_ppmm): Remove.
|
||||
[__arc__] (__umulsidi3): Define.
|
||||
[__arc__ && __ARC_NORM__] (count_leading_zeros): Define.
|
||||
[__arc__ && __ARC_NORM__] (COUNT_LEADING_ZEROS_0): Likewise.
|
||||
|
||||
2013-09-17 Jacek Caban <jacek@codeweavers.com>
|
||||
|
||||
* config/i386/gthr-win32.c: CreateSemaphoreW instead of
|
||||
|
|
|
|||
|
|
@ -91,6 +91,9 @@ alpha*-*-*)
|
|||
am33_2.0-*-linux*)
|
||||
cpu_type=mn10300
|
||||
;;
|
||||
# Any target triplet whose first field starts with "arc" gets cpu_type=arc.
arc*-*-*)
|
||||
cpu_type=arc
|
||||
;;
|
||||
arm*-*-*)
|
||||
cpu_type=arm
|
||||
;;
|
||||
|
|
@ -315,6 +318,14 @@ alpha*-dec-*vms*)
|
|||
extra_parts="$extra_parts vms-dwarf2.o vms-dwarf2eh.o"
|
||||
md_unwind_header=alpha/vms-unwind.h
|
||||
;;
|
||||
# arc*-*-elf* targets: use the arc/t-arc-newlib and arc/t-arc makefile
# fragments and build the listed crt*.o objects plus libgmon.a.
# NOTE(review): tmake_file is assigned here without carrying over any
# earlier ${tmake_file} value, unlike the uClibc arm below -- confirm
# that this is intended.
arc*-*-elf*)
|
||||
tmake_file="arc/t-arc-newlib arc/t-arc"
|
||||
extra_parts="crti.o crtn.o crtend.o crtbegin.o crtendS.o crtbeginS.o libgmon.a crtg.o crtgend.o"
|
||||
;;
|
||||
# arc*-*-linux-uclibc* targets: append the shared-libgcc fragments and the
# ARC700 uClibc / generic ARC fragments to the existing tmake_file; the
# extra_parts list is the same as for the elf arm.
arc*-*-linux-uclibc*)
|
||||
tmake_file="${tmake_file} t-slibgcc-libgcc t-slibgcc-nolc-override arc/t-arc700-uClibc arc/t-arc"
|
||||
extra_parts="crti.o crtn.o crtend.o crtbegin.o crtendS.o crtbeginS.o libgmon.a crtg.o crtgend.o"
|
||||
;;
|
||||
arm-wrs-vxworks)
|
||||
tmake_file="$tmake_file arm/t-arm arm/t-vxworks t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp"
|
||||
extra_parts="$extra_parts crti.o crtn.o"
|
||||
|
|
|
|||
|
|
@ -0,0 +1,30 @@
|
|||
/* Assembler macros for the Synopsys DesignWare ARC CPU.
|
||||
|
||||
Copyright (C) 1994, 1995, 1997, 2004, 2007-2012
|
||||
Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* FUNC(X): mark symbol X as a function via the assembler's .type
   directive.  */
#define FUNC(X) .type X,@function
|
||||
/* ENDFUNC(X): set the size of symbol X to the distance from label X to
   the current location, so it belongs right after X's last instruction.  */
#define ENDFUNC(X) .size X, .-X
|
||||
|
|
@ -0,0 +1,52 @@
|
|||
/* Code to start and stop profiling for the Synopsys DesignWare ARC CPU.
|
||||
|
||||
Copyright (C) 1994, 1995, 1997, 2004, 2007-2012
|
||||
Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Profiling start-up hooks.
   Code added to .init loads the addresses of _init and _fini into r0 and
   r1 and calls __monstartup; code added to .fini calls _mcleanup.
   NOTE(review): r0/r1 are presumably the first two argument registers
   (lowpc, highpc) of the ARC calling convention -- confirm.  */
.section .init
|
||||
.global _init
|
||||
.global _fini
|
||||
.global __monstartup
|
||||
mov_s r0,_init
|
||||
mov_s r1,_fini
|
||||
jl __monstartup
|
||||
|
||||
/* Start-of-section labels for the three __arc_profile_* sections, each
   aligned to 4 bytes.  The counters section is the only writable one
   ("aw"); the other two are allocated read-only ("a").
   NOTE(review): the labels only mark the real start if this object's
   contribution comes first in each section -- confirm link order.  */
.section .__arc_profile_desc, "a"
|
||||
.global __arc_profile_desc_secstart
|
||||
.balign 4
|
||||
__arc_profile_desc_secstart:
|
||||
.section .__arc_profile_forward, "a"
|
||||
.global __arc_profile_forward_secstart
|
||||
.balign 4
|
||||
__arc_profile_forward_secstart:
|
||||
.section .__arc_profile_counters, "aw"
|
||||
.global __arc_profile_counters_secstart
|
||||
.balign 4
|
||||
__arc_profile_counters_secstart:
|
||||
|
||||
/* Code added to .fini: call _mcleanup.  */
.section .fini
|
||||
.global _mcleanup
|
||||
jl _mcleanup
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
/* Code to start and stop profiling for the Synopsys DesignWare ARC CPU.
|
||||
|
||||
Copyright (C) 1994, 1995, 1997, 2004, 2007-2012
|
||||
Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* End-of-section labels for .__arc_profile_desc and
   .__arc_profile_forward.  No data is emitted; each label simply marks
   the current position in its section.
   NOTE(review): the labels only mark the real end if this object's
   contribution comes last in each section -- confirm link order.  */
.section .__arc_profile_desc, "a"
|
||||
.global __arc_profile_desc_secend
|
||||
__arc_profile_desc_secend:
|
||||
.section .__arc_profile_forward, "a"
|
||||
.global __arc_profile_forward_secend
|
||||
__arc_profile_forward_secend:
|
||||
|
|
@ -0,0 +1,42 @@
|
|||
/* .fini/.init stack frame setup for the Synopsys DesignWare ARC CPU.
|
||||
|
||||
Copyright (C) 1994, 1995, 1997, 2004, 2007-2012
|
||||
Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
# This file contains the stack frame setup for contents of the .fini and
|
||||
# .init sections.
|
||||
|
||||
/* Opening of _init in .init: define the global entry label and push
   blink on the stack.  Nothing here pops it again; the matching pop is
   expected from whatever closes the section.
   NOTE(review): blink is presumably the return-address register, and the
   reason for the zero word emitted ahead of the label is not evident
   from this file -- confirm both.  */
.section .init
|
||||
.global _init
|
||||
.word 0
|
||||
_init:
|
||||
push_s blink
|
||||
|
||||
/* Opening of _fini in .fini: same shape as _init above.  */
.section .fini
|
||||
.global _fini
|
||||
.word 0
|
||||
_fini:
|
||||
push_s blink
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
/* Ensure .fini/.init return for the Synopsys DesignWare ARC CPU.
|
||||
|
||||
Copyright (C) 1994, 1995, 1997, 2004, 2007-2012
|
||||
Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
# This file just makes sure that the .fini and .init sections do in
|
||||
# fact return. This file is the last thing linked into any executable.
|
||||
|
||||
/* Closing code for .init: pop blink from the stack and jump to the
   address it holds, which is how the .init code returns.  */
.section .init
|
||||
pop_s blink
|
||||
j_s [blink]
|
||||
|
||||
|
||||
/* Closing code for .fini: same pop-and-jump sequence.  */
.section .fini
|
||||
pop_s blink
|
||||
j_s [blink]
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
/* Copyright (C) 2004, 2006, 2007-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by the
|
||||
Free Software Foundation; either version 3, or (at your option) any
|
||||
later version.
|
||||
|
||||
In addition to the permissions in the GNU General Public License, the
|
||||
Free Software Foundation gives you unlimited permission to link the
|
||||
compiled version of this file into combinations with other programs,
|
||||
and to distribute those combinations without any restriction coming
|
||||
from the use of this file. (The General Public License restrictions
|
||||
do apply in other respects; for example, they cover modification of
|
||||
the file, and distribution when not linked into a combine
|
||||
executable.)
|
||||
|
||||
This file is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; see the file COPYING3. If not see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Calculate division table for ARC700 integer division
|
||||
Contributed by Joern Rennecke
|
||||
joern.rennecke@arc.com */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
/* Write the assembler source of the ARC700 division table to stdout:
   a banner comment, the .Ldivtab label, and one .long entry for every
   divisor from 256 down to 2.  Always returns 0.  */
int
main (void)
{
  int i, j;
  double q, r, err, max_err = -1;

  puts("/* This table has been generated by divtab-arc700.c. */");
  puts("\
/* 1/512 .. 1/256, normalized. There is a leading 1 in bit 31.\n\
For powers of two, we list unnormalized numbers instead. The values\n\
for powers of 2 are loaded, but not used. The value for 1 is actually\n\
the first instruction after .Lmuldiv. */\n\
.balign 4");
  puts (".Ldivtab:\n");
  for (i = 256; i >= 2; --i)
    {
      /* i runs from 256 down to 2, so it is always positive and no
         absolute value is needed.  */
      j = i;
      if (j & (j-1))
        /* Not a power of two: double j until it is at least 128.  */
        while (j < 128)
          j += j;
      else
        /* Power of two.  */
        j *= 128;
      /* q is 2^39 / j; the table entry is q rounded up to an integer.  */
      q = 4.*(1<<30)*128/j;
      r = ceil (q);
      printf ("\t.long\t0x%X\n", (unsigned) r);
      /* Track the largest rounding error for the disabled report below.  */
      err = r - q;
      if (err > max_err)
        max_err = err;
    }
#if 0
  printf ("\t/* maximum error: %f */\n", max_err);
#endif
  /* Return from main rather than calling exit (): this file does not
     include <stdlib.h>, so exit would be an undeclared function.  */
  return 0;
}
|
||||
|
|
@ -0,0 +1,101 @@
|
|||
/* Copyright (C) 2007-2013 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* This file selects the double-precision parts of fp-bit.c that are
|
||||
still needed for some ARC hardware variants; it also renames functions
|
||||
that duplicate asm-coded functionality so that their results can be
|
||||
used to compare with the optimized versions for debugging. */
|
||||
|
||||
/* FINE_GRAINED_LIBRARIES and the L_* macros in this file select which
   parts of fp-bit.c get built (see the file comment above).  */
#define FINE_GRAINED_LIBRARIES
|
||||
/* Debug switch.  It is defined to 1, so each `#elif ARC_DP_DEBUG' branch
   in this file is taken when __ARC_NORM__ is defined and no earlier
   branch of the same conditional matched.  */
#define ARC_DP_DEBUG 1
|
||||
/* Double-precision support pieces: selected when __ARC_NORM__ is not
   defined or when the debug switch is non-zero.  */
#if !defined (__ARC_NORM__) || ARC_DP_DEBUG
|
||||
#define L_pack_df
|
||||
#define L_unpack_df
|
||||
#define L_make_df
|
||||
#define L_thenan_df
|
||||
#define L_sf_to_df
|
||||
#endif
|
||||
/* Double-precision add/subtract.  Without __ARC_NORM__ it is built under
   its normal names; with __ARC_NORM__ and the debug switch on it is
   built with __adddf3 / __subdf3 renamed to their *_c variants.  */
#ifndef __ARC_NORM__
|
||||
#define L_addsub_df
|
||||
#elif ARC_DP_DEBUG
|
||||
#define L_addsub_df
|
||||
#define __adddf3 __adddf3_c
|
||||
#define __subdf3 __subdf3_c
|
||||
#endif
|
||||
/* Double-precision multiply/divide, three cases:
   - no __ARC_NORM__: built under the normal names;
   - __ARC_NORM__ but none of __ARC700__, __ARC_MUL64__ and
     __ARC_MUL32BY16__: built under the normal names, and QUIET_NAN is
     redefined;
   - otherwise, with the debug switch on: built with __muldf3 / __divdf3
     renamed to their *_c variants.  */
#ifndef __ARC_NORM__
|
||||
#define L_mul_df
|
||||
#define L_div_df
|
||||
#elif (!defined (__ARC700__) && !defined (__ARC_MUL64__) \
|
||||
&& !defined(__ARC_MUL32BY16__))
|
||||
#define L_mul_df
|
||||
#define L_div_df
|
||||
#undef QUIET_NAN
|
||||
#define QUIET_NAN 0xfffffffffffffLL
|
||||
#elif ARC_DP_DEBUG
|
||||
#define L_mul_df
|
||||
#define __muldf3 __muldf3_c
|
||||
#define L_div_df
|
||||
#define __divdf3 __divdf3_c
|
||||
#endif
|
||||
/* Double-precision conversions.  Without __ARC_NORM__ they are built
   under their normal names; with __ARC_NORM__ and the debug switch on
   each entry point is renamed to its *_c variant.  */
#ifndef __ARC_NORM__
|
||||
#define L_df_to_sf
|
||||
#define L_si_to_df
|
||||
#define L_df_to_si
|
||||
#define L_tf_to_usi /* need to define this instead of df_to_usi */
|
||||
#define L_usi_to_df
|
||||
#elif ARC_DP_DEBUG
|
||||
#define L_df_to_sf
|
||||
#define __truncdfsf2 __truncdfsf2_c
|
||||
#define L_si_to_df
|
||||
#define __floatsidf __floatsidf_c
|
||||
#define L_df_to_si
|
||||
#define __fixdfsi __fixdfsi_c
|
||||
#define L_tf_to_usi
|
||||
#define __fixunsdfsi __fixunsdfsi_c
|
||||
#define L_usi_to_df
|
||||
#define __floatunsidf __floatunsidf_c
|
||||
#endif
|
||||
/* Double-precision comparisons and negate.  Without __ARC_NORM__ the
   full set is selected.  With __ARC_NORM__ and the debug switch on, only
   fpcmp_parts, eq, gt, ge and unord are selected, and the four entry
   points are renamed to their *_c variants.  */
#ifndef __ARC_NORM__
|
||||
#define L_fpcmp_parts_df
|
||||
#define L_compare_df
|
||||
#define L_eq_df
|
||||
#define L_ne_df
|
||||
#define L_gt_df
|
||||
#define L_ge_df
|
||||
#define L_lt_df
|
||||
#define L_le_df
|
||||
#define L_unord_df
|
||||
#define L_negate_df
|
||||
#elif ARC_DP_DEBUG
|
||||
#define L_fpcmp_parts_df
|
||||
#define L_eq_df
|
||||
#define __eqdf2 __eqdf2_c
|
||||
#define L_gt_df
|
||||
#define __gtdf2 __gtdf2_c
|
||||
#define L_ge_df
|
||||
#define __gedf2 __gedf2_c
|
||||
#define L_unord_df
|
||||
#define __unorddf2 __unorddf2_c
|
||||
#endif
|
||||
|
|
@ -0,0 +1,85 @@
|
|||
/* Copyright (C) 2007-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* This file selects the single-precision parts of fp-bit.c that are
|
||||
still needed for some ARC hardware variants; it also renames functions
|
||||
that duplicate asm-coded functionality so that their results can be
|
||||
used to compare with the optimized versions for debugging. */
|
||||
|
||||
/* Debug switch.  It is defined to 1, so each `#elif ARC_FP_DEBUG' branch
   in this file is taken whenever __ARC_NORM__ is defined.  */
#define ARC_FP_DEBUG 1
|
||||
/* FINE_GRAINED_LIBRARIES and the L_* macros in this file select which
   parts of fp-bit.c get built (see the file comment above).  */
#define FINE_GRAINED_LIBRARIES
|
||||
/* Single-precision support pieces: selected when __ARC_NORM__ is not
   defined or when the debug switch is non-zero.  */
#if !defined (__ARC_NORM__) || ARC_FP_DEBUG
|
||||
#define L_pack_sf
|
||||
#define L_unpack_sf
|
||||
#define L_make_sf
|
||||
#define L_thenan_sf
|
||||
#endif
|
||||
/* Single-precision arithmetic and conversions.  Without __ARC_NORM__
   they are built under their normal names; with __ARC_NORM__ and the
   debug switch on each entry point is renamed to its *_c variant.  */
#ifndef __ARC_NORM__
|
||||
#define L_addsub_sf
|
||||
#define L_mul_sf
|
||||
#define L_div_sf
|
||||
#define L_sf_to_df
|
||||
#define L_si_to_sf
|
||||
#define L_sf_to_si
|
||||
#define L_usi_to_sf
|
||||
#elif ARC_FP_DEBUG
|
||||
#define L_addsub_sf
|
||||
#define __addsf3 __addsf3_c
|
||||
#define __subsf3 __subsf3_c
|
||||
#define L_mul_sf
|
||||
#define __mulsf3 __mulsf3_c
|
||||
#define L_div_sf
|
||||
#define __divsf3 __divsf3_c
|
||||
#define L_sf_to_df
|
||||
#define __extendsfdf2 __extendsfdf2_c
|
||||
#define L_si_to_sf
|
||||
#define __floatsisf __floatsisf_c
|
||||
#define L_sf_to_si
|
||||
#define __fixsfsi __fixsfsi_c
|
||||
#define L_usi_to_sf
|
||||
#define __floatunsisf __floatunsisf_c
|
||||
#endif
|
||||
/* Single-precision comparisons and negate.  Without __ARC_NORM__ the
   full set is selected.  With __ARC_NORM__ and the debug switch on, only
   fpcmp_parts, eq, gt, ge and unord are selected, and the four entry
   points are renamed to their *_c variants.  */
#ifndef __ARC_NORM__
|
||||
#define L_fpcmp_parts_sf
|
||||
#define L_compare_sf
|
||||
#define L_eq_sf
|
||||
#define L_ne_sf
|
||||
#define L_gt_sf
|
||||
#define L_ge_sf
|
||||
#define L_lt_sf
|
||||
#define L_le_sf
|
||||
#define L_unord_sf
|
||||
#define L_negate_sf
|
||||
#elif ARC_FP_DEBUG
|
||||
#define L_fpcmp_parts_sf
|
||||
#define L_eq_sf
|
||||
#define __eqsf2 __eqsf2_c
|
||||
#define L_gt_sf
|
||||
#define __gtsf2 __gtsf2_c
|
||||
#define L_ge_sf
|
||||
#define __gesf2 __gesf2_c
|
||||
#define L_unord_sf
|
||||
#define __unordsf2 __unordsf2_c
|
||||
#endif
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
/* Copyright (C) 2007-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* File deliberately left blank. */
|
||||
|
|
@ -0,0 +1,35 @@
|
|||
/* Copyright (C) 2007-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Symbolic names for register numbers.
   NOTE(review): these are presumably ARC auxiliary-register addresses,
   meant as the bracketed operand of the `lr' instruction -- confirm
   against the ARC programmer's reference.  */
#define LP_START 0x02
|
||||
#define LP_END 0x03
|
||||
#define IDENTITY 0x04
|
||||
#define STATUS32 0x0a
|
||||
#define COUNT0 0x21 /* Timer 0 count */
|
||||
#define CONTROL0 0x22 /* Timer 0 control */
|
||||
#define LIMIT0 0x23 /* Timer 0 limit */
|
||||
#define INT_VECTOR_BASE 0x25
|
||||
#define D_CACHE_BUILD 0x72
|
||||
#define DC_FLDL 0x4c
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
/* This file contains code to do profiling.
|
||||
|
||||
Copyright (C) 2007-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "../asm.h"
|
||||
#include "auxreg.h"
|
||||
/* This file contains code to do profiling. */
|
||||
/* Define the symbol __profile_timer_cycles with the constant value 200.
   It is declared both weak and global.
   NOTE(review): presumably weak so that a program can supply its own
   value by defining the symbol itself -- confirm.  */
.weak __profile_timer_cycles
|
||||
.global __profile_timer_cycles
|
||||
.set __profile_timer_cycles, 200
|
||||
.text
|
||||
/* __dcache_linesz: compute the data cache line size.
   Takes no inputs.  The result is left in r0 before the jump through
   blink: 1 when no data cache is detected, otherwise 16 shifted left by
   the 4-bit field in bits 16..19 of the D_CACHE_BUILD register.
   r12 is overwritten.
   NOTE(review): r0 as return register and blink as return address are
   assumed from the ARC calling convention -- confirm.  */
; For Arctangent-A5, if no data cache is present, a read of the
|
||||
; cache build register returns the ID register. For ARC600 and
|
||||
; later, the version field will be zero.
|
||||
.global __dcache_linesz
|
||||
.balign 4
|
||||
__dcache_linesz:
|
||||
/* r12 = cache build register; r0 = its low byte (presumably the
   version field the comment above refers to).  */
lr r12,[D_CACHE_BUILD]
|
||||
extb_s r0,r12
|
||||
/* Low byte zero: no data cache.  */
breq_s r0,0,.Lsz_nocache
|
||||
/* Low byte >= 0x20: accept the build register as genuine.  */
brge r0,0x20,.Lsz_havecache
|
||||
/* Otherwise compare with the ID register; if the two reads are equal
   the build-register read returned the ID, i.e. there is no cache.  */
lr r0,[IDENTITY]
|
||||
breq r12,r0,.Lsz_nocache
|
||||
.Lsz_havecache:
|
||||
/* r12 = bits 16..19 of the build register; r0 = 16 << r12.  */
lsr_s r12,r12,16
|
||||
mov_s r0,16
|
||||
bmsk_s r12,r12,3
|
||||
asl_s r0,r0,r12
|
||||
j_s [blink]
|
||||
.Lsz_nocache:
|
||||
/* No data cache: report a line size of 1.  */
mov_s r0,1
|
||||
j_s [blink]
|
||||
|
|
@ -0,0 +1,450 @@
|
|||
/*-
|
||||
* Copyright (c) 1983, 1992, 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
* Copyright (C) 2007-2012 Free Software Foundation, Inc.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
#if 0
|
||||
#include <sys/param.h>
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include <sys/gmon.h>
|
||||
#include <sys/gmon_out.h>
|
||||
|
||||
#include <stddef.h>
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#if 0
|
||||
#include <libc-internal.h>
|
||||
#include <not-cancel.h>
|
||||
|
||||
#ifdef USE_IN_LIBIO
|
||||
# include <wchar.h>
|
||||
#endif
|
||||
#endif
|
||||
/* Defined empty; it only appears as a marker on the static helper
   declarations and definitions in this file.  */
#define internal_function
|
||||
/* Declare ALIASID as a weak alias of FUN, with FUN's type.  The expansion
   supplies its own terminating semicolon.  */
#define weak_alias(fun,aliasid) extern __typeof(fun) aliasid __attribute__ ((weak, alias (#fun)));
|
||||
/* Constant 0 in this file.
   NOTE(review): presumably stands in for a run-time flag of the C
   library this code was taken from -- confirm.  */
#define __libc_enable_secure 0
|
||||
|
||||
/* Head of basic-block list or NULL. */
|
||||
struct __bb *__bb_head attribute_hidden;
|
||||
|
||||
/* Profiling state shared by the functions in this file; its first member
   is initialized to GMON_PROF_OFF.  */
struct gmonparam _gmonparam attribute_hidden = { GMON_PROF_OFF };
|
||||
|
||||
/*
|
||||
* See profil(2) where this is described:
|
||||
*/
|
||||
/* Scale passed to __profil when profiling is switched on; it is computed
   by __monstartup.  SCALE_1_TO_1 is the value used when the histogram is
   at least as large as the profiled address range.  */
static int s_scale;
|
||||
#define SCALE_1_TO_1 0x10000L
|
||||
|
||||
/* Write the string literal S, without its terminating NUL, to stderr.  */
#define ERR(s) write (STDERR_FILENO, s, sizeof (s) - 1)
|
||||
|
||||
/* Forward declarations.  */
void moncontrol (int mode);
|
||||
void __moncontrol (int mode);
|
||||
static void write_hist (int fd) internal_function;
|
||||
static void write_call_graph (int fd) internal_function;
|
||||
static void write_bb_counts (int fd) internal_function;
|
||||
|
||||
/*
|
||||
* Control profiling
|
||||
* profiling is what mcount checks to see if
|
||||
* all the data structures are ready.
|
||||
*/
|
||||
void
|
||||
__moncontrol (int mode)
|
||||
{
|
||||
struct gmonparam *p = &_gmonparam;
|
||||
|
||||
/* Don't change the state if we ran into an error. */
|
||||
if (p->state == GMON_PROF_ERROR)
|
||||
return;
|
||||
|
||||
if (mode)
|
||||
{
|
||||
/* start */
|
||||
__profil((void *) p->kcount, p->kcountsize, p->lowpc, s_scale);
|
||||
p->state = GMON_PROF_ON;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* stop */
|
||||
__profil(NULL, 0, 0, 0);
|
||||
p->state = GMON_PROF_OFF;
|
||||
}
|
||||
}
|
||||
weak_alias (__moncontrol, moncontrol)
|
||||
|
||||
|
||||
void
|
||||
__monstartup (u_long lowpc, u_long highpc)
|
||||
{
|
||||
register int o;
|
||||
char *cp;
|
||||
struct gmonparam *p = &_gmonparam;
|
||||
int linesz;
|
||||
|
||||
/*
|
||||
* round lowpc and highpc to multiples of the density we're using
|
||||
* so the rest of the scaling (here and in gprof) stays in ints.
|
||||
*/
|
||||
p->lowpc = ROUNDDOWN(lowpc, HISTFRACTION * sizeof(HISTCOUNTER));
|
||||
if (sizeof *p->froms % sizeof(HISTCOUNTER) != 0)
|
||||
{
|
||||
p->highpc = ROUNDUP(highpc, HISTFRACTION * sizeof(HISTCOUNTER));
|
||||
p->textsize = p->highpc - p->lowpc;
|
||||
p->kcountsize = ROUNDUP((p->textsize + HISTFRACTION - 1) / HISTFRACTION,
|
||||
sizeof (*p->froms));
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Avoid odd scales by rounding up highpc to get kcountsize rounded. */
|
||||
p->textsize = ROUNDUP (highpc - p->lowpc,
|
||||
HISTFRACTION * sizeof (*p->froms));
|
||||
p->highpc = p->lowpc + p->textsize;
|
||||
p->kcountsize = p->textsize / HISTFRACTION;
|
||||
}
|
||||
p->hashfraction = HASHFRACTION;
|
||||
p->log_hashfraction = -1;
|
||||
/* The following test must be kept in sync with the corresponding
|
||||
test in mcount.c. */
|
||||
if ((HASHFRACTION & (HASHFRACTION - 1)) == 0) {
|
||||
/* if HASHFRACTION is a power of two, mcount can use shifting
|
||||
instead of integer division. Precompute shift amount. */
|
||||
p->log_hashfraction = ffs(p->hashfraction * sizeof(*p->froms)) - 1;
|
||||
}
|
||||
p->tolimit = p->textsize * ARCDENSITY / 100;
|
||||
if (p->tolimit < MINARCS)
|
||||
p->tolimit = MINARCS;
|
||||
else if (p->tolimit > MAXARCS)
|
||||
p->tolimit = MAXARCS;
|
||||
p->tossize = p->tolimit * sizeof(struct tostruct);
|
||||
|
||||
/* p->kcount must not share cache lines with the adjacent data, because
|
||||
we use uncached accesses while profiling. */
|
||||
linesz = __dcache_linesz ();
|
||||
cp = calloc (ROUNDUP (p->kcountsize, linesz) + p->tossize
|
||||
+ (linesz - 1), 1);
|
||||
if (! cp)
|
||||
{
|
||||
ERR("monstartup: out of memory\n");
|
||||
p->tos = NULL;
|
||||
p->state = GMON_PROF_ERROR;
|
||||
/* In case we loose the error state due to a race,
|
||||
prevent invalid writes also by clearing tolimit. */
|
||||
p->tolimit = 0;
|
||||
return;
|
||||
}
|
||||
p->tos = (struct tostruct *)cp;
|
||||
cp += p->tossize;
|
||||
cp = (char *) ROUNDUP ((ptrdiff_t) cp, linesz);
|
||||
p->kcount = (HISTCOUNTER *)cp;
|
||||
cp += ROUNDUP (p->kcountsize, linesz);
|
||||
|
||||
p->tos[0].link = 0;
|
||||
|
||||
o = p->highpc - p->lowpc;
|
||||
if (p->kcountsize < (u_long) o)
|
||||
{
|
||||
#ifndef hp300
|
||||
s_scale = ((float)p->kcountsize / o ) * SCALE_1_TO_1;
|
||||
#else
|
||||
/* avoid floating point operations */
|
||||
int quot = o / p->kcountsize;
|
||||
|
||||
if (quot >= 0x10000)
|
||||
s_scale = 1;
|
||||
else if (quot >= 0x100)
|
||||
s_scale = 0x10000 / quot;
|
||||
else if (o >= 0x800000)
|
||||
s_scale = 0x1000000 / (o / (p->kcountsize >> 8));
|
||||
else
|
||||
s_scale = 0x1000000 / ((o << 8) / p->kcountsize);
|
||||
#endif
|
||||
} else
|
||||
s_scale = SCALE_1_TO_1;
|
||||
|
||||
__moncontrol(1);
|
||||
}
|
||||
weak_alias (__monstartup, monstartup)
|
||||
|
||||
|
||||
static void
|
||||
internal_function
|
||||
write_hist (int fd)
|
||||
{
|
||||
u_char tag = GMON_TAG_TIME_HIST;
|
||||
struct arc_gmon_hist_hdr thdr __attribute__ ((aligned (__alignof__ (char *))));
|
||||
int r;
|
||||
|
||||
if (_gmonparam.kcountsize > 0)
|
||||
{
|
||||
*(char **) thdr.low_pc = (char *) _gmonparam.lowpc;
|
||||
*(char **) thdr.high_pc = (char *) _gmonparam.highpc;
|
||||
*(int32_t *) thdr.hist_size = (_gmonparam.kcountsize
|
||||
/ sizeof (HISTCOUNTER));
|
||||
*(int32_t *) thdr.prof_rate = __profile_frequency ();
|
||||
strncpy (thdr.dimen, "seconds", sizeof (thdr.dimen));
|
||||
thdr.dimen_abbrev = 's';
|
||||
|
||||
r = write (fd, &tag, sizeof tag);
|
||||
if (r != sizeof tag)
|
||||
return;
|
||||
r = write (fd, &thdr, sizeof thdr);
|
||||
if (r != sizeof thdr)
|
||||
return;
|
||||
r = write (fd,_gmonparam.kcount, _gmonparam.kcountsize);
|
||||
if ((unsigned) r != _gmonparam.kcountsize)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
internal_function
|
||||
write_call_graph (int fd)
|
||||
{
|
||||
#define NARCS_PER_WRITE 64
|
||||
#define BYTES_PER_ARC (1 + sizeof (struct gmon_cg_arc_record))
|
||||
#define BYTES_PER_WRITE (BYTES_PER_ARC * NARCS_PER_WRITE)
|
||||
ARCINDEX to_index;
|
||||
u_long frompc, selfpc, count;
|
||||
char buffer[BYTES_PER_WRITE], *p;
|
||||
u_long *prof_desc = __arc_profile_desc_secstart;
|
||||
u_long *prof_count = __arc_profile_counters_secstart;
|
||||
u_long *prof_desc_end = __arc_profile_desc_secend;
|
||||
u_long *prof_forward = __arc_profile_forward_secstart;
|
||||
|
||||
for (p = buffer; p < buffer + BYTES_PER_WRITE; p += BYTES_PER_ARC)
|
||||
*p = GMON_TAG_CG_ARC;
|
||||
p = buffer;
|
||||
frompc = *prof_desc++ & -2;
|
||||
while (prof_desc < prof_desc_end)
|
||||
{
|
||||
selfpc = *prof_desc++;
|
||||
if (selfpc & 1)
|
||||
{
|
||||
frompc = selfpc & -2;
|
||||
selfpc = *prof_desc++;
|
||||
}
|
||||
count = *prof_count++;
|
||||
if (selfpc)
|
||||
{
|
||||
struct arc
|
||||
{
|
||||
char *frompc;
|
||||
char *selfpc;
|
||||
int32_t count;
|
||||
}
|
||||
arc;
|
||||
|
||||
if (!count)
|
||||
continue;
|
||||
arc.frompc = (char *) frompc;
|
||||
arc.selfpc = (char *) selfpc;
|
||||
arc.count = count;
|
||||
memcpy (p + 1, &arc, sizeof arc);
|
||||
p += 1 + sizeof arc;
|
||||
|
||||
if (p == buffer + BYTES_PER_WRITE)
|
||||
{
|
||||
write (fd, buffer, BYTES_PER_WRITE);
|
||||
p = buffer;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (to_index = count;
|
||||
to_index != 0;
|
||||
to_index = _gmonparam.tos[to_index].link)
|
||||
{
|
||||
struct arc
|
||||
{
|
||||
char *frompc;
|
||||
char *selfpc;
|
||||
int32_t count;
|
||||
}
|
||||
arc;
|
||||
|
||||
arc.frompc = (char *) frompc;
|
||||
arc.selfpc = (char *) _gmonparam.tos[to_index].selfpc;
|
||||
arc.count = _gmonparam.tos[to_index].count;
|
||||
memcpy (p + 1, &arc, sizeof arc);
|
||||
p += 1 + sizeof arc;
|
||||
|
||||
if (p == buffer + BYTES_PER_WRITE)
|
||||
{
|
||||
write (fd, buffer, BYTES_PER_WRITE);
|
||||
p = buffer;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
while (prof_forward < __arc_profile_forward_secend)
|
||||
{
|
||||
/* ??? The 'call count' is actually supposed to be a fixed point
|
||||
factor, with 16 bits each before and after the point.
|
||||
It would be much nicer if we figured out the actual number
|
||||
of calls to the caller, and multiplied that with the fixed point
|
||||
factor to arrive at the estimated calls for the callee. */
|
||||
memcpy (p + 1, prof_forward, 3 * sizeof *prof_forward);
|
||||
prof_forward += 3;
|
||||
p += 1 + 3 * sizeof *prof_forward;
|
||||
if (p == buffer + BYTES_PER_WRITE)
|
||||
{
|
||||
write (fd, buffer, BYTES_PER_WRITE);
|
||||
p = buffer;
|
||||
}
|
||||
}
|
||||
if (p != buffer)
|
||||
write (fd, buffer, p - buffer);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
internal_function
|
||||
write_bb_counts (int fd)
|
||||
{
|
||||
struct __bb *grp;
|
||||
u_char tag = GMON_TAG_BB_COUNT;
|
||||
size_t ncounts;
|
||||
size_t i;
|
||||
|
||||
struct { unsigned long address; long count; } bbbody[8];
|
||||
size_t nfilled;
|
||||
|
||||
/* Write each group of basic-block info (all basic-blocks in a
|
||||
compilation unit form a single group). */
|
||||
|
||||
for (grp = __bb_head; grp; grp = grp->next)
|
||||
{
|
||||
ncounts = grp->ncounts;
|
||||
write (fd, &tag, 1);
|
||||
write (fd, &ncounts, sizeof ncounts);
|
||||
for (nfilled = i = 0; i < ncounts; ++i)
|
||||
{
|
||||
if (nfilled == sizeof (bbbody) / sizeof (bbbody[0]))
|
||||
{
|
||||
write (fd, bbbody, sizeof bbbody);
|
||||
nfilled = 0;
|
||||
}
|
||||
|
||||
bbbody[nfilled].address = grp->addresses[i];
|
||||
bbbody[nfilled++].count = grp->counts[i];
|
||||
}
|
||||
if (nfilled > 0)
|
||||
write (fd, bbbody, nfilled * sizeof bbbody[0]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
write_gmon (void)
|
||||
{
|
||||
struct gmon_hdr ghdr __attribute__ ((aligned (__alignof__ (int))));
|
||||
int fd = -1;
|
||||
char *env;
|
||||
|
||||
#ifndef O_NOFOLLOW
|
||||
# define O_NOFOLLOW 0
|
||||
#endif
|
||||
|
||||
env = getenv ("GMON_OUT_PREFIX");
|
||||
if (env != NULL && !__libc_enable_secure)
|
||||
{
|
||||
size_t len = strlen (env);
|
||||
char buf[len + 20];
|
||||
snprintf (buf, sizeof (buf), "%s.%u", env, getpid ());
|
||||
fd = open (buf, O_CREAT|O_TRUNC|O_WRONLY|O_NOFOLLOW, 0666);
|
||||
}
|
||||
|
||||
if (fd == -1)
|
||||
{
|
||||
fd = open ("gmon.out", O_CREAT|O_TRUNC|O_WRONLY|O_NOFOLLOW,
|
||||
0666);
|
||||
if (fd < 0)
|
||||
{
|
||||
perror ("_mcleanup: gmon.out");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* write gmon.out header: */
|
||||
memset (&ghdr, '\0', sizeof (struct gmon_hdr));
|
||||
memcpy (&ghdr.cookie[0], GMON_MAGIC, sizeof (ghdr.cookie));
|
||||
*(int32_t *) ghdr.version = GMON_VERSION;
|
||||
write (fd, &ghdr, sizeof (struct gmon_hdr));
|
||||
|
||||
/* write PC histogram: */
|
||||
write_hist (fd);
|
||||
|
||||
/* write call-graph: */
|
||||
write_call_graph (fd);
|
||||
|
||||
/* write basic-block execution counts: */
|
||||
write_bb_counts (fd);
|
||||
|
||||
close (fd);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
__write_profiling (void)
|
||||
{
|
||||
int save = _gmonparam.state;
|
||||
_gmonparam.state = GMON_PROF_OFF;
|
||||
if (save == GMON_PROF_ON)
|
||||
write_gmon ();
|
||||
_gmonparam.state = save;
|
||||
}
|
||||
#ifndef SHARED
|
||||
/* This symbol isn't used anywhere in the DSO and it is not exported.
|
||||
This would normally mean it should be removed to get the same API
|
||||
in static libraries. But since profiling is special in static libs
|
||||
anyway we keep it. But not when building the DSO since some
|
||||
quality assurance tests will otherwise trigger. */
|
||||
weak_alias (__write_profiling, write_profiling)
|
||||
#endif
|
||||
|
||||
|
||||
void
|
||||
_mcleanup (void)
|
||||
{
|
||||
__moncontrol (0);
|
||||
|
||||
if (_gmonparam.state != GMON_PROF_ERROR)
|
||||
write_gmon ();
|
||||
|
||||
/* free the memory. */
|
||||
if (_gmonparam.tos != NULL)
|
||||
free (_gmonparam.tos);
|
||||
}
|
||||
|
|
@ -0,0 +1,65 @@
|
|||
/* Copyright (C) 2007-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef MACHINE_GMON_H
|
||||
#define MACHINE_GMON_H
|
||||
|
||||
/* We can't fake our own <sys/types.h> header because the newlib / uclibc
|
||||
headers in GCC_FOR_TARGET take precedence. */
|
||||
|
||||
#define __BEGIN_DECLS
|
||||
#define __END_DECLS
|
||||
|
||||
#define __THROW
|
||||
|
||||
extern int __dcache_linesz (void);
|
||||
|
||||
#define _MCOUNT_DECL(countp, selfpc) \
|
||||
static inline void _mcount_internal (void *countp, u_long selfpc)
|
||||
|
||||
extern void _mcount (void);
|
||||
extern void _mcount_call (void);
|
||||
|
||||
/* N.B.: the calling point might be a sibcall, thus blink does not necessarily
|
||||
hold the caller's address. r8 doesn't hold the caller's address, either,
|
||||
but rather a pointer to the counter data structure associated with the
|
||||
caller.
|
||||
This function must be compiled with optimization turned on in order to
|
||||
enable a sibcall for the final call to selfpc; this is important when trying
|
||||
to profile a program with deep tail-recursion that would get a stack
|
||||
overflow otherwise. */
|
||||
/* MCOUNT expands to the definition of _mcount_call.  That function binds
   countp to register r8 and selfpc to register r9, hands both to
   _mcount_internal, and finally calls the address held in selfpc.  */
#define MCOUNT \
|
||||
void \
|
||||
_mcount_call (void) \
|
||||
{ \
|
||||
register void *countp __asm("r8"); \
|
||||
register u_long selfpc __asm("r9"); \
|
||||
_mcount_internal (countp, selfpc); \
|
||||
((void (*)(void)) selfpc) (); \
|
||||
}
|
||||
|
||||
extern int __profil (u_short *,size_t, size_t, u_int);
|
||||
|
||||
#endif /* MACHINE_GMON_H */
|
||||
|
|
@ -0,0 +1,206 @@
|
|||
/*-
|
||||
* Copyright (c) 1983, 1992, 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* Copyright (C) 2007-2012 Free Software Foundation, Inc.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#if !defined(lint) && !defined(KERNEL) && defined(LIBC_SCCS)
|
||||
static char sccsid[] = "@(#)mcount.c 8.1 (Berkeley) 6/4/93";
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
#include <unistd.h>
|
||||
#include <sys/param.h>
|
||||
#endif
|
||||
#include <sys/gmon.h>
|
||||
|
||||
/* This file provides the machine-dependent definitions of the _MCOUNT_DECL
|
||||
and MCOUNT macros. */
|
||||
#include <machine-gmon.h>
|
||||
|
||||
#include <atomic.h>
|
||||
|
||||
/*
|
||||
* mcount is called on entry to each function compiled with the profiling
|
||||
* switch set. _mcount(), which is declared in a machine-dependent way
|
||||
* with _MCOUNT_DECL, does the actual work and is either inlined into a
|
||||
* C routine or called by an assembly stub. In any case, this magic is
|
||||
* taken care of by the MCOUNT definition in <machine-gmon.h>.
|
||||
*
|
||||
* _mcount updates data structures that represent traversals of the
|
||||
* program's call graph edges. frompc and selfpc are the return
|
||||
* address and function address that represents the given call graph edge.
|
||||
*
|
||||
* Note: the original BSD code used the same variable (frompcindex) for
|
||||
* both frompcindex and frompc. Any reasonable, modern compiler will
|
||||
* perform this optimization.
|
||||
*/
|
||||
_MCOUNT_DECL(count_ptr, selfpc) /* _mcount; may be static, inline, etc */
|
||||
{
|
||||
register ARCINDEX *frompcindex;
|
||||
register struct tostruct *top, *prevtop;
|
||||
register struct gmonparam *p;
|
||||
register ARCINDEX toindex;
|
||||
|
||||
/* Check for nested function trampoline. */
|
||||
if (selfpc & 2)
|
||||
selfpc = *(u_long *) (selfpc + 10);
|
||||
|
||||
p = &_gmonparam;
|
||||
/*
|
||||
* check that we are profiling
|
||||
* and that we aren't recursively invoked.
|
||||
*/
|
||||
#if 0
|
||||
if (catomic_compare_and_exchange_bool_acq (&p->state, GMON_PROF_BUSY,
|
||||
GMON_PROF_ON))
|
||||
return;
|
||||
#elif defined (__ARC700__)
|
||||
/* ??? This could temporrarily loose the ERROR / OFF condition in a race,
|
||||
but doing an actual compare_and_exchange would be too costly. It would
|
||||
be better if we had a semaphore independent of the 'sticky' state, but
|
||||
then we could run into ABI compatibility problems with the size of struct
|
||||
gmonparam. */
|
||||
{
|
||||
u_long old_state;
|
||||
|
||||
__asm ("ex %0,%1": "=r" (old_state), "+m" (p->state)
|
||||
: "0" (GMON_PROF_BUSY));
|
||||
if (old_state != GMON_PROF_ON)
|
||||
{
|
||||
switch (old_state)
|
||||
{
|
||||
case GMON_PROF_OFF:
|
||||
__asm ("ex %0,%1": "+r" (old_state), "+m" (p->state));
|
||||
if (old_state == GMON_PROF_BUSY
|
||||
/* Switching off while we say we are busy while profiling
|
||||
was actually already switched off is all right. */
|
||||
|| old_state == GMON_PROF_OFF)
|
||||
break;
|
||||
/* It is not clear if we should allow switching on
|
||||
profiling at this point, and how to handle further races.
|
||||
For now, record an error in this case. */
|
||||
/* Fall through. */
|
||||
default: /* We expect here only GMON_PROF_ERROR. */
|
||||
p->state = GMON_PROF_ERROR;
|
||||
break;
|
||||
case GMON_PROF_BUSY: break;
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
#else /* ??? No semaphore primitives available. */
|
||||
if (p->state != GMON_PROF_ON)
|
||||
return;
|
||||
p->state = GMON_PROF_BUSY;
|
||||
#endif
|
||||
|
||||
frompcindex = count_ptr;
|
||||
toindex = *frompcindex;
|
||||
if (toindex == 0) {
|
||||
/*
|
||||
* first time traversing this arc
|
||||
*/
|
||||
toindex = ++p->tos[0].link;
|
||||
if (toindex >= (ARCINDEX) p->tolimit)
|
||||
/* halt further profiling */
|
||||
goto overflow;
|
||||
|
||||
*frompcindex = toindex;
|
||||
top = &p->tos[toindex];
|
||||
top->selfpc = selfpc;
|
||||
top->count = 1;
|
||||
top->link = 0;
|
||||
goto done;
|
||||
}
|
||||
top = &p->tos[toindex];
|
||||
if (top->selfpc == selfpc) {
|
||||
/*
|
||||
* arc at front of chain; usual case.
|
||||
*/
|
||||
top->count++;
|
||||
goto done;
|
||||
}
|
||||
/*
|
||||
* have to go looking down chain for it.
|
||||
* top points to what we are looking at,
|
||||
* prevtop points to previous top.
|
||||
* we know it is not at the head of the chain.
|
||||
*/
|
||||
for (; /* goto done */; ) {
|
||||
if (top->link == 0) {
|
||||
/*
|
||||
* top is end of the chain and none of the chain
|
||||
* had top->selfpc == selfpc.
|
||||
* so we allocate a new tostruct
|
||||
* and link it to the head of the chain.
|
||||
*/
|
||||
toindex = ++p->tos[0].link;
|
||||
if (toindex >= (ARCINDEX) p->tolimit)
|
||||
goto overflow;
|
||||
|
||||
top = &p->tos[toindex];
|
||||
top->selfpc = selfpc;
|
||||
top->count = 1;
|
||||
top->link = *frompcindex;
|
||||
*frompcindex = toindex;
|
||||
goto done;
|
||||
}
|
||||
/*
|
||||
* otherwise, check the next arc on the chain.
|
||||
*/
|
||||
prevtop = top;
|
||||
top = &p->tos[top->link];
|
||||
if (top->selfpc == selfpc) {
|
||||
/*
|
||||
* there it is.
|
||||
* increment its count
|
||||
* move it to the head of the chain.
|
||||
*/
|
||||
top->count++;
|
||||
toindex = prevtop->link;
|
||||
prevtop->link = top->link;
|
||||
top->link = *frompcindex;
|
||||
*frompcindex = toindex;
|
||||
goto done;
|
||||
}
|
||||
|
||||
}
|
||||
done:
|
||||
p->state = GMON_PROF_ON;
|
||||
return;
|
||||
overflow:
|
||||
p->state = GMON_PROF_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Actual definition of mcount function.  Defined in <machine-gmon.h>,
|
||||
* which is included by <sys/gmon.h>.
|
||||
*/
|
||||
MCOUNT
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
/* This file contains code to do profiling.
|
||||
|
||||
Copyright (C) 2007-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "../asm.h"
|
||||
/* This file contains code to do profiling. */
|
||||
/* __profile_frequency_value is a weak absolute symbol set to 1000 here;
   being weak, another definition of the symbol can take its place.  */
.weak __profile_frequency_value
|
||||
.global __profile_frequency_value
|
||||
.set __profile_frequency_value, 1000
|
||||
.text
|
||||
.balign 4
|
||||
.global __profile_frequency
|
||||
/* __profile_frequency: load the value of __profile_frequency_value into r0
   and jump to the address in blink.
   NOTE(review): presumably r0 is the integer return register and blink the
   return address under the ARC calling convention -- confirm.  */
FUNC(__profile_frequency)
|
||||
__profile_frequency:
|
||||
mov_s r0,__profile_frequency_value
|
||||
j_s [blink]
|
||||
ENDFUNC(__profile_frequency)
|
||||
|
|
@ -0,0 +1,60 @@
|
|||
/* Return frequency of ticks reported by profil. Generic version. */
|
||||
/*-
|
||||
* Copyright (c) 1983, 1992, 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* Copyright (C) 2007-2012 Free Software Foundation, Inc.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/time.h>
|
||||
#if 0
|
||||
#include <libc-internal.h>
|
||||
#else
|
||||
#include "sys/gmon.h"
|
||||
#endif
|
||||
|
||||
/* Return the tick frequency of the interval timer in Hz, or 0 (an
   impossible frequency) if it cannot be determined.  */
int
__profile_frequency (void)
{
  /* Ask for a one microsecond interval with the timer itself disarmed.  */
  struct itimerval probe = {
    .it_interval = { .tv_sec = 0, .tv_usec = 1 },
    .it_value = { .tv_sec = 0, .tv_usec = 0 }
  };

  setitimer(ITIMER_REAL, &probe, 0);
  /* Read the settings back to see what interval was actually stored.  */
  setitimer(ITIMER_REAL, 0, &probe);

  /* An interval below two microseconds is treated as "could not tell".  */
  return probe.it_interval.tv_usec < 2
	 ? 0
	 : (1000000 / probe.it_interval.tv_usec);
}
|
||||
|
|
@ -0,0 +1,153 @@
|
|||
/* This file contains code to do profiling.
|
||||
|
||||
Copyright (C) 2007-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "../asm.h"
|
||||
#include "auxreg.h"
|
||||
/* This file contains code to do profiling. */
|
||||
.weak __profile_timer_cycles
|
||||
.global __profile_timer_cycles
|
||||
.set __profile_timer_cycles, 200
|
||||
|
||||
.section .bss
|
||||
.global __profil_offset
|
||||
.align 4
|
||||
.type __profil_offset, @object
|
||||
.size __profil_offset, 4
|
||||
__profil_offset:
|
||||
.zero 4
|
||||
|
||||
.text
|
||||
.global __dcache_linesz
|
||||
.global __profil
|
||||
; __profil: start or stop PC-sampling with timer 0.
; Arguments are listed in the register comment after the entry label.
; A zero buf stops profiling.  Otherwise buf - offset/2 is stored in
; __profil_offset, the counter buffer is walked through DC_FLDL (skipped
; when __dcache_linesz returns a value with bit 0 set), the two words of
; the jump at label 1 are copied into the vector slot at byte offset 24,
; and timer 0 is started with LIMIT0 = __profile_timer_cycles.
; r0 is 0 on the normal return.
; NOTE(review): r1, r3 and r8 are used after the call to __dcache_linesz
; without being saved here, so that routine presumably preserves them.
FUNC(__profil)
|
||||
.Lstop_profiling:
|
||||
; Reached only with r0 == 0, so this writes 0 to CONTROL0.
sr r0,[CONTROL0]
|
||||
j_s [blink]
|
||||
.balign 4
|
||||
__profil:
|
||||
.Lprofil:
|
||||
breq_s r0,0,.Lstop_profiling
|
||||
; r0: buf r1: bufsiz r2: offset r3: scale
|
||||
bxor.f r3,r3,15; scale must be 0x8000, i.e. 1/2; generate 0.
|
||||
push_s blink
|
||||
lsr_s r2,r2,1
|
||||
; Keep buf in r8 while r0 is reused below.
mov_s r8,r0
|
||||
flag.ne 1 ; halt if wrong scale
|
||||
; r0 = buf - offset/2, the value __profil_irq adds the scaled pc to.
sub_s r0,r0,r2
|
||||
st r0,[__profil_offset]
|
||||
bl __dcache_linesz
|
||||
pop_s blink
|
||||
; Bit 0 of the returned line size set: skip the cache loop.  The
; instruction that follows the branch reloads r0 with buf.
bbit1.d r0,0,nocache
|
||||
mov_s r0,r8
|
||||
#ifdef __ARC700__
|
||||
; lp_count = (bufsiz + 31) / 32; the loop body handles 32 bytes per pass.
add_s r1,r1,31
|
||||
lsr.f lp_count,r1,5
|
||||
lpne 2f
|
||||
sr r0,[DC_FLDL]
|
||||
add_s r0,r0,32
|
||||
#else /* !__ARC700__ */
|
||||
# FIX ME: set up loop according to cache line size
|
||||
; The step is 16 << n and the count ((bufsiz - 1) >> (4 + n)) + 1, where
; n is bits 16..19 of D_CACHE_BUILD.
lr r12,[D_CACHE_BUILD]
|
||||
sub_s r0,r0,16
|
||||
sub_s r1,r1,1
|
||||
lsr_s r12,r12,16
|
||||
asr_s r1,r1,4
|
||||
bmsk_s r12,r12,3
|
||||
asr_s r1,r1,r12
|
||||
add.f lp_count,r1,1
|
||||
mov_s r1,16
|
||||
asl_s r1,r1,r12
|
||||
lpne 2f
|
||||
add r0,r0,r1
|
||||
sr r0,[DC_FLDL]
|
||||
#endif /* __ARC700__ */
|
||||
2: b_s .Lcounters_cleared
|
||||
nocache:
|
||||
.Lcounters_cleared:
|
||||
; r3 is 0 here (see the bxor.f above), so the two sr instructions below
; write 0 to CONTROL0 and COUNT0.
lr r1,[INT_VECTOR_BASE] ; disable timer0 interrupts
|
||||
sr r3,[CONTROL0]
|
||||
sr r3,[COUNT0]
|
||||
; Fetch the two words at label 1 (the jump to __profil_irq) pc-relative.
0: ld_s r0,[pcl,1f-0b+((0b-.Lprofil) & 2)] ; 1f@GOTOFF
|
||||
0: ld_s r12,[pcl,1f+4-0b+((0b-.Lprofil) & 2)] ; 1f@GOTOFF + 4
|
||||
st_s r0,[r1,24]; timer0 uses vector3
|
||||
st_s r12,[r1,24+4]; timer0 uses vector3
|
||||
;sr 10000,[LIMIT0]
|
||||
sr __profile_timer_cycles,[LIMIT0]
|
||||
mov_s r12,3 ; enable timer interrupts; count only when not halted.
|
||||
sr r12,[CONTROL0]
|
||||
lr r12,[STATUS32]
|
||||
bset_s r12,r12,1 ; allow level 1 interrupts
|
||||
flag r12
|
||||
mov_s r0,0
|
||||
j_s [blink]
|
||||
.balign 4
|
||||
; Template copied into the interrupt vector above; not executed in place.
1: j __profil_irq
|
||||
ENDFUNC(__profil)
|
||||
|
||||
; __profil_irq: timer 0 interrupt handler installed by __profil.
; Increments the 16-bit counter at __profil_offset + (ilink1 >> 2) * 2 and
; leaves it untouched once the increment would set bit 16.  r0, r1 and r2
; are saved on the stack and restored before returning through ilink1.
; NOTE(review): the .as / .di / .ab / .f suffixes are read here as scaled
; index, cache bypass, post-increment and flag restore -- confirm against
; the ARC ISA reference.
FUNC(__profil_irq)
|
||||
.balign 4 ; make final jump unaligned to avoid delay penalty
|
||||
.balign 32,0,12 ; make sure the code spans no more than two cache lines
|
||||
nop_s
|
||||
__profil_irq:
|
||||
push_s r0
|
||||
ld r0,[__profil_offset]
|
||||
push_s r1
|
||||
; r1 = interrupted pc / 4, i.e. the index of the counter to bump.
lsr r1,ilink1,2
|
||||
push_s r2
|
||||
; r2 = counter value, r0 = address of that counter.
ldw.as.di r2,[r0,r1]
|
||||
add1 r0,r0,r1
|
||||
; r1 is no longer needed: reload the saved value from its stack slot.
ld_s r1,[sp,4]
|
||||
add_s r2,r2,1
|
||||
; Skip the store when the count no longer fits in 16 bits.
bbit1 r2,16,nostore
|
||||
stw.di r2,[r0]
|
||||
; Reload r2 and step sp over the r2 and r1 slots.
nostore:ld.ab r2,[sp,8]
|
||||
pop_s r0
|
||||
j.f [ilink1]
|
||||
ENDFUNC(__profil_irq)
|
||||
|
||||
; could save one cycle if the counters were allocated at link time and
|
||||
; the contents of __profil_offset were pre-computed at link time, like this:
|
||||
#if 0
|
||||
; __profil_offset needs to be PROVIDEd as __profile_base-text/4
|
||||
.global __profil_offset
|
||||
.balign 4
|
||||
__profil_irq:
|
||||
push_s r0
|
||||
lsr r0,ilink1,2
|
||||
add1 r0,__profil_offset,r0
|
||||
push_s r1
|
||||
ldw.di r1,[r0]
|
||||
|
||||
|
||||
add_s r1,r1,1
|
||||
bbit1 r1,16,nostore
|
||||
stw.di r1,[r0]
|
||||
nostore:pop_s r1
|
||||
pop_s r0
|
||||
j [ilink1]
|
||||
#endif /* 0 */
|
||||
|
|
@ -0,0 +1,217 @@
|
|||
/*-
|
||||
* Copyright (c) 1982, 1986, 1992, 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
* Copyright (C) 2007-2012 Free Software Foundation, Inc.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)gmon.h 8.2 (Berkeley) 1/4/94
|
||||
*/
|
||||
|
||||
#ifndef _SYS_GMON_H
|
||||
#define _SYS_GMON_H 1
|
||||
|
||||
#if 0
|
||||
#include <features.h>
|
||||
#include <sys/types.h>
|
||||
#else
|
||||
#include <sys/types.h>
|
||||
#include "machine-gmon.h"
|
||||
#define attribute_hidden __attribute__ ((visibility("hidden")))
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/*
|
||||
* See gmon_out.h for gmon.out format.
|
||||
*/
|
||||
|
||||
/* structure emitted by "gcc -a". This must match struct bb in
|
||||
gcc/libgcc2.c. It is OK for gcc to declare a longer structure as
|
||||
long as the members below are present. */
|
||||
struct __bb
|
||||
{
|
||||
long zero_word;
|
||||
const char *filename;
|
||||
long *counts;
|
||||
long ncounts;
|
||||
struct __bb *next;
|
||||
const unsigned long *addresses;
|
||||
};
|
||||
|
||||
extern struct __bb *__bb_head;
|
||||
|
||||
/*
|
||||
* histogram counters are unsigned shorts (according to the kernel).
|
||||
*/
|
||||
#define HISTCOUNTER unsigned short
|
||||
|
||||
/*
|
||||
* fraction of text space to allocate for histogram counters here, 1/2
|
||||
*/
|
||||
#define HISTFRACTION 2
|
||||
|
||||
/*
|
||||
* Fraction of text space to allocate for from hash buckets.
|
||||
* The value of HASHFRACTION is based on the minimum number of bytes
|
||||
* of separation between two subroutine call points in the object code.
|
||||
* Given MIN_SUBR_SEPARATION bytes of separation the value of
|
||||
* HASHFRACTION is calculated as:
|
||||
*
|
||||
* HASHFRACTION = MIN_SUBR_SEPARATION / (2 * sizeof(short) - 1);
|
||||
*
|
||||
* For example, on the VAX, the shortest two call sequence is:
|
||||
*
|
||||
* calls $0,(r0)
|
||||
* calls $0,(r0)
|
||||
*
|
||||
* which is separated by only three bytes, thus HASHFRACTION is
|
||||
* calculated as:
|
||||
*
|
||||
* HASHFRACTION = 3 / (2 * 2 - 1) = 1
|
||||
*
|
||||
* Note that the division above rounds down, thus if MIN_SUBR_SEPARATION
|
||||
* is less than three, this algorithm will not work!
|
||||
*
|
||||
* In practice, however, call instructions are rarely at a minimal
|
||||
* distance. Hence, we will define HASHFRACTION to be 2 across all
|
||||
* architectures. This saves a reasonable amount of space for
|
||||
* profiling data structures without (in practice) sacrificing
|
||||
* any granularity.
|
||||
*/
|
||||
#define HASHFRACTION 2
|
||||
|
||||
/*
|
||||
* Percent of text space to allocate for tostructs.
|
||||
* This is a heuristic; we will fail with a warning when profiling programs
|
||||
* with a very large number of very small functions, but that's
|
||||
* normally OK.
|
||||
* 2 is probably still a good value for normal programs.
|
||||
* Profiling a test case with 64000 small functions will work if
|
||||
* you raise this value to 3 and link statically (which bloats the
|
||||
* text size, thus raising the number of arcs expected by the heuristic).
|
||||
*/
|
||||
#define ARCDENSITY 3
|
||||
|
||||
/*
|
||||
* Always allocate at least this many tostructs. This
|
||||
* hides the inadequacy of the ARCDENSITY heuristic, at least
|
||||
* for small programs.
|
||||
*/
|
||||
#define MINARCS 50
|
||||
|
||||
/*
|
||||
* The type used to represent indices into gmonparam.tos[].
|
||||
*/
|
||||
#define ARCINDEX u_long
|
||||
|
||||
/*
|
||||
* Maximum number of arcs we want to allow.
|
||||
* Used to be max representable value of ARCINDEX minus 2, but now
|
||||
* that ARCINDEX is a long, that's too large; we don't really want
|
||||
* to allow a 48 gigabyte table.
|
||||
* The old value of 1<<16 wasn't high enough in practice for large C++
|
||||
* programs; will 1<<20 be adequate for long? FIXME
|
||||
*/
|
||||
#define MAXARCS (1 << 20)
|
||||
|
||||
struct tostruct { /* Element type of gmonparam.tos[] (struct gmonparam declares `struct tostruct *tos'). */
|
||||
u_long selfpc; /* presumably the called-site PC, by analogy with raw_selfpc in struct rawarc - confirm */
|
||||
long count; /* presumably the number of times this arc was taken - confirm */
|
||||
ARCINDEX link; /* ARCINDEX is the type for indices into gmonparam.tos[]; presumably chains to another entry - confirm */
|
||||
};
|
||||
|
||||
/*
|
||||
* a raw arc, with pointers to the calling site and
|
||||
* the called site and a count.
|
||||
*/
|
||||
struct rawarc { /* One raw call-graph arc: caller PC, callee PC and a count. */
|
||||
u_long raw_frompc; /* the calling site */
|
||||
u_long raw_selfpc; /* the called site */
|
||||
long raw_count; /* the count associated with this arc */
|
||||
};
|
||||
|
||||
/*
|
||||
* general rounding functions.
|
||||
*/
|
||||
#define ROUNDDOWN(x,y) (((x)/(y))*(y))
|
||||
#define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y))
|
||||
|
||||
/*
|
||||
* The profiling data structures are housed in this structure.
|
||||
*/
|
||||
struct gmonparam { /* Container for the profiling data structures; one instance is declared as _gmonparam. */
|
||||
long int state; /* presumably one of the GMON_PROF_* state values - confirm */
|
||||
u_short *kcount; /* presumably the profile tick count buffer (cf. GPROF_COUNT) - confirm */
|
||||
u_long kcountsize; /* presumably the size of kcount; units not visible here - confirm */
|
||||
ARCINDEX *froms; /* array of ARCINDEX values, i.e. indices into tos[]; presumably the from-location hash buckets (cf. GPROF_FROMS) - confirm */
|
||||
u_long fromssize; /* presumably the size of froms; units not visible here - confirm */
|
||||
struct tostruct *tos; /* the array that ARCINDEX values index into */
|
||||
u_long tossize; /* presumably the size of tos; units not visible here - confirm */
|
||||
long tolimit; /* presumably the maximum number of tos[] entries (cf. MINARCS / MAXARCS) - confirm */
|
||||
u_long lowpc; /* presumably the lowest profiled PC (cf. the __lowpc argument of __monstartup) - confirm */
|
||||
u_long highpc; /* presumably the highest profiled PC (cf. the __highpc argument of __monstartup) - confirm */
|
||||
u_long textsize; /* presumably highpc - lowpc - confirm */
|
||||
u_long hashfraction; /* presumably set from HASHFRACTION - confirm */
|
||||
long log_hashfraction; /* presumably log2 of hashfraction, or a sentinel when it is not a power of two - confirm */
|
||||
};
|
||||
extern struct gmonparam _gmonparam;
|
||||
|
||||
/*
|
||||
* Possible states of profiling.
|
||||
*/
|
||||
#define GMON_PROF_ON 0
|
||||
#define GMON_PROF_BUSY 1
|
||||
#define GMON_PROF_ERROR 2
|
||||
#define GMON_PROF_OFF 3
|
||||
|
||||
/*
|
||||
* Sysctl definitions for extracting profiling information from the kernel.
|
||||
*/
|
||||
#define GPROF_STATE 0 /* int: profiling enabling variable */
|
||||
#define GPROF_COUNT 1 /* struct: profile tick count buffer */
|
||||
#define GPROF_FROMS 2 /* struct: from location hash bucket */
|
||||
#define GPROF_TOS 3 /* struct: destination/count structure */
|
||||
#define GPROF_GMONPARAM 4 /* struct: profiling parameters (see above) */
|
||||
|
||||
__BEGIN_DECLS
|
||||
|
||||
/* Set up data structures and start profiling. */
|
||||
extern void __monstartup (u_long __lowpc, u_long __highpc) __THROW;
|
||||
extern void monstartup (u_long __lowpc, u_long __highpc) __THROW;
|
||||
|
||||
/* Clean up profiling and write out gmon.out. */
|
||||
extern void _mcleanup (void) __THROW;
|
||||
|
||||
extern void __write_profiling (void);
|
||||
extern int attribute_hidden __profile_frequency (void);
|
||||
|
||||
extern u_long __arc_profile_desc_secstart[], __arc_profile_desc_secend[];
|
||||
extern u_long __arc_profile_forward_secstart[], __arc_profile_forward_secend[];
|
||||
extern u_long __arc_profile_counters_secstart[];
|
||||
|
||||
__END_DECLS
|
||||
|
||||
#endif /* sys/gmon.h */
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
/* Copyright (C) 2007-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#define GMON_TAG_TIME_HIST 0
|
||||
#define GMON_TAG_CG_ARC 1
|
||||
#define GMON_TAG_BB_COUNT 2
|
||||
|
||||
#define GMON_MAGIC "gmon"
|
||||
#define GMON_VERSION 1
|
||||
|
||||
struct arc_gmon_hist_hdr /* Histogram record header; presumably follows a GMON_TAG_TIME_HIST tag - confirm. */
|
||||
{ /* Every member is a char array, so there is no alignment padding: 4+4+4+4+15+1 = 32 bytes. */
|
||||
char low_pc[4]; /* 4-byte field; byte order is not visible from this header */
|
||||
char high_pc[4]; /* 4-byte field */
|
||||
char hist_size[4]; /* 4-byte field; presumably the number of histogram bins - confirm */
|
||||
char prof_rate[4]; /* 4-byte field; presumably the profiling clock rate - confirm */
|
||||
char dimen[15]; /* 15-byte text field; presumably the name of the sampled dimension - confirm */
|
||||
char dimen_abbrev; /* single character; presumably an abbreviation for dimen - confirm */
|
||||
};
|
||||
|
||||
struct gmon_cg_arc_record /* Call-graph arc record; presumably follows a GMON_TAG_CG_ARC tag - confirm. */
|
||||
{ /* Three 4-byte char arrays: 12 bytes, no alignment padding. */
|
||||
char afrompc[4]; /* presumably the caller PC (cf. raw_frompc in struct rawarc) - confirm */
|
||||
char selfpc[4]; /* presumably the callee PC (cf. raw_selfpc in struct rawarc) - confirm */
|
||||
char count[4]; /* presumably the arc traversal count - confirm */
|
||||
};
|
||||
|
||||
struct gmon_hdr /* File header record: 4+4+12 = 20 bytes of char data, no alignment padding. */
|
||||
{
|
||||
char cookie[4]; /* presumably holds GMON_MAGIC ("gmon" is exactly 4 characters, no terminating NUL fits) - confirm */
|
||||
char version[4]; /* presumably holds GMON_VERSION; byte order is not visible from this header - confirm */
|
||||
char c[12]; /* purpose not visible here; presumably reserved/padding - confirm */
|
||||
};
|
||||
|
|
@ -0,0 +1,524 @@
|
|||
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "arc-ieee-754.h"
|
||||
#if 0 /* DEBUG */
|
||||
.global __adddf3
|
||||
.balign 4
|
||||
__adddf3:
|
||||
push_s blink
|
||||
push_s r2
|
||||
push_s r3
|
||||
push_s r0
|
||||
bl.d __adddf3_c
|
||||
push_s r1
|
||||
ld_s r2,[sp,12]
|
||||
ld_s r3,[sp,8]
|
||||
st_s r0,[sp,12]
|
||||
st_s r1,[sp,8]
|
||||
pop_s r1
|
||||
bl.d __adddf3_asm
|
||||
pop_s r0
|
||||
pop_s r3
|
||||
pop_s r2
|
||||
pop_s blink
|
||||
cmp r0,r2
|
||||
cmp.eq r1,r3
|
||||
jeq_s [blink]
|
||||
bl abort
|
||||
.global __subdf3
|
||||
.balign 4
|
||||
__subdf3:
|
||||
push_s blink
|
||||
push_s r2
|
||||
push_s r3
|
||||
push_s r0
|
||||
bl.d __subdf3_c
|
||||
push_s r1
|
||||
ld_s r2,[sp,12]
|
||||
ld_s r3,[sp,8]
|
||||
st_s r0,[sp,12]
|
||||
st_s r1,[sp,8]
|
||||
pop_s r1
|
||||
bl.d __subdf3_asm
|
||||
pop_s r0
|
||||
pop_s r3
|
||||
pop_s r2
|
||||
pop_s blink
|
||||
cmp r0,r2
|
||||
cmp.eq r1,r3
|
||||
jeq_s [blink]
|
||||
bl abort
|
||||
#define __adddf3 __adddf3_asm
|
||||
#define __subdf3 __subdf3_asm
|
||||
#endif /* DEBUG */
|
||||
/* N.B. This is optimized for ARC700.
|
||||
ARC600 has very different scheduling / instruction selection criteria. */
|
||||
|
||||
/* inputs: DBL0, DBL1 (r0-r3)
|
||||
output: DBL0 (r0, r1)
|
||||
clobber: r2-r10, r12, flags
|
||||
All NaN highword bits must be 1. NaN low word is random. */
|
||||
|
||||
.balign 4
|
||||
.global __adddf3
|
||||
.global __subdf3
|
||||
.long 0x7ff00000 ; exponent mask
|
||||
FUNC(__adddf3)
|
||||
FUNC(__subdf3)
|
||||
__subdf3:
|
||||
bxor_l DBL1H,DBL1H,31
|
||||
__adddf3:
|
||||
ld r9,[pcl,-8]
|
||||
bmsk r4,DBL0H,30
|
||||
xor r10,DBL0H,DBL1H
|
||||
and r6,DBL1H,r9
|
||||
sub.f r12,r4,r6
|
||||
asr_s r12,r12,20
|
||||
blo .Ldbl1_gt
|
||||
brhs r4,r9,.Linf_nan
|
||||
brhs r12,32,.Large_shift
|
||||
brne r12,0,.Lsmall_shift
|
||||
brge r10,0,.Ladd_same_exp ; r12 == 0
|
||||
|
||||
/* After subtracting, we need to normalize; when shifting to place the
|
||||
leading 1 into position for the implicit 1 and adding that to DBL0H,
|
||||
we increment the exponent. Thus, we have to subtract one more than
|
||||
the shift count from the exponent beforehand. Iff the exponent drops thus
|
||||
below zero (before adding in the fraction with the leading one), we have
|
||||
generated a denormal number. Denormal handling is basically reducing the
|
||||
shift count so that we produce a zero exponent instead; however, this way
|
||||
the shift count can become zero (if we started out with exponent 1).
|
||||
Therefore, a simple min operation is not good enough, since we don't
|
||||
want to handle a zero normalizing shift in the main path.
|
||||
On the plus side, we don't need to check for denorm input, the result
|
||||
of subtracting these looks just the same as denormals generated during
|
||||
subtraction. */
|
||||
bmsk r7,DBL1H,30
|
||||
cmp r4,r7
|
||||
cmp.eq DBL0L,DBL1L
|
||||
blo .L_rsub_same_exp
|
||||
sub.f DBL0L,DBL0L,DBL1L
|
||||
bmsk r12,DBL0H,19
|
||||
bic DBL1H,DBL0H,r12
|
||||
sbc.f r4,r4,r7
|
||||
beq_l .Large_cancel
|
||||
norm DBL1L,r4
|
||||
b.d .Lsub_done_same_exp
|
||||
sub r12,DBL1L,9
|
||||
|
||||
.balign 4
|
||||
.Linf_nan:
|
||||
; If both inputs are inf, but with different signs, the result is NaN.
|
||||
asr r12,r10,31
|
||||
or_s DBL1H,DBL1H,r12
|
||||
j_s.d [blink]
|
||||
or.eq DBL0H,DBL0H,DBL1H
|
||||
|
||||
.balign 4
|
||||
.L_rsub_same_exp:
|
||||
rsub.f DBL0L,DBL0L,DBL1L
|
||||
bmsk r12,DBL1H,19
|
||||
bic_s DBL1H,DBL1H,r12
|
||||
sbc.f r4,r7,r4
|
||||
beq_l .Large_cancel
|
||||
norm DBL1L,r4
|
||||
|
||||
sub r12,DBL1L,9
|
||||
.Lsub_done_same_exp:
|
||||
asl_s r12,r12,20
|
||||
sub_s DBL1L,DBL1L,10
|
||||
sub DBL0H,DBL1H,r12
|
||||
xor.f 0,DBL0H,DBL1H
|
||||
bmi .Ldenorm
|
||||
.Lpast_denorm:
|
||||
neg_s r12,DBL1L
|
||||
lsr r7,DBL0L,r12
|
||||
asl r12,r4,DBL1L
|
||||
asl_s DBL0L,DBL0L,DBL1L
|
||||
add_s r12,r12,r7
|
||||
j_s.d [blink]
|
||||
add_l DBL0H,DBL0H,r12
|
||||
.balign 4
|
||||
.Ladd_same_exp:
|
||||
/* This is a special case because we can't test for need to shift
|
||||
down by checking if bit 20 of DBL0H changes. OTOH, here we know
|
||||
that we always need to shift down. */
|
||||
; The implicit 1 of DBL0 is not shifted together with the
|
||||
; fraction, thus effectively doubled, compensating for not setting
|
||||
; implicit1 for DBL1
|
||||
add_s r12,DBL0L,DBL1L
|
||||
lsr.f 0,r12,2 ; round to even
|
||||
breq r6,0,.Ldenorm_add
|
||||
adc.f DBL0L,DBL0L,DBL1L
|
||||
sub r7,DBL1H,DBL0H
|
||||
sub1 r7,r7,r9 ; boost exponent by 2/2
|
||||
rrc DBL0L,DBL0L
|
||||
asr.f r7,r7 ; DBL1.fraction/2 - DBL0.fraction/2 ; exp++
|
||||
add.cs.f DBL0L,DBL0L,0x80000000
|
||||
add_l DBL0H,DBL0H,r7 ; DBL0.implicit1 not shifted for DBL1.implicit1
|
||||
add.cs DBL0H,DBL0H,1
|
||||
bic.f 0,r9,DBL0H ; check for overflow -> infinity.
|
||||
jne_l [blink]
|
||||
and DBL0H,DBL0H,0xfff00000
|
||||
j_s.d [blink]
|
||||
mov_s DBL0L,0
|
||||
.balign 4
|
||||
.Large_shift:
|
||||
brhs r12,55,.Lret_dbl0
|
||||
bmsk_s DBL1H,DBL1H,19
|
||||
brne r6,0,.Lno_denorm_large_shift
|
||||
brhi.d r12,33,.Lfixed_denorm_large_shift
|
||||
sub_s r12,r12,1
|
||||
breq r12,31, .Lfixed_denorm_small_shift
|
||||
.Lshift32:
|
||||
mov_s r12,DBL1L
|
||||
mov_s DBL1L,DBL1H
|
||||
brlt.d r10,0,.Lsub
|
||||
mov_s DBL1H,0
|
||||
b_s .Ladd
|
||||
.Ldenorm_add:
|
||||
cmp_s r12,DBL1L
|
||||
mov_s DBL0L,r12
|
||||
j_s.d [blink]
|
||||
adc DBL0H,r4,DBL1H
|
||||
|
||||
.Lret_dbl0:
|
||||
j_s [blink]
|
||||
.balign 4
|
||||
.Lsmall_shift:
|
||||
breq.d r6,0,.Ldenorm_small_shift
|
||||
bmsk_s DBL1H,DBL1H,19
|
||||
bset_s DBL1H,DBL1H,20
|
||||
.Lfixed_denorm_small_shift:
|
||||
neg r8,r12
|
||||
asl r4,DBL1H,r8
|
||||
lsr_l DBL1H,DBL1H,r12
|
||||
lsr r5,DBL1L,r12
|
||||
asl r12,DBL1L,r8
|
||||
brge.d r10,0,.Ladd
|
||||
or DBL1L,r4,r5
|
||||
/* subtract, abs(DBL0) > abs(DBL1) */
|
||||
/* DBL0H, DBL0L: original values
|
||||
DBL1H, DBL1L: fraction with explicit leading 1, shifted into place
|
||||
r4: orig. DBL0H & 0x7fffffff
|
||||
r6: orig. DBL1H & 0x7ff00000
|
||||
r9: 0x7ff00000
|
||||
r10: orig. DBL0H ^ DBL1H
|
||||
r12: guard bits */
|
||||
.balign 4
|
||||
.Lsub:
|
||||
neg.f r12,r12
|
||||
mov_s r7,DBL1H
|
||||
bmsk r5,DBL0H,19
|
||||
sbc.f DBL0L,DBL0L,DBL1L
|
||||
bic DBL1H,DBL0H,r5
|
||||
bset r5,r5,20
|
||||
sbc.f r4,r5,r7
|
||||
beq_l .Large_cancel_sub
|
||||
norm DBL1L,r4
|
||||
bmsk r6,DBL1H,30
|
||||
.Lsub_done:
|
||||
sub_s DBL1L,DBL1L,9
|
||||
breq DBL1L,1,.Lsub_done_noshift
|
||||
asl r5,DBL1L,20
|
||||
sub_s DBL1L,DBL1L,1
|
||||
brlo r6,r5,.Ldenorm_sub
|
||||
sub DBL0H,DBL1H,r5
|
||||
.Lpast_denorm_sub:
|
||||
neg_s DBL1H,DBL1L
|
||||
lsr r6,r12,DBL1H
|
||||
asl_s r12,r12,DBL1L
|
||||
and r8,r6,1
|
||||
add1.f 0,r8,r12
|
||||
add.ne.f r12,r12,r12
|
||||
asl r8,DBL0L,DBL1L
|
||||
lsr r12,DBL0L,DBL1H
|
||||
adc.f DBL0L,r8,r6
|
||||
asl r5,r4,DBL1L
|
||||
add_s DBL0H,DBL0H,r12
|
||||
j_s.d [blink]
|
||||
adc DBL0H,DBL0H,r5
|
||||
|
||||
.balign 4
|
||||
.Lno_denorm_large_shift:
|
||||
breq.d r12,32,.Lshift32
|
||||
bset_l DBL1H,DBL1H,20
|
||||
.Lfixed_denorm_large_shift:
|
||||
neg r8,r12
|
||||
asl r4,DBL1H,r8
|
||||
lsr r5,DBL1L,r12
|
||||
asl.f 0,DBL1L,r8
|
||||
lsr DBL1L,DBL1H,r12
|
||||
or r12,r4,r5
|
||||
tst.eq r12,1
|
||||
or.ne r12,r12,2
|
||||
brlt.d r10,0,.Lsub
|
||||
mov_s DBL1H,0
|
||||
b_l .Ladd
|
||||
|
||||
; If a denorm is produced without shifting, we have an exact result -
|
||||
; no need for rounding.
|
||||
.balign 4
|
||||
.Ldenorm_sub:
|
||||
lsr DBL1L,r6,20
|
||||
xor DBL0H,r6,DBL1H
|
||||
brne.d DBL1L,1,.Lpast_denorm_sub
|
||||
sub_s DBL1L,DBL1L,1
|
||||
.Lsub_done_noshift:
|
||||
add.f 0,r12,r12
|
||||
btst.eq DBL0L,0
|
||||
cmp.eq r12,r12
|
||||
add.cs.f DBL0L,DBL0L,1
|
||||
bclr r4,r4,20
|
||||
j_s.d [blink]
|
||||
adc DBL0H,DBL1H,r4
|
||||
|
||||
.balign 4
|
||||
.Ldenorm_small_shift:
|
||||
brne.d r12,1,.Lfixed_denorm_small_shift
|
||||
sub_l r12,r12,1
|
||||
brlt r10,0,.Lsub
|
||||
.Ladd: ; bit 20 of DBL1H is clear and bit 0 of r12 does not matter
|
||||
add.f DBL0L,DBL0L,DBL1L
|
||||
add_s DBL1H,DBL1H,DBL0H
|
||||
add.cs DBL1H,DBL1H,1
|
||||
xor_l DBL0H,DBL0H,DBL1H
|
||||
bbit0 DBL0H,20,.Lno_shiftdown
|
||||
lsr.f DBL0H,DBL1H
|
||||
and r4,DBL0L,2
|
||||
bmsk DBL0H,DBL0H,18
|
||||
sbc DBL0H,DBL1H,DBL0H
|
||||
rrc.f DBL0L,DBL0L
|
||||
or.f r12,r12,r4
|
||||
cmp.eq r12,r12
|
||||
add.cs.f DBL0L,DBL0L,1
|
||||
bic.f 0,r9,DBL0H ; check for generating infinity with possible ...
|
||||
jne.d [blink] ; ... non-zero fraction
|
||||
add.cs DBL0H,DBL0H,1
|
||||
mov_s DBL0L,0
|
||||
bmsk DBL1H,DBL0H,19
|
||||
j_s.d [blink]
|
||||
bic_s DBL0H,DBL0H,DBL1H
|
||||
.Lno_shiftdown:
|
||||
mov_s DBL0H,DBL1H
|
||||
add.f 0,r12,r12
|
||||
btst.eq DBL0L,0
|
||||
cmp.eq r12,r12
|
||||
add.cs.f DBL0L,DBL0L,1
|
||||
j_s.d [blink]
|
||||
add.cs DBL0H,DBL0H,1
|
||||
.balign 4
|
||||
.Ldenorm:
|
||||
bmsk DBL0H,DBL1H,30
|
||||
lsr r12,DBL0H,20
|
||||
xor_s DBL0H,DBL0H,DBL1H
|
||||
sub_l DBL1L,r12,1
|
||||
bgt .Lpast_denorm
|
||||
j_s.d [blink]
|
||||
add_l DBL0H,DBL0H,r4
|
||||
|
||||
.balign 4
|
||||
.Large_cancel:
|
||||
;DBL0L: mantissa DBL1H: sign & exponent
|
||||
norm.f DBL1L,DBL0L
|
||||
bmsk DBL0H,DBL1H,30
|
||||
add_s DBL1L,DBL1L,22
|
||||
mov.mi DBL1L,21
|
||||
add_s r12,DBL1L,1
|
||||
asl_s r12,r12,20
|
||||
beq_s .Lret0
|
||||
brhs.d DBL0H,r12,.Lpast_denorm_large_cancel
|
||||
sub DBL0H,DBL1H,r12
|
||||
bmsk DBL0H,DBL1H,30
|
||||
lsr r12,DBL0H,20
|
||||
xor_s DBL0H,DBL0H,DBL1H
|
||||
sub.f DBL1L,r12,1
|
||||
jle [blink]
|
||||
.Lpast_denorm_large_cancel:
|
||||
rsub.f r7,DBL1L,32
|
||||
lsr r7,DBL0L,r7
|
||||
asl_s DBL0L,DBL0L,DBL1L
|
||||
mov.ls r7,DBL0L
|
||||
add_s DBL0H,DBL0H,r7
|
||||
j_s.d [blink]
|
||||
mov.ls DBL0L,0
|
||||
.Lret0:
|
||||
j_s.d [blink]
|
||||
mov_l DBL0H,0
|
||||
|
||||
/* r4:DBL0L:r12 : unnormalized result fraction
|
||||
DBL1H: result sign and exponent */
|
||||
/* When seeing large cancellation, only the topmost guard bit might be set. */
|
||||
.balign 4
|
||||
.Large_cancel_sub:
|
||||
norm.f DBL1L,DBL0L
|
||||
bpnz.d 0f
|
||||
bmsk DBL0H,DBL1H,30
|
||||
mov r5,22<<20
|
||||
bne.d 1f
|
||||
mov_s DBL1L,21
|
||||
bset r5,r5,5+20
|
||||
add_s DBL1L,DBL1L,32
|
||||
brne r12,0,1f
|
||||
j_s.d [blink]
|
||||
mov_l DBL0H,0
|
||||
.balign 4
|
||||
0: add r5,DBL1L,23
|
||||
asl r5,r5,20
|
||||
add_s DBL1L,DBL1L,22
|
||||
1: brlo DBL0H,r5,.Ldenorm_large_cancel_sub
|
||||
sub DBL0H,DBL1H,r5
|
||||
.Lpast_denorm_large_cancel_sub:
|
||||
rsub.f r7,DBL1L,32
|
||||
lsr r12,r12,r7
|
||||
lsr r7,DBL0L,r7
|
||||
asl_s DBL0L,DBL0L,DBL1L
|
||||
add.ge DBL0H,DBL0H,r7
|
||||
add_s DBL0L,DBL0L,r12
|
||||
add.lt DBL0H,DBL0H,DBL0L
|
||||
mov.eq DBL0L,r12
|
||||
j_s.d [blink]
|
||||
mov.lt DBL0L,0
|
||||
.balign 4
|
||||
.Ldenorm_large_cancel_sub:
|
||||
lsr r5,DBL0H,20
|
||||
xor_s DBL0H,DBL0H,DBL1H
|
||||
brgt.d r5,1,.Lpast_denorm_large_cancel_sub
|
||||
sub DBL1L,r5,1
|
||||
j_l [blink] ; denorm, no shift -> no rounding needed.
|
||||
|
||||
/* r4: DBL0H & 0x7fffffff
|
||||
r6: DBL1H & 0x7ff00000
|
||||
r9: 0x7ff00000
|
||||
r10: sign difference
|
||||
r12: shift count (negative) */
|
||||
.balign 4
|
||||
.Ldbl1_gt:
|
||||
brhs r6,r9,.Lret_dbl1 ; inf or NaN
|
||||
neg r8,r12
|
||||
brhs r8,32,.Large_shift_dbl0
|
||||
.Lsmall_shift_dbl0:
|
||||
breq.d r6,0,.Ldenorm_small_shift_dbl0
|
||||
bmsk_s DBL0H,DBL0H,19
|
||||
bset_s DBL0H,DBL0H,20
|
||||
.Lfixed_denorm_small_shift_dbl0:
|
||||
asl r4,DBL0H,r12
|
||||
lsr DBL0H,DBL0H,r8
|
||||
lsr r5,DBL0L,r8
|
||||
asl r12,DBL0L,r12
|
||||
brge.d r10,0,.Ladd_dbl1_gt
|
||||
or DBL0L,r4,r5
|
||||
/* subtract, abs(DBL0) < abs(DBL1) */
|
||||
/* DBL0H, DBL0L: fraction with explicit leading 1, shifted into place
|
||||
DBL1H, DBL1L: original values
|
||||
r6: orig. DBL1H & 0x7ff00000
|
||||
r9: 0x7ff00000
|
||||
r12: guard bits */
|
||||
.balign 4
|
||||
.Lrsub:
|
||||
neg.f r12,r12
|
||||
bmsk r7,DBL1H,19
|
||||
mov_s r5,DBL0H
|
||||
sbc.f DBL0L,DBL1L,DBL0L
|
||||
bic DBL1H,DBL1H,r7
|
||||
bset r7,r7,20
|
||||
sbc.f r4,r7,r5
|
||||
beq_l .Large_cancel_sub
|
||||
norm DBL1L,r4
|
||||
b_l .Lsub_done ; note: r6 is already set up.
|
||||
|
||||
.Lret_dbl1:
|
||||
mov_s DBL0H,DBL1H
|
||||
j_s.d [blink]
|
||||
mov_l DBL0L,DBL1L
|
||||
.balign 4
|
||||
.Ldenorm_small_shift_dbl0:
|
||||
sub.f r8,r8,1
|
||||
bne.d .Lfixed_denorm_small_shift_dbl0
|
||||
add_s r12,r12,1
|
||||
brlt r10,0,.Lrsub
|
||||
.Ladd_dbl1_gt: ; bit 20 of DBL0H is clear and bit 0 of r12 does not matter
|
||||
add.f DBL0L,DBL0L,DBL1L
|
||||
add_s DBL0H,DBL0H,DBL1H
|
||||
add.cs DBL0H,DBL0H,1
|
||||
xor DBL1H,DBL0H,DBL1H
|
||||
bbit0 DBL1H,20,.Lno_shiftdown_dbl1_gt
|
||||
lsr.f DBL1H,DBL0H
|
||||
and r4,DBL0L,2
|
||||
bmsk DBL1H,DBL1H,18
|
||||
sbc DBL0H,DBL0H,DBL1H
|
||||
rrc.f DBL0L,DBL0L
|
||||
or.f r12,r12,r4
|
||||
cmp.eq r12,r12
|
||||
add.cs.f DBL0L,DBL0L,1
|
||||
bic.f 0,r9,DBL0H ; check for generating infinity with possible ...
|
||||
jne.d [blink] ; ... non-zero fraction
|
||||
add.cs DBL0H,DBL0H,1
|
||||
mov_s DBL0L,0
|
||||
bmsk DBL1H,DBL0H,19
|
||||
j_s.d [blink]
|
||||
bic_s DBL0H,DBL0H,DBL1H
|
||||
.Lno_shiftdown_dbl1_gt:
|
||||
add.f 0,r12,r12
|
||||
btst.eq DBL0L,0
|
||||
cmp.eq r12,r12
|
||||
add.cs.f DBL0L,DBL0L,1
|
||||
j_s.d [blink]
|
||||
add.cs DBL0H,DBL0H,1
|
||||
|
||||
.balign 4
|
||||
.Large_shift_dbl0:
|
||||
brhs r8,55,.Lret_dbl1
|
||||
bmsk_s DBL0H,DBL0H,19
|
||||
brne r6,0,.Lno_denorm_large_shift_dbl0
|
||||
add_s r12,r12,1
|
||||
brne.d r8,33,.Lfixed_denorm_large_shift_dbl0
|
||||
sub r8,r8,1
|
||||
bset_s DBL0H,DBL0H,20
|
||||
.Lshift32_dbl0:
|
||||
mov_s r12,DBL0L
|
||||
mov_s DBL0L,DBL0H
|
||||
brlt.d r10,0,.Lrsub
|
||||
mov_s DBL0H,0
|
||||
b_s .Ladd_dbl1_gt
|
||||
|
||||
.balign 4
|
||||
.Lno_denorm_large_shift_dbl0:
|
||||
breq.d r8,32,.Lshift32_dbl0
|
||||
bset_l DBL0H,DBL0H,20
|
||||
.Lfixed_denorm_large_shift_dbl0:
|
||||
asl r4,DBL0H,r12
|
||||
lsr r5,DBL0L,r8
|
||||
asl.f 0,DBL0L,r12
|
||||
lsr DBL0L,DBL0H,r8
|
||||
or r12,r4,r5
|
||||
tst.eq r12,1
|
||||
or.ne r12,r12,2
|
||||
brlt.d r10,0,.Lrsub
|
||||
mov_s DBL0H,0
|
||||
b_l .Ladd_dbl1_gt
|
||||
ENDFUNC(__adddf3)
|
||||
ENDFUNC(__subdf3)
|
||||
|
|
@ -0,0 +1,344 @@
|
|||
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "arc-ieee-754.h"
|
||||
#if 0 /* DEBUG */
|
||||
.global __addsf3
|
||||
FUNC(__addsf3)
|
||||
.balign 4
|
||||
__addsf3:
|
||||
push_s blink
|
||||
push_s r1
|
||||
bl.d __addsf3_c
|
||||
push_s r0
|
||||
ld_s r1,[sp,4]
|
||||
st_s r0,[sp,4]
|
||||
bl.d __addsf3_asm
|
||||
pop_s r0
|
||||
pop_s r1
|
||||
pop_s blink
|
||||
cmp r0,r1
|
||||
jeq_s [blink]
|
||||
bl abort
|
||||
ENDFUNC(__addsf3)
|
||||
.global __subsf3
|
||||
FUNC(__subsf3)
|
||||
.balign 4
|
||||
__subsf3:
|
||||
push_s blink
|
||||
push_s r1
|
||||
bl.d __subsf3_c
|
||||
push_s r0
|
||||
ld_s r1,[sp,4]
|
||||
st_s r0,[sp,4]
|
||||
bl.d __subsf3_asm
|
||||
pop_s r0
|
||||
pop_s r1
|
||||
pop_s blink
|
||||
cmp r0,r1
|
||||
jeq_s [blink]
|
||||
bl abort
|
||||
ENDFUNC(__subsf3)
|
||||
#define __addsf3 __addsf3_asm
|
||||
#define __subsf3 __subsf3_asm
|
||||
#endif /* DEBUG */
|
||||
/* N.B. This is optimized for ARC700.
|
||||
ARC600 has very different scheduling / instruction selection criteria. */
|
||||
|
||||
/* inputs: r0, r1
|
||||
output: r0
|
||||
clobber: r1-r10, r12, flags */
|
||||
|
||||
.balign 4
|
||||
.global __addsf3
|
||||
.global __subsf3
|
||||
FUNC(__addsf3)
|
||||
FUNC(__subsf3)
|
||||
.long 0x7f800000 ; exponent mask
|
||||
__subsf3:
|
||||
bxor_l r1,r1,31
|
||||
__addsf3:
|
||||
ld r9,[pcl,-8]
|
||||
bmsk r4,r0,30
|
||||
xor r10,r0,r1
|
||||
and r6,r1,r9
|
||||
sub.f r12,r4,r6
|
||||
asr_s r12,r12,23
|
||||
blo .Ldbl1_gt
|
||||
brhs r4,r9,.Linf_nan
|
||||
brne r12,0,.Lsmall_shift
|
||||
brge r10,0,.Ladd_same_exp ; r12 == 0
|
||||
/* After subtracting, we need to normalize; when shifting to place the
|
||||
leading 1 into position for the implicit 1 and adding that to DBL0,
|
||||
we increment the exponent. Thus, we have to subtract one more than
|
||||
the shift count from the exponent beforehand. Iff the exponent drops thus
|
||||
below zero (before adding in the fraction with the leading one), we have
|
||||
generated a denormal number. Denormal handling is basically reducing the
|
||||
shift count so that we produce a zero exponent instead; FWIW, this way
|
||||
the shift count can become zero (if we started out with exponent 1).
|
||||
On the plus side, we don't need to check for denorm input, the result
|
||||
of subtracting these looks just the same as denormals generated during
|
||||
subtraction. */
|
||||
bmsk r7,r1,30
|
||||
breq r4,r7,.Lret0
|
||||
sub.f r5,r4,r7
|
||||
lsr r12,r4,23
|
||||
neg.cs r5,r5
|
||||
norm r3,r5
|
||||
bmsk r2,r0,22
|
||||
sub_s r3,r3,6
|
||||
min r12,r12,r3
|
||||
bic r1,r0,r2
|
||||
sub_s r3,r12,1
|
||||
asl_s r12,r12,23
|
||||
asl r2,r5,r3
|
||||
sub_s r1,r1,r12
|
||||
add_s r0,r1,r2
|
||||
j_s.d [blink]
|
||||
bxor.cs r0,r0,31
|
||||
.balign 4
|
||||
.Linf_nan:
|
||||
; If both inputs are inf, but with different signs, the result is NaN.
|
||||
asr r12,r10,31
|
||||
or_s r1,r1,r12
|
||||
j_s.d [blink]
|
||||
or.eq r0,r0,r1
|
||||
.balign 4
|
||||
.Ladd_same_exp:
|
||||
/* This is a special case because we can't test for need to shift
|
||||
down by checking if bit 23 of DBL0 changes. OTOH, here we know
|
||||
that we always need to shift down. */
|
||||
; adding the two floating point numbers together makes the sign
|
||||
; cancel out and appear as carry; the exponent is doubled, and the
|
||||
; fraction also in need of shifting left by one. The two implicit
|
||||
; ones of the sources make an implicit 1 of the result, again
|
||||
; non-existent in a place shifted by one.
|
||||
add.f r0,r0,r1
|
||||
btst_s r0,1
|
||||
breq r6,0,.Ldenorm_add
|
||||
add.ne r0,r0,1 ; round to even.
|
||||
rrc r0,r0
|
||||
bmsk r1,r9,23
|
||||
add r0,r0,r1 ; increment exponent
|
||||
bic.f 0,r9,r0; check for overflow -> infinity.
|
||||
jne_l [blink]
|
||||
mov_s r0,r9
|
||||
j_s.d [blink]
|
||||
bset.cs r0,r0,31
|
||||
|
||||
.Ldenorm_add:
|
||||
j_s.d [blink]
|
||||
add r0,r4,r1
|
||||
|
||||
.Lret_dbl0:
|
||||
j_s [blink]
|
||||
|
||||
.balign 4
|
||||
.Lsmall_shift:
|
||||
brhi r12,25,.Lret_dbl0
|
||||
breq.d r6,0,.Ldenorm_small_shift
|
||||
bmsk_s r1,r1,22
|
||||
bset_s r1,r1,23
|
||||
.Lfixed_denorm_small_shift:
|
||||
neg r8,r12
|
||||
asl r5,r1,r8
|
||||
brge.d r10,0,.Ladd
|
||||
lsr_l r1,r1,r12
|
||||
/* subtract, abs(DBL0) > abs(DBL1) */
|
||||
/* DBL0: original values
|
||||
DBL1: fraction with explicit leading 1, shifted into place
|
||||
r4: orig. DBL0 & 0x7fffffff
|
||||
r6: orig. DBL1 & 0x7f800000
|
||||
r9: 0x7f800000
|
||||
r10: orig. DBL0 ^ DBL1
|
||||
r5 : guard bits */
|
||||
.balign 4
|
||||
.Lsub:
|
||||
neg.f r12,r5
|
||||
bmsk r3,r0,22
|
||||
bset r5,r3,23
|
||||
sbc.f r4,r5,r1
|
||||
beq.d .Large_cancel_sub
|
||||
bic r7,r0,r3
|
||||
norm r3,r4
|
||||
bmsk r6,r7,30
|
||||
.Lsub_done:
|
||||
sub_s r3,r3,6
|
||||
breq r3,1,.Lsub_done_noshift
|
||||
asl r5,r3,23
|
||||
sub_l r3,r3,1
|
||||
brlo r6,r5,.Ldenorm_sub
|
||||
sub r0,r7,r5
|
||||
neg_s r1,r3
|
||||
lsr.f r2,r12,r1
|
||||
asl_s r12,r12,r3
|
||||
btst_s r2,0
|
||||
bmsk.eq.f r12,r12,30
|
||||
asl r5,r4,r3
|
||||
add_s r0,r0,r2
|
||||
adc.ne r0,r0,0
|
||||
j_s.d [blink]
|
||||
add_l r0,r0,r5
|
||||
|
||||
.Lret0:
|
||||
j_s.d [blink]
|
||||
mov_l r0,0
|
||||
|
||||
.balign 4
|
||||
.Ldenorm_small_shift:
|
||||
brne.d r12,1,.Lfixed_denorm_small_shift
|
||||
sub_s r12,r12,1
|
||||
brlt.d r10,0,.Lsub
|
||||
mov_s r5,r12 ; zero r5, and align following code
|
||||
.Ladd: ; Both bit 23 of DBL1 and bit 0 of r5 are clear.
|
||||
bmsk r2,r0,22
|
||||
add_s r2,r2,r1
|
||||
bbit0.d r2,23,.Lno_shiftdown
|
||||
add_s r0,r0,r1
|
||||
bic.f 0,r9,r0; check for overflow -> infinity; eq : infinity
|
||||
bmsk r1,r2,22
|
||||
lsr.ne.f r2,r2,2; cc: even ; hi: might round down
|
||||
lsr.ne r1,r1,1
|
||||
rcmp.hi r5,1; hi : round down
|
||||
bclr.hi r0,r0,0
|
||||
j_l.d [blink]
|
||||
sub_s r0,r0,r1
|
||||
|
||||
/* r4: DBL0H & 0x7fffffff
|
||||
r6: DBL1H & 0x7f800000
|
||||
r9: 0x7f800000
|
||||
r10: sign difference
|
||||
r12: shift count (negative) */
|
||||
.balign 4
|
||||
.Ldbl1_gt:
|
||||
brhs r6,r9,.Lret_dbl1 ; inf or NaN
|
||||
neg r8,r12
|
||||
brhi r8,25,.Lret_dbl1
|
||||
.Lsmall_shift_dbl0:
|
||||
breq.d r6,0,.Ldenorm_small_shift_dbl0
|
||||
bmsk_s r0,r0,22
|
||||
bset_s r0,r0,23
|
||||
.Lfixed_denorm_small_shift_dbl0:
|
||||
asl r5,r0,r12
|
||||
brge.d r10,0,.Ladd_dbl1_gt
|
||||
lsr r0,r0,r8
|
||||
/* subtract, abs(DBL0) < abs(DBL1) */
|
||||
/* DBL0: fraction with explicit leading 1, shifted into place
|
||||
DBL1: original value
|
||||
r6: orig. DBL1 & 0x7f800000
|
||||
r9: 0x7f800000
|
||||
r5: guard bits */
|
||||
.balign 4
|
||||
.Lrsub:
|
||||
neg.f r12,r5
|
||||
bmsk r5,r1,22
|
||||
bic r7,r1,r5
|
||||
bset r5,r5,23
|
||||
sbc.f r4,r5,r0
|
||||
bne.d .Lsub_done ; note: r6 is already set up.
|
||||
norm r3,r4
|
||||
/* Fall through */
|
||||
|
||||
/* r4:r12 : unnormalized result fraction
|
||||
r7: result sign and exponent */
|
||||
/* When seeing large cancellation, only the topmost guard bit might be set. */
|
||||
.balign 4
|
||||
.Large_cancel_sub:
|
||||
breq_s r12,0,.Lret0
|
||||
sub r0,r7,24<<23
|
||||
xor.f 0,r0,r7 ; test if exponent is negative
|
||||
tst.pl r9,r0 ; test if exponent is zero
|
||||
jpnz [blink] ; return if non-denormal result
|
||||
bmsk r6,r7,30
|
||||
lsr r3,r6,23
|
||||
xor r0,r6,r7
|
||||
sub_s r3,r3,24-22
|
||||
j_s.d [blink]
|
||||
bset r0,r0,r3
|
||||
|
||||
; If a denorm is produced, we have an exact result -
|
||||
; no need for rounding.
|
||||
.balign 4
|
||||
.Ldenorm_sub:
|
||||
sub r3,r6,1
|
||||
lsr.f r3,r3,23
|
||||
xor r0,r6,r7
|
||||
neg_s r1,r3
|
||||
asl.ne r4,r4,r3
|
||||
lsr_s r12,r12,r1
|
||||
add_s r0,r0,r4
|
||||
j_s.d [blink]
|
||||
add.ne r0,r0,r12
|
||||
|
||||
.balign 4
|
||||
.Lsub_done_noshift:
|
||||
add.f 0,r12,r12
|
||||
btst.eq r4,0
|
||||
bclr r4,r4,23
|
||||
add r0,r7,r4
|
||||
j_s.d [blink]
|
||||
adc.ne r0,r0,0
|
||||
|
||||
.balign 4
|
||||
.Lno_shiftdown:
|
||||
add.f 0,r5,r5
|
||||
btst.eq r0,0
|
||||
cmp.eq r5,r5
|
||||
j_s.d [blink]
|
||||
add.cs r0,r0,1
|
||||
|
||||
.Lret_dbl1:
|
||||
j_s.d [blink]
|
||||
mov_l r0,r1
|
||||
.balign 4
|
||||
.Ldenorm_small_shift_dbl0:
|
||||
sub.f r8,r8,1
|
||||
bne.d .Lfixed_denorm_small_shift_dbl0
|
||||
add_s r12,r12,1
|
||||
brlt.d r10,0,.Lrsub
|
||||
mov r5,0
|
||||
.Ladd_dbl1_gt: ; both bit 23 of DBL0 and bit 0 of r5 are clear.
|
||||
bmsk r2,r1,22
|
||||
add_s r2,r2,r0
|
||||
bbit0.d r2,23,.Lno_shiftdown_dbl1_gt
|
||||
add_s r0,r1,r0
|
||||
bic.f 0,r9,r0; check for overflow -> infinity; eq : infinity
|
||||
bmsk r1,r2,22
|
||||
lsr.ne.f r2,r2,2; cc: even ; hi: might round down
|
||||
lsr.ne r1,r1,1
|
||||
rcmp.hi r5,1; hi : round down
|
||||
bclr.hi r0,r0,0
|
||||
j_l.d [blink]
|
||||
sub_s r0,r0,r1
|
||||
|
||||
.balign 4
|
||||
.Lno_shiftdown_dbl1_gt:
|
||||
add.f 0,r5,r5
|
||||
btst.eq r0,0
|
||||
cmp.eq r5,r5
|
||||
j_s.d [blink]
|
||||
add.cs r0,r0,1
|
||||
ENDFUNC(__addsf3)
|
||||
ENDFUNC(__subsf3)
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifdef __LITTLE_ENDIAN__ /* Map the two double operands onto r0-r3: DBL0 is r0/r1, DBL1 is r2/r3. */
|
||||
#define DBL0L r0 /* little endian: the L half of each operand is the lower-numbered register */
|
||||
#define DBL0H r1 /* H half of DBL0 (the adddf3 code applies the 0x7ff00000 exponent mask to the H half) */
|
||||
#define DBL1L r2
|
||||
#define DBL1H r3
|
||||
#else /* big endian: the L and H registers of each pair are swapped */
|
||||
#define DBL0L r1
|
||||
#define DBL0H r0
|
||||
#define DBL1L r3
|
||||
#define DBL1H r2
|
||||
#endif
|
||||
#define add_l add
|
||||
#define asr_l asr
|
||||
#define j_l j
|
||||
#define jne_l jne
|
||||
#define jeq_l jeq
|
||||
#define or_l or
|
||||
#define mov_l mov
|
||||
#define b_l b
|
||||
#define beq_l beq
|
||||
#define bne_l bne
|
||||
#define brne_l brne
|
||||
#define bset_l bset
|
||||
#define sub_l sub
|
||||
#define sub1_l sub1
|
||||
#define lsr_l lsr
|
||||
#define xor_l xor
|
||||
#define bic_l bic
|
||||
#define bmsk_l bmsk
|
||||
#define bxor_l bxor
|
||||
#define bcs_s blo_s
|
||||
|
|
@ -0,0 +1,421 @@
|
|||
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/*
|
||||
to calculate a := b/x as b*y, with y := 1/x:
|
||||
- x is in the range [1..2)
|
||||
- calculate 15..18 bit inverse y0 using a table of approximating polynomials.
|
||||
Precision is higher for polynomials used to evaluate input with larger
|
||||
value.
|
||||
- Do one newton-raphson iteration step to double the precision,
|
||||
then multiply this with the divisor
|
||||
-> more time to decide if dividend is subnormal
|
||||
- the worst error propagation is on the side of the value range
|
||||
with the least initial defect, thus giving us about 30 bits precision.
|
||||
The truncation error for the either is less than 1 + x/2 ulp.
|
||||
A 31 bit inverse can be simply calculated by using x with implicit 1
|
||||
and chaining the multiplies. For a 32 bit inverse, we multiply y0^2
|
||||
with the bare fraction part of x, then add in y0^2 for the implicit
|
||||
1 of x.
|
||||
- If calculating a 31 bit inverse, the systematic error is less than
|
||||
-1 ulp; likewise, for 32 bit, it is less than -2 ulp.
|
||||
- If we calculate our seed with a 32 bit fraction, we can achieve a
|
||||
tentative result strictly better than -2 / +2.5 (1) ulp/128, i.e. we
|
||||
only need to take the step to calculate the 2nd stage rest and
|
||||
rounding adjust 1/32nd of the time. However, if we use a 20 bit
|
||||
fraction for the seed, the negative error can exceed -2 ulp/128, (2)
|
||||
thus for a simple add / tst check, we need to do the 2nd stage
|
||||
rest calculation/ rounding adjust 1/16th of the time.
|
||||
(1): The inexactness of the 32 bit inverse contributes an error in the
|
||||
range of (-1 .. +(1+x/2) ) ulp/128. Leaving out the low word of the
|
||||
rest contributes an error < +1/x ulp/128 . In the interval [1,2),
|
||||
x/2 + 1/x <= 1.5 .
|
||||
(2): Unless proven otherwise. I have not actually looked for an
|
||||
example where -2 ulp/128 is exceeded, and my calculations indicate
|
||||
that the excess, if existent, is less than -1/512 ulp.
|
||||
??? The algorithm is still based on the ARC700 optimized code.
|
||||
Maybe we could make better use of 32x16 bit multiply, or 64 bit multiply
|
||||
results.
|
||||
*/
|
||||
#include "../arc-ieee-754.h"
|
||||
#define mlo acc2
|
||||
#define mhi acc1
|
||||
#define mul64(b,c) mullw 0,b,c` machlw 0,b,c
|
||||
#define mulu64(b,c) mululw 0,b,c` machulw 0,b,c
|
||||
|
||||
/* N.B. fp-bit.c does double rounding on denormal numbers. */
|
||||
/* Debug harness, compiled out by the "#if 0".  When enabled it defines
   __divdf3 as a wrapper that calls __divdf3_c and then __divdf3_asm with
   the same operands, returns when both produce identical r0/r1, also
   returns when the bic.f test below passes ("both NaN"), and otherwise
   calls abort.  The #define at the end renames the real implementation
   that follows to __divdf3_asm.  */
#if 0 /* DEBUG */
|
||||
.global __divdf3
|
||||
FUNC(__divdf3)
|
||||
.balign 4
|
||||
__divdf3:
|
||||
push_s blink
|
||||
push_s r2
|
||||
push_s r3
|
||||
push_s r0
|
||||
bl.d __divdf3_c ; first implementation; the push below is its delay slot
|
||||
push_s r1
|
||||
ld_s r2,[sp,12] ; reload the second operand saved above
|
||||
ld_s r3,[sp,8]
|
||||
st_s r0,[sp,12] ; park the __divdf3_c result in the slots that held r2/r3
|
||||
st_s r1,[sp,8]
|
||||
pop_s r1
|
||||
bl.d __divdf3_asm ; second implementation; the pop below is its delay slot
|
||||
pop_s r0
|
||||
pop_s r3 ; r3 = r1 as returned by __divdf3_c
|
||||
pop_s r2 ; r2 = r0 as returned by __divdf3_c
|
||||
pop_s blink
|
||||
cmp r0,r2
|
||||
cmp.eq r1,r3 ; Z only if both result words match
|
||||
jeq_s [blink]
|
||||
and r12,DBL0H,DBL1H ; bits set in the high words of both results
|
||||
bic.f 0,0x7ff80000,r12 ; both NaN -> OK
|
||||
jeq_s [blink]
|
||||
bl abort
|
||||
ENDFUNC(__divdf3)
|
||||
#define __divdf3 __divdf3_asm
|
||||
#endif /* DEBUG */
|
||||
|
||||
FUNC(__divdf3)
|
||||
.balign 4
|
||||
.L7ff00000:
|
||||
.long 0x7ff00000
|
||||
.Ldivtab:
|
||||
.long 0xfc0fffe1
|
||||
.long 0xf46ffdfb
|
||||
.long 0xed1ffa54
|
||||
.long 0xe61ff515
|
||||
.long 0xdf7fee75
|
||||
.long 0xd91fe680
|
||||
.long 0xd2ffdd52
|
||||
.long 0xcd1fd30c
|
||||
.long 0xc77fc7cd
|
||||
.long 0xc21fbbb6
|
||||
.long 0xbcefaec0
|
||||
.long 0xb7efa100
|
||||
.long 0xb32f92bf
|
||||
.long 0xae8f83b7
|
||||
.long 0xaa2f7467
|
||||
.long 0xa5ef6479
|
||||
.long 0xa1cf53fa
|
||||
.long 0x9ddf433e
|
||||
.long 0x9a0f3216
|
||||
.long 0x965f2091
|
||||
.long 0x92df0f11
|
||||
.long 0x8f6efd05
|
||||
.long 0x8c1eeacc
|
||||
.long 0x88eed876
|
||||
.long 0x85dec615
|
||||
.long 0x82eeb3b9
|
||||
.long 0x800ea10b
|
||||
.long 0x7d3e8e0f
|
||||
.long 0x7a8e7b3f
|
||||
.long 0x77ee6836
|
||||
.long 0x756e5576
|
||||
.long 0x72fe4293
|
||||
.long 0x709e2f93
|
||||
.long 0x6e4e1c7f
|
||||
.long 0x6c0e095e
|
||||
.long 0x69edf6c5
|
||||
.long 0x67cde3a5
|
||||
.long 0x65cdd125
|
||||
.long 0x63cdbe25
|
||||
.long 0x61ddab3f
|
||||
.long 0x600d991f
|
||||
.long 0x5e3d868c
|
||||
.long 0x5c6d7384
|
||||
.long 0x5abd615f
|
||||
.long 0x590d4ecd
|
||||
.long 0x576d3c83
|
||||
.long 0x55dd2a89
|
||||
.long 0x545d18e9
|
||||
.long 0x52dd06e9
|
||||
.long 0x516cf54e
|
||||
.long 0x4ffce356
|
||||
.long 0x4e9cd1ce
|
||||
.long 0x4d3cbfec
|
||||
.long 0x4becae86
|
||||
.long 0x4aac9da4
|
||||
.long 0x496c8c73
|
||||
.long 0x483c7bd3
|
||||
.long 0x470c6ae8
|
||||
.long 0x45dc59af
|
||||
.long 0x44bc4915
|
||||
.long 0x43ac3924
|
||||
.long 0x428c27fb
|
||||
.long 0x418c187a
|
||||
.long 0x407c07bd
|
||||
|
||||
__divdf3_support: /* This label makes debugger output saner. */
|
||||
.balign 4
|
||||
.Ldenorm_dbl1:
|
||||
brge r6, \
|
||||
0x43500000,.Linf_NaN ; large number / denorm -> Inf
|
||||
bmsk.f r12,DBL1H,19
|
||||
mov.eq r12,DBL1L
|
||||
mov.eq DBL1L,0
|
||||
sub.eq r7,r7,32
|
||||
norm.f r11,r12 ; flag for x/0 -> Inf check
|
||||
beq_s .Linf_NaN
|
||||
mov.mi r11,0
|
||||
add.pl r11,r11,1
|
||||
add_s r12,r12,r12
|
||||
asl r8,r12,r11
|
||||
rsub r12,r11,31
|
||||
lsr r12,DBL1L,r12
|
||||
tst_s DBL1H,DBL1H
|
||||
or r8,r8,r12
|
||||
lsr r4,r8,26
|
||||
lsr DBL1H,r8,12
|
||||
ld.as r4,[r10,r4]
|
||||
bxor.mi DBL1H,DBL1H,31
|
||||
sub r11,r11,11
|
||||
asl DBL1L,DBL1L,r11
|
||||
sub r11,r11,1
|
||||
mulu64 (r4,r8)
|
||||
sub r7,r7,r11
|
||||
b.d .Lpast_denorm_dbl1
|
||||
asl r7,r7,20
|
||||
|
||||
/* Return path that builds a special result: low word 0, high word
   r9 or-ed with the xor of both input signs.  When the dividend is zero
   (low word zero and high word zero apart from the sign) the high word
   gets all lower bits set as well, i.e. a NaN pattern.  Expects r9 to
   hold the 0x7ff00000 loaded at __divdf3.  */
.Linf_NaN:
|
||||
tst_s DBL0L,DBL0L ; 0/0 -> NaN
|
||||
xor_s DBL1H,DBL1H,DBL0H ; bit 31 = sign of the result
|
||||
bclr.eq.f DBL0H,DBL0H,31 ; low word was 0: Z now tells if the rest of the dividend is 0 too
|
||||
bmsk DBL0H,DBL1H,30
|
||||
xor_s DBL0H,DBL0H,DBL1H ; DBL0H = result sign bit only
|
||||
sub.eq DBL0H,DBL0H,1 ; dividend is zero: subtracting 1 sets all bits below bit 31
|
||||
mov_s DBL0L,0
|
||||
j_s.d [blink]
|
||||
or DBL0H,DBL0H,r9 ; delay slot: merge in the exponent mask
|
||||
.balign 4
|
||||
/* Return a zero whose sign bit is the xor of the sign bits of DBL0H and
   DBL1H: low word 0, high word holding bit 31 only.  */
.Lret0_2:
|
||||
xor_s DBL1H,DBL1H,DBL0H ; bit 31 = sign of the result
|
||||
mov_s DBL0L,0
|
||||
bmsk DBL0H,DBL1H,30 ; DBL0H = lower 31 bits of the xor
|
||||
j_s.d [blink]
|
||||
xor_s DBL0H,DBL0H,DBL1H ; delay slot: cancel the lower bits, keep bit 31
|
||||
.balign 4
|
||||
.global __divdf3
|
||||
/* N.B. the spacing between divtab and the sub3 to get its address must
|
||||
be a multiple of 8. */
|
||||
/* Entry point.  The dividend is read from DBL0H/DBL0L and the divisor from
   DBL1H/DBL1L.  This section fetches the table entry for the divisor,
   loads r9 with 0x7ff00000, extracts both exponent fields (r7 = divisor,
   r6 = dividend) and dispatches the special cases.  r10 (table address)
   and r9 are obtained through hand-computed pcl offsets.  */
__divdf3:
|
||||
asl r8,DBL1H,12 ; drop sign and exponent field of the divisor high word
|
||||
lsr r4,r8,26 ; r4 = top 6 bits of r8, used as table index
|
||||
sub3 r10,pcl,51;(.-.Ldivtab) >> 3
|
||||
ld.as r9,[pcl,-104]; [pcl,(-((.-.L7ff00000) >> 2))] ; 0x7ff00000
|
||||
ld.as r4,[r10,r4] ; r4 = table word selected by the index
|
||||
lsr r12,DBL1L,20
|
||||
and.f r7,DBL1H,r9 ; r7 = divisor exponent field, Z if it is zero
|
||||
or r8,r8,r12 ; r8 = 32 fraction bits taken from both divisor words
|
||||
mulu64 (r4,r8)
|
||||
beq.d .Ldenorm_dbl1 ; divisor exponent field is zero; next insn is the delay slot
|
||||
.Lpast_denorm_dbl1:
|
||||
and.f r6,DBL0H,r9 ; r6 = dividend exponent field, Z if it is zero
|
||||
breq.d r7,r9,.Linf_nan_dbl1 ; divisor exponent field is all ones
|
||||
asl r4,r4,12
|
||||
sub r4,r4,mhi
|
||||
mululw 0,r4,r4
|
||||
machulw r5,r4,r4
|
||||
bne.d .Lnormal_dbl0 ; uses Z from the and.f on r6; falls through for a zero exponent field
|
||||
lsr r8,r8,1
|
||||
|
||||
.balign 4
|
||||
.Ldenorm_dbl0:
|
||||
bmsk.f r12,DBL0H,19
|
||||
; wb stall
|
||||
mov.eq r12,DBL0L
|
||||
sub.eq r6,r6,32
|
||||
norm.f r11,r12 ; flag for 0/x -> 0 check
|
||||
brge r7, \
|
||||
0x43500000, .Lret0_2 ; denorm/large number -> 0
|
||||
beq_s .Lret0_2
|
||||
mov.mi r11,0
|
||||
add.pl r11,r11,1
|
||||
asl r12,r12,r11
|
||||
sub r6,r6,r11
|
||||
add.f 0,r6,31
|
||||
lsr r10,DBL0L,r6
|
||||
mov.mi r10,0
|
||||
add r6,r6,11+32
|
||||
neg.f r11,r6
|
||||
asl DBL0L,DBL0L,r11
|
||||
mov.pl DBL0L,0
|
||||
sub r6,r6,32-1
|
||||
b.d .Lpast_denorm_dbl0
|
||||
asl r6,r6,20
|
||||
|
||||
.balign 4
|
||||
/* Divisor exponent field is all ones (r7 == r9).  r6 holds the dividend
   exponent field.  Two outcomes:
   - dividend exponent field and low word both zero, or dividend exponent
     field also all ones: high word 0xffffffff (bit 31 then toggled if the
     dividend is negative) and low word ~DBL1H;
   - otherwise: high word DBL1H (bit 31 toggled if the dividend is
     negative) and low word 0.
   NOTE(review): the second outcome hands back the divisor high word
   unchanged apart from the sign; confirm this matches the "x/Inf-> 0"
   case listed in the label comment below.  */
.Linf_nan_dbl1: ; 0/Inf -> NaN Inf/Inf -> NaN x/Inf-> 0 x/NaN -> NaN
|
||||
or.f 0,r6,DBL0L ; Z if dividend exponent field and low word are both 0
|
||||
cmp.ne r6,r9 ; else Z if the dividend exponent field is all ones
|
||||
not_s DBL0L,DBL1H
|
||||
sub_s.ne DBL0L,DBL0L,DBL0L ; Z clear: low word = 0
|
||||
tst_s DBL0H,DBL0H ; N = dividend sign, sampled before DBL0H is overwritten
|
||||
add_s DBL0H,DBL1H,DBL0L ; DBL1H + ~DBL1H = all ones, or DBL1H + 0
|
||||
j_s.d [blink]
|
||||
bxor.mi DBL0H,DBL0H,31 ; delay slot: toggle bit 31 for a negative dividend
|
||||
|
||||
.balign 4
|
||||
.Lnormal_dbl0:
|
||||
breq.d r6,r9,.Linf_nan_dbl0
|
||||
asl r12,DBL0H,11
|
||||
lsr r10,DBL0L,21
|
||||
.Lpast_denorm_dbl0:
|
||||
bset r8,r8,31
|
||||
mulu64 (r5,r8)
|
||||
add_s r12,r12,r10
|
||||
bset r5,r12,31
|
||||
cmp r5,r8
|
||||
cmp.eq DBL0L,DBL1L
|
||||
lsr.cc r5,r5,1
|
||||
sub r4,r4,mhi ; u1.31 inverse, about 30 bit
|
||||
mululw 0,r5,r4
|
||||
machulw r11,r5,r4 ; result fraction highpart
|
||||
lsr r8,r8,2 ; u3.29
|
||||
add r5,r6, /* wait for immediate */ \
|
||||
0x3fe00000
|
||||
mulu64 (r11,r8) ; u-28.31
|
||||
asl_s DBL1L,DBL1L,9 ; u-29.23:9
|
||||
sbc r6,r5,r7
|
||||
mov r12,mlo ; u-28.31
|
||||
mulu64 (r11,DBL1L) ; mhi: u-28.23:9
|
||||
add.cs DBL0L,DBL0L,DBL0L
|
||||
asl_s DBL0L,DBL0L,6 ; u-26.25:7
|
||||
asl r10,r11,23
|
||||
sub_l DBL0L,DBL0L,r12
|
||||
lsr r7,r11,9
|
||||
sub r5,DBL0L,mhi ; rest msw ; u-26.31:0
|
||||
mul64 (r5,r4) ; mhi: result fraction lowpart
|
||||
xor.f 0,DBL0H,DBL1H
|
||||
and DBL0H,r6,r9
|
||||
add_s DBL0H,DBL0H,r7
|
||||
bclr r12,r9,20 ; 0x7fe00000
|
||||
brhs.d r6,r12,.Linf_denorm
|
||||
bxor.mi DBL0H,DBL0H,31
|
||||
add.f r12,mhi,0x11
|
||||
asr r9,r12,5
|
||||
sub.mi DBL0H,DBL0H,1
|
||||
add.f DBL0L,r9,r10
|
||||
tst r12,0x1c
|
||||
jne.d [blink]
|
||||
add.cs DBL0H,DBL0H,1
|
||||
/* work out exact rounding if we fall through here. */
|
||||
/* We know that the exact result cannot be represented in double
|
||||
precision. Find the mid-point between the two nearest
|
||||
representable values, multiply with the divisor, and check if
|
||||
the result is larger than the dividend. Since we want to know
|
||||
only the sign bit, it is sufficient to calculate only the
|
||||
highpart of the lower 64 bits. */
|
||||
mulu64 (r11,DBL1L) ; rest before considering r12 in r5 : -mlo
|
||||
sub.f DBL0L,DBL0L,1
|
||||
asl r12,r9,2 ; u-22.30:2
|
||||
sub.cs DBL0H,DBL0H,1
|
||||
sub.f r12,r12,2
|
||||
mov r10,mlo ; rest before considering r12 in r5 : -r10
|
||||
mululw 0,r12,DBL1L
|
||||
machulw r7,r12,DBL1L ; mhi: u-51.32
|
||||
asl r5,r5,25 ; s-51.7:25
|
||||
lsr r10,r10,7 ; u-51.30:2
|
||||
mulu64 (r12,r8) ; mlo: u-51.31:1
|
||||
sub r5,r5,r10
|
||||
add.mi r5,r5,DBL1L ; signed multiply adjust for r12*DBL1L
|
||||
bset r7,r7,0 ; make sure that the result is not zero, and that
|
||||
sub r5,r5,r7 ; a highpart zero appears negative
|
||||
sub.f r5,r5,mlo ; rest msw
|
||||
add.pl.f DBL0L,DBL0L,1
|
||||
j_s.d [blink]
|
||||
add.eq DBL0H,DBL0H,1
|
||||
|
||||
/* Dividend exponent field is all ones (r6 == r9) and the divisor did not
   take the .Linf_nan_dbl1 exit: return the dividend as it is, with bit 31
   toggled when the divisor is negative.  DBL0L is left untouched.  */
.Linf_nan_dbl0:
|
||||
tst_s DBL1H,DBL1H ; N = divisor sign
|
||||
j_s.d [blink]
|
||||
bxor.mi DBL0H,DBL0H,31 ; delay slot
|
||||
.balign 4
|
||||
.Linf_denorm:
|
||||
lsr r12,r6,28
|
||||
brlo.d r12,0xc,.Linf
|
||||
.Ldenorm:
|
||||
asr r6,r6,20
|
||||
neg r9,r6
|
||||
mov_s DBL0H,0
|
||||
brhs.d r9,54,.Lret0
|
||||
bxor.mi DBL0H,DBL0H,31
|
||||
add r12,mhi,1
|
||||
and r12,r12,-4
|
||||
rsub r7,r6,5
|
||||
asr r10,r12,28
|
||||
bmsk r4,r12,27
|
||||
min r7,r7,31
|
||||
asr DBL0L,r4,r7
|
||||
add DBL1H,r11,r10
|
||||
abs.f r10,r4
|
||||
sub.mi r10,r10,1
|
||||
add.f r7,r6,32-5
|
||||
asl r4,r4,r7
|
||||
mov.mi r4,r10
|
||||
add.f r10,r6,23
|
||||
rsub r7,r6,9
|
||||
lsr r7,DBL1H,r7
|
||||
asl r10,DBL1H,r10
|
||||
or.pnz DBL0H,DBL0H,r7
|
||||
or.mi r4,r4,r10
|
||||
mov.mi r10,r7
|
||||
add.f DBL0L,r10,DBL0L
|
||||
add.cs.f DBL0H,DBL0H,1 ; carry clear after this point
|
||||
bxor.f 0,r4,31
|
||||
add.pnz.f DBL0L,DBL0L,1
|
||||
add.cs.f DBL0H,DBL0H,1
|
||||
jne_s [blink]
|
||||
/* Calculation so far was not conclusive; calculate further rest. */
|
||||
mulu64 (r11,DBL1L) ; rest before considering r12 in r5 : -mlo
|
||||
asr.f r12,r12,3
|
||||
asl r5,r5,25 ; s-51.7:25
|
||||
mov r11,mlo ; rest before considering r12 in r5 : -r11
|
||||
mulu64 (r12,r8) ; u-51.31:1
|
||||
and r9,DBL0L,1 ; tie-breaker: round to even
|
||||
lsr r11,r11,7 ; u-51.30:2
|
||||
mov DBL1H,mlo ; u-51.31:1
|
||||
mulu64 (r12,DBL1L) ; u-51.62:2
|
||||
sub.mi r11,r11,DBL1L ; signed multiply adjust for r12*DBL1L
|
||||
add_s DBL1H,DBL1H,r11
|
||||
sub DBL1H,DBL1H,r5 ; -rest msw
|
||||
add_s DBL1H,DBL1H,mhi ; -rest msw
|
||||
add.f 0,DBL1H,DBL1H ; can't ror.f by 32 :-(
|
||||
tst_s DBL1H,DBL1H
|
||||
cmp.eq mlo,r9
|
||||
add.cs.f DBL0L,DBL0L,1
|
||||
j_s.d [blink]
|
||||
add.cs DBL0H,DBL0H,1
|
||||
|
||||
; .Lret0 only clears the low word; the branch in .Ldenorm that comes here
; has already left a zero with the result sign in DBL0H.
.Lret0:
|
||||
/* return +- 0 */
|
||||
j_s.d [blink]
|
||||
mov_s DBL0L,0
|
||||
; .Linf returns r9 as the high word with a zero low word, toggling bit 31
; when N is set.  N is expected to still come from the xor.f of DBL0H and
; DBL1H in the main path, and r9 to still hold 0x7ff00000.
.Linf:
|
||||
mov_s DBL0H,r9
|
||||
mov_s DBL0L,0
|
||||
j_s.d [blink]
|
||||
bxor.mi DBL0H,DBL0H,31 ; delay slot
|
||||
ENDFUNC(__divdf3)
|
||||
|
|
@ -0,0 +1,274 @@
|
|||
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/*
|
||||
- calculate 15..18 bit inverse using a table of approximating polynomials.
|
||||
precision is higher for polynomials used to evaluate input with larger
|
||||
value.
|
||||
- do one newton-raphson iteration step to double the precision,
|
||||
then multiply this with the divisor
|
||||
-> more time to decide if dividend is subnormal
|
||||
- the worst error propagation is on the side of the value range
|
||||
with the least initial defect, thus giving us about 30 bits precision.
|
||||
*/
|
||||
#include "../arc-ieee-754.h"
|
||||
#define mlo acc2
|
||||
#define mhi acc1
|
||||
#define mul64(b,c) mullw 0,b,c` machlw 0,b,c
|
||||
#define mulu64(b,c) mululw 0,b,c` machulw 0,b,c
|
||||
|
||||
/* Debug harness, compiled out by the "#if 0".  When enabled it defines
   __divsf3 as a wrapper that calls __divsf3_c and then __divsf3_asm with
   the same r0/r1, and branches to abort unless both return the same r0.
   The #define at the end renames the real implementation that follows to
   __divsf3_asm.  */
#if 0 /* DEBUG */
|
||||
.global __divsf3
|
||||
FUNC(__divsf3)
|
||||
.balign 4
|
||||
__divsf3:
|
||||
push_s blink
|
||||
push_s r1
|
||||
bl.d __divsf3_c ; first implementation; the push below is its delay slot
|
||||
push_s r0
|
||||
ld_s r1,[sp,4] ; reload the second operand
|
||||
st_s r0,[sp,4] ; park the __divsf3_c result in that stack slot
|
||||
bl.d __divsf3_asm ; second implementation; the pop below is its delay slot
|
||||
pop_s r0
|
||||
pop_s r1 ; r1 = result of __divsf3_c
|
||||
pop_s blink
|
||||
cmp r0,r1
|
||||
#if 1
|
||||
bne abort
|
||||
jeq_s [blink]
|
||||
b abort
|
||||
#else
|
||||
bne abort
|
||||
j_s [blink]
|
||||
#endif
|
||||
ENDFUNC(__divsf3)
|
||||
#define __divsf3 __divsf3_asm
|
||||
#endif /* DEBUG */
|
||||
|
||||
FUNC(__divsf3)
|
||||
.balign 4
|
||||
.Ldivtab:
|
||||
.long 0xfc0ffff0
|
||||
.long 0xf46ffefd
|
||||
.long 0xed1ffd2a
|
||||
.long 0xe627fa8e
|
||||
.long 0xdf7ff73b
|
||||
.long 0xd917f33b
|
||||
.long 0xd2f7eea3
|
||||
.long 0xcd1fe986
|
||||
.long 0xc77fe3e7
|
||||
.long 0xc21fdddb
|
||||
.long 0xbcefd760
|
||||
.long 0xb7f7d08c
|
||||
.long 0xb32fc960
|
||||
.long 0xae97c1ea
|
||||
.long 0xaa27ba26
|
||||
.long 0xa5e7b22e
|
||||
.long 0xa1cfa9fe
|
||||
.long 0x9ddfa1a0
|
||||
.long 0x9a0f990c
|
||||
.long 0x9667905d
|
||||
.long 0x92df878a
|
||||
.long 0x8f6f7e84
|
||||
.long 0x8c27757e
|
||||
.long 0x88f76c54
|
||||
.long 0x85df630c
|
||||
.long 0x82e759c5
|
||||
.long 0x8007506d
|
||||
.long 0x7d3f470a
|
||||
.long 0x7a8f3da2
|
||||
.long 0x77ef341e
|
||||
.long 0x756f2abe
|
||||
.long 0x72f7212d
|
||||
.long 0x709717ad
|
||||
.long 0x6e4f0e44
|
||||
.long 0x6c1704d6
|
||||
.long 0x69e6fb44
|
||||
.long 0x67cef1d7
|
||||
.long 0x65c6e872
|
||||
.long 0x63cedf18
|
||||
.long 0x61e6d5cd
|
||||
.long 0x6006cc6d
|
||||
.long 0x5e36c323
|
||||
.long 0x5c76b9f3
|
||||
.long 0x5abeb0b7
|
||||
.long 0x5916a79b
|
||||
.long 0x57769e77
|
||||
.long 0x55de954d
|
||||
.long 0x54568c4e
|
||||
.long 0x52d6834d
|
||||
.long 0x51667a7f
|
||||
.long 0x4ffe71b5
|
||||
.long 0x4e9e68f1
|
||||
.long 0x4d466035
|
||||
.long 0x4bf65784
|
||||
.long 0x4aae4ede
|
||||
.long 0x496e4646
|
||||
.long 0x48363dbd
|
||||
.long 0x47063547
|
||||
.long 0x45de2ce5
|
||||
.long 0x44be2498
|
||||
.long 0x43a61c64
|
||||
.long 0x4296144a
|
||||
.long 0x41860c0e
|
||||
.long 0x407e03ee
|
||||
.L7f800000:
|
||||
.long 0x7f800000
|
||||
.balign 4
|
||||
.global __divsf3_support
|
||||
/* Two return stubs placed ahead of the entry point.
   .Linf_NaN returns r9 or-ed with the xor of both sign bits; if r0 is zero
   apart from its sign, all lower bits are set as well (a NaN pattern).
   It expects r9 to hold the 0x7f800000 loaded at __divsf3.
   .Lret0 returns a zero carrying the xor of both sign bits.  */
__divsf3_support:
|
||||
.Linf_NaN:
|
||||
bclr.f 0,r0,31 ; 0/0 -> NaN
|
||||
xor_s r0,r0,r1 ; bit 31 = sign of the result
|
||||
bmsk r1,r0,30
|
||||
bic_s r0,r0,r1 ; r0 = result sign bit only
|
||||
sub.eq r0,r0,1 ; r0 was zero: subtracting 1 sets all bits below bit 31
|
||||
j_s.d [blink]
|
||||
or r0,r0,r9 ; delay slot: merge in the exponent mask
|
||||
.Lret0:
|
||||
xor_s r0,r0,r1 ; bit 31 = sign of the result
|
||||
bmsk r1,r0,30
|
||||
j_s.d [blink]
|
||||
bic_s r0,r0,r1 ; delay slot: keep bit 31 only
|
||||
/* N.B. the spacing between divtab and the sub3 to get its address must
|
||||
be a multiple of 8. */
|
||||
/* __divsf3: single-precision divide.  The dividend is read from r0 and
   the divisor from r1; the quotient is returned in r0.  r9 is loaded with
   0x7f800000 and r3 with the address of .Ldivtab, both through
   hand-computed pcl offsets (see the N.B. above about table spacing).
   Register roles on the main path: r11 = divisor exponent field,
   r2 = dividend exponent field and later the result exponent,
   r6 = divisor fraction with bit 31 set, r12 = dividend shifted left by 8.  */
__divsf3:
|
||||
ld.as r9,[pcl,-9]; [pcl,(-((.-.L7f800000) >> 2))] ; 0x7f800000
|
||||
sub3 r3,pcl,37;(.-.Ldivtab) >> 3
|
||||
lsr r2,r1,17
|
||||
and.f r11,r1,r9 ; r11 = divisor exponent field, Z if it is zero
|
||||
bmsk r5,r2,5 ; r5 = bits 17..22 of the divisor, used as table index
|
||||
beq.d .Ldenorm_fp1 ; divisor exponent field is zero
|
||||
asl r6,r1,8 ; delay slot
|
||||
and.f r2,r0,r9 ; r2 = dividend exponent field, Z if it is zero
|
||||
ld.as r5,[r3,r5] ; r5 = table word selected by the index
|
||||
asl r4,r1,9
|
||||
bset r6,r6,31
|
||||
breq.d r11,r9,.Linf_nan_fp1 ; divisor exponent field is all ones
|
||||
.Lpast_denorm_fp1:
|
||||
mululw 0,r5,r4
|
||||
machulw r8,r5,r4
|
||||
breq.d r2,r9,.Linf_nan_fp0 ; dividend exponent field is all ones
|
||||
asl r5,r5,13
|
||||
sub r7,r5,r8
|
||||
mululw 0,r7,r6
|
||||
machulw r8,r7,r6
|
||||
beq.d .Ldenorm_fp0 ; Z still comes from the and.f that set r2
|
||||
asl r12,r0,8
|
||||
mulu64 (r8,r7)
|
||||
bset r3,r12,31
|
||||
.Lpast_denorm_fp0:
|
||||
cmp_s r3,r6 ; carry is consumed by the lsr.cc and the sbc below
|
||||
lsr.cc r3,r3,1
|
||||
add_s r2,r2, /* wait for immediate */ \
|
||||
0x3f000000
|
||||
sub r7,r7,mhi ; u1.31 inverse, about 30 bit
|
||||
mulu64 (r3,r7)
|
||||
sbc r2,r2,r11 ; r2 = r2 + 0x3f000000 - r11 - carry
|
||||
xor.f 0,r0,r1 ; N = sign of the result
|
||||
and r0,r2,r9 ; r0 = exponent field of the result
|
||||
bclr r3,r9,23 ; 0x7f000000
|
||||
brhs.d r2,r3,.Linf_denorm ; exponent outside the normal range
|
||||
bxor.mi r0,r0,31 ; delay slot: apply the sign
|
||||
.Lpast_denorm:
|
||||
add r3,mhi,0x22 ; round to nearest or higher
|
||||
tst r3,0x3c ; check if rounding was unsafe
|
||||
lsr r3,r3,6
|
||||
jne.d [blink] ; return if rounding was safe.
|
||||
add_s r0,r0,r3
|
||||
/* work out exact rounding if we fall through here. */
|
||||
/* We know that the exact result cannot be represented in single
|
||||
precision. Find the mid-point between the two nearest
|
||||
representable values, multiply with the divisor, and check if
|
||||
the result is larger than the dividend. */
|
||||
add_s r3,r3,r3
|
||||
sub_s r3,r3,1
|
||||
mulu64 (r3,r6)
|
||||
asr.f 0,r0,1 ; for round-to-even in case this is a denorm
|
||||
rsub r2,r9,25
|
||||
asl_s r12,r12,r2
|
||||
sub.f 0,r12,mlo
|
||||
j_s.d [blink]
|
||||
sub.mi r0,r0,1 ; delay slot: step the result down by one when the subtraction is negative
|
||||
/* .Linf_nan_fp1: the divisor exponent field is all ones (r11 == r9);
   r2 holds the dividend exponent field.  Returns all ones in r0 when the
   divisor fraction is non-zero or the dividend exponent field is all ones
   as well; otherwise returns a zero carrying the xor of both signs.
   .Linf_nan_fp0: the dividend exponent field is all ones; r0 is returned
   with bit 31 toggled when the divisor is negative.  */
.Linf_nan_fp1:
|
||||
lsr_s r0,r0,31
|
||||
bmsk.f 0,r1,22 ; Z iff the divisor fraction bits are all zero
|
||||
asl_s r0,r0,31 ; r0 = sign bit of the dividend only
|
||||
; The branch below is taken when the divisor fraction is non-zero.
bne_s 0f ; inf/inf -> nan
|
||||
brne r2,r9,.Lsigned0 ; x/inf -> 0, but x/nan -> nan
|
||||
0: j_s.d [blink]
|
||||
mov r0,-1 ; delay slot: all-ones result
|
||||
.Lsigned0:
|
||||
.Linf_nan_fp0:
|
||||
tst_s r1,r1 ; N = divisor sign
|
||||
j_s.d [blink]
|
||||
bxor.mi r0,r0,31 ; delay slot
|
||||
.balign 4
|
||||
.global __divsf3
|
||||
/* For denormal results, it is possible that an exact result needs
|
||||
rounding, and thus the round-to-even rule has to come into play. */
|
||||
; Reached from the main path when r2 is >= 0x7f000000 (unsigned).  Values
; below 0xc0000000 go to .Linf; the rest are handled as a denormal or
; zero result.  On entry r0 holds r2 masked with r9, with the sign applied.
.Linf_denorm:
|
||||
brlo r2,0xc0000000,.Linf
|
||||
.Ldenorm:
|
||||
asr_s r2,r2,23
|
||||
bic r0,r0,r9 ; drop the exponent field from r0
|
||||
neg r9,r2 ; r9 is reused as a count from here on
|
||||
brlo.d r9,25,.Lpast_denorm
|
||||
; NOTE(review): r3 written in this delay slot is overwritten by the first
; instruction at .Lpast_denorm (add r3,mhi,0x22); confirm whether the
; shifted value is meant to be used there.
lsr r3,mlo,r9
|
||||
/* Fall through: return +- 0 */
|
||||
j_s [blink]
|
||||
.Linf:
|
||||
j_s.d [blink]
|
||||
or r0,r0,r9 ; delay slot: r9 is still 0x7f800000 on this path
|
||||
.balign 4
|
||||
.Ldenorm_fp1:
|
||||
norm.f r12,r6 ; flag for x/0 -> Inf check
|
||||
add r6,r6,r6
|
||||
rsub r5,r12,16
|
||||
ror r5,r1,r5
|
||||
bmsk r5,r5,5
|
||||
bic.ne.f 0, \
|
||||
0x60000000,r0 ; large number / denorm -> Inf
|
||||
ld.as r5,[r3,r5]
|
||||
asl r6,r6,r12
|
||||
beq.d .Linf_NaN
|
||||
and.f r2,r0,r9
|
||||
add r4,r6,r6
|
||||
asl_s r12,r12,23
|
||||
bne.d .Lpast_denorm_fp1
|
||||
add_s r2,r2,r12
|
||||
.Ldenorm_fp0:
|
||||
mulu64 (r8,r7)
|
||||
bclr r12,r12,31
|
||||
norm.f r3,r12 ; flag for 0/x -> 0 check
|
||||
bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0
|
||||
beq_s .Lret0
|
||||
asl_s r12,r12,r3
|
||||
asl_s r3,r3,23
|
||||
add_s r12,r12,r12
|
||||
add r11,r11,r3
|
||||
b.d .Lpast_denorm_fp0
|
||||
mov_s r3,r12
|
||||
ENDFUNC(__divsf3)
|
||||
|
|
@ -0,0 +1,231 @@
|
|||
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "../arc-ieee-754.h"
|
||||
|
||||
/* Debug harness, compiled out by the "#if 0".  When enabled it defines
   __muldf3 as a wrapper that calls __muldf3_c and then __muldf3_asm with
   the same operands, returns when both produce identical r0/r1 and
   branches to abort otherwise.  The #define at the end renames the real
   implementation that follows to __muldf3_asm.  */
#if 0 /* DEBUG */
|
||||
.global __muldf3
|
||||
.balign 4
|
||||
__muldf3:
|
||||
push_s blink
|
||||
push_s r2
|
||||
push_s r3
|
||||
push_s r0
|
||||
bl.d __muldf3_c ; first implementation; the push below is its delay slot
|
||||
push_s r1
|
||||
ld_s r2,[sp,12] ; reload the second operand saved above
|
||||
ld_s r3,[sp,8]
|
||||
st_s r0,[sp,12] ; park the __muldf3_c result in the slots that held r2/r3
|
||||
st_s r1,[sp,8]
|
||||
pop_s r1
|
||||
bl.d __muldf3_asm ; second implementation; the pop below is its delay slot
|
||||
pop_s r0
|
||||
pop_s r3 ; r3 = r1 as returned by __muldf3_c
|
||||
pop_s r2 ; r2 = r0 as returned by __muldf3_c
|
||||
pop_s blink
|
||||
cmp r0,r2
|
||||
cmp.eq r1,r3 ; Z only if both result words match
|
||||
jeq_s [blink]
|
||||
b abort
|
||||
#define __muldf3 __muldf3_asm
|
||||
#endif /* DEBUG */
|
||||
|
||||
__muldf3_support: /* This label makes debugger output saner. */
|
||||
.balign 4
|
||||
FUNC(__muldf3)
|
||||
.Ldenorm_2:
|
||||
breq.d DBL1L,0,.Lret0_2 ; 0 input -> 0 output
|
||||
norm.f r12,DBL1L
|
||||
mov.mi r12,21
|
||||
add.pl r12,r12,22
|
||||
neg r11,r12
|
||||
asl_s r12,r12,20
|
||||
lsr.f DBL1H,DBL1L,r11
|
||||
ror DBL1L,DBL1L,r11
|
||||
sub_s DBL0H,DBL0H,r12
|
||||
mov.eq DBL1H,DBL1L
|
||||
sub_l DBL1L,DBL1L,DBL1H
|
||||
/* Fall through. */
|
||||
.global __muldf3
|
||||
.balign 4
|
||||
__muldf3:
|
||||
mululw 0,DBL0L,DBL1L
|
||||
machulw r4,DBL0L,DBL1L
|
||||
ld.as r9,[pcl,0x67] ; ((.L7ff00000-.+2)/4)]
|
||||
bmsk r6,DBL0H,19
|
||||
bset r6,r6,20
|
||||
mov r8,acc2
|
||||
mululw 0,r4,1
|
||||
and r11,DBL0H,r9
|
||||
breq.d r11,0,.Ldenorm_dbl0
|
||||
and r12,DBL1H,r9
|
||||
breq.d r12,0,.Ldenorm_dbl1
|
||||
maclw 0,r6,DBL1L
|
||||
machulw 0,r6,DBL1L
|
||||
breq.d r11,r9,.Linf_nan
|
||||
bmsk r10,DBL1H,19
|
||||
breq.d r12,r9,.Linf_nan
|
||||
bset r10,r10,20
|
||||
maclw 0,r10,DBL0L
|
||||
machulw r5,r10,DBL0L
|
||||
add_s r12,r12,r11 ; add exponents
|
||||
mov r4,acc2
|
||||
mululw 0,r5,1
|
||||
maclw 0,r6,r10
|
||||
machulw r7,r6,r10 ; fraction product in r7:acc2:r4:r8
|
||||
tst r8,r8
|
||||
bclr r8,r9,30 ; 0x3ff00000
|
||||
bset.ne r4,r4,0 ; put least significant word into sticky bit
|
||||
bclr r6,r9,20 ; 0x7fe00000
|
||||
lsr.f r10,r7,9
|
||||
rsub.eq r8,r8,r9 ; 0x40000000
|
||||
sub r12,r12,r8 ; subtract bias + implicit 1
|
||||
brhs.d r12,r6,.Linf_denorm
|
||||
rsub r10,r10,12
|
||||
.Lshift_frac:
|
||||
neg r8,r10
|
||||
asl r6,r4,r10
|
||||
lsr DBL0L,r4,r8
|
||||
add.f 0,r6,r6
|
||||
btst.eq DBL0L,0
|
||||
cmp.eq r4,r4 ; round to nearest / round to even
|
||||
asl r4,acc2,r10
|
||||
lsr r5,acc2,r8
|
||||
adc.f DBL0L,DBL0L,r4
|
||||
xor.f 0,DBL0H,DBL1H
|
||||
asl r7,r7,r10
|
||||
add_s r12,r12,r5
|
||||
adc DBL0H,r12,r7
|
||||
j_s.d [blink]
|
||||
bset.mi DBL0H,DBL0H,31
|
||||
|
||||
/* N.B. This is optimized for ARC700.
|
||||
ARC600 has very different scheduling / instruction selection criteria. */
|
||||
|
||||
/* If one number is denormal, subtract some from the exponent of the other
|
||||
one (if the other exponent is too small, return 0), and normalize the
|
||||
denormal. Then re-run the computation. */
|
||||
; Return a zero: keep only bit 31 of DBL0H and clear the low word.
.Lret0_2:
|
||||
lsr_s DBL0H,DBL0H,31
|
||||
asl_s DBL0H,DBL0H,31 ; DBL0H = its own bit 31, all lower bits cleared
|
||||
j_s.d [blink]
|
||||
mov_s DBL0L,0 ; delay slot
|
||||
.balign 4
|
||||
; .Ldenorm_dbl0: operand 0 has a zero exponent field.  Swap the two
; operands through r12, recompute r11 for the new operand 0 and fall into
; .Ldenorm_dbl1, so the operand with the zero exponent field is always in
; DBL1H/DBL1L below.
.Ldenorm_dbl0:
|
||||
mov_s r12,DBL0L
|
||||
mov_s DBL0L,DBL1L
|
||||
mov_s DBL1L,r12
|
||||
mov_s r12,DBL0H
|
||||
mov_s DBL0H,DBL1H
|
||||
mov_s DBL1H,r12
|
||||
and r11,DBL0H,r9 ; r11 = exponent field of the new operand 0
|
||||
.Ldenorm_dbl1:
|
||||
brhs r11,r9,.Linf_nan ; the other operand has an all-ones exponent field
|
||||
brhs 0x3ca00001,r11,.Lret0 ; r11 <= 0x3ca00001: other exponent too small, return 0
|
||||
sub_s DBL0H,DBL0H,DBL1H
|
||||
bmsk.f DBL1H,DBL1H,30 ; DBL1H = magnitude bits only, Z if they are all zero
|
||||
add_s DBL0H,DBL0H,DBL1H ; net effect of sub/add: bit 31 of DBL0H toggled if DBL1H was negative
|
||||
beq.d .Ldenorm_2 ; high word holds no fraction bits
|
||||
norm r12,DBL1H ; delay slot
|
||||
sub_s r12,r12,10 ; r12 = left shift count applied to the fraction below
|
||||
asl r5,r12,20 ; the same count, moved to exponent field position
|
||||
asl_s DBL1H,DBL1H,r12
|
||||
sub DBL0H,DBL0H,r5 ; take the shift out of the exponent of operand 0
|
||||
neg r5,r12
|
||||
lsr r6,DBL1L,r5 ; r6 = bits of the low word that move up into the high word
|
||||
asl_s DBL1L,DBL1L,r12
|
||||
b.d __muldf3 ; re-run the multiplication with the adjusted operands
|
||||
add_s DBL1H,DBL1H,r6 ; delay slot
|
||||
|
||||
; Return a zero whose bit 31 is the xor of bit 31 of DBL0H and DBL1H.
.Lret0: xor_s DBL0H,DBL0H,DBL1H
|
||||
bclr DBL1H,DBL0H,31 ; DBL1H = lower 31 bits of the xor
|
||||
xor_s DBL0H,DBL0H,DBL1H ; cancel the lower bits, keep bit 31
|
||||
j_s.d [blink]
|
||||
mov_s DBL0L,0 ; delay slot
|
||||
|
||||
.balign 4
|
||||
/* At least one operand has an all-ones exponent field.  The result low
   word is always 0.  The high word is all ones when either operand is
   zero; otherwise it is the larger of the two high words with bit 31
   cleared.  Bit 31 of the result is then set when the input signs
   differ.  */
.Linf_nan:
|
||||
bclr r12,DBL1H,31 ; r12 = high word of operand 1 without its sign
|
||||
xor_s DBL1H,DBL1H,DBL0H ; bit 31 = xor of both signs
|
||||
bclr_s DBL0H,DBL0H,31 ; DBL0H = high word of operand 0 without its sign
|
||||
max r8,DBL0H,r12 ; either NaN -> NaN ; otherwise inf
|
||||
or.f 0,DBL0H,DBL0L ; Z if operand 0 is zero
|
||||
mov_s DBL0L,0
|
||||
or.ne.f DBL1L,DBL1L,r12 ; operand 0 non-zero: Z if operand 1 is zero
|
||||
not_s DBL0H,DBL0L ; inf * 0 -> NaN
|
||||
mov.ne DBL0H,r8 ; neither operand is zero: use the max computed above
|
||||
tst_s DBL1H,DBL1H ; N = xor of both signs
|
||||
j_s.d [blink]
|
||||
bset.mi DBL0H,DBL0H,31 ; delay slot
|
||||
|
||||
/* We have checked for infinity / NaN input before, and transformed
|
||||
denormalized inputs into normalized inputs. Thus, the worst case
|
||||
exponent overflows are:
|
||||
1 + 1 - 0x400 == 0xc02 : maximum underflow
|
||||
0x7fe + 0x7fe - 0x3ff == 0xbfd ; maximum overflow
|
||||
N.B. 0x7e and 0x7f are also values for overflow.
|
||||
|
||||
If (r12 <= -54), we have an underflow to zero. */
|
||||
.balign 4
|
||||
.Linf_denorm:
|
||||
lsr r6,r12,28
|
||||
brlo.d r6,0xc,.Linf
|
||||
asr r6,r12,20
|
||||
add.f r10,r10,r6
|
||||
brgt.d r10,0,.Lshift_frac
|
||||
mov_s r12,0
|
||||
beq.d .Lround_frac
|
||||
add r10,r10,32
|
||||
.Lshift32_frac:
|
||||
tst r4,r4
|
||||
mov r4,acc2
|
||||
bset.ne r4,r4,1
|
||||
mululw 0,r7,1
|
||||
brge.d r10,1,.Lshift_frac
|
||||
mov r7,0
|
||||
breq.d r10,0,.Lround_frac
|
||||
add r10,r10,32
|
||||
brgt r10,21,.Lshift32_frac
|
||||
b_s .Lret0
|
||||
|
||||
.Lround_frac:
|
||||
add.f 0,r4,r4
|
||||
btst.eq acc2,0
|
||||
mov_s DBL0L,acc2
|
||||
mov_s DBL0H,r7
|
||||
adc.eq.f DBL0L,DBL0L,0
|
||||
j_s.d [blink]
|
||||
adc.eq DBL0H,DBL0H,0
|
||||
|
||||
; Overflow exit: return r9 as the high word with a zero low word, and set
; bit 31 when the input signs differ.  r9 is expected to still hold the
; 0x7ff00000 loaded at __muldf3.
.Linf: mov_s DBL0L,0
|
||||
xor.f DBL1H,DBL1H,DBL0H ; N = xor of both signs
|
||||
mov_s DBL0H,r9
|
||||
j_s.d [blink]
|
||||
bset.mi DBL0H,DBL0H,31 ; delay slot
|
||||
ENDFUNC(__muldf3)
|
||||
|
||||
.balign 4
|
||||
.L7ff00000:
|
||||
.long 0x7ff00000
|
||||
|
|
@ -0,0 +1,176 @@
|
|||
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "../arc-ieee-754.h"
|
||||
|
||||
/* Debug harness, compiled out by the "#if 0".  When enabled it defines
   __mulsf3 as a wrapper that calls __mulsf3_c and then __mulsf3_asm with
   the same r0/r1.  It returns when both results are identical, or when
   both have every bit of 0x7f800000 set and non-zero low 23 bits;
   otherwise it calls abort.  The #define at the end renames the real
   implementation that follows to __mulsf3_asm.  */
#if 0 /* DEBUG */
|
||||
.global __mulsf3
|
||||
FUNC(__mulsf3)
|
||||
.balign 4
|
||||
__mulsf3:
|
||||
push_s blink
|
||||
push_s r1
|
||||
bl.d __mulsf3_c ; first implementation; the push below is its delay slot
|
||||
push_s r0
|
||||
ld_s r1,[sp,4] ; reload the second operand
|
||||
st_s r0,[sp,4] ; park the __mulsf3_c result in that stack slot
|
||||
bl.d __mulsf3_asm ; second implementation; the pop below is its delay slot
|
||||
pop_s r0
|
||||
pop_s r1 ; r1 = result of __mulsf3_c
|
||||
pop_s blink
|
||||
cmp r0,r1
|
||||
jeq_s [blink]
|
||||
and r12,r0,r1
|
||||
bic.f 0,0x7f800000,r12 ; Z iff both results have every bit of 0x7f800000 set
|
||||
bne 0f
|
||||
bmsk.f 0,r0,22 ; Z iff the low 23 bits of r0 are zero
|
||||
bmsk.ne.f r1,r1,22 ; r0 bits non-zero: same test on r1
|
||||
jne_s [blink] ; both NaN -> OK
|
||||
0: bl abort
|
||||
ENDFUNC(__mulsf3)
|
||||
#define __mulsf3 __mulsf3_asm
|
||||
#endif /* DEBUG */
|
||||
|
||||
.balign 4
|
||||
.global __mulsf3
|
||||
FUNC(__mulsf3)
|
||||
/* __mulsf3: multiply the single-precision values in r0 and r1; the
   result is returned in r0.  r9 is loaded with 0x7f800000 and r4 (after
   the second ld.as) with 0x7fffffff, both fetched from the literal words
   at the end of the function through hand-computed pcl offsets.
   r11 and r12 hold the exponent fields of r0 and r1.  */
__mulsf3:
|
||||
ld.as r9,[pcl,80]; [pcl,((.L7f800000-.+2)/4)]
|
||||
bmsk r4,r1,22 ; r4 = low 23 bits of r1
|
||||
bset r2,r0,23
|
||||
asl_s r2,r2,8 ; r2 = low 23 bits of r0 with bit 23 set, moved to the top of the word
|
||||
bset r3,r4,23 ; r3 = low 23 bits of r1 with bit 23 set
|
||||
and r11,r0,r9 ; r11 = exponent field of r0
|
||||
breq.d r11,0,.Ldenorm_dbl0
|
||||
and r12,r1,r9 ; delay slot: r12 = exponent field of r1
|
||||
breq.d r12,0,.Ldenorm_dbl1
|
||||
xor_s r0,r0,r1 ; delay slot: bit 31 of r0 = sign of the product
|
||||
mululw 0,r2,r3
|
||||
machulw r6,r2,r3
|
||||
breq.d r11,r9,.Linf_nan_dbl0 ; r0 operand has an all-ones exponent field
|
||||
ld.as r4,[pcl,69]; [pcl,((.L7fffffff-.+2)/4)]
|
||||
breq.d r12,r9,.Linf_nan_dbl1 ; r1 operand has an all-ones exponent field
|
||||
.Lpast_denorm:
|
||||
asl.f 0,r6,8 ; N = bit 23 of r6
|
||||
mov r7,acc2
|
||||
add.pl r6,r6,r6
|
||||
bclr.pl r6,r6,23
|
||||
add.pl.f r7,r7,r7
|
||||
add.cs r6,r6,1
|
||||
lsr.f 0,r6,1
|
||||
add_s r12,r12,r11 ; sum of both exponent fields
|
||||
adc.f 0,r7,r4
|
||||
add_s r12,r12, \
|
||||
-0x3f800000
|
||||
adc.f r8,r6,r12
|
||||
tst.pl r8,r9 ; r8 not negative: Z if its exponent field is zero
|
||||
bic r0,r0,r4 ; r0 = sign bit only
|
||||
min r3,r8,r9
|
||||
jpnz.d [blink] ; N and Z both clear: return
|
||||
add.pnz r0,r0,r3 ; delay slot: add min(r8, r9) to the sign
|
||||
; infinity or denormal number
|
||||
add.ne.f r3,r3,r3
|
||||
asr_s r3,r3,23+1
|
||||
bset r6,r6,23
|
||||
bpnz.d .Linfinity
|
||||
sub_s r3,r3,1
|
||||
neg_s r2,r3
|
||||
brhi.d r2,24,.Lret_r0 ; right shift shift > 24 -> return +-0
|
||||
lsr r2,r6,r2
|
||||
asl r9,r6,r3 ; r9 is reused here and no longer holds 0x7f800000
|
||||
lsr.f 0,r2,1
|
||||
tst r7,r7
|
||||
add_s r0,r0,r2
|
||||
bset.ne r9,r9,0
|
||||
adc.f 0,r9,r4
|
||||
j_s.d [blink]
|
||||
add.cs r0,r0,1
|
||||
.Linfinity:
|
||||
j_s.d [blink]
|
||||
add_s r0,r0,r9 ; delay slot: sign + r9
|
||||
|
||||
.Lret_r0: j_s [blink]
|
||||
|
||||
.balign 4
|
||||
; The r0 operand has a zero exponent field.  Normalise its fraction in r2,
; take the shift out of r12 and rejoin the main path at .Lpast_denorm, or
; return a zero carrying the product sign when that is not possible.
; On entry r0 has not yet been xor-ed with r1.
.Ldenorm_dbl0:
|
||||
bclr_s r2,r2,31 ; remove the bit that the entry code forced on
|
||||
norm.f r4,r2 ; Z if the fraction is zero
|
||||
add_s r2,r2,r2
|
||||
asl r2,r2,r4
|
||||
breq.d r12,r9,.Ldenorm_dbl0_inf_nan_dbl1 ; r1 operand has an all-ones exponent field
|
||||
asl r4,r4,23 ; delay slot: shift count moved to exponent field position
|
||||
mululw 0,r2,r3
|
||||
machulw r6,r2,r3
|
||||
sub.ne.f r12,r12,r4 ; skipped, leaving Z set, when the fraction was zero
|
||||
ld.as r4,[pcl,28]; [pcl,((.L7fffffff-.+2)/4)]
|
||||
bhi.d .Lpast_denorm ; taken only if the subtraction ran, did not borrow and was non-zero
|
||||
xor_s r0,r0,r1 ; delay slot: bit 31 of r0 = sign of the product
|
||||
bmsk r1,r0,30
|
||||
j_s.d [blink]
|
||||
bic_s r0,r0,r1 ; delay slot: return the sign bit only
|
||||
|
||||
.balign 4
|
||||
/* Exits for an operand with an all-ones exponent field.  All three
   entries share the tail at .Linf_nan_dbl0, which clears bit 31 of r1 and
   xors r1 into r0.
   - .Linf_nan_dbl0, from the main path: r0 holds the xor of both inputs
     and r1 the second input, so the tail returns the first input with bit
     31 toggled by the sign of the second.
   - .Linf_nan_dbl1, from the main path: the extra xor first turns r1 back
     into the first input, so the tail returns the second input with bit 31
     toggled by the sign of the first.
   - .Ldenorm_dbl0_inf_nan_dbl1, from .Ldenorm_dbl0: if r0 is +-0, r1 is
     forced to all ones and the tail returns the sign of r0 with all lower
     bits set.
   NOTE(review): on the .Ldenorm_dbl0 entry r0 is still the raw first
   operand (the xor_s r0,r0,r1 in .Ldenorm_dbl0 comes later), so for a
   non-zero r0 the tail yields bit 31 of r0 with the low 31 bits of r1,
   i.e. the sign of r1 is not folded in; confirm this is intended.  */
.Ldenorm_dbl0_inf_nan_dbl1:
|
||||
bmsk.f 0,r0,30 ; Z iff r0 is +-0
|
||||
mov.eq r1,-1
|
||||
.Linf_nan_dbl1:
|
||||
xor_s r1,r1,r0
|
||||
.Linf_nan_dbl0:
|
||||
bclr_s r1,r1,31
|
||||
j_s.d [blink]
|
||||
xor_s r0,r0,r1 ; delay slot
|
||||
|
||||
.balign 4
|
||||
; The r1 operand has a zero exponent field; r4 still holds its low 23 bits
; and r0 already holds the xor of both inputs.  Normalise the fraction in
; r4, take the shift out of r11 and rejoin the main path at .Lpast_denorm,
; or return a zero carrying the product sign when that is not possible.
.Ldenorm_dbl1:
|
||||
breq.d r11,r9,.Linf_nan_dbl0_2 ; r0 operand has an all-ones exponent field
|
||||
norm.f r3,r4 ; delay slot: Z if the fraction is zero
|
||||
sub_s r3,r3,7
|
||||
asl r4,r4,r3
|
||||
mululw 0,r2,r4
|
||||
machulw r6,r2,r4
|
||||
sub_s r3,r3,1
|
||||
asl_s r3,r3,23 ; shift count moved to exponent field position
|
||||
sub.ne.f r11,r11,r3 ; skipped, leaving Z set, when the fraction was zero
|
||||
ld.as r4,[pcl,11]; [pcl,((.L7fffffff-.+2)/4)]
|
||||
bhi.d .Lpast_denorm ; taken only if the subtraction ran, did not borrow and was non-zero
|
||||
bmsk r8,r0,30 ; delay slot
|
||||
j_s.d [blink]
|
||||
bic r0,r0,r8 ; delay slot: return the sign bit only
|
||||
|
||||
.balign 4
|
||||
/* Reached from .Ldenorm_dbl1: the r0 operand has an all-ones exponent
   field and the r1 operand a zero exponent field.  On entry r0 holds the
   xor of both inputs, r1 the second input, and Z (from the norm.f in the
   delay slot of the branch here) is set when the fraction of r1 is zero.
   Returns the first input with bit 31 toggled by the sign of r1, or all
   ones when r1 is +-0.  */
.Linf_nan_dbl0_2:
|
||||
bclr_s r1,r1,31 ; r1 without its sign
|
||||
xor_s r0,r0,r1 ; r0 = first input, bit 31 toggled by the sign of r1
|
||||
sub.eq r1,r1,1 ; inf/nan * 0 -> nan
|
||||
bic.f 0,r9,r1 ; Z iff every bit of r9 is set in r1
|
||||
j_s.d [blink]
|
||||
or.eq r0,r0,r1 ; r1 nan -> result nan
|
||||
|
||||
.balign 4
|
||||
.L7f800000:
|
||||
.long 0x7f800000
|
||||
.L7fffffff:
|
||||
.long 0x7fffffff
|
||||
ENDFUNC(__mulsf3)
|
||||
|
|
@ -0,0 +1,410 @@
|
|||
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/*
|
||||
to calculate a := b/x as b*y, with y := 1/x:
|
||||
- x is in the range [1..2)
|
||||
- calculate 15..18 bit inverse y0 using a table of approximating polynomials.
|
||||
Precision is higher for polynomials used to evaluate input with larger
|
||||
value.
|
||||
- Do one newton-raphson iteration step to double the precision,
|
||||
then multiply this with the divisor
|
||||
-> more time to decide if dividend is subnormal
|
||||
- the worst error propagation is on the side of the value range
|
||||
with the least initial defect, thus giving us about 30 bits precision.
|
||||
The truncation error for either is less than 1 + x/2 ulp.
|
||||
A 31 bit inverse can be simply calculated by using x with implicit 1
|
||||
and chaining the multiplies. For a 32 bit inverse, we multiply y0^2
|
||||
with the bare fraction part of x, then add in y0^2 for the implicit
|
||||
1 of x.
|
||||
- If calculating a 31 bit inverse, the systematic error is less than
|
||||
-1 ulp; likewise, for 32 bit, it is less than -2 ulp.
|
||||
- If we calculate our seed with a 32 bit fraction, we can achieve a
|
||||
tentative result strictly better than -2 / +2.5 (1) ulp/128, i.e. we
|
||||
only need to take the step to calculate the 2nd stage rest and
|
||||
rounding adjust 1/32th of the time. However, if we use a 20 bit
|
||||
fraction for the seed, the negative error can exceed -2 ulp/128, (2)
|
||||
thus for a simple add / tst check, we need to do the 2nd stage
|
||||
rest calculation/ rounding adjust 1/16th of the time.
|
||||
(1): The inexactness of the 32 bit inverse contributes an error in the
|
||||
range of (-1 .. +(1+x/2) ) ulp/128. Leaving out the low word of the
|
||||
rest contributes an error < +1/x ulp/128 . In the interval [1,2),
|
||||
x/2 + 1/x <= 1.5 .
|
||||
(2): Unless proven otherwise. I have not actually looked for an
|
||||
example where -2 ulp/128 is exceeded, and my calculations indicate
|
||||
that the excess, if existent, is less than -1/512 ulp.
|
||||
??? The algorithm is still based on the ARC700 optimized code.
|
||||
Maybe we could make better use of 64 bit multiply results and/or mmed .
|
||||
*/
|
||||
#include "../arc-ieee-754.h"
|
||||
|
||||
/* N.B. fp-bit.c does double rounding on denormal numbers. */
|
||||
/* Disabled self-check wrapper.  When enabled it calls __divdf3_c (defined
   elsewhere) and then __divdf3_asm with the same operands, and calls abort
   unless both results match word for word or pass the NaN test below.  The
   #define at the end makes the implementation that follows assemble under
   the name __divdf3_asm.  */
#if 0 /* DEBUG */
|
||||
.global __divdf3
|
||||
FUNC(__divdf3)
|
||||
.balign 4
|
||||
__divdf3:
|
||||
push_s blink ; keep the return address across both calls
|
||||
push_s r2 ; save second operand words r2, r3
|
||||
push_s r3
|
||||
push_s r0 ; save first operand words r0, r1
|
||||
bl.d __divdf3_c ; first call: result comes back in r0, r1
|
||||
push_s r1 ; delay slot: last save is done before the call
|
||||
ld_s r2,[sp,12] ; reload second operand from its stack slots
|
||||
ld_s r3,[sp,8]
|
||||
st_s r0,[sp,12] ; park the first result in the slots that held r2, r3
|
||||
st_s r1,[sp,8]
|
||||
pop_s r1 ; restore first operand
|
||||
bl.d __divdf3_asm ; second call with the same operands
|
||||
pop_s r0 ; delay slot: r0 restored before the call
|
||||
pop_s r3 ; r2, r3 = result of the first call
|
||||
pop_s r2
|
||||
pop_s blink
|
||||
cmp r0,r2
|
||||
cmp.eq r1,r3 ; second word is compared only if the first matched
|
||||
jeq_s [blink] ; both words equal: return the second result
|
||||
and r12,DBL0H,DBL1H
|
||||
bic.f 0,0x7ff80000,r12 ; both NaN -> OK
|
||||
jeq_s [blink] ; Z set: all bits of 0x7ff80000 are set in both high words
|
||||
bl abort ; results differ
|
||||
ENDFUNC(__divdf3)
|
||||
#define __divdf3 __divdf3_asm
|
||||
#endif /* DEBUG */
|
||||
|
||||
FUNC(__divdf3)
|
||||
.balign 4
|
||||
.L7ff00000:
|
||||
.long 0x7ff00000
|
||||
.Ldivtab:
|
||||
.long 0xfc0fffe1
|
||||
.long 0xf46ffdfb
|
||||
.long 0xed1ffa54
|
||||
.long 0xe61ff515
|
||||
.long 0xdf7fee75
|
||||
.long 0xd91fe680
|
||||
.long 0xd2ffdd52
|
||||
.long 0xcd1fd30c
|
||||
.long 0xc77fc7cd
|
||||
.long 0xc21fbbb6
|
||||
.long 0xbcefaec0
|
||||
.long 0xb7efa100
|
||||
.long 0xb32f92bf
|
||||
.long 0xae8f83b7
|
||||
.long 0xaa2f7467
|
||||
.long 0xa5ef6479
|
||||
.long 0xa1cf53fa
|
||||
.long 0x9ddf433e
|
||||
.long 0x9a0f3216
|
||||
.long 0x965f2091
|
||||
.long 0x92df0f11
|
||||
.long 0x8f6efd05
|
||||
.long 0x8c1eeacc
|
||||
.long 0x88eed876
|
||||
.long 0x85dec615
|
||||
.long 0x82eeb3b9
|
||||
.long 0x800ea10b
|
||||
.long 0x7d3e8e0f
|
||||
.long 0x7a8e7b3f
|
||||
.long 0x77ee6836
|
||||
.long 0x756e5576
|
||||
.long 0x72fe4293
|
||||
.long 0x709e2f93
|
||||
.long 0x6e4e1c7f
|
||||
.long 0x6c0e095e
|
||||
.long 0x69edf6c5
|
||||
.long 0x67cde3a5
|
||||
.long 0x65cdd125
|
||||
.long 0x63cdbe25
|
||||
.long 0x61ddab3f
|
||||
.long 0x600d991f
|
||||
.long 0x5e3d868c
|
||||
.long 0x5c6d7384
|
||||
.long 0x5abd615f
|
||||
.long 0x590d4ecd
|
||||
.long 0x576d3c83
|
||||
.long 0x55dd2a89
|
||||
.long 0x545d18e9
|
||||
.long 0x52dd06e9
|
||||
.long 0x516cf54e
|
||||
.long 0x4ffce356
|
||||
.long 0x4e9cd1ce
|
||||
.long 0x4d3cbfec
|
||||
.long 0x4becae86
|
||||
.long 0x4aac9da4
|
||||
.long 0x496c8c73
|
||||
.long 0x483c7bd3
|
||||
.long 0x470c6ae8
|
||||
.long 0x45dc59af
|
||||
.long 0x44bc4915
|
||||
.long 0x43ac3924
|
||||
.long 0x428c27fb
|
||||
.long 0x418c187a
|
||||
.long 0x407c07bd
|
||||
|
||||
__divdf3_support: /* This label makes debugger output saner. */
|
||||
.balign 4
|
||||
.Ldenorm_dbl1:
|
||||
brge r6, \
|
||||
0x43500000,.Linf_NaN ; large number / denorm -> Inf
|
||||
bmsk.f r12,DBL1H,19
|
||||
mov.eq r12,DBL1L
|
||||
mov.eq DBL1L,0
|
||||
sub.eq r7,r7,32
|
||||
norm.f r11,r12 ; flag for x/0 -> Inf check
|
||||
beq_s .Linf_NaN
|
||||
mov.mi r11,0
|
||||
add.pl r11,r11,1
|
||||
add_s r12,r12,r12
|
||||
asl r8,r12,r11
|
||||
rsub r12,r11,31
|
||||
lsr r12,DBL1L,r12
|
||||
tst_s DBL1H,DBL1H
|
||||
or r8,r8,r12
|
||||
lsr r4,r8,26
|
||||
lsr DBL1H,r8,12
|
||||
ld.as r4,[r10,r4]
|
||||
bxor.mi DBL1H,DBL1H,31
|
||||
sub r11,r11,11
|
||||
asl DBL1L,DBL1L,r11
|
||||
sub r11,r11,1
|
||||
mulu64 r4,r8
|
||||
sub r7,r7,r11
|
||||
b.d .Lpast_denorm_dbl1
|
||||
asl r7,r7,20
|
||||
|
||||
.balign 4
|
||||
.Ldenorm_dbl0:
|
||||
bmsk.f r12,DBL0H,19
|
||||
; wb stall
|
||||
mov.eq r12,DBL0L
|
||||
sub.eq r6,r6,32
|
||||
norm.f r11,r12 ; flag for 0/x -> 0 check
|
||||
brge r7, \
|
||||
0x43500000, .Lret0_2 ; denorm/large number -> 0
|
||||
beq_s .Lret0_2
|
||||
mov.mi r11,0
|
||||
add.pl r11,r11,1
|
||||
asl r12,r12,r11
|
||||
sub r6,r6,r11
|
||||
add.f 0,r6,31
|
||||
lsr r10,DBL0L,r6
|
||||
mov.mi r10,0
|
||||
add r6,r6,11+32
|
||||
neg.f r11,r6
|
||||
asl DBL0L,DBL0L,r11
|
||||
mov.pl DBL0L,0
|
||||
sub r6,r6,32-1
|
||||
b.d .Lpast_denorm_dbl0
|
||||
asl r6,r6,20
|
||||
|
||||
.Linf_NaN:
|
||||
tst_s DBL0L,DBL0L ; 0/0 -> NaN
|
||||
xor_s DBL1H,DBL1H,DBL0H
|
||||
bclr.eq.f DBL0H,DBL0H,31
|
||||
bmsk DBL0H,DBL1H,30
|
||||
xor_s DBL0H,DBL0H,DBL1H
|
||||
sub.eq DBL0H,DBL0H,1
|
||||
mov_s DBL0L,0
|
||||
j_s.d [blink]
|
||||
or DBL0H,DBL0H,r9
|
||||
.balign 4
|
||||
; Return a zero whose sign bit is bit 31 of DBL0H xor DBL1H.
; Branched to from .Ldenorm_dbl0 (0/x and denorm/large number cases).
.Lret0_2:
|
||||
xor_s DBL1H,DBL1H,DBL0H ; bit 31 now holds the xor of the two sign bits
|
||||
mov_s DBL0L,0 ; low result word is zero
|
||||
bmsk DBL0H,DBL1H,30 ; DBL0H = bits 30..0 of the xor
|
||||
j_s.d [blink] ; return; the next instruction is the delay slot
|
||||
xor_s DBL0H,DBL0H,DBL1H ; bits 30..0 cancel, only the sign bit remains
|
||||
.balign 4
|
||||
.global __divdf3
|
||||
/* N.B. the spacing between divtab and the sub3 to get its address must
|
||||
be a multiple of 8. */
|
||||
__divdf3:
|
||||
asl r8,DBL1H,12
|
||||
lsr r4,r8,26
|
||||
sub3 r10,pcl,61; (.-.Ldivtab) >> 3
|
||||
ld.as r9,[pcl,-124]; [pcl,(-((.-.L7ff00000) >> 2))] ; 0x7ff00000
|
||||
ld.as r4,[r10,r4]
|
||||
lsr r12,DBL1L,20
|
||||
and.f r7,DBL1H,r9
|
||||
or r8,r8,r12
|
||||
mulu64 r4,r8
|
||||
beq.d .Ldenorm_dbl1
|
||||
.Lpast_denorm_dbl1:
|
||||
and.f r6,DBL0H,r9
|
||||
breq.d r7,r9,.Linf_nan_dbl1
|
||||
asl r4,r4,12
|
||||
sub r4,r4,mhi
|
||||
mulu64 r4,r4
|
||||
beq.d .Ldenorm_dbl0
|
||||
lsr r8,r8,1
|
||||
breq.d r6,r9,.Linf_nan_dbl0
|
||||
asl r12,DBL0H,11
|
||||
lsr r10,DBL0L,21
|
||||
.Lpast_denorm_dbl0:
|
||||
bset r8,r8,31
|
||||
mulu64 mhi,r8
|
||||
add_s r12,r12,r10
|
||||
bset r5,r12,31
|
||||
cmp r5,r8
|
||||
cmp.eq DBL0L,DBL1L
|
||||
lsr.cc r5,r5,1
|
||||
sub r4,r4,mhi ; u1.31 inverse, about 30 bit
|
||||
mulu64 r5,r4 ; result fraction highpart
|
||||
lsr r8,r8,2 ; u3.29
|
||||
add r5,r6, /* wait for immediate */ \
|
||||
0x3fe00000
|
||||
mov r11,mhi ; result fraction highpart
|
||||
mulu64 r11,r8 ; u-28.31
|
||||
asl_s DBL1L,DBL1L,9 ; u-29.23:9
|
||||
sbc r6,r5,r7
|
||||
mov r12,mlo ; u-28.31
|
||||
mulu64 r11,DBL1L ; mhi: u-28.23:9
|
||||
add.cs DBL0L,DBL0L,DBL0L
|
||||
asl_s DBL0L,DBL0L,6 ; u-26.25:7
|
||||
asl r10,r11,23
|
||||
sub_l DBL0L,DBL0L,r12
|
||||
lsr r7,r11,9
|
||||
sub r5,DBL0L,mhi ; rest msw ; u-26.31:0
|
||||
mul64 r5,r4 ; mhi: result fraction lowpart
|
||||
xor.f 0,DBL0H,DBL1H
|
||||
and DBL0H,r6,r9
|
||||
add_s DBL0H,DBL0H,r7
|
||||
bclr r12,r9,20 ; 0x7fe00000
|
||||
brhs.d r6,r12,.Linf_denorm
|
||||
bxor.mi DBL0H,DBL0H,31
|
||||
add.f r12,mhi,0x11
|
||||
asr r9,r12,5
|
||||
sub.mi DBL0H,DBL0H,1
|
||||
add.f DBL0L,r9,r10
|
||||
tst r12,0x1c
|
||||
jne.d [blink]
|
||||
add.cs DBL0H,DBL0H,1
|
||||
/* work out exact rounding if we fall through here. */
|
||||
/* We know that the exact result cannot be represented in double
|
||||
precision. Find the mid-point between the two nearest
|
||||
representable values, multiply with the divisor, and check if
|
||||
the result is larger than the dividend. Since we want to know
|
||||
only the sign bit, it is sufficient to calculate only the
|
||||
highpart of the lower 64 bits. */
|
||||
mulu64 r11,DBL1L ; rest before considering r12 in r5 : -mlo
|
||||
sub.f DBL0L,DBL0L,1
|
||||
asl r12,r9,2 ; u-22.30:2
|
||||
sub.cs DBL0H,DBL0H,1
|
||||
sub.f r12,r12,2
|
||||
mov r10,mlo ; rest before considering r12 in r5 : -r10
|
||||
mulu64 r12,DBL1L ; mhi: u-51.32
|
||||
asl r5,r5,25 ; s-51.7:25
|
||||
lsr r10,r10,7 ; u-51.30:2
|
||||
mov r7,mhi ; u-51.32
|
||||
mulu64 r12,r8 ; mlo: u-51.31:1
|
||||
sub r5,r5,r10
|
||||
add.mi r5,r5,DBL1L ; signed multiply adjust for r12*DBL1L
|
||||
bset r7,r7,0 ; make sure that the result is not zero, and that
|
||||
sub r5,r5,r7 ; a highpart zero appears negative
|
||||
sub.f r5,r5,mlo ; rest msw
|
||||
add.pl.f DBL0L,DBL0L,1
|
||||
j_s.d [blink]
|
||||
add.eq DBL0H,DBL0H,1
|
||||
|
||||
.Linf_nan_dbl1: ; 0/Inf -> NaN Inf/Inf -> NaN x/Inf-> 0 x/NaN -> NaN
|
||||
or.f 0,r6,DBL0L
|
||||
cmp.ne r6,r9
|
||||
not_s DBL0L,DBL1H
|
||||
sub_s.ne DBL0L,DBL0L,DBL0L
|
||||
tst_s DBL0H,DBL0H
|
||||
add_s DBL0H,DBL1H,DBL0L
|
||||
j_s.d [blink]
|
||||
bxor.mi DBL0H,DBL0H,31
|
||||
.Linf_nan_dbl0:
|
||||
tst_s DBL1H,DBL1H
|
||||
j_s.d [blink]
|
||||
bxor.mi DBL0H,DBL0H,31
|
||||
.balign 4
|
||||
.Linf_denorm:
|
||||
lsr r12,r6,28
|
||||
brlo.d r12,0xc,.Linf
|
||||
.Ldenorm:
|
||||
asr r6,r6,20
|
||||
neg r9,r6
|
||||
mov_s DBL0H,0
|
||||
brhs.d r9,54,.Lret0
|
||||
bxor.mi DBL0H,DBL0H,31
|
||||
add r12,mhi,1
|
||||
and r12,r12,-4
|
||||
rsub r7,r6,5
|
||||
asr r10,r12,28
|
||||
bmsk r4,r12,27
|
||||
min r7,r7,31
|
||||
asr DBL0L,r4,r7
|
||||
add DBL1H,r11,r10
|
||||
abs.f r10,r4
|
||||
sub.mi r10,r10,1
|
||||
add.f r7,r6,32-5
|
||||
asl r4,r4,r7
|
||||
mov.mi r4,r10
|
||||
add.f r10,r6,23
|
||||
rsub r7,r6,9
|
||||
lsr r7,DBL1H,r7
|
||||
asl r10,DBL1H,r10
|
||||
or.pnz DBL0H,DBL0H,r7
|
||||
or.mi r4,r4,r10
|
||||
mov.mi r10,r7
|
||||
add.f DBL0L,r10,DBL0L
|
||||
add.cs.f DBL0H,DBL0H,1 ; carry clear after this point
|
||||
bxor.f 0,r4,31
|
||||
add.pnz.f DBL0L,DBL0L,1
|
||||
add.cs.f DBL0H,DBL0H,1
|
||||
jne_s [blink]
|
||||
/* Calculation so far was not conclusive; calculate further rest. */
|
||||
mulu64 r11,DBL1L ; rest before considering r12 in r5 : -mlo
|
||||
asr.f r12,r12,3
|
||||
asl r5,r5,25 ; s-51.7:25
|
||||
mov r11,mlo ; rest before considering r12 in r5 : -r11
|
||||
mulu64 r12,r8 ; u-51.31:1
|
||||
and r9,DBL0L,1 ; tie-breaker: round to even
|
||||
lsr r11,r11,7 ; u-51.30:2
|
||||
mov DBL1H,mlo ; u-51.31:1
|
||||
mulu64 r12,DBL1L ; u-51.62:2
|
||||
sub.mi r11,r11,DBL1L ; signed multiply adjust for r12*DBL1L
|
||||
add_s DBL1H,DBL1H,r11
|
||||
sub DBL1H,DBL1H,r5 ; -rest msw
|
||||
add_s DBL1H,DBL1H,mhi ; -rest msw
|
||||
add.f 0,DBL1H,DBL1H ; can't ror.f by 32 :-(
|
||||
tst_s DBL1H,DBL1H
|
||||
cmp.eq mlo,r9
|
||||
add.cs.f DBL0L,DBL0L,1
|
||||
j_s.d [blink]
|
||||
add.cs DBL0H,DBL0H,1
|
||||
|
||||
; Underflow exit, branched to from .Ldenorm.  Only DBL0L is written here;
; DBL0H was set to 0 and conditionally had bit 31 flipped (bxor.mi) at the
; branch site.
.Lret0:
|
||||
/* return +- 0 */
|
||||
j_s.d [blink] ; return; the next instruction is the delay slot
|
||||
mov_s DBL0L,0 ; low result word is zero
|
||||
; Overflow exit, branched to from .Linf_denorm: result is r9:0 with bit 31
; flipped when the N flag is set.
; NOTE(review): r9 is presumably still the 0x7ff00000 mask loaded at entry,
; and N presumably still comes from the xor.f of DBL0H and DBL1H - confirm.
.Linf:
|
||||
mov_s DBL0H,r9 ; high word = contents of r9
|
||||
mov_s DBL0L,0 ; low word is zero
|
||||
j_s.d [blink] ; return; the next instruction is the delay slot
|
||||
bxor.mi DBL0H,DBL0H,31 ; flip the sign bit if N is set
|
||||
ENDFUNC(__divdf3)
|
||||
|
|
@ -0,0 +1,274 @@
|
|||
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/*
|
||||
- calculate 15..18 bit inverse using a table of approximating polynomials.
|
||||
precision is higher for polynomials used to evaluate input with larger
|
||||
value.
|
||||
- do one newton-raphson iteration step to double the precision,
|
||||
then multiply this with the divisor
|
||||
-> more time to decide if dividend is subnormal
|
||||
- the worst error propagation is on the side of the value range
|
||||
with the least initial defect, thus giving us about 30 bits precision.
|
||||
*/
|
||||
#include "../arc-ieee-754.h"
|
||||
|
||||
#if 0 /* DEBUG */
|
||||
.global __divsf3
|
||||
FUNC(__divsf3)
|
||||
.balign 4
|
||||
__divsf3:
|
||||
push_s blink
|
||||
push_s r1
|
||||
bl.d __divsf3_c
|
||||
push_s r0
|
||||
ld_s r1,[sp,4]
|
||||
st_s r0,[sp,4]
|
||||
bl.d __divsf3_asm
|
||||
pop_s r0
|
||||
pop_s r1
|
||||
pop_s blink
|
||||
cmp r0,r1
|
||||
#if 1
|
||||
bne abort
|
||||
jeq_s [blink]
|
||||
b abort
|
||||
#else
|
||||
bne abort
|
||||
j_s [blink]
|
||||
#endif
|
||||
ENDFUNC(__divsf3)
|
||||
#define __divsf3 __divsf3_asm
|
||||
#endif /* DEBUG */
|
||||
|
||||
FUNC(__divsf3)
|
||||
.balign 4
|
||||
.Ldivtab:
|
||||
.long 0xfc0ffff0
|
||||
.long 0xf46ffefd
|
||||
.long 0xed1ffd2a
|
||||
.long 0xe627fa8e
|
||||
.long 0xdf7ff73b
|
||||
.long 0xd917f33b
|
||||
.long 0xd2f7eea3
|
||||
.long 0xcd1fe986
|
||||
.long 0xc77fe3e7
|
||||
.long 0xc21fdddb
|
||||
.long 0xbcefd760
|
||||
.long 0xb7f7d08c
|
||||
.long 0xb32fc960
|
||||
.long 0xae97c1ea
|
||||
.long 0xaa27ba26
|
||||
.long 0xa5e7b22e
|
||||
.long 0xa1cfa9fe
|
||||
.long 0x9ddfa1a0
|
||||
.long 0x9a0f990c
|
||||
.long 0x9667905d
|
||||
.long 0x92df878a
|
||||
.long 0x8f6f7e84
|
||||
.long 0x8c27757e
|
||||
.long 0x88f76c54
|
||||
.long 0x85df630c
|
||||
.long 0x82e759c5
|
||||
.long 0x8007506d
|
||||
.long 0x7d3f470a
|
||||
.long 0x7a8f3da2
|
||||
.long 0x77ef341e
|
||||
.long 0x756f2abe
|
||||
.long 0x72f7212d
|
||||
.long 0x709717ad
|
||||
.long 0x6e4f0e44
|
||||
.long 0x6c1704d6
|
||||
.long 0x69e6fb44
|
||||
.long 0x67cef1d7
|
||||
.long 0x65c6e872
|
||||
.long 0x63cedf18
|
||||
.long 0x61e6d5cd
|
||||
.long 0x6006cc6d
|
||||
.long 0x5e36c323
|
||||
.long 0x5c76b9f3
|
||||
.long 0x5abeb0b7
|
||||
.long 0x5916a79b
|
||||
.long 0x57769e77
|
||||
.long 0x55de954d
|
||||
.long 0x54568c4e
|
||||
.long 0x52d6834d
|
||||
.long 0x51667a7f
|
||||
.long 0x4ffe71b5
|
||||
.long 0x4e9e68f1
|
||||
.long 0x4d466035
|
||||
.long 0x4bf65784
|
||||
.long 0x4aae4ede
|
||||
.long 0x496e4646
|
||||
.long 0x48363dbd
|
||||
.long 0x47063547
|
||||
.long 0x45de2ce5
|
||||
.long 0x44be2498
|
||||
.long 0x43a61c64
|
||||
.long 0x4296144a
|
||||
.long 0x41860c0e
|
||||
.long 0x407e03ee
|
||||
.L7f800000:
|
||||
.long 0x7f800000
|
||||
.balign 4
|
||||
.global __divsf3_support
|
||||
__divsf3_support:
|
||||
; Build the result as (sign of r0 xor r1) | r9.  When bits 30..0 of r0 are
; all zero, 1 is subtracted from the sign-only word first, so every bit
; below bit 31 is set.
; NOTE(review): r9 is presumably still 0x7f800000 as loaded in __divsf3,
; which would make the results Inf and NaN respectively - confirm.
.Linf_NaN:
|
||||
bclr.f 0,r0,31 ; 0/0 -> NaN
|
||||
xor_s r0,r0,r1 ; bit 31 = xor of the two sign bits
|
||||
bmsk r1,r0,30 ; r1 = bits 30..0 of the xor
|
||||
bic_s r0,r0,r1 ; r0 = sign bit only
|
||||
sub.eq r0,r0,1 ; Z from bclr.f: sets all bits below bit 31
|
||||
j_s.d [blink] ; return; the next instruction is the delay slot
|
||||
or r0,r0,r9 ; merge in the bits held in r9
|
||||
; Return a zero whose sign bit is bit 31 of r0 xor r1.
.Lret0:
|
||||
xor_s r0,r0,r1 ; bit 31 = xor of the two sign bits
|
||||
bmsk r1,r0,30 ; r1 = bits 30..0 of the xor
|
||||
j_s.d [blink] ; return; the next instruction is the delay slot
|
||||
bic_s r0,r0,r1 ; clear bits 30..0, only the sign bit remains
|
||||
/* N.B. the spacing between divtab and the sub3 to get its address must
|
||||
be a multiple of 8. */
|
||||
__divsf3:
|
||||
lsr r2,r1,17
|
||||
sub3 r3,pcl,37 ; (.-.Ldivtab) >> 3
|
||||
bmsk_s r2,r2,5
|
||||
ld.as r5,[r3,r2]
|
||||
asl r4,r1,9
|
||||
ld.as r9,[pcl,-13]; [pcl,(-((.-.L7f800000) >> 2))] ; 0x7f800000
|
||||
mulu64 r5,r4
|
||||
and.f r11,r1,r9
|
||||
asl r6,r1,8
|
||||
bset r6,r6,31
|
||||
beq.d .Ldenorm_fp1
|
||||
asl r5,r5,13
|
||||
breq.d r11,r9,.Linf_nan_fp1
|
||||
and.f r2,r0,r9
|
||||
sub r7,r5,mhi
|
||||
mulu64 r7,r6
|
||||
beq.d .Ldenorm_fp0
|
||||
asl r12,r0,8
|
||||
breq.d r2,r9,.Linf_nan_fp0
|
||||
mulu64 mhi,r7
|
||||
.Lpast_denorm_fp1:
|
||||
bset r3,r12,31
|
||||
.Lpast_denorm_fp0:
|
||||
cmp_s r3,r6
|
||||
lsr.cc r3,r3,1
|
||||
add_s r2,r2, /* wait for immediate */ \
|
||||
0x3f000000
|
||||
sub r7,r7,mhi ; u1.31 inverse, about 30 bit
|
||||
mulu64 r3,r7
|
||||
sbc r2,r2,r11
|
||||
xor.f 0,r0,r1
|
||||
and r0,r2,r9
|
||||
bclr r3,r9,23 ; 0x7f000000
|
||||
brhs.d r2,r3,.Linf_denorm
|
||||
bxor.mi r0,r0,31
|
||||
.Lpast_denorm:
|
||||
add r3,mhi,0x22 ; round to nearest or higher
|
||||
tst r3,0x3c ; check if rounding was unsafe
|
||||
lsr r3,r3,6
|
||||
jne.d [blink] ; return if rounding was safe.
|
||||
add_s r0,r0,r3
|
||||
/* work out exact rounding if we fall through here. */
|
||||
/* We know that the exact result cannot be represented in single
|
||||
precision. Find the mid-point between the two nearest
|
||||
representable values, multiply with the divisor, and check if
|
||||
the result is larger than the dividend. */
|
||||
add_s r3,r3,r3
|
||||
sub_s r3,r3,1
|
||||
mulu64 r3,r6
|
||||
asr.f 0,r0,1 ; for round-to-even in case this is a denorm
|
||||
rsub r2,r9,25
|
||||
asl_s r12,r12,r2
|
||||
sub.f 0,r12,mlo
|
||||
j_s.d [blink]
|
||||
sub.mi r0,r0,1
|
||||
.Linf_nan_fp1:
|
||||
lsr_s r0,r0,31
|
||||
bmsk.f 0,r1,22
|
||||
asl_s r0,r0,31
|
||||
bne_s 0f ; inf/inf -> nan
|
||||
brne r2,r9,.Lsigned0 ; x/inf -> 0, but x/nan -> nan
|
||||
0: j_s.d [blink]
|
||||
mov r0,-1
|
||||
.Lsigned0:
|
||||
.Linf_nan_fp0:
|
||||
tst_s r1,r1
|
||||
j_s.d [blink]
|
||||
bxor.mi r0,r0,31
|
||||
.balign 4
|
||||
.global __divsf3
|
||||
/* For denormal results, it is possible that an exact result needs
|
||||
rounding, and thus the round-to-even rule has to come into play. */
|
||||
.Linf_denorm:
|
||||
brlo r2,0xc0000000,.Linf
|
||||
.Ldenorm:
|
||||
asr_s r2,r2,23
|
||||
bic r0,r0,r9
|
||||
neg r9,r2
|
||||
brlo.d r9,25,.Lpast_denorm
|
||||
lsr r3,mlo,r9
|
||||
/* Fall through: return +- 0 */
|
||||
j_s [blink]
|
||||
.Linf:
|
||||
j_s.d [blink]
|
||||
or r0,r0,r9
|
||||
.balign 4
|
||||
.Ldenorm_fp1:
|
||||
bclr r6,r6,31
|
||||
norm.f r12,r6 ; flag for x/0 -> Inf check
|
||||
add r6,r6,r6
|
||||
rsub r5,r12,16
|
||||
ror r5,r1,r5
|
||||
asl r6,r6,r12
|
||||
bmsk r5,r5,5
|
||||
ld.as r5,[r3,r5]
|
||||
add r4,r6,r6
|
||||
; load latency
|
||||
mulu64 r5,r4
|
||||
bic.ne.f 0, \
|
||||
0x60000000,r0 ; large number / denorm -> Inf
|
||||
asl r5,r5,13
|
||||
sub r7,r5,mhi
|
||||
beq.d .Linf_NaN
|
||||
mulu64 r7,r6
|
||||
asl_s r12,r12,23
|
||||
and.f r2,r0,r9
|
||||
add_s r2,r2,r12
|
||||
asl r12,r0,8
|
||||
bne.d .Lpast_denorm_fp1
|
||||
.Ldenorm_fp0: mulu64 mhi,r7
|
||||
bclr r12,r12,31
|
||||
norm.f r3,r12 ; flag for 0/x -> 0 check
|
||||
bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0
|
||||
beq_s .Lret0
|
||||
asl_s r12,r12,r3
|
||||
asl_s r3,r3,23
|
||||
add_s r12,r12,r12
|
||||
add r11,r11,r3
|
||||
b.d .Lpast_denorm_fp0
|
||||
mov_s r3,r12
|
||||
ENDFUNC(__divsf3)
|
||||
|
|
@ -0,0 +1,234 @@
|
|||
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "../arc-ieee-754.h"
|
||||
|
||||
#if 0 /* DEBUG */
|
||||
.global __muldf3
|
||||
.balign 4
|
||||
__muldf3:
|
||||
push_s blink
|
||||
push_s r2
|
||||
push_s r3
|
||||
push_s r0
|
||||
bl.d __muldf3_c
|
||||
push_s r1
|
||||
ld_s r2,[sp,12]
|
||||
ld_s r3,[sp,8]
|
||||
st_s r0,[sp,12]
|
||||
st_s r1,[sp,8]
|
||||
pop_s r1
|
||||
bl.d __muldf3_asm
|
||||
pop_s r0
|
||||
pop_s r3
|
||||
pop_s r2
|
||||
pop_s blink
|
||||
cmp r0,r2
|
||||
cmp.eq r1,r3
|
||||
jeq_s [blink]
|
||||
and r12,DBL0H,DBL1H
|
||||
bic.f 0,0x7ff80000,r12 ; both NaN -> OK
|
||||
jeq_s [blink]
|
||||
b abort
|
||||
#define __muldf3 __muldf3_asm
|
||||
#endif /* DEBUG */
|
||||
|
||||
__muldf3_support: /* This label makes debugger output saner. */
|
||||
.balign 4
|
||||
FUNC(__muldf3)
|
||||
.Ldenorm_2:
|
||||
breq.d DBL1L,0,.Lret0_2 ; 0 input -> 0 output
|
||||
norm.f r12,DBL1L
|
||||
mov.mi r12,21
|
||||
add.pl r12,r12,22
|
||||
neg r11,r12
|
||||
asl_s r12,r12,20
|
||||
lsr.f DBL1H,DBL1L,r11
|
||||
ror DBL1L,DBL1L,r11
|
||||
sub_s DBL0H,DBL0H,r12
|
||||
mov.eq DBL1H,DBL1L
|
||||
sub_l DBL1L,DBL1L,DBL1H
|
||||
/* Fall through. */
|
||||
.global __muldf3
|
||||
.balign 4
|
||||
__muldf3:
|
||||
mulu64 DBL0L,DBL1L
|
||||
ld.as r9,[pcl,0x68] ; ((.L7ff00000-.+2)/4)]
|
||||
bmsk r6,DBL0H,19
|
||||
bset r6,r6,20
|
||||
and r11,DBL0H,r9
|
||||
breq.d r11,0,.Ldenorm_dbl0
|
||||
and r12,DBL1H,r9
|
||||
breq.d r12,0,.Ldenorm_dbl1
|
||||
mov r8,mlo
|
||||
mov r4,mhi
|
||||
mulu64 r6,DBL1L
|
||||
breq.d r11,r9,.Linf_nan
|
||||
bmsk r10,DBL1H,19
|
||||
breq.d r12,r9,.Linf_nan
|
||||
bset r10,r10,20
|
||||
add.f r4,r4,mlo
|
||||
adc r5,mhi,0
|
||||
mulu64 r10,DBL0L
|
||||
add_s r12,r12,r11 ; add exponents
|
||||
add.f r4,r4,mlo
|
||||
adc r5,r5,mhi
|
||||
mulu64 r6,r10
|
||||
tst r8,r8
|
||||
bclr r8,r9,30 ; 0x3ff00000
|
||||
bset.ne r4,r4,0 ; put least significant word into sticky bit
|
||||
bclr r6,r9,20 ; 0x7fe00000
|
||||
add.f r5,r5,mlo
|
||||
adc r7,mhi,0 ; fraction product in r7:r5:r4
|
||||
lsr.f r10,r7,9
|
||||
rsub.eq r8,r8,r9 ; 0x40000000
|
||||
sub r12,r12,r8 ; subtract bias + implicit 1
|
||||
brhs.d r12,r6,.Linf_denorm
|
||||
rsub r10,r10,12
|
||||
.Lshift_frac:
|
||||
neg r8,r10
|
||||
asl r6,r4,r10
|
||||
lsr DBL0L,r4,r8
|
||||
add.f 0,r6,r6
|
||||
btst.eq DBL0L,0
|
||||
cmp.eq r4,r4 ; round to nearest / round to even
|
||||
asl r4,r5,r10
|
||||
lsr r5,r5,r8
|
||||
adc.f DBL0L,DBL0L,r4
|
||||
xor.f 0,DBL0H,DBL1H
|
||||
asl r7,r7,r10
|
||||
add_s r12,r12,r5
|
||||
adc DBL0H,r12,r7
|
||||
j_s.d [blink]
|
||||
bset.mi DBL0H,DBL0H,31
|
||||
|
||||
/* N.B. This is optimized for ARC700.
|
||||
ARC600 has very different scheduling / instruction selection criteria. */
|
||||
|
||||
/* If one number is denormal, subtract some from the exponent of the other
|
||||
one (if the other exponent is too small, return 0), and normalize the
|
||||
denormal. Then re-run the computation. */
|
||||
; Zero result: keep only bit 31 of DBL0H and clear DBL0L.
; Branched to from .Ldenorm_2 when DBL1L is 0.
.Lret0_2:
|
||||
lsr_s DBL0H,DBL0H,31 ; move bit 31 down to bit 0, dropping the rest
|
||||
asl_s DBL0H,DBL0H,31 ; move it back up: DBL0H is 0 or 0x80000000
|
||||
j_s.d [blink] ; return; the next instruction is the delay slot
|
||||
mov_s DBL0L,0 ; low result word is zero
|
||||
.balign 4
|
||||
.Ldenorm_dbl0:
|
||||
mov_s r12,DBL0L
|
||||
mov_s DBL0L,DBL1L
|
||||
mov_s DBL1L,r12
|
||||
mov_s r12,DBL0H
|
||||
mov_s DBL0H,DBL1H
|
||||
mov_s DBL1H,r12
|
||||
and r11,DBL0H,r9
|
||||
.Ldenorm_dbl1:
|
||||
brhs r11,r9,.Linf_nan
|
||||
brhs 0x3ca00001,r11,.Lret0
|
||||
sub_s DBL0H,DBL0H,DBL1H
|
||||
bmsk.f DBL1H,DBL1H,30
|
||||
add_s DBL0H,DBL0H,DBL1H
|
||||
beq.d .Ldenorm_2
|
||||
norm r12,DBL1H
|
||||
sub_s r12,r12,10
|
||||
asl r5,r12,20
|
||||
asl_s DBL1H,DBL1H,r12
|
||||
sub DBL0H,DBL0H,r5
|
||||
neg r5,r12
|
||||
lsr r6,DBL1L,r5
|
||||
asl_s DBL1L,DBL1L,r12
|
||||
b.d __muldf3
|
||||
add_s DBL1H,DBL1H,r6
|
||||
|
||||
; Return a zero whose sign bit is bit 31 of DBL0H xor DBL1H.
.Lret0: xor_s DBL0H,DBL0H,DBL1H
|
||||
bclr DBL1H,DBL0H,31 ; DBL1H = the xor with bit 31 cleared
|
||||
xor_s DBL0H,DBL0H,DBL1H ; bits 30..0 cancel, only the sign bit remains
|
||||
j_s.d [blink] ; return; the next instruction is the delay slot
|
||||
mov_s DBL0L,0 ; low result word is zero
|
||||
|
||||
.balign 4
|
||||
.Linf_nan:
|
||||
bclr r12,DBL1H,31
|
||||
xor_s DBL1H,DBL1H,DBL0H
|
||||
bclr_s DBL0H,DBL0H,31
|
||||
max r8,DBL0H,r12 ; either NaN -> NaN ; otherwise inf
|
||||
or.f 0,DBL0H,DBL0L
|
||||
mov_s DBL0L,0
|
||||
or.ne.f DBL1L,DBL1L,r12
|
||||
not_s DBL0H,DBL0L ; inf * 0 -> NaN
|
||||
mov.ne DBL0H,r8
|
||||
tst_s DBL1H,DBL1H
|
||||
j_s.d [blink]
|
||||
bset.mi DBL0H,DBL0H,31
|
||||
|
||||
/* We have checked for infinity / NaN input before, and transformed
|
||||
denormalized inputs into normalized inputs. Thus, the worst case
|
||||
exponent overflows are:
|
||||
1 + 1 - 0x400 == 0xc02 : maximum underflow
|
||||
0x7fe + 0x7fe - 0x3ff == 0xbfd ; maximum overflow
|
||||
N.B. 0x7e and 0x7f are also values for overflow.
|
||||
|
||||
If (r12 <= -54), we have an underflow to zero. */
|
||||
.balign 4
|
||||
.Linf_denorm:
|
||||
lsr r6,r12,28
|
||||
brlo.d r6,0xc,.Linf
|
||||
asr r6,r12,20
|
||||
add.f r10,r10,r6
|
||||
brgt.d r10,0,.Lshift_frac
|
||||
mov_s r12,0
|
||||
beq.d .Lround_frac
|
||||
add r10,r10,32
|
||||
.Lshift32_frac:
|
||||
tst r4,r4
|
||||
mov r4,r5
|
||||
bset.ne r4,r4,1
|
||||
mov r5,r7
|
||||
brge.d r10,1,.Lshift_frac
|
||||
mov r7,0
|
||||
breq.d r10,0,.Lround_frac
|
||||
add r10,r10,32
|
||||
brgt r10,21,.Lshift32_frac
|
||||
b_s .Lret0
|
||||
|
||||
.Lround_frac:
|
||||
add.f 0,r4,r4
|
||||
btst.eq r5,0
|
||||
mov_s DBL0L,r5
|
||||
mov_s DBL0H,r7
|
||||
adc.eq.f DBL0L,DBL0L,0
|
||||
j_s.d [blink]
|
||||
adc.eq DBL0H,DBL0H,0
|
||||
|
||||
; Overflow exit: result is r9:0 with bit 31 set when bit 31 of DBL0H and
; DBL1H differ.
; NOTE(review): r9 is presumably still the 0x7ff00000 mask loaded at the
; start of __muldf3, which would make this a signed infinity - confirm.
.Linf: mov_s DBL0L,0
|
||||
xor.f DBL1H,DBL1H,DBL0H ; N flag = xor of the two sign bits
|
||||
mov_s DBL0H,r9 ; high word = contents of r9
|
||||
j_s.d [blink] ; return; the next instruction is the delay slot
|
||||
bset.mi DBL0H,DBL0H,31 ; set the sign bit if N is set
|
||||
ENDFUNC(__muldf3)
|
||||
|
||||
.balign 4
|
||||
.L7ff00000:
|
||||
.long 0x7ff00000
|
||||
|
|
@ -0,0 +1,180 @@
|
|||
/* Copyright (C) 2008-2013 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "../arc-ieee-754.h"
|
||||
|
||||
#if 0 /* DEBUG */
|
||||
.global __mulsf3
|
||||
FUNC(__mulsf3)
|
||||
.balign 4
|
||||
__mulsf3:
|
||||
push_s blink
|
||||
push_s r1
|
||||
bl.d __mulsf3_c
|
||||
push_s r0
|
||||
ld_s r1,[sp,4]
|
||||
st_s r0,[sp,4]
|
||||
bl.d __mulsf3_asm
|
||||
pop_s r0
|
||||
pop_s r1
|
||||
pop_s blink
|
||||
cmp r0,r1
|
||||
jeq_s [blink]
|
||||
and r12,r0,r1
|
||||
bic.f 0,0x7f800000,r12
|
||||
bne 0f
|
||||
bmsk.f 0,r0,22
|
||||
bmsk.ne.f r1,r1,22
|
||||
jne_s [blink] ; both NaN -> OK
|
||||
0: bl abort
|
||||
ENDFUNC(__mulsf3)
|
||||
#define __mulsf3 __mulsf3_asm
|
||||
#endif /* DEBUG */
|
||||
|
||||
.balign 4
|
||||
.global __mulsf3
|
||||
FUNC(__mulsf3)
|
||||
__mulsf3:
|
||||
ld.as r9,[pcl,80]; [pcl,((.L7f800000-.+2)/4)]
|
||||
bmsk r4,r1,22
|
||||
bset r2,r0,23
|
||||
asl_s r2,r2,8
|
||||
bset r3,r4,23
|
||||
mulu64 r2,r3
|
||||
and r11,r0,r9
|
||||
breq.d r11,0,.Ldenorm_dbl0
|
||||
and r12,r1,r9
|
||||
breq.d r12,0,.Ldenorm_dbl1
|
||||
xor_s r0,r0,r1
|
||||
breq.d r11,r9,.Linf_nan_dbl0
|
||||
ld.as r4,[pcl,70]; [pcl,((.L7fffffff-.+2)/4)]
|
||||
breq.d r12,r9,.Linf_nan_dbl1
|
||||
.Lpast_denorm:
|
||||
asl.f 0,mhi,8
|
||||
mov r6,mhi
|
||||
mov r7,mlo
|
||||
add.pl r6,r6,r6
|
||||
bclr.pl r6,r6,23
|
||||
add.pl.f r7,r7,r7
|
||||
add.cs r6,r6,1
|
||||
lsr.f 0,r6,1
|
||||
add_s r12,r12,r11
|
||||
adc.f 0,r7,r4
|
||||
add_s r12,r12, \
|
||||
-0x3f800000
|
||||
adc.f r8,r6,r12
|
||||
tst.pl r8,r9
|
||||
bic r0,r0,r4
|
||||
min r3,r8,r9
|
||||
jpnz.d [blink]
|
||||
add.pnz r0,r0,r3
|
||||
; infinity or denormal number
|
||||
add.ne.f r3,r3,r3
|
||||
asr_s r3,r3,23+1
|
||||
bset r6,r6,23
|
||||
bpnz.d .Linfinity
|
||||
sub_s r3,r3,1
|
||||
neg_s r2,r3
|
||||
brhi.d r2,24,.Lret_r0 ; right shift shift > 24 -> return +-0
|
||||
lsr r2,r6,r2
|
||||
asl r9,r6,r3
|
||||
lsr.f 0,r2,1
|
||||
tst r7,r7
|
||||
add_s r0,r0,r2
|
||||
bset.ne r9,r9,0
|
||||
adc.f 0,r9,r4
|
||||
j_s.d [blink]
|
||||
add.cs r0,r0,1
|
||||
.Linfinity:
|
||||
j_s.d [blink]
|
||||
add_s r0,r0,r9
|
||||
|
||||
.Lret_r0: j_s [blink]
|
||||
|
||||
.balign 4
|
||||
.Ldenorm_dbl0:
|
||||
bclr_s r2,r2,31
|
||||
norm.f r4,r2
|
||||
add_s r2,r2,r2
|
||||
asl r2,r2,r4
|
||||
mulu64 r2,r3
|
||||
breq.d r12,r9,.Ldenorm_dbl0_inf_nan_dbl1
|
||||
asl r4,r4,23
|
||||
sub.ne.f r12,r12,r4
|
||||
ld.as r4,[pcl,29]; [pcl,((.L7fffffff-.+2)/4)]
|
||||
bhi.d .Lpast_denorm
|
||||
xor_s r0,r0,r1
|
||||
bmsk r1,r0,30
|
||||
j_s.d [blink]
|
||||
bic_s r0,r0,r1
|
||||
|
||||
.balign 4
|
||||
.Ldenorm_dbl0_inf_nan_dbl1:
|
||||
bmsk.f 0,r0,30
|
||||
beq_s .Lretnan
|
||||
xor_s r0,r0,r1
|
||||
.Linf_nan_dbl1:
|
||||
xor_s r1,r1,r0
|
||||
.Linf_nan_dbl0:
|
||||
bclr_s r1,r1,31
|
||||
cmp_s r1,r9
|
||||
jls.d [blink]
|
||||
xor_s r0,r0,r1
|
||||
; r1 NaN -> result NaN
|
||||
.Lretnan:
|
||||
j_s.d [blink]
|
||||
mov r0,-1
|
||||
|
||||
.balign 4
|
||||
.Ldenorm_dbl1:
|
||||
breq.d r11,r9,.Linf_nan_dbl0_2
|
||||
norm.f r3,r4
|
||||
sub_s r3,r3,7
|
||||
asl r4,r4,r3
|
||||
mulu64 r2,r4
|
||||
sub_s r3,r3,1
|
||||
asl_s r3,r3,23
|
||||
sub.ne.f r11,r11,r3
|
||||
ld.as r4,[pcl,11]; [pcl,((.L7fffffff-.+2)/4)]
|
||||
bhi.d .Lpast_denorm
|
||||
bmsk r8,r0,30
|
||||
j_s.d [blink]
|
||||
bic r0,r0,r8
|
||||
|
||||
.balign 4
|
||||
.Linf_nan_dbl0_2:
|
||||
bclr_s r1,r1,31
|
||||
xor_s r0,r0,r1
|
||||
sub.eq r1,r1,1 ; inf/nan * 0 -> nan
|
||||
bic.f 0,r9,r1
|
||||
j_s.d [blink]
|
||||
or.eq r0,r0,r1 ; r1 nan -> result nan
|
||||
|
||||
.balign 4
|
||||
.L7f800000:
|
||||
.long 0x7f800000
|
||||
.L7fffffff:
|
||||
.long 0x7fffffff
|
||||
ENDFUNC(__mulsf3)
|
||||
|
|
@ -0,0 +1,227 @@
|
|||
/* Copyright (C) 2008-2013 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "../arc-ieee-754.h"
|
||||
|
||||
#if 0 /* DEBUG */
|
||||
/* Self-checking wrapper, normally compiled out.  It calls __divsf3_c and
   then __divsf3_asm with the same two operands and calls abort when the
   two results differ, unless both results have every exponent bit set.
   __divsf3_c is not defined in this file (presumably a C reference
   implementation; confirm against the build setup).  */
.global __divsf3
|
||||
FUNC(__divsf3)
|
||||
.balign 4
|
||||
__divsf3:
|
||||
push_s blink ; keep the return address across both calls
|
||||
push_s r1 ; save the divisor
|
||||
bl.d __divsf3_c
|
||||
push_s r0 ; delay slot: save the dividend
|
||||
ld_s r1,[sp,4] ; reload the saved divisor
|
||||
st_s r0,[sp,4] ; park the first result in the slot that held the divisor
|
||||
bl.d __divsf3_asm
|
||||
pop_s r0 ; delay slot: restore the dividend
|
||||
pop_s r1 ; r1 = result of __divsf3_c, r0 = result of __divsf3_asm
|
||||
pop_s blink
|
||||
cmp r0,r1
|
||||
jeq_s [blink] ; bit-identical results: return
|
||||
and r12,r0,r1 ; bits that are set in both results
|
||||
bic.f 0,0x7f800000,r12 ; both NaN -> OK
|
||||
jeq_s [blink]
|
||||
bl abort ; results disagree
|
||||
ENDFUNC(__divsf3)
|
||||
#define __divsf3 __divsf3_asm
|
||||
#endif /* DEBUG */
|
||||
|
||||
.balign 4
|
||||
__divsf3_support: /* This label makes debugger output saner. */
|
||||
FUNC(__divsf3)
|
||||
/* Dividend (r0) has a zero exponent field, i.e. it is zero or subnormal.
   As set up at the __divsf3 entry: r2 = fraction of r0, r1 = divisor,
   r5 = r1 with its fraction bits cleared, r9 = 0x7f800000.
   Normalises r2 so that its leading 1 sits at bit 23 and compensates by
   adding the shift, moved to the exponent field position, to r5.  */
.Ldenorm_fp0:
|
||||
norm.f r12,r2 ; flag for 0/x -> 0 check
|
||||
bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0
|
||||
beq_s .Lret0_NaN
|
||||
tst r1,r9 ; Z set when the divisor exponent field is zero too
|
||||
add_s r2,r2,r2
|
||||
sub_s r12,r12,8
|
||||
asl_s r2,r2,r12 ; with the add_s above: leading 1 of r2 moves to bit 23
|
||||
asl_l r12,r12,23 ; shift count at exponent position (long form; presumably for alignment)
|
||||
bne.d .Lpast_denorm_fp0 ; divisor exponent nonzero: rejoin the main path
|
||||
add r5,r5,r12 ; delay slot: fold the normalisation shift into r5
|
||||
/* r0 is subnormal, r1 is subnormal or 0. */
|
||||
|
||||
.balign 4
|
||||
.Ldenorm_fp1:
|
||||
norm.f r12,r3 ; flag for x/0 -> Inf check
|
||||
bic.ne.f 0,0x60000000,r0 ; large number/denorm -> Inf
|
||||
beq_s .Linf
|
||||
add_s r3,r3,r3
|
||||
sub_s r12,r12,8
|
||||
asl_s r3,r3,r12
|
||||
asl_s r12,r12,23
|
||||
b.d .Lpast_denorm_fp1
|
||||
add r4,r4,r12
|
||||
|
||||
.Lret0_NaN:
|
||||
bclr.f 0,r1,31 ; 0/0 -> NaN
|
||||
bic r0,r10,r9
|
||||
j_s.d [blink]
|
||||
sub.eq r0,r0,1
|
||||
|
||||
.balign 4
|
||||
.Linf_nan_fp0:
|
||||
bic.f 0,r9,r1 ; fp1 Inf -> result NaN
|
||||
bic r1,r5,r9 ; fp1 sign
|
||||
sub.eq r1,r1,1
|
||||
j_s.d [blink]
|
||||
xor_s r0,r0,r1
|
||||
.Linf_nan_fp1:
|
||||
bic r0,r4,r9 ; fp0 sign
|
||||
bmsk.f 0,r1,22 ; x/inf -> 0, x/nan -> nan
|
||||
xor.eq r1,r1,r9
|
||||
j_s.d [blink]
|
||||
xor_s r0,r0,r1
|
||||
|
||||
.global __divsf3
|
||||
.balign 4
|
||||
.long 0x7f800000 ; exponent mask
|
||||
__divsf3:
|
||||
ld r9,[pcl,-4]
|
||||
bmsk r2,r0,22
|
||||
xor r4,r0,r2
|
||||
bmsk r3,r1,22
|
||||
xor r5,r1,r3
|
||||
and r11,r0,r9
|
||||
breq.d r11,0,.Ldenorm_fp0
|
||||
xor r10,r4,r5
|
||||
breq r11,r9,.Linf_nan_fp0
|
||||
bset_s r2,r2,23
|
||||
and r11,r1,r9
|
||||
breq r11,0,.Ldenorm_fp1
|
||||
breq r11,r9,.Linf_nan_fp1
|
||||
.Lpast_denorm_fp0:
|
||||
bset_s r3,r3,23
|
||||
.Lpast_denorm_fp1:
|
||||
cmp r2,r3
|
||||
asl_s r2,r2,6+1
|
||||
asl_s r3,r3,7
|
||||
add.lo r2,r2,r2
|
||||
bclr r8,r9,30 ; exponent bias
|
||||
bclr.lo r8,r8,23 ; reduce exp by one if fraction is shifted
|
||||
sub r4,r4,r5
|
||||
add r4,r4,r8
|
||||
xor.f 0,r10,r4
|
||||
bmi .Linf_denorm
|
||||
and.f r12,r4,r9
|
||||
beq .Ldenorm
|
||||
sub_s r2,r2,r3 ; discard implicit 1
|
||||
rsub r3,r3,1 ; prime r3 for two-insn divide-step use
|
||||
.Ldiv_23bit:
|
||||
.rep 6
|
||||
add1.f r2,r3,r2
|
||||
sub.cc r2,r2,r3
|
||||
.endr
|
||||
breq r12,r9,.Linf
|
||||
bmsk r0,r2,6
|
||||
xor_s r2,r2,r0
|
||||
.Ldiv_17bit:
|
||||
.rep 7
|
||||
add1.f r2,r3,r2
|
||||
sub.cc r2,r2,r3
|
||||
.endr
|
||||
asl_s r0,r0,7
|
||||
bmsk r1,r2,6
|
||||
xor_s r2,r2,r1
|
||||
or_s r0,r0,r1
|
||||
.Ldiv_10bit:
|
||||
.rep 7
|
||||
add1.f r2,r3,r2
|
||||
sub.cc r2,r2,r3
|
||||
.endr
|
||||
asl_s r0,r0,7
|
||||
bmsk r1,r2,6
|
||||
xor_s r2,r2,r1
|
||||
or_s r0,r0,r1
|
||||
.Ldiv_3bit:
|
||||
.rep 3
|
||||
add1.f r2,r3,r2
|
||||
sub.cc r2,r2,r3
|
||||
.endr
|
||||
asl_s r0,r0,3
|
||||
.Ldiv_0bit:
|
||||
add1.f r1,r3,r2
|
||||
sub.cc r1,r1,r3
|
||||
bmsk_s r2,r2,2
|
||||
tst r1,-0x7e ; 0xffffff82, test for rest or odd
|
||||
bmsk_s r1,r1,0
|
||||
add_s r0,r0,r2 ; assemble fraction
|
||||
add_s r0,r0,r4 ; add in sign & exponent
|
||||
j_s.d [blink]
|
||||
add.ne r0,r0,r1 ; round to nearest / even
|
||||
|
||||
.balign 4
|
||||
/* Return a signed infinity.  r10 was set to r4 xor r5 at entry, so its
   bit 31 is the xor of the operand signs and its fraction bits are clear;
   or-ing in r9 (the exponent mask) sets every exponent bit.  */
.Linf:
|
||||
j_s.d [blink]
|
||||
or r0,r10,r9 ; delay slot: result = sign | 0x7f800000
|
||||
|
||||
/* Return r4 unchanged.  This is the first .Ldenorm_tab target; the
   .Ldenorm dispatch leaves only the sign bit in r4 before jumping.  */
.Lret_r4:
|
||||
j_s.d [blink]
|
||||
mov_s r0,r4 ; delay slot: result = r4
|
||||
.balign 4
|
||||
.Linf_denorm:
|
||||
add.f r12,r4,r4
|
||||
asr_l r12,r12,24
|
||||
bpl .Linf
|
||||
max r12,r12,-24
|
||||
.Ldenorm:
|
||||
rsub r3,r3,1
|
||||
add r1,pcl,68; .Ldenorm_tab-.
|
||||
ldw.as r12,[r1,r12]
|
||||
mov_s r0,0
|
||||
lsr_s r2,r2
|
||||
sub_s r1,r1,r12
|
||||
j_s.d [r1]
|
||||
bic r4,r10,r9
|
||||
/* Dispatch table for subnormal results.  The .Ldenorm code loads the
   halfword at .Ldenorm_tab + 2*r12 (r12 = clamped exponent, -24..0) and
   jumps to .Ldenorm_tab minus that value.  Each "-k*8" moves the target k
   divide steps (two 4-byte instructions each) into a .rep group, so going
   down one table index must drop exactly one divide step.  The
   .Ldiv_10bit group previously listed -3*8 twice and had no -4*8 entry,
   which ran one divide step too many for that exponent.  */
.short .Ldenorm_tab-.Lret_r4
|
||||
.short .Ldenorm_tab-.Ldiv_0bit
|
||||
.short .Ldenorm_tab-.Ldiv_3bit-2*8
|
||||
.short .Ldenorm_tab-.Ldiv_3bit-1*8
|
||||
.short .Ldenorm_tab-.Ldiv_3bit
|
||||
.short .Ldenorm_tab-.Ldiv_10bit-6*8
|
||||
.short .Ldenorm_tab-.Ldiv_10bit-5*8
|
||||
.short .Ldenorm_tab-.Ldiv_10bit-4*8
|
||||
.short .Ldenorm_tab-.Ldiv_10bit-3*8
|
||||
.short .Ldenorm_tab-.Ldiv_10bit-2*8
|
||||
.short .Ldenorm_tab-.Ldiv_10bit-1*8
|
||||
.short .Ldenorm_tab-.Ldiv_10bit
|
||||
.short .Ldenorm_tab-.Ldiv_17bit-6*8
|
||||
.short .Ldenorm_tab-.Ldiv_17bit-5*8
|
||||
.short .Ldenorm_tab-.Ldiv_17bit-4*8
|
||||
.short .Ldenorm_tab-.Ldiv_17bit-3*8
|
||||
.short .Ldenorm_tab-.Ldiv_17bit-2*8
|
||||
.short .Ldenorm_tab-.Ldiv_17bit-1*8
|
||||
.short .Ldenorm_tab-.Ldiv_17bit
|
||||
.short .Ldenorm_tab-.Ldiv_23bit-5*8
|
||||
.short .Ldenorm_tab-.Ldiv_23bit-4*8
|
||||
.short .Ldenorm_tab-.Ldiv_23bit-3*8
|
||||
.short .Ldenorm_tab-.Ldiv_23bit-2*8
|
||||
.short .Ldenorm_tab-.Ldiv_23bit-1*8
|
||||
.Ldenorm_tab:
|
||||
.short .Ldenorm_tab-.Ldiv_23bit
|
||||
ENDFUNC(__divsf3)
|
||||
|
|
@ -0,0 +1,179 @@
|
|||
/* Copyright (C) 2008-2013 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "../arc-ieee-754.h"
|
||||
|
||||
#if 0 /* DEBUG */
|
||||
/* Self-checking wrapper, normally compiled out.  It calls __mulsf3_c and
   then __mulsf3_asm with the same two operands and calls abort when the
   results differ, unless both have every exponent bit set and a nonzero
   fraction.  __mulsf3_c is not defined in this file (presumably a C
   reference implementation; confirm against the build setup).  */
.global __mulsf3
|
||||
FUNC(__mulsf3)
|
||||
.balign 4
|
||||
__mulsf3:
|
||||
push_s blink ; keep the return address across both calls
|
||||
push_s r1 ; save the second operand
|
||||
bl.d __mulsf3_c
|
||||
push_s r0 ; delay slot: save the first operand
|
||||
ld_s r1,[sp,4] ; reload the saved second operand
|
||||
st_s r0,[sp,4] ; park the first result in that stack slot
|
||||
bl.d __mulsf3_asm
|
||||
pop_s r0 ; delay slot: restore the first operand
|
||||
pop_s r1 ; r1 = result of __mulsf3_c, r0 = result of __mulsf3_asm
|
||||
pop_s blink
|
||||
cmp r0,r1
|
||||
jeq_s [blink] ; bit-identical results: return
|
||||
and r12,r0,r1 ; bits that are set in both results
|
||||
bic.f 0,0x7f800000,r12 ; Z set when both have all exponent bits set
|
||||
bne 0f ; otherwise this is a real mismatch
|
||||
bmsk.f 0,r0,22 ; test the low 23 bits (fraction) of r0
|
||||
bmsk.ne.f r1,r1,22 ; if nonzero, test the fraction of r1 as well
|
||||
jne_s [blink] ; both NaN -> OK
|
||||
0: bl abort
|
||||
ENDFUNC(__mulsf3)
|
||||
#define __mulsf3 __mulsf3_asm
|
||||
#endif /* DEBUG */
|
||||
|
||||
.balign 4
|
||||
.global __mulsf3
|
||||
/* Single-precision multiply; operands arrive in r0 and r1.  This part
   splits both operands into fraction and exponent field, branches away
   for zero/subnormal and Inf/NaN operands, and then forms the product of
   the two 24-bit fractions with a 24-iteration shift-and-add loop that
   leaves the high part in r6 and the low part in r7.  */
FUNC(__mulsf3)
|
||||
__mulsf3:
|
||||
ld.as r9,[pcl,76]; [pcl,((.L7f800000-.+2)/4)]
|
||||
bmsk r4,r1,22 ; r4 = fraction bits of r1
|
||||
bset r3,r4,23 ; r3 = that fraction with the implicit 1 set
|
||||
bmsk r2,r0,22 ; r2 = fraction bits of r0
|
||||
and r11,r0,r9 ; r11 = exponent field of r0
|
||||
breq.d r11,0,.Ldenorm_dbl0 ; r0 is zero or subnormal
|
||||
and r12,r1,r9 ; delay slot: r12 = exponent field of r1
|
||||
xor_s r0,r0,r1 ; bit 31 of r0 is now the sign of the product
|
||||
breq.d r11,r9,.Linf_nan_dbl0 ; r0 is Inf or NaN
|
||||
bset_s r2,r2,23 ; delay slot: set the implicit 1 in r2
|
||||
breq r12,0,.Ldenorm_dbl1 ; r1 is zero or subnormal
|
||||
breq r12,r9,.Linf_nan_dbl1 ; r1 is Inf or NaN
|
||||
.Lpast_denorm:
|
||||
mov r6,0 ; high part of the running product
|
||||
lsr.f r7,r2 ; r7 = r2 >> 1; carry = lowest multiplier bit
|
||||
; We could do this a bit faster here with a 32 bit shift register and
|
||||
; inserting the r2 factor / retrieving the low result a byte at a time,
|
||||
; but that'd increase code size.
|
||||
mov lp_count,24 ; one iteration per fraction bit
|
||||
.balign 4
|
||||
lp 0f
|
||||
add.cs r6,r6,r3 ; add the multiplicand when the current multiplier bit is 1
|
||||
lsr.f r6,r6 ; shift the high part right; its low bit goes to carry
|
||||
rrc.f r7,r7 ; carry enters r7 at the top; next multiplier bit leaves to carry
|
||||
0:
|
||||
ld.as r4,[pcl,59]; [pcl,((.L7fffffff-.+2)/4)]
|
||||
asl.f 0,r6,8
|
||||
add.pl r6,r6,r6
|
||||
bclr.pl r6,r6,23
|
||||
add.pl.f r7,r7,r7
|
||||
add.cs r6,r6,1
|
||||
lsr.f 0,r6,1
|
||||
add_s r12,r12,r11
|
||||
adc.f 0,r7,r4
|
||||
add_s r12,r12, \
|
||||
-0x3f800000
|
||||
adc.f r8,r6,r12
|
||||
tst.pl r8,r9
|
||||
bic r0,r0,r4
|
||||
min r3,r8,r9
|
||||
jpnz.d [blink]
|
||||
add.pnz r0,r0,r3
|
||||
; infinity or denormal number
|
||||
add.ne.f r3,r3,r3
|
||||
asr_s r3,r3,23+1
|
||||
bset r6,r6,23
|
||||
bpnz.d .Linfinity
|
||||
sub_s r3,r3,1
|
||||
neg_s r2,r3
|
||||
brhi.d r2,24,.Lret_r0 ; right shift shift > 24 -> return +-0
|
||||
lsr r2,r6,r2
|
||||
asl r9,r6,r3
|
||||
lsr.f 0,r2,1
|
||||
tst r7,r7
|
||||
add_s r0,r0,r2
|
||||
bset.ne r9,r9,0
|
||||
adc.f 0,r9,r4
|
||||
j_s.d [blink]
|
||||
add.cs r0,r0,1
|
||||
.Linfinity:
|
||||
j_s.d [blink]
|
||||
add_s r0,r0,r9
|
||||
|
||||
.Lret_r0: j_s [blink]
|
||||
|
||||
.balign 4
|
||||
.Ldenorm_dbl0:
|
||||
asl_s r2,r2,8
|
||||
norm.f r4,r2
|
||||
lsr_s r2,r2,7
|
||||
asl r2,r2,r4
|
||||
breq.d r12,r9,.Ldenorm_dbl0_inf_nan_dbl1
|
||||
asl r4,r4,23
|
||||
sub.ne.f r12,r12,r4
|
||||
bhi.d .Lpast_denorm
|
||||
xor_s r0,r0,r1
|
||||
bmsk r1,r0,30
|
||||
j_s.d [blink]
|
||||
bic_s r0,r0,r1
|
||||
|
||||
.balign 4
|
||||
.Ldenorm_dbl0_inf_nan_dbl1:
|
||||
bmsk.f 0,r0,30
|
||||
beq_s .Lretnan
|
||||
xor_s r0,r0,r1
|
||||
.Linf_nan_dbl1:
|
||||
xor_s r1,r1,r0
|
||||
bclr_s r1,r1,31
|
||||
j_s.d [blink]
|
||||
xor_s r0,r0,r1
|
||||
.Linf_nan_dbl0:
|
||||
sub_s r2,r1,1 ; inf/nan * 0 -> nan; inf * nan -> nan (use |r2| >= inf)
|
||||
bic.f 0,r9,r2
|
||||
xor_s r0,r0,r1
|
||||
bclr_s r1,r1,31
|
||||
xor_s r0,r0,r1
|
||||
jne_s [blink]
|
||||
.Lretnan:
|
||||
j_s.d [blink]
|
||||
mov r0,-1
|
||||
.balign 4
|
||||
.Ldenorm_dbl1:
|
||||
norm.f r3,r4
|
||||
sub_s r3,r3,7
|
||||
asl r4,r4,r3
|
||||
sub_s r3,r3,1
|
||||
asl_s r3,r3,23
|
||||
sub.ne.f r11,r11,r3
|
||||
bhi.d .Lpast_denorm
|
||||
mov_s r3,r4
|
||||
bmsk r3,r0,30
|
||||
j_s.d [blink]
|
||||
bic_s r0,r0,r3
|
||||
|
||||
.balign 4
|
||||
.L7f800000:
|
||||
.long 0x7f800000
|
||||
.L7fffffff:
|
||||
.long 0x7fffffff
|
||||
ENDFUNC(__mulsf3)
|
||||
|
|
@ -0,0 +1,416 @@
|
|||
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/*
|
||||
to calculate a := b/x as b*y, with y := 1/x:
|
||||
- x is in the range [1..2)
|
||||
- calculate 15..18 bit inverse y0 using a table of approximating polynomials.
|
||||
Precision is higher for polynomials used to evaluate input with larger
|
||||
value.
|
||||
- Do one newton-raphson iteration step to double the precision,
|
||||
then multiply this with the divisor
|
||||
-> more time to decide if dividend is subnormal
|
||||
- the worst error propagation is on the side of the value range
|
||||
with the least initial defect, thus giving us about 30 bits precision.
|
||||
The truncation error for the either is less than 1 + x/2 ulp.
|
||||
A 31 bit inverse can be simply calculated by using x with implicit 1
|
||||
and chaining the multiplies. For a 32 bit inverse, we multiply y0^2
|
||||
with the bare fraction part of x, then add in y0^2 for the implicit
|
||||
1 of x.
|
||||
- If calculating a 31 bit inverse, the systematic error is less than
|
||||
-1 ulp; likewise, for 32 bit, it is less than -2 ulp.
|
||||
- If we calculate our seed with a 32 bit fraction, we can achieve a
|
||||
tentative result strictly better than -2 / +2.5 (1) ulp/128, i.e. we
|
||||
only need to take the step to calculate the 2nd stage rest and
|
||||
rounding adjust 1/32th of the time. However, if we use a 20 bit
|
||||
fraction for the seed, the negative error can exceed -2 ulp/128, (2)
|
||||
thus for a simple add / tst check, we need to do the 2nd stage
|
||||
rest calculation/ rounding adjust 1/16th of the time.
|
||||
(1): The inexactness of the 32 bit inverse contributes an error in the
|
||||
range of (-1 .. +(1+x/2) ) ulp/128. Leaving out the low word of the
|
||||
rest contributes an error < +1/x ulp/128 . In the interval [1,2),
|
||||
x/2 + 1/x <= 1.5 .
|
||||
(2): Unless proven otherwise. I have not actually looked for an
|
||||
example where -2 ulp/128 is exceeded, and my calculations indicate
|
||||
that the excess, if existent, is less than -1/512 ulp.
|
||||
*/
|
||||
#include "arc-ieee-754.h"
|
||||
|
||||
/* N.B. fp-bit.c does double rounding on denormal numbers. */
|
||||
#if 0 /* DEBUG */
|
||||
/* Self-checking wrapper, normally compiled out.  It calls __divdf3_c and
   then __divdf3_asm with the same operands (r0/r1 and r2/r3) and calls
   abort when the two 64-bit results differ, unless the 0x7ff80000 bits
   are set in both compared high words.  __divdf3_c is not defined in this
   file (presumably a C reference implementation; confirm against the
   build setup).  */
.global __divdf3
|
||||
FUNC(__divdf3)
|
||||
.balign 4
|
||||
__divdf3:
|
||||
push_s blink ; keep the return address across both calls
|
||||
push_s r2
|
||||
push_s r3
|
||||
push_s r0
|
||||
bl.d __divdf3_c
|
||||
push_s r1 ; delay slot: all four operand words are now saved
|
||||
ld_s r2,[sp,12] ; reload the saved r2
|
||||
ld_s r3,[sp,8] ; reload the saved r3
|
||||
st_s r0,[sp,12] ; park the first result (r0) in the slot that held r2
|
||||
st_s r1,[sp,8] ; park the first result (r1) in the slot that held r3
|
||||
pop_s r1 ; restore the saved r1
|
||||
bl.d __divdf3_asm
|
||||
pop_s r0 ; delay slot: restore the saved r0
|
||||
pop_s r3 ; r3 = r1 word of the __divdf3_c result
|
||||
pop_s r2 ; r2 = r0 word of the __divdf3_c result
|
||||
pop_s blink
|
||||
cmp r0,r2
|
||||
cmp.eq r1,r3 ; compare the second word only if the first matched
|
||||
jeq_s [blink] ; bit-identical results: return
|
||||
and r12,DBL0H,DBL1H ; bits set in the high words of both results
|
||||
bic.f 0,0x7ff80000,r12 ; both NaN -> OK
|
||||
jeq_s [blink]
|
||||
bl abort ; results disagree
|
||||
ENDFUNC(__divdf3)
|
||||
#define __divdf3 __divdf3_asm
|
||||
#endif /* DEBUG */
|
||||
|
||||
FUNC(__divdf3)
|
||||
__divdf3_support: /* This label makes debugger output saner. */
|
||||
.balign 4
|
||||
.Ldenorm_dbl1:
|
||||
brge r6, \
|
||||
0x43500000,.Linf_NaN ; large number / denorm -> Inf
|
||||
bmsk.f r12,DBL1H,19
|
||||
mov.eq r12,DBL1L
|
||||
mov.eq DBL1L,0
|
||||
sub.eq r7,r7,32
|
||||
norm.f r11,r12 ; flag for x/0 -> Inf check
|
||||
beq_s .Linf_NaN
|
||||
mov.mi r11,0
|
||||
add.pl r11,r11,1
|
||||
add_s r12,r12,r12
|
||||
asl r8,r12,r11
|
||||
rsub r12,r11,31
|
||||
lsr r12,DBL1L,r12
|
||||
tst_s DBL1H,DBL1H
|
||||
or r8,r8,r12
|
||||
lsr r4,r8,26
|
||||
lsr DBL1H,r8,12
|
||||
ld.as r4,[r10,r4]
|
||||
bxor.mi DBL1H,DBL1H,31
|
||||
sub r11,r11,11
|
||||
asl DBL1L,DBL1L,r11
|
||||
sub r11,r11,1
|
||||
mpyhu r5,r4,r8
|
||||
sub r7,r7,r11
|
||||
asl r4,r4,12
|
||||
b.d .Lpast_denorm_dbl1
|
||||
asl r7,r7,20
|
||||
; wb stall
|
||||
|
||||
.balign 4
|
||||
.Ldenorm_dbl0:
|
||||
bmsk.f r12,DBL0H,19
|
||||
; wb stall
|
||||
mov.eq r12,DBL0L
|
||||
sub.eq r6,r6,32
|
||||
norm.f r11,r12 ; flag for 0/x -> 0 check
|
||||
brge r7, \
|
||||
0x43500000, .Lret0_NaN ; denorm/large number -> 0
|
||||
beq_s .Lret0_NaN
|
||||
mov.mi r11,0
|
||||
add.pl r11,r11,1
|
||||
asl r12,r12,r11
|
||||
sub r6,r6,r11
|
||||
add.f 0,r6,31
|
||||
lsr r10,DBL0L,r6
|
||||
mov.mi r10,0
|
||||
add r6,r6,11+32
|
||||
neg.f r11,r6
|
||||
asl DBL0L,DBL0L,r11
|
||||
mov.pl DBL0L,0
|
||||
sub r6,r6,32-1
|
||||
b.d .Lpast_denorm_dbl0
|
||||
asl r6,r6,20
|
||||
|
||||
.Linf_NaN:
|
||||
tst_s DBL0L,DBL0L ; 0/0 -> NaN
|
||||
xor_s DBL1H,DBL1H,DBL0H
|
||||
bclr.eq.f DBL0H,DBL0H,31
|
||||
bmsk DBL0H,DBL1H,30
|
||||
xor_s DBL0H,DBL0H,DBL1H
|
||||
sub.eq DBL0H,DBL0H,1
|
||||
mov_s DBL0L,0
|
||||
j_s.d [blink]
|
||||
or DBL0H,DBL0H,r9
|
||||
.balign 4
|
||||
.Lret0_NaN:
|
||||
xor_s DBL1H,DBL1H,DBL0H
|
||||
cmp_s r12,r9
|
||||
mov_s DBL0L,0
|
||||
bmsk DBL0H,DBL1H,30
|
||||
xor_s DBL0H,DBL0H,DBL1H
|
||||
j_s.d [blink]
|
||||
sub.hi DBL0H,DBL0H,1
|
||||
.Linf_nan_dbl1: ; Inf/Inf -> NaN x/Inf-> 0 x/NaN -> NaN
|
||||
not_s DBL0L,DBL1H
|
||||
cmp r6,r9
|
||||
sub_s.ne DBL0L,DBL0L,DBL0L
|
||||
tst_s DBL0H,DBL0H
|
||||
add_s DBL0H,DBL1H,DBL0L
|
||||
j_s.d [blink]
|
||||
bxor.mi DBL0H,DBL0H,31
|
||||
.Linf_nan_dbl0:
|
||||
tst_s DBL1H,DBL1H
|
||||
j_s.d [blink]
|
||||
bxor.mi DBL0H,DBL0H,31
|
||||
.balign 4
|
||||
.global __divdf3
|
||||
/* N.B. the spacing between divtab and the add3 to get its address must
|
||||
be a multiple of 8. */
|
||||
__divdf3:
|
||||
asl r8,DBL1H,12
|
||||
lsr r12,DBL1L,20
|
||||
lsr r4,r8,26
|
||||
add3 r10,pcl,59 ; (.Ldivtab-.) >> 3
|
||||
ld.as r4,[r10,r4]
|
||||
ld.as r9,[pcl,180]; [pcl,(-((.-.L7ff00000) >> 2))] ; 0x7ff00000
|
||||
or r8,r8,r12
|
||||
mpyhu r5,r4,r8
|
||||
and.f r7,DBL1H,r9
|
||||
asl r4,r4,12 ; having the asl here is a concession to the XMAC pipeline.
|
||||
beq.d .Ldenorm_dbl1
|
||||
and r6,DBL0H,r9
|
||||
.Lpast_denorm_dbl1: ; wb stall
|
||||
sub r4,r4,r5
|
||||
mpyhu r5,r4,r4
|
||||
breq.d r6,0,.Ldenorm_dbl0
|
||||
lsr r8,r8,1
|
||||
asl r12,DBL0H,11
|
||||
lsr r10,DBL0L,21
|
||||
.Lpast_denorm_dbl0: ; wb stall
|
||||
bset r8,r8,31
|
||||
mpyhu r11,r5,r8
|
||||
add_s r12,r12,r10
|
||||
bset r5,r12,31
|
||||
cmp r5,r8
|
||||
cmp.eq DBL0L,DBL1L
|
||||
; wb stall
|
||||
lsr.cc r5,r5,1
|
||||
sub r4,r4,r11 ; u1.31 inverse, about 30 bit
|
||||
mpyhu r11,r5,r4 ; result fraction highpart
|
||||
breq r7,r9,.Linf_nan_dbl1
|
||||
lsr r8,r8,2 ; u3.29
|
||||
add r5,r6, /* wait for immediate / XMAC wb stall */ \
|
||||
0x3fe00000
|
||||
; wb stall (not for XMAC)
|
||||
breq r6,r9,.Linf_nan_dbl0
|
||||
mpyu r12,r11,r8 ; u-28.31
|
||||
asl_s DBL1L,DBL1L,9 ; u-29.23:9
|
||||
sbc r6,r5,r7
|
||||
; resource conflict (not for XMAC)
|
||||
mpyhu r5,r11,DBL1L ; u-28.23:9
|
||||
add.cs DBL0L,DBL0L,DBL0L
|
||||
asl_s DBL0L,DBL0L,6 ; u-26.25:7
|
||||
asl r10,r11,23
|
||||
sub_l DBL0L,DBL0L,r12
|
||||
; wb stall (before 'and' for XMAC)
|
||||
lsr r7,r11,9
|
||||
sub r5,DBL0L,r5 ; rest msw ; u-26.31:0
|
||||
mpyh r12,r5,r4 ; result fraction lowpart
|
||||
xor.f 0,DBL0H,DBL1H
|
||||
and DBL0H,r6,r9
|
||||
add_s DBL0H,DBL0H,r7 ; (XMAC wb stall)
|
||||
bxor.mi DBL0H,DBL0H,31
|
||||
brhs r6, /* wb stall / wait for immediate */ \
|
||||
0x7fe00000,.Linf_denorm
|
||||
add.f r12,r12,0x11
|
||||
asr r9,r12,5
|
||||
sub.mi DBL0H,DBL0H,1
|
||||
add.f DBL0L,r9,r10
|
||||
tst r12,0x1c
|
||||
jne.d [blink]
|
||||
add.cs DBL0H,DBL0H,1
|
||||
/* work out exact rounding if we fall through here. */
|
||||
/* We know that the exact result cannot be represented in double
|
||||
precision. Find the mid-point between the two nearest
|
||||
representable values, multiply with the divisor, and check if
|
||||
the result is larger than the dividend. Since we want to know
|
||||
only the sign bit, it is sufficient to calculate only the
|
||||
highpart of the lower 64 bits. */
|
||||
sub.f DBL0L,DBL0L,1
|
||||
asl r12,r9,2 ; u-22.30:2
|
||||
mpyu r10,r11,DBL1L ; rest before considering r12 in r5 : -r10
|
||||
sub.cs DBL0H,DBL0H,1
|
||||
sub.f r12,r12,2
|
||||
; resource conflict (not for XMAC)
|
||||
mpyhu r7,r12,DBL1L ; u-51.32
|
||||
asl r5,r5,25 ; s-51.7:25
|
||||
lsr r10,r10,7 ; u-51.30:2
|
||||
; resource conflict (not for XMAC)
|
||||
; resource conflict (not for XMAC)
|
||||
mpyu r9,r12,r8 ; u-51.31:1
|
||||
sub r5,r5,r10
|
||||
add.mi r5,r5,DBL1L ; signed multiply adjust for r12*DBL1L
|
||||
bset r7,r7,0 ; make sure that the result is not zero, and that
|
||||
; wb stall (one earlier for XMAC)
|
||||
sub r5,r5,r7 ; a highpart zero appears negative
|
||||
sub.f r5,r5,r9 ; rest msw
|
||||
add.pl.f DBL0L,DBL0L,1
|
||||
j_s.d [blink]
|
||||
add.eq DBL0H,DBL0H,1
|
||||
|
||||
.balign 4
|
||||
.Linf_denorm:
|
||||
brlo r6,0xc0000000,.Linf
|
||||
.Ldenorm:
|
||||
asr r6,r6,20
|
||||
neg r9,r6
|
||||
mov_s DBL0H,0
|
||||
brhs.d r9,54,.Lret0
|
||||
bxor.mi DBL0H,DBL0H,31
|
||||
add_l r12,r12,1
|
||||
and r12,r12,-4
|
||||
rsub r7,r6,5
|
||||
asr r10,r12,28
|
||||
bmsk r4,r12,27
|
||||
asrs DBL0L,r4,r7
|
||||
add DBL1H,r11,r10
|
||||
add.f r7,r6,32-5
|
||||
abss r10,r4
|
||||
asl r4,r4,r7
|
||||
mov.mi r4,r10
|
||||
add.f r10,r6,23
|
||||
rsub r7,r6,9
|
||||
lsr r7,DBL1H,r7
|
||||
asl r10,DBL1H,r10
|
||||
or.pnz DBL0H,DBL0H,r7
|
||||
or.mi r4,r4,r10
|
||||
mov.mi r10,r7
|
||||
add.f DBL0L,r10,DBL0L
|
||||
add.cs.f DBL0H,DBL0H,1 ; carry clear after this point
|
||||
bxor.f 0,r4,31
|
||||
add.pnz.f DBL0L,DBL0L,1
|
||||
add.cs.f DBL0H,DBL0H,1
|
||||
jne_l [blink]
|
||||
/* Calculation so far was not conclusive; calculate further rest. */
|
||||
mpyu r11,r11,DBL1L ; rest before considering r12 in r5 : -r11
|
||||
asr.f r12,r12,3
|
||||
asl r5,r5,25 ; s-51.7:25
|
||||
; resource conflict (not for XMAC)
|
||||
mpyu DBL1H,r12,r8 ; u-51.31:1
|
||||
and r9,DBL0L,1 ; tie-breaker: round to even
|
||||
lsr r11,r11,7 ; u-51.30:2
|
||||
; resource conflict (not for XMAC)
|
||||
mpyhu r8,r12,DBL1L ; u-51.32
|
||||
sub.mi r11,r11,DBL1L ; signed multiply adjust for r12*DBL1L
|
||||
add_s DBL1H,DBL1H,r11
|
||||
; resource conflict (not for XMAC)
|
||||
; resource conflict (not for XMAC)
|
||||
mpyu r12,r12,DBL1L ; u-83.30:2
|
||||
sub DBL1H,DBL1H,r5 ; -rest msw
|
||||
add_s DBL1H,DBL1H,r8 ; -rest msw
|
||||
add.f 0,DBL1H,DBL1H ; can't ror.f by 32 :-(
|
||||
; wb stall (XMAC: Before add.f)
|
||||
tst_s DBL1H,DBL1H
|
||||
cmp.eq r12,r9
|
||||
add.cs.f DBL0L,DBL0L,1
|
||||
j_s.d [blink]
|
||||
add.cs DBL0H,DBL0H,1
|
||||
|
||||
/* Exit used by .Ldenorm when the result is too small: DBL0H was already
   set to 0 with the sign bit applied before the branch here, so only the
   low word remains to be cleared.  */
.Lret0:
|
||||
/* return +- 0 */
|
||||
j_s.d [blink]
|
||||
mov_s DBL0L,0 ; delay slot: clear the low word
|
||||
/* Return a signed infinity.  r9 still holds the value loaded at the
   __divdf3 entry (0x7ff00000 according to the comment on that load).
   The N flag is expected to still carry the sign from the earlier
   xor.f of DBL0H and DBL1H -- confirm when changing the paths that
   lead here.  */
.Linf:
|
||||
mov_s DBL0H,r9 ; high word = exponent all ones, fraction zero
|
||||
mov_s DBL0L,0
|
||||
j_s.d [blink]
|
||||
bxor.mi DBL0H,DBL0H,31 ; delay slot: set the sign bit when N is set
|
||||
|
||||
.balign 4
|
||||
.Ldivtab:
|
||||
.long 0xfc0fffe1
|
||||
.long 0xf46ffdfb
|
||||
.long 0xed1ffa54
|
||||
.long 0xe61ff515
|
||||
.long 0xdf7fee75
|
||||
.long 0xd91fe680
|
||||
.long 0xd2ffdd52
|
||||
.long 0xcd1fd30c
|
||||
.long 0xc77fc7cd
|
||||
.long 0xc21fbbb6
|
||||
.long 0xbcefaec0
|
||||
.long 0xb7efa100
|
||||
.long 0xb32f92bf
|
||||
.long 0xae8f83b7
|
||||
.long 0xaa2f7467
|
||||
.long 0xa5ef6479
|
||||
.long 0xa1cf53fa
|
||||
.long 0x9ddf433e
|
||||
.long 0x9a0f3216
|
||||
.long 0x965f2091
|
||||
.long 0x92df0f11
|
||||
.long 0x8f6efd05
|
||||
.long 0x8c1eeacc
|
||||
.long 0x88eed876
|
||||
.long 0x85dec615
|
||||
.long 0x82eeb3b9
|
||||
.long 0x800ea10b
|
||||
.long 0x7d3e8e0f
|
||||
.long 0x7a8e7b3f
|
||||
.long 0x77ee6836
|
||||
.long 0x756e5576
|
||||
.long 0x72fe4293
|
||||
.long 0x709e2f93
|
||||
.long 0x6e4e1c7f
|
||||
.long 0x6c0e095e
|
||||
.long 0x69edf6c5
|
||||
.long 0x67cde3a5
|
||||
.long 0x65cdd125
|
||||
.long 0x63cdbe25
|
||||
.long 0x61ddab3f
|
||||
.long 0x600d991f
|
||||
.long 0x5e3d868c
|
||||
.long 0x5c6d7384
|
||||
.long 0x5abd615f
|
||||
.long 0x590d4ecd
|
||||
.long 0x576d3c83
|
||||
.long 0x55dd2a89
|
||||
.long 0x545d18e9
|
||||
.long 0x52dd06e9
|
||||
.long 0x516cf54e
|
||||
.long 0x4ffce356
|
||||
.long 0x4e9cd1ce
|
||||
.long 0x4d3cbfec
|
||||
.long 0x4becae86
|
||||
.long 0x4aac9da4
|
||||
.long 0x496c8c73
|
||||
.long 0x483c7bd3
|
||||
.long 0x470c6ae8
|
||||
.long 0x45dc59af
|
||||
.long 0x44bc4915
|
||||
.long 0x43ac3924
|
||||
.long 0x428c27fb
|
||||
.long 0x418c187a
|
||||
.long 0x407c07bd
|
||||
.L7ff00000:
|
||||
.long 0x7ff00000
|
||||
ENDFUNC(__divdf3)
|
||||
|
|
@ -0,0 +1,281 @@
|
|||
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/*
|
||||
- calculate 15..18 bit inverse using a table of approximating polynomials.
|
||||
precision is higher for polynomials used to evaluate input with larger
|
||||
value.
|
||||
- do one newton-raphson iteration step to double the precision,
|
||||
then multiply this with the divisor
|
||||
-> more time to decide if dividend is subnormal
|
||||
- the worst error propagation is on the side of the value range
|
||||
with the least initial defect, thus giving us about 30 bits precision.
|
||||
*/
|
||||
#include "arc-ieee-754.h"
|
||||
|
||||
#if 0 /* DEBUG */
|
||||
/* Self-checking wrapper, normally compiled out.  It calls __divsf3_c and
   then __divsf3_asm with the same two operands and branches to abort
   unless the two results are bit-identical.  __divsf3_c is not defined in
   this file (presumably a C reference implementation; confirm against the
   build setup).  */
.global __divsf3
|
||||
FUNC(__divsf3)
|
||||
.balign 4
|
||||
__divsf3:
|
||||
push_s blink ; keep the return address across both calls
|
||||
push_s r1 ; save the divisor
|
||||
bl.d __divsf3_c
|
||||
push_s r0 ; delay slot: save the dividend
|
||||
ld_s r1,[sp,4] ; reload the saved divisor
|
||||
st_s r0,[sp,4] ; park the first result in the slot that held the divisor
|
||||
bl.d __divsf3_asm
|
||||
pop_s r0 ; delay slot: restore the dividend
|
||||
pop_s r1 ; r1 = result of __divsf3_c, r0 = result of __divsf3_asm
|
||||
pop_s blink
|
||||
cmp r0,r1
|
||||
#if 1
|
||||
bne abort ; any difference is treated as a failure
|
||||
jeq_s [blink]
|
||||
b abort ; not reached when the two branches above behave as written
|
||||
#else
|
||||
bne abort
|
||||
j_s [blink]
|
||||
#endif
|
||||
ENDFUNC(__divsf3)
|
||||
#define __divsf3 __divsf3_asm
|
||||
#endif /* DEBUG */
|
||||
|
||||
FUNC(__divsf3)
|
||||
.balign 4
|
||||
.L7f800000:
|
||||
.long 0x7f800000
|
||||
.Ldivtab:
|
||||
.long 0xfc0ffff0
|
||||
.long 0xf46ffefd
|
||||
.long 0xed1ffd2a
|
||||
.long 0xe627fa8e
|
||||
.long 0xdf7ff73b
|
||||
.long 0xd917f33b
|
||||
.long 0xd2f7eea3
|
||||
.long 0xcd1fe986
|
||||
.long 0xc77fe3e7
|
||||
.long 0xc21fdddb
|
||||
.long 0xbcefd760
|
||||
.long 0xb7f7d08c
|
||||
.long 0xb32fc960
|
||||
.long 0xae97c1ea
|
||||
.long 0xaa27ba26
|
||||
.long 0xa5e7b22e
|
||||
.long 0xa1cfa9fe
|
||||
.long 0x9ddfa1a0
|
||||
.long 0x9a0f990c
|
||||
.long 0x9667905d
|
||||
.long 0x92df878a
|
||||
.long 0x8f6f7e84
|
||||
.long 0x8c27757e
|
||||
.long 0x88f76c54
|
||||
.long 0x85df630c
|
||||
.long 0x82e759c5
|
||||
.long 0x8007506d
|
||||
.long 0x7d3f470a
|
||||
.long 0x7a8f3da2
|
||||
.long 0x77ef341e
|
||||
.long 0x756f2abe
|
||||
.long 0x72f7212d
|
||||
.long 0x709717ad
|
||||
.long 0x6e4f0e44
|
||||
.long 0x6c1704d6
|
||||
.long 0x69e6fb44
|
||||
.long 0x67cef1d7
|
||||
.long 0x65c6e872
|
||||
.long 0x63cedf18
|
||||
.long 0x61e6d5cd
|
||||
.long 0x6006cc6d
|
||||
.long 0x5e36c323
|
||||
.long 0x5c76b9f3
|
||||
.long 0x5abeb0b7
|
||||
.long 0x5916a79b
|
||||
.long 0x57769e77
|
||||
.long 0x55de954d
|
||||
.long 0x54568c4e
|
||||
.long 0x52d6834d
|
||||
.long 0x51667a7f
|
||||
.long 0x4ffe71b5
|
||||
.long 0x4e9e68f1
|
||||
.long 0x4d466035
|
||||
.long 0x4bf65784
|
||||
.long 0x4aae4ede
|
||||
.long 0x496e4646
|
||||
.long 0x48363dbd
|
||||
.long 0x47063547
|
||||
.long 0x45de2ce5
|
||||
.long 0x44be2498
|
||||
.long 0x43a61c64
|
||||
.long 0x4296144a
|
||||
.long 0x41860c0e
|
||||
.long 0x407e03ee
|
||||
__divsf3_support: /* This label makes debugger output saner. */
|
||||
.Ldenorm_fp1:
|
||||
bclr r6,r6,31
|
||||
norm.f r12,r6 ; flag for x/0 -> Inf check
|
||||
add r6,r6,r6
|
||||
rsub r5,r12,16
|
||||
ror r5,r1,r5
|
||||
asl r6,r6,r12
|
||||
bmsk r5,r5,5
|
||||
ld.as r5,[r3,r5]
|
||||
add r4,r6,r6
|
||||
; load latency
|
||||
mpyhu r7,r5,r4
|
||||
bic.ne.f 0, \
|
||||
0x60000000,r0 ; large number / denorm -> Inf
|
||||
beq_s .Linf_NaN
|
||||
asl r5,r5,13
|
||||
; wb stall
|
||||
; slow track
|
||||
sub r7,r5,r7
|
||||
mpyhu r8,r7,r6
|
||||
asl_s r12,r12,23
|
||||
and.f r2,r0,r9
|
||||
add r2,r2,r12
|
||||
asl r12,r0,8
|
||||
; wb stall
|
||||
bne.d .Lpast_denorm_fp1
|
||||
.Ldenorm_fp0:
|
||||
mpyhu r8,r8,r7
|
||||
bclr r12,r12,31
|
||||
norm.f r3,r12 ; flag for 0/x -> 0 check
|
||||
bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0
|
||||
beq_s .Lret0
|
||||
asl_s r12,r12,r3
|
||||
asl_s r3,r3,23
|
||||
add_s r12,r12,r12
|
||||
add r11,r11,r3
|
||||
b.d .Lpast_denorm_fp0
|
||||
mov_s r3,r12
|
||||
.balign 4
|
||||
/* Reached from .Ldenorm_fp1 (divisor zero or subnormal).  Returns an
   infinity carrying the xor of the operand signs, or, when the dividend
   is +-0, a value with every exponent and fraction bit set (NaN).  */
.Linf_NaN:
|
||||
bclr.f 0,r0,31 ; 0/0 -> NaN
|
||||
xor_s r0,r0,r1
|
||||
bmsk r1,r0,30
|
||||
bic_s r0,r0,r1 ; r0 = sign bit of (r0 xor r1) only
|
||||
sub.eq r0,r0,1 ; dividend was zero: turn all lower bits on
|
||||
j_s.d [blink]
|
||||
or r0,r0,r9 ; delay slot: force the exponent field to all ones
|
||||
/* Return zero with the xor of the operand signs.  */
.Lret0:
|
||||
xor_s r0,r0,r1
|
||||
bmsk r1,r0,30
|
||||
j_s.d [blink]
|
||||
bic_s r0,r0,r1 ; delay slot: keep only the sign bit
|
||||
/* Divisor (r1) has an all-ones exponent field: it is Inf or NaN.
   r2 holds the exponent field of the dividend, set in the delay slot of
   the branch that leads here.  */
.Linf_nan_fp1:
|
||||
lsr_s r0,r0,31
|
||||
bmsk.f 0,r1,22 ; Z set when the divisor fraction is zero (divisor is Inf)
|
||||
asl_s r0,r0,31 ; with the lsr_s above: r0 = sign bit of the dividend only
|
||||
bne_s 0f ; divisor fraction nonzero: x/nan -> nan
|
||||
brne r2,r9,.Lsigned0 ; finite x/inf -> 0; inf/inf and nan/inf fall through -> nan
|
||||
0: j_s.d [blink]
|
||||
mov r0,-1 ; delay slot: all bits set, a NaN pattern
|
||||
/* Shared tail: flip the sign bit of r0 when the divisor is negative.
   From .Lsigned0, r0 holds only the dividend sign, so the result is a
   signed zero; from .Linf_nan_fp0, r0 is still the Inf/NaN dividend.  */
.Lsigned0:
|
||||
.Linf_nan_fp0:
|
||||
tst_s r1,r1
|
||||
j_s.d [blink]
|
||||
bxor.mi r0,r0,31 ; delay slot: apply the divisor sign
|
||||
.balign 4
|
||||
.global __divsf3
|
||||
/* N.B. the spacing between divtab and the sub3 to get its address must
|
||||
be a multiple of 8. */
|
||||
__divsf3:
|
||||
lsr r2,r1,17
|
||||
sub3 r3,pcl,55;(.-.Ldivtab) >> 3
|
||||
bmsk_s r2,r2,5
|
||||
ld.as r5,[r3,r2]
|
||||
asl r4,r1,9
|
||||
ld.as r9,[pcl,-114]; [pcl,(-((.-.L7f800000) >> 2))] ; 0x7f800000
|
||||
mpyhu r7,r5,r4
|
||||
asl r6,r1,8
|
||||
and.f r11,r1,r9
|
||||
bset r6,r6,31
|
||||
asl r5,r5,13
|
||||
; wb stall
|
||||
beq .Ldenorm_fp1
|
||||
sub r7,r5,r7
|
||||
mpyhu r8,r7,r6
|
||||
breq.d r11,r9,.Linf_nan_fp1
|
||||
and.f r2,r0,r9
|
||||
beq.d .Ldenorm_fp0
|
||||
asl r12,r0,8
|
||||
; wb stall
|
||||
breq r2,r9,.Linf_nan_fp0
|
||||
mpyhu r8,r8,r7
|
||||
.Lpast_denorm_fp1:
|
||||
bset r3,r12,31
|
||||
.Lpast_denorm_fp0:
|
||||
cmp_s r3,r6
|
||||
lsr.cc r3,r3,1
|
||||
add_s r2,r2, /* wait for immediate */ \
|
||||
/* wb stall */ \
|
||||
0x3f000000
|
||||
sub r7,r7,r8 ; u1.31 inverse, about 30 bit
|
||||
mpyhu r3,r3,r7
|
||||
sbc r2,r2,r11
|
||||
xor.f 0,r0,r1
|
||||
and r0,r2,r9
|
||||
bxor.mi r0,r0,31
|
||||
brhs r2, /* wb stall / wait for immediate */ \
|
||||
0x7f000000,.Linf_denorm
|
||||
.Lpast_denorm:
|
||||
add_s r3,r3,0x22 ; round to nearest or higher
|
||||
tst r3,0x3c ; check if rounding was unsafe
|
||||
lsr r3,r3,6
|
||||
jne.d [blink] ; return if rounding was safe.
|
||||
add_s r0,r0,r3
|
||||
/* work out exact rounding if we fall through here. */
|
||||
/* We know that the exact result cannot be represented in single
|
||||
precision. Find the mid-point between the two nearest
|
||||
representable values, multiply with the divisor, and check if
|
||||
the result is larger than the dividend. */
|
||||
add_s r3,r3,r3
|
||||
sub_s r3,r3,1
|
||||
mpyu r3,r3,r6
|
||||
asr.f 0,r0,1 ; for round-to-even in case this is a denorm
|
||||
rsub r2,r9,25
|
||||
asl_s r12,r12,r2
|
||||
; wb stall
|
||||
; slow track
|
||||
sub.f 0,r12,r3
|
||||
j_s.d [blink]
|
||||
sub.mi r0,r0,1
|
||||
/* For denormal results, it is possible that an exact result needs
|
||||
rounding, and thus the round-to-even rule has to come into play. */
|
||||
.Linf_denorm:
|
||||
brlo r2,0xc0000000,.Linf
|
||||
.Ldenorm:
|
||||
asr_s r2,r2,23
|
||||
bic r0,r0,r9
|
||||
neg r9,r2
|
||||
brlo.d r9,25,.Lpast_denorm
|
||||
lsr r3,r3,r9
|
||||
/* Fall through: return +- 0 */
|
||||
j_s [blink]
|
||||
.Linf:
|
||||
j_s.d [blink]
|
||||
or r0,r0,r9
|
||||
ENDFUNC(__divsf3)
|
||||
|
|
@ -0,0 +1,221 @@
|
|||
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "arc-ieee-754.h"
|
||||
|
||||
#if 0 /* DEBUG */
/* Disabled self-checking harness: calls __divsf3_c and __divsf3_asm with
   the same r0/r1 operands and branches to abort unless both return the
   same value in r0.  When enabled, the #define at the end renames the
   real implementation that follows to __divsf3_asm.  */
|
||||
.global __divsf3
|
||||
FUNC(__divsf3)
|
||||
.balign 4
|
||||
__divsf3:
|
||||
push_s blink
|
||||
push_s r1
|
||||
bl.d __divsf3_c
|
||||
push_s r0
|
||||
/* Stack is now: [sp] = r0, [sp,4] = r1, [sp,8] = blink.  Reload the
   original r1 and park the __divsf3_c result in its stack slot.  */
ld_s r1,[sp,4]
|
||||
st_s r0,[sp,4]
|
||||
bl.d __divsf3_asm
|
||||
pop_s r0
|
||||
/* r0 = __divsf3_asm result; the pop below fetches the __divsf3_c result.  */
pop_s r1
|
||||
pop_s blink
|
||||
cmp r0,r1
|
||||
#if 1
|
||||
bne abort
|
||||
jeq_s [blink]
|
||||
b abort
|
||||
#else
|
||||
bne abort
|
||||
j_s [blink]
|
||||
#endif
|
||||
ENDFUNC(__divsf3)
|
||||
#define __divsf3 __divsf3_asm
|
||||
#endif /* DEBUG */
|
||||
|
||||
/* __divsf3: single-precision division r0 / r1 with the result in r0,
   built from DIVAW steps.  The handlers for subnormal / zero operands
   (.Ldenorm_fp0, .Ldenorm_fp1, .Lret0_NaN) are placed ahead of the entry
   point.

   Register roles established right after the __divsf3 label:
     r9      0x7f800000 exponent mask, loaded from the word placed just
	     before the entry point
     r2, r3  fraction bits 0..22 of r0 and r1
     r4, r5  r0 and r1 with those fraction bits cleared (sign + exponent)
     r10     r4 ^ r5
     r11     exponent field of the operand currently being classified
   NOTE(review): which registers a caller may rely on being preserved is
   not visible here -- confirm against the ARC ABI before depending on it.  */
.balign 4
|
||||
__divdf3_support: /* This label makes debugger output saner. */
|
||||
FUNC(__divsf3)
|
||||
/* Reached when the exponent field of r0 is zero (see breq.d below).  */
.Ldenorm_fp0:
|
||||
norm.f r12,r2 ; flag for 0/x -> 0 check
|
||||
bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0
|
||||
beq_s .Lret0_NaN
|
||||
tst r1,r9
|
||||
add_s r2,r2,r2
|
||||
sub_s r12,r12,8
|
||||
asl_s r2,r2,r12
|
||||
asl_l r12,r12,23
|
||||
bne.d .Lpast_denorm_fp0
|
||||
add r5,r5,r12
|
||||
/* r0 is subnormal, r1 is subnormal or 0. */
|
||||
|
||||
.balign 4
|
||||
/* Reached when the exponent field of r1 is zero, or by falling through
   from .Ldenorm_fp0 above.  */
.Ldenorm_fp1:
|
||||
norm.f r12,r3 ; flag for x/0 -> Inf check
|
||||
bic.ne.f 0,0x60000000,r0 ; large number/denorm -> Inf
|
||||
beq_s .Linf
|
||||
add_s r3,r3,r3
|
||||
sub_s r12,r12,8
|
||||
asl_s r3,r3,r12
|
||||
asl_s r12,r12,23
|
||||
b.d .Lpast_denorm_fp1
|
||||
add r4,r4,r12
|
||||
|
||||
/* Builds the result from r10 with the exponent bits cleared; the
   conditional subtract of 1 applies when r1 without its sign bit is 0.  */
.Lret0_NaN:
|
||||
bclr.f 0,r1,31 ; 0/0 -> NaN
|
||||
bic r0,r10,r9
|
||||
j_s.d [blink]
|
||||
sub.eq r0,r0,1
|
||||
|
||||
.global __divsf3
|
||||
.balign 4
|
||||
.long 0x7f800000 ; exponent mask
|
||||
__divsf3:
|
||||
ld r9,[pcl,-4]
|
||||
bmsk r2,r0,22
|
||||
xor r4,r0,r2
|
||||
bmsk r3,r1,22
|
||||
xor r5,r1,r3
|
||||
and r11,r0,r9
|
||||
breq.d r11,0,.Ldenorm_fp0
|
||||
xor r10,r4,r5
|
||||
breq r11,r9,.Linf_nan_fp0
|
||||
bset_s r2,r2,23
|
||||
and r11,r1,r9
|
||||
breq r11,0,.Ldenorm_fp1
|
||||
breq r11,r9,.Linf_nan_fp1
|
||||
.Lpast_denorm_fp0:
|
||||
bset_s r3,r3,23
|
||||
.Lpast_denorm_fp1:
|
||||
/* r2 and r3 are compared, then shifted left by 7; when r2 is the lower
   one it is doubled and the bias in r8 loses bit 23.  */
cmp r2,r3
|
||||
asl_s r2,r2,6+1
|
||||
asl_s r3,r3,7
|
||||
add.lo r2,r2,r2
|
||||
bclr r8,r9,30 ; exponent bias
|
||||
bclr.lo r8,r8,23 ; reduce exp by one if fraction is shifted
|
||||
sub r4,r4,r5
|
||||
add r4,r4,r8
|
||||
xor.f 0,r10,r4
|
||||
bmi .Linf_denorm
|
||||
and r12,r4,r9
|
||||
breq r12,0,.Ldenorm
|
||||
sub_s r2,r2,r3 ; discard implicit 1
|
||||
/* 6 + 7 + 7 + 3 + 1 DIVAW steps follow.  After each group the low bits
   of r2 are moved into r0 and cleared from r2.  The .Ldiv_* labels are
   also targets of the byte table at .Ldenorm_tab.
   NOTE(review): presumably each DIVAW yields one quotient bit -- confirm
   against the ARC600 ISA manual.  */
.Ldiv_23bit:
|
||||
.rep 6
|
||||
divaw r2,r2,r3
|
||||
.endr
|
||||
breq r12,r9,.Linf
|
||||
bmsk r0,r2,6
|
||||
xor_s r2,r2,r0
|
||||
.Ldiv_17bit:
|
||||
.rep 7
|
||||
divaw r2,r2,r3
|
||||
.endr
|
||||
asl_s r0,r0,7
|
||||
bmsk r1,r2,6
|
||||
xor_s r2,r2,r1
|
||||
or_s r0,r0,r1
|
||||
.Ldiv_10bit:
|
||||
.rep 7
|
||||
divaw r2,r2,r3
|
||||
.endr
|
||||
asl_s r0,r0,7
|
||||
bmsk r1,r2,6
|
||||
xor_s r2,r2,r1
|
||||
or_s r0,r0,r1
|
||||
.Ldiv_3bit:
|
||||
.rep 3
|
||||
divaw r2,r2,r3
|
||||
.endr
|
||||
asl_s r0,r0,3
|
||||
.Ldiv_0bit:
|
||||
divaw r1,r2,r3
|
||||
bmsk_s r2,r2,2
|
||||
tst r1,-0x7e ; 0xffffff82, test for rest or odd
|
||||
bmsk_s r1,r1,0
|
||||
add_s r0,r0,r2 ; assemble fraction
|
||||
add_s r0,r0,r4 ; add in sign & exponent
|
||||
j_s.d [blink]
|
||||
add.ne r0,r0,r1 ; round to nearest / even
|
||||
|
||||
.balign 4
|
||||
/* Reached when the exponent field of r0 equals the mask in r9.  */
.Linf_nan_fp0:
|
||||
bic.f 0,r9,r1 ; fp1 Inf -> result NaN
|
||||
bic r1,r5,r9 ; fp1 sign
|
||||
sub.eq r1,r1,1
|
||||
j_s.d [blink]
|
||||
xor_s r0,r0,r1
|
||||
/* Reached when the exponent field of r1 equals the mask in r9.  */
.Linf_nan_fp1:
|
||||
bic r0,r4,r9 ; fp0 sign
|
||||
bmsk.f 0,r1,22 ; x/inf -> 0, x/nan -> nan
|
||||
xor.eq r1,r1,r9
|
||||
j_s.d [blink]
|
||||
xor_s r0,r0,r1
|
||||
/* Return r10 with all exponent bits set.  */
.Linf:
|
||||
j_s.d [blink]
|
||||
or r0,r10,r9
|
||||
|
||||
/* First entry of the byte table below: return r4 unchanged.  */
.Lret_r4:
|
||||
j_s.d [blink]
|
||||
mov_s r0,r4
|
||||
.balign 4
|
||||
/* Taken when bit 31 of r10 ^ r4 is set after the exponent arithmetic.
   r12 becomes (r4 << 1) >> 24 (arithmetic); a non-negative value goes to
   .Linf, otherwise r12 is clamped to at least -24 and used as the index
   for the table dispatch at .Ldenorm.  */
.Linf_denorm:
|
||||
add.f r12,r4,r4
|
||||
asr_l r12,r12,24
|
||||
bpl .Linf
|
||||
max r12,r12,-24
|
||||
/* Dispatch through the byte table: r1 = pcl + 42, annotated below as the
   address of .Ldenorm_tab.  The byte at r1 + r12 is the distance from
   .Ldenorm_tab back to the chosen entry point, so subtracting it from r1
   and jumping enters the DIVAW sequence part-way through (or returns via
   .Lret_r4).  Table entries of the form LABEL-4*k select the address
   4*k bytes past LABEL.  */
.Ldenorm:
|
||||
add r1,pcl,42; .Ldenorm_tab-.
|
||||
ldb_s r12,[r12,r1]
|
||||
mov_s r0,0
|
||||
lsr_s r2,r2
|
||||
sub_s r1,r1,r12
|
||||
j_s.d [r1]
|
||||
bic r4,r10,r9
|
||||
.byte .Ldenorm_tab-.Lret_r4
|
||||
.byte .Ldenorm_tab-.Ldiv_0bit
|
||||
.byte .Ldenorm_tab-.Ldiv_3bit-8
|
||||
.byte .Ldenorm_tab-.Ldiv_3bit-4
|
||||
.byte .Ldenorm_tab-.Ldiv_3bit
|
||||
.byte .Ldenorm_tab-.Ldiv_10bit-24
|
||||
.byte .Ldenorm_tab-.Ldiv_10bit-20
|
||||
.byte .Ldenorm_tab-.Ldiv_10bit-16
|
||||
.byte .Ldenorm_tab-.Ldiv_10bit-12
|
||||
.byte .Ldenorm_tab-.Ldiv_10bit-8
|
||||
.byte .Ldenorm_tab-.Ldiv_10bit-4
|
||||
.byte .Ldenorm_tab-.Ldiv_10bit
|
||||
.byte .Ldenorm_tab-.Ldiv_17bit-24
|
||||
.byte .Ldenorm_tab-.Ldiv_17bit-20
|
||||
.byte .Ldenorm_tab-.Ldiv_17bit-16
|
||||
.byte .Ldenorm_tab-.Ldiv_17bit-12
|
||||
.byte .Ldenorm_tab-.Ldiv_17bit-8
|
||||
.byte .Ldenorm_tab-.Ldiv_17bit-4
|
||||
.byte .Ldenorm_tab-.Ldiv_17bit
|
||||
.byte .Ldenorm_tab-.Ldiv_23bit-20
|
||||
.byte .Ldenorm_tab-.Ldiv_23bit-16
|
||||
.byte .Ldenorm_tab-.Ldiv_23bit-12
|
||||
.byte .Ldenorm_tab-.Ldiv_23bit-8
|
||||
.byte .Ldenorm_tab-.Ldiv_23bit-4
|
||||
.Ldenorm_tab:
|
||||
.byte .Ldenorm_tab-.Ldiv_23bit
|
||||
ENDFUNC(__divsf3)
|
||||
|
|
@ -0,0 +1,161 @@
|
|||
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* We use a polynom similar to a Tchebycheff polynom to get an initial
|
||||
seed, and then use a newton-raphson iteration step to get an
|
||||
approximate result
|
||||
If this result can't be rounded to the exact result with confidence, we
|
||||
round to the value between the two closest representable values, and
|
||||
test if the correctly rounded value is above or below this value.
|
||||
|
||||
Because of the Newton-raphson iteration step, an error in the seed at X
|
||||
is amplified by X. Therefore, we don't want a Tchebycheff polynom
|
||||
or a polynom that is close to optimal according to the maximum norm
|
||||
on the error of the seed value; we want one that is close to optimal
|
||||
according to the maximum norm on the error of the result, i.e. we
|
||||
want the maxima of the polynom to increase linearly.
|
||||
Given an interval [X0,X2) over which to approximate,
|
||||
with X1 := (X0+X2)/2, D := X1-X0, F := 1/D, and S := D/X1 we have,
|
||||
like for Tchebycheff polynoms:
|
||||
P(0) := 1
|
||||
but then we have:
|
||||
P(1) := X + S*D
|
||||
P(2) := 2 * X^2 + S*D * X - D^2
|
||||
Then again:
|
||||
P(n+1) := 2 * X * P(n) - D^2 * P (n-1)
|
||||
*/
|
||||
|
||||
#include <stdio.h>

/* Lowest (most negative) error returned by err () so far; starts at an
   arbitrary large sentinel so that the first call always replaces it.  */
static long double merr = 42.;

/* Evaluate the quality of the linear seed y0 = A0 + (X-1)*A1 for 1/X.

   One Newton-Raphson step (2*y0 - y0*X*y0) is applied to the seed and the
   result is compared with the exact reciprocal.  Returns approx - exact.
   A diagnostic line is printed when the error is at or below -2^-30, and
   the running minimum MERR is updated.

   The locals used to be called `true' and `err'; `true' is a keyword in
   C23 and a macro whenever <stdbool.h> is included, so they are named
   `exact' and `diff' here.  */
double
err (long double a0, long double a1, long double x)
{
  long double y0 = a0 + (x-1)*a1;

  long double approx = 2. * y0 - y0 * x * y0;
  long double exact = 1./x;
  long double diff = approx - exact;

  if (diff <= -1./65536./16384.)
    printf ("ERROR EXCEEDS 1 ULP %.15f %.15f %.15f\n",
            (double)x, (double)approx, (double)exact);
  if (merr > diff)
    merr = diff;
  return diff;
}
|
||||
|
||||
#include <math.h>
#include <stdio.h>
#include <string.h>

/* Print one `.long' seed word for each of the 64 intervals
   [X0, X0 + 1/64) that cover [1, 2).

   For every interval the Taylor coefficients of 1/X around the midpoint X1
   are reduced to a linear polynomial using the P(n) basis described at the
   top of this file.  The slope is quantised to 12 bits (i1) and the
   constant term to 20 bits (i0); four refinement passes let the constant
   absorb the slope's quantisation error.  The emitted word is
   (-i1 << 20) | i0.

   Changes relative to the previous version, none of which alter the output:
     - the word is passed to printf as `unsigned', matching the %x
       conversion (it used to be passed as `long');
     - the left shifts are done on unsigned values so they cannot overflow
       a 32-bit `long';
     - the disabled T[2] quantisation code used `i' where `i2' was meant.  */
int
main (void)
{
  long double T[5]; /* Taylor polynom */
  long double P[5][5];
  int i, j;
  long double X0, X1, X2, S;
  long double inc = 1./64;
  long double D = inc*0.5;
  long i0, i1, i2, io;

  memset (P, 0, sizeof (P));
  P[0][0] = 1.;
  for (i = 1; i < 5; i++)
    P[i][i] = 1 << (i - 1);
  P[2][0] = -D*D;
  for (X0 = 1.; X0 < 2.; X0 += inc)
    {
      X1 = X0 + inc * 0.5;
      X2 = X0 + inc;
      S = D / X1;
      T[0] = 1./X1;
      for (i = 1; i < 5; i++)
        T[i] = T[i-1] * -T[0];
#if 0
      printf ("T %1.8f %f %f %f %f\n", (double)T[0], (double)T[1], (double)T[2],
              (double)T[3], (double)T[4]);
#endif
      P[1][0] = S*D;
      P[2][1] = S*D;
      for (i = 3; i < 5; i++)
        {
          P[i][0] = -D*D*P[i-2][0];
          for (j = 1; j < i; j++)
            P[i][j] = 2*P[i-1][j-1]-D*D*P[i-2][j];
        }
#if 0
      printf ("P3 %1.8f %f %f %f %f\n", (double)P[3][0], (double)P[3][1], (double)P[3][2],
              (double)P[3][3], (double)P[3][4]);
      printf ("P4 %1.8f %f %f %f %f\n", (double)P[4][0], (double)P[4][1], (double)P[4][2],
              (double)P[4][3], (double)P[4][4]);
#endif
      /* Eliminate the degree 4, 3 and 2 terms, folding them into the
         lower-order coefficients.  */
      for (i = 4; i > 1; i--)
        {
          long double a = T[i]/P[i][i];

          for (j = 0; j < i; j++)
            T[j] -= a * P[i][j];
        }
#if 0
      printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]);
#endif
#if 0
      i2 = T[2]*1024;
      long double a = (T[2]-i2/1024.)/P[2][2];
      for (j = 0; j < 2; j++)
        T[j] -= a * P[2][j];
#else
      i2 = 0;
#endif
      long double T0, Ti1;
      for (i = 0, i0 = 0; i < 4; i++)
        {
          /* Quantise the slope to 12 bits, then recompute the constant term
             so that it compensates for the slope's rounding error.  */
          i1 = T[1]*4096. + i0 / (long double)(1 << 20) - 0.5;
          i1 = - (-i1 & 0x0fff);
          Ti1 = (((unsigned) (-i1) << 20) | i0) / -(long double)(1LL << 32);
          T0 = T[0] - (T[1]-Ti1)/P[1][1] * P[1][0] - (X1 - 1) * Ti1;
          i0 = T0 * 1024 * 1024 + 0.5;
          i0 &= 0xfffff;
        }
#if 0
      printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]);
#endif
      io = ((unsigned) (-i1) << 20) | i0;
      long double A1 = (unsigned)io/-65536./65536.;
      long double A0 = ((unsigned) io << 12)/65536./65536.;
      long double Xm0 = 1./sqrt (-A1);
      long double Xm1 = 0.5+0.5*-A0/A1;
#if 0
      printf ("%f %f %f %f\n", (double)A0, (double)A1, (double) Ti1, (double)X0);
      printf ("%.12f %.12f %.12f\n",
              err (A0, A1, X0), err (A0, A1, X1), err (A0, A1, X2));
      printf ("%.12f %.12f\n", (double)Xm0, (double)Xm1);
      printf ("%.12f %.12f\n", err (A0, A1, Xm0), err (A0, A1, Xm1));
#endif
      printf ("\t.long 0x%x\n", (unsigned) io);
    }
#if 0
  printf ("maximum error: %.15f %x %f\n", (double)merr, (unsigned)(long long)(-merr * 65536 * 65536), (double)log(-merr)/log(2));
#endif
  return 0;
}
|
||||
|
|
@ -0,0 +1,127 @@
|
|||
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* We use a polynom similar to a Tchebycheff polynom to get an initial
|
||||
seed, and then use a newton-raphson iteration step to get an
|
||||
approximate result
|
||||
If this result can't be rounded to the exact result with confidence, we
|
||||
round to the value between the two closest representable values, and
|
||||
test if the correctly rounded value is above or below this value.
|
||||
|
||||
Because of the Newton-raphson iteration step, an error in the seed at X
|
||||
is amplified by X. Therefore, we don't want a Tchebycheff polynom
|
||||
or a polynom that is close to optimal according to the maximum norm
|
||||
on the error of the seed value; we want one that is close to optimal
|
||||
according to the maximum norm on the error of the result, i.e. we
|
||||
want the maxima of the polynom to increase linearly.
|
||||
Given an interval [X0,X2) over which to approximate,
|
||||
with X1 := (X0+X2)/2, D := X1-X0, F := 1/D, and S := D/X1 we have,
|
||||
like for Tchebycheff polynoms:
|
||||
P(0) := 1
|
||||
but then we have:
|
||||
P(1) := X + S*D
|
||||
P(2) := 2 * X^2 + S*D * X - D^2
|
||||
Then again:
|
||||
P(n+1) := 2 * X * P(n) - D^2 * P (n-1)
|
||||
*/
|
||||
|
||||
#include <stdio.h>
#include <string.h>

/* Print one `.long' seed word for each of the 64 intervals
   [X0, X0 + 1/64) that cover [1, 2).

   Same scheme as described in the comment at the top of this file, with a
   13 bit slope (i1) and a 19 bit constant term (i0); the emitted word is
   (-i1 << 19) | i0.

   Changes relative to the previous version, none of which alter the output:
     - the word is built and passed to printf as `unsigned', matching the
       %x conversion (it used to be a `long', and the shift could overflow
       a 32-bit `long');
     - X2 is computed as X0 + inc, the upper end of the interval, so that
       X1 = (X0 + X2) / 2 holds as documented (X2 is not otherwise read);
     - the disabled T[2] quantisation code used `i' where `i2' was meant.  */
int
main (void)
{
  long double T[5]; /* Taylor polynom */
  long double P[5][5];
  int i, j;
  long double X0, X1, X2, S;
  long double inc = 1./64;
  long double D = inc*0.5;
  long i0, i1, i2;

  memset (P, 0, sizeof (P));
  P[0][0] = 1.;
  for (i = 1; i < 5; i++)
    P[i][i] = 1 << (i - 1);
  P[2][0] = -D*D;
  for (X0 = 1.; X0 < 2.; X0 += inc)
    {
      X1 = X0 + inc * 0.5;
      X2 = X0 + inc;
      S = D / X1;
      T[0] = 1./X1;
      for (i = 1; i < 5; i++)
        T[i] = T[i-1] * -T[0];
#if 0
      printf ("T %1.8f %f %f %f %f\n", (double)T[0], (double)T[1], (double)T[2],
              (double)T[3], (double)T[4]);
#endif
      P[1][0] = S*D;
      P[2][1] = S*D;
      for (i = 3; i < 5; i++)
        {
          P[i][0] = -D*D*P[i-2][0];
          for (j = 1; j < i; j++)
            P[i][j] = 2*P[i-1][j-1]-D*D*P[i-2][j];
        }
#if 0
      printf ("P3 %1.8f %f %f %f %f\n", (double)P[3][0], (double)P[3][1], (double)P[3][2],
              (double)P[3][3], (double)P[3][4]);
      printf ("P4 %1.8f %f %f %f %f\n", (double)P[4][0], (double)P[4][1], (double)P[4][2],
              (double)P[4][3], (double)P[4][4]);
#endif
      /* Eliminate the degree 4, 3 and 2 terms, folding them into the
         lower-order coefficients.  */
      for (i = 4; i > 1; i--)
        {
          long double a = T[i]/P[i][i];

          for (j = 0; j < i; j++)
            T[j] -= a * P[i][j];
        }
#if 0
      printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]);
#endif
#if 0
      i2 = T[2]*512;
      long double a = (T[2]-i2/512.)/P[2][2];
      for (j = 0; j < 2; j++)
        T[j] -= a * P[2][j];
#else
      i2 = 0;
#endif
      for (i = 0, i0 = 0; i < 4; i++)
        {
          long double T0, Ti1;

          /* Quantise the slope to 13 bits, then recompute the constant term
             so that it compensates for the slope's rounding error.  */
          i1 = T[1]*8192. + i0 / (long double)(1 << 19) - 0.5;
          i1 = - (-i1 & 0x1fff);
          Ti1 = (((unsigned) (-i1) << 19) | i0) / -(long double)(1LL << 32);
          T0 = T[0] - (T[1]-Ti1)/P[1][1] * P[1][0] - (X1 - 1) * Ti1;
          i0 = T0 * 512 * 1024 + 0.5;
          i0 &= 0x7ffff;
        }
#if 0
      printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]);
#endif
      printf ("\t.long 0x%x\n", ((unsigned) (-i1) << 19) | (unsigned) i0);
    }
  return 0;
}
|
||||
|
|
@ -0,0 +1,76 @@
|
|||
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "arc-ieee-754.h"
|
||||
/* inputs: DBL0, DBL1
|
||||
output: z flag
|
||||
clobber: r12, flags
|
||||
For NaNs, bit 19.. bit 30 of the high word must be set. */
|
||||
#if 0 /* DEBUG */
/* Disabled self-checking harness: saves r0-r11 and blink, calls
   __eqdf2_c, keeps its r0 result in r11, restores the registers and calls
   __eqdf2_asm.  It then returns only if the Z flag agrees with the
   reference (Z set when r11 == 0, Z clear otherwise) and calls abort if
   not.  NOTE(review): this relies on breq.d leaving the flags produced by
   __eqdf2_asm untouched -- confirm against the ISA manual.  */
|
||||
.global __eqdf2
|
||||
.balign 4
|
||||
FUNC(__eqdf2)
|
||||
__eqdf2:
|
||||
st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4]
|
||||
st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4]
|
||||
st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1`
|
||||
bl.d __eqdf2_c` push_s r0
|
||||
mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3
|
||||
ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]`
|
||||
ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4]
|
||||
bl.d __eqdf2_asm` ld.ab r10,[sp,4]
|
||||
pop_s blink
|
||||
breq.d r11,0,0f
|
||||
ld.ab r11,[sp,4]
|
||||
jne_s [blink]
|
||||
bl abort
|
||||
0: jeq_s [blink]
|
||||
bl abort
|
||||
ENDFUNC(__eqdf2)
|
||||
#define __eqdf2 __eqdf2_asm
|
||||
#endif /* DEBUG */
|
||||
/* __eqdf2: double-precision equality test.  The result is delivered in
   the Z flag; r12 and the flags are clobbered.  DBL0H/DBL0L and
   DBL1H/DBL1L name the high and low words of the two operands
   (presumably defined in arc-ieee-754.h -- verify there).  */
.global __eqdf2
|
||||
.balign 4
|
||||
HIDDEN_FUNC(__eqdf2)
|
||||
/* Good performance as long as the difference in high word is
|
||||
well predictable (as seen from the branch predictor). */
|
||||
__eqdf2:
|
||||
brne.d DBL0H,DBL1H,.Lhighdiff
|
||||
bmsk r12,DBL0H,20
|
||||
#ifdef DPFP_COMPAT
|
||||
or.f 0,DBL0L,DBL1L
|
||||
bset.ne r12,r12,21
|
||||
#endif /* DPFP_COMPAT */
|
||||
/* High words are identical: r12 + (DBL0H << 1) carries out for a NaN,
   and the low-word compare in the delay slot only runs when carry is
   clear.  */
add1.f r12,r12,DBL0H /* set c iff NaN; also, clear z if NaN. */
|
||||
j_s.d [blink]
|
||||
cmp.cc DBL0L,DBL1L
|
||||
.balign 4
|
||||
/* High words differ: Z ends up set only when both low words are zero and
   (DBL0H | DBL1H) has no bit set below bit 31, i.e. for +0 against -0.  */
.Lhighdiff:
|
||||
or r12,DBL0H,DBL1H
|
||||
or.f 0,DBL0L,DBL1L
|
||||
j_s.d [blink]
|
||||
bmsk.eq.f r12,r12,30
|
||||
ENDFUNC(__eqdf2)
|
||||
/* ??? could we do better by speeding up some 'common' case of inequality? */
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "arc-ieee-754.h"
|
||||
/* inputs: r0, r1
|
||||
output: z flag
|
||||
clobber: r12, flags
|
||||
For NaNs, bit 22 .. bit 30 must be set. */
|
||||
#if 0 /* DEBUG */
/* Disabled self-checking harness: saves r0-r11 and blink, calls
   __eqsf2_c, keeps its r0 result in r11, restores the registers and calls
   __eqsf2_asm.  It then returns only if the Z flag agrees with the
   reference (Z set when r11 == 0, Z clear otherwise) and calls abort if
   not.  NOTE(review): this relies on breq.d leaving the flags produced by
   __eqsf2_asm untouched -- confirm against the ISA manual.  */
|
||||
.global __eqsf2
|
||||
.balign 4
|
||||
FUNC(__eqsf2)
|
||||
__eqsf2:
|
||||
st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4]
|
||||
st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4]
|
||||
st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1`
|
||||
bl.d __eqsf2_c` push_s r0
|
||||
mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3
|
||||
ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]`
|
||||
ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4]
|
||||
bl.d __eqsf2_asm` ld.ab r10,[sp,4]
|
||||
pop_s blink
|
||||
breq.d r11,0,0f
|
||||
ld.ab r11,[sp,4]
|
||||
jne_s [blink]
|
||||
bl abort
|
||||
0: jeq_s [blink]
|
||||
bl abort
|
||||
ENDFUNC(__eqsf2)
|
||||
#define __eqsf2 __eqsf2_asm
|
||||
#endif /* DEBUG */
|
||||
/* Good performance as long as the binary difference is
|
||||
well predictable (as seen from the branch predictor). */
|
||||
/* __eqsf2: single-precision equality test of r0 and r1.  The result is
   delivered in the Z flag; r12 and the flags are clobbered.  */
.global __eqsf2
|
||||
.balign 4
|
||||
HIDDEN_FUNC(__eqsf2)
|
||||
__eqsf2:
|
||||
breq r0, r1,.Lno_bdiff
|
||||
/* Bit patterns differ: Z is set only when (r0 | r1) has no bit set below
   bit 31, i.e. for +0 against -0.  */
or r12,r0,r1
|
||||
j_s.d [blink]
|
||||
bmsk.f 0,r12,30
|
||||
/* Bit patterns are identical: r12 + (r0 << 1) carries out for a NaN, and
   the compare in the delay slot only runs when carry is clear.  */
.Lno_bdiff:
|
||||
bmsk r12,r0,23
|
||||
add1.f r12,r12,r0 /* set c iff NaN; also, clear z if NaN. */
|
||||
j_s.d [blink]
|
||||
cmp.cc r0,r1
|
||||
ENDFUNC(__eqsf2)
|
||||
|
|
@ -0,0 +1,122 @@
|
|||
/* Copyright (C) 2006, 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "arc-ieee-754.h"
|
||||
|
||||
#if 0 /* DEBUG */
/* Disabled self-checking harness: calls __extendsfdf2_c, stashes its
   r0/r1 result on the stack, calls __extendsfdf2_asm with the original r0
   and branches to abort unless both register pairs match.  When enabled,
   the #define at the end renames the real implementation to
   __extendsfdf2_asm.  */
|
||||
.global __extendsfdf2
|
||||
.balign 4
|
||||
FUNC(__extendsfdf2)
|
||||
__extendsfdf2:
|
||||
push_s blink
|
||||
bl.d __extendsfdf2_c
|
||||
push_s r0
|
||||
/* r2 = original argument; the reference r1 and r0 go onto the stack.  */
ld_s r2,[sp]
|
||||
st_s r1,[sp]
|
||||
push_s r0
|
||||
bl.d __extendsfdf2_asm
|
||||
mov_s r0,r2
|
||||
/* r2/r3 = reference result, r0/r1 = result under test.  */
pop_s r2
|
||||
pop_s r3
|
||||
pop_s blink
|
||||
cmp r0,r2
|
||||
cmp.eq r1,r3
|
||||
jeq_s [blink]
|
||||
bl abort
|
||||
ENDFUNC(__extendsfdf2)
|
||||
#define __extendsfdf2 __extendsfdf2_asm
|
||||
#endif /* DEBUG */
|
||||
#if 0 /* ARC600 */
/* Disabled, incomplete sketch of an alternative entry sequence: the `..'
   line below is a placeholder, not an instruction, so this region cannot
   be enabled as it stands.  */
|
||||
__extendsfdf2:
|
||||
lsr r2,r0,23
|
||||
tst r2,0xff
|
||||
bic.ne.f r2,0xff
|
||||
beq_s .Linf_nan_denorm_0
|
||||
..
|
||||
.Linf_nan_denorm:
|
||||
bbit1 r0,30,.Linf_nan
|
||||
#endif
|
||||
/* __extendsfdf2: widen the single-precision value in r0 to double
   precision in DBL0H/DBL0L.  Judging by the in-place operations below,
   DBL0H is r1 and DBL0L is r0 on little-endian and the other way round on
   big-endian (presumably defined in arc-ieee-754.h -- verify there).
   Clobbers r2 (big-endian path), r3 and the flags.  */
.global __extendsfdf2
|
||||
.balign 4
|
||||
FUNC(__extendsfdf2)
|
||||
__extendsfdf2:
|
||||
/* r1 = r0 << 1: carry receives the sign bit, Z is set for +-0.  */
add.f r1,r0,r0
|
||||
norm r3,r1
|
||||
/* Normal-number path: place exponent and fraction, then add 0x38000000,
   which is (0x3ff - 0x7f) << 20, the exponent-bias difference.  The sign
   bit is toggled in the return delay slot when carry is set.  */
#ifdef __LITTLE_ENDIAN__
|
||||
lsr_s DBL0H,r1,4
|
||||
brhs r3,7,.Linf_nan_denorm_0
|
||||
asl_s DBL0L,r0,29
|
||||
add_s DBL0H,DBL0H, \
|
||||
0x38000000
|
||||
#else
|
||||
lsr r2,r1,4
|
||||
brhs r3,7,.Linf_nan_denorm_0
|
||||
asl_s DBL0L,r1,28
|
||||
add DBL0H,r2, \
|
||||
0x38000000
|
||||
#endif
|
||||
j_s.d [blink]
|
||||
bxor.cs DBL0H,DBL0H,31
|
||||
.balign 4
|
||||
/* Reached when norm reports 7 or more for r0 << 1.  Returns at once when
   Z is still set from the add.f above (input +-0), branches to .Linf_nan
   on N, and otherwise normalises a subnormal input by shifting left by r3
   and deriving the exponent from 0x380+6 - r3.  */
.Linf_nan_denorm_0:
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
mov_s DBL0H,r0
|
||||
jeq.d [blink]
|
||||
mov.eq DBL0L,0
|
||||
#else
|
||||
jeq_s [blink]
|
||||
#endif
|
||||
bmi .Linf_nan
|
||||
asl_s r0,r0,r3
|
||||
rsub r3,r3,0x380+6
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
asl_s r3,r3,20
|
||||
lsr DBL0H,r0,9
|
||||
asl_s DBL0L,r0,23
|
||||
add_s DBL0H,DBL0H,r3
|
||||
j_s.d [blink]
|
||||
bxor.cs DBL0H,DBL0H,31
|
||||
#else
|
||||
asl DBL0L,r0,23
|
||||
lsr_s DBL0H,r0,9
|
||||
asl_s r3,r3,20
|
||||
bxor.cs DBL0H,DBL0H,31
|
||||
j_s.d [blink]
|
||||
add_l DBL0H,DBL0H,r3
|
||||
#endif
|
||||
/* Inf / NaN: the high word becomes r0 | (r0 >> 3), the low word 0.  */
.Linf_nan:
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
lsr DBL0H,r0,3
|
||||
|
||||
|
||||
or_s DBL0H,DBL0H,r0
|
||||
j_s.d [blink]
|
||||
mov_l DBL0L,0
|
||||
#else
|
||||
lsr r3,r0,3
|
||||
mov_s DBL0L,0
|
||||
j_s.d [blink]
|
||||
or_l DBL0H,r0,r3
|
||||
#endif
|
||||
ENDFUNC(__extendsfdf2)
|
||||
|
|
@ -0,0 +1,85 @@
|
|||
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "arc-ieee-754.h"
|
||||
|
||||
#if 0 /* DEBUG */
/* Disabled self-checking harness: calls __fixdfsi_c and __fixdfsi_asm
   with the same r0/r1 operand and branches to abort unless both return
   the same value in r0.  When enabled, the #define at the end renames the
   real implementation to __fixdfsi_asm.  */
|
||||
FUNC(__fixdfsi)
|
||||
.global __fixdfsi
|
||||
.balign 4
|
||||
__fixdfsi:
|
||||
push_s blink
|
||||
push_s r0
|
||||
bl.d __fixdfsi_c
|
||||
push_s r1
|
||||
/* r2 = reference result; restore the operand and park r2 on the stack.  */
mov_s r2,r0
|
||||
pop_s r1
|
||||
ld r0,[sp]
|
||||
bl.d __fixdfsi_asm
|
||||
st r2,[sp]
|
||||
pop_s r1
|
||||
pop_s blink
|
||||
cmp r0,r1
|
||||
jeq_s [blink]
|
||||
bl abort
|
||||
ENDFUNC(__fixdfsi)
|
||||
#define __fixdfsi __fixdfsi_asm
|
||||
#endif /* DEBUG */
|
||||
|
||||
/* If the fraction has to be shifted left by a positive non-zero amount,
|
||||
we have to combine bits from DBL0L and DBL0H. If we shift right,
|
||||
or shift by zero, we only want to have the bits from DBL0H in r0. */
|
||||
|
||||
/* __fixdfsi: convert the double in DBL0H/DBL0L to a signed 32-bit integer
   in r0.  Clobbers r1, r2, r3 and the flags.
   NOTE(review): behaviour for NaN and out-of-range inputs is not evident
   from this code -- confirm before relying on it.  */
.global __fixdfsi
|
||||
FUNC(__fixdfsi)
|
||||
.balign 4
|
||||
__fixdfsi:
|
||||
/* Bit 30 clear means the biased exponent is below 0x400.  */
bbit0 DBL0H,30,.Lret0or1
|
||||
/* r2 = left-shift count, r3 = -r2 = right-shift count; DBL0H is reduced
   to the 20 high fraction bits plus the implicit 1 at bit 20.  */
asr r2,DBL0H,20
|
||||
bmsk_s DBL0H,DBL0H,19
|
||||
sub_s r2,r2,19; 0x3ff+20-0x400
|
||||
neg_s r3,r2
|
||||
asr.f 0,r3,11
|
||||
bset_s DBL0H,DBL0H,20
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
mov.cs DBL0L,DBL0H
|
||||
asl DBL0H,DBL0H,r2
|
||||
#else
|
||||
asl.cc DBL0H,DBL0H,r2
|
||||
lsr.cs DBL0H,DBL0H,r3
|
||||
#endif
|
||||
lsr_s DBL0L,DBL0L,r3
|
||||
|
||||
|
||||
add.cc r0,r0,r1
|
||||
j_s.d [blink]
|
||||
neg.pl r0,r0
|
||||
/* Exponent below 0x400: adding 0x100000 bumps the exponent field, so
   bit 30 of the sum is the only bit kept for the 0-or-1 magnitude; the
   result is negated when the sum is negative.  */
.Lret0or1:
|
||||
add.f r0,DBL0H,0x100000
|
||||
lsr_s r0,r0,30
|
||||
|
||||
|
||||
bmsk_s r0,r0,0
|
||||
j_s.d [blink]
|
||||
neg.mi r0,r0
|
||||
ENDFUNC(__fixdfsi)
|
||||
|
|
@ -0,0 +1,71 @@
|
|||
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "arc-ieee-754.h"
|
||||
|
||||
#if 0 /* DEBUG */
/* Disabled self-checking harness: calls __fixsfsi_c and __fixsfsi_asm
   with the same r0 operand and branches to abort unless both return the
   same value in r0.  When enabled, the #define at the end renames the
   real implementation to __fixsfsi_asm.  */
|
||||
.global __fixsfsi
|
||||
FUNC(__fixsfsi)
|
||||
.balign 4
|
||||
__fixsfsi:
|
||||
push_s blink
|
||||
bl.d __fixsfsi_c
|
||||
push_s r0
|
||||
/* r1 = original operand; the reference result takes its stack slot.  */
ld_s r1,[sp]
|
||||
st_s r0,[sp]
|
||||
bl.d __fixsfsi_asm
|
||||
mov_s r0,r1
|
||||
pop_s r1
|
||||
pop_s blink
|
||||
cmp r0,r1
|
||||
jeq_s [blink]
|
||||
bl abort
|
||||
ENDFUNC(__fixsfsi)
|
||||
#define __fixsfsi __fixsfsi_asm
|
||||
#endif /* DEBUG */
|
||||
|
||||
/* __fixsfsi: convert the single-precision value in r0 to a signed 32-bit
   integer in r0.  Clobbers r2, r3 and the flags.
   NOTE(review): behaviour for NaN and out-of-range inputs is not evident
   from this code -- confirm before relying on it.  */
.global __fixsfsi
|
||||
FUNC(__fixsfsi)
|
||||
.balign 4
|
||||
__fixsfsi:
|
||||
/* Bit 30 clear means the biased exponent is below 0x80.  */
bbit0 r0,30,.Lret0or1
|
||||
/* r2 = sign and exponent, r0 = fraction with the implicit 1 at bit 23.
   After the bias adjustment r2 is the left-shift count and r3 = -r2 the
   right-shift count; the flags from asl.f select between the two shifts
   and the final negation.  */
lsr r2,r0,23
|
||||
bmsk_s r0,r0,22
|
||||
bset_s r0,r0,23
|
||||
sub_s r2,r2,22;0x7f+23-0x80
|
||||
asl.f 0,r2,24
|
||||
neg r3,r2
|
||||
asl.mi r0,r0,r2
|
||||
lsr.pl r0,r0,r3
|
||||
j_s.d [blink]
|
||||
neg.cs r0,r0
|
||||
/* Exponent below 0x80: adding 0x800000 bumps the exponent field, so
   bit 30 of the sum is the only bit kept for the 0-or-1 magnitude; the
   result is negated when the sum is negative.  */
.Lret0or1:
|
||||
add.f r0,r0,0x800000
|
||||
lsr_s r0,r0,30
|
||||
|
||||
|
||||
bmsk_s r0,r0,0
|
||||
j_s.d [blink]
|
||||
neg.mi r0,r0
|
||||
ENDFUNC(__fixsfsi)
|
||||
|
|
@ -0,0 +1,80 @@
|
|||
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "arc-ieee-754.h"
|
||||
|
||||
#if 0 /* DEBUG */
; Disabled self-check wrapper: calls the C reference __fixunsdfsi_c and the
; assembly version __fixunsdfsi_asm with the same r0/r1 argument and calls
; abort when the two r0 results differ.
|
||||
FUNC(__fixunsdfsi)
|
||||
.global __fixunsdfsi
|
||||
.balign 4
|
||||
__fixunsdfsi:
|
||||
push_s blink
|
||||
push_s r0
|
||||
bl.d __fixunsdfsi_c
|
||||
push_s r1  ; delay slot: save second argument word
|
||||
mov_s r2,r0  ; r2 = reference result
|
||||
pop_s r1  ; restore argument word r1
|
||||
ld r0,[sp]  ; reload argument word r0, slot stays allocated
|
||||
bl.d __fixunsdfsi_asm
|
||||
st r2,[sp]  ; delay slot: park the reference result in that slot
|
||||
pop_s r1  ; r1 = reference result
|
||||
pop_s blink
|
||||
cmp r0,r1
|
||||
jeq_s [blink]  ; results agree: return normally
|
||||
bl abort
|
||||
ENDFUNC(__fixunsdfsi)
|
||||
#define __fixunsdfsi __fixunsdfsi_asm
|
||||
#endif /* DEBUG */
|
||||
|
||||
; __fixunsdfsi: convert the IEEE double held in DBL0H/DBL0L to a 32-bit
; integer returned in r0.  DBL0H/DBL0L come from arc-ieee-754.h (not shown
; here); presumably the high and low argument words, mapped to r0/r1
; according to endianness.  Writes r2, r3 and the flags.
.global __fixunsdfsi
|
||||
FUNC(__fixunsdfsi)
|
||||
.balign 4
|
||||
__fixunsdfsi:
|
||||
bbit0 DBL0H,30,.Lret0or1  ; bit 30 clear: biased exponent < 0x400, result is 0 or 1
|
||||
lsr r2,DBL0H,20  ; r2 = sign bit and 11-bit biased exponent
|
||||
bmsk_s DBL0H,DBL0H,19  ; keep the 20 high fraction bits
|
||||
sub_s r2,r2,19; 0x3ff+20-0x400
|
||||
neg_s r3,r2  ; r3 = complementary shift count
|
||||
btst_s r3,10  ; Z/NZ selects between the two shift cases below
|
||||
bset_s DBL0H,DBL0H,20  ; insert the implicit leading one
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
mov.ne DBL0L,DBL0H
|
||||
asl DBL0H,DBL0H,r2
|
||||
#else
|
||||
asl.eq DBL0H,DBL0H,r2
|
||||
lsr.ne DBL0H,DBL0H,r3
|
||||
#endif
|
||||
lsr DBL0L,DBL0L,r3
|
||||
j_s.d [blink]
|
||||
add.eq r0,r0,r1  ; delay slot: EQ case combines both shifted words into r0
|
||||
.Lret0:  ; NOTE(review): no branch in this block targets this label
|
||||
j_s.d [blink]
|
||||
mov_l r0,0
|
||||
.Lret0or1:
|
||||
add_s DBL0H,DBL0H,0x100000  ; add one to the exponent field
|
||||
lsr_s DBL0H,DBL0H,30
|
||||
j_s.d [blink]
|
||||
bmsk_l r0,DBL0H,0  ; delay slot: r0 = former bit 30 (1 only if the exponent field was 0x3ff)
|
||||
ENDFUNC(__fixunsdfsi)
|
||||
|
|
@ -0,0 +1,77 @@
|
|||
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "arc-ieee-754.h"
|
||||
|
||||
#if 0 /* DEBUG */
; Disabled self-check wrapper: calls the C reference __floatsidf_c and the
; assembly version __floatsidf_asm with the same r0 argument and calls abort
; unless both result words (r0 and r1) match.
|
||||
.global __floatsidf
|
||||
.balign 4
|
||||
FUNC(__floatsidf)
|
||||
__floatsidf:
|
||||
push_s blink
|
||||
bl.d __floatsidf_c
|
||||
push_s r0  ; delay slot: save the argument
|
||||
ld_s r2,[sp]  ; r2 = original argument
|
||||
st_s r1,[sp]  ; stack: reference result word r1
|
||||
push_s r0  ; stack: reference result word r0
|
||||
bl.d __floatsidf_asm
|
||||
mov_s r0,r2  ; delay slot: restore the argument
|
||||
pop_s r2  ; r2 = reference r0
|
||||
pop_s r3  ; r3 = reference r1
|
||||
pop_s blink
|
||||
cmp r0,r2
|
||||
cmp.eq r1,r3
|
||||
jeq_s [blink]  ; both words agree: return normally
|
||||
bl abort
|
||||
ENDFUNC(__floatsidf)
|
||||
#define __floatsidf __floatsidf_asm
|
||||
#endif /* DEBUG */
|
||||
|
||||
; __floatsidf: convert the signed 32-bit integer in r0 to an IEEE double
; returned in DBL0H/DBL0L (macros from arc-ieee-754.h, not shown here).
; Writes r1, r2, r3, r12 and the flags.
.global __floatsidf
|
||||
.balign 4
|
||||
FUNC(__floatsidf)
|
||||
__floatsidf:
|
||||
abs.f r1,r0  ; r1 = |r0|; Z is set for a zero input
|
||||
jeq_s [blink]  ; zero input: return at once
|
||||
lsr r2,r1
|
||||
mov r12,-0x41d ; -(0x3ff+31-1)
|
||||
norm r2,r2  ; r2 = normalisation count of |x| >> 1
|
||||
bclr.cs r12,r12,11  ; carry from abs.f (presumably: input was negative) folds the sign into r12
|
||||
rsub.f r3,r2,11  ; r3 = 11 - r2; LO/LS below compare r2 against 11
|
||||
add_s r12,r2,r12
|
||||
add_s r2,r2,21
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
asl DBL0L,r1,r2
|
||||
lsr_s DBL0H,r1,r3
|
||||
#else
|
||||
lsr DBL0H,r1,r3
|
||||
asl_s DBL0L,r1,r2
|
||||
#endif
|
||||
asl_s r12,r12,20  ; move the exponent/sign term up to bit 20
|
||||
mov.lo DBL0H,DBL0L  ; r2 > 11: the left-shifted value forms the high word
|
||||
sub_s DBL0H,DBL0H,r12
|
||||
j_s.d [blink]
|
||||
mov.ls DBL0L,0  ; delay slot: r2 >= 11: the low word is zero
|
||||
ENDFUNC(__floatsidf)
|
||||
|
|
@ -0,0 +1,99 @@
|
|||
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "arc-ieee-754.h"
|
||||
|
||||
#if 0 /* DEBUG */
; Disabled self-check wrappers for __floatsisf and __floatunsisf: each calls
; the C reference (_c) and the assembly version (_asm) with the same r0
; argument and calls abort when the r0 results differ.
|
||||
.global __floatsisf
|
||||
FUNC(__floatsisf)
|
||||
.balign 4
|
||||
__floatsisf:
|
||||
push_s blink
|
||||
bl.d __floatsisf_c
|
||||
push_s r0  ; delay slot: save the argument
|
||||
ld_s r1,[sp]  ; r1 = original argument
|
||||
st_s r0,[sp]  ; stack: reference result
|
||||
bl.d __floatsisf_asm
|
||||
mov_s r0,r1  ; delay slot: restore the argument
|
||||
pop_s r1  ; r1 = reference result
|
||||
pop_s blink
|
||||
cmp r0,r1
|
||||
jeq_s [blink]
|
||||
bl abort
|
||||
ENDFUNC(__floatsisf)
|
||||
.global __floatunsisf
|
||||
FUNC(__floatunsisf)
|
||||
.balign 4
|
||||
__floatunsisf:
|
||||
push_s blink
|
||||
bl.d __floatunsisf_c
|
||||
push_s r0  ; delay slot: save the argument
|
||||
ld_s r1,[sp]  ; r1 = original argument
|
||||
st_s r0,[sp]  ; stack: reference result
|
||||
bl.d __floatunsisf_asm
|
||||
mov_s r0,r1  ; delay slot: restore the argument
|
||||
pop_s r1  ; r1 = reference result
|
||||
pop_s blink
|
||||
cmp r0,r1
|
||||
jeq_s [blink]
|
||||
bl abort
|
||||
ENDFUNC(__floatunsisf)
|
||||
#define __floatsisf __floatsisf_asm
|
||||
#define __floatunsisf __floatunsisf_asm
|
||||
#endif /* DEBUG */
|
||||
|
||||
; __floatunsisf / __floatsisf: convert the unsigned / signed 32-bit integer in
; r0 to an IEEE single returned in r0.  Both entry points share the code from
; local label 0 onwards.  Writes r1, r2, r3, r12 and the flags.
.global __floatunsisf
|
||||
.global __floatsisf
|
||||
FUNC(__floatsisf)
|
||||
FUNC(__floatunsisf)
|
||||
.balign 4
|
||||
__floatunsisf:
|
||||
lsr_s r2,r0
|
||||
mov_l r12,0x9d ; 0x7f + 31 - 1
|
||||
norm r2,r2  ; r2 = normalisation count of x >> 1
|
||||
brne_l r0,0,0f  ; non-zero input: join the shared tail
|
||||
j_s [blink]  ; zero input: r0 is already the result
|
||||
.balign 4
|
||||
__floatsisf:
|
||||
abs.f r0,r0  ; r0 = |x|; Z is set for a zero input
|
||||
jeq_s [blink]  ; zero input: return at once
|
||||
lsr_s r2,r0
|
||||
mov_s r12,0x9d ; 0x7f + 31 - 1
|
||||
norm r2,r2  ; r2 = normalisation count of |x| >> 1
|
||||
bset.cs r12,r12,8  ; carry from abs.f (presumably: input was negative); bit 8 ends up as bit 31 after the shift by 23
|
||||
0: rsub.f r3,r2,8  ; r3 = 8 - r2; N selects right (PL) or left (MI) shift below
|
||||
bmsk r1,r0,r3  ; r1 = low-order bits of r0, used for the rounding decision
|
||||
ror r1,r1,r3
|
||||
lsr.pl r0,r0,r3
|
||||
neg_s r3,r3
|
||||
asl.mi r0,r0,r3
|
||||
sub_s r12,r12,r2  ; exponent (plus sign in bit 8) for this normalisation count
|
||||
asl_s r12,r12,23  ; move it to the exponent field position
|
||||
bxor.pl.f r1,r1,31  ; right-shift case only: set flags for the rounding step
|
||||
add_s r0,r0,r12
|
||||
j_s.d [blink]
|
||||
add.pnz r0,r0,1  ; delay slot: conditional round-up increment
|
||||
ENDFUNC(__floatunsisf)
|
||||
ENDFUNC(__floatsisf)
|
||||
|
|
@ -0,0 +1,75 @@
|
|||
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "arc-ieee-754.h"
|
||||
|
||||
#if 0 /* DEBUG */
; Disabled self-check wrapper: calls the C reference __floatunsidf_c and the
; assembly version __floatunsidf_asm with the same r0 argument and calls
; abort unless both result words (r0 and r1) match.
|
||||
.global __floatunsidf
|
||||
.balign 4
|
||||
FUNC(__floatunsidf)
|
||||
__floatunsidf:
|
||||
push_s blink
|
||||
bl.d __floatunsidf_c
|
||||
push_s r0  ; delay slot: save the argument
|
||||
ld_s r2,[sp]  ; r2 = original argument
|
||||
st_s r1,[sp]  ; stack: reference result word r1
|
||||
push_s r0  ; stack: reference result word r0
|
||||
bl.d __floatunsidf_asm
|
||||
mov_s r0,r2  ; delay slot: restore the argument
|
||||
pop_s r2  ; r2 = reference r0
|
||||
pop_s r3  ; r3 = reference r1
|
||||
pop_s blink
|
||||
cmp r0,r2
|
||||
cmp.eq r1,r3
|
||||
jeq_s [blink]  ; both words agree: return normally
|
||||
bl abort
|
||||
ENDFUNC(__floatunsidf)
|
||||
#define __floatunsidf __floatunsidf_asm
|
||||
#endif /* DEBUG */
|
||||
|
||||
; __floatunsidf: convert the unsigned 32-bit integer in r0 to an IEEE double
; returned in DBL0H/DBL0L (macros from arc-ieee-754.h, not shown here).
; Writes r1, r2, r3, r12 and the flags.
.global __floatunsidf
|
||||
.balign 4
|
||||
FUNC(__floatunsidf)
|
||||
__floatunsidf:
|
||||
lsr_s r1,r0
|
||||
breq_s r0,0,.Lret0  ; zero input: r0 and r1 are both zero already
|
||||
norm r2,r1  ; r2 = normalisation count of x >> 1
|
||||
mov r12,-0x41d ; -(0x3ff+31-1)
|
||||
rsub.f r3,r2,11  ; r3 = 11 - r2; LO/LS below compare r2 against 11
|
||||
add_s r12,r2,r12
|
||||
add_s r2,r2,21
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
lsr DBL0H,r0,r3
|
||||
asl_s DBL0L,r0,r2
|
||||
#else
|
||||
asl DBL0L,r0,r2
|
||||
lsr_s DBL0H,r0,r3
|
||||
#endif
|
||||
asl_s r12,r12,20  ; move the exponent term up to bit 20
|
||||
mov.lo DBL0H,DBL0L  ; r2 > 11: the left-shifted value forms the high word
|
||||
sub_s DBL0H,DBL0H,r12
|
||||
.Lret0: j_s.d [blink]
|
||||
mov.ls DBL0L,0  ; delay slot: r2 >= 11: the low word is zero
|
||||
ENDFUNC(__floatunsidf)
|
||||
|
|
@ -0,0 +1,86 @@
|
|||
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "arc-ieee-754.h"
|
||||
/* inputs: DBL0, DBL1
|
||||
output: c flags to be used for 'hs' condition
|
||||
clobber: r12, flags */
|
||||
/* For NaNs, bit 19.. bit 30 of the high word must be set. */
|
||||
#if 0 /* DEBUG */
; Disabled self-check wrapper: saves r0-r11 and blink, calls the C reference
; __gedf2_c, keeps its result in r11, restores the registers and calls
; __gedf2_asm.  It then checks that the flags from the assembly version agree
; with the sign of the C result and calls abort otherwise.
|
||||
.global __gedf2
|
||||
.balign 4
|
||||
FUNC(__gedf2)
|
||||
__gedf2:
|
||||
st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4]
|
||||
st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4]
|
||||
st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1`
|
||||
bl.d __gedf2_c` push_s r0
|
||||
mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3
|
||||
ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]`
|
||||
ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4]
|
||||
bl.d __gedf2_asm` ld.ab r10,[sp,4]
|
||||
pop_s blink
|
||||
brge.d r11,0,0f  ; C result >= 0: the assembly flags must satisfy hs
|
||||
ld.ab r11,[sp,4]  ; delay slot: restore r11
|
||||
jlo [blink]  ; C result < 0: the assembly flags must satisfy lo
|
||||
bl abort
|
||||
0: jhs [blink]
|
||||
bl abort
|
||||
ENDFUNC(__gedf2)
|
||||
#define __gedf2 __gedf2_asm
|
||||
#endif /* DEBUG */
|
||||
; __gedf2: compare the doubles DBL0 and DBL1.  The result is delivered in the
; carry flag, for the caller to test with the hs condition; only r12 and the
; flags are written.  The input registers are not modified.
.global __gedf2
|
||||
.balign 4
|
||||
HIDDEN_FUNC(__gedf2)
|
||||
__gedf2:
|
||||
or.f r12,DBL0H,DBL1H  ; N set when either operand has its sign bit set
|
||||
bmi.d .Lneg
|
||||
bmsk_s r12,r12,20  ; delay slot: keep bits 20..0 of the combined high words
|
||||
add1.f 0,r12,DBL0H ; clear z; set c iff NaN
|
||||
add1.cc.f r12,r12,DBL1H ; clear z; set c iff NaN
|
||||
bbit1 DBL0H,31,.Lneg
|
||||
cmp.cc DBL0H,DBL1H  ; no NaN seen: compare the high words
|
||||
j_s.d [blink]
|
||||
cmp.eq DBL0L,DBL1L  ; delay slot: high words equal: the low words decide
|
||||
.balign 4
|
||||
.Lneg: breq.d DBL1H,0,.L0  ; DBL1 high word is zero: handle the signed-zero cases
|
||||
add1.f 0,r12,DBL0H
|
||||
add1.cc.f r12,r12,DBL1H
|
||||
cmp.cc DBL1H,DBL0H  ; operands swapped relative to the non-negative path
|
||||
j_s.d [blink]
|
||||
cmp.eq DBL1L,DBL0L
|
||||
.balign 4
|
||||
.L0:
|
||||
bxor.f 0,DBL0H,31 ; check for high word of -0.
|
||||
beq_s .Lcheck_0
|
||||
cmp.cc DBL1H,DBL0H
|
||||
j_s.d [blink]
|
||||
cmp.eq DBL1L,DBL0L
|
||||
.Lcheck_0:
|
||||
; high words suggest DBL0 may be -0, DBL1 +0; check low words.
|
||||
cmp_s DBL1H,DBL0L
|
||||
j_s.d [blink]
|
||||
cmp.cc DBL1H,DBL1L
|
||||
ENDFUNC(__gedf2)
|
||||
|
|
@ -0,0 +1,75 @@
|
|||
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "arc-ieee-754.h"
|
||||
/* inputs: r0, r1
|
||||
output: c flag to be used for 'hs' condition
|
||||
clobber: r12,flags */
|
||||
/* For NaNs, bit 22.. bit 30 must be set. */
|
||||
#if 0 /* DEBUG */
; Disabled self-check wrapper: saves r0-r11 and blink, calls the C reference
; __gesf2_c, keeps its result in r11, restores the registers and calls
; __gesf2_asm.  It then checks that the flags from the assembly version agree
; with the sign of the C result and calls abort otherwise.
|
||||
.global __gesf2
|
||||
.balign 4
|
||||
FUNC(__gesf2)
|
||||
__gesf2:
|
||||
st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4]
|
||||
st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4]
|
||||
st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1`
|
||||
bl.d __gesf2_c` push_s r0
|
||||
mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3
|
||||
ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]`
|
||||
ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4]
|
||||
bl.d __gesf2_asm` ld.ab r10,[sp,4]
|
||||
pop_s blink
|
||||
brge.d r11,0,0f  ; C result >= 0: the assembly flags must satisfy hs
|
||||
ld.ab r11,[sp,4]  ; delay slot: restore r11
|
||||
jlo [blink]  ; C result < 0: the assembly flags must satisfy lo
|
||||
bl abort
|
||||
0: jhs [blink]
|
||||
bl abort
|
||||
ENDFUNC(__gesf2)
|
||||
#define __gesf2 __gesf2_asm
|
||||
#endif /* DEBUG */
|
||||
; __gesf2: compare the singles in r0 and r1.  The result is delivered in the
; carry flag, for the caller to test with the hs condition; only r12 and the
; flags are written.
.global __gesf2
|
||||
.balign 4
|
||||
HIDDEN_FUNC(__gesf2)
|
||||
__gesf2:
|
||||
or.f r12,r0,r1  ; N set when either operand has its sign bit set
|
||||
bmi.d .Lneg
|
||||
bmsk_s r12,r12,23  ; delay slot: keep bits 23..0 of the combined operands
|
||||
add1.f 0,r12,r0 ; check for NaN
|
||||
add1.cc.f r12,r12,r1
|
||||
j_s.d [blink]
|
||||
cmp.cc r0,r1  ; delay slot: no carry from the NaN checks: plain unsigned compare
|
||||
.balign 4
|
||||
.Lneg: breq.d r1,0,.L0  ; r1 is +0: handle the signed-zero case
|
||||
add1.f 0,r12,r0 ; check for NaN
|
||||
add1.cc.f r12,r12,r1
|
||||
j_s.d [blink]
|
||||
cmp.cc r1,r0  ; delay slot: operands swapped relative to the non-negative path
|
||||
.balign 4
|
||||
.L0: bxor.f 0,r0,31 ; check for -0
|
||||
j_s.d [blink]
|
||||
cmp.hi r1,r0
|
||||
ENDFUNC(__gesf2)
|
||||
|
|
@ -0,0 +1,86 @@
|
|||
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "arc-ieee-754.h"
|
||||
/* inputs: DBL0, DBL1
|
||||
output: c,z flags to be used for 'hi' condition
|
||||
clobber: r12, flags */
|
||||
/* For NaNs, bit 19.. bit 30 of the high word must be set. */
|
||||
#if 0 /* DEBUG */
; Disabled self-check wrapper: saves r0-r11 and blink, calls the C reference
; __gtdf2_c, keeps its result in r11, restores the registers and calls
; __gtdf2_asm.  It then checks that the flags from the assembly version agree
; with the C result (> 0 or not) and calls abort otherwise.
|
||||
.global __gtdf2
|
||||
.balign 4
|
||||
FUNC(__gtdf2)
|
||||
__gtdf2:
|
||||
st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4]
|
||||
st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4]
|
||||
st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1`
|
||||
bl.d __gtdf2_c` push_s r0
|
||||
mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3
|
||||
ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]`
|
||||
ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4]
|
||||
bl.d __gtdf2_asm` ld.ab r10,[sp,4]
|
||||
pop_s blink
|
||||
brgt.d r11,0,0f  ; C result > 0: the assembly flags must satisfy hi
|
||||
ld.ab r11,[sp,4]  ; delay slot: restore r11
|
||||
jls [blink]  ; C result <= 0: the assembly flags must satisfy ls
|
||||
bl abort
|
||||
0: jhi [blink]
|
||||
bl abort
|
||||
ENDFUNC(__gtdf2)
|
||||
#define __gtdf2 __gtdf2_asm
|
||||
#endif /* DEBUG */
|
||||
; __gtdf2: compare the doubles DBL0 and DBL1.  The result is delivered in the
; carry and zero flags, for the caller to test with the hi condition; only
; r12 and the flags are written.  The input registers are not modified.
.global __gtdf2
|
||||
.balign 4
|
||||
HIDDEN_FUNC(__gtdf2)
|
||||
__gtdf2:
|
||||
or.f r12,DBL0H,DBL1H  ; N set when either operand has its sign bit set
|
||||
bmi.d .Lneg
|
||||
bmsk_s r12,r12,20  ; delay slot: keep bits 20..0 of the combined high words
|
||||
add1.f 0,r12,DBL0H ; clear z; set c iff NaN
|
||||
add1.cc.f r12,r12,DBL1H ; clear z; set c iff NaN
|
||||
; don't care: z may or may not be cleared if there is no NaN event
|
||||
cmp.cc DBL0H,DBL1H  ; no NaN seen: compare the high words
|
||||
j_s.d [blink]
|
||||
cmp.eq DBL0L,DBL1L  ; delay slot: high words equal: the low words decide
|
||||
.balign 4
|
||||
.Lneg: breq.d DBL0H,0,.L0  ; DBL0 high word is zero: handle the signed-zero cases
|
||||
add1.f 0,r12,DBL1H
|
||||
add1.cc.f r12,r12,DBL0H
|
||||
cmp.cc DBL1H,DBL0H  ; operands swapped relative to the non-negative path
|
||||
j_s.d [blink]
|
||||
cmp.eq DBL1L,DBL0L
|
||||
.balign 4
|
||||
.L0:
|
||||
bxor.f 0,DBL1H,31  ; Z set when DBL1H is exactly 0x80000000 (high word of -0)
|
||||
beq_s .Lcheck_0
|
||||
cmp.cc DBL1H,DBL0H
|
||||
j_s.d [blink]
|
||||
cmp.eq DBL1L,DBL0L
|
||||
.balign 4
|
||||
.Lcheck_0:
|
||||
; high words suggest DBL0 may be +0, DBL1 -0; check low words.
|
||||
j_s.d [blink]
|
||||
or.f 0,DBL0L,DBL1L  ; delay slot: Z set only when both low words are zero
|
||||
ENDFUNC(__gtdf2)
|
||||
|
|
@ -0,0 +1,75 @@
|
|||
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "arc-ieee-754.h"
|
||||
/* inputs: r0, r1
|
||||
output: c, z flags to be used for 'hi' condition
|
||||
clobber: r12,flags */
|
||||
/* For NaNs, bit 22.. bit 30 must be set. */
|
||||
#if 0 /* DEBUG */
; Disabled self-check wrapper: saves r0-r11 and blink, calls the C reference
; __gtsf2_c, keeps its result in r11, restores the registers and calls
; __gtsf2_asm.  It then checks that the flags from the assembly version agree
; with the C result (> 0 or not) and calls abort otherwise.
|
||||
.global __gtsf2
|
||||
.balign 4
|
||||
FUNC(__gtsf2)
|
||||
__gtsf2:
|
||||
st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4]
|
||||
st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4]
|
||||
st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1`
|
||||
bl.d __gtsf2_c` push_s r0
|
||||
mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3
|
||||
ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]`
|
||||
ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4]
|
||||
bl.d __gtsf2_asm` ld.ab r10,[sp,4]
|
||||
pop_s blink
|
||||
brgt.d r11,0,0f  ; C result > 0: the assembly flags must satisfy hi
|
||||
ld.ab r11,[sp,4]  ; delay slot: restore r11
|
||||
jls [blink]  ; C result <= 0: the assembly flags must satisfy ls
|
||||
bl abort
|
||||
0: jhi [blink]
|
||||
bl abort
|
||||
ENDFUNC(__gtsf2)
|
||||
#define __gtsf2 __gtsf2_asm
|
||||
#endif /* DEBUG */
|
||||
; __gtsf2: compare the singles in r0 and r1.  The result is delivered in the
; carry and zero flags, for the caller to test with the hi condition; only
; r12 and the flags are written.
.global __gtsf2
|
||||
.balign 4
|
||||
HIDDEN_FUNC(__gtsf2)
|
||||
__gtsf2:
|
||||
or.f r12,r0,r1  ; N set when either operand has its sign bit set
|
||||
bmi.d .Lneg
|
||||
bmsk_s r12,r12,23  ; delay slot: keep bits 23..0 of the combined operands
|
||||
add1.f 0,r12,r0 ; check for NaN
|
||||
add1.cc.f r12,r12,r1
|
||||
j_s.d [blink]
|
||||
cmp.cc r0,r1  ; delay slot: no carry from the NaN checks: plain unsigned compare
|
||||
.balign 4
|
||||
.Lneg: breq.d r0,0,.L0  ; r0 is +0: handle the signed-zero case
|
||||
add1.f 0,r12,r0 ; check for NaN
|
||||
add1.cc.f r12,r12,r1
|
||||
j_s.d [blink]
|
||||
cmp.cc r1,r0  ; delay slot: operands swapped relative to the non-negative path
|
||||
.balign 4
|
||||
.L0: bxor.f 0,r1,31 ; check for -0
|
||||
j_s.d [blink]
|
||||
cmp.hi r1,r0
|
||||
ENDFUNC(__gtsf2)
|
||||
|
|
@ -0,0 +1,235 @@
|
|||
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* XMAC schedule: directly back-to-back multiplies stall; the third
|
||||
instruction after a multiply stalls unless it is also a multiply. */
|
||||
#include "arc-ieee-754.h"
|
||||
|
||||
#if 0 /* DEBUG */
; Disabled self-check wrapper: calls the C reference __muldf3_c and the
; assembly version __muldf3_asm with the same r0-r3 arguments and jumps to
; abort unless both result words (r0 and r1) match.
|
||||
.global __muldf3
|
||||
.balign 4
|
||||
__muldf3:
|
||||
push_s blink
|
||||
push_s r2
|
||||
push_s r3
|
||||
push_s r0
|
||||
bl.d __muldf3_c
|
||||
push_s r1  ; delay slot: last argument word saved
|
||||
ld_s r2,[sp,12]  ; reload argument words r2 and r3 ...
|
||||
ld_s r3,[sp,8]
|
||||
st_s r0,[sp,12]  ; ... and reuse their slots for the reference result
|
||||
st_s r1,[sp,8]
|
||||
pop_s r1
|
||||
bl.d __muldf3_asm
|
||||
pop_s r0  ; delay slot: restore argument word r0
|
||||
pop_s r3  ; r3 = reference r1
|
||||
pop_s r2  ; r2 = reference r0
|
||||
pop_s blink
|
||||
cmp r0,r2
|
||||
cmp.eq r1,r3
|
||||
jeq_s [blink]  ; both words agree: return normally
|
||||
b abort
|
||||
#define __muldf3 __muldf3_asm
|
||||
#endif /* DEBUG */
|
||||
/* N.B. This is optimized for ARC700.
|
||||
ARC600 has very different scheduling / instruction selection criteria. */
|
||||
/* For the standard multiplier, instead of mpyu rx,DBL0L,DBL1L; tst rx,rx ,
|
||||
we can do:
|
||||
sub rx,DBL0L,1; bic rx,DBL0L,rx; lsr rx,rx; norm rx,rx; asl.f 0,DBL1L,rx */
|
||||
|
||||
__muldf3_support: /* This label makes debugger output saner. */
|
||||
/* If one number is denormal, subtract some from the exponent of the other
|
||||
one (if the other exponent is too small, return 0), and normalize the
|
||||
denormal. Then re-run the computation. */
|
||||
.balign 4
|
||||
FUNC(__muldf3)
|
||||
.Ldenorm_dbl0:  ; DBL0 exponent field is zero: swap DBL0 and DBL1 via r12, then share the DBL1 path
|
||||
mov_s r12,DBL0L
|
||||
mov_s DBL0L,DBL1L
|
||||
mov_s DBL1L,r12
|
||||
mov_s r12,DBL0H
|
||||
mov_s DBL0H,DBL1H
|
||||
mov_s DBL1H,r12
|
||||
and r11,DBL0H,r9  ; r11 = exponent field of the new DBL0
|
||||
.Ldenorm_dbl1:
|
||||
brhs r11,r9,.Linf_nan  ; other operand has an all-ones exponent: inf/NaN handling
|
||||
brhs 0x3ca00001,r11,.Lret0  ; other exponent too small: return zero
|
||||
sub_s DBL0H,DBL0H,DBL1H
|
||||
bmsk_s DBL1H,DBL1H,30  ; clear the sign bit of DBL1H
|
||||
add_s DBL0H,DBL0H,DBL1H
|
||||
breq_s DBL1H,0,.Ldenorm_2  ; high word is zero: normalize from the low word instead
|
||||
norm r12,DBL1H
|
||||
|
||||
sub_s r12,r12,10
|
||||
asl r5,r12,20  ; r5 = normalisation shift expressed in exponent-field units
|
||||
asl_s DBL1H,DBL1H,r12
|
||||
sub DBL0H,DBL0H,r5  ; take the shift out of the other operand's exponent
|
||||
neg r5,r12
|
||||
lsr r6,DBL1L,r5  ; bits that move from the low word into the high word
|
||||
asl_s DBL1L,DBL1L,r12
|
||||
b.d __muldf3  ; re-run the multiply with the adjusted operands
|
||||
add_s DBL1H,DBL1H,r6
|
||||
|
||||
.balign 4
|
||||
.Linf_nan:
|
||||
bclr r12,DBL1H,31
|
||||
xor_s DBL1H,DBL1H,DBL0H  ; bit 31 of DBL1H = sign of the product
|
||||
bclr_s DBL0H,DBL0H,31
|
||||
max r8,DBL0H,r12 ; either NaN -> NaN ; otherwise inf
|
||||
or.f 0,DBL0H,DBL0L
|
||||
mov_s DBL0L,0
|
||||
or.ne.f DBL1L,DBL1L,r12
|
||||
not_s DBL0H,DBL0L ; inf * 0 -> NaN
|
||||
mov.ne DBL0H,r8
|
||||
tst_s DBL1H,DBL1H
|
||||
j_s.d [blink]
|
||||
bset.mi DBL0H,DBL0H,31  ; delay slot: set the result sign when the product sign is negative
|
||||
|
||||
.Lret0: xor_s DBL0H,DBL0H,DBL1H
|
||||
bclr DBL1H,DBL0H,31
|
||||
xor_s DBL0H,DBL0H,DBL1H  ; DBL0H now holds only the product sign bit
|
||||
j_s.d [blink]
|
||||
mov_l DBL0L,0  ; delay slot: low word of the zero result
|
||||
|
||||
.balign 4
|
||||
.Ldenorm_2:
|
||||
breq_s DBL1L,0,.Lret0 ; 0 input -> 0 output
|
||||
norm.f r12,DBL1L
|
||||
|
||||
mov.mi r12,21
|
||||
add.pl r12,r12,22
|
||||
neg r11,r12
|
||||
asl_s r12,r12,20  ; shift count expressed in exponent-field units
|
||||
lsr.f DBL1H,DBL1L,r11
|
||||
ror DBL1L,DBL1L,r11
|
||||
sub_s DBL0H,DBL0H,r12
|
||||
mov.eq DBL1H,DBL1L
|
||||
sub_s DBL1L,DBL1L,DBL1H
|
||||
/* Fall through. */
|
||||
.global __muldf3
|
||||
.balign 4
|
||||
__muldf3:
|
||||
ld.as r9,[pcl,0x4b] ; ((.L7ff00000-.+2)/4)] -- r9 = 0x7ff00000 exponent mask; the literal offset must track the code size up to .L7ff00000
|
||||
mpyhu r4,DBL0L,DBL1L
|
||||
bmsk r6,DBL0H,19
|
||||
bset r6,r6,20  ; r6 = DBL0 high fraction bits with the implicit one
|
||||
mpyu r7,r6,DBL1L
|
||||
and r11,DBL0H,r9  ; r11 = DBL0 exponent field
|
||||
breq r11,0,.Ldenorm_dbl0
|
||||
mpyhu r8,r6,DBL1L
|
||||
bmsk r10,DBL1H,19
|
||||
bset r10,r10,20  ; r10 = DBL1 high fraction bits with the implicit one
|
||||
mpyhu r5,r10,DBL0L
|
||||
add.f r4,r4,r7
|
||||
and r12,DBL1H,r9  ; r12 = DBL1 exponent field
|
||||
mpyhu r7,r6,r10
|
||||
breq r12,0,.Ldenorm_dbl1
|
||||
adc.f r5,r5,r8
|
||||
mpyu r8,r10,DBL0L
|
||||
breq r11,r9,.Linf_nan
|
||||
breq r12,r9,.Linf_nan
|
||||
mpyu r6,r6,r10
|
||||
add.cs r7,r7,1
|
||||
add.f r4,r4,r8
|
||||
mpyu r10,DBL1L,DBL0L
|
||||
bclr r8,r9,30 ; 0x3ff00000
|
||||
adc.f r5,r5,r6
|
||||
; XMAC write-back stall / std. mult stall is one cycle later
|
||||
bclr r6,r9,20 ; 0x7fe00000
|
||||
add.cs r7,r7,1 ; fraction product in r7:r5:r4
|
||||
tst r10,r10
|
||||
bset.ne r4,r4,0 ; put least significant word into sticky bit
|
||||
lsr.f r10,r7,9
|
||||
add_l r12,r12,r11 ; add exponents
|
||||
rsub.eq r8,r8,r9 ; 0x40000000
|
||||
sub r12,r12,r8 ; subtract bias + implicit 1
|
||||
brhs.d r12,r6,.Linf_denorm
|
||||
rsub r10,r10,12  ; delay slot: r10 = left-shift count for the fraction product
|
||||
.Lshift_frac:
|
||||
neg r8,r10  ; r8 = complementary right-shift count
|
||||
asl r6,r4,r10
|
||||
lsr DBL0L,r4,r8
|
||||
add.f 0,r6,r6
|
||||
btst.eq DBL0L,0
|
||||
cmp.eq r4,r4 ; round to nearest / round to even
|
||||
asl r4,r5,r10
|
||||
lsr r5,r5,r8
|
||||
adc.f DBL0L,DBL0L,r4
|
||||
xor.f 0,DBL0H,DBL1H  ; N = sign of the product
|
||||
asl r7,r7,r10
|
||||
add_s r12,r12,r5
|
||||
adc DBL0H,r12,r7
|
||||
j_s.d [blink]
|
||||
bset.mi DBL0H,DBL0H,31  ; delay slot: apply the product sign
|
||||
|
||||
/* We have checked for infinity / NaN input before, and transformed
|
||||
denormalized inputs into normalized inputs. Thus, the worst case
|
||||
exponent overflows are:
|
||||
1 + 1 - 0x400 == 0xc02 : maximum underflow
|
||||
0x7fe + 0x7fe - 0x3ff == 0xbfd ; maximum overflow
|
||||
N.B. 0x7e and 0x7f are also values for overflow.
|
||||
|
||||
If (r12 <= -54), we have an underflow to zero. */
|
||||
.balign 4
|
||||
.Linf_denorm:
|
||||
brlo r12,0xc0000000,.Linf  ; unsigned r12 below 0xc0000000: overflow, return infinity
|
||||
asr r6,r12,20  ; r6 = (negative) exponent as a plain integer
|
||||
mov_s r12,0
|
||||
add.f r10,r10,r6  ; reduce the shift count by the exponent deficit
|
||||
brgt r10,0,.Lshift_frac
|
||||
beq_s .Lround_frac
|
||||
add.f r10,r10,32
|
||||
.Lshift32_frac:  ; shift the product right by one whole word, keeping a sticky bit
|
||||
tst r4,r4
|
||||
mov r4,r5
|
||||
bset.ne r4,r4,1
|
||||
mov r5,r7
|
||||
mov r7,0
|
||||
brge r10,1,.Lshift_frac
|
||||
breq r10,0,.Lround_frac
|
||||
add.f r10,r10,32
|
||||
brgt r10,21,.Lshift32_frac
|
||||
b_s .Lret0
|
||||
|
||||
.Lround_frac:
|
||||
add.f 0,r4,r4
|
||||
btst.eq r5,0
|
||||
mov_s DBL0L,r5
|
||||
mov_s DBL0H,r7
|
||||
adc.eq.f DBL0L,DBL0L,0
|
||||
j_s.d [blink]
|
||||
|
||||
adc.eq DBL0H,DBL0H,0
|
||||
|
||||
.Linf: xor.f DBL1H,DBL1H,DBL0H  ; N = sign of the product
|
||||
mov_s DBL0L,0
|
||||
mov_s DBL0H,r9  ; r9 = 0x7ff00000: high word of infinity
|
||||
j_s.d [blink]
|
||||
bset.mi DBL0H,DBL0H,31  ; delay slot: apply the product sign
|
||||
ENDFUNC(__muldf3)
|
||||
|
||||
.balign 4
|
||||
.L7ff00000:  ; exponent mask literal, loaded pc-relative at __muldf3
|
||||
.long 0x7ff00000
|
||||
|
|
@ -0,0 +1,180 @@
|
|||
/* Copyright (C) 2008-2013 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* XMAC schedule: directly back-to-back multiplies stall; the third
|
||||
instruction after a multiply stalls unless it is also a multiply. */
|
||||
#include "arc-ieee-754.h"
|
||||
|
||||
#if 0 /* DEBUG */
; Disabled self-check wrapper: calls the C reference __mulsf3_c and the
; assembly version __mulsf3_asm with the same r0/r1 arguments.  It returns
; normally when the results are identical or when both are NaNs, and calls
; abort otherwise.
|
||||
.global __mulsf3
|
||||
FUNC(__mulsf3)
|
||||
.balign 4
|
||||
__mulsf3:
|
||||
push_s blink
|
||||
push_s r1
|
||||
bl.d __mulsf3_c
|
||||
push_s r0  ; delay slot: save the first argument
|
||||
ld_s r1,[sp,4]  ; reload the second argument ...
|
||||
st_s r0,[sp,4]  ; ... and reuse its slot for the reference result
|
||||
bl.d __mulsf3_asm
|
||||
pop_s r0  ; delay slot: restore the first argument
|
||||
pop_s r1  ; r1 = reference result
|
||||
pop_s blink
|
||||
cmp r0,r1
|
||||
jeq_s [blink]
|
||||
and r12,r0,r1
|
||||
bic.f 0,0x7f800000,r12  ; Z set only when both results have an all-ones exponent
|
||||
bne 0f
|
||||
bmsk.f 0,r0,22  ; a non-zero fraction marks a NaN
|
||||
bmsk.ne.f r1,r1,22
|
||||
jne_s [blink] ; both NaN -> OK
|
||||
0: bl abort
|
||||
ENDFUNC(__mulsf3)
|
||||
#define __mulsf3 __mulsf3_asm
|
||||
#endif /* DEBUG */
|
||||
|
||||
; __mulsf3: multiply the IEEE singles in r0 and r1; the product is returned
; in r0.  The code writes r1-r4, r6-r9, r11, r12 and the flags.  The two
; literal words after the code are loaded with hard-coded pc-relative offsets.
.balign 4
|
||||
.global __mulsf3
|
||||
FUNC(__mulsf3)
|
||||
__mulsf3:
|
||||
ld.as r9,[pcl,79]; [pcl,((.L7f800000-.+2)/4)] -- r9 = 0x7f800000 exponent mask; the literal offset must track the code size
|
||||
bmsk r4,r1,22  ; r4 = fraction of r1
|
||||
bset r2,r0,23
|
||||
asl_s r2,r2,8  ; r2 = fraction of r0 with implicit one, left-aligned
|
||||
bset r3,r4,23  ; r3 = fraction of r1 with implicit one
|
||||
mpyhu r6,r2,r3
|
||||
and r11,r0,r9  ; r11 = exponent field of r0
|
||||
breq r11,0,.Ldenorm_dbl0
|
||||
mpyu r7,r2,r3
|
||||
breq r11,r9,.Linf_nan_dbl0
|
||||
and r12,r1,r9  ; r12 = exponent field of r1
|
||||
asl.f 0,r6,8
|
||||
breq r12,0,.Ldenorm_dbl1
|
||||
.Lpast_denorm:
|
||||
xor_s r0,r0,r1  ; bit 31 of r0 = sign of the product
|
||||
.Lpast_denorm_dbl1:
|
||||
add.pl r6,r6,r6
|
||||
bclr.pl r6,r6,23
|
||||
add.pl.f r7,r7,r7
|
||||
ld.as r4,[pcl,64]; [pcl,((.L7fffffff-.+2)/4)] -- r4 = 0x7fffffff; the literal offset must track the code size
|
||||
add.cs r6,r6,1
|
||||
lsr.f 0,r6,1
|
||||
breq r12,r9,.Linf_nan_dbl1
|
||||
add_s r12,r12,r11  ; add the exponent fields
|
||||
adc.f 0,r7,r4
|
||||
add_s r12,r12, \
|
||||
-0x3f800000
|
||||
adc.f r8,r6,r12
|
||||
bic r0,r0,r4  ; r0 = product sign bit only
|
||||
tst.pl r8,r9
|
||||
min r3,r8,r9
|
||||
jpnz.d [blink]
|
||||
add.pnz r0,r0,r3  ; delay slot: normal result = sign + exponent/fraction
|
||||
; infinity or denormal number
|
||||
add.ne.f r3,r3,r3
|
||||
bpnz .Linfinity
|
||||
asr_s r3,r3,23+1
|
||||
bset r6,r6,23
|
||||
sub_s r3,r3,1
|
||||
neg_s r2,r3
|
||||
brhi r2,24,.Lret_r0 ; right shift count > 24 -> return +-0
|
||||
lsr r2,r6,r2
|
||||
asl r9,r6,r3
|
||||
lsr.f 0,r2,1
|
||||
tst r7,r7
|
||||
add_s r0,r0,r2
|
||||
bset.ne r9,r9,0
|
||||
adc.f 0,r9,r4
|
||||
j_s.d [blink]
|
||||
add.cs r0,r0,1  ; delay slot: conditional round-up increment
|
||||
.Linfinity:
|
||||
j_s.d [blink]
|
||||
add_s r0,r0,r9  ; delay slot: sign bit + 0x7f800000
|
||||
|
||||
.Lret_r0: j_s [blink]
|
||||
|
||||
.balign 4
|
||||
.Linf_nan_dbl0:
|
||||
sub_s r2,r1,1 ; inf/nan * 0 -> nan; inf * nan -> nan (use |r2| >= inf)
|
||||
bic.f 0,r9,r2
|
||||
xor_s r0,r0,r1
|
||||
bclr_s r1,r1,31
|
||||
xor_s r0,r0,r1  ; r0 = original r0 with the sign of r1 folded in
|
||||
jne_s [blink]
|
||||
.Lretnan:
|
||||
j_s.d [blink]
|
||||
mov r0,-1  ; delay slot: the all-ones pattern is returned as the NaN
|
||||
.Ldenorm_dbl0_inf_nan_dbl1:
|
||||
bmsk.f 0,r0,30  ; Z set when r0 is +-0
|
||||
beq_s .Lretnan
|
||||
xor_s r0,r0,r1
|
||||
.Linf_nan_dbl1:
|
||||
xor_s r1,r1,r0
|
||||
bclr_s r1,r1,31
|
||||
j_s.d [blink]
|
||||
xor_s r0,r0,r1
|
||||
|
||||
.balign 4
|
||||
.Ldenorm_dbl0:  ; r0 has a zero exponent field: normalize its fraction and redo the multiply
|
||||
bclr_s r2,r2,31
|
||||
norm.f r4,r2
|
||||
and r12,r1,r9  ; r12 = exponent field of r1
|
||||
add_s r2,r2,r2
|
||||
asl r2,r2,r4
|
||||
asl r4,r4,23  ; normalisation count expressed in exponent-field units
|
||||
mpyhu r6,r2,r3
|
||||
breq r12,r9,.Ldenorm_dbl0_inf_nan_dbl1
|
||||
sub.ne.f r12,r12,r4
|
||||
mpyu r7,r2,r3
|
||||
bhi.d .Lpast_denorm
|
||||
asl.f 0,r6,8
|
||||
xor_s r0,r0,r1
|
||||
bmsk r1,r0,30
|
||||
j_s.d [blink]
|
||||
bic_l r0,r0,r1  ; delay slot: return a zero carrying the product sign
|
||||
|
||||
.balign 4
|
||||
.Ldenorm_dbl1:  ; r1 has a zero exponent field: normalize its fraction and redo the multiply
|
||||
norm.f r3,r4
|
||||
xor_s r0,r0,r1
|
||||
sub_s r3,r3,7
|
||||
asl r4,r4,r3
|
||||
sub_s r3,r3,1
|
||||
asl_s r3,r3,23  ; normalisation count expressed in exponent-field units
|
||||
mpyhu r6,r2,r4
|
||||
sub.ne.f r11,r11,r3
|
||||
bmsk r8,r0,30
|
||||
mpyu r7,r2,r4
|
||||
bhi.d .Lpast_denorm_dbl1
|
||||
asl.f 0,r6,8
|
||||
j_s.d [blink]
|
||||
bic r0,r0,r8  ; delay slot: return a zero carrying the product sign
|
||||
|
||||
.balign 4
|
||||
.L7f800000:
|
||||
.long 0x7f800000
|
||||
.L7fffffff:
|
||||
.long 0x7fffffff
|
||||
ENDFUNC(__mulsf3)
|
||||
|
|
@ -0,0 +1,63 @@
|
|||
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "arc-ieee-754.h"
|
||||
/* inputs: DBL0, DBL1
|
||||
output: c flag
|
||||
clobber: r12, flags
|
||||
For NaNs, bit 19 .. bit 30 must be set. */
|
||||
#if 0 /* DEBUG */
|
||||
.global __orddf2
|
||||
.balign 4
|
||||
FUNC(__orddf2)
|
||||
__orddf2:
|
||||
st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4]
|
||||
st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4]
|
||||
st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1`
|
||||
bl.d __unorddf2_c` push_s r0
|
||||
mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3
|
||||
ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]`
|
||||
ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4]
|
||||
bl.d __orddf2_asm` ld.ab r10,[sp,4]
|
||||
pop_s blink
|
||||
brne.d r11,0,0f
|
||||
ld.ab r11,[sp,4]
|
||||
jcc [blink]
|
||||
bl abort
|
||||
0: jcs [blink]
|
||||
bl abort
|
||||
ENDFUNC(__orddf2)
|
||||
#define __orddf2 __orddf2_asm
|
||||
#endif /* DEBUG */
|
||||
/* __orddf2: NaN check on the high words of two doubles; the answer is
   delivered in the carry flag and r12 is only a scratch destination.
   For each operand the high word is masked to bits 0..20 (bmsk) and then
   added to the high word shifted left by one (add1.f).  Per the notes on
   the add1 lines, carry comes out set when that operand is a NaN.  The
   second add1 is predicated on carry clear, so a carry already produced
   by the first operand is kept.  */
.global __orddf2
|
||||
.balign 4
|
||||
HIDDEN_FUNC(__orddf2)
|
||||
__orddf2:
|
||||
bmsk r12,DBL0H,20
|
||||
add1.f r12,r12,DBL0H /* clear z; set c if NaN. */
|
||||
bmsk r12,DBL1H,20
|
||||
j_s.d [blink] /* return; the add1 below is in the delay slot. */
|
||||
add1.cc.f r12,r12,DBL1H /* clear z; set c if NaN. */
|
||||
ENDFUNC(__orddf2)
|
||||
|
|
@ -0,0 +1,63 @@
|
|||
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "arc-ieee-754.h"
|
||||
/* inputs: r0, r1
|
||||
output: c flag
|
||||
clobber: r12, flags
|
||||
For NaNs, bit 22 .. bit 30 must be set. */
|
||||
#if 0 /* DEBUG */
|
||||
.global __ordsf2
|
||||
.balign 4
|
||||
FUNC(__ordsf2)
|
||||
__ordsf2:
|
||||
st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4]
|
||||
st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4]
|
||||
st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1`
|
||||
bl.d __unordsf2_c` push_s r0
|
||||
mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3
|
||||
ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]`
|
||||
ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4]
|
||||
bl.d __ordsf2_asm` ld.ab r10,[sp,4]
|
||||
pop_s blink
|
||||
brne.d r11,0,0f
|
||||
ld.ab r11,[sp,4]
|
||||
jcc [blink]
|
||||
bl abort
|
||||
0: jcs [blink]
|
||||
bl abort
|
||||
ENDFUNC(__ordsf2)
|
||||
#define __ordsf2 __ordsf2_asm
|
||||
#endif /* DEBUG */
|
||||
/* __ordsf2: NaN check on the two singles in r0 and r1; the answer is
   delivered in the carry flag and r12 is only a scratch destination.
   Each operand is masked to bits 0..23 (bmsk) and then added to the
   operand shifted left by one (add1.f).  Per the notes on the add1 lines,
   carry comes out set when that operand is a NaN.  The second add1 is
   predicated on carry clear, so a carry already produced by r0 is kept.  */
.global __ordsf2
|
||||
.balign 4
|
||||
HIDDEN_FUNC(__ordsf2)
|
||||
__ordsf2:
|
||||
bmsk r12,r0,23
|
||||
add1.f r12,r12,r0 /* clear z; set c if NaN. */
|
||||
bmsk r12,r1,23
|
||||
j_s.d [blink] /* return; the add1 below is in the delay slot. */
|
||||
add1.cc.f r12,r12,r1 /* clear z; set c if NaN. */
|
||||
ENDFUNC(__ordsf2)
|
||||
|
|
@ -0,0 +1,134 @@
|
|||
/* Copyright (C) 2006, 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "arc-ieee-754.h"
|
||||
|
||||
#if 0 /* DEBUG */
|
||||
FUNC(__truncdfsf2)
|
||||
.global __truncdfsf2
|
||||
.balign 4
|
||||
__truncdfsf2:
|
||||
push_s blink
|
||||
push_s r0
|
||||
bl.d __truncdfsf2_c
|
||||
push_s r1
|
||||
mov_s r2,r0
|
||||
pop_s r1
|
||||
ld r0,[sp]
|
||||
bl.d __truncdfsf2_asm
|
||||
st r2,[sp]
|
||||
pop_s r1
|
||||
pop_s blink
|
||||
cmp r0,r1
|
||||
jeq_s [blink]
|
||||
and r12,r0,r1
|
||||
bic.f 0,0x7f800000,r12
|
||||
bne 0f
|
||||
bmsk.f 0,r0,22
|
||||
bmsk.ne.f r1,r1,22
|
||||
jne_s [blink] ; both NaN -> OK
|
||||
0: bl abort
|
||||
ENDFUNC(__truncdfsf2)
|
||||
#define __truncdfsf2 __truncdfsf2_asm
|
||||
#endif /* DEBUG */
|
||||
|
||||
/* __truncdfsf2: double in DBL0H:DBL0L -> single in r0.
   r2 receives the top 12 bits of the high word and r12 that value minus
   0x380; r3 is r12 with bit 11 cleared and selects the path:
     r3 >= 0xff (unsigned) -> .Lill_exp, which in turn goes to .Linf_nan
                              when bit 10 of r2 is set, to .Lzero when the
                              computed shift count is out of range, and
                              otherwise right-shifts the fraction;
     r3 == 0               -> .Ldenorm0;
     otherwise             -> fall through.
   Every path returns through blink with the result in r0.  */
.global __truncdfsf2
|
||||
.balign 4
|
||||
FUNC(__truncdfsf2)
|
||||
__truncdfsf2:
|
||||
lsr r2,DBL0H,20 /* r2 = top 12 bits of the high word. */
|
||||
asl_s DBL0H,DBL0H,12
|
||||
sub r12,r2,0x380 /* 0x380 = 1023 - 127. */
|
||||
bclr.f r3,r12,11 /* r3 = r12 without bit 11; Z is used by beq_l. */
|
||||
brhs r3,0xff,.Lill_exp
|
||||
beq_l .Ldenorm0
|
||||
asl_s r12,r12,23
|
||||
tst DBL0L, \
|
||||
0x2fffffff /* Check if msb guard bit wants rounding up. */
|
||||
lsr_s DBL0L,DBL0L,28
|
||||
lsr_s DBL0H,DBL0H,8
|
||||
add.ne DBL0L,DBL0L,1
|
||||
add_s DBL0H,DBL0H,DBL0L
|
||||
lsr_s DBL0H,DBL0H
|
||||
btst_s r2,11 /* bit 11 of r2 is the top bit of the input. */
|
||||
add_s r0,DBL0H,r12
|
||||
j_s.d [blink]
|
||||
bxor.ne r0,r0,31 /* delay slot: flip bit 31 if bit 11 of r2 was set. */
|
||||
.balign 4
|
||||
.Lill_exp:
|
||||
bbit1 r2,10,.Linf_nan
|
||||
bmsk_s r12,r12,9
|
||||
rsub.f r12,r12,8+0x400-32 ; Go from 9 to 1 guard bit in MSW.
|
||||
bhs_s .Lzero
|
||||
lsr r3,DBL0L,21
|
||||
rrc DBL0H,DBL0H ; insert leading 1
|
||||
asl.f 0,DBL0L,8 ; check lower 24 guard bits
|
||||
add_s r3,DBL0H,r3
|
||||
add.pnz r3,r3,1 ; assemble fraction with compressed guard bits.
|
||||
lsr r0,r3,r12
|
||||
neg_s r12,r12
|
||||
btst_s r0,1
|
||||
asl.eq.f r3,r3,r12
|
||||
add.ne r0,r0,1
|
||||
btst_s r2,11
|
||||
lsr_s r0,r0
|
||||
j_s.d [blink]
|
||||
bxor.ne r0,r0,31
|
||||
/* Return a zero that keeps the top bit of the input: bit 11 of r2 is
   moved down to bit 0 and then up to bit 31.  */
.Lzero:
|
||||
lsr_s r2,r2,11
|
||||
j_s.d [blink]
|
||||
asl r0,r2,31
|
||||
/* Reached when r3 == 0, i.e. r12 is either 0 or just bit 11.  */
.Ldenorm0:
|
||||
asl_s r12,r12,20 /* moves bit 11 of r12 to bit 31. */
|
||||
tst DBL0L, \
|
||||
0x5fffffff /* Check if msb guard bit wants rounding up. */
|
||||
lsr_s DBL0L,DBL0L,29
|
||||
lsr_s DBL0H,DBL0H,9
|
||||
add.ne DBL0L,DBL0L,1
|
||||
bset_s DBL0H,DBL0H,23
|
||||
add_s DBL0H,DBL0H,DBL0L
|
||||
lsr_s DBL0H,DBL0H
|
||||
j_s.d [blink]
|
||||
add_l r0,DBL0H,r12
|
||||
|
||||
/* We would generally say that NaNs must have a non-zero high fraction part,
|
||||
but to allow hardware double precision floating point to interoperate
|
||||
with single precision software floating point, we make an exception here.
|
||||
The cost is to replace a tst_s DBL0H with an or.f DBL0L,DBL0L,DBL0H .
|
||||
As we start out unaligned, and there is an odd number of other short insns,
|
||||
we have a choice of letting this cost us a misalign penalty or
|
||||
4 more bytes (if we align the code). We choose the former here because
|
||||
infinity / NaN is not expected to be prevalent in time-critical code. */
|
||||
.Linf_nan:
|
||||
or.f DBL0L,DBL0L,DBL0H /* Z set only if all remaining fraction bits are 0. */
|
||||
mov_s r0,1
|
||||
add.ne r2,r2,1
|
||||
tst r2,0x7ff
|
||||
asl.ne r0,r0,23
|
||||
btst_s r12,11
|
||||
neg r0,r0
|
||||
j_s.d [blink]
|
||||
bxor.eq r0,r0,31 /* delay slot: flip bit 31 if bit 11 of r12 was clear. */
|
||||
ENDFUNC(__truncdfsf2)
|
||||
|
|
@ -0,0 +1,73 @@
|
|||
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "arc-ieee-754.h"
|
||||
/* inputs: DBL0, DBL1
|
||||
output: z flag
|
||||
clobber: r12, flags
|
||||
For NaNs, bit 19.. bit 30 of the high word must be set. */
|
||||
#if 0 /* DEBUG */
|
||||
.global __uneqdf2
|
||||
.balign 4
|
||||
FUNC(__uneqdf2)
|
||||
__uneqdf2:
|
||||
st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4]
|
||||
st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4]
|
||||
st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1`
|
||||
bl.d __eqdf2_c` push_s r0
|
||||
push_s r0` ld_s r0, [sp,4]` ld_s r1, [sp,8]` ld_s r2,[sp,12]
|
||||
bl.d __unorddf2_c` ld_s r3,[sp,16]
|
||||
ld.ab r11,[sp,4]` tst r0,r0` mov.ne r11,0
|
||||
pop_s r0` pop_s r1` pop_s r2` pop_s r3
|
||||
ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]`
|
||||
ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4]
|
||||
bl.d __uneqdf2_asm` ld.ab r10,[sp,4]
|
||||
pop_s blink
|
||||
breq.d r11,0,0f
|
||||
ld.ab r11,[sp,4]
|
||||
jne_s [blink]
|
||||
bl abort
|
||||
0: jeq_s [blink]
|
||||
bl abort
|
||||
ENDFUNC(__uneqdf2)
|
||||
#define __uneqdf2 __uneqdf2_asm
|
||||
#endif /* DEBUG */
|
||||
/* __uneqdf2: sets the z flag for DBL0/DBL1 in three steps, returning as
   soon as z is established:
     1. both words of the operands compare equal;
     2. the low words OR to zero and the high words OR to zero once
        bit 31 is cleared;
     3. either high word has every bit of 0x7ff80000 set (the NaN
        convention stated in the header of this file).
   r12 is scratch.  */
.global __uneqdf2
|
||||
.balign 4
|
||||
HIDDEN_FUNC(__uneqdf2)
|
||||
__uneqdf2:
|
||||
cmp_s DBL0H,DBL1H
|
||||
cmp.eq DBL0L,DBL1L /* low words are compared only if the high words match. */
|
||||
jeq_s [blink]
|
||||
or r12,DBL0H,DBL1H
|
||||
or.f 0,DBL0L,DBL1L
|
||||
bclr.eq.f r12,r12,31 /* ignore bit 31 of the combined high words. */
|
||||
jeq_s [blink]
|
||||
mov_s r12, \
|
||||
0x7ff80000
|
||||
bic.f 0,r12,DBL0H /* z if DBL0H has all bits of the mask set. */
|
||||
j_s.d [blink]
|
||||
bic.ne.f r12,r12,DBL1H /* delay slot: same test on DBL1H if still nz. */
|
||||
ENDFUNC(__uneqdf2)
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "arc-ieee-754.h"
|
||||
/* inputs: r0, r1
|
||||
output: z flag
|
||||
clobber: r12, flags
|
||||
For NaNs, bit 22 .. bit 30 must be set. */
|
||||
#if 0 /* DEBUG */
|
||||
.global __uneqsf2
|
||||
.balign 4
|
||||
FUNC(__uneqsf2)
|
||||
__uneqsf2:
|
||||
st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4]
|
||||
st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4]
|
||||
st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1`
|
||||
bl.d __eqsf2_c` push_s r0
|
||||
push_s r0` ld_s r0, [sp,4]
|
||||
bl.d __unordsf2_c` ld_s r1,[sp,8]
|
||||
ld.ab r11,[sp,4]` tst r0,r0` mov.ne r11,0
|
||||
pop_s r0` pop_s r1` pop_s r2` pop_s r3
|
||||
ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]`
|
||||
ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4]
|
||||
bl.d __uneqsf2_asm` ld.ab r10,[sp,4]
|
||||
pop_s blink
|
||||
breq.d r11,0,0f
|
||||
ld.ab r11,[sp,4]
|
||||
jne_s [blink]
|
||||
bl abort
|
||||
0: jeq_s [blink]
|
||||
bl abort
|
||||
ENDFUNC(__uneqsf2)
|
||||
#define __uneqsf2 __uneqsf2_asm
|
||||
#endif /* DEBUG */
|
||||
/* __uneqsf2: sets the z flag for the singles in r0 and r1.  Each later
   test is predicated on "ne", so it only runs while z is still clear:
     1. r0 has every bit of 0x7fc00000 set (the NaN convention stated in
        the header of this file);
     2. r1 has every bit of 0x7fc00000 set;
     3. r0 | r1 is zero in bits 0..30;
     4. r0 equals r1.
   r12 is scratch.  */
.global __uneqsf2
|
||||
.balign 4
|
||||
HIDDEN_FUNC(__uneqsf2)
|
||||
__uneqsf2:
|
||||
mov_s r12, \
|
||||
0x7fc00000
|
||||
bic.f 0,r12,r0 /* z if r0 has all bits of the mask set. */
|
||||
bic.ne.f r12,r12,r1 /* otherwise the same test on r1. */
|
||||
or r12,r0,r1
|
||||
bmsk.ne.f r12,r12,30 /* z if both operands are zero below bit 31. */
|
||||
j_s.d [blink]
|
||||
cmp.ne r0,r1 /* delay slot: final bitwise comparison. */
|
||||
ENDFUNC(__uneqsf2)
|
||||
|
|
@ -0,0 +1,157 @@
|
|||
/* .init/.fini section handling + C++ global constructor/destructor handling.
|
||||
This file is based on crtstuff.c, sol2-crti.asm, sol2-crtn.asm.
|
||||
|
||||
Copyright (C) 1995, 1997, 1998, 2007-2012 Free Software Foundation, Inc.
|
||||
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
on behalf of Synopsys Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Declare a pointer to void function type. */
|
||||
typedef void (*func_ptr) (void);
|
||||
|
||||
#ifdef CRT_INIT
|
||||
|
||||
/* NOTE: In order to be able to support SVR4 shared libraries, we arrange
|
||||
to have one set of symbols { __CTOR_LIST__, __DTOR_LIST__, __CTOR_END__,
|
||||
__DTOR_END__ } per root executable and also one set of these symbols
|
||||
per shared library. So in any given whole process image, we may have
|
||||
multiple definitions of each of these symbols. In order to prevent
|
||||
these definitions from conflicting with one another, and in order to
|
||||
ensure that the proper lists are used for the initialization/finalization
|
||||
of each individual shared library (respectively), we give these symbols
|
||||
only internal (i.e. `static') linkage, and we also make it a point to
|
||||
refer to only the __CTOR_END__ symbol in crtfini.o and the __DTOR_LIST__
|
||||
symbol in crtinit.o, where they are defined. */
|
||||
|
||||
static func_ptr __CTOR_LIST__[1] __attribute__ ((section (".ctors")))
|
||||
= { (func_ptr) (-1) };
|
||||
|
||||
static func_ptr __DTOR_LIST__[1] __attribute__ ((section (".dtors")))
|
||||
= { (func_ptr) (-1) };
|
||||
|
||||
/* Run all the global destructors on exit from the program. */
|
||||
|
||||
/* Some systems place the number of pointers in the first word of the
|
||||
table. On SVR4 however, that word is -1. In all cases, the table is
|
||||
null-terminated. On SVR4, we start from the beginning of the list and
|
||||
invoke each per-compilation-unit destructor routine in order
|
||||
until we find that null.
|
||||
|
||||
Note that this function MUST be static. There will be one of these
|
||||
functions in each root executable and one in each shared library, but
|
||||
although they all have the same code, each one is unique in that it
|
||||
refers to one particular associated `__DTOR_LIST__' which belongs to the
|
||||
same particular root executable or shared library file. */
|
||||
|
||||
static void __do_global_dtors (void)
|
||||
asm ("__do_global_dtors") __attribute__ ((section (".text")));
|
||||
|
||||
static void
|
||||
__do_global_dtors (void)
|
||||
{
|
||||
func_ptr *p;
|
||||
for (p = __DTOR_LIST__ + 1; *p; p++)
|
||||
(*p) ();
|
||||
}
|
||||
|
||||
/* .init section start.
|
||||
This must appear at the start of the .init section. */
|
||||
|
||||
asm ("\n\
|
||||
.section .init\n\
|
||||
.global init\n\
|
||||
.word 0\n\
|
||||
init:\n\
|
||||
st blink,[sp,4]\n\
|
||||
st fp,[sp]\n\
|
||||
mov fp,sp\n\
|
||||
sub sp,sp,16\n\
|
||||
");
|
||||
|
||||
/* .fini section start.
|
||||
This must appear at the start of the .fini section. */
|
||||
|
||||
asm ("\n\
|
||||
.section .fini\n\
|
||||
.global fini\n\
|
||||
.word 0\n\
|
||||
fini:\n\
|
||||
st blink,[sp,4]\n\
|
||||
st fp,[sp]\n\
|
||||
mov fp,sp\n\
|
||||
sub sp,sp,16\n\
|
||||
bl.nd __do_global_dtors\n\
|
||||
");
|
||||
|
||||
#endif /* CRT_INIT */
|
||||
|
||||
#ifdef CRT_FINI
|
||||
|
||||
/* Put a word containing zero at the end of each of our two lists of function
|
||||
addresses. Note that the words defined here go into the .ctors and .dtors
|
||||
sections of the crtend.o file, and since that file is always linked in
|
||||
last, these words naturally end up at the very ends of the two lists
|
||||
contained in these two sections. */
|
||||
|
||||
static func_ptr __CTOR_END__[1] __attribute__ ((section (".ctors")))
|
||||
= { (func_ptr) 0 };
|
||||
|
||||
static func_ptr __DTOR_END__[1] __attribute__ ((section (".dtors")))
|
||||
= { (func_ptr) 0 };
|
||||
|
||||
/* Run all global constructors for the program.
|
||||
Note that they are run in reverse order. */
|
||||
|
||||
static void __do_global_ctors (void)
|
||||
asm ("__do_global_ctors") __attribute__ ((section (".text")));
|
||||
|
||||
static void
|
||||
__do_global_ctors (void)
|
||||
{
|
||||
func_ptr *p;
|
||||
for (p = __CTOR_END__ - 1; *p != (func_ptr) -1; p--)
|
||||
(*p) ();
|
||||
}
|
||||
|
||||
/* .init section end.
|
||||
This must live at the end of the .init section. */
|
||||
|
||||
asm ("\n\
|
||||
.section .init\n\
|
||||
bl.nd __do_global_ctors\n\
|
||||
ld blink,[fp,4]\n\
|
||||
j.d blink\n\
|
||||
ld.a fp,[sp,16]\n\
|
||||
");
|
||||
|
||||
/* .fini section end.
|
||||
This must live at the end of the .fini section. */
|
||||
|
||||
asm ("\n\
|
||||
.section .fini\n\
|
||||
ld blink,[fp,4]\n\
|
||||
j.d blink\n\
|
||||
ld.a fp,[sp,16]\n\
|
||||
");
|
||||
|
||||
#endif /* CRT_FINI */
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,43 @@
|
|||
# Exclude libgcc.so symbols for the Synopsys DesignWare ARC CPU.
|
||||
|
||||
# Copyright (C) 2007-2012 Free Software Foundation, Inc.
|
||||
# Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
# on behalf of Synopsys Inc.
|
||||
|
||||
# This file is part of GCC.
|
||||
|
||||
# GCC is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the GNU General Public License as published by the Free
|
||||
# Software Foundation; either version 3, or (at your option) any later
|
||||
# version.
|
||||
|
||||
# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
# WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
# for more details.
|
||||
|
||||
# Under Section 7 of GPL version 3, you are granted additional
|
||||
# permissions described in the GCC Runtime Library Exception, version
|
||||
# 3.1, as published by the Free Software Foundation.
|
||||
|
||||
# You should have received a copy of the GNU General Public License and
|
||||
# a copy of the GCC Runtime Library Exception along with this program;
|
||||
# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
# <http://www.gnu.org/licenses/>.
|
||||
|
||||
# Exclude various symbols which should not be visible in libgcc.so for ARC.
|
||||
# Floating point comparisons use a special lightweight ABI which is not
|
||||
# compatible with calls via a plt. Moreover, the code is so compact that
|
||||
# it is better to include a separate copy in each dso.
|
||||
%exclude {
|
||||
__eqsf2
|
||||
__eqdf2
|
||||
__gtsf2
|
||||
__gtdf2
|
||||
__gesf2
|
||||
__gedf2
|
||||
__uneqsf2
|
||||
__uneqdf2
|
||||
__ordsf2
|
||||
__orddf2
|
||||
}
|
||||
|
|
@ -0,0 +1,100 @@
|
|||
# GCC Makefile fragment for Synopsys DesignWare ARC
|
||||
|
||||
# Copyright (C) 2007-2013 Free Software Foundation, Inc.
|
||||
# Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
# on behalf of Synopsys Inc.
|
||||
|
||||
# This file is part of GCC.
|
||||
|
||||
# GCC is free software; you can redistribute it and/or modify it under the
|
||||
# terms of the GNU General Public License as published by the Free Software
|
||||
# Foundation; either version 3, or (at your option) any later version.
|
||||
|
||||
# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
# details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with GCC; see the file COPYING3. If not see
|
||||
# <http://www.gnu.org/licenses/>.
|
||||
|
||||
CROSS_LIBGCC1 = libgcc1-asm.a
|
||||
LIB1ASMSRC = arc/lib1funcs.S
|
||||
LIB1ASMFUNCS = _mulsi3 _umulsidi3 _umulsi3_highpart \
|
||||
_udivsi3 _divsi3 _umodsi3 _modsi3 \
|
||||
_divmod_tools _clzsi2 \
|
||||
_millicodethunk_st _millicodethunk_ld _millicodethunk_ret \
|
||||
_adddf3 _muldf3 _addsf3 _mulsf3 _divsf3 _divdf3 _truncdfsf2 _extendsfdf2 \
|
||||
_eqdf2 _eqsf2 _gedf2 _gesf2 _gtdf2 _gtsf2 _uneqdf2 _uneqsf2 _ordsf2 _orddf2 \
|
||||
_fixdfsi _fixsfsi _floatsidf _floatsisf _fixunsdfsi _floatunsidf
|
||||
|
||||
#LIBGCC2_CFLAGS = -g1 -O2 $(LIBGCC2_INCLUDES) $(GCC_CFLAGS)
|
||||
|
||||
# For floating-point emulation, we mostly use hand-coded assembly.
|
||||
# We use fp-bit.c for debugging purposes, and some parts of it
|
||||
# as a fallback for hardware configurations for which the hand-coded
|
||||
# assembly support is incomplete, i.e., where there is no NORM and/or no
|
||||
# supported multiply instruction. Using floating point on such a
|
||||
# configuration is generally inadvisable, but we have to provide support
|
||||
# somehow so that we can run the testsuites.
|
||||
# fp-hack.h / dp-hack.h take care of selecting the parts that are needed,
|
||||
# and (for debugging) of renaming functions so that they can be
|
||||
# used in an asm wrapper.
|
||||
|
||||
LIB2ADD = fp-bit.c dp-bit.c
|
||||
|
||||
dp-bit.c: $(srcdir)/fp-bit.c
|
||||
echo '#ifndef __big_endian__' > dp-bit.c
|
||||
echo '#define FLOAT_BIT_ORDER_MISMATCH' >> dp-bit.c
|
||||
echo '#endif' >> dp-bit.c
|
||||
echo '#include "fp-bit.h"' >> dp-bit.c
|
||||
echo '#include "config/arc/dp-hack.h"' >> dp-bit.c
|
||||
grep -v 'include.*fp-bit.h' $(srcdir)/fp-bit.c >> dp-bit.c
|
||||
|
||||
fp-bit.c: $(srcdir)/fp-bit.c
|
||||
echo '#define FLOAT' > fp-bit.c
|
||||
echo '#ifndef __big_endian__' >> fp-bit.c
|
||||
echo '#define FLOAT_BIT_ORDER_MISMATCH' >> fp-bit.c
|
||||
echo '#endif' >> fp-bit.c
|
||||
echo '#include "config/arc/fp-hack.h"' >> fp-bit.c
|
||||
cat $(srcdir)/fp-bit.c >> fp-bit.c
|
||||
|
||||
# .init/.fini section routines
|
||||
|
||||
crtg.o: $(srcdir)/config/arc/crtg.S
|
||||
$(crt_compile) -c -x assembler-with-cpp $<
|
||||
|
||||
crtgend.o: $(srcdir)/config/arc/crtgend.S
|
||||
$(crt_compile) -c -x assembler-with-cpp $<
|
||||
|
||||
mcount.o: $(srcdir)/config/arc/gmon/mcount.c
|
||||
$(gcc_compile) -isystem $(srcdir)/config/arc/gmon -c $< \
|
||||
-fcall-saved-r0 -fcall-saved-r1 -fcall-saved-r2 -fcall-saved-r3 \
|
||||
-fcall-saved-r4 -fcall-saved-r5 -fcall-saved-r6 -fcall-saved-r7 \
|
||||
-fomit-frame-pointer
|
||||
|
||||
gmon.o: $(srcdir)/config/arc/gmon/gmon.c
|
||||
$(gcc_compile) -isystem $(srcdir)/config/arc/gmon -mno-sdata -c $< \
|
||||
-fno-strict-aliasing \
|
||||
-Wno-extra # suppress inane warning about missing initializer.
|
||||
# Adding initializers for the remaining elements of gmonparam would
|
||||
# make the code more brittle.
|
||||
|
||||
prof-freq-stub.o: $(srcdir)/config/arc/gmon/prof-freq-stub.S
|
||||
$(gcc_compile) -isystem $(srcdir)/config/arc/gmon -c $<
|
||||
|
||||
prof-freq.o: $(srcdir)/config/arc/gmon/prof-freq.c
|
||||
$(gcc_compile) -isystem $(srcdir)/config/arc/gmon -c $<
|
||||
|
||||
dcache_linesz.o: $(srcdir)/config/arc/gmon/dcache_linesz.S
|
||||
$(gcc_compile) -isystem $(srcdir)/config/arc/gmon -c $<
|
||||
|
||||
profil.o: $(srcdir)/config/arc/gmon/profil.S
|
||||
$(gcc_compile) -isystem $(srcdir)/config/arc/gmon -c $<
|
||||
|
||||
profil-uclibc.o: $(srcdir)/config/arc/gmon/profil-uclibc.c
|
||||
$(gcc_compile) -isystem $(srcdir)/config/arc/gmon -c $<
|
||||
|
||||
libgmon.a: mcount.o gmon.o dcache_linesz.o $(PROFILE_OSDEP)
|
||||
$(AR_CREATE_FOR_TARGET) $@ $^
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
# GCC Makefile fragment for the Synopsys DesignWare ARC CPU with newlib.
|
||||
|
||||
# Copyright (C) 2007-2012 Free Software Foundation, Inc.
|
||||
# Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
# on behalf of Synopsys Inc.
|
||||
|
||||
# This file is part of GCC.
|
||||
|
||||
# GCC is free software; you can redistribute it and/or modify it under the
|
||||
# terms of the GNU General Public License as published by the Free Software
|
||||
# Foundation; either version 3, or (at your option) any later version.
|
||||
|
||||
# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
# details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with GCC; see the file COPYING3. If not see
|
||||
# <http://www.gnu.org/licenses/>.
|
||||
|
||||
PROFILE_OSDEP = prof-freq-stub.o profil.o
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
# GCC Makefile fragment for the Synopsys DesignWare ARC700 CPU with uClibc.
|
||||
|
||||
# Copyright (C) 2007-2012 Free Software Foundation, Inc.
|
||||
# Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
# on behalf of Synopsys Inc.
|
||||
|
||||
# This file is part of GCC.
|
||||
|
||||
# GCC is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the GNU General Public License as published by the Free
|
||||
# Software Foundation; either version 3, or (at your option) any later
|
||||
# version.
|
||||
|
||||
# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
# WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
# for more details.
|
||||
|
||||
# Under Section 7 of GPL version 3, you are granted additional
|
||||
# permissions described in the GCC Runtime Library Exception, version
|
||||
# 3.1, as published by the Free Software Foundation.
|
||||
|
||||
# You should have received a copy of the GNU General Public License and
|
||||
# a copy of the GCC Runtime Library Exception along with this program;
|
||||
# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
# <http://www.gnu.org/licenses/>.
|
||||
|
||||
CRTSTUFF_T_CFLAGS += -mno-sdata
|
||||
|
||||
# Compile crtbeginS.o and crtendS.o with pic.
|
||||
CRTSTUFF_T_CFLAGS_S = $(CRTSTUFF_T_CFLAGS) -mA7 -fPIC
|
||||
|
||||
# Compile libgcc2.a with pic.
|
||||
TARGET_LIBGCC2_CFLAGS = -mA7 -fPIC
|
||||
|
||||
PROFILE_OSDEP = prof-freq.o
|
||||
|
||||
# Override t-slibgcc-elf-ver to hide some lib1func
|
||||
# routines which should not be called via PLT.
|
||||
SHLIB_MAPFILES = libgcc-std.ver $(srcdir)/config/arc/libgcc-excl.ver
|
||||
|
|
@ -188,16 +188,20 @@ extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
|
|||
"rIJ" ((USItype) (bh)), \
|
||||
"r" ((USItype) (al)), \
|
||||
"rIJ" ((USItype) (bl)))
|
||||
/* Call libgcc routine. */
|
||||
#define umul_ppmm(w1, w0, u, v) \
|
||||
do { \
|
||||
DWunion __w; \
|
||||
__w.ll = __umulsidi3 (u, v); \
|
||||
w1 = __w.s.high; \
|
||||
w0 = __w.s.low; \
|
||||
} while (0)
|
||||
#define __umulsidi3 __umulsidi3
|
||||
UDItype __umulsidi3 (USItype, USItype);
|
||||
|
||||
/* Widening unsigned multiply: both operands are converted to USItype and
   the product is formed in UDItype.  Each macro argument is parenthesized
   so that an expression argument such as `a + b' is converted and
   multiplied as a whole instead of being split by operator precedence.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
|
||||
#ifdef __ARC_NORM__
|
||||
/* count_leading_zeros (count, x): runs the ARC `norm.f' instruction on x,
   replaces the result with -1 when the instruction leaves the "mi"
   condition true, and stores that value plus one in COUNT.  The asm
   declares the condition codes ("cc") as clobbered.
   NOTE(review): presumably norm yields the number of redundant sign bits,
   which would make the result 0 for an input with bit 31 set and 32 for a
   zero input (matching COUNT_LEADING_ZEROS_0) -- confirm against the ISA
   manual.  */
#define count_leading_zeros(count, x) \
|
||||
do \
|
||||
{ \
|
||||
SItype c_; \
|
||||
\
|
||||
__asm__ ("norm.f\t%0,%1\n\tmov.mi\t%0,-1" : "=r" (c_) : "r" (x) : "cc");\
|
||||
(count) = c_ + 1; \
|
||||
} \
|
||||
while (0)
|
||||
#define COUNT_LEADING_ZEROS_0 32
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) \
|
||||
|
|
|
|||
Loading…
Reference in New Issue