gcc/libgcc/config/tilepro/softmpy.S

95 lines
2.4 KiB
ArmAsm

/* 64-bit multiplication support for TILEPro.
Copyright (C) 2011-2019 Free Software Foundation, Inc.
Contributed by Walter Lee (walt@tilera.com)
This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.
This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* 64-bit multiplication support. */
.file "softmpy.S"
/* Parameters */
#define lo0 r9 /* low 32 bits of n0 */
#define hi0 r1 /* high 32 bits of n0 */
#define lo1 r2 /* low 32 bits of n1 */
#define hi1 r3 /* high 32 bits of n1 */
/* temps */
#define result1_a r4
#define result1_b r5
#define tmp0 r6
#define tmp0_left_16 r7
#define tmp1 r8
.section .text.__muldi3, "ax"
.align 8
.globl __muldi3
.type __muldi3, @function
__muldi3:
{
move lo0, r0 /* so we can write "out r0" while "in r0" alive */
mulhl_uu tmp0, lo1, r0
}
{
mulll_uu result1_a, lo1, hi0
}
{
move tmp1, tmp0
mulhla_uu tmp0, lo0, lo1
}
{
mulhlsa_uu result1_a, lo1, hi0
}
{
mulll_uu result1_b, lo0, hi1
slt_u tmp1, tmp0, tmp1
}
{
mulhlsa_uu result1_a, lo0, hi1
shli r0, tmp0, 16
}
{
move tmp0_left_16, r0
mulhha_uu result1_b, lo0, lo1
}
{
mullla_uu r0, lo1, lo0
shli tmp1, tmp1, 16
}
{
mulhlsa_uu result1_b, hi0, lo1
inthh tmp1, tmp1, tmp0
}
{
mulhlsa_uu result1_a, hi1, lo0
slt_u tmp0, r0, tmp0_left_16
}
/* NOTE: this will stall for a cycle here. Oh well. */
{
add r1, tmp0, tmp1
add result1_a, result1_a, result1_b
}
{
add r1, r1, result1_a
jrp lr
}
.size __muldi3,.-__muldi3