/* Copyright (C) 2005 Free Software Foundation, Inc.
   Contributed by Sunnorth.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 2, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING.  If not, write to the Free
   Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
   MA 02110-1301, USA.  */

#define ra r3
#define a0 r4
#define a1 r5
#define a2 r6
#define a3 r7
#define v0 r23

#define t0 r8
#define t1 r9
#define t2 r10
#define t3 r11
#define t4 r22

#ifndef __pic__
#if !defined(L_mulsi3) && !defined(L_divsi3)
        .text
        .global _flush_cache
_flush_cache:
        srli    r9, r5, 4
        mv      r8, r4
        mtsr    r9, sr0
1:
        cache   0xe, [r8, 0]            # write back and invalidate dcache
        addi    r8, 16
        bcnz    1b
        mfcr    r8, cr4
        bittst! r8, 0x3                 # if LDM is enabled, write back LDM
        beq!    6f
        ldi     r10, 0
        cache   0xc, [r10, 0]
6:
        bittst! r8, 0x2                 # if LIM is enabled, refill it
        beq!    7f
        cache   0x4, [r10, 0]
7:
        #nop!
        #nop!
        #nop!
        #nop!
        #nop!
        mv      r8, r4
        mtsr    r9, sr0
2:
        cache   0x2, [r8, 0]            # invalidate and unlock icache
        #nop!
        #nop!
        #nop!
        #nop!
        #nop!
        addi    r8, 16
        bcnz    2b
        br      r3
#endif

/* FUNCTION
   (U) INT32 v0 = __mulsi3 ((U) INT32 a0, (U) INT32 a1);

   REGISTERS:
        use     t0, t1
        modify  a0
        a1 -> becomes 0

   NOTE:
        this seems to give better performance than just rotate and add.  */

#ifdef L_mulsi3
        .text
        .global __umulsi3
        .global __mulsi3

/* signed multiplication (32x32) */
        .ent    __mulsi3
__umulsi3:
__mulsi3:
        li      t1, 0
__mulsi3_loop:
        andri.c t0, a1, 1               # t0 = multiplier[0]
        srli    a1, a1, 1               # a1 /= 2
        beq     __mulsi3_loop2          # skip if (t0 == 0)
        add     t1, t1, a0              # add multiplicand
__mulsi3_loop2:
        slli    a0, a0, 1               # multiplicand *= 2
        cmpi.c  a1, 0
        bne     __mulsi3_loop
        mv      r4, t1
        br      ra
        .end    __mulsi3
#endif /* L_mulsi3 */
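/* For reference, the shift-and-add loop above is equivalent to the C
   sketch below: each iteration tests the low bit of the multiplier,
   conditionally accumulates the multiplicand, and doubles it.  This
   is an illustrative sketch only, not part of the build; the name
   mulsi3_ref is invented for the example.

   static unsigned int
   mulsi3_ref (unsigned int a0, unsigned int a1)
   {
     unsigned int t1 = 0;               // product accumulator
     while (a1 != 0)
       {
         if (a1 & 1)                    // multiplier[0] set?
           t1 += a0;                    // add multiplicand
         a1 >>= 1;                      // a1 /= 2
         a0 <<= 1;                      // multiplicand *= 2
       }
     return t1;
   }

   Because the low 32 bits of a product are the same for signed and
   unsigned operands, __mulsi3 and __umulsi3 can share one body, as
   the two entry labels above do.  */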
/* FUNCTION
   UINT32 (v0) = __udivsi3 (UINT32 (a0), UINT32 (a1));
   INT32  (v0) = __divsi3  (INT32 (a0),  INT32 (a1));
   UINT32 (v0) = __umodsi3 (UINT32 (a0), UINT32 (a1));
   INT32  (v0) = __modsi3  (INT32 (a0),  INT32 (a1));

   DESCRIPTION
        performs 32-bit division/modulo.

   REGISTERS
        used    t0 (bit index), t1
        modify  a0 (becomes the remainder)  */

#ifdef L_divsi3
        .text
        .global __udivsi3
        .global __umodsi3
        .global __divsi3
        .global __modsi3

/* unsigned division */
        .ent    __udivsi3
__udivsi3:
        li      t4, 0
        cmpi.c  a1, 0
        beq     __uds_exit
        li      t0, 1
        blt     __uds_ok
__uds_normalize:
        cmp.c   a0, a1
        bcc     __uds_ok
        slli    a1, a1, 1
        slli    t0, t0, 1
        cmpi.c  a1, 0
        bge     __uds_normalize
__uds_ok:
__uds_loop2:
        cmp.c   a0, a1
        bcc     __uds_loop3
        sub     a0, a0, a1
        or      t4, t4, t0
__uds_loop3:
        srli    t0, t0, 1
        srli    a1, a1, 1
        cmpi.c  t0, 0
        bne     __uds_loop2
__uds_exit:
        mv      a1, a0
        mv      r4, t4
        br      ra
        .end    __udivsi3

/* unsigned modulus */
        .ent    __umodsi3
__umodsi3:
        mv      t3, ra
        jl      __udivsi3
        mv      r4, a1
        br      t3
        .end    __umodsi3

/* take absolute values, then divide */
        .ent    __orgsi3
__orgsi3:
        cmpi.c  a0, 0
        bge     __orgsi3_a0p
        neg     a0, a0
__orgsi3_a0p:
        cmpi.c  a1, 0
        bge     __udivsi3
        neg     a1, a1
        b       __udivsi3               # goto __udivsi3
        .end    __orgsi3

/* signed division */
        .ent    __divsi3
__divsi3:
        mv      t3, ra
        xor     t2, a0, a1
        jl      __orgsi3
__divsi3_adjust:
        cmpi.c  t2, 0
        bge     __divsi3_exit
        neg     r4, r4
__divsi3_exit:
        br      t3
        .end    __divsi3

/* signed modulus */
        .ent    __modsi3
__modsi3:
        mv      t3, ra
        mv      t2, a0
        jl      __orgsi3
        mv      r4, a1
        b       __divsi3_adjust
        .end    __modsi3
#endif /* L_divsi3 */
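/* For reference, __udivsi3 is the classic shift-and-subtract
   (restoring) division, sketched in C below.  The signed entry points
   reduce to it through __orgsi3, which takes absolute values; the
   quotient is negated when a0 ^ a1 is negative (__divsi3), and the
   remainder when the dividend a0 is negative (__modsi3).  The sketch
   is illustrative only; udivmodsi3_ref and divsi3_ref are invented
   names, and like the assembly it yields quotient 0 and remainder a0
   on division by zero.

   static unsigned int
   udivmodsi3_ref (unsigned int a0, unsigned int a1, unsigned int *rem)
   {
     unsigned int quot = 0, bit = 1;
     if (a1 != 0)
       {
         // Normalize: shift the divisor left until it exceeds the
         // dividend or its top bit is set.
         if ((int) a1 >= 0)
           while (a0 >= a1)
             {
               a1 <<= 1;
               bit <<= 1;
               if ((int) a1 < 0)
                 break;
             }
         // Shift back down, producing one quotient bit per step.
         do
           {
             if (a0 >= a1)
               {
                 a0 -= a1;              // subtract the shifted divisor
                 quot |= bit;           // record this quotient bit
               }
             bit >>= 1;
             a1 >>= 1;
           }
         while (bit != 0);
       }
     *rem = a0;                         // a0 ends up as the remainder
     return quot;
   }

   static int
   divsi3_ref (int a0, int a1)
   {
     unsigned int rem;
     int negative = (a0 ^ a1) < 0;      // signs differ -> negative quotient
     unsigned int ua = a0 < 0 ? -(unsigned int) a0 : (unsigned int) a0;
     unsigned int ub = a1 < 0 ? -(unsigned int) a1 : (unsigned int) a1;
     unsigned int q = udivmodsi3_ref (ua, ub, &rem);
     return negative ? -(int) q : (int) q;
   }
*/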
#else /* -fPIC */

#if !defined(L_mulsi3) && !defined(L_divsi3)
        .set pic
        .text
        .global _flush_cache
_flush_cache:
        addi    r0, -8                  # PIC: reserve stack space
        .cpload r29                     # PIC: set up the GOT pointer
        srli    r9, r5, 4
        mv      r8, r4
        mtsr    r9, sr0
1:
        cache   0xe, [r8, 0]            # write back and invalidate dcache
        addi    r8, 16
        bcnz    1b
        mfcr    r8, cr4
        bittst! r8, 0x3                 # if LDM is enabled, write back LDM
        beq!    6f
        ldi     r10, 0
        cache   0xc, [r10, 0]
6:
        bittst! r8, 0x2                 # if LIM is enabled, refill it
        beq!    7f
        cache   0x4, [r10, 0]
7:
        #nop!
        #nop!
        #nop!
        #nop!
        #nop!
        mv      r8, r4
        mtsr    r9, sr0
2:
        cache   0x2, [r8, 0]            # invalidate and unlock icache
        #nop!
        #nop!
        #nop!
        #nop!
        #nop!
        addi    r8, 16
        bcnz    2b
        .cprestore r0, 12               # PIC: restore the GOT pointer
        addi    r0, 8                   # PIC: release stack space
        br      r3
#endif

/* FUNCTION
   (U) INT32 v0 = __mulsi3 ((U) INT32 a0, (U) INT32 a1);

   REGISTERS:
        use     t0, t1
        modify  a0
        a1 -> becomes 0

   NOTE:
        this seems to give better performance than just rotate and add.  */

#ifdef L_mulsi3
        .set pic
        .text
        .global __umulsi3
        .global __mulsi3

/* signed multiplication (32x32) */
        .ent    __mulsi3
__umulsi3:
__mulsi3:
        addi    r0, -8                  # PIC: reserve stack space
        .cpload r29                     # PIC: set up the GOT pointer
        li      t1, 0
__mulsi3_loop:
        andri.c t0, a1, 1               # t0 = multiplier[0]
        srli    a1, a1, 1               # a1 /= 2
        beq     __mulsi3_loop2          # skip if (t0 == 0)
        add     t1, t1, a0              # add multiplicand
__mulsi3_loop2:
        slli    a0, a0, 1               # multiplicand *= 2
        cmpi.c  a1, 0
        bne     __mulsi3_loop
        mv      r4, t1
        .cprestore r0, 12               # PIC: restore the GOT pointer
        addi    r0, 8                   # PIC: release stack space
        br      ra
        .end    __mulsi3
#endif /* L_mulsi3 */

/* FUNCTION
   UINT32 (v0) = __udivsi3 (UINT32 (a0), UINT32 (a1));
   INT32  (v0) = __divsi3  (INT32 (a0),  INT32 (a1));
   UINT32 (v0) = __umodsi3 (UINT32 (a0), UINT32 (a1));
   INT32  (v0) = __modsi3  (INT32 (a0),  INT32 (a1));

   DESCRIPTION
        performs 32-bit division/modulo.

   REGISTERS
        used    t0 (bit index), t1
        modify  a0 (becomes the remainder)  */

#ifdef L_divsi3
        .set pic
        .text
        .global __udivsi3
        .global __umodsi3
        .global __divsi3
        .global __modsi3

/* unsigned division */
        .ent    __udivsi3
__udivsi3:
        addi    r0, -8                  # PIC: reserve stack space
        .cpload r29                     # PIC: set up the GOT pointer
        li      t4, 0
        cmpi.c  a1, 0
        beq     __uds_exit
        li      t0, 1
        blt     __uds_ok
__uds_normalize:
        cmp.c   a0, a1
        bcc     __uds_ok
        slli    a1, a1, 1
        slli    t0, t0, 1
        cmpi.c  a1, 0
        bge     __uds_normalize
__uds_ok:
__uds_loop2:
        cmp.c   a0, a1
        bcc     __uds_loop3
        sub     a0, a0, a1
        or      t4, t4, t0
__uds_loop3:
        srli    t0, t0, 1
        srli    a1, a1, 1
        cmpi.c  t0, 0
        bne     __uds_loop2
__uds_exit:
        mv      a1, a0
        mv      r4, t4
        .cprestore r0, 12               # PIC: restore the GOT pointer
        addi    r0, 8                   # PIC: release stack space
        br      ra
        .end    __udivsi3

/* unsigned modulus */
        .ent    __umodsi3
__umodsi3:
        addi    r0, -8                  # PIC: reserve stack space
        .cpload r29                     # PIC: set up the GOT pointer
        li      t1, 0
        mv      t3, ra
        # jl __udivsi3
        la      r29, __udivsi3
        brl     r29
        mv      r4, a1
        .cprestore r0, 12               # PIC: restore the GOT pointer
        addi    r0, 8                   # PIC: release stack space
        br      t3
        .end    __umodsi3

/* take absolute values, then divide */
        .ent    __orgsi3
__orgsi3:
        cmpi.c  a0, 0
        bge     __orgsi3_a0p
        neg     a0, a0
__orgsi3_a0p:
        cmpi.c  a1, 0
        bge     __udivsi3
        neg     a1, a1
        b       __udivsi3               # goto __udivsi3
        .end    __orgsi3

/* signed division */
        .ent    __divsi3
__divsi3:
        addi    r0, -8                  # PIC: reserve stack space
        .cpload r29                     # PIC: set up the GOT pointer
        mv      t3, ra
        xor     t2, a0, a1
        # jl __orgsi3
        la      r29, __orgsi3
        brl     r29
__divsi3_adjust:
        cmpi.c  t2, 0
        bge     __divsi3_exit
        neg     r4, r4
__divsi3_exit:
        .cprestore r0, 12               # PIC: restore the GOT pointer
        addi    r0, 8                   # PIC: release stack space
        br      t3
        .end    __divsi3

/* signed modulus */
        .ent    __modsi3
__modsi3:
        addi    r0, -8                  # PIC: reserve stack space
        .cpload r29                     # PIC: set up the GOT pointer
        mv      t3, ra
        mv      t2, a0
        # jl __orgsi3
        la      r29, __orgsi3
        brl     r29
        mv      r4, a1
        b       __divsi3_adjust
        .end    __modsi3
#endif /* L_divsi3 */

#endif /* __pic__ */
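/* Usage note: none of these routines is normally called by hand.
   When GCC compiles for a core without hardware multiply or divide
   instructions, it lowers the corresponding C operators to libcalls
   into this file, so for a source file such as

     int quotient (int a, int b) { return a / b; }
     int modulo   (int a, int b) { return a % b; }

   the generated code calls __divsi3 and __modsi3 with the operands
   in a0/a1 (r4/r5) and, as the code above shows, picks the result up
   from r4.  */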