mirror of
https://github.com/rn10950/RetroZilla.git
synced 2024-11-14 03:30:17 +01:00
505 lines
10 KiB
ArmAsm
505 lines
10 KiB
ArmAsm
|
/* ***** BEGIN LICENSE BLOCK *****
|
||
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||
|
*
|
||
|
* The contents of this file are subject to the Mozilla Public License Version
|
||
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
||
|
* the License. You may obtain a copy of the License at
|
||
|
* http://www.mozilla.org/MPL/
|
||
|
*
|
||
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
||
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||
|
* for the specific language governing rights and limitations under the
|
||
|
* License.
|
||
|
*
|
||
|
* The Original Code is the Netscape security libraries.
|
||
|
*
|
||
|
* The Initial Developer of the Original Code is
|
||
|
* Netscape Communications Corporation.
|
||
|
* Portions created by the Initial Developer are Copyright (C) 2000
|
||
|
* the Initial Developer. All Rights Reserved.
|
||
|
*
|
||
|
* Contributor(s):
|
||
|
*
|
||
|
* Alternatively, the contents of this file may be used under the terms of
|
||
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||
|
* of those above. If you wish to allow use of your version of this file only
|
||
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||
|
* use your version of this file under the terms of the MPL, indicate your
|
||
|
* decision by deleting the provisions above and replace them with the notice
|
||
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||
|
* the provisions above, a recipient may use your version of this file under
|
||
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||
|
*
|
||
|
* ***** END LICENSE BLOCK ***** */
|
||
|
/* NOTE(review): presumably supplies the symbolic register names (a0..a7, t0..t3, zero, ra) used by the code below -- confirm against the platform header. */
#include <regdef.h>
|
||
|
/* The code below fills every branch/jump delay slot by hand, so the assembler must not reorder instructions or insert its own nops. */
.set noreorder
|
||
|
/* Forbid the assembler from silently using the assembler-temporary register when expanding macro instructions. */
.set noat
|
||
|
|
||
|
/* NOTE(review): the numeric section attributes are toolchain specific; their meaning is not visible in this file. */
.section .text, 1, 0x00000006, 4, 4
|
||
|
/* Defines a label literally named ".text" at the start of the section. */
.text:
|
||
|
/* Switch (back) to the .text section for the routines that follow. */
.section .text
|
||
|
|
||
|
.ent	s_mpv_mul_d_add
.globl	s_mpv_mul_d_add

/*
 * void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
 *                      mp_digit *c)
 *
 * c[0 .. a_len-1] += a[0 .. a_len-1] * b, after which the final carry is
 * STORED (not added) into c[a_len].  Returns at once when a_len is 0.
 *
 * Register roles
 *   a0  a (advances by two digits per loop pass)
 *   a1  number of digits of a not yet multiplied
 *   a2  b, zero-extended to 64 bits
 *   a3  c (advances together with a0)
 *   a4, a5  digits fetched from a        a6, a7  digits fetched from c
 *   t0, t1  64-bit partial sums          t2      carry (upper word of a sum)
 *   t3      scratch for the "fewer than two digits left" test
 *
 * Digits are read and written as 32-bit words (lwu / sw, 4-byte stride).
 * The instruction following each branch or jump is its delay slot and is
 * executed whether or not the branch is taken; delay slots are indented
 * by one extra space.
 *
 * The multiplier runs one digit ahead of the additions: LO always holds
 * b * (digit in a4) when a loop pass or the tail begins.  Because of
 * this look-ahead the delay-slot load at the bottom of the loop reads one
 * word past the last digit of a when no digit is left; that value is
 * never used.
 *
 * NOTE(review): pointers are advanced with addiu (a 32-bit add), so this
 * presumably targets an ABI with 32-bit pointers -- confirm.
 */
s_mpv_mul_d_add:
	beqz	a1,.Lmda_ret		# a_len == 0: nothing to do
	 move	t2,zero			# (delay) carry = 0
	dsll32	a2,a2,0			# shift b into the upper word ...
	dsrl32	a2,a2,0			# ... and back: bits 63..32 are now clear
	lwu	a4,0(a0)		# a4 = a[0]
	addiu	a1,a1,-1		# that digit is accounted for
	dmultu	a2,a4			# LO = b * a[0]
	beqz	a1,.Lmda_single		# a_len was exactly 1
	 sltiu	t3,a1,2			# (delay) t3 = 1 when fewer than 2 remain
	bnez	t3,.Lmda_tail		# exactly one more digit: skip the loop
	 lwu	a5,4(a0)		# (delay) a5 = a[1]

.Lmda_pair:				# two digits per pass
	lwu	a6,0(a3)		# a6 = c[0]
	mflo	t0			# t0 = b * a4
	addiu	a1,a1,-2		# two more digits consumed
	daddu	t0,t0,t2		# t0 += carry
	daddu	t0,t0,a6		# t0 += c[0]
	dmultu	a2,a5			# LO = b * a[1]
	sw	t0,0(a3)		# c[0] = low word of t0
	dsrl32	t2,t0,0			# carry = high word of t0
	lwu	a4,8(a0)		# a4 = a[2], first digit of the next pass
	addiu	a0,a0,8			# a += 2
	lwu	a7,4(a3)		# a7 = c[1]
	mflo	t1			# t1 = b * a[1]
	daddu	t1,t1,t2		# t1 += carry
	daddu	t1,t1,a7		# t1 += c[1]
	dmultu	a2,a4			# LO = b * (new a[0])
	sw	t1,4(a3)		# c[1] = low word of t1
	dsrl32	t2,t1,0			# carry = high word of t1
	addiu	a3,a3,8			# c += 2
	sltiu	t3,a1,2
	beqz	t3,.Lmda_pair		# at least two digits left: go again
	 lwu	a5,4(a0)		# (delay) a5 = a[1] for the next step

.Lmda_tail:				# LO = b * a4; a1 is 0 or 1 here
	lwu	a6,0(a3)		# a6 = c[0]
	mflo	t0
	beqz	a1,.Lmda_last		# no second digit pending
	 daddu	t0,t0,t2		# (delay) t0 = product + carry
	dmultu	a2,a5			# LO = b * a[1]
	daddu	t0,t0,a6		# t0 += c[0]
	sw	t0,0(a3)		# c[0] = low word
	dsrl32	t2,t0,0			# carry = high word
	lwu	a7,4(a3)		# a7 = c[1]
	mflo	t1
	daddu	t1,t1,t2		# t1 = product + carry
	daddu	t1,t1,a7		# t1 += c[1]
	dsrl32	t2,t1,0			# carry = high word
	sw	t1,4(a3)		# c[1] = low word
	b	.Lmda_store_cy
	 addiu	a3,a3,4			# (delay) c += 1, so 4(a3) is the old c[2]

.Lmda_last:				# one digit left in LO, already in t0
	daddu	t0,t0,a6		# t0 += c[0]
	sw	t0,0(a3)		# c[0] = low word
	b	.Lmda_store_cy
	 dsrl32	t2,t0,0			# (delay) carry = high word

.Lmda_single:				# a_len == 1: no incoming carry to add
	lwu	a6,0(a3)		# a6 = c[0]
	mflo	t0			# t0 = b * a[0]
	daddu	t0,t0,a6		# t0 += c[0]
	sw	t0,0(a3)		# c[0] = low word
	dsrl32	t2,t0,0			# carry = high word

.Lmda_store_cy:
	jr	ra
	 sw	t2,4(a3)		# (delay) c[1] = carry

.Lmda_ret:
	jr	ra
	 nop

.end	s_mpv_mul_d_add
|
||
|
|
||
|
.ent	s_mpv_mul_d_add_prop
.globl	s_mpv_mul_d_add_prop

/*
 * void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b,
 *                           mp_digit *c)
 *
 * c[0 .. a_len-1] += a[0 .. a_len-1] * b, then the remaining carry is
 * ADDED into c[a_len] and rippled upward through c for as long as a carry
 * keeps coming out.  Returns at once when a_len is 0.
 *
 * Register roles
 *   a0  a                      a1  digits of a not yet multiplied
 *   a2  b, zero-extended       a3  c (ends up pointing at the digit that
 *                                  receives the next carry)
 *   a4, a5  digits from a      a6, a7  digits from c
 *   t0, t1  64-bit sums        t2  carry        t3  loop-test scratch
 *
 * Digits are 32-bit words (lwu / sw, 4-byte stride).  The instruction
 * after each branch or jump is its delay slot and always executes; delay
 * slots are indented by one extra space.  LO holds b * (digit in a4)
 * whenever a loop pass or the tail begins; because of that look-ahead the
 * delay-slot load at the bottom of the loop reads one word past the last
 * digit of a when no digit is left (the value is never used).
 */
s_mpv_mul_d_add_prop:
	beqz	a1,.Lmdp_ret		# a_len == 0: nothing to do
	 move	t2,zero			# (delay) carry = 0
	dsll32	a2,a2,0			# shift b into the upper word ...
	dsrl32	a2,a2,0			# ... and back: bits 63..32 are now clear
	lwu	a4,0(a0)		# a4 = a[0]
	addiu	a1,a1,-1		# that digit is accounted for
	dmultu	a2,a4			# LO = b * a[0]
	beqz	a1,.Lmdp_single		# a_len was exactly 1
	 sltiu	t3,a1,2			# (delay) t3 = 1 when fewer than 2 remain
	bnez	t3,.Lmdp_tail		# exactly one more digit: skip the loop
	 lwu	a5,4(a0)		# (delay) a5 = a[1]

.Lmdp_pair:				# two digits per pass
	lwu	a6,0(a3)		# a6 = c[0]
	mflo	t0			# t0 = b * a4
	addiu	a1,a1,-2		# two more digits consumed
	daddu	t0,t0,t2		# t0 += carry
	daddu	t0,t0,a6		# t0 += c[0]
	dmultu	a2,a5			# LO = b * a[1]
	sw	t0,0(a3)		# c[0] = low word of t0
	dsrl32	t2,t0,0			# carry = high word of t0
	lwu	a4,8(a0)		# a4 = a[2], first digit of the next pass
	addiu	a0,a0,8			# a += 2
	lwu	a7,4(a3)		# a7 = c[1]
	mflo	t1			# t1 = b * a[1]
	daddu	t1,t1,t2		# t1 += carry
	daddu	t1,t1,a7		# t1 += c[1]
	dmultu	a2,a4			# LO = b * (new a[0])
	sw	t1,4(a3)		# c[1] = low word of t1
	dsrl32	t2,t1,0			# carry = high word of t1
	addiu	a3,a3,8			# c += 2
	sltiu	t3,a1,2
	beqz	t3,.Lmdp_pair		# at least two digits left: go again
	 lwu	a5,4(a0)		# (delay) a5 = a[1] for the next step

.Lmdp_tail:				# LO = b * a4; a1 is 0 or 1 here
	lwu	a6,0(a3)		# a6 = c[0]
	mflo	t0
	beqz	a1,.Lmdp_last		# no second digit pending
	 daddu	t0,t0,t2		# (delay) t0 = product + carry
	dmultu	a2,a5			# LO = b * a[1]
	daddu	t0,t0,a6		# t0 += c[0]
	sw	t0,0(a3)		# c[0] = low word
	dsrl32	t2,t0,0			# carry = high word
	lwu	a7,4(a3)		# a7 = c[1]
	mflo	t1
	daddu	t1,t1,t2		# t1 = product + carry
	daddu	t1,t1,a7		# t1 += c[1]
	dsrl32	t2,t1,0			# carry = high word
	sw	t1,4(a3)		# c[1] = low word
	b	.Lmdp_prop
	 addiu	a3,a3,8			# (delay) c += 2: step past both digits

.Lmdp_last:				# one digit left in LO, already in t0
	daddu	t0,t0,a6		# t0 += c[0]
	sw	t0,0(a3)		# c[0] = low word
	dsrl32	t2,t0,0			# carry = high word
	b	.Lmdp_prop
	 addiu	a3,a3,4			# (delay) c += 1

.Lmdp_single:				# a_len == 1: no incoming carry to add
	lwu	a6,0(a3)		# a6 = c[0]
	mflo	t0			# t0 = b * a[0]
	daddu	t0,t0,a6		# t0 += c[0]
	sw	t0,0(a3)		# c[0] = low word
	dsrl32	t2,t0,0			# carry = high word
	addiu	a3,a3,4			# c += 1

.Lmdp_prop:				# a3 -> first digit above the product
	beqz	t2,.Lmdp_ret		# no carry left: done
	 nop
.Lmdp_ripple:				# while (carry) { *c += carry; ++c; }
	lwu	a6,0(a3)		# a6 = *c
	daddu	t2,t2,a6		# t2 = *c + carry
	sw	t2,0(a3)		# *c = low word
	dsrl32	t2,t2,0			# carry = high word
	bnez	t2,.Lmdp_ripple
	 addiu	a3,a3,4			# (delay) ++c

.Lmdp_ret:
	jr	ra
	 nop

.end	s_mpv_mul_d_add_prop
|
||
|
|
||
|
.ent	s_mpv_mul_d
.globl	s_mpv_mul_d

/*
 * void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b,
 *                  mp_digit *c)
 *
 * c[0 .. a_len-1] = a[0 .. a_len-1] * b and c[a_len] = final carry.
 * The previous contents of c are not read.  Returns at once (leaving c
 * untouched) when a_len is 0.
 *
 * Register roles
 *   a0  a                      a1  digits of a not yet multiplied
 *   a2  b, zero-extended       a3  c
 *   a4, a5  digits from a      t0, t1  64-bit sums
 *   t2  carry                  t3  loop-test scratch
 *
 * Digits are 32-bit words (lwu / sw, 4-byte stride).  The instruction
 * after each branch or jump is its delay slot and always executes; delay
 * slots are indented by one extra space.  LO holds b * (digit in a4)
 * whenever a loop pass or the tail begins; because of that look-ahead the
 * delay-slot load at the bottom of the loop reads one word past the last
 * digit of a when no digit is left (the value is never used).
 */
s_mpv_mul_d:
	beqz	a1,.Lmd_ret		# a_len == 0: nothing to do
	 move	t2,zero			# (delay) carry = 0
	dsll32	a2,a2,0			# shift b into the upper word ...
	dsrl32	a2,a2,0			# ... and back: bits 63..32 are now clear
	lwu	a4,0(a0)		# a4 = a[0]
	addiu	a1,a1,-1		# that digit is accounted for
	dmultu	a2,a4			# LO = b * a[0]
	beqz	a1,.Lmd_single		# a_len was exactly 1
	 sltiu	t3,a1,2			# (delay) t3 = 1 when fewer than 2 remain
	bnez	t3,.Lmd_tail		# exactly one more digit: skip the loop
	 lwu	a5,4(a0)		# (delay) a5 = a[1]

.Lmd_pair:				# two digits per pass
	mflo	t0			# t0 = b * a4
	addiu	a1,a1,-2		# two more digits consumed
	daddu	t0,t0,t2		# t0 += carry
	dsrl32	t2,t0,0			# carry = high word of t0
	dmultu	a2,a5			# LO = b * a[1]
	sw	t0,0(a3)		# c[0] = low word of t0
	lwu	a4,8(a0)		# a4 = a[2], first digit of the next pass
	addiu	a0,a0,8			# a += 2
	mflo	t1			# t1 = b * a[1]
	daddu	t1,t1,t2		# t1 += carry
	dsrl32	t2,t1,0			# carry = high word of t1
	dmultu	a2,a4			# LO = b * (new a[0])
	sw	t1,4(a3)		# c[1] = low word of t1
	addiu	a3,a3,8			# c += 2
	sltiu	t3,a1,2
	beqz	t3,.Lmd_pair		# at least two digits left: go again
	 lwu	a5,4(a0)		# (delay) a5 = a[1] for the next step

.Lmd_tail:				# LO = b * a4; a1 is 0 or 1 here
	mflo	t0
	beqz	a1,.Lmd_last		# no second digit pending
	 daddu	t0,t0,t2		# (delay) t0 = product + carry
	dmultu	a2,a5			# LO = b * a[1]
	sw	t0,0(a3)		# c[0] = low word
	dsrl32	t2,t0,0			# carry = high word
	mflo	t1
	daddu	t1,t1,t2		# t1 = product + carry
	dsrl32	t2,t1,0			# carry = high word
	sw	t1,4(a3)		# c[1] = low word
	b	.Lmd_store_cy
	 addiu	a3,a3,4			# (delay) c += 1, so 4(a3) is the old c[2]

.Lmd_last:				# one digit left, already summed in t0
	sw	t0,0(a3)		# c[0] = low word
	b	.Lmd_store_cy
	 dsrl32	t2,t0,0			# (delay) carry = high word

.Lmd_single:				# a_len == 1
	mflo	t0			# t0 = b * a[0]
	sw	t0,0(a3)		# c[0] = low word
	dsrl32	t2,t0,0			# carry = high word

.Lmd_store_cy:
	jr	ra
	 sw	t2,4(a3)		# (delay) c[1] = carry

.Lmd_ret:
	jr	ra
	 nop

.end	s_mpv_mul_d
|
||
|
|
||
|
|
||
|
.ent	s_mpv_sqr_add_prop
.globl	s_mpv_sqr_add_prop

/*
 * void s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs);
 *
 * For every digit a[i], adds the 64-bit square a[i] * a[i] into the digit
 * pair sqrs[2*i], sqrs[2*i+1], carrying from each pair into the next.
 * Any carry left after the last pair is added into sqrs[2*a_len] and
 * rippled upward for as long as a carry keeps coming out.
 * Returns at once, touching nothing, when a_len is 0.
 *
 * registers
 *   a0      *a
 *   a1      a_len (digits still to square)
 *   a2      *sqrs
 *   a3      digit from *a, a_i
 *   a4      square of digit from a, then the updated pair
 *   a5,a6   current pair of sqrs packed into 64 bits / scratch
 *   a7,t0   carry (a7: from adding the square, t0: running carry)
 *
 * Digits are 32-bit words (lwu / sw, 4-byte stride).  The instruction
 * after each branch or jump is its delay slot and always executes; delay
 * slots are indented by one extra space.  The multiplier runs one digit
 * ahead: LO holds the square of the previously loaded digit when a loop
 * pass or the final step begins.
 *
 * The running carry is at most 1: pair + square + carry is always below
 * 2^65, so the two sltu results added into t0 are never both set.
 */
s_mpv_sqr_add_prop:
	beq	a1,zero,.Lsqr_ret	# a_len == 0: do not read a or touch sqrs
	 move	t0,zero			# (delay) running carry = 0
	move	a7,zero
	lwu	a3,0(a0)		# a3 = a[0]
	addiu	a1,a1,-1		# --a_len
	dmultu	a3,a3			# LO = a[0] * a[0]
	beq	a1,zero,.Lsqr_last	# jump if that was the only digit
	 addiu	a0,a0,4			# (delay) ++a

.Lsqr_loop:
	lwu	a5,0(a2)		# low digit of the current pair
	lwu	a6,4(a2)		# high digit of the current pair
	addiu	a2,a2,8			# sqrs += 2
	dsll32	a6,a6,0
	daddu	a5,a5,a6		# a5 = pair as one 64-bit value
	lwu	a3,0(a0)		# next digit of a
	addiu	a0,a0,4			# ++a
	mflo	a4			# a4 = square of the previous digit
	daddu	a6,a5,a4		# a6 = pair + square
	sltu	a7,a6,a5		# a7 = carry out of that addition
	dmultu	a3,a3			# LO = square of the next digit
	daddu	a4,a6,t0		# a4 = a6 + incoming carry
	sltu	t0,a4,a6		# carry out of that addition
	add	t0,t0,a7		# running carry for the next pair
	sw	a4,-8(a2)		# store low digit of the pair
	addiu	a1,a1,-1		# --a_len
	dsrl32	a4,a4,0
	bne	a1,zero,.Lsqr_loop	# loop while digits remain
	 sw	a4,-4(a2)		# (delay) store high digit of the pair

.Lsqr_last:				# last pair: same sum, no further multiply
	lwu	a5,0(a2)
	lwu	a6,4(a2)
	addiu	a2,a2,8			# sqrs += 2
	dsll32	a6,a6,0
	daddu	a5,a5,a6		# a5 = pair as one 64-bit value
	mflo	a4			# a4 = square of the last digit
	daddu	a6,a5,a4
	sltu	a7,a6,a5		# a7 = carry out of pair + square
	daddu	a4,a6,t0
	sltu	t0,a4,a6		# carry out of adding the running carry
	add	t0,t0,a7		# final carry
	sw	a4,-8(a2)		# store low digit of the pair
	dsrl32	a4,a4,0
	beq	t0,zero,.Lsqr_ret	# no carry left: done
	 sw	a4,-4(a2)		# (delay) store high digit on both paths

	/*
	 * Propagate the final carry.  Each digit is a zero-extended 32-bit
	 * value, so the carry out of "digit + carry" is bit 32 of the 64-bit
	 * sum; the sum itself must be written back before moving on.
	 */
.Lsqr_carry:
	lwu	a5,0(a2)		# a5 = *sqrs
	daddu	a6,a5,t0		# a6 = *sqrs + carry
	sw	a6,0(a2)		# *sqrs = low word of the sum
	dsrl32	t0,a6,0			# carry = bit 32 of the sum
	bne	t0,zero,.Lsqr_carry	# loop if carry persists
	 addiu	a2,a2,4			# (delay) sqrs++

.Lsqr_ret:
	jr	ra
	 nop

.end	s_mpv_sqr_add_prop
|