mirror of
https://github.com/rn10950/RetroZilla.git
synced 2024-11-14 11:40:13 +01:00
473 lines
8.6 KiB
ArmAsm
473 lines
8.6 KiB
ArmAsm
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
#include <regdef.h>
|
|
# Assembler setup for the routines in this file.
# NOTE(review): the lone `|` lines in this region are not assembly; they
# look like residue from a web code viewer and would need removing before
# this file can be assembled.
#
# noreorder: keep instructions exactly in the order written.  The routines
# below place an instruction (or an explicit nop) after every branch and
# jump, i.e. they fill the delay slots by hand.
.set noreorder
|
|
# noat: the assembler must not use the assembler-temporary register ($at)
# behind the programmer's back.
.set noat
|
|
|
|
# NOTE(review): the numeric operands look like IRIX-assembler section
# attributes (type, flags, entry size, alignment) - confirm against the
# target assembler's documentation.
.section .text, 1, 0x00000006, 4, 4
|
|
# Defines a label literally named ".text" at this point in the section.
.text:
|
|
# Everything that follows is emitted into the code section.
.section .text
|
|
|
|
.ent s_mpv_mul_d_add
.globl s_mpv_mul_d_add

#-----------------------------------------------------------------------
# void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
#                      mp_digit *c)
#
# Multiply-accumulate on 32-bit digits:  c[0..a_len-1] += a[] * b.
# The carry left over after the last digit is STORED (not added) into
# c[a_len].  Returns immediately, touching nothing, when a_len is 0.
#
# In:       a0 = a, a1 = a_len, a2 = b, a3 = c
# Scratch:  a4, a5   digit of a being multiplied / the following digit
#           a6, a7   matching digits of c
#           t0, t1   64-bit partial sums
#           t2       carry digit (cy)
#           t3       result of the "fewer than 2 digits left" test
#           HI/LO    clobbered by dmultu
#
# Written for .set noreorder: the instruction following every branch or
# jump sits in its delay slot and runs whether or not the branch is
# taken.  The multiplies are started early and collected later with
# mflo, so the instruction order below is deliberate.
#
# Note: the delay-slot load at the bottom of the pair loop reads a[1]
# even when no digits remain; the value is then unused.
#-----------------------------------------------------------------------
s_mpv_mul_d_add:
	beqz	a1,.L.ret		# a_len == 0: nothing to do
	move	t2,zero			#   [delay] cy = 0
	dsll32	a2,a2,0			# clear the upper 32 bits of b (the
	dsrl32	a2,a2,0			#   original note says b is sometimes negative)
	lwu	a4,0(a0)		# d0 = a[0]
	dmultu	a2,a4			# LO = b * d0
	addiu	a1,a1,-1		# --a_len
	beqz	a1,.L.single		# a had exactly one digit
	sltiu	t3,a1,2			#   [delay] t3 = (a_len < 2)
	bnez	t3,.L.tail		# not enough digits for a full pair
	lwu	a5,4(a0)		#   [delay] d1 = a[1]

	# Main loop: consumes two digits of a per pass while a_len >= 2.
.L.pair:
	addiu	a1,a1,-2		# a_len -= 2
	lwu	a6,0(a3)		# c0 = c[0]
	mflo	t0			# w0 = b * d0
	daddu	t0,t0,t2		# w0 += cy
	daddu	t0,t0,a6		# w0 += c0
	dmultu	a2,a5			# LO = b * d1
	dsrl32	t2,t0,0			# cy = upper 32 bits of w0
	sw	t0,0(a3)		# c[0] = lower 32 bits of w0
	lwu	a4,8(a0)		# d0 = a[2]
	addiu	a0,a0,8			# a += 2
	lwu	a7,4(a3)		# c1 = c[1]
	mflo	t1			# w1 = b * d1
	daddu	t1,t1,t2		# w1 += cy
	daddu	t1,t1,a7		# w1 += c1
	dmultu	a2,a4			# LO = b * d0 (for the next step)
	dsrl32	t2,t1,0			# cy = upper 32 bits of w1
	sw	t1,4(a3)		# c[1] = lower 32 bits of w1
	addiu	a3,a3,8			# c += 2
	sltiu	t3,a1,2			# t3 = (a_len < 2)
	beqz	t3,.L.pair		# another full pair is available
	lwu	a5,4(a0)		#   [delay] d1 = a[1]

	# One pending product in LO; a_len is 0 or 1 here.
.L.tail:
	lwu	a6,0(a3)		# c0 = c[0]
	mflo	t0			# w0 = b * d0
	beqz	a1,.L.last		# no further digit after this one
	daddu	t0,t0,t2		#   [delay] w0 += cy

	# Two digits left: finish d0, then d1.
	dmultu	a2,a5			# LO = b * d1
	daddu	t0,t0,a6		# w0 += c0
	dsrl32	t2,t0,0			# cy = upper 32 bits of w0
	sw	t0,0(a3)		# c[0] = lower 32 bits of w0
	lwu	a7,4(a3)		# c1 = c[1]
	mflo	t1			# w1 = b * d1
	daddu	t1,t1,t2		# w1 += cy
	daddu	t1,t1,a7		# w1 += c1
	sw	t1,4(a3)		# c[1] = lower 32 bits of w1
	dsrl32	t2,t1,0			# cy = upper 32 bits of w1
	b	.L.carry
	addiu	a3,a3,4			#   [delay] c += 1, so 4(a3) is the old c[2]

	# Exactly one digit left after the loop.
.L.last:
	daddu	t0,t0,a6		# w0 += c0
	sw	t0,0(a3)		# c[0] = lower 32 bits of w0
	b	.L.carry
	dsrl32	t2,t0,0			#   [delay] cy = upper 32 bits of w0

	# a_len was 1 on entry: a single product, no loop.
.L.single:
	lwu	a6,0(a3)		# c0 = c[0]
	mflo	t0			# w0 = b * d0
	daddu	t0,t0,a6		# w0 += c0
	sw	t0,0(a3)		# c[0] = lower 32 bits of w0
	dsrl32	t2,t0,0			# cy = upper 32 bits of w0

.L.carry:
	jr	ra
	sw	t2,4(a3)		#   [delay] c[1] = cy

.L.ret:
	jr	ra
	nop

.end s_mpv_mul_d_add
|
|
|
|
.ent s_mpv_mul_d_add_prop
.globl s_mpv_mul_d_add_prop

#-----------------------------------------------------------------------
# void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b,
#                           mp_digit *c)
#
# Multiply-accumulate on 32-bit digits:  c[0..a_len-1] += a[] * b,
# then the remaining carry is ADDED into c[a_len], c[a_len+1], ... for
# as long as a carry keeps coming out.  Returns immediately when a_len
# is 0.
#
# In:       a0 = a, a1 = a_len, a2 = b, a3 = c
# Scratch:  a4, a5   digit of a being multiplied / the following digit
#           a6, a7   matching digits of c
#           t0, t1   64-bit partial sums
#           t2       carry digit (cy)
#           t3       result of the "fewer than 2 digits left" test
#           HI/LO    clobbered by dmultu
#
# Written for .set noreorder: the instruction following every branch or
# jump sits in its delay slot and runs whether or not the branch is
# taken.  The multiplies are started early and collected later with
# mflo, so the instruction order below is deliberate.
#
# Note: the delay-slot load at the bottom of the pair loop reads a[1]
# even when no digits remain; the value is then unused.
#-----------------------------------------------------------------------
s_mpv_mul_d_add_prop:
	beqz	a1,.M.ret		# a_len == 0: nothing to do
	move	t2,zero			#   [delay] cy = 0
	dsll32	a2,a2,0			# clear the upper 32 bits of b (the
	dsrl32	a2,a2,0			#   original note says b is sometimes negative)
	lwu	a4,0(a0)		# d0 = a[0]
	dmultu	a2,a4			# LO = b * d0
	addiu	a1,a1,-1		# --a_len
	beqz	a1,.M.single		# a had exactly one digit
	sltiu	t3,a1,2			#   [delay] t3 = (a_len < 2)
	bnez	t3,.M.tail		# not enough digits for a full pair
	lwu	a5,4(a0)		#   [delay] d1 = a[1]

	# Main loop: consumes two digits of a per pass while a_len >= 2.
.M.pair:
	addiu	a1,a1,-2		# a_len -= 2
	lwu	a6,0(a3)		# c0 = c[0]
	mflo	t0			# w0 = b * d0
	daddu	t0,t0,t2		# w0 += cy
	daddu	t0,t0,a6		# w0 += c0
	dmultu	a2,a5			# LO = b * d1
	dsrl32	t2,t0,0			# cy = upper 32 bits of w0
	sw	t0,0(a3)		# c[0] = lower 32 bits of w0
	lwu	a4,8(a0)		# d0 = a[2]
	addiu	a0,a0,8			# a += 2
	lwu	a7,4(a3)		# c1 = c[1]
	mflo	t1			# w1 = b * d1
	daddu	t1,t1,t2		# w1 += cy
	daddu	t1,t1,a7		# w1 += c1
	dmultu	a2,a4			# LO = b * d0 (for the next step)
	dsrl32	t2,t1,0			# cy = upper 32 bits of w1
	sw	t1,4(a3)		# c[1] = lower 32 bits of w1
	addiu	a3,a3,8			# c += 2
	sltiu	t3,a1,2			# t3 = (a_len < 2)
	beqz	t3,.M.pair		# another full pair is available
	lwu	a5,4(a0)		#   [delay] d1 = a[1]

	# One pending product in LO; a_len is 0 or 1 here.
.M.tail:
	lwu	a6,0(a3)		# c0 = c[0]
	mflo	t0			# w0 = b * d0
	beqz	a1,.M.last		# no further digit after this one
	daddu	t0,t0,t2		#   [delay] w0 += cy

	# Two digits left: finish d0, then d1.
	dmultu	a2,a5			# LO = b * d1
	daddu	t0,t0,a6		# w0 += c0
	dsrl32	t2,t0,0			# cy = upper 32 bits of w0
	sw	t0,0(a3)		# c[0] = lower 32 bits of w0
	lwu	a7,4(a3)		# c1 = c[1]
	mflo	t1			# w1 = b * d1
	daddu	t1,t1,t2		# w1 += cy
	daddu	t1,t1,a7		# w1 += c1
	sw	t1,4(a3)		# c[1] = lower 32 bits of w1
	dsrl32	t2,t1,0			# cy = upper 32 bits of w1
	b	.M.prop
	addiu	a3,a3,8			#   [delay] c += 2: step past both digits

	# Exactly one digit left after the loop.
.M.last:
	daddu	t0,t0,a6		# w0 += c0
	sw	t0,0(a3)		# c[0] = lower 32 bits of w0
	dsrl32	t2,t0,0			# cy = upper 32 bits of w0
	b	.M.prop
	addiu	a3,a3,4			#   [delay] c += 1

	# a_len was 1 on entry: a single product, no loop.
.M.single:
	lwu	a6,0(a3)		# c0 = c[0]
	mflo	t0			# w0 = b * d0
	daddu	t0,t0,a6		# w0 += c0
	sw	t0,0(a3)		# c[0] = lower 32 bits of w0
	dsrl32	t2,t0,0			# cy = upper 32 bits of w0
	addiu	a3,a3,4			# c += 1

	# c now points at the first digit that was not written above.
.M.prop:
	beqz	t2,.M.ret		# no carry left: done
	nop
.M.carry:				# while (cy)
	lwu	a6,0(a3)		#   w = *c
	daddu	t2,t2,a6		#   w += cy
	sw	t2,0(a3)		#   *c = lower 32 bits of w
	dsrl32	t2,t2,0			#   cy = upper 32 bits of w
	bnez	t2,.M.carry
	addiu	a3,a3,4			#   [delay] c++

.M.ret:
	jr	ra
	nop

.end s_mpv_mul_d_add_prop
|
|
|
|
.ent s_mpv_mul_d
.globl s_mpv_mul_d

#-----------------------------------------------------------------------
# void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b,
#                  mp_digit *c)
#
# Plain multiply on 32-bit digits:  c[0..a_len-1] = a[] * b, with the
# final carry digit stored in c[a_len].  The previous contents of c are
# never read.  Returns immediately, touching nothing, when a_len is 0.
#
# In:       a0 = a, a1 = a_len, a2 = b, a3 = c
# Scratch:  a4, a5   digit of a being multiplied / the following digit
#           t0, t1   64-bit partial sums
#           t2       carry digit (cy)
#           t3       result of the "fewer than 2 digits left" test
#           HI/LO    clobbered by dmultu
#
# Written for .set noreorder: the instruction following every branch or
# jump sits in its delay slot and runs whether or not the branch is
# taken.  The multiplies are started early and collected later with
# mflo, so the instruction order below is deliberate.
#
# Note: the delay-slot load at the bottom of the pair loop reads a[1]
# even when no digits remain; the value is then unused.
#-----------------------------------------------------------------------
s_mpv_mul_d:
	beqz	a1,.N.ret		# a_len == 0: nothing to do
	move	t2,zero			#   [delay] cy = 0
	dsll32	a2,a2,0			# clear the upper 32 bits of b (the
	dsrl32	a2,a2,0			#   original note says b is sometimes negative)
	lwu	a4,0(a0)		# d0 = a[0]
	dmultu	a2,a4			# LO = b * d0
	addiu	a1,a1,-1		# --a_len
	beqz	a1,.N.single		# a had exactly one digit
	sltiu	t3,a1,2			#   [delay] t3 = (a_len < 2)
	bnez	t3,.N.tail		# not enough digits for a full pair
	lwu	a5,4(a0)		#   [delay] d1 = a[1]

	# Main loop: consumes two digits of a per pass while a_len >= 2.
.N.pair:
	addiu	a1,a1,-2		# a_len -= 2
	mflo	t0			# w0 = b * d0
	daddu	t0,t0,t2		# w0 += cy
	dsrl32	t2,t0,0			# cy = upper 32 bits of w0
	dmultu	a2,a5			# LO = b * d1
	sw	t0,0(a3)		# c[0] = lower 32 bits of w0
	lwu	a4,8(a0)		# d0 = a[2]
	addiu	a0,a0,8			# a += 2
	mflo	t1			# w1 = b * d1
	daddu	t1,t1,t2		# w1 += cy
	dsrl32	t2,t1,0			# cy = upper 32 bits of w1
	dmultu	a2,a4			# LO = b * d0 (for the next step)
	sw	t1,4(a3)		# c[1] = lower 32 bits of w1
	addiu	a3,a3,8			# c += 2
	sltiu	t3,a1,2			# t3 = (a_len < 2)
	beqz	t3,.N.pair		# another full pair is available
	lwu	a5,4(a0)		#   [delay] d1 = a[1]

	# One pending product in LO; a_len is 0 or 1 here.
.N.tail:
	mflo	t0			# w0 = b * d0
	beqz	a1,.N.last		# no further digit after this one
	daddu	t0,t0,t2		#   [delay] w0 += cy

	# Two digits left: finish d0, then d1.
	dmultu	a2,a5			# LO = b * d1
	dsrl32	t2,t0,0			# cy = upper 32 bits of w0
	sw	t0,0(a3)		# c[0] = lower 32 bits of w0
	mflo	t1			# w1 = b * d1
	daddu	t1,t1,t2		# w1 += cy
	sw	t1,4(a3)		# c[1] = lower 32 bits of w1
	dsrl32	t2,t1,0			# cy = upper 32 bits of w1
	b	.N.carry
	addiu	a3,a3,4			#   [delay] c += 1, so 4(a3) is the old c[2]

	# Exactly one digit left after the loop.
.N.last:
	sw	t0,0(a3)		# c[0] = lower 32 bits of w0
	b	.N.carry
	dsrl32	t2,t0,0			#   [delay] cy = upper 32 bits of w0

	# a_len was 1 on entry: a single product, no loop.
.N.single:
	mflo	t0			# w0 = b * d0
	sw	t0,0(a3)		# c[0] = lower 32 bits of w0
	dsrl32	t2,t0,0			# cy = upper 32 bits of w0

.N.carry:
	jr	ra
	sw	t2,4(a3)		#   [delay] c[1] = cy

.N.ret:
	jr	ra
	nop

.end s_mpv_mul_d
|
|
|
|
|
|
.ent s_mpv_sqr_add_prop
.globl s_mpv_sqr_add_prop

#-----------------------------------------------------------------------
# void s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs);
#
# For every 32-bit digit a[i], adds the 64-bit square a[i]*a[i] into the
# digit pair sqrs[2i], sqrs[2i+1], carrying from one pair to the next.
# Any carry out of the last pair is then added into the following digits
# of sqrs until it dies out.  Returns immediately when a_len is 0.
#
# In:       a0 = a, a1 = a_len, a2 = sqrs
# Scratch:  a3       current digit of a (a_i)
#           a4       square of a_i, then the updated 64-bit pair
#           a5, a6   the two digits of sqrs being updated / temporaries
#           a7, t0   carry bits (t0 is the carry into the next pair)
#           HI/LO    clobbered by dmultu
#
# Written for .set noreorder: the instruction following every branch or
# jump sits in its delay slot and runs whether or not the branch is
# taken.
#
# Changes relative to the previous version of this routine:
#   * The final carry loop compared a 64-bit sum against a zero-extended
#     32-bit digit, so it could never see a carry, never stored the sum,
#     and overwrote the next digit with the previous high word.  It now
#     adds the carry, stores the low word and carries the high word on.
#   * a_len == 0 is rejected up front instead of being decremented to -1.
#   * The carry bits are summed with addu (no overflow trap).
#-----------------------------------------------------------------------
s_mpv_sqr_add_prop:
	beq	a1,zero,.P.9	# a_len == 0: nothing to do
	move	t0,zero		#   [delay] carry into first pair = 0
	move	a7,zero
	lwu	a3,0(a0)	# a_i = a[0]
	addiu	a1,a1,-1	# --a_len
	dmultu	a3,a3		# LO = a_i * a_i
	beq	a1,zero,.P.3	# only one digit: go straight to the last pair
	addiu	a0,a0,4		#   [delay] ++a

	# One pair of sqrs per pass; the square for this pass is already in
	# LO and the square for the next pass is started inside the loop.
.P.2:
	lwu	a5,0(a2)	# low digit of the pair
	lwu	a6,4(a2)	# high digit of the pair
	addiu	a2,a2,8		# sqrs += 2
	dsll32	a6,a6,0
	daddu	a5,a5,a6	# a5 = 64-bit value of the pair
	lwu	a3,0(a0)	# a_i = next digit of a
	addiu	a0,a0,4		# ++a
	mflo	a4		# a4 = square computed for this pair
	daddu	a6,a5,a4	# pair + square
	sltu	a7,a6,a5	# a7 = 1 if that add wrapped past 64 bits
	dmultu	a3,a3		# LO = square for the next pair
	daddu	a4,a6,t0	# + carry in
	sltu	t0,a4,a6	# t0 = 1 if that add wrapped past 64 bits
	addu	t0,t0,a7	# carry out = sum of both wrap flags
	sw	a4,-8(a2)	# store low digit
	addiu	a1,a1,-1	# --a_len
	dsrl32	a4,a4,0
	bne	a1,zero,.P.2	# more digits of a to square
	sw	a4,-4(a2)	#   [delay] store high digit

	# Last pair: same update, but no further square to start.
.P.3:
	lwu	a5,0(a2)	# low digit of the pair
	lwu	a6,4(a2)	# high digit of the pair
	addiu	a2,a2,8		# sqrs += 2
	dsll32	a6,a6,0
	daddu	a5,a5,a6	# a5 = 64-bit value of the pair
	mflo	a4		# a4 = square computed for this pair
	daddu	a6,a5,a4	# pair + square
	sltu	a7,a6,a5	# a7 = 1 if that add wrapped past 64 bits
	daddu	a4,a6,t0	# + carry in
	sltu	t0,a4,a6	# t0 = 1 if that add wrapped past 64 bits
	addu	t0,t0,a7	# carry out of the last pair
	sw	a4,-8(a2)	# store low digit
	dsrl32	a4,a4,0
	beq	t0,zero,.P.9	# no carry left: done
	sw	a4,-4(a2)	#   [delay] store high digit (both paths)

	# Propagate the final carry one 32-bit digit at a time.
.P.8:
	lwu	a5,0(a2)	# w = *sqrs (zero-extended)
	daddu	a5,a5,t0	# w += carry
	sw	a5,0(a2)	# *sqrs = lower 32 bits of w
	dsrl32	t0,a5,0		# carry = upper 32 bits of w
	bne	t0,zero,.P.8	# loop while a carry persists
	addiu	a2,a2,4		#   [delay] sqrs++

.P.9:
	jr	ra
	nop

.end s_mpv_sqr_add_prop
|