RetroZilla/security/nss/lib/freebl/mpi/mpi_mips.s

/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is the Netscape security libraries.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corporation.
 * Portions created by the Initial Developer are Copyright (C) 2000
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
#include <regdef.h>
        .set    noreorder
        .set    noat

        .section        .text, 1, 0x00000006, 4, 4
.text:
        .section        .text

        .ent    s_mpv_mul_d_add
        .globl  s_mpv_mul_d_add

/*
 * s_mpv_mul_d_add:  c[0 .. a_len-1] += a[0 .. a_len-1] * b,
 *                   then c[a_len] = final carry.
 *
 * Digits are 32 bits wide (loaded with lwu, stored with sw); products and
 * running sums live in 64-bit registers (dmultu / daddu), and the carry is
 * the upper 32 bits of each sum (dsrl32).  The final carry is stored into
 * c[a_len], replacing whatever was there; it is not added to it.
 * Nothing is read or written when a_len is 0.
 *
 * In:    a0 = a,  a1 = a_len,  a2 = b (upper 32 bits are cleared here),
 *        a3 = c
 * Temps: a4,a5 = digits of a;  a6,a7 = digits of c;  t0,t1 = 64-bit sums;
 *        t2 = carry;  t3 = loop test
 *
 * The code runs under .set noreorder: the instruction written after every
 * branch or jump is its delay slot and executes whether or not the branch
 * is taken.  The main loop handles two digits per pass and starts the
 * multiply for the following digit before the current one is stored, so
 * instruction order matters.
 *
 * NOTE(review): the load in the delay slot at the bottom of the loop reads
 * a[1] even when a_len has already dropped to 0 (this happens for odd
 * a_len >= 3), i.e. one 32-bit word past the last digit of a.  The loaded
 * value is not used in that case, but the read itself is out of bounds.
 */
s_mpv_mul_d_add: 
 #/* c += a * b */
 #void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, 
 #			      mp_digit *c)
 #{
 #  mp_digit   a0, a1;	regs a4, a5
 #  mp_digit   c0, c1;  regs a6, a7
 #  mp_digit   cy = 0;  reg t2
 #  mp_word    w0, w1;  regs t0, t1
 #
 #  if (a_len) {
	beq	a1,zero,.L.1
	move	t2,zero		# cy = 0
	dsll32	a2,a2,0		# "b" is sometimes negative (?!?!)
	dsrl32	a2,a2,0		# This clears the upper 32 bits.
 #    a0 = a[0];
	lwu	a4,0(a0)
 #    w0 = ((mp_word)b * a0);
	dmultu	a2,a4
 #    if (--a_len) {
	addiu	a1,a1,-1
	beq	a1,zero,.L.2
 #      while (a_len >= 2) {
 #	  (the sltiu below is the delay slot of the beq above; its result
 #	  in t3 is only consumed on the fall-through path)
	sltiu	t3,a1,2
	bne	t3,zero,.L.3
 #	  a1     = a[1];	(delay slot: loaded on both paths)
	lwu	a5,4(a0)
.L.4:
 #	  a_len -= 2;
        addiu	a1,a1,-2
 #	  c0     = c[0];
	lwu	a6,0(a3)
 #	  w0    += cy;
	mflo	t0
	daddu	t0,t0,t2
 #	  w0    += c0;
	daddu	t0,t0,a6
 #	  w1     = (mp_word)b * a1; 
	dmultu	a2,a5			#
 #	  cy     = CARRYOUT(w0);
	dsrl32	t2,t0,0
 #	  c[0]   = ACCUM(w0);
	sw	t0,0(a3)
 #	  a0     = a[2];
	lwu	a4,8(a0)
 #	  a     += 2;
	addiu	a0,a0,8
 #	  c1     = c[1];
	lwu	a7,4(a3)
 #	  w1    += cy;
	mflo	t1
	daddu	t1,t1,t2
 #	  w1    += c1;
	daddu	t1,t1,a7
 #	  w0     = (mp_word)b * a0;
	dmultu	a2,a4			#
 #	  cy     = CARRYOUT(w1);
	dsrl32	t2,t1,0
 #	  c[1]   = ACCUM(w1);
	sw	t1,4(a3)
 #	  c     += 2;
	addiu	a3,a3,8
	sltiu	t3,a1,2
	beq	t3,zero,.L.4
 #	  a1     = a[1];	(delay slot: also executes when the loop exits)
	lwu	a5,4(a0)
 #      }
.L.3:
 #      c0       = c[0];
	lwu	a6,0(a3)
 #      w0      += cy;		(done in the delay slot of the beq below)
 #      if (a_len) {
	mflo	t0
	beq	a1,zero,.L.5
	daddu	t0,t0,t2
 #	  w1     = (mp_word)b * a1; 
	dmultu	a2,a5
 #	  w0    += c0;
	daddu	t0,t0,a6		#
 #	  cy     = CARRYOUT(w0);
	dsrl32	t2,t0,0
 #	  c[0]   = ACCUM(w0);
	sw	t0,0(a3)
 #	  c1     = c[1];
	lwu	a7,4(a3)
 #	  w1    += cy;
	mflo	t1
	daddu	t1,t1,t2
 #	  w1    += c1;
	daddu	t1,t1,a7
 #	  c[1]   = ACCUM(w1);
	sw	t1,4(a3)
 #	  cy     = CARRYOUT(w1);
	dsrl32	t2,t1,0
 #	  c     += 1;		(so that c[1] at .L.6 is the digit after w1)
	b	.L.6
	addiu	a3,a3,4
 #      } else {
.L.5:
 #	  w0    += c0;
	daddu	t0,t0,a6
 #	  c[0]   = ACCUM(w0);
	sw	t0,0(a3)
 #	  cy     = CARRYOUT(w0);	(delay slot of the branch)
	b	.L.6
	dsrl32	t2,t0,0
 #      }
 #    } else {
.L.2:
 #      c0     = c[0];
	lwu	a6,0(a3)
 #      w0    += c0;
	mflo	t0
	daddu	t0,t0,a6
 #      c[0]   = ACCUM(w0);
	sw	t0,0(a3)
 #      cy     = CARRYOUT(w0);
	dsrl32	t2,t0,0
 #    }
.L.6:
 #    c[1] = cy;		(the store is the delay slot of the return)
	jr	ra
	sw	t2,4(a3)
 #  }
.L.1:
	jr	ra
	nop
 #}
 #
        .end    s_mpv_mul_d_add

        .ent    s_mpv_mul_d_add_prop
        .globl  s_mpv_mul_d_add_prop

/*
 * s_mpv_mul_d_add_prop:  c[0 .. a_len-1] += a[0 .. a_len-1] * b, then the
 * remaining carry is ADDED into c[a_len], c[a_len+1], ... until it is zero.
 *
 * Digits are 32 bits wide (lwu / sw); products and sums are 64-bit
 * (dmultu / daddu) and the carry is the upper 32 bits of each sum (dsrl32).
 * Nothing is read or written when a_len is 0.
 *
 * In:    a0 = a,  a1 = a_len,  a2 = b (upper 32 bits are cleared here),
 *        a3 = c
 * Temps: a4,a5 = digits of a;  a6,a7 = digits of c;  t0,t1 = 64-bit sums;
 *        t2 = carry;  t3 = loop test
 *
 * Every path that reaches .M.6 leaves a3 pointing at the first digit of c
 * that has not been updated yet, which is where carry propagation starts.
 *
 * The code runs under .set noreorder: the instruction written after every
 * branch or jump is its delay slot and executes whether or not the branch
 * is taken.
 *
 * NOTE(review): no bound on c is checked in the carry loop; presumably the
 * caller guarantees c is long enough to absorb the carry -- confirm.
 * NOTE(review): the load in the delay slot at the bottom of the main loop
 * reads a[1] even when a_len has already dropped to 0 (odd a_len >= 3),
 * i.e. one 32-bit word past the last digit of a.  The value is unused then.
 */
s_mpv_mul_d_add_prop: 
 #/* c += a * b */
 #void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, 
 #			      mp_digit *c)
 #{
 #  mp_digit   a0, a1;	regs a4, a5
 #  mp_digit   c0, c1;  regs a6, a7
 #  mp_digit   cy = 0;  reg t2
 #  mp_word    w0, w1;  regs t0, t1
 #
 #  if (a_len) {
	beq	a1,zero,.M.1
	move	t2,zero		# cy = 0
	dsll32	a2,a2,0		# "b" is sometimes negative (?!?!)
	dsrl32	a2,a2,0		# This clears the upper 32 bits.
 #    a0 = a[0];
	lwu	a4,0(a0)
 #    w0 = ((mp_word)b * a0);
	dmultu	a2,a4
 #    if (--a_len) {
	addiu	a1,a1,-1
	beq	a1,zero,.M.2
 #      while (a_len >= 2) {
 #	  (the sltiu below is the delay slot of the beq above; its result
 #	  in t3 is only consumed on the fall-through path)
	sltiu	t3,a1,2
	bne	t3,zero,.M.3
 #	  a1     = a[1];	(delay slot: loaded on both paths)
	lwu	a5,4(a0)
.M.4:
 #	  a_len -= 2;
        addiu	a1,a1,-2
 #	  c0     = c[0];
	lwu	a6,0(a3)
 #	  w0    += cy;
	mflo	t0
	daddu	t0,t0,t2
 #	  w0    += c0;
	daddu	t0,t0,a6
 #	  w1     = (mp_word)b * a1; 
	dmultu	a2,a5			#
 #	  cy     = CARRYOUT(w0);
	dsrl32	t2,t0,0
 #	  c[0]   = ACCUM(w0);
	sw	t0,0(a3)
 #	  a0     = a[2];
	lwu	a4,8(a0)
 #	  a     += 2;
	addiu	a0,a0,8
 #	  c1     = c[1];
	lwu	a7,4(a3)
 #	  w1    += cy;
	mflo	t1
	daddu	t1,t1,t2
 #	  w1    += c1;
	daddu	t1,t1,a7
 #	  w0     = (mp_word)b * a0;
	dmultu	a2,a4			#
 #	  cy     = CARRYOUT(w1);
	dsrl32	t2,t1,0
 #	  c[1]   = ACCUM(w1);
	sw	t1,4(a3)
 #	  c     += 2;
	addiu	a3,a3,8
	sltiu	t3,a1,2
	beq	t3,zero,.M.4
 #	  a1     = a[1];	(delay slot: also executes when the loop exits)
	lwu	a5,4(a0)
 #      }
.M.3:
 #      c0       = c[0];
	lwu	a6,0(a3)
 #      w0      += cy;		(done in the delay slot of the beq below)
 #      if (a_len) {
	mflo	t0
	beq	a1,zero,.M.5
	daddu	t0,t0,t2
 #	  w1     = (mp_word)b * a1; 
	dmultu	a2,a5
 #	  w0    += c0;
	daddu	t0,t0,a6		#
 #	  cy     = CARRYOUT(w0);
	dsrl32	t2,t0,0
 #	  c[0]   = ACCUM(w0);
	sw	t0,0(a3)
 #	  c1     = c[1];
	lwu	a7,4(a3)
 #	  w1    += cy;
	mflo	t1
	daddu	t1,t1,t2
 #	  w1    += c1;
	daddu	t1,t1,a7
 #	  c[1]   = ACCUM(w1);
	sw	t1,4(a3)
 #	  cy     = CARRYOUT(w1);
	dsrl32	t2,t1,0
 #	  c     += 2;		(skip both digits just written; delay slot)
	b	.M.6
	addiu	a3,a3,8
 #      } else {
.M.5:
 #	  w0    += c0;
	daddu	t0,t0,a6
 #	  c[0]   = ACCUM(w0);
	sw	t0,0(a3)
 #	  cy     = CARRYOUT(w0);
	dsrl32	t2,t0,0
 #	  c     += 1;		(delay slot of the branch)
	b	.M.6
	addiu	a3,a3,4
 #      }
 #    } else {
.M.2:
 #      c0     = c[0];
	lwu	a6,0(a3)
 #      w0    += c0;
	mflo	t0
	daddu	t0,t0,a6
 #      c[0]   = ACCUM(w0);
	sw	t0,0(a3)
 #      cy     = CARRYOUT(w0);
	dsrl32	t2,t0,0
 #      c     += 1;
	addiu	a3,a3,4
 #    }
.M.6:

 #    while (cy) {
	beq	t2,zero,.M.1
	nop
.M.7:
 #      mp_word w = (mp_word)*c + cy;	(t2 is reused to hold w)
	lwu	a6,0(a3)
	daddu	t2,t2,a6
 #      *c++ = ACCUM(w);		(the ++ is in the delay slot of the bne)
	sw	t2,0(a3)
 #      cy = CARRYOUT(w);
	dsrl32	t2,t2,0
	bne	t2,zero,.M.7
	addiu	a3,a3,4

 #  }
.M.1:
	jr	ra
	nop
 #}
 #
        .end    s_mpv_mul_d_add_prop

        .ent    s_mpv_mul_d
        .globl  s_mpv_mul_d

/*
 * s_mpv_mul_d:  c[0 .. a_len-1] = a[0 .. a_len-1] * b,
 *               then c[a_len] = final carry.
 *
 * Unlike the _add variants, c is never read here: it is written only.
 * Digits are 32 bits wide (lwu / sw); products and sums are 64-bit
 * (dmultu / daddu) and the carry is the upper 32 bits of each sum (dsrl32).
 * Nothing is read or written when a_len is 0.
 *
 * In:    a0 = a,  a1 = a_len,  a2 = b (upper 32 bits are cleared here),
 *        a3 = c
 * Temps: a4,a5 = digits of a;  t0,t1 = 64-bit sums;  t2 = carry;
 *        t3 = loop test
 *
 * The code runs under .set noreorder: the instruction written after every
 * branch or jump is its delay slot and executes whether or not the branch
 * is taken.
 *
 * NOTE(review): the load in the delay slot at the bottom of the loop reads
 * a[1] even when a_len has already dropped to 0 (odd a_len >= 3), i.e. one
 * 32-bit word past the last digit of a.  The value is unused in that case.
 */
s_mpv_mul_d: 
 #/* c = a * b */
 #void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, 
 #			      mp_digit *c)
 #{
 #  mp_digit   a0, a1;	regs a4, a5
 #  mp_digit   cy = 0;  reg t2
 #  mp_word    w0, w1;  regs t0, t1
 #
 #  if (a_len) {
	beq	a1,zero,.N.1
	move	t2,zero		# cy = 0
	dsll32	a2,a2,0		# "b" is sometimes negative (?!?!)
	dsrl32	a2,a2,0		# This clears the upper 32 bits.
 #    a0 = a[0];
	lwu	a4,0(a0)
 #    w0 = ((mp_word)b * a0);
	dmultu	a2,a4
 #    if (--a_len) {
	addiu	a1,a1,-1
	beq	a1,zero,.N.2
 #      while (a_len >= 2) {
 #	  (the sltiu below is the delay slot of the beq above; its result
 #	  in t3 is only consumed on the fall-through path)
	sltiu	t3,a1,2
	bne	t3,zero,.N.3
 #	  a1     = a[1];	(delay slot: loaded on both paths)
	lwu	a5,4(a0)
.N.4:
 #	  a_len -= 2;
        addiu	a1,a1,-2
 #	  w0    += cy;
	mflo	t0
	daddu	t0,t0,t2
 #	  cy     = CARRYOUT(w0);
	dsrl32	t2,t0,0
 #	  w1     = (mp_word)b * a1; 
	dmultu	a2,a5	
 #	  c[0]   = ACCUM(w0);
	sw	t0,0(a3)
 #	  a0     = a[2];
	lwu	a4,8(a0)
 #	  a     += 2;
	addiu	a0,a0,8
 #	  w1    += cy;
	mflo	t1
	daddu	t1,t1,t2
 #	  cy     = CARRYOUT(w1);
	dsrl32	t2,t1,0
 #	  w0     = (mp_word)b * a0;
	dmultu	a2,a4	
 #	  c[1]   = ACCUM(w1);
	sw	t1,4(a3)
 #	  c     += 2;
	addiu	a3,a3,8
	sltiu	t3,a1,2
	beq	t3,zero,.N.4
 #	  a1     = a[1];	(delay slot: also executes when the loop exits)
	lwu	a5,4(a0)
 #      }
.N.3:
 #      w0      += cy;		(done in the delay slot of the beq below)
 #      if (a_len) {
	mflo	t0
	beq	a1,zero,.N.5
	daddu	t0,t0,t2
 #	  w1     = (mp_word)b * a1; 
	dmultu	a2,a5			#
 #	  cy     = CARRYOUT(w0);
	dsrl32	t2,t0,0
 #	  c[0]   = ACCUM(w0);
	sw	t0,0(a3)
 #	  w1    += cy;
	mflo	t1
	daddu	t1,t1,t2
 #	  c[1]   = ACCUM(w1);
	sw	t1,4(a3)
 #	  cy     = CARRYOUT(w1);
	dsrl32	t2,t1,0
 #	  c     += 1;		(so that c[1] at .N.6 is the digit after w1)
	b	.N.6
	addiu	a3,a3,4
 #      } else {
.N.5:
 #	  c[0]   = ACCUM(w0);
	sw	t0,0(a3)
 #	  cy     = CARRYOUT(w0);	(delay slot of the branch)
	b	.N.6
	dsrl32	t2,t0,0
 #      }
 #    } else {
.N.2:
 #      (single digit: w0 is the product itself, cy is still 0)
	mflo	t0
 #      c[0]   = ACCUM(w0);
	sw	t0,0(a3)
 #      cy     = CARRYOUT(w0);
	dsrl32	t2,t0,0
 #    }
.N.6:
 #    c[1] = cy;		(the store is the delay slot of the return)
	jr	ra
	sw	t2,4(a3)
 #  }
.N.1:
	jr	ra
	nop
 #}
 #
        .end    s_mpv_mul_d


        .ent    s_mpv_sqr_add_prop
        .globl  s_mpv_sqr_add_prop
/*
 * void s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs);
 *
 * For every 32-bit digit a[i], adds the 64-bit square a[i]*a[i] into the
 * digit pair sqrs[2*i], sqrs[2*i+1], carrying from each pair into the next.
 * Any carry left after the last pair is then added into the following
 * digits of sqrs until it dies out.  Returns at once when a_len is 0.
 *
 * registers
 *	a0		*a
 *	a1		a_len
 *	a2		*sqrs
 *	a3		digit from *a, a_i
 *	a4		square of a_i, then the updated 64-bit pair
 *	a5,a6		next 2 digits in sqrs, merged into one 64-bit value
 *	a7		carry out of (pair + square)
 *	t0		running carry (0 or 1) between pairs
 *
 * .set noreorder is in effect: the instruction written after every branch
 * or jump is its delay slot and always executes.
 *
 * NOTE(review): the carry loop does not check any bound on sqrs; presumably
 * the caller guarantees the buffer can absorb the carry -- confirm.
 */
s_mpv_sqr_add_prop:
	beq	a1,zero,.P.9	# a_len == 0: nothing to do
	move	t0,zero		# (delay slot) running carry = 0
	lwu	a3,0(a0)
	addiu	a1,a1,-1	# --a_len
	dmultu	a3,a3
	beq	a1,zero,.P.3	# jump if the only sqr is already in flight
	addiu	a0,a0,4		# (delay slot) ++a
.P.2:
        lwu	a5,0(a2)
        lwu	a6,4(a2)
	addiu	a2,a2,8		# sqrs += 2;
	dsll32	a6,a6,0
	daddu	a5,a5,a6	# a5 = sqrs[1]:sqrs[0] as one 64-bit value
	lwu	a3,0(a0)
	addiu	a0,a0,4		# ++a
	mflo	a4
	daddu	a6,a5,a4	# pair + square
	sltu	a7,a6,a5	# a7 = a6 < a5	detect overflow
	dmultu	a3,a3		# start the next square
	daddu	a4,a6,t0	# + carry in
	sltu	t0,a4,a6
	add	t0,t0,a7	# carry out (at most one of the two is set)
	sw	a4,-8(a2)
	addiu	a1,a1,-1	# --a_len
	dsrl32	a4,a4,0
	bne	a1,zero,.P.2	# loop if a_len > 0
	sw	a4,-4(a2)	# (delay slot) high digit of the pair
.P.3:
        lwu	a5,0(a2)
        lwu	a6,4(a2)
	addiu	a2,a2,8		# sqrs += 2;
	dsll32	a6,a6,0
	daddu	a5,a5,a6
	mflo	a4
	daddu	a6,a5,a4
	sltu	a7,a6,a5	# a7 = a6 < a5	detect overflow
	daddu	a4,a6,t0
	sltu	t0,a4,a6
	add	t0,t0,a7
	sw	a4,-8(a2)
	dsrl32	a4,a4,0
	beq	t0,zero,.P.9	# done if no carry out of the last pair
	sw	a4,-4(a2)	# (delay slot) high digit of the last pair
.P.8:
	/* propagate final carry: w = *sqrs + cy; *sqrs++ = ACCUM(w) */
	lwu	a5,0(a2)
	daddu	a6,a5,t0
	sw	a6,0(a2)	# store the low 32 bits of the sum
	dsrl32	t0,a6,0		# cy = CARRYOUT(w), bit 32 of the sum
	bne	t0,zero,.P.8	# loop if carry persists
	addiu	a2,a2,4		# (delay slot) sqrs++
.P.9:
	jr	ra
	nop

        .end    s_mpv_sqr_add_prop
first commit 2015-10-21 05:03:22 +02:00			`/* *** BEGIN LICENSE BLOCK ***`
			`* Version: MPL 1.1/GPL 2.0/LGPL 2.1`
			`*`
			`* The contents of this file are subject to the Mozilla Public License Version`
			`* 1.1 (the "License"); you may not use this file except in compliance with`
			`* the License. You may obtain a copy of the License at`
			`* http://www.mozilla.org/MPL/`
			`*`
			`* Software distributed under the License is distributed on an "AS IS" basis,`
			`* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License`
			`* for the specific language governing rights and limitations under the`
			`* License.`
			`*`
			`* The Original Code is the Netscape security libraries.`
			`*`
			`* The Initial Developer of the Original Code is`
			`* Netscape Communications Corporation.`
			`* Portions created by the Initial Developer are Copyright (C) 2000`
			`* the Initial Developer. All Rights Reserved.`
			`*`
			`* Contributor(s):`
			`*`
			`* Alternatively, the contents of this file may be used under the terms of`
			`* either the GNU General Public License Version 2 or later (the "GPL"), or`
			`* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),`
			`* in which case the provisions of the GPL or the LGPL are applicable instead`
			`* of those above. If you wish to allow use of your version of this file only`
			`* under the terms of either the GPL or the LGPL, and not to allow others to`
			`* use your version of this file under the terms of the MPL, indicate your`
			`* decision by deleting the provisions above and replace them with the notice`
			`* and other provisions required by the GPL or the LGPL. If you do not delete`
			`* the provisions above, a recipient may use your version of this file under`
			`* the terms of any one of the MPL, the GPL or the LGPL.`
			`*`
			`* *** END LICENSE BLOCK *** */`
			`#include <regdef.h>`
			`.set noreorder`
			`.set noat`

			`.section .text, 1, 0x00000006, 4, 4`
			`.text:`
			`.section .text`

			`.ent s_mpv_mul_d_add`
			`.globl s_mpv_mul_d_add`

			`s_mpv_mul_d_add:`
			`#/* c += a * b */`
			`#void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,`
			`# mp_digit *c)`
			`#{`
			`# mp_digit a0, a1; regs a4, a5`
			`# mp_digit c0, c1; regs a6, a7`
			`# mp_digit cy = 0; reg t2`
			`# mp_word w0, w1; regs t0, t1`
			`#`
			`# if (a_len) {`
			`beq a1,zero,.L.1`
			`move t2,zero # cy = 0`
			`dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)`
			`dsrl32 a2,a2,0 # This clears the upper 32 bits.`
			`# a0 = a[0];`
			`lwu a4,0(a0)`
			`# w0 = ((mp_word)b * a0);`
			`dmultu a2,a4`
			`# if (--a_len) {`
			`addiu a1,a1,-1`
			`beq a1,zero,.L.2`
			`# while (a_len >= 2) {`
			`sltiu t3,a1,2`
			`bne t3,zero,.L.3`
			`# a1 = a[1];`
			`lwu a5,4(a0)`
			`.L.4:`
			`# a_len -= 2;`
			`addiu a1,a1,-2`
			`# c0 = c[0];`
			`lwu a6,0(a3)`
			`# w0 += cy;`
			`mflo t0`
			`daddu t0,t0,t2`
			`# w0 += c0;`
			`daddu t0,t0,a6`
			`# w1 = (mp_word)b * a1;`
			`dmultu a2,a5 #`
			`# cy = CARRYOUT(w0);`
			`dsrl32 t2,t0,0`
			`# c[0] = ACCUM(w0);`
			`sw t0,0(a3)`
			`# a0 = a[2];`
			`lwu a4,8(a0)`
			`# a += 2;`
			`addiu a0,a0,8`
			`# c1 = c[1];`
			`lwu a7,4(a3)`
			`# w1 += cy;`
			`mflo t1`
			`daddu t1,t1,t2`
			`# w1 += c1;`
			`daddu t1,t1,a7`
			`# w0 = (mp_word)b * a0;`
			`dmultu a2,a4 #`
			`# cy = CARRYOUT(w1);`
			`dsrl32 t2,t1,0`
			`# c[1] = ACCUM(w1);`
			`sw t1,4(a3)`
			`# c += 2;`
			`addiu a3,a3,8`
			`sltiu t3,a1,2`
			`beq t3,zero,.L.4`
			`# a1 = a[1];`
			`lwu a5,4(a0)`
			`# }`
			`.L.3:`
			`# c0 = c[0];`
			`lwu a6,0(a3)`
			`# w0 += cy;`
			`# if (a_len) {`
			`mflo t0`
			`beq a1,zero,.L.5`
			`daddu t0,t0,t2`
			`# w1 = (mp_word)b * a1;`
			`dmultu a2,a5`
			`# w0 += c0;`
			`daddu t0,t0,a6 #`
			`# cy = CARRYOUT(w0);`
			`dsrl32 t2,t0,0`
			`# c[0] = ACCUM(w0);`
			`sw t0,0(a3)`
			`# c1 = c[1];`
			`lwu a7,4(a3)`
			`# w1 += cy;`
			`mflo t1`
			`daddu t1,t1,t2`
			`# w1 += c1;`
			`daddu t1,t1,a7`
			`# c[1] = ACCUM(w1);`
			`sw t1,4(a3)`
			`# cy = CARRYOUT(w1);`
			`dsrl32 t2,t1,0`
			`# c += 1;`
			`b .L.6`
			`addiu a3,a3,4`
			`# } else {`
			`.L.5:`
			`# w0 += c0;`
			`daddu t0,t0,a6`
			`# c[0] = ACCUM(w0);`
			`sw t0,0(a3)`
			`# cy = CARRYOUT(w0);`
			`b .L.6`
			`dsrl32 t2,t0,0`
			`# }`
			`# } else {`
			`.L.2:`
			`# c0 = c[0];`
			`lwu a6,0(a3)`
			`# w0 += c0;`
			`mflo t0`
			`daddu t0,t0,a6`
			`# c[0] = ACCUM(w0);`
			`sw t0,0(a3)`
			`# cy = CARRYOUT(w0);`
			`dsrl32 t2,t0,0`
			`# }`
			`.L.6:`
			`# c[1] = cy;`
			`jr ra`
			`sw t2,4(a3)`
			`# }`
			`.L.1:`
			`jr ra`
			`nop`
			`#}`
			`#`
			`.end s_mpv_mul_d_add`

			`.ent s_mpv_mul_d_add_prop`
			`.globl s_mpv_mul_d_add_prop`

			`s_mpv_mul_d_add_prop:`
			`#/* c += a * b */`
			`#void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b,`
			`# mp_digit *c)`
			`#{`
			`# mp_digit a0, a1; regs a4, a5`
			`# mp_digit c0, c1; regs a6, a7`
			`# mp_digit cy = 0; reg t2`
			`# mp_word w0, w1; regs t0, t1`
			`#`
			`# if (a_len) {`
			`beq a1,zero,.M.1`
			`move t2,zero # cy = 0`
			`dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)`
			`dsrl32 a2,a2,0 # This clears the upper 32 bits.`
			`# a0 = a[0];`
			`lwu a4,0(a0)`
			`# w0 = ((mp_word)b * a0);`
			`dmultu a2,a4`
			`# if (--a_len) {`
			`addiu a1,a1,-1`
			`beq a1,zero,.M.2`
			`# while (a_len >= 2) {`
			`sltiu t3,a1,2`
			`bne t3,zero,.M.3`
			`# a1 = a[1];`
			`lwu a5,4(a0)`
			`.M.4:`
			`# a_len -= 2;`
			`addiu a1,a1,-2`
			`# c0 = c[0];`
			`lwu a6,0(a3)`
			`# w0 += cy;`
			`mflo t0`
			`daddu t0,t0,t2`
			`# w0 += c0;`
			`daddu t0,t0,a6`
			`# w1 = (mp_word)b * a1;`
			`dmultu a2,a5 #`
			`# cy = CARRYOUT(w0);`
			`dsrl32 t2,t0,0`
			`# c[0] = ACCUM(w0);`
			`sw t0,0(a3)`
			`# a0 = a[2];`
			`lwu a4,8(a0)`
			`# a += 2;`
			`addiu a0,a0,8`
			`# c1 = c[1];`
			`lwu a7,4(a3)`
			`# w1 += cy;`
			`mflo t1`
			`daddu t1,t1,t2`
			`# w1 += c1;`
			`daddu t1,t1,a7`
			`# w0 = (mp_word)b * a0;`
			`dmultu a2,a4 #`
			`# cy = CARRYOUT(w1);`
			`dsrl32 t2,t1,0`
			`# c[1] = ACCUM(w1);`
			`sw t1,4(a3)`
			`# c += 2;`
			`addiu a3,a3,8`
			`sltiu t3,a1,2`
			`beq t3,zero,.M.4`
			`# a1 = a[1];`
			`lwu a5,4(a0)`
			`# }`
			`.M.3:`
			`# c0 = c[0];`
			`lwu a6,0(a3)`
			`# w0 += cy;`
			`# if (a_len) {`
			`mflo t0`
			`beq a1,zero,.M.5`
			`daddu t0,t0,t2`
			`# w1 = (mp_word)b * a1;`
			`dmultu a2,a5`
			`# w0 += c0;`
			`daddu t0,t0,a6 #`
			`# cy = CARRYOUT(w0);`
			`dsrl32 t2,t0,0`
			`# c[0] = ACCUM(w0);`
			`sw t0,0(a3)`
			`# c1 = c[1];`
			`lwu a7,4(a3)`
			`# w1 += cy;`
			`mflo t1`
			`daddu t1,t1,t2`
			`# w1 += c1;`
			`daddu t1,t1,a7`
			`# c[1] = ACCUM(w1);`
			`sw t1,4(a3)`
			`# cy = CARRYOUT(w1);`
			`dsrl32 t2,t1,0`
			`# c += 1;`
			`b .M.6`
			`addiu a3,a3,8`
			`# } else {`
			`.M.5:`
			`# w0 += c0;`
			`daddu t0,t0,a6`
			`# c[0] = ACCUM(w0);`
			`sw t0,0(a3)`
			`# cy = CARRYOUT(w0);`
			`dsrl32 t2,t0,0`
			`b .M.6`
			`addiu a3,a3,4`
			`# }`
			`# } else {`
			`.M.2:`
			`# c0 = c[0];`
			`lwu a6,0(a3)`
			`# w0 += c0;`
			`mflo t0`
			`daddu t0,t0,a6`
			`# c[0] = ACCUM(w0);`
			`sw t0,0(a3)`
			`# cy = CARRYOUT(w0);`
			`dsrl32 t2,t0,0`
			`addiu a3,a3,4`
			`# }`
			`.M.6:`

			`# while (cy) {`
			`beq t2,zero,.M.1`
			`nop`
			`.M.7:`
			`# mp_word w = (mp_word)*c + cy;`
			`lwu a6,0(a3)`
			`daddu t2,t2,a6`
			`# *c++ = ACCUM(w);`
			`sw t2,0(a3)`
			`# cy = CARRYOUT(w);`
			`dsrl32 t2,t2,0`
			`bne t2,zero,.M.7`
			`addiu a3,a3,4`

			`# }`
			`.M.1:`
			`jr ra`
			`nop`
			`#}`
			`#`
			`.end s_mpv_mul_d_add_prop`

			`.ent s_mpv_mul_d`
			`.globl s_mpv_mul_d`

			`s_mpv_mul_d:`
			`#/* c = a * b */`
			`#void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b,`
			`# mp_digit *c)`
			`#{`
			`# mp_digit a0, a1; regs a4, a5`
			`# mp_digit cy = 0; reg t2`
			`# mp_word w0, w1; regs t0, t1`
			`#`
			`# if (a_len) {`
			`beq a1,zero,.N.1`
			`move t2,zero # cy = 0`
			`dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)`
			`dsrl32 a2,a2,0 # This clears the upper 32 bits.`
			`# a0 = a[0];`
			`lwu a4,0(a0)`
			`# w0 = ((mp_word)b * a0);`
			`dmultu a2,a4`
			`# if (--a_len) {`
			`addiu a1,a1,-1`
			`beq a1,zero,.N.2`
			`# while (a_len >= 2) {`
			`sltiu t3,a1,2`
			`bne t3,zero,.N.3`
			`# a1 = a[1];`
			`lwu a5,4(a0)`
			`.N.4:`
			`# a_len -= 2;`
			`addiu a1,a1,-2`
			`# w0 += cy;`
			`mflo t0`
			`daddu t0,t0,t2`
			`# cy = CARRYOUT(w0);`
			`dsrl32 t2,t0,0`
			`# w1 = (mp_word)b * a1;`
			`dmultu a2,a5`
			`# c[0] = ACCUM(w0);`
			`sw t0,0(a3)`
			`# a0 = a[2];`
			`lwu a4,8(a0)`
			`# a += 2;`
			`addiu a0,a0,8`
			`# w1 += cy;`
			`mflo t1`
			`daddu t1,t1,t2`
			`# cy = CARRYOUT(w1);`
			`dsrl32 t2,t1,0`
			`# w0 = (mp_word)b * a0;`
			`dmultu a2,a4`
			`# c[1] = ACCUM(w1);`
			`sw t1,4(a3)`
			`# c += 2;`
			`addiu a3,a3,8`
			`sltiu t3,a1,2`
			`beq t3,zero,.N.4`
			`# a1 = a[1];`
			`lwu a5,4(a0)`
			`# }`
			`.N.3:`
			`# w0 += cy;`
			`# if (a_len) {`
			`mflo t0`
			`beq a1,zero,.N.5`
			`daddu t0,t0,t2`
			`# w1 = (mp_word)b * a1;`
			`dmultu a2,a5 #`
			`# cy = CARRYOUT(w0);`
			`dsrl32 t2,t0,0`
			`# c[0] = ACCUM(w0);`
			`sw t0,0(a3)`
			`# w1 += cy;`
			`mflo t1`
			`daddu t1,t1,t2`
			`# c[1] = ACCUM(w1);`
			`sw t1,4(a3)`
			`# cy = CARRYOUT(w1);`
			`dsrl32 t2,t1,0`
			`# c += 1;`
			`b .N.6`
			`addiu a3,a3,4`
			`# } else {`
			`.N.5:`
			`# c[0] = ACCUM(w0);`
			`sw t0,0(a3)`
			`# cy = CARRYOUT(w0);`
			`b .N.6`
			`dsrl32 t2,t0,0`
			`# }`
			`# } else {`
			`.N.2:`
			`mflo t0`
			`# c[0] = ACCUM(w0);`
			`sw t0,0(a3)`
			`# cy = CARRYOUT(w0);`
			`dsrl32 t2,t0,0`
			`# }`
			`.N.6:`
			`# c[1] = cy;`
			`jr ra`
			`sw t2,4(a3)`
			`# }`
			`.N.1:`
			`jr ra`
			`nop`
			`#}`
			`#`
			`.end s_mpv_mul_d`


			`.ent s_mpv_sqr_add_prop`
			`.globl s_mpv_sqr_add_prop`
			`#void s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs);`
			`# registers`
			`# a0 *a`
			`# a1 a_len`
			`# a2 *sqr`
			`# a3 digit from *a, a_i`
			`# a4 square of digit from a`
			`# a5,a6 next 2 digits in sqr`
			`# a7,t0 carry`
			`s_mpv_sqr_add_prop:`
			`move a7,zero`
			`move t0,zero`
			`lwu a3,0(a0)`
			`addiu a1,a1,-1 # --a_len`
			`dmultu a3,a3`
			`beq a1,zero,.P.3 # jump if we've already done the only sqr`
			`addiu a0,a0,4 # ++a`
			`.P.2:`
			`lwu a5,0(a2)`
			`lwu a6,4(a2)`
			`addiu a2,a2,8 # sqrs += 2;`
			`dsll32 a6,a6,0`
			`daddu a5,a5,a6`
			`lwu a3,0(a0)`
			`addiu a0,a0,4 # ++a`
			`mflo a4`
			`daddu a6,a5,a4`
			`sltu a7,a6,a5 # a7 = a6 < a5 detect overflow`
			`dmultu a3,a3`
			`daddu a4,a6,t0`
			`sltu t0,a4,a6`
			`add t0,t0,a7`
			`sw a4,-8(a2)`
			`addiu a1,a1,-1 # --a_len`
			`dsrl32 a4,a4,0`
			`bne a1,zero,.P.2 # loop if a_len > 0`
			`sw a4,-4(a2)`
			`.P.3:`
			`lwu a5,0(a2)`
			`lwu a6,4(a2)`
			`addiu a2,a2,8 # sqrs += 2;`
			`dsll32 a6,a6,0`
			`daddu a5,a5,a6`
			`mflo a4`
			`daddu a6,a5,a4`
			`sltu a7,a6,a5 # a7 = a6 < a5 detect overflow`
			`daddu a4,a6,t0`
			`sltu t0,a4,a6`
			`add t0,t0,a7`
			`sw a4,-8(a2)`
			`beq t0,zero,.P.9 # jump if no carry`
			`dsrl32 a4,a4,0`
			`.P.8:`
			`sw a4,-4(a2)`
			`/* propagate final carry */`
			`lwu a5,0(a2)`
			`daddu a6,a5,t0`
			`sltu t0,a6,a5`
			`bne t0,zero,.P.8 # loop if carry persists`
			`addiu a2,a2,4 # sqrs++`
			`.P.9:`
			`jr ra`
			`sw a4,-4(a2)`

			`.end s_mpv_sqr_add_prop`