数学计算

开发平台：
Unix_Linux

divrem_2.asm：源码内容
							dnl  PPC-32 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
dnl  Copyright 2007, 2008 Free Software Foundation, Inc.
dnl  This file is part of the GNU MP Library.
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.
dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C		cycles/limb
C		norm	frac
C 7410		~36.5	~36.5
C 744x, 745x	 29	 29
C INPUT PARAMETERS
C qp  = r3
C fn  = r4
C up  = r5
C un  = r6
C d   = r7
C TODO
C  * Decrease register usage.
C  * Make sure mul operands and optimal for early-out.
C  * Check that things work well for a shared library build.
C  * Write an invert_limb, perhaps inline, perhaps as a private call.  Or at
C    least vastly improve the current __udiv_qrnnd_c based code.
ASM_START()
PROLOGUE(mpn_divrem_2)
	stwu	r1, -32(r1)
	slwi	r0, r6, 2
	add	r5, r5, r0
	stmw	r28, 8(r1)
	addi	r29, r5, -8		C up = up_param + un - 2
	lwz	r10, 4(r7)
	lwz	r12, 4(r29)
	addi	r8, r3, -12
	lwz	r7, 0(r7)
	cmplw	cr7, r12, r10
	lwz	r28, 0(r29)
	blt-	cr7, L(2)
	bgt+	cr7, L(4)
	cmplw	cr7, r28, r7
	blt-	cr7, L(2)
L(4):	subfc	r28, r7, r28
	subfe	r12, r10, r12
	li	r3, 1
	b	L(6)
L(2):	li	r3, 0
L(6):	add	r0, r4, r6
	addic.	r30, r0, -2
	ble-	cr0, L(ret)
	slwi	r9, r0, 2
	add	r8, r8, r9		C rp += un + fn
	mtctr	r30
C Compute di from d1
	srwi	r11, r10, 16
	nor	r0, r10, r10
	divwu	r31, r0, r11
	rlwinm	r5, r10, 0, 16, 31
	mullw	r9, r11, r31
	mullw	r6, r5, r31
	subf	r0, r9, r0
	slwi	r0, r0, 16
	ori	r0, r0, 65535
	cmplw	cr7, r0, r6
	bge-	cr7, L(9)
	add	r0, r0, r10
	cmplw	cr7, r0, r10
	cmplw	cr6, r0, r6
	addi	r31, r31, -1		C q1--
	cror	28, 28, 25
	bc+	12, 28, L(9)
	addi	r31, r31, -1		C q1--
	add	r0, r0, r10
L(9):	subf	r0, r6, r0
	divwu	r6, r0, r11
	mullw	r9, r11, r6
	mullw	r11, r5, r6
	subf	r0, r9, r0
	slwi	r0, r0, 16
	ori	r0, r0, 65535
	cmplw	cr7, r0, r11
	bge-	cr7, L(13)
	add	r0, r0, r10
	cmplw	cr7, r0, r10
	cmplw	cr6, r0, r11
	addi	r6, r6, -1		C q0--
	cror	28, 28, 25
	bc+	12, 28, L(13)
C	add	r0, r0, r10		C final remainder
	addi	r6, r6, -1		C q0--
L(13):	rlwimi	r6, r31, 16, 0, 15	C assemble final quotient
C Adjust di by including d0
	mullw	r9, r10, r6		C t0 = LO(di * d1)
	addc	r11, r9, r7
	subfe	r0, r1, r1
	mulhwu	r9, r6, r7		C s1 = HI(di * d0)
	addc	r9, r11, r9
	addze.	r0, r0
	blt	cr0, L(17)
L(18):	subfc	r9, r10, r9
	addi	r6, r6, -1
	addme.	r0, r0
	bge+	cr0, L(18)
L(17):
C r0  r3  r4  r5  r6  r7  r8  r9 r10 r11 r12 r28 r29 r30 r31
C     msl         di  d0  qp     d1          fn  up  un
L(loop):
	mullw	r0, r12, r6		C q0 = LO(n2 * di)
	cmpw	cr7, r30, r4
	addc	r31, r0, r28		C q0 += n1
	mulhwu	r9, r12, r6		C q  = HI(n2 * di)
	adde	r12, r9, r12		C q  += n2
	addi	r30, r30, -1
	mullw	r0, r10, r12		C d1 * q
	li	r9, 0
	subf	r0, r0, r28		C n1 -= d1 * q
	addi	r5, r12, 1
	ble-	cr7, L(23)
	lwzu	r9, -4(r29)
L(23):	mullw	r11, r12, r7		C t0 = LO(d0 * q)
	subfc	r28, r7, r9		C n0 -= d0
	subfe	r0, r10, r0		C n1 -= d1
	mulhwu	r12, r12, r7		C t1 = HI(d0 * q)
	subfc	r28, r11, r28		C n0 -= t0
	subfe	r12, r12, r0		C n1 -= t1
	cmplw	cr7, r12, r31
	blt+	cr7, L(24)
	addc	r28, r28, r7
	adde	r12, r12, r10
	addi	r5, r5, -1
L(24):	cmplw	cr7, r12, r10
	bge-	cr7, L(fix)
L(bck):	stw	r5, 0(r8)
	addi	r8, r8, -4
	bdnz	L(loop)
L(ret):	stw	r28, 0(r29)
	stw	r12, 4(r29)
	lmw	r28, 8(r1)
	addi	r1, r1, 32
	blr
L(fix):	cmplw	cr6, r28, r7
	bgt+	cr7, L(28)
	blt-	cr6, L(bck)
L(28):	subfc	r28, r7, r28
	subfe	r12, r10, r12
	addi	r5, r5, 1
	b	L(bck)
EPILOGUE()