mod_34lsub1.asm
上传用户:qaz666999
上传日期:2022-08-06
资源大小:2570k
文件大小:3k
- dnl Alpha mpn_mod_34lsub1.
- dnl Copyright 2002 Free Software Foundation, Inc.
- dnl This file is part of the GNU MP Library.
- dnl The GNU MP Library is free software; you can redistribute it and/or modify
- dnl it under the terms of the GNU Lesser General Public License as published
- dnl by the Free Software Foundation; either version 3 of the License, or (at
- dnl your option) any later version.
- dnl The GNU MP Library is distributed in the hope that it will be useful, but
- dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
- dnl License for more details.
- dnl You should have received a copy of the GNU Lesser General Public License
- dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
- include(`../config.m4')
- C cycles/limb
- C EV4: 4 (?)
- C EV5: 2.67
- C EV6: 1.67
- dnl INPUT PARAMETERS
- dnl up r16
- dnl n r17
- define(`l0',`r18')
- define(`l1',`r19')
- define(`l2',`r20')
- define(`a0',`r21')
- define(`a1',`r22')
- define(`a2',`r23')
- define(`c0',`r24')
- define(`c1',`r5')
- define(`c2',`r6')
- ASM_START()
- PROLOGUE(mpn_mod_34lsub1)
- bis r31, r31, c0
- bis r31, r31, c1
- bis r31, r31, c2
- lda r17, -3(r17)
- bge r17, $L_3_or_more
- bis r31, r31, a0
- bis r31, r31, a1
- bis r31, r31, a2
- br r31, $L_012
- $L_3_or_more:
- ldq a0, 0(r16)
- ldq a1, 8(r16)
- ldq a2, 16(r16)
- lda r16, 24(r16)
- lda r17, -3(r17)
- blt r17, $L_012
- $L_6_or_more:
- ldq l0, 0(r16)
- ldq l1, 8(r16)
- ldq l2, 16(r16)
- addq l0, a0, a0
- lda r16, 24(r16)
- lda r17, -3(r17)
- blt r17, $L_end
- ALIGN(16)
- C Main loop
- $L_9_or_more:
- $Loop: cmpult a0, l0, r0
- ldq l0, 0(r16)
- addq r0, c0, c0
- addq l1, a1, a1
- cmpult a1, l1, r0
- ldq l1, 8(r16)
- addq r0, c1, c1
- addq l2, a2, a2
- cmpult a2, l2, r0
- ldq l2, 16(r16)
- addq r0, c2, c2
- addq l0, a0, a0
- lda r16, 24(r16)
- lda r17, -3(r17)
- bge r17, $Loop
- $L_end: cmpult a0, l0, r0
- addq r0, c0, c0
- addq l1, a1, a1
- cmpult a1, l1, r0
- addq r0, c1, c1
- addq l2, a2, a2
- cmpult a2, l2, r0
- addq r0, c2, c2
- C Handle the last (n mod 3) limbs
- $L_012: lda r17, 2(r17)
- blt r17, $L_0
- ldq l0, 0(r16)
- addq l0, a0, a0
- cmpult a0, l0, r0
- addq r0, c0, c0
- beq r17, $L_0
- ldq l1, 8(r16)
- addq l1, a1, a1
- cmpult a1, l1, r0
- addq r0, c1, c1
- C Align and sum our 3 main accumulators and 3 carry accumulators
- $L_0: srl a0, 48, r2
- srl a1, 32, r4
- ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
- ` insll a1, 2, r1', C (a1 & 0xffffffff) << 16
- ` zapnot a1, 15, r25
- sll r25, 16, r1')
- zapnot a0, 63, r0 C a0 & 0xffffffffffff
- srl a2, 16, a1
- ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
- ` inswl a2, 4, r3', C (a2 & 0xffff) << 32
- ` zapnot a2, 3, r25
- sll r25, 32, r3')
- addq r1, r4, r1
- addq r0, r2, r0
- srl c0, 32, a2
- ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
- ` insll c0, 2, r4', C (c0 & 0xffffffff) << 16
- ` zapnot c0, 15, r25
- sll r25, 16, r4')
- addq r0, r1, r0
- addq r3, a1, r3
- addq r0, r3, r0
- srl c1, 16, c0
- ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
- ` inswl c1, 4, r2', C (c1 & 0xffff) << 32
- ` zapnot c1, 3, r25
- sll r25, 32, r2')
- addq r4, a2, r4
- C srl c2, 48, r3 C This will be 0 in practise
- zapnot c2, 63, r1 C r1 = c2 & 0xffffffffffff
- addq r0, r4, r0
- addq r2, c0, r2
- addq r0, r2, r0
- C addq r1, r3, r1
- addq r0, r1, r0
- ret r31, (r26), 1
- EPILOGUE(mpn_mod_34lsub1)
- ASM_END()