add_n.asm
dnl  SPARC v9 mpn_add_n -- Add two limb vectors of the same length > 0 and
dnl  store sum in a third limb vector.

dnl  Copyright 2001, 2002, 2003 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
include(`../config.m4')

C                  cycles/limb
C UltraSPARC 1&2:      4
C UltraSPARC 3:        4.5
C Compute the carry-out from the most significant bits of u, v, and r, where
C r = u + v + carry_in, using logic operations.

C This code runs at 4 cycles/limb on UltraSPARC 1 and 2.  It has a 4-insn
C recurrence, and on UltraSPARC 1 and 2 the IE units are 100% saturated.
C Therefore, it seems futile to try to optimize this any further...
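
C The identity used throughout: with r = u + v + cy (cy being 0 or 1), the
C carry-out of the 64-bit add is the top bit of (u & v) | ((u | v) & ~r).
C In C terms, with all variables uint64_t (an illustrative sketch, not part
C of this file):
C
C       r  = u + v + cy;
C       cy = ((u & v) | ((u | v) & ~r)) >> 63;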
C INPUT PARAMETERS
define(`rp',`%i0')
define(`up',`%i1')
define(`vp',`%i2')
define(`n',`%i3')

define(`u0',`%l0')
define(`u1',`%l2')
define(`u2',`%l4')
define(`u3',`%l6')
define(`v0',`%l1')
define(`v1',`%l3')
define(`v2',`%l5')
define(`v3',`%l7')

define(`cy',`%i4')

define(`fanop',`fitod %f0,%f2')         dnl  A quasi nop running in the FA pipe
define(`fmnop',`fmuld %f0,%f0,%f4')     dnl  A quasi nop running in the FM pipe
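
C For reference, the function as a whole computes the following (a plain C
C sketch assuming 64-bit limbs; the GMP-style names are illustrative and
C this block is not part of the build):
C
C       mp_limb_t
C       mpn_add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
C       {
C         mp_limb_t cy = 0;
C         mp_size_t i;
C         for (i = 0; i < n; i++)
C           {
C             mp_limb_t r = up[i] + vp[i] + cy;
C             cy = ((up[i] & vp[i]) | ((up[i] | vp[i]) & ~r)) >> 63;
C             rp[i] = r;
C           }
C         return cy;
C       }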
ASM_START()
        REGISTER(%g2,#scratch)
        REGISTER(%g3,#scratch)
PROLOGUE(mpn_add_n)
        save    %sp,-160,%sp

        fitod   %f0,%f0         C make sure f0 contains small, quiet number
        subcc   n,4,%g0
        bl,pn   %icc,.Loop0
        mov     0,cy
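
C Software-pipeline prologue for n >= 4: preload the first four limbs of
C each operand and start the first add, so the main loop always works on
C data loaded a group ahead of the stores.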
        ldx     [up+0],u0
        ldx     [vp+0],v0
        add     up,32,up
        ldx     [up-24],u1
        ldx     [vp+8],v1
        add     vp,32,vp
        ldx     [up-16],u2
        ldx     [vp-16],v2
        ldx     [up-8],u3
        ldx     [vp-8],v3
        subcc   n,8,n
        add     u0,v0,%g1       C main add
        add     %g1,cy,%g4      C carry add
        or      u0,v0,%g2
        bl,pn   %icc,.Lend4567
        fanop
        b,a     .Loop

        .align  16
C START MAIN LOOP
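C Each iteration processes four limbs.  The fanop/fmnop quasi-nops defined
C above fill the FA and FM pipes so the integer issue groups stay intact;
C the carry recurrence (add, andn, or, srlx) sets the 4 cycles/limb pace.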
.Loop:  andn    %g2,%g4,%g2
        and     u0,v0,%g3
        ldx     [up+0],u0
        fanop
C --
        or      %g3,%g2,%g2
        ldx     [vp+0],v0
        add     up,32,up
        fanop
C --
        srlx    %g2,63,cy
        add     u1,v1,%g1
        stx     %g4,[rp+0]
        fanop
C --
        add     %g1,cy,%g4
        or      u1,v1,%g2
        fmnop
        fanop
C --
        andn    %g2,%g4,%g2
        and     u1,v1,%g3
        ldx     [up-24],u1
        fanop
C --
        or      %g3,%g2,%g2
        ldx     [vp+8],v1
        add     vp,32,vp
        fanop
C --
        srlx    %g2,63,cy
        add     u2,v2,%g1
        stx     %g4,[rp+8]
        fanop
C --
        add     %g1,cy,%g4
        or      u2,v2,%g2
        fmnop
        fanop
C --
        andn    %g2,%g4,%g2
        and     u2,v2,%g3
        ldx     [up-16],u2
        fanop
C --
        or      %g3,%g2,%g2
        ldx     [vp-16],v2
        add     rp,32,rp
        fanop
C --
        srlx    %g2,63,cy
        add     u3,v3,%g1
        stx     %g4,[rp-16]
        fanop
C --
        add     %g1,cy,%g4
        or      u3,v3,%g2
        fmnop
        fanop
C --
        andn    %g2,%g4,%g2
        and     u3,v3,%g3
        ldx     [up-8],u3
        fanop
C --
        or      %g3,%g2,%g2
        subcc   n,4,n
        ldx     [vp-8],v3
        fanop
C --
        srlx    %g2,63,cy
        add     u0,v0,%g1
        stx     %g4,[rp-8]
        fanop
C --
        add     %g1,cy,%g4
        or      u0,v0,%g2
        bge,pt  %icc,.Loop
        fanop
C END MAIN LOOP
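
C Wind-down: the loop exits with four limbs still in flight; finish their
C adds, carry propagation, and stores here.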
.Lend4567:
        andn    %g2,%g4,%g2
        and     u0,v0,%g3
        or      %g3,%g2,%g2
        srlx    %g2,63,cy
        add     u1,v1,%g1
        stx     %g4,[rp+0]
        add     %g1,cy,%g4
        or      u1,v1,%g2
        andn    %g2,%g4,%g2
        and     u1,v1,%g3
        or      %g3,%g2,%g2
        srlx    %g2,63,cy
        add     u2,v2,%g1
        stx     %g4,[rp+8]
        add     %g1,cy,%g4
        or      u2,v2,%g2
        andn    %g2,%g4,%g2
        and     u2,v2,%g3
        or      %g3,%g2,%g2
        add     rp,32,rp
        srlx    %g2,63,cy
        add     u3,v3,%g1
        stx     %g4,[rp-16]
        add     %g1,cy,%g4
        or      u3,v3,%g2
        andn    %g2,%g4,%g2
        and     u3,v3,%g3
        or      %g3,%g2,%g2
        srlx    %g2,63,cy
        stx     %g4,[rp-8]

        addcc   n,4,n
        bz,pn   %icc,.Lret
        fanop
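
C Simple one-limb-per-iteration loop.  Used both when n < 4 on entry and
C for the 1 to 3 limbs left over after the unrolled code.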
.Loop0: ldx     [up],u0
        add     up,8,up
        ldx     [vp],v0
        add     vp,8,vp
        add     rp,8,rp
        subcc   n,1,n
        add     u0,v0,%g1
        or      u0,v0,%g2
        add     %g1,cy,%g4
        and     u0,v0,%g3
        andn    %g2,%g4,%g2
        stx     %g4,[rp-8]
        or      %g3,%g2,%g2
        bnz,pt  %icc,.Loop0
        srlx    %g2,63,cy

.Lret:  mov     cy,%i0
        ret
        restore
EPILOGUE(mpn_add_n)