sqr_diagonal.asm
上传用户:qaz666999
上传日期:2022-08-06
资源大小:2570k
文件大小:2k
- dnl HP-PA 32-bit mpn_sqr_diagonal optimized for the PA8x00.
- dnl Copyright 2001, 2002 Free Software Foundation, Inc.
- dnl This file is part of the GNU MP Library.
- dnl The GNU MP Library is free software; you can redistribute it and/or modify
- dnl it under the terms of the GNU Lesser General Public License as published
- dnl by the Free Software Foundation; either version 3 of the License, or (at
- dnl your option) any later version.
- dnl The GNU MP Library is distributed in the hope that it will be useful, but
- dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
- dnl License for more details.
- dnl You should have received a copy of the GNU Lesser General Public License
- dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
- include(`../config.m4')
- C This code runs at 6 cycles/limb on the PA7100 and 2 cycles/limb on PA8x00.
- C The 2-way unrolling is actually not helping the PA7100.
- C INPUT PARAMETERS
- C rp  destination pointer; two words are stored for every source limb.
- C up  source pointer; limbs are read one word at a time, in ascending order.
- C n   limb count, used as a down counter.  The first limb is loaded and the
- C     count is decremented before it is first tested for zero, so n = 0 is
- C     not handled by this code; callers must pass n >= 1.
- define(`rp',`%r26')
- define(`up',`%r25')
- define(`n',`%r24')
- ASM_START()
- C mpn_sqr_diagonal
- C
- C For i = 0 .. n-1 form the 64-bit product up[i] * up[i] with xmpyu and
- C store it as two words: the r half of the product register goes to rp[2i]
- C and the l half to rp[2i+1] (low word first, then high word).
- C
- C Modifies rp, up, n (%r26, %r25, %r24) and %fr4 .. %fr7.  No other general
- C register is written, and the routine returns with bv 0(%r2).
- C
- C None of the branches below uses a nullify completer, so the instruction
- C following each addib/bv is a delay slot that executes whether or not the
- C branch is taken.  Several of those slots carry real work (a multiply or
- C the final store), so the instruction order must not be changed.
- C
- C Completers: ",ma" uses the base address and then adds the displacement to
- C the base register; ",mb" adds the displacement first and uses the result.
- PROLOGUE(mpn_sqr_diagonal)
- C Start-up: load up to three limbs, leaving early for n = 1, 2 or 3.
- fldws,ma 4(up),%fr4r            C fr4r = up[0]; up += 4
- addib,= -1,n,L(end1)            C n -= 1; if that was the only limb go to end1
- ldo 4(rp),rp                    C (delay slot) rp += 4, so -4(rp) is the next low word
- fldws,ma 4(up),%fr6r            C fr6r = up[1]; up += 4
- addib,= -1,n,L(end2)            C n -= 1; exactly two limbs go to end2
- xmpyu %fr4r,%fr4r,%fr5          C (delay slot) fr5 = up[0]^2
- fldws,ma 4(up),%fr4r            C fr4r = up[2]; up += 4
- addib,= -1,n,L(end3)            C n -= 1; exactly three limbs go to end3
- xmpyu %fr6r,%fr6r,%fr7          C (delay slot) fr7 = up[1]^2
- C Main loop, unrolled two ways and software pipelined.  At the top of each
- C pass: fr5 and fr7 hold finished squares that are not yet stored (fr5 is
- C the older one), fr4r holds a loaded limb that is not yet squared, n is the
- C number of limbs still to be loaded (at least 1), and rp is 4 bytes past
- C the address where the low word of fr5 belongs.
- LDEF(loop)
- fldws,ma 4(up),%fr6r            C load the next limb into fr6r; up += 4
- fstws %fr5r,-4(rp)              C store low word of the older square
- fstws,ma %fr5l,8(rp)            C store its high word at 0(rp); rp += 8
- addib,= -1,n,L(exite)           C n -= 1; nothing left to load: finish in exite
- xmpyu %fr4r,%fr4r,%fr5          C (delay slot) fr5 = fr4r^2
- fldws,ma 4(up),%fr4r            C load the next limb into fr4r; up += 4
- fstws %fr7r,-4(rp)              C store low word of the square held in fr7
- fstws,ma %fr7l,8(rp)            C store its high word at 0(rp); rp += 8
- addib,<> -1,n,L(loop)           C n -= 1; keep looping while limbs remain to load
- xmpyu %fr6r,%fr6r,%fr7          C (delay slot) fr7 = fr6r^2
- C Exit by falling out of the second half of the loop.  Pending: fr5 (older)
- C and fr7 are finished squares, fr4r is the last limb, not yet squared.
- LDEF(exito)
- fstws %fr5r,-4(rp)              C low word of the older square
- fstws %fr5l,0(rp)               C high word of the older square
- xmpyu %fr4r,%fr4r,%fr5          C fr5 = last limb squared (old fr5 already stored)
- fstws %fr7r,4(rp)               C low word of the square in fr7
- fstws %fr7l,8(rp)               C high word of the square in fr7
- fstws,mb %fr5r,12(rp)           C rp += 12, then store low word of the last square
- bv 0(%r2)                       C return to the address in %r2
- fstws %fr5l,4(rp)               C (delay slot) high word of the last square
- C Exit taken from the first half of the loop.  Same as exito with the roles
- C of the register pairs swapped: fr7 is the older finished square, fr5 the
- C newer one, and fr6r is the last limb, not yet squared.
- LDEF(exite)
- fstws %fr7r,-4(rp)              C low word of the older square
- fstws %fr7l,0(rp)               C high word of the older square
- xmpyu %fr6r,%fr6r,%fr7          C fr7 = last limb squared (old fr7 already stored)
- fstws %fr5r,4(rp)               C low word of the square in fr5
- fstws %fr5l,8(rp)               C high word of the square in fr5
- fstws,mb %fr7r,12(rp)           C rp += 12, then store low word of the last square
- bv 0(%r2)                       C return to the address in %r2
- fstws %fr7l,4(rp)               C (delay slot) high word of the last square
- C n = 1: fr4r = up[0] has been loaded but not squared; rp was already
- C advanced by 4 in the delay slot of the first addib.
- LDEF(end1)
- xmpyu %fr4r,%fr4r,%fr5          C fr5 = up[0]^2
- fstws %fr5r,-4(rp)              C rp[0] = low word
- bv 0(%r2)                       C return to the address in %r2
- fstws,ma %fr5l,8(rp)            C (delay slot) rp[1] = high word; rp += 8
- C n = 2: fr5 = up[0]^2 was computed in the delay slot of the branch that
- C came here; fr6r = up[1] is not yet squared.
- LDEF(end2)
- xmpyu %fr6r,%fr6r,%fr7          C fr7 = up[1]^2
- fstws %fr5r,-4(rp)              C rp[0]
- fstws %fr5l,0(rp)               C rp[1]
- fstws %fr7r,4(rp)               C rp[2]
- bv 0(%r2)                       C return to the address in %r2
- fstws %fr7l,8(rp)               C (delay slot) rp[3]
- C n = 3: fr5 = up[0]^2 and fr7 = up[1]^2 are finished, fr4r = up[2] is not
- C yet squared.  The instruction sequence is the same as the one at exito.
- LDEF(end3)
- fstws %fr5r,-4(rp)              C rp[0]
- fstws %fr5l,0(rp)               C rp[1]
- xmpyu %fr4r,%fr4r,%fr5          C fr5 = up[2]^2 (old fr5 already stored)
- fstws %fr7r,4(rp)               C rp[2]
- fstws %fr7l,8(rp)               C rp[3]
- fstws,mb %fr5r,12(rp)           C rp += 12, then rp[4] = low word
- bv 0(%r2)                       C return to the address in %r2
- fstws %fr5l,4(rp)               C (delay slot) rp[5] = high word
- EPILOGUE(mpn_sqr_diagonal)