sqr_diagonal.asm
上传用户:qaz666999
上传日期:2022-08-06
资源大小:2570k
文件大小:2k
- dnl IA-64 mpn_sqr_diagonal. Helper for sqr_basecase.
- dnl Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
- dnl This file is part of the GNU MP Library.
- dnl The GNU MP Library is free software; you can redistribute it and/or modify
- dnl it under the terms of the GNU Lesser General Public License as published
- dnl by the Free Software Foundation; either version 3 of the License, or (at
- dnl your option) any later version.
- dnl The GNU MP Library is distributed in the hope that it will be useful, but
- dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
- dnl License for more details.
- dnl You should have received a copy of the GNU Lesser General Public License
- dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
- include(`../config.m4')
- C cycles/limb
- C Itanium: 4
- C Itanium 2: 2
- C TODO
- C * Perhaps avoid ctop loop. Unfortunately, a cloop loop running at 1 c/l
- C would need prohibitive 8-way unrolling.
- C * Instead of messing too much with this, write a nifty mpn_sqr_basecase.
- C INPUT PARAMETERS
- C rp = r32
- C sp = r33
- C n = r34
- ASM_START()
- PROLOGUE(mpn_sqr_diagonal)
- .prologue
- .save ar.lc, r2
- .save pr, r15
- .body
- ifdef(`HAVE_ABI_32',
- ` addp4 r32 = 0, r32
- addp4 r33 = 0, r33
- zxt4 r34 = r34
- ;;
- ')
- ldf8 f32 = [r33], 8 C M load rp[0] early
- mov r2 = ar.lc C I0
- mov r14 = ar.ec C I0
- mov r15 = pr C I0
- add r19 = -1, r34 C M I decr n
- add r18 = 8, r32 C M I rp for high limb
- ;;
- mov ar.lc = r19 C I0
- mov ar.ec = 5 C I0
- mov pr.rot = 1<<16 C I0
- ;;
- br.cexit.spnt .Ldone C B
- ;;
- ALIGN(32)
- .Loop:
- (p16) ldf8 f32 = [r33], 8 C M
- (p19) xma.l f36 = f35, f35, f0 C F
- (p21) stf8 [r32] = f38, 16 C M2 M3
- (p19) xma.hu f40 = f35, f35, f0 C F
- (p21) stf8 [r18] = f42, 16 C M2 M3
- br.ctop.dptk .Loop C B
- ;;
- .Ldone:
- stf8 [r32] = f38 C M2 M3
- stf8 [r18] = f42 C M2 M3
- mov ar.ec = r14 C I0
- ;;
- mov pr = r15, 0x1ffff C I0
- mov ar.lc = r2 C I0
- br.ret.sptk.many b0 C B
- EPILOGUE(mpn_sqr_diagonal)
- ASM_END()