addmul_2.asm
上传用户:qaz666999
上传日期:2022-08-06
资源大小:2570k
文件大小:3k
源码类别:

数学计算

开发平台:

Unix_Linux

  1. dnl  Alpha ev6 nails mpn_addmul_2.
  2. dnl  Copyright 2002, 2005, 2006 Free Software Foundation, Inc.
  3. dnl
  4. dnl  This file is part of the GNU MP Library.
  5. dnl
  6. dnl  The GNU MP Library is free software; you can redistribute it and/or
  7. dnl  modify it under the terms of the GNU Lesser General Public License as
  8. dnl  published by the Free Software Foundation; either version 3 of the
  9. dnl  License, or (at your option) any later version.
  10. dnl
  11. dnl  The GNU MP Library is distributed in the hope that it will be useful,
  12. dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14. dnl  Lesser General Public License for more details.
  15. dnl
  16. dnl  You should have received a copy of the GNU Lesser General Public License
  17. dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  18. include(`../config.m4')
  19. C Runs at 4.0 cycles/limb.
  20. C We could either go for 2-way unrolling over 11 cycles, or 2.75 c/l,
  21. C or 4-way unrolling over 20 cycles, for 2.5 c/l.
  22. C  INPUT PARAMETERS
  23. define(`rp',`r16') C pointer to the limbs that are loaded, added to and stored back
  24. define(`up',`r17') C pointer to the multiplicand limbs; only ever loaded from
  25. define(`n',`r18') C number of up limbs still to be fetched; counts down to zero
  26. define(`vp',`r19') C pointer to the two multiplier limbs, read at offsets 0 and 8
  27. C  Useful register aliases
  28. define(`numb_mask',`r24') C all ones shifted right by NAIL_BITS; strips the nail from a sum
  29. define(`ulimb',`r25') C limb most recently loaded from up
  30. define(`rlimb',`r27') C limb most recently loaded from rp
  31. define(`m0a',`r0') C mulq result of v0 and ulimb (low 64 bits); r0 also carries the return value
  32. define(`m0b',`r1') C umulh result of v0 and ulimb (high 64 bits)
  33. define(`m1a',`r2') C mulq result of v1 and ulimb (low 64 bits)
  34. define(`m1b',`r3') C umulh result of v1 and ulimb (high 64 bits)
  35. define(`acc0',`r4') C partial sum for the limb that will be completed next
  36. define(`acc1',`r5') C previous m1b, held until it is folded into acc0 or the return value
  37. define(`v0',`r6') C limb from 0(vp), kept shifted left by NAIL_BITS
  38. define(`v1',`r7') C limb from 8(vp), kept shifted left by NAIL_BITS
  39. C Used for temps: r8 r19 r28 (r19 is also vp; it is reused once v0 and v1 have been loaded)
  40. define(`NAIL_BITS',`GMP_NAIL_BITS') C short alias; GMP_NAIL_BITS is defined outside this file
  41. define(`NUMB_BITS',`GMP_NUMB_BITS') C short alias; GMP_NUMB_BITS is defined outside this file
  42. C  This declaration is munged by configure
  43. NAILS_SUPPORT(3-63)
  44. ASM_START()
      C mpn_addmul_2 -- multiply the n limbs at up by the two limbs at vp and add
      C the product into the limbs at rp, for limbs stored with nail bits.
      C
      C In:    rp (r16), up (r17), n (r18), vp (r19)
      C Out:   rp[0] .. rp[n-1] are loaded, summed with the product and stored back.
      C        rp[n] is stored without being loaded first.
      C        r0 returns the limb above rp[n].
      C Uses:  r0-r8, r19, r24, r25, r27, r28.  The stack is not touched.
      C Note:  n is decremented before it is first tested, so n = 0 is not handled;
      C        the caller has to pass n >= 1.
      C
      C v0 and v1 are shifted left by NAIL_BITS once, before the loop.
      C NOTE(review): the mask and the two shift counts used below agree only if
      C NAIL_BITS + NUMB_BITS is 64 and the vp limbs have clear nail bits -- confirm.
      C Under that assumption umulh yields the product bits from NUMB_BITS upward,
      C and mulq followed by srl NAIL_BITS yields the low NUMB_BITS of the product.
      C
      C The loop is software pipelined: the multiplies for the next up limb are
      C issued while the sums for the current limb are still being formed, and
      C m0a/m1a are overwritten right after they are read.  Instruction order
      C therefore matters; do not reorder without re-checking every dependency.
  45. PROLOGUE(mpn_addmul_2)
  46. lda numb_mask,-1(r31) C numb_mask = all ones
  47. srl numb_mask,NAIL_BITS,numb_mask C clear the top NAIL_BITS bits of the mask
  48. ldq v0, 0(vp) C first multiplier limb
  49. ldq v1, 8(vp) C second multiplier limb
  50. bis r31, r31, acc0 C zero acc0
  51. sll v0,NAIL_BITS, v0 C pre-shift v0, see header
  52. bis r31, r31, acc1 C zero acc1
  53. sll v1,NAIL_BITS, v1 C pre-shift v1, see header
  54. bis r31, r31, r19 C vp is no longer needed; r19 becomes the nail carry, starting at 0
  55. ldq ulimb, 0(up) C fetch the first up limb
  56. lda up, 8(up) C advance up past it
  57. mulq v0, ulimb, m0a C U1 low half of v0 times the first up limb
  58. umulh v0, ulimb, m0b C U1 high half of the same product
  59. mulq v1, ulimb, m1a C U1 low half of v1 times the first up limb
  60. umulh v1, ulimb, m1b C U1 high half of the same product
  61. lda n, -1(n) C account for the limb just fetched
  62. beq n, L(end) C U0 a single limb needs no loop iterations
  63. ALIGN(16)
  64. L(top): bis r31, r31, r31 C U1 nop
  65. addq r19, acc0, acc0 C U0 propagate nail
  66. ldq rlimb, 0(rp) C L0 limb to add the product into
  67. ldq ulimb, 0(up) C L1 up limb whose products are used in the next pass
  68. lda rp, 8(rp) C L1 advance rp now; the store below uses offset -8
  69. srl m0a,NAIL_BITS, r8 C U0 low part of the pending v0 product
  70. lda up, 8(up) C L0 advance up
  71. mulq v0, ulimb, m0a C U1 start the next v0 product (m0a was consumed just above)
  72. addq r8, acc0, r19 C U0 sum for the current limb, without rlimb yet
  73. addq m0b, acc1, acc0 C L1 start the sum for the following limb
  74. umulh v0, ulimb, m0b C U1 high half of the next v0 product
  75. bis r31, r31, r31 C L0 nop
  76. addq rlimb, r19, r19 C L1 FINAL PROD-SUM
  77. srl m1a,NAIL_BITS, r8 C U0 low part of the pending v1 product
  78. lda n, -1(n) C L0 one more up limb fetched
  79. mulq v1, ulimb, m1a C U1 start the next v1 product (m1a was consumed just above)
  80. addq r8, acc0, acc0 C U0 add it into the sum for the following limb
  81. bis r31, m1b, acc1 C L1 keep the pending v1 high half for one more pass
  82. umulh v1, ulimb, m1b C U1 high half of the next v1 product
  83. and r19,numb_mask, r28 C L0 extract numb part
  84. unop C no-op filler
  85. srl r19,NUMB_BITS, r19 C U1 extract nail part
  86. stq r28, -8(rp) C L1 store the finished limb
  87. bne n, L(top) C U0 loop while up limbs remain
  88. L(end): ldq rlimb, 0(rp) C wind-down: same sums as the loop, but no new products
  89. addq r19, acc0, acc0 C propagate nail
  90. lda rp, 8(rp) C advance rp; the first store below uses offset -8
  91. srl m0a,NAIL_BITS, r8 C U0 low part of the last v0 product
  92. addq r8, acc0, r19 C sum for limb n-1, without rlimb yet
  93. addq m0b, acc1, acc0 C start the sum for limb n
  94. addq rlimb, r19, r19 C complete the sum for limb n-1
  95. srl m1a,NAIL_BITS, r8 C U0 low part of the last v1 product
  96. addq r8, acc0, acc0 C add it into the sum for limb n
  97. bis r31, m1b, acc1 C last v1 high half, destined for the return value
  98. and r19,numb_mask, r28 C extract limb
  99. srl r19,NUMB_BITS, r19 C extract nail
  100. stq r28, -8(rp) C store limb n-1
  101. addq r19, acc0, acc0 C propagate nail
  102. and acc0,numb_mask, r28 C numb part of limb n
  103. stq r28, 0(rp) C store limb n
  104. srl acc0,NUMB_BITS, r19 C carry out of limb n
  105. addq r19, acc1, r0 C return value: that carry plus the last v1 high half
  106. ret r31, (r26), 1 C return through r26
  107. EPILOGUE()
  108. ASM_END()