addmul_3.asm
上传用户:qaz666999
上传日期:2022-08-06
资源大小:2570k
文件大小:4k
源码类别:

数学计算

开发平台:

Unix_Linux

  1. dnl  Alpha ev6 nails mpn_addmul_3.
  2. dnl  Copyright 2002, 2006 Free Software Foundation, Inc.
  3. dnl
  4. dnl  This file is part of the GNU MP Library.
  5. dnl
  6. dnl  The GNU MP Library is free software; you can redistribute it and/or
  7. dnl  modify it under the terms of the GNU Lesser General Public License as
  8. dnl  published by the Free Software Foundation; either version 3 of the
  9. dnl  License, or (at your option) any later version.
  10. dnl
  11. dnl  The GNU MP Library is distributed in the hope that it will be useful,
  12. dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14. dnl  Lesser General Public License for more details.
  15. dnl
  16. dnl  You should have received a copy of the GNU Lesser General Public License
  17. dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  18. include(`../config.m4')
  19. C Runs at 3.0 cycles/limb.
  20. C With 2-way unrolling, we could probably reach 2.25 c/l (3.33 i/c).
  21. C  INPUT PARAMETERS
      C  Names for the four argument registers r16..r19 as used by this file.
      C  vp shares r19 with one of the temporaries listed further down: the code
      C  loads the three limbs at vp first and only then clears and reuses r19.
  22. define(`rp',`r16')
  23. define(`up',`r17')
  24. define(`n',`r18')
  25. define(`vp',`r19')
  26. C  Useful register aliases
      C  numb_mask: all ones shifted right by NAIL_BITS; ANDed with a sum to keep
      C             the part that is stored as a limb.
      C  ulimb:     limb most recently loaded from up.
      C  rlimb:     limb most recently loaded from rp.
  27. define(`numb_mask',`r24')
  28. define(`ulimb',`r25')
  29. define(`rlimb',`r27')
      C  mNa receives the mulq result and mNb the umulh result of vN times ulimb.
      C  m0a (r0) is also the destination of the last addq before ret.
  30. define(`m0a',`r0')
  31. define(`m0b',`r1')
  32. define(`m1a',`r2')
  33. define(`m1b',`r3')
  34. define(`m2a',`r20')
  35. define(`m2b',`r21')
      C  acc0..acc2: partial sums carried from one limb position to the next.
  36. define(`acc0',`r4')
  37. define(`acc1',`r5')
  38. define(`acc2',`r22')
      C  v0..v2: the three limbs loaded from vp, each shifted left by NAIL_BITS
      C  before the first multiply.
  39. define(`v0',`r6')
  40. define(`v1',`r7')
  41. define(`v2',`r23')
  42. C Used for temps: r8 r19 r28
      C  r8:  a mulq result shifted right by NAIL_BITS.
      C  r19: sum for the limb being produced, then that sum shifted right by
      C       NUMB_BITS (the part carried into the next position).
      C  r28: a sum ANDed with numb_mask, ready to be stored.
      C  Short local names for GMP_NAIL_BITS and GMP_NUMB_BITS, which are not
      C  defined in this file.
  43. define(`NAIL_BITS',`GMP_NAIL_BITS')
  44. define(`NUMB_BITS',`GMP_NUMB_BITS')
  45. C  This declaration is munged by configure
      C  NOTE(review): presumably declares the range of nail-bit counts (3 to 63)
      C  this code supports -- confirm against the configure scripts.
  46. NAILS_SUPPORT(3-63)
  47. ASM_START()
      C  mpn_addmul_3 -- inputs rp, up, n, vp (see the register aliases).
      C
      C  Multiplies the n limbs at up by the three limbs at vp and adds the
      C  result into the limbs at rp.  As coded here:
      C    * rp[0] .. rp[n-1] are loaded, added to, masked with numb_mask and
      C      stored back;
      C    * the next two limbs (rp[n] and rp[n+1]) are stored without being
      C      loaded first, i.e. they are overwritten;
      C    * the remaining high part is left in m0a (r0).  NOTE(review): r0 is
      C      presumably the return-value register -- confirm against the Alpha
      C      calling convention.
      C
      C  Method: v0..v2 are pre-shifted left by NAIL_BITS.  Assuming NAIL_BITS +
      C  NUMB_BITS equals the 64-bit register width (TODO confirm), umulh then
      C  yields the product bits above the numb part directly, and the mulq
      C  result shifted right by NAIL_BITS yields the numb part.
      C
      C  The products for the first up limb are computed before the loop; each
      C  loop pass sums the previous products while issuing the multiplies for
      C  the next up limb; the code at L(end) sums the last products.
      C
      C  The U0/U1/L0/L1 tags and the instructions marked nop come from the
      C  original author.  NOTE(review): presumably ev6 issue-slot annotations;
      C  the instruction order should be treated as deliberate.
  48. PROLOGUE(mpn_addmul_3)
  49. lda numb_mask,-1(r31) C numb_mask = -1 (all ones)
  50. srl numb_mask,NAIL_BITS,numb_mask C clear the top NAIL_BITS bits
      C  Fetch the three multiplier limbs while r19 still holds vp.
  51. ldq v0, 0(vp)
  52. ldq v1, 8(vp)
  53. ldq v2, 16(vp)
  54. bis r31, r31, acc0 C zero acc0
  55. sll v0,NAIL_BITS, v0
  56. bis r31, r31, acc1 C zero acc1
  57. sll v1,NAIL_BITS, v1
  58. bis r31, r31, acc2 C zero acc2
  59. sll v2,NAIL_BITS, v2
      C  vp is no longer needed: r19 becomes the carried-in value, starting at 0.
  60. bis r31, r31, r19
      C  First up limb and its three products (low and high halves).
  61. ldq ulimb, 0(up)
  62. lda up, 8(up)
  63. mulq v0, ulimb, m0a C U1
  64. umulh v0, ulimb, m0b C U1
  65. mulq v1, ulimb, m1a C U1
  66. umulh v1, ulimb, m1b C U1
  67. lda n, -1(n)
  68. mulq v2, ulimb, m2a C U1
  69. umulh v2, ulimb, m2b C U1
      C  n was decremented before this test, so n == 1 on entry goes straight to
      C  L(end).  NOTE(review): n == 0 on entry would not take this branch;
      C  callers presumably guarantee n >= 1.
  70. beq n, L(end) C U0
  71. ALIGN(16)
      C  Loop: on entry m0a..m2b hold the products for the limb about to be
      C  written at rp, acc0..acc2 hold sums carried from earlier positions, and
      C  r19 holds the part carried out of the previous output limb.
      C  Per pass:
      C    output = r19 + acc0 + (m0a >> NAIL_BITS) + rlimb
      C    acc0   = m0b + acc1 + (m1a >> NAIL_BITS)
      C    acc1   = m1b + acc2 + (m2a >> NAIL_BITS)
      C    acc2   = m2b
      C  Each product register is read before the multiply that overwrites it
      C  with the product for the next up limb.
  72. L(top): ldq rlimb, 0(rp) C L1
  73. ldq ulimb, 0(up) C L0
  74. bis r31, r31, r31 C U0 nop
  75. addq r19, acc0, acc0 C U1 propagate nail
  76. lda rp, 8(rp) C L1
  77. srl m0a,NAIL_BITS, r8 C U0
  78. lda up, 8(up) C L0
  79. mulq v0, ulimb, m0a C U1
  80. addq r8, acc0, r19 C U0
  81. addq m0b, acc1, acc0 C L1
  82. umulh v0, ulimb, m0b C U1
  83. bis r31, r31, r31 C L0 nop
  84. addq rlimb, r19, r19 C L1
  85. srl m1a,NAIL_BITS, r8 C U0
  86. bis r31, r31, r31 C L0 nop
  87. mulq v1, ulimb, m1a C U1
  88. addq r8, acc0, acc0 C U0
  89. addq m1b, acc2, acc1 C L1
  90. umulh v1, ulimb, m1b C U1
  91. and r19,numb_mask, r28 C L0 extract numb part
  92. bis r31, r31, r31 C L1 nop
  93. srl m2a,NAIL_BITS, r8 C U0
  94. lda n, -1(n) C L0
  95. mulq v2, ulimb, m2a C U1
  96. addq r8, acc1, acc1 C L0
  97. bis r31, m2b, acc2 C L1
  98. umulh v2, ulimb, m2b C U1
  99. srl r19,NUMB_BITS, r19 C U0 extract nail part
      C  rp was already advanced by 8, so -8(rp) is the limb loaded at L(top).
  100. stq r28, -8(rp) C L
  101. bne n, L(top) C U0
      C  Wind-down: same summation as one loop pass for the last set of
      C  products, but with no further loads from up and no new multiplies.
  102. L(end): ldq rlimb, 0(rp)
  103. addq r19, acc0, acc0 C propagate nail
  104. lda rp, 8(rp)
  105. srl m0a,NAIL_BITS, r8 C U0
  106. addq r8, acc0, r19
  107. addq m0b, acc1, acc0
  108. addq rlimb, r19, r19
  109. srl m1a,NAIL_BITS, r8 C U0
  110. addq r8, acc0, acc0
  111. addq m1b, acc2, acc1
  112. and r19,numb_mask, r28 C extract limb
  113. srl m2a,NAIL_BITS, r8 C U0
  114. addq r8, acc1, acc1
  115. bis r31, m2b, acc2
  116. srl r19,NUMB_BITS, r19 C extract nail
  117. stq r28, -8(rp)
      C  Flush the accumulators: two more limbs are written at 0(rp) and 8(rp)
      C  without loading the previous contents of those locations.
  118. addq r19, acc0, acc0 C propagate nail
  119. and acc0,numb_mask, r28
  120. stq r28, 0(rp)
  121. srl acc0,NUMB_BITS, r19
  122. addq r19, acc1, acc1
  123. and acc1,numb_mask, r28
  124. stq r28, 8(rp)
  125. srl acc1,NUMB_BITS, r19
      C  Final high part goes to m0a (r0); it is not masked with numb_mask.
  126. addq r19, acc2, m0a
      C  Return through the address held in r26.
  127. ret r31, (r26), 1
  128. EPILOGUE()
  129. ASM_END()