invert_limb.asm
上传用户:qaz666999
上传日期:2022-08-06
资源大小:2570k
文件大小:3k
源码类别:

数学计算

开发平台:

Unix_Linux

  1. dnl  PowerPC-64 mpn_invert_limb -- Invert a normalized limb.
  2. dnl  Copyright 2004, 2005, 2006, 2008 Free Software Foundation, Inc.
  3. dnl  This file is part of the GNU MP Library.
  4. dnl  The GNU MP Library is free software; you can redistribute it and/or modify
  5. dnl  it under the terms of the GNU Lesser General Public License as published
  6. dnl  by the Free Software Foundation; either version 3 of the License, or (at
  7. dnl  your option) any later version.
  8. dnl  The GNU MP Library is distributed in the hope that it will be useful, but
  9. dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  10. dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  11. dnl  License for more details.
  12. dnl  You should have received a copy of the GNU Lesser General Public License
  13. dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  14. include(`../config.m4')
  15. C cycles/limb
  16. C POWER3/PPC630:     ?
  17. C POWER4/PPC970:     75 (including call+ret)
  18. C TODO:
  19. C   * Pair multiply instructions.
  20. ASM_START()
  21. PROLOGUE(mpn_invert_limb) C in: r3 = d (normalized limb, per the file header); out: r3; scratch: r0, r8-r12. NOTE(review): presumably returns floor((2^128-1)/d) - 2^64 as in GMP's invert_limb convention -- confirm
  22. LEAL( r12, approx_tab) C r12 = base address for the table lookup below (LEAL is defined outside this file)
  23. srdi r11, r3, 32 C r11 = d >> 32
  24. rlwinm  r9, r11, 10, 23, 30 C r9 = ((d >> 55) & 0xff) << 1, i.e. a byte offset into the halfword table
  25. lhzx r0, r12, r9 C load initial approximation (zero-extended 16-bit table entry)
  26. rldic r10, r0, 6, 42 C v0 = r0 << 6, kept within a 16-bit field (bits 6..21)
  27. mulld r8, r10, r10 C r8 = v0^2
  28. sldi r9, r10, 17 C r9 = v0 << 17
  29. mulld r0, r8, r11 C r0 = v0^2 * (d >> 32), low 64 bits
  30. srdi r0, r0, 31 C r0 = (v0^2 * (d >> 32)) >> 31
  31. subf r10, r0, r9 C v1 = (v0 << 17) - r0; same shape as a scaled Newton reciprocal step 2x - d*x^2
  32. mulld r8, r10, r10 C r8 = v1^2, low 64 bits
  33. sldi r11, r10, 33 C r11 = v1 << 33
  34. mulhdu r0, r8, r3 C r0 = high 64 bits of v1^2 * d
  35. sldi r9, r0, 1 C r9 = 2 * r0
  36. subf r10, r9, r11 C v2 = (v1 << 33) - 2 * hi(v1^2 * d)
  37. sldi r11, r10, 2 C r11 = v2 << 2, the minuend of the next refinement
  38. mulhdu r0, r10, r10 C r0 = hi(v2^2)
  39. mulld r8, r10, r10 C r8 = lo(v2^2)
  40. mulhdu r10, r8, r3 C r10 = hi(lo(v2^2) * d)
  41. mulld r9, r0, r3 C r9 = lo(hi(v2^2) * d)
  42. mulhdu r0, r0, r3 C r0 = hi(hi(v2^2) * d)
  43. addc r8, r9, r10 C r8 = r9 + r10, carry out goes to CA
  44. addze r10, r0 C r10 = r0 + CA, so r10:r8 = (v2^2 * d) >> 64
  45. srdi r0, r8, 62 C r0 = top 2 bits of r8
  46. rldimi r0, r10, 2, 0 C r0 = (r10 << 2) | (r8 >> 62): high word of (r10:r8) << 2
  47. sldi r9, r8, 2 C r9 = low word of (r10:r8) << 2
  48. subfic r10, r9, 0 C computes 0 - r9; only the resulting CA (borrow state) is used, r10 is overwritten below
  49. subfe r8, r0, r11 C v3 = (v2 << 2) - r0 - borrow, where borrow = 1 when r9 != 0
  50. mulhdu r10, r3, r8 C r10 = hi(d * v3)
  51. add r10, r10, r3 C r10 = hi(d * v3) + d
  52. mulld r9, r3, r8 C r9 = lo(d * v3)
  53. subf r11, r10, r8 C r11 = v3 - r10
  54. addi r0, r10, 1 C r0 = r10 + 1
  55. addi r8, r11, -1 C r8 = v3 - r10 - 1
  56. and r0, r3, r0 C r0 = d & (r10 + 1)
  57. addc r11, r9, r0 C r11 = r9 + r0, carry out goes to CA
  58. addze r10, r10 C r10 += CA
  59. addc r0, r11, r3 C r11 + d: only the carry is used, r0 is not read again
  60. addze r10, r10 C r10 += CA
  61. subf r3, r10, r8 C result: r3 = r8 - r10
  62. blr C return to caller
  63. EPILOGUE()
  64. DEF_OBJECT(approx_tab) C 256 halfword initial approximations; mpn_invert_limb reads entry i = (d >> 55) & 0xff. NOTE(review): spot checks match floor((2^18-1)/(256+i)) -- confirm against the table generator
  65. .short 1023,1020,1016,1012,1008,1004,1000,996 C i = 0..7
  66. .short 992,989,985,981,978,974,970,967 C i = 8..15
  67. .short 963,960,956,953,949,946,942,939 C i = 16..23
  68. .short 936,932,929,926,923,919,916,913 C i = 24..31
  69. .short 910,907,903,900,897,894,891,888 C i = 32..39
  70. .short 885,882,879,876,873,870,868,865 C i = 40..47
  71. .short 862,859,856,853,851,848,845,842 C i = 48..55
  72. .short 840,837,834,832,829,826,824,821 C i = 56..63
  73. .short 819,816,814,811,809,806,804,801 C i = 64..71
  74. .short 799,796,794,791,789,787,784,782 C i = 72..79
  75. .short 780,777,775,773,771,768,766,764 C i = 80..87
  76. .short 762,759,757,755,753,751,748,746 C i = 88..95
  77. .short 744,742,740,738,736,734,732,730 C i = 96..103
  78. .short 728,726,724,722,720,718,716,714 C i = 104..111
  79. .short 712,710,708,706,704,702,700,699 C i = 112..119
  80. .short 697,695,693,691,689,688,686,684 C i = 120..127
  81. .short 682,680,679,677,675,673,672,670 C i = 128..135
  82. .short 668,667,665,663,661,660,658,657 C i = 136..143
  83. .short 655,653,652,650,648,647,645,644 C i = 144..151
  84. .short 642,640,639,637,636,634,633,631 C i = 152..159
  85. .short 630,628,627,625,624,622,621,619 C i = 160..167
  86. .short 618,616,615,613,612,611,609,608 C i = 168..175
  87. .short 606,605,604,602,601,599,598,597 C i = 176..183
  88. .short 595,594,593,591,590,589,587,586 C i = 184..191
  89. .short 585,583,582,581,579,578,577,576 C i = 192..199
  90. .short 574,573,572,571,569,568,567,566 C i = 200..207
  91. .short 564,563,562,561,560,558,557,556 C i = 208..215
  92. .short 555,554,553,551,550,549,548,547 C i = 216..223
  93. .short 546,544,543,542,541,540,539,538 C i = 224..231
  94. .short 537,536,534,533,532,531,530,529 C i = 232..239
  95. .short 528,527,526,525,524,523,522,521 C i = 240..247
  96. .short 520,519,518,517,516,515,514,513 C i = 248..255
  97. END_OBJECT(approx_tab)
  98. ASM_END()