stan.S
上传用户:jlfgdled
上传日期:2013-04-10
资源大小:33168k
文件大小:13k
源码类别:

Linux/Unix编程

开发平台:

Unix_Linux

  1. |
  2. | stan.sa 3.3 7/29/91
  3. |
  4. | The entry point stan computes the tangent of
  5. | an input argument;
  6. | stand does the same except for denormalized input.
  7. |
  8. | Input: Double-extended number X in location pointed to
  9. | by address register a0.
  10. |
  11. | Output: The value tan(X) returned in floating-point register Fp0.
  12. |
  13. | Accuracy and Monotonicity: The returned result is within 3 ulp in
  14. | 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
  15. | result is subsequently rounded to double precision. The
  16. | result is provably monotonic in double precision.
  17. |
  18. | Speed: The program sTAN takes approximately 170 cycles for
  19. | input argument X such that |X| < 15Pi, which is the usual
  20. | situation.
  21. |
  22. | Algorithm:
  23. |
  24. | 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
  25. |
  26. | 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
  27. | k = N mod 2, so in particular, k = 0 or 1.
  28. |
  29. | 3. If k is odd, go to 5.
  30. |
  31. | 4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a
  32. | rational function U/V where
  33. | U = r + r*s*(P1 + s*(P2 + s*P3)), and
  34. | V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))),  s = r*r.
  35. | Exit.
  36. |
  37. | 4. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by a
  38. | rational function U/V where
  39. | U = r + r*s*(P1 + s*(P2 + s*P3)), and
  40. | V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r,
  41. | -Cot(r) = -V/U. Exit.
  42. |
  43. | 6. If |X| > 1, go to 8.
  44. |
  45. | 7. (|X|<2**(-40)) Tan(X) = X. Exit.
  46. |
  47. | 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
  48. |
  49. | Copyright (C) Motorola, Inc. 1990
  50. | All Rights Reserved
  51. |
  52. | THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
  53. | The copyright notice above does not evidence any  
  54. | actual or intended publication of such source code.
  55. |STAN idnt 2,1 | Motorola 040 Floating Point Software Package
  56. |section 8
  57. .include "fpsp.h"
  58. BOUNDS1: .long 0x3FD78000,0x4004BC7E
  59. TWOBYPI: .long 0x3FE45F30,0x6DC9C883
  60. TANQ4: .long 0x3EA0B759,0xF50F8688
  61. TANP3: .long 0xBEF2BAA5,0xA8924F04
  62. TANQ3: .long 0xBF346F59,0xB39BA65F,0x00000000,0x00000000
  63. TANP2: .long 0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000
  64. TANQ2: .long 0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000
  65. TANP1: .long 0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000
  66. TANQ1: .long 0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000
  67. INVTWOPI: .long 0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000
  68. TWOPI1: .long 0x40010000,0xC90FDAA2,0x00000000,0x00000000
  69. TWOPI2: .long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000
  70. |--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
  71. |--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT
  72. |--MOST 69 BITS LONG.
  73. .global PITBL
  74. PITBL:
  75.   .long  0xC0040000,0xC90FDAA2,0x2168C235,0x21800000
  76.   .long  0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
  77.   .long  0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
  78.   .long  0xC0040000,0xB6365E22,0xEE46F000,0x21480000
  79.   .long  0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
  80.   .long  0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
  81.   .long  0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
  82.   .long  0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
  83.   .long  0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
  84.   .long  0xC0040000,0x90836524,0x88034B96,0x20B00000
  85.   .long  0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
  86.   .long  0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
  87.   .long  0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
  88.   .long  0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
  89.   .long  0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
  90.   .long  0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
  91.   .long  0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
  92.   .long  0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
  93.   .long  0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
  94.   .long  0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
  95.   .long  0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
  96.   .long  0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
  97.   .long  0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
  98.   .long  0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
  99.   .long  0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
  100.   .long  0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
  101.   .long  0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
  102.   .long  0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
  103.   .long  0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
  104.   .long  0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
  105.   .long  0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
  106.   .long  0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
  107.   .long  0x00000000,0x00000000,0x00000000,0x00000000
  108.   .long  0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
  109.   .long  0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
  110.   .long  0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
  111.   .long  0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
  112.   .long  0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
  113.   .long  0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
  114.   .long  0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
  115.   .long  0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
  116.   .long  0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
  117.   .long  0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
  118.   .long  0x40030000,0x8A3AE64F,0x76F80584,0x21080000
  119.   .long  0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
  120.   .long  0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
  121.   .long  0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
  122.   .long  0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
  123.   .long  0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
  124.   .long  0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
  125.   .long  0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
  126.   .long  0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
  127.   .long  0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
  128.   .long  0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
  129.   .long  0x40040000,0x8A3AE64F,0x76F80584,0x21880000
  130.   .long  0x40040000,0x90836524,0x88034B96,0xA0B00000
  131.   .long  0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
  132.   .long  0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
  133.   .long  0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
  134.   .long  0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
  135.   .long  0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
  136.   .long  0x40040000,0xB6365E22,0xEE46F000,0xA1480000
  137.   .long  0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
  138.   .long  0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
  139.   .long  0x40040000,0xC90FDAA2,0x2168C235,0xA1800000
  140. .set INARG,FP_SCR4
  141. .set TWOTO63,L_SCR1
  142. .set ENDFLAG,L_SCR2
  143. .set N,L_SCR3
  144. | xref t_frcinx
  145. |xref t_extdnrm
  146. .global stand
  147. stand:
  148. |--TAN(X) = X FOR DENORMALIZED X
  149. bra t_extdnrm
  150. .global stan
  151. stan:
  152. fmovex (%a0),%fp0 | ...LOAD INPUT
  153. movel (%a0),%d0
  154. movew 4(%a0),%d0
  155. andil #0x7FFFFFFF,%d0
  156. cmpil #0x3FD78000,%d0 | ...|X| >= 2**(-40)?
  157. bges TANOK1
  158. bra TANSM
  159. TANOK1:
  160. cmpil #0x4004BC7E,%d0 | ...|X| < 15 PI?
  161. blts TANMAIN
  162. bra REDUCEX
  163. TANMAIN:
  164. |--THIS IS THE USUAL CASE, |X| <= 15 PI.
  165. |--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
  166. fmovex %fp0,%fp1
  167. fmuld TWOBYPI,%fp1 | ...X*2/PI
  168. |--HIDE THE NEXT TWO INSTRUCTIONS
  169. leal PITBL+0x200,%a1 | ...TABLE OF N*PI/2, N = -32,...,32
  170. |--FP1 IS NOW READY
  171. fmovel %fp1,%d0 | ...CONVERT TO INTEGER
  172. asll #4,%d0
  173. addal %d0,%a1 | ...ADDRESS N*PIBY2 IN Y1, Y2
  174. fsubx (%a1)+,%fp0 | ...X-Y1
  175. |--HIDE THE NEXT ONE
  176. fsubs (%a1),%fp0 | ...FP0 IS R = (X-Y1)-Y2
  177. rorl #5,%d0
  178. andil #0x80000000,%d0 | ...D0 WAS ODD IFF D0 < 0
  179. TANCONT:
  180. cmpil #0,%d0
  181. blt NODD
  182. fmovex %fp0,%fp1
  183. fmulx %fp1,%fp1   | ...S = R*R
  184. fmoved TANQ4,%fp3
  185. fmoved TANP3,%fp2
  186. fmulx %fp1,%fp3   | ...SQ4
  187. fmulx %fp1,%fp2   | ...SP3
  188. faddd TANQ3,%fp3 | ...Q3+SQ4
  189. faddx TANP2,%fp2 | ...P2+SP3
  190. fmulx %fp1,%fp3   | ...S(Q3+SQ4)
  191. fmulx %fp1,%fp2   | ...S(P2+SP3)
  192. faddx TANQ2,%fp3 | ...Q2+S(Q3+SQ4)
  193. faddx TANP1,%fp2 | ...P1+S(P2+SP3)
  194. fmulx %fp1,%fp3   | ...S(Q2+S(Q3+SQ4))
  195. fmulx %fp1,%fp2   | ...S(P1+S(P2+SP3))
  196. faddx TANQ1,%fp3 | ...Q1+S(Q2+S(Q3+SQ4))
  197. fmulx %fp0,%fp2   | ...RS(P1+S(P2+SP3))
  198. fmulx %fp3,%fp1   | ...S(Q1+S(Q2+S(Q3+SQ4)))
  199. faddx %fp2,%fp0   | ...R+RS(P1+S(P2+SP3))
  200. fadds #0x3F800000,%fp1 | ...1+S(Q1+...)
  201. fmovel %d1,%fpcr |restore users exceptions
  202. fdivx %fp1,%fp0 |last inst - possible exception set
  203. bra t_frcinx
  204. NODD:
  205. fmovex %fp0,%fp1
  206. fmulx %fp0,%fp0   | ...S = R*R
  207. fmoved TANQ4,%fp3
  208. fmoved TANP3,%fp2
  209. fmulx %fp0,%fp3   | ...SQ4
  210. fmulx %fp0,%fp2   | ...SP3
  211. faddd TANQ3,%fp3 | ...Q3+SQ4
  212. faddx TANP2,%fp2 | ...P2+SP3
  213. fmulx %fp0,%fp3   | ...S(Q3+SQ4)
  214. fmulx %fp0,%fp2   | ...S(P2+SP3)
  215. faddx TANQ2,%fp3 | ...Q2+S(Q3+SQ4)
  216. faddx TANP1,%fp2 | ...P1+S(P2+SP3)
  217. fmulx %fp0,%fp3   | ...S(Q2+S(Q3+SQ4))
  218. fmulx %fp0,%fp2   | ...S(P1+S(P2+SP3))
  219. faddx TANQ1,%fp3 | ...Q1+S(Q2+S(Q3+SQ4))
  220. fmulx %fp1,%fp2   | ...RS(P1+S(P2+SP3))
  221. fmulx %fp3,%fp0   | ...S(Q1+S(Q2+S(Q3+SQ4)))
  222. faddx %fp2,%fp1   | ...R+RS(P1+S(P2+SP3))
  223. fadds #0x3F800000,%fp0 | ...1+S(Q1+...)
  224. fmovex %fp1,-(%sp)
  225. eoril #0x80000000,(%sp)
  226. fmovel %d1,%fpcr   |restore users exceptions
  227. fdivx (%sp)+,%fp0 |last inst - possible exception set
  228. bra t_frcinx
  229. TANBORS:
  230. |--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
  231. |--IF |X| < 2**(-40), RETURN X OR 1.
  232. cmpil #0x3FFF8000,%d0
  233. bgts REDUCEX
  234. TANSM:
  235. fmovex %fp0,-(%sp)
  236. fmovel %d1,%fpcr  |restore users exceptions
  237. fmovex (%sp)+,%fp0 |last inst - possible exception set
  238. bra t_frcinx
  239. REDUCEX:
  240. |--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
  241. |--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
  242. |--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
  243. fmovemx %fp2-%fp5,-(%a7) | ...save FP2 through FP5
  244. movel %d2,-(%a7)
  245.         fmoves         #0x00000000,%fp1
  246. |--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
  247. |--there is a danger of unwanted overflow in first LOOP iteration.  In this
  248. |--case, reduce argument by one remainder step to make subsequent reduction
  249. |--safe.
  250. cmpil #0x7ffeffff,%d0 |is argument dangerously large?
  251. bnes LOOP
  252. movel #0x7ffe0000,FP_SCR2(%a6) |yes
  253. | ;create 2**16383*PI/2
  254. movel #0xc90fdaa2,FP_SCR2+4(%a6)
  255. clrl FP_SCR2+8(%a6)
  256. ftstx %fp0 |test sign of argument
  257. movel #0x7fdc0000,FP_SCR3(%a6) |create low half of 2**16383*
  258. | ;PI/2 at FP_SCR3
  259. movel #0x85a308d3,FP_SCR3+4(%a6)
  260. clrl   FP_SCR3+8(%a6)
  261. fblt red_neg
  262. orw #0x8000,FP_SCR2(%a6) |positive arg
  263. orw #0x8000,FP_SCR3(%a6)
  264. red_neg:
  265. faddx  FP_SCR2(%a6),%fp0 |high part of reduction is exact
  266. fmovex  %fp0,%fp1 |save high result in fp1
  267. faddx  FP_SCR3(%a6),%fp0 |low part of reduction
  268. fsubx  %fp0,%fp1 |determine low component of result
  269. faddx  FP_SCR3(%a6),%fp1 |fp0/fp1 are reduced argument.
  270. |--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
  271. |--integer quotient will be stored in N
  272. |--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
  273. LOOP:
  274. fmovex %fp0,INARG(%a6) | ...+-2**K * F, 1 <= F < 2
  275. movew INARG(%a6),%d0
  276.         movel          %d0,%a1 | ...save a copy of D0
  277. andil #0x00007FFF,%d0
  278. subil #0x00003FFF,%d0 | ...D0 IS K
  279. cmpil #28,%d0
  280. bles LASTLOOP
  281. CONTLOOP:
  282. subil #27,%d0  | ...D0 IS L := K-27
  283. movel #0,ENDFLAG(%a6)
  284. bras WORK
  285. LASTLOOP:
  286. clrl %d0 | ...D0 IS L := 0
  287. movel #1,ENDFLAG(%a6)
  288. WORK:
  289. |--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
  290. |--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
  291. |--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
  292. |--2**L * (PIby2_1), 2**L * (PIby2_2)
  293. movel #0x00003FFE,%d2 | ...BIASED EXPO OF 2/PI
  294. subl %d0,%d2 | ...BIASED EXPO OF 2**(-L)*(2/PI)
  295. movel #0xA2F9836E,FP_SCR1+4(%a6)
  296. movel #0x4E44152A,FP_SCR1+8(%a6)
  297. movew %d2,FP_SCR1(%a6) | ...FP_SCR1 is 2**(-L)*(2/PI)
  298. fmovex %fp0,%fp2
  299. fmulx FP_SCR1(%a6),%fp2
  300. |--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
  301. |--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
  302. |--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
  303. |--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
  304. |--US THE DESIRED VALUE IN FLOATING POINT.
  305. |--HIDE SIX CYCLES OF INSTRUCTION
  306.         movel %a1,%d2
  307.         swap %d2
  308. andil #0x80000000,%d2
  309. oril #0x5F000000,%d2 | ...D2 IS SIGN(INARG)*2**63 IN SGL
  310. movel %d2,TWOTO63(%a6)
  311. movel %d0,%d2
  312. addil #0x00003FFF,%d2 | ...BIASED EXPO OF 2**L * (PI/2)
  313. |--FP2 IS READY
  314. fadds TWOTO63(%a6),%fp2 | ...THE FRACTIONAL PART OF FP1 IS ROUNDED
  315. |--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1  and  2**(L)*Piby2_2
  316.         movew %d2,FP_SCR2(%a6)
  317. clrw           FP_SCR2+2(%a6)
  318. movel #0xC90FDAA2,FP_SCR2+4(%a6)
  319. clrl FP_SCR2+8(%a6) | ...FP_SCR2 is  2**(L) * Piby2_1
  320. |--FP2 IS READY
  321. fsubs TWOTO63(%a6),%fp2 | ...FP2 is N
  322. addil #0x00003FDD,%d0
  323.         movew %d0,FP_SCR3(%a6)
  324. clrw           FP_SCR3+2(%a6)
  325. movel #0x85A308D3,FP_SCR3+4(%a6)
  326. clrl FP_SCR3+8(%a6) | ...FP_SCR3 is 2**(L) * Piby2_2
  327. movel ENDFLAG(%a6),%d0
  328. |--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
  329. |--P2 = 2**(L) * Piby2_2
  330. fmovex %fp2,%fp4
  331. fmulx FP_SCR2(%a6),%fp4 | ...W = N*P1
  332. fmovex %fp2,%fp5
  333. fmulx FP_SCR3(%a6),%fp5 | ...w = N*P2
  334. fmovex %fp4,%fp3
  335. |--we want P+p = W+w  but  |p| <= half ulp of P
  336. |--Then, we need to compute  A := R-P   and  a := r-p
  337. faddx %fp5,%fp3 | ...FP3 is P
  338. fsubx %fp3,%fp4 | ...W-P
  339. fsubx %fp3,%fp0 | ...FP0 is A := R - P
  340.         faddx %fp5,%fp4 | ...FP4 is p = (W-P)+w
  341. fmovex %fp0,%fp3 | ...FP3 A
  342. fsubx %fp4,%fp1 | ...FP1 is a := r - p
  343. |--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
  344. |--|r| <= half ulp of R.
  345. faddx %fp1,%fp0 | ...FP0 is R := A+a
  346. |--No need to calculate r if this is the last loop
  347. cmpil #0,%d0
  348. bgt RESTORE
  349. |--Need to calculate r
  350. fsubx %fp0,%fp3 | ...A-R
  351. faddx %fp3,%fp1 | ...FP1 is r := (A-R)+a
  352. bra LOOP
  353. RESTORE:
  354.         fmovel %fp2,N(%a6)
  355. movel (%a7)+,%d2
  356. fmovemx (%a7)+,%fp2-%fp5
  357. movel N(%a6),%d0
  358.         rorl #1,%d0
  359. bra TANCONT
  360. |end