reg_round.S
上传用户:lgb322
上传日期:2013-02-24
资源大小:30529k
文件大小:18k
源码类别:

嵌入式Linux

开发平台:

Unix_Linux

  1. .file "reg_round.S"
  2. /*---------------------------------------------------------------------------+
  3.  |  reg_round.S                                                              |
  4.  |                                                                           |
  5.  | Rounding/truncation/etc for FPU basic arithmetic functions.               |
  6.  |                                                                           |
  7.  | Copyright (C) 1993,1995,1997                                              |
  8.  |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
  9.  |                       Australia.  E-mail billm@suburbia.net               |
  10.  |                                                                           |
  11.  | This code has four possible entry points.                                 |
  12.  | The following must be entered by a jmp instruction:                       |
  13.  |   fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit.                  |
  14.  |                                                                           |
  15.  | The FPU_round entry point is intended to be used by C code.               |
  16.  | From C, call as:                                                          |
  17.  |  int FPU_round(FPU_REG *arg, unsigned int extent, int dummy,              |
  18.  |                unsigned int control_w, u_char sign)                       |
  19.  |    Return value is the tag of the answer, or-ed with FPU_Exception if     |
  20.  |    one was raised, or -1 on internal error.                               |
  21.  |                                                                           |
  22.  | For correct "up" and "down" rounding, the argument must have the correct  |
  23.  | sign.                                                                     |
  24.  |                                                                           |
  25.  +---------------------------------------------------------------------------*/
  26. /*---------------------------------------------------------------------------+
  27.  | Four entry points.                                                        |
  28.  |                                                                           |
  29.  | Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points:     |
  30.  |  %eax:%ebx  64 bit significand                                            |
  31.  |  %edx       32 bit extension of the significand                           |
  32.  |  %edi       pointer to an FPU_REG for the result to be stored             |
  33.  |  stack      calling function must have set up a C stack frame and         |
  34.  |             pushed %esi, %edi, and %ebx                                   |
  35.  |                                                                           |
  36.  | Needed just for the fpu_reg_round_sqrt entry point:                       |
  37.  |  %cx  A control word in the same format as the FPU control word.          |
  38.  | Otherwise, PARAM4 must give such a value.                                 |
  39.  |                                                                           |
  40.  |                                                                           |
  41.  | The significand and its extension are assumed to be exact in the          |
  42.  | following sense:                                                          |
  43.  |   If the significand by itself is the exact result then the significand   |
  44.  |   extension (%edx) must contain 0, otherwise the significand extension    |
  45.  |   must be non-zero.                                                       |
  46.  |   If the significand extension is non-zero then the significand is        |
  47.  |   smaller than the magnitude of the correct exact result by an amount     |
  48.  |   greater than zero and less than one ls bit of the significand.          |
  49.  |   The significand extension is only required to have three possible       |
  50.  |   non-zero values:                                                        |
  51.  |       less than 0x80000000  <=> the significand is less than 1/2 an ls    |
  52.  |                                 bit smaller than the magnitude of the     |
  53.  |                                 true exact result.                        |
  54.  |         exactly 0x80000000  <=> the significand is exactly 1/2 an ls bit  |
  55.  |                                 smaller than the magnitude of the true    |
  56.  |                                 exact result.                             |
  57.  |    greater than 0x80000000  <=> the significand is more than 1/2 an ls    |
  58.  |                                 bit smaller than the magnitude of the     |
  59.  |                                 true exact result.                        |
  60.  |                                                                           |
  61.  +---------------------------------------------------------------------------*/
  62. /*---------------------------------------------------------------------------+
  63.  |  The code in this module has become quite complex, but it should handle   |
  64.  |  all of the FPU flags which are set at this stage of the basic arithmetic |
  65.  |  computations.                                                            |
  66.  |  There are a few rare cases where the results are not set identically to  |
  67.  |  a real FPU. These require a bit more thought because at this stage the   |
  68.  |  results of the code here appear to be more consistent...                 |
  69.  |  This may be changed in a future version.                                 |
  70.  +---------------------------------------------------------------------------*/
  71. #include "fpu_emu.h"
  72. #include "exception.h"
  73. #include "control_w.h"
  74. /* Flags for FPU_bits_lost: direction in which rounding discarded bits (0 = no bits lost) */
  75. #define LOST_DOWN $1 /* the result was truncated */
  76. #define LOST_UP $2 /* the result was incremented */
  77. /* Flags for FPU_denormal: how an exponent at or below EXP_UNDER was handled (0 = not a de-normal) */
  78. #define DENORMAL $1 /* underflow masked: significand was shifted right in L_Make_denorm */
  79. #define UNMASKED_UNDERFLOW $2 /* underflow unmasked: significand was left unshifted */
  80. #ifndef NON_REENTRANT_FPU
  81. /* Make the code re-entrant by putting
  82. local storage on the stack (in the slot pushed at fpu_reg_round): */
  83. #define FPU_bits_lost (%esp)
  84. #define FPU_denormal 1(%esp)
  85. #else
  86. /* Not re-entrant, so we can gain speed by putting
  87. local storage in a static area (one byte per flag): */
  88. .data
  89. .align 4,0
  90. FPU_bits_lost:
  91. .byte 0
  92. FPU_denormal:
  93. .byte 0
  94. #endif /* NON_REENTRANT_FPU */
  95. .text
  96. .globl fpu_reg_round
  97. .globl fpu_reg_round_sqrt /* NOTE(review): no label of this name is defined in the visible source */
  98. .globl fpu_Arith_exit
  99. /* Entry point when called from C: builds the frame and register state that fpu_reg_round expects, then falls through into it */
  100. ENTRY(FPU_round)
  101. pushl %ebp
  102. movl %esp,%ebp
  103. pushl %esi
  104. pushl %edi
  105. pushl %ebx
  106. movl PARAM1,%edi /* %edi = FPU_REG to round; the result is written back through it */
  107. movl SIGH(%edi),%eax /* %eax:%ebx = 64 bit significand */
  108. movl SIGL(%edi),%ebx
  109. movl PARAM2,%edx /* %edx = extension of the significand */
  110. fpu_reg_round: /* Normal entry point */
  111. movl PARAM4,%ecx /* %ecx = control word (precision and rounding control fields are tested below) */
  112. #ifndef NON_REENTRANT_FPU
  113. pushl %ebx /* adjust the stack pointer: reserves the slot holding FPU_bits_lost and FPU_denormal */
  114. #endif /* NON_REENTRANT_FPU */ 
  115. #ifdef PARANOID
  116. /* Cannot use this here yet */
  117. /* orl %eax,%eax */
  118. /* jns L_entry_bugged */
  119. #endif /* PARANOID */
  120. cmpw EXP_UNDER,EXP(%edi)
  121. jle L_Make_denorm /* The number is a de-normal */
  122. movb $0,FPU_denormal /* 0 -> not a de-normal */
  123. Denorm_done: /* L_Make_denorm rejoins here with FPU_denormal already set */
  124. movb $0,FPU_bits_lost /* No bits yet lost in rounding */
  125. movl %ecx,%esi /* keep the whole control word in %esi; %ecx is reused for the field tests */
  126. andl CW_PC,%ecx /* isolate the precision control field */
  127. cmpl PR_64_BITS,%ecx
  128. je LRound_To_64
  129. cmpl PR_53_BITS,%ecx
  130. je LRound_To_53
  131. cmpl PR_24_BITS,%ecx
  132. je LRound_To_24
  133. #ifdef PECULIAR_486
  134. /* With the precision control bits set to 01 "(reserved)", a real 80486
  135.    behaves as if the precision control bits were set to 11 "64 bits" */
  136. cmpl PR_RESERVED_BITS,%ecx
  137. je LRound_To_64
  138. #ifdef PARANOID
  139. jmp L_bugged_denorm_486
  140. #endif /* PARANOID */ 
  141. #else
  142. #ifdef PARANOID
  143. jmp L_bugged_denorm /* There is no bug, just a bad control word */
  144. #endif /* PARANOID */ 
  145. #endif /* PECULIAR_486 */
  146. /* Round etc to 24 bit precision: the low 8 bits of %eax, all of %ebx and the extension %edx form the part to be discarded */
  147. LRound_To_24:
  148. movl %esi,%ecx /* recover the control word saved at Denorm_done */
  149. andl CW_RC,%ecx /* isolate the rounding control field */
  150. cmpl RC_RND,%ecx
  151. je LRound_nearest_24
  152. cmpl RC_CHOP,%ecx
  153. je LCheck_truncate_24
  154. cmpl RC_UP,%ecx /* Towards +infinity */
  155. je LUp_24
  156. cmpl RC_DOWN,%ecx /* Towards -infinity */
  157. je LDown_24
  158. #ifdef PARANOID
  159. jmp L_bugged_round24
  160. #endif /* PARANOID */ 
  161. LUp_24:
  162. cmpb SIGN_POS,PARAM5 /* PARAM5 = sign of the result */
  163. jne LCheck_truncate_24 /* If negative then  up==truncate */
  164. jmp LCheck_24_round_up
  165. LDown_24:
  166. cmpb SIGN_POS,PARAM5
  167. je LCheck_truncate_24 /* If positive then  down==truncate */
  168. LCheck_24_round_up: /* directed rounding away from zero: increment if any discarded bit is set */
  169. movl %eax,%ecx
  170. andl $0x000000ff,%ecx
  171. orl %ebx,%ecx
  172. orl %edx,%ecx
  173. jnz LDo_24_round_up
  174. jmp L_Re_normalise /* already exact at 24 bits */
  175. LRound_nearest_24:
  176. /* Do rounding of the 24th bit if needed (nearest or even) */
  177. movl %eax,%ecx
  178. andl $0x000000ff,%ecx
  179. cmpl $0x00000080,%ecx /* 0x80 in the low byte = half of the 24th bit */
  180. jc LCheck_truncate_24 /* less than half, no increment needed */
  181. jne LGreater_Half_24 /* greater than half, increment needed */
  182. /* Possibly half, we need to check the ls bits */
  183. orl %ebx,%ebx
  184. jnz LGreater_Half_24 /* greater than half, increment needed */
  185. orl %edx,%edx
  186. jnz LGreater_Half_24 /* greater than half, increment needed */
  187. /* Exactly half, increment only if 24th bit is 1 (round to even) */
  188. testl $0x00000100,%eax
  189. jz LDo_truncate_24
  190. LGreater_Half_24: /* Rounding: increment at the 24th bit */
  191. LDo_24_round_up:
  192. andl $0xffffff00,%eax /* Truncate to 24 bits */
  193. xorl %ebx,%ebx
  194. movb LOST_UP,FPU_bits_lost
  195. addl $0x00000100,%eax /* the carry flag from this add is tested at LCheck_Round_Overflow */
  196. jmp LCheck_Round_Overflow
  197. LCheck_truncate_24: /* record LOST_DOWN only if some discarded bit is actually set */
  198. movl %eax,%ecx
  199. andl $0x000000ff,%ecx
  200. orl %ebx,%ecx
  201. orl %edx,%ecx
  202. jz L_Re_normalise /* No truncation needed */
  203. LDo_truncate_24:
  204. andl $0xffffff00,%eax /* Truncate to 24 bits */
  205. xorl %ebx,%ebx
  206. movb LOST_DOWN,FPU_bits_lost
  207. jmp L_Re_normalise
  208. /* Round etc to 53 bit precision: the low 11 bits of %ebx and the extension %edx form the part to be discarded */
  209. LRound_To_53:
  210. movl %esi,%ecx /* recover the control word saved at Denorm_done */
  211. andl CW_RC,%ecx /* isolate the rounding control field */
  212. cmpl RC_RND,%ecx
  213. je LRound_nearest_53
  214. cmpl RC_CHOP,%ecx
  215. je LCheck_truncate_53
  216. cmpl RC_UP,%ecx /* Towards +infinity */
  217. je LUp_53
  218. cmpl RC_DOWN,%ecx /* Towards -infinity */
  219. je LDown_53
  220. #ifdef PARANOID
  221. jmp L_bugged_round53
  222. #endif /* PARANOID */ 
  223. LUp_53:
  224. cmpb SIGN_POS,PARAM5 /* PARAM5 = sign of the result */
  225. jne LCheck_truncate_53 /* If negative then  up==truncate */
  226. jmp LCheck_53_round_up
  227. LDown_53:
  228. cmpb SIGN_POS,PARAM5
  229. je LCheck_truncate_53 /* If positive then  down==truncate */
  230. LCheck_53_round_up: /* directed rounding away from zero: increment if any discarded bit is set */
  231. movl %ebx,%ecx
  232. andl $0x000007ff,%ecx
  233. orl %edx,%ecx
  234. jnz LDo_53_round_up
  235. jmp L_Re_normalise /* already exact at 53 bits */
  236. LRound_nearest_53:
  237. /* Do rounding of the 53rd bit if needed (nearest or even) */
  238. movl %ebx,%ecx
  239. andl $0x000007ff,%ecx
  240. cmpl $0x00000400,%ecx /* 0x400 in the low 11 bits = half of the 53rd bit */
  241. jc LCheck_truncate_53 /* less than half, no increment needed */
  242. jnz LGreater_Half_53 /* greater than half, increment needed */
  243. /* Possibly half, we need to check the ls bits */
  244. orl %edx,%edx
  245. jnz LGreater_Half_53 /* greater than half, increment needed */
  246. /* Exactly half, increment only if 53rd bit is 1 (round to even) */
  247. testl $0x00000800,%ebx
  248. jz LTruncate_53
  249. LGreater_Half_53: /* Rounding: increment at the 53rd bit */
  250. LDo_53_round_up:
  251. movb LOST_UP,FPU_bits_lost
  252. andl $0xfffff800,%ebx /* Truncate to 53 bits */
  253. addl $0x00000800,%ebx
  254. adcl $0,%eax /* the carry flag out of %eax is tested at LCheck_Round_Overflow */
  255. jmp LCheck_Round_Overflow
  256. LCheck_truncate_53: /* record LOST_DOWN only if some discarded bit is actually set */
  257. movl %ebx,%ecx
  258. andl $0x000007ff,%ecx
  259. orl %edx,%ecx
  260. jz L_Re_normalise /* No truncation needed */
  261. LTruncate_53:
  262. movb LOST_DOWN,FPU_bits_lost
  263. andl $0xfffff800,%ebx /* Truncate to 53 bits */
  264. jmp L_Re_normalise
  265. /* Round etc to 64 bit precision: only the extension %edx is discarded */
  266. LRound_To_64:
  267. movl %esi,%ecx /* recover the control word saved at Denorm_done */
  268. andl CW_RC,%ecx /* isolate the rounding control field */
  269. cmpl RC_RND,%ecx
  270. je LRound_nearest_64
  271. cmpl RC_CHOP,%ecx
  272. je LCheck_truncate_64
  273. cmpl RC_UP,%ecx /* Towards +infinity */
  274. je LUp_64
  275. cmpl RC_DOWN,%ecx /* Towards -infinity */
  276. je LDown_64
  277. #ifdef PARANOID
  278. jmp L_bugged_round64
  279. #endif /* PARANOID */ 
  280. LUp_64:
  281. cmpb SIGN_POS,PARAM5 /* PARAM5 = sign of the result */
  282. jne LCheck_truncate_64 /* If negative then  up==truncate */
  283. orl %edx,%edx /* any extension bits set? */
  284. jnz LDo_64_round_up
  285. jmp L_Re_normalise /* already exact */
  286. LDown_64:
  287. cmpb SIGN_POS,PARAM5
  288. je LCheck_truncate_64 /* If positive then  down==truncate */
  289. orl %edx,%edx /* any extension bits set? */
  290. jnz LDo_64_round_up
  291. jmp L_Re_normalise /* already exact */
  292. LRound_nearest_64:
  293. cmpl $0x80000000,%edx /* 0x80000000 = exactly half an ls bit */
  294. jc LCheck_truncate_64 /* less than half */
  295. jne LDo_64_round_up /* more than half */
  296. /* Now test for round-to-even */
  297. testb $1,%bl
  298. jz LCheck_truncate_64 /* ls bit is already 0: truncate */
  299. LDo_64_round_up:
  300. movb LOST_UP,FPU_bits_lost
  301. addl $1,%ebx
  302. adcl $0,%eax
  303. LCheck_Round_Overflow: /* shared by the 24, 53 and 64 bit increment paths; expects the carry flag of the preceding add */
  304. jnc L_Re_normalise
  305. /* Overflow, adjust the result (significand to 1.0) */
  306. rcrl $1,%eax /* rotate the carry back in as the new ms bit */
  307. rcrl $1,%ebx
  308. incw EXP(%edi)
  309. jmp L_Re_normalise
  310. LCheck_truncate_64:
  311. orl %edx,%edx
  312. jz L_Re_normalise /* exact, nothing was lost */
  313. LTruncate_64: /* falls through into L_Re_normalise */
  314. movb LOST_DOWN,FPU_bits_lost
  315. L_Re_normalise: /* common continuation once rounding is complete */
  316. testb $0xff,FPU_denormal /* non-zero if the entry code found exponent <= EXP_UNDER */
  317. jnz Normalise_result
  318. L_Normalised:
  319. movl TAG_Valid,%edx /* %edx holds the result tag until it is copied to %eax below */
  320. L_deNormalised: /* entered from LPseudoDenormal with %edx = TAG_Special */
  321. cmpb LOST_UP,FPU_bits_lost
  322. je L_precision_lost_up
  323. cmpb LOST_DOWN,FPU_bits_lost
  324. je L_precision_lost_down
  325. L_no_precision_loss: /* the two precision-flag handlers also jump back here */
  326. /* store the result */
  327. L_Store_significand:
  328. movl %eax,SIGH(%edi)
  329. movl %ebx,SIGL(%edi)
  330. cmpw EXP_OVER,EXP(%edi)
  331. jge L_overflow
  332. movl %edx,%eax /* return value: the tag */
  333. /* Convert the exponent to 80x87 form. */
  334. addw EXTENDED_Ebias,EXP(%edi)
  335. andw $0x7fff,EXP(%edi) /* clear bit 15; it is set below when the result is negative */
  336. fpu_reg_round_signed_special_exit: /* L_overflow rejoins here after calling arith_overflow */
  337. cmpb SIGN_POS,PARAM5
  338. je fpu_reg_round_special_exit
  339. orw $0x8000,EXP(%edi) /* Negative sign for the result. */
  340. fpu_reg_round_special_exit: /* the PARANOID error path rejoins here with %eax = -1 */
  341. #ifndef NON_REENTRANT_FPU
  342. popl %ebx /* adjust the stack pointer: drops the flag slot pushed at fpu_reg_round */
  343. #endif /* NON_REENTRANT_FPU */ 
  344. fpu_Arith_exit: /* undo the C stack frame: pops the saved %ebx, %edi, %esi, then leave/ret */
  345. popl %ebx
  346. popl %edi
  347. popl %esi
  348. leave
  349. ret
  350. /*
  351.  * Set the FPU status flags to represent precision loss due to
  352.  * round-up. Reached from L_deNormalised; resumes at L_no_precision_loss.
  353.  */
  354. L_precision_lost_up:
  355. push %edx /* preserve the tag across the call */
  356. push %eax /* preserve the ms half of the significand across the call */
  357. call SYMBOL_NAME(set_precision_flag_up)
  358. popl %eax
  359. popl %edx
  360. jmp L_no_precision_loss
  361. /*
  362.  * Set the FPU status flags to represent precision loss due to
  363.  * truncation. Reached from L_deNormalised; resumes at L_no_precision_loss.
  364.  */
  365. L_precision_lost_down:
  366. push %edx /* preserve the tag across the call */
  367. push %eax /* preserve the ms half of the significand across the call */
  368. call SYMBOL_NAME(set_precision_flag_down)
  369. popl %eax
  370. popl %edx
  371. jmp L_no_precision_loss
  372. /*
  373.  * The number is a denormal (which might get rounded up to a normal)
  374.  * Shift the number right the required number of bits, which will
  375.  * have to be undone later...
  376.  */
  377. L_Make_denorm:
  378. /* The action to be taken depends upon whether the underflow
  379.    exception is masked */
  380. testb CW_Underflow,%cl /* Underflow mask. */
  381. jz Unmasked_underflow /* Do not make a denormal. */
  382. movb DENORMAL,FPU_denormal
  383. pushl %ecx /* Save the control word; when FPU_denormal/FPU_bits_lost are %esp-relative they must not be accessed until the matching popl */
  384. movw EXP_UNDER+1,%cx
  385. subw EXP(%edi),%cx /* %cx = (EXP_UNDER+1) - exponent = number of bits to shift right */
  386. cmpw $64,%cx /* 64 or more bits to shift? */
  387. jnc Denorm_shift_more_than_63
  388. cmpw $32,%cx /* shrd only works for 0..31 bits */
  389. jnc Denorm_shift_more_than_32
  390. /*
  391.  * We got here without jumps by assuming that the most common requirement
  392.  *   is for a small de-normalising shift.
  393.  * Shift by [1..31] bits
  394.  */
  395. addw %cx,EXP(%edi) /* exponent becomes EXP_UNDER+1 */
  396. orl %edx,%edx /* extension */
  397. setne %ch /* Save whether %edx is non-zero */
  398. xorl %edx,%edx
  399. shrd %cl,%ebx,%edx /* bits shifted out of %ebx become the new extension */
  400. shrd %cl,%eax,%ebx
  401. shr %cl,%eax
  402. orb %ch,%dl /* fold "old extension was non-zero" into the new extension */
  403. popl %ecx /* restore the control word */
  404. jmp Denorm_done
  405. /* Shift by [32..63] bits */
  406. Denorm_shift_more_than_32:
  407. addw %cx,EXP(%edi) /* exponent becomes EXP_UNDER+1 */
  408. subb $32,%cl /* the final register moves below account for 32 of the bits */
  409. orl %edx,%edx
  410. setne %ch /* %ch = 1 if the old extension was non-zero */
  411. orb %ch,%bl
  412. xorl %edx,%edx
  413. shrd %cl,%ebx,%edx /* bits shifted out of %ebx are collected in %edx */
  414. shrd %cl,%eax,%ebx
  415. shr %cl,%eax
  416. orl %edx,%edx /* test these 32 bits */
  417. setne %cl /* %cl = 1 if any of the bits shifted out were set */
  418. orb %ch,%bl
  419. orb %cl,%bl /* record the lost bits in the ls bit of what becomes the extension */
  420. movl %ebx,%edx /* move everything down by one 32 bit register */
  421. movl %eax,%ebx
  422. xorl %eax,%eax
  423. popl %ecx /* restore the control word */
  424. jmp Denorm_done
  425. /* Shift by [64..) bits */
  426. Denorm_shift_more_than_63:
  427. cmpw $64,%cx
  428. jne Denorm_shift_more_than_64
  429. /* Exactly 64 bit shift */
  430. addw %cx,EXP(%edi) /* exponent becomes EXP_UNDER+1 */
  431. xorl %ecx,%ecx
  432. orl %edx,%edx
  433. setne %cl /* %cl = 1 if the old extension was non-zero */
  434. orl %ebx,%ebx
  435. setne %ch /* %ch = 1 if the old ls half was non-zero */
  436. orb %ch,%cl
  437. orb %cl,%al /* record the lost bits in the ls bit of what becomes the extension */
  438. movl %eax,%edx /* the old ms half becomes the extension */
  439. xorl %eax,%eax
  440. xorl %ebx,%ebx
  441. popl %ecx /* restore the control word */
  442. jmp Denorm_done
  443. Denorm_shift_more_than_64:
  444. movw EXP_UNDER+1,EXP(%edi)
  445. /* This is easy, %eax must be non-zero, so.. */
  446. movl $1,%edx /* non-zero extension below 0x80000000: inexact, less than half an ls bit */
  447. xorl %eax,%eax
  448. xorl %ebx,%ebx
  449. popl %ecx /* restore the control word */
  450. jmp Denorm_done
  451. Unmasked_underflow: /* significand and exponent are left as they are; Signal_underflow deals with them after rounding */
  452. movb UNMASKED_UNDERFLOW,FPU_denormal
  453. jmp Denorm_done
  454. /* Undo the de-normalisation. */
  455. Normalise_result: /* reached from L_Re_normalise when FPU_denormal is non-zero */
  456. cmpb UNMASKED_UNDERFLOW,FPU_denormal
  457. je Signal_underflow
  458. /* The number must be a denormal if we got here. */
  459. #ifdef PARANOID
  460. /* But check it... just in case. */
  461. cmpw EXP_UNDER+1,EXP(%edi)
  462. jne L_norm_bugged
  463. #endif /* PARANOID */
  464. #ifdef PECULIAR_486
  465. /*
  466.  * This implements a special feature of 80486 behaviour.
  467.  * Underflow will be signalled even if the number is
  468.  * not a denormal after rounding.
  469.  * This difference occurs only for masked underflow, and not
  470.  * in the unmasked case.
  471.  * Actual 80486 behaviour differs from this in some circumstances.
  472.  */
  473. orl %eax,%eax /* ms bits */
  474. js LPseudoDenormal /* Will be masked underflow */
  475. #else
  476. orl %eax,%eax /* ms bits */
  477. js L_Normalised /* No longer a denormal */
  478. #endif /* PECULIAR_486 */ 
  479. jnz LDenormal_adj_exponent /* flags are still those of the orl above: some ms bits are set */
  480. orl %ebx,%ebx
  481. jz L_underflow_to_zero /* The contents are zero */
  482. LDenormal_adj_exponent:
  483. decw EXP(%edi) /* L_Make_denorm left the exponent at EXP_UNDER+1 */
  484. LPseudoDenormal:
  485. testb $0xff,FPU_bits_lost /* bits lost == underflow */
  486. movl TAG_Special,%edx /* movl leaves the flags of the testb intact for the jz */
  487. jz L_deNormalised
  488. /* There must be a masked underflow */
  489. push %eax /* preserve the ms half of the significand across the call */
  490. pushl EX_Underflow /* argument for EXCEPTION */
  491. call EXCEPTION
  492. popl %eax /* discard the argument */
  493. popl %eax /* restore the ms half of the significand */
  494. movl TAG_Special,%edx /* set the tag again after the call */
  495. jmp L_deNormalised
  496. /*
  497.  * The operations resulted in a number too small to represent.
  498.  * Masked response. Reached from Normalise_result when %eax and %ebx are both zero.
  499.  */
  500. L_underflow_to_zero:
  501. push %eax /* preserve %eax across the call */
  502. call SYMBOL_NAME(set_precision_flag_down)
  503. popl %eax
  504. push %eax /* preserve %eax across the call */
  505. pushl EX_Underflow /* argument for EXCEPTION */
  506. call EXCEPTION
  507. popl %eax /* discard the argument */
  508. popl %eax /* restore %eax */
  509. /* Reduce the exponent to EXP_UNDER */
  510. movw EXP_UNDER,EXP(%edi)
  511. movl TAG_Zero,%edx
  512. jmp L_Store_significand /* skips the precision-flag tests at L_deNormalised; the flag was set above */
  513. /* The operations resulted in a number too large to represent. Reached from L_Store_significand when exponent >= EXP_OVER. */
  514. L_overflow:
  515. addw EXTENDED_Ebias,EXP(%edi) /* Set for unmasked response. */
  516. push %edi /* argument for arith_overflow: the result register */
  517. call SYMBOL_NAME(arith_overflow)
  518. pop %edi /* remove the argument; %eax is left as arith_overflow returned it */
  519. jmp fpu_reg_round_signed_special_exit
  520. Signal_underflow: /* reached from Normalise_result when FPU_denormal == UNMASKED_UNDERFLOW */
  521. /* The number may have been changed to a non-denormal */
  522. /* by the rounding operations. */
  523. cmpw EXP_UNDER,EXP(%edi)
  524. jle Do_unmasked_underflow
  525. jmp L_Normalised /* exponent is now above EXP_UNDER: no underflow to report */
  526. Do_unmasked_underflow:
  527. /* Increase the exponent by the magic number */
  528. addw $(3*(1<<13)),EXP(%edi)
  529. push %eax /* preserve the ms half of the significand across the call */
  530. pushl EX_Underflow /* argument for EXCEPTION */
  531. call EXCEPTION
  532. popl %eax /* discard the argument */
  533. popl %eax /* restore the ms half of the significand */
  534. jmp L_Normalised
  535. #ifdef PARANOID
  536. #ifdef PECULIAR_486
  537. L_bugged_denorm_486: /* precision control field matched none of the tested values */
  538. pushl EX_INTERNAL|0x236 /* argument for EXCEPTION */
  539. call EXCEPTION
  540. popl %ebx /* discard the argument; %ebx is reloaded from the stack on exit */
  541. jmp L_exception_exit
  542. #else
  543. L_bugged_denorm: /* precision control field matched none of the tested values */
  544. pushl EX_INTERNAL|0x230
  545. call EXCEPTION
  546. popl %ebx
  547. jmp L_exception_exit
  548. #endif /* PECULIAR_486 */ 
  549. L_bugged_round24: /* rounding control field matched none of the tested values (24 bit path) */
  550. pushl EX_INTERNAL|0x231
  551. call EXCEPTION
  552. popl %ebx
  553. jmp L_exception_exit
  554. L_bugged_round53: /* rounding control field matched none of the tested values (53 bit path) */
  555. pushl EX_INTERNAL|0x232
  556. call EXCEPTION
  557. popl %ebx
  558. jmp L_exception_exit
  559. L_bugged_round64: /* rounding control field matched none of the tested values (64 bit path) */
  560. pushl EX_INTERNAL|0x233
  561. call EXCEPTION
  562. popl %ebx
  563. jmp L_exception_exit
  564. L_norm_bugged: /* exponent was not EXP_UNDER+1 on entry to the denormal fix-up in Normalise_result */
  565. pushl EX_INTERNAL|0x234
  566. call EXCEPTION
  567. popl %ebx
  568. jmp L_exception_exit
  569. L_entry_bugged: /* only referenced by the commented-out check at fpu_reg_round */
  570. pushl EX_INTERNAL|0x235
  571. call EXCEPTION
  572. popl %ebx
  573. L_exception_exit: /* common tail for the internal-error handlers above */
  574. mov $-1,%eax /* return value -1: internal error */
  575. jmp fpu_reg_round_special_exit
  576. #endif /* PARANOID */