;;; CA certification
;;;
;;; Development platform:
;;; C/C++
;;;
;;; lbn80386.asm: source contents
							;;; Assembly primitives for bignum library, 80386 family, 32-bit code.
;;;
;;; Copyright (c) 1995, Colin Plumb.
;;; For licensing and other legal details, see the file legal.c.
;;;
;;; Several primitives are included here.  Only lbnMulAdd1 is *really*
;;; critical, but once that's written, lbnMulN1 and lbnMulSub1 are quite
;;; easy to write as well, so they are included here as well.
;;; lbnDiv21 and lbnModQ are so easy to write that they're included, too.
;;;
;;; All functions here are for 32-bit flat mode.  I.e. near code and
;;; near data, although the near offsets are 32 bits.
;; Enable the 80386 instruction set and 32-bit addressing forms used below.
.386
;; The two commented-out lines below would pre-declare the _TEXT segment
;; before it is named in the GROUP directive; they are currently disabled.
;_TEXT   segment para public use32 'CODE' ; 16-byte aligned because 486 cares
;_TEXT	ends
;; Declare FLAT as a group containing _TEXT in exactly two situations:
;;   - @Version is defined and is <= 510, or
;;   - @Version is not defined at all.
;; NOTE(review): presumably assemblers that report a newer @Version already
;; predefine FLAT, so redefining it there would be an error -- confirm
;; against the assembler documentation.
ifdef @Version
if @Version le 510
FLAT	group	_TEXT
endif
else
FLAT	group	_TEXT
endif
;; All segment registers are assumed to address the single FLAT group.
	assume	cs:FLAT, ds:FLAT, ss:FLAT
;; Everything below lives in one 32-bit, paragraph-aligned code segment;
;; the jump tables used by the unrolled loops are emitted into it as well.
_TEXT   segment para public use32 'CODE' ; 16-byte aligned because 486 cares
;; Entry points exported to the linker (leading underscore = C symbol name).
	public  _lbnMulN1_32
	public  _lbnMulAdd1_32
	public  _lbnMulSub1_32
	public	_lbnDiv21_32
	public	_lbnModQ_32
;; Register usage:
;; eax - low half of product
;; ebx - carry to next iteration (callee-saved, so it is pushed/popped)
;; ecx - multiplier (k)
;; edx - high half of product
;; esi - source pointer
;; edi - dest pointer
;; ebp - loop counter
;;
;; Stack frame of _lbnMulN1_32.  Each column is the offset in effect after
;; one more push; the rightmost column applies once ebx, esi, ebp and edi
;; have all been saved:
;; +--------+ esp+20  esp+24  esp+28  esp+32  esp+36
;; |    k   |
;; +--------+ esp+16  esp+20  esp+24  esp+28  esp+32
;; |   len  |
;; +--------+ esp+12  esp+16  esp+20  esp+24  esp+28
;; |   in   |
;; +--------+ esp+8   esp+12  esp+16  esp+20  esp+24
;; |   out  |
;; +--------+ esp+4   esp+8   esp+12  esp+16  esp+20
;; | return |
;; +--------+ esp     esp+4   esp+8   esp+12  esp+16
;; |   ebx  |
;; +--------+         esp     esp+4   esp+8   esp+12
;; |   esi  |
;; +--------+                 esp     esp+4   esp+8
;; |   ebp  |
;; +--------+                         esp     esp+4
;; |   edi  |
;; +--------+                                 esp
;;
;; _lbnMulN1_32(out, in, len, k)  -- arguments on the stack as drawn above.
;; Multiplies the len-word number at `in` (word 0 least significant) by the
;; single word k and stores the len+1 word product at `out`.
;; No meaningful value is returned in eax.
;; Precondition: len >= 1.  The first word is multiplied unconditionally,
;; and out[len] is always written.
;; Preserves ebx, esi, edi, ebp; clobbers eax, ecx, edx and the flags.
;;
;; The loop is unrolled four times.  (len-1) mod 4 selects, through the
;; jump table, how many of the four unrolled steps run on the first pass;
;; esi/edi are pre-advanced by the same amount so that the fixed negative
;; displacements in the loop body address the right words.
	align	16
_lbnMulN1_32	proc	near
	push	ebx		;	ebx carries the running high word; save it
	push	esi		;	save callee-saved registers as we go
	mov	esi,[esp+16]	;	load in
	push	ebp		;
	mov	ebp,[esp+24]	;	load len
	mov	ecx,[esp+28]	;	load k
	push	edi		;
	mov	edi,[esp+20]	;	load out
;; First multiply step has no carry in.
	mov	eax,[esi]	; V
	lea	ebx,[ebp*4-4]	; U	loop unrolling: ebx = (len-1)*4
	mul	ecx		; NP	first multiply
	mov	[edi],eax	; U
	and	ebx,12		; V	loop unrolling: ebx = ((len-1) mod 4)*4
	add	esi,ebx		; U	loop unrolling
	add	edi,ebx		; V	loop unrolling
	jmp	DWORD PTR m32_jumptable[ebx]	; NP	loop unrolling
	align	4
m32_jumptable:
	dd	m32_case0
	dd	m32_case1
	dd	m32_case2
	dd	m32_case3
	nop
	align	8
	nop
	nop
	nop	; Get loop nicely aligned
m32_case0:
	sub	ebp,4		; U	no partial pass needed; any words left?
	jbe	SHORT m32_done	; V	(unsigned: also taken when len < 4)
m32_loop:
	mov	eax,[esi+4]	; U
	mov	ebx,edx		; V	Remember carry for later
	add	esi,16		; U
	add	edi,16		; V
	mul	ecx		; NP
	add	eax,ebx		; U	Add carry in from previous word
	adc	edx,0		; U
	mov	[edi-12],eax	; V
m32_case3:
	mov	eax,[esi-8]	; U
	mov	ebx,edx		; V	Remember carry for later
	mul	ecx		; NP
	add	eax,ebx		; U	Add carry in from previous word
	adc	edx,0		; U
	mov	[edi-8],eax	; V
m32_case2:
	mov	eax,[esi-4]	; U
	mov	ebx,edx		; V	Remember carry for later
	mul	ecx		; NP
	add	eax,ebx		; U	Add carry in from previous word
	adc	edx,0		; U
	mov	[edi-4],eax	; V
m32_case1:
	mov	eax,[esi]	; U
	mov	ebx,edx		; V	Remember carry for later
	mul	ecx		; NP
	add	eax,ebx		; U	Add carry in from previous word
	adc	edx,0		; U
	mov	[edi],eax	; V
	sub	ebp,4		; U
	ja	SHORT m32_loop	; V	loop while more than zero words remain
m32_done:
	mov	[edi+4],edx	;	final high word becomes out[len]
	pop	edi		;	restore in reverse order of the pushes
	pop	ebp		;
	pop	esi		;
	pop	ebx		;
	ret			; NP
_lbnMulN1_32	endp
	align	16
;; _lbnMulAdd1_32(out, in, len, k)  -- arguments on the stack, out lowest.
;; Computes out[0..len-1] += in[0..len-1] * k (word 0 least significant)
;; and returns in eax the carry word that falls off the top.
;; Precondition: len >= 1.  The first word is processed unconditionally.
;; Preserves ebx, esi, edi, ebp; clobbers eax, ecx, edx and the flags.
;;
;; Registers: eax = low product / unroll index, ebx = carry or dest word,
;; ecx = k, edx = high product (running carry), esi = in, edi = out,
;; ebp = remaining length.
;; After the four pushes the arguments sit at:
;;   out = esp+20, in = esp+24, len = esp+28, k = esp+32.
_lbnMulAdd1_32	proc	near
	push	ebx		;	ebx is callee-saved but used as scratch below
	push	esi		;
	mov	esi,[esp+16]	;	load in
	push	edi		;
	mov	edi,[esp+16]	;	load out
	push	ebp		;
	mov	ebp,[esp+28]	;	load len
	mov	ecx,[esp+32]	;	load k
;; First multiply step has no carry in.
	mov	eax,[esi]	; V
	mov	ebx,[edi]	; U
	mul	ecx		; NP	first multiply
	add	ebx,eax		; U
	lea	eax,[ebp*4-4]	; V	loop unrolling (lea leaves CF from the add intact)
	adc	edx,0		; U
	and	eax,12		; V	loop unrolling: eax = ((len-1) mod 4)*4
	mov	[edi],ebx	; U
	add	esi,eax		; V	loop unrolling
	add	edi,eax		; U	loop unrolling
	jmp	DWORD PTR ma32_jumptable[eax]	; NP	loop unrolling
	align	4
ma32_jumptable:
	dd	ma32_case0
	dd	ma32_case1
	dd	ma32_case2
	dd	ma32_case3
	nop
	align	8
	nop
	nop
	nop			; To align loop properly
ma32_case0:
	sub	ebp,4		; U	no partial pass needed; any words left?
	jbe	SHORT ma32_done	; V
ma32_loop:
	mov	eax,[esi+4]	; U
	mov	ebx,edx		; V	Remember carry for later
	add	esi,16		; U
	add	edi,16		; V
	mul	ecx		; NP
	add	eax,ebx		; U	Add carry in from previous word
	mov	ebx,[edi-12]	; V
	adc	edx,0		; U
	add	ebx,eax		; V	accumulate into the destination word
	adc	edx,0		; U
	mov	[edi-12],ebx	; V
ma32_case3:
	mov	eax,[esi-8]	; U
	mov	ebx,edx		; V	Remember carry for later
	mul	ecx		; NP
	add	eax,ebx		; U	Add carry in from previous word
	mov	ebx,[edi-8]	; V
	adc	edx,0		; U
	add	ebx,eax		; V
	adc	edx,0		; U
	mov	[edi-8],ebx	; V
ma32_case2:
	mov	eax,[esi-4]	; U
	mov	ebx,edx		; V	Remember carry for later
	mul	ecx		; NP
	add	eax,ebx		; U	Add carry in from previous word
	mov	ebx,[edi-4]	; V
	adc	edx,0		; U
	add	ebx,eax		; V
	adc	edx,0		; U
	mov	[edi-4],ebx	; V
ma32_case1:
	mov	eax,[esi]	; U
	mov	ebx,edx		; V	Remember carry for later
	mul	ecx		; NP
	add	eax,ebx		; U	Add carry in from previous word
	mov	ebx,[edi]	; V
	adc	edx,0		; U
	add	ebx,eax		; V
	adc	edx,0		; U
	mov	[edi],ebx	; V
	sub	ebp,4		; U
	ja	SHORT ma32_loop	; V
ma32_done:
	pop	ebp		;	restore in reverse order of the pushes
	mov	eax,edx		;	return the carry word
	pop	edi		;
	pop	esi		;
	pop	ebx		;
	ret			; NP
_lbnMulAdd1_32	endp
	align	16
;; _lbnMulSub1_32(out, in, len, k)  -- arguments on the stack, out lowest.
;; Computes out[0..len-1] -= in[0..len-1] * k (word 0 least significant)
;; and returns in eax the borrow word that falls off the top.
;; Precondition: len >= 1.  The first word is processed unconditionally.
;; Preserves ebx, esi, edi, ebp; clobbers eax, ecx, edx and the flags.
;;
;; Each step subtracts the low product (plus incoming borrow word) from the
;; destination word; the CF borrow from that `sub` is folded into edx with
;; `adc edx,0`, so edx is the amount still owed by the next word.
;; After the four pushes the arguments sit at:
;;   out = esp+20, in = esp+24, len = esp+28, k = esp+32.
_lbnMulSub1_32	proc	near
	push	ebx		;	ebx is callee-saved but used as scratch below
	push	esi		;
	mov	esi,[esp+16]	;	load in
	push	edi		;
	mov	edi,[esp+16]	;	load out
	push	ebp		;
	mov	ebp,[esp+28]	;	load len
	mov	ecx,[esp+32]	;	load k
;; First multiply step has no carry in.
	mov	eax,[esi]	; V
	mov	ebx,[edi]	; U
	mul	ecx		; NP	first multiply
	sub	ebx,eax		; U
	lea	eax,[ebp*4-4]	; V	loop unrolling (lea leaves CF from the sub intact)
	adc	edx,0		; U	fold the borrow into the high word
	and	eax,12		; V	loop unrolling: eax = ((len-1) mod 4)*4
	mov	[edi],ebx	; U
	add	esi,eax		; V	loop unrolling
	add	edi,eax		; U	loop unrolling
	jmp	DWORD PTR ms32_jumptable[eax]	; NP	loop unrolling
	align	4
ms32_jumptable:
	dd	ms32_case0
	dd	ms32_case1
	dd	ms32_case2
	dd	ms32_case3
	nop
	align	8
	nop
	nop
	nop			; pad so that ms32_loop is aligned
ms32_case0:
	sub	ebp,4		; U	no partial pass needed; any words left?
	jbe	SHORT ms32_done	; V
ms32_loop:
	mov	eax,[esi+4]	; U
	mov	ebx,edx		; V	Remember carry for later
	add	esi,16		; U
	add	edi,16		; V
	mul	ecx		; NP
	add	eax,ebx		; U	Add carry in from previous word
	mov	ebx,[edi-12]	; V
	adc	edx,0		; U
	sub	ebx,eax		; V	subtract from the destination word
	adc	edx,0		; U	borrow becomes part of the next carry
	mov	[edi-12],ebx	; V
ms32_case3:
	mov	eax,[esi-8]	; U
	mov	ebx,edx		; V	Remember carry for later
	mul	ecx		; NP
	add	eax,ebx		; U	Add carry in from previous word
	mov	ebx,[edi-8]	; V
	adc	edx,0		; U
	sub	ebx,eax		; V
	adc	edx,0		; U
	mov	[edi-8],ebx	; V
ms32_case2:
	mov	eax,[esi-4]	; U
	mov	ebx,edx		; V	Remember carry for later
	mul	ecx		; NP
	add	eax,ebx		; U	Add carry in from previous word
	mov	ebx,[edi-4]	; V
	adc	edx,0		; U
	sub	ebx,eax		; V
	adc	edx,0		; U
	mov	[edi-4],ebx	; V
ms32_case1:
	mov	eax,[esi]	; U
	mov	ebx,edx		; V	Remember carry for later
	mul	ecx		; NP
	add	eax,ebx		; U	Add carry in from previous word
	mov	ebx,[edi]	; V
	adc	edx,0		; U
	sub	ebx,eax		; V
	adc	edx,0		; U
	mov	[edi],ebx	; V
	sub	ebp,4		; U
	ja	SHORT ms32_loop	; V
ms32_done:
	pop	ebp		;	restore in reverse order of the pushes
	mov	eax,edx		;	return the borrow word
	pop	edi		;
	pop	esi		;
	pop	ebx		;
	ret			; NP
_lbnMulSub1_32	endp
;; Two-word by one-word divide.  Stores quotient, returns remainder.
;; BNWORD32 lbnDiv21_32(BNWORD32 *q, BNWORD32 nh, BNWORD32 nl, BNWORD32 d)
;;                      4            8            12           16
align 4
;; Divides the 64-bit value nh:nl by d, stores the 32-bit quotient through
;; q and returns the remainder in eax.
;; Preconditions (imposed by the `div` instruction): d != 0 and nh < d,
;; otherwise the quotient does not fit in 32 bits and the CPU raises a
;; divide error.
;; Only the scratch registers eax, ecx, edx and the flags are modified.
_lbnDiv21_32	proc	near
	mov	edx,[esp+8]		; U	Load nh
	mov	eax,[esp+12]		; V	Load nl
	mov	ecx,[esp+4]		; U	Load q (ecx is scratch; ebx would have to be saved)
	div	DWORD PTR [esp+16]	; NP	edx:eax / d -> eax = quotient, edx = remainder
	mov	[ecx],eax		; U	Store quotient
	mov	eax,edx			; V	Return remainder
	ret
_lbnDiv21_32	endp
;; Multi-word by one-word remainder.
;; This speeds up key generation.  It's not worth unrolling and so on;
;; using 32-bit divides is enough of a speedup.
;;
;; The modulus (in ecx) is often 16 bits.  Given that the dividend is 32
;; bits, the chances of saving the first divide because the high word of the
;; dividend is less than the modulus are low enough it's not worth taking
;; the cycles to test for it.
;;
;; unsigned lbnModQ_32(BNWORD32 const *n, unsigned len, unsigned d)
;;                     4                  8             12
align 4
;; Returns (in eax) the remainder of the len-word number at n divided by d.
;; Word 0 of n is the least significant word, matching the multiply
;; primitives in this file, so the division starts at n[len-1] and walks
;; down: each step divides (previous remainder : next word) by d.
;; Preconditions: len >= 1 (the loop body runs before the count is tested)
;; and d != 0 (`div` raises a divide error on a zero divisor).  The running
;; remainder in edx is always < d, so the quotient of each step fits.
;; Preserves ebx and ebp; clobbers eax, ecx, edx and the flags.
;; After the two pushes: n = esp+12, len = esp+16, d = esp+20.
_lbnModQ_32	proc	near
	push	ebx			;	callee-saved; used as the word pointer
	push	ebp			;	callee-saved; used as the loop counter
	mov	ebx,[esp+12]		;	Load n
	mov	ebp,[esp+16]		;	Load len
	mov	ecx,[esp+20]		;	Load d
	xor	edx,edx			;	Remainder so far = 0 (high half of first dividend)
	lea	ebx,[ebx+ebp*4-4]	;	Point at n[len-1], the most significant word
modq32_loop:
	mov	eax,[ebx]		; U	Next lower word becomes the low half of the dividend
	sub	ebx,4			; V	Step toward the least significant word
	div	ecx			; NP	edx:eax / d -> edx = new remainder
	dec	ebp			; U
	jnz	SHORT modq32_loop	; V
	mov	eax,edx			;	Return the remainder (not the last quotient word)
	pop	ebp			;
	pop	ebx			;
	ret				; NP
_lbnModQ_32	endp
_TEXT	ends
	end