; reconmmx.s
; Uploaded by: aoeyumen
; Upload date: 2007-01-06
; Archive size: 3329k
; File size: 15k
;-------------------------------------------------------------------------
; MMX block copy / averaging helpers (NASM syntax, 32-bit x86).
;
; Every routine in this file reads its arguments from the stack, at fixed
; offsets from ESP taken after its register pushes.  The offsets are the
; %assign symbols defined below (and redefined further down for the
; routines that push five registers).
;-------------------------------------------------------------------------

; Packed 64-bit constants, loaded with movq.
        align   8
ADD_1:          dd 01010101h, 01010101h         ; 0x01 in every byte
MASK_AND:       dd 7f7f7f7fh, 7f7f7f7fh         ; 0x7f in every byte
PLUS_384:       dd 01800180h, 01800180h         ; 384 in every word (only used by commented-out code)
PLUS_128:       dd 00800080h, 00800080h         ; 128 in every word

; LocalFrameSize is 0: no stack space is reserved for locals.
%assign LocalFrameSize 0
; Four registers are pushed by rec, recc, reca, recac, rech and rechc.
%assign RegisterStorageSize 16

; Arguments: offset = saved registers + 4-byte return address + the
; arguments that precede this one.
%assign source LocalFrameSize + RegisterStorageSize + 4
%assign dest LocalFrameSize + RegisterStorageSize + 8
%assign lx2 LocalFrameSize + RegisterStorageSize + 12
%assign h LocalFrameSize + RegisterStorageSize + 16

; C-side view of the four-argument routines.  The original header names
; the prototype rec_mmx; the symbols actually exported are the ones in
; the global list below.
;
;   extern void rec_mmx(
;       unsigned char *source,
;       unsigned char *dest,
;       int lx2,
;       int h);
;
; Arguments are read directly from the caller's stack frame.

global rec
global recc
global reca
global recac
global recv
global recvc
global recva
global recvac
global rech
global rechc
global add_block_mmx
global set_block_mmx
- align 16
- rec:
- push esi
- push edi
- push ecx
- push ebx
- ; sub esp, LocalFrameSize
- mov esi, [esp+source]
- mov edi, [esp+dest]
- mov ecx, [esp+h]
- mov ebx, [esp+lx2]
- .rec1:
- movq mm0,[esi]
- movq mm1,[esi+8]
- movq [edi],mm0
- add esi,ebx
- movq [edi+8],mm1
- add edi,ebx
- dec ecx
- jnz .rec1
- emms
- ; add esp, LocalFrameSize
- pop ebx
- pop ecx
- pop edi
- pop esi
- ret
- align 16
- recc:
- push esi
- push edi
- push ecx
- push ebx
- ; sub esp, LocalFrameSize
- mov esi, [esp+source]
- mov edi, [esp+dest]
- mov ecx, [esp+h]
- mov ebx, [esp+lx2]
- .recc1:
- movq mm0,[esi]
- movq [edi],mm0
- add edi,ebx
- add esi,ebx
- dec ecx
- jnz .recc1
- emms
- ; add esp, LocalFrameSize
- pop ebx
- pop ecx
- pop edi
- pop esi
- ret
- align 16
- reca:
- push esi
- push edi
- push ecx
- push ebx
- ; sub esp, LocalFrameSize
- mov esi, [esp+source]
- mov edi, [esp+dest]
- mov ecx, [esp+h]
- mov ebx, [esp+lx2]
- movq mm5, [MASK_AND]
- movq mm6, [ADD_1]
- .reca1:
- movq mm0,[esi]
- movq mm1,[edi]
- movq mm2,[esi+8]
- movq mm3,[edi+8]
- psrlw mm0,1
- psrlw mm1,1
- psrlw mm2,1
- psrlw mm3,1
- pand mm0,mm5
- pand mm1,mm5
- pand mm2,mm5
- pand mm3,mm5
- paddusb mm0,mm1
- paddusb mm2,mm3
- paddusb mm0,mm6
- paddusb mm2,mm6
- movq [edi],mm0
- add esi,ebx
- movq [edi+8],mm2
- add edi,ebx
- dec ecx
- jnz .reca1
- emms
- ; add esp, LocalFrameSize
- pop ebx
- pop ecx
- pop edi
- pop esi
- ret
- align 16
- recac:
- push esi
- push edi
- push ecx
- push ebx
- ; sub esp, LocalFrameSize
- mov esi, [esp+source]
- mov edi, [esp+dest]
- mov ecx, [esp+h]
- mov ebx, [esp+lx2]
- movq mm5, [MASK_AND]
- movq mm6, [ADD_1]
- .recac1:
- movq mm0,[esi]
- movq mm1,[edi]
- psrlw mm0,1
- psrlw mm1,1
- pand mm0,mm5
- pand mm1,mm5
- paddusb mm0,mm1
- paddusb mm0,mm6
- movq [edi],mm0
- add edi,ebx
- add esi,ebx
- dec ecx
- jnz .recac1
- emms
- ; add esp, LocalFrameSize
- pop ebx
- pop ecx
- pop edi
- pop esi
- ret
- align 16
- rech:
- push esi
- push edi
- push ecx
- push ebx
- mov esi, [esp+source]
- mov edi, [esp+dest]
- mov ecx, [esp+h]
- mov ebx, [esp+lx2]
- movq mm5, [MASK_AND]
- movq mm6, [ADD_1]
- .rech1:
- movq mm0,[esi]
- movq mm1,[esi+1]
- movq mm2,[esi+8]
- movq mm3,[esi+9]
- psrlw mm0,1
- psrlw mm1,1
- psrlw mm2,1
- psrlw mm3,1
- pand mm0,mm5
- pand mm1,mm5
- pand mm2,mm5
- pand mm3,mm5
- paddusb mm0,mm1
- paddusb mm2,mm3
- paddusb mm0,mm6
- paddusb mm2,mm6
- movq [edi],mm0
- add esi,ebx
- movq [edi+8],mm2
- add edi,ebx
- dec ecx
- jnz .rech1
- emms
- pop ebx
- pop ecx
- pop edi
- pop esi
- ret
- align 16
- rechc:
- push esi
- push edi
- push ecx
- push ebx
- ; sub esp, LocalFrameSize
- mov esi, [esp+source]
- mov edi, [esp+dest]
- mov ecx, [esp+h]
- mov ebx, [esp+lx2]
- movq mm5, [MASK_AND]
- movq mm6, [ADD_1]
- .rechc1:
- movq mm0,[esi]
- movq mm1,[esi+1]
- psrlw mm0,1
- psrlw mm1,1
- pand mm0,mm5
- pand mm1,mm5
- paddusb mm0,mm1
- paddusb mm0,mm6
- movq [edi],mm0
- add edi,ebx
- add esi,ebx
- dec ecx
- jnz .rechc1
- emms
- ; add esp, LocalFrameSize
- pop ebx
- pop ecx
- pop edi
- pop esi
- ret
; recv, recvc, recva and recvac push five registers and take one more
; argument (lx), so the stack offsets are redefined from here on.
%assign RegisterStorageSize 20
%assign source LocalFrameSize + RegisterStorageSize + 4
%assign dest LocalFrameSize + RegisterStorageSize + 8
%assign lx LocalFrameSize + RegisterStorageSize + 12
%assign lx2 LocalFrameSize + RegisterStorageSize + 16
%assign h LocalFrameSize + RegisterStorageSize + 20

        align   16
;-------------------------------------------------------------------------
; recv -- vertical two-tap average, 16 bytes wide:
;         dest[i] = (source[i] + source[i+lx] + 1) >> 1   for each byte.
;
; Stack arguments (read through the source/dest/lx/lx2/h offsets):
;   source  address of the first source row
;   dest    address of the first destination row
;   lx      byte offset from a source row to the row it is averaged with
;   lx2     byte step added to both pointers after each row
;   h       number of rows; nothing is done when h <= 0
;
; The rounded average is formed per byte as
;       (a >> 1) + (b >> 1) + ((a | b) & 1)
; which equals (a + b + 1) >> 1 and never exceeds 255.
;
; esi, edi, ecx, ebx and edx are saved and restored; mm0-mm7 are
; overwritten.
;-------------------------------------------------------------------------
recv:
        push    esi
        push    edi
        push    ecx
        push    ebx
        push    edx

        mov     esi, [esp+source]
        mov     edi, [esp+dest]
        mov     ecx, [esp+h]
        mov     ebx, [esp+lx2]
        mov     edx, [esp+lx]

        test    ecx, ecx
        jle     .done                   ; h <= 0: dec/jnz below would wrap around

        movq    mm5, [MASK_AND]         ; 0x7f per byte
        movq    mm6, [ADD_1]            ; 0x01 per byte

.row:
        movq    mm0, [esi]              ; upper row, bytes 0..7
        movq    mm1, [esi+edx]          ; lower row, bytes 0..7
        movq    mm2, [esi+8]            ; upper row, bytes 8..15
        movq    mm3, [esi+edx+8]        ; lower row, bytes 8..15

        movq    mm4, mm0
        movq    mm7, mm2
        por     mm4, mm1                ; bit 0 set where either input is odd
        por     mm7, mm3

        psrlw   mm0, 1
        psrlw   mm1, 1
        psrlw   mm2, 1
        psrlw   mm3, 1

        pand    mm4, mm6                ; per-byte rounding term (0 or 1)
        pand    mm7, mm6
        pand    mm0, mm5                ; clear the bit shifted in from the neighbouring byte
        pand    mm1, mm5
        pand    mm2, mm5
        pand    mm3, mm5

        paddusb mm0, mm1
        paddusb mm2, mm3
        paddusb mm0, mm4
        paddusb mm2, mm7

        movq    [edi], mm0
        add     esi, ebx
        movq    [edi+8], mm2
        add     edi, ebx
        dec     ecx
        jnz     .row

        emms
.done:
        pop     edx
        pop     ebx
        pop     ecx
        pop     edi
        pop     esi
        ret
- align 16
- recvc:
- push esi
- push edi
- push ecx
- push ebx
- push edx
- mov esi, [esp+source]
- mov edi, [esp+dest]
- mov ecx, [esp+h]
- mov ebx, [esp+lx2]
- mov edx, [esp+lx]
- movq mm5, [MASK_AND]
- movq mm6, [ADD_1]
- .recvc1:
- movq mm0,[esi]
- movq mm1,[esi+edx]
- psrlw mm0,1
- psrlw mm1,1
- pand mm0,mm5
- pand mm1,mm5
- paddusb mm0,mm1
- paddusb mm0,mm6
- movq [edi],mm0
- add edi,ebx
- add esi,ebx
- dec ecx
- jnz .recvc1
- emms
- pop edx
- pop ebx
- pop ecx
- pop edi
- pop esi
- ret
- align 16
- recva:
- push esi
- push edi
- push ecx
- push ebx
- push edx
- mov esi, [esp+source]
- mov edi, [esp+dest]
- mov ecx, [esp+h]
- mov ebx, [esp+lx2]
- mov edx, [esp+lx]
- movq mm7, [MASK_AND]
- movq mm6, [ADD_1]
- .recva1:
- movq mm0,[esi]
- movq mm1,[esi+edx]
- movq mm2,[esi+8]
- movq mm3,[esi+edx+8]
- movq mm4,[edi]
- movq mm5,[edi+8]
- psrlw mm0,1
- psrlw mm1,1
- psrlw mm2,1
- psrlw mm3,1
- psrlw mm4,1
- psrlw mm5,1
- pand mm0,mm7
- pand mm1,mm7
- pand mm2,mm7
- pand mm3,mm7
- pand mm4,mm7
- pand mm5,mm7
- paddusb mm0,mm1
- paddusb mm2,mm3
- paddusb mm0,mm6
- paddusb mm2,mm6
- psrlw mm0,1
- psrlw mm2,1
- pand mm0,mm7
- pand mm2,mm7
- paddusb mm4,mm0
- paddusb mm5,mm2
- paddusb mm4,mm6
- paddusb mm5,mm6
- movq [edi],mm4
- movq [edi+8],mm5
- add edi,ebx
- add esi,ebx
- dec ecx
- jnz near .recva1
- emms
- pop edx
- pop ebx
- pop ecx
- pop edi
- pop esi
- ret
- align 16
- recvac:
- push esi
- push edi
- push ecx
- push ebx
- push edx
- mov esi, [esp+source]
- mov edi, [esp+dest]
- mov ecx, [esp+h]
- mov ebx, [esp+lx2]
- mov edx, [esp+lx]
- movq mm5, [MASK_AND]
- movq mm6, [ADD_1]
- .recvac1:
- movq mm0,[esi]
- movq mm1,[esi+edx]
- movq mm4,[edi]
- psrlw mm0,1
- psrlw mm1,1
- psrlw mm4,1
- pand mm0,mm5
- pand mm1,mm5
- pand mm4,mm5
- paddusb mm0,mm1
- paddusb mm0,mm6
- psrlw mm0,1
- pand mm0,mm5
- paddusb mm4,mm0
- paddusb mm4,mm6
- movq [edi],mm4
- add edi,ebx
- add esi,ebx
- dec ecx
- jnz .recvac1
- emms
- pop edx
- pop ebx
- pop ecx
- pop edi
- pop esi
- ret
; Stack offsets for add_block_mmx and set_block_mmx (five pushed registers).
%assign RegisterStorageSize 20
%assign rfp LocalFrameSize + RegisterStorageSize + 4
%assign bp LocalFrameSize + RegisterStorageSize + 8
%assign iincr LocalFrameSize + RegisterStorageSize + 12

        align   16
;-------------------------------------------------------------------------
; add_block_mmx -- add an 8x8 block of signed 16-bit values to an 8x8
;                  block of bytes, in place.
;
; Stack arguments (read through the rfp/bp/iincr offsets):
;   rfp     address of the first row of the byte block (read and written)
;   bp      address of 64 consecutive 16-bit values, 8 per row
;   iincr   byte step added to the rfp pointer after each row
;
; Each row: the 8 bytes are zero-extended to words, the 16-bit values are
; added with signed saturation (paddsw), and packuswb saturates every sum
; to 0..255 while packing back to bytes.
;
; NOTE(review): the original carried "FIXME clipping needs to be done".
; packuswb already clamps to 0..255; confirm whether anything further was
; intended.
;
; Five registers are pushed so the frame matches RegisterStorageSize;
; ecx and edx are saved but not otherwise used.  mm0-mm2 are overwritten.
;-------------------------------------------------------------------------
add_block_mmx:
        push    esi
        push    edi
        push    ecx
        push    ebx
        push    edx

        mov     esi, [esp+bp]
        mov     edi, [esp+rfp]
        mov     ebx, [esp+iincr]
;       movq    mm7, [PLUS_384]         ; disabled in the original

        pxor    mm2, mm2                ; zero, used to widen bytes to words

%rep 8
        movq    mm0, [edi]              ; 8 destination bytes
        movq    mm1, mm0
        punpcklbw mm0, mm2              ; bytes 0..3 as words
        punpckhbw mm1, mm2              ; bytes 4..7 as words
        paddsw  mm0, [esi]
        paddsw  mm1, [esi+8]
;       paddsw  mm0, mm7                ; disabled in the original
;       paddsw  mm1, mm7                ; disabled in the original
        packuswb mm0, mm1               ; saturate to 0..255 and repack
        movq    [edi], mm0
        add     edi, ebx
        add     esi, 16                 ; next row of 8 words
%endrep

        emms
        pop     edx
        pop     ebx
        pop     ecx
        pop     edi
        pop     esi
        ret
- align 16
- set_block_mmx:
- push esi
- push edi
- push ecx
- push ebx
- push edx
- mov esi, [esp+bp]
- mov edi, [esp+rfp]
- mov ebx, [esp+iincr]
- movq mm7, [PLUS_128]
- %rep 4
- movq mm0, [esi]
- movq mm1, [esi+8]
- paddsw mm0, mm7
- movq mm2, [esi+16]
- paddsw mm1, mm7
- movq mm3, [esi+24]
- paddsw mm2, mm7
- packuswb mm0, mm1
- paddsw mm3, mm7
- movq [edi], mm0
- packuswb mm2, mm3
- add edi, ebx
- add esi, 32
- movq [edi], mm2
- add edi, ebx
- %endrep
- emms
- pop edx
- pop ebx
- pop ecx
- pop edi
- pop esi
- ret
;
; Disabled code: everything below is commented out and is not assembled.
;
;_64_minus_index: dd 64,63,62,61,60,59,58,57,56,55,54,53,52,51,50,49,48,47,46,45,44,43,42,41,40,39,38,37,36,35,34
; dd 33,32,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1
;
;extern _buf_pointer
;extern _end_buf
;extern _bit_count
;
;
;global _getbits
;_getbits:
;
; mov eax, [_bit_count] ; Number of valid bits in MM0
; mov ecx, [ESP+4] ; Parameter: how many bits should we
; ; read.
;
; sub eax,ecx ; Do we have enough bits in MM0?
; jl new64bit ; If not, get a new 64 bits
;
; movd MM3,[_64_minus_index+ecx*4] ;MM3 = 64 - number of needed bits.
; movq MM2,MM0
;
; movd mm1,ecx ; # of bits to read
; psrlq MM2,MM3 ; MM2 now has valid bitstream in least
; ; significant part
;
; mov [_bit_count],eax ; Update number of valid bits.
;
; movd eax,MM2 ; move the result into eax
; psllq MM0,MM1 ; throw away those bits
;
; ret
;
;new64bit:
; movd MM3,[_64_minus_index+ecx*4] ;MM3 = 64 - number of requested bits
; ;(for shifting)
; movq MM2,MM0 ; copy leftover bits
;
; mov edx,[_buf_pointer] ;pointer to bitstream
; mov ecx,[_end_buf] ;read pointer to end of buffer
;
; add edx,8 ;update the pointer
; add eax,64 ;eax = 64 - # of bits we missed in old
; ;group
;
; cmp edx,ecx ;do we have another qword to read?
; mov [_buf_pointer],edx ;save new value
;
; mov ecx,[edx-8] ;read next qword (dword here)
; mov edx,[edx-4] ;(dword here)
;
; jge do_refill ;do_refill
;refill:
; ; now convert from big-endian to little-endian,
; ; but make use of the leftover bits (MM2) before using these
; bswap edx ;swapping the first 32 bits
; bswap ecx ;swapping the second 32 bits
;
; movd mm4,ecx ;second 32 bits in mm4
; psrlq mm2,mm3 ;mm2 has remaining bits in least
; ;significant part with room for new
; ;bits to right of it
; movd mm1,edx ;move first 32 bits
; psllq mm4,32 ;shift second 32 bits to upper part of
; ;register
;
; movd mm3,eax ;mm3 gets the shift counter
; por mm4,mm1 ;combine the 64 swapped bits into mm4
;
; movq mm0,mm4 ;save new word in mm0 for next time
; psrlq mm4,mm3 ;mm4 = new bits we now need in least
; ;significant part
;
; mov [_bit_count],eax ;Save bit count for next time
; por mm2,mm4 ;combine remaining bits with the bits
; ;from new word
;
; movd mm1,[_64_minus_index+4*eax] ;# of bits we missed in old group
;
; movd eax,mm2 ;return bits in eax
; psllq mm0,mm1 ;remove the bits we just
; ;read from mm0
;
; RET
;do_refill:
; PUSH EAX
; PUSH EDX
; PUSH ECX
; CALL _refill_buffer
; POP ECX
; POP EDX
; POP EAX
; JMP refill