convert_a.asm
上传用户:xjjlds
上传日期:2015-12-05
资源大小:22823k
文件大小:7k
- ; Avisynth v2.5. Copyright 2002 Ben Rudiak-Gould et al.
- ; http://www.avisynth.org
- ;
- ; This program is free software; you can redistribute it and/or modify
- ; it under the terms of the GNU General Public License as published by
- ; the Free Software Foundation; either version 2 of the License, or
- ; (at your option) any later version.
- ;
- ; This program is distributed in the hope that it will be useful,
- ; but WITHOUT ANY WARRANTY; without even the implied warranty of
- ; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- ; GNU General Public License for more details.
- ;
- ; You should have received a copy of the GNU General Public License
- ; along with this program; if not, write to the Free Software
- ; Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
- ; http://www.gnu.org/copyleft/gpl.html .
- ;
- ; Linking Avisynth statically or dynamically with other modules is making a
- ; combined work based on Avisynth. Thus, the terms and conditions of the GNU
- ; General Public License cover the whole combination.
- ;
- ; As a special exception, the copyright holders of Avisynth give you
- ; permission to link Avisynth with independent modules that communicate with
- ; Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
- ; terms of these independent modules, and to copy and distribute the
- ; resulting combined work under terms of your choice, provided that
- ; every copy of the combined work is accompanied by a complete copy of
- ; the source code of Avisynth (the version of Avisynth used to produce the
- ; combined work), being distributed under the terms of the GNU General
- ; Public License plus this exception. An independent module is a module
- ; which is not derived from or based on Avisynth, such as 3rd-party filters,
- ; import and export plugins, or graphical user interfaces.
- .586 ; accept Pentium-class instructions
- .mmx ; accept MMX mnemonics and mm0-mm7 registers
- .model flat ; 32-bit flat memory model
- ; Segment alignment has to be 'page' so that 'align 32' can be used inside the segment below.
- _TEXT64 segment page public use32 'CODE' ; holds both the constant tables and the code
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- align 8 ; qword-align the tables that are read with 64-bit MMX operands
- yuv2rgb_constants: ; default table; edx points here when bit 0 of the rec709 argument is clear
- x0000_0000_0010_0010 dq 00000000000100010h ; luma offset 16 in the two low words
- x0080_0080_0080_0080 dq 00080008000800080h ; chroma bias 128 in every word
- x00FF_00FF_00FF_00FF dq 000FF00FF00FF00FFh ; mask keeping the low byte of every word
- x00002000_00002000 dq 00000200000002000h ; rounding term 2000h per dword, half of the 2^14 scale removed by psrad 14
- xFF000000_FF000000 dq 0FF000000FF000000h ; alpha byte 0FFh for each of two 32-bit pixels
- cy dq 000004A8500004A85h ; luma gain 4A85h = 19077 per dword (about 1.164 * 2^14)
- crv dq 03313000033130000h ; red: high word 3313h = 13075 weights V, low word 0 drops U; applied to doubled chroma
- cgu_cgv dq 0E5FCF377E5FCF377h ; green: low word F377h = -3209 weights U, high word E5FCh = -6660 weights V
- cbu dq 00000408D0000408Dh ; blue: low word 408Dh = 16525 weights U, high word 0 drops V
- yuv2rgb_constants_rec709: ; alternate table, same layout; edx points here when bit 0 of the rec709 argument is set
- dq 00000000000100010h ; luma offset (same as default table)
- dq 00080008000800080h ; chroma bias (same as default table)
- dq 000FF00FF00FF00FFh ; low-byte mask (same as default table)
- dq 00000200000002000h ; rounding term (same as default table)
- dq 0FF000000FF000000h ; alpha bytes (same as default table)
- dq 000004A8500004A85h ; luma gain (same as default table)
- dq 03960000039600000h ; red: V weight 3960h = 14688
- dq 0EEF5F930EEF5F930h ; green: U weight F930h = -1744, V weight EEF5h = -4363
- dq 00000439B0000439Bh ; blue: U weight 439Bh = 17307
- ofs_x0000_0000_0010_0010 = 0 ; byte offsets of the entries above, valid for either table via [edx+ofs_...]
- ofs_x0080_0080_0080_0080 = 8
- ofs_x00FF_00FF_00FF_00FF = 16
- ofs_x00002000_00002000 = 24
- ofs_xFF000000_FF000000 = 32
- ofs_cy = 40
- ofs_crv = 48
- ofs_cgu_cgv = 56
- ofs_cbu = 64
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- GET_Y MACRO mmreg,uyvy
- ;; Reduce every 16-bit word of mmreg to one of its two bytes, zero-extended.
- ;; uyvy equal to 0: keep the low byte by masking with 00FFh per word
- ;; (edx must already point at one of the constant tables).
- ;; uyvy nonzero: keep the high byte by shifting each word right 8 bits.
- IFE &uyvy
- pand mmreg,[edx+ofs_x00FF_00FF_00FF_00FF]
- ELSE
- psrlw mmreg,8
- ENDIF
- ENDM
- GET_UV MACRO mmreg,uyvy
- ;; Selects the byte of each word that GET_Y does not select for the same
- ;; uyvy value, by invoking GET_Y with the flag inverted (1-uyvy).
- GET_Y mmreg,1-uyvy
- ENDM
- YUV2RGB_INNER_LOOP MACRO uyvy,rgb32,no_next_pixel ; one step: 8 source bytes at esi to 4 RGB pixels at edi
- ;; This YUV422->RGB conversion code uses only four MMX registers per source dword, so two dwords are
- ;; converted in parallel: the first pipe works in mm0-mm3 and the second pipe in mm4-mm7. In the original
- ;; layout the second-pipe lines are indented an extra space (that indentation is not visible in this listing).
- ;; The pipes barely overlap, except at the end and in the lines marked ***. Expects edx = constants table, ecx = loop bound.
- ;; Revised 4 July 2002 to set alpha in rgb32 output to 0FFh ("on") by default, plus other small memory optimizations.
- movd mm0, dword ptr [esi] ; pipe 1: first source dword (two pixels)
- movd mm5, dword ptr [esi+4] ; pipe 2: second source dword (two pixels)
- movq mm1,mm0 ; copy kept for chroma extraction
- GET_Y mm0,&uyvy ; mm0 = __________Y1__Y0
- movq mm4,mm5 ; pipe 2 copy for luma extraction
- GET_UV mm1,&uyvy ; mm1 = __________V0__U0
- GET_Y mm4,&uyvy ; mm4 = __________Y3__Y2
- movq mm2,mm5 ; *** pipe 1 borrows the second dword for its next chroma; avoid reload from [esi+4]
- GET_UV mm5,&uyvy ; mm5 = __________V2__U2
- psubw mm0, qword ptr [edx+ofs_x0000_0000_0010_0010] ; subtract luma offset 16 from the two low words
- movd mm6, dword ptr [esi+8-4*(no_next_pixel)] ; dword after this pair; no_next_pixel=1 re-reads [esi+4] instead
- GET_UV mm2,&uyvy ; mm2 = __________V2__U2
- psubw mm4, qword ptr [edx+ofs_x0000_0000_0010_0010] ; same luma offset for pipe 2
- paddw mm2,mm1 ; mm2 = V0+V2, U0+U2: chroma sum used for the second pixel of pipe 1
- GET_UV mm6,&uyvy ; mm6 = chroma of the following dword (or V2,U2 again when no_next_pixel=1)
- psubw mm1, qword ptr [edx+ofs_x0080_0080_0080_0080] ; subtract 128 from all four words (upper two become -128)
- paddw mm6,mm5 ; pipe 2 chroma sum for its second pixel
- psllq mm2,32 ; move chroma sum to words 2-3 and clear words 0-1
- psubw mm5, qword ptr [edx+ofs_x0080_0080_0080_0080] ; pipe 2: subtract 128 from all four words
- punpcklwd mm0,mm2 ; mm0 = ______Y1______Y0
- psllq mm6,32 ; pipe 2: same repositioning of the chroma sum
- pmaddwd mm0, qword ptr [edx+ofs_cy] ; each dword = (Y-16) * cy
- punpcklwd mm4,mm6 ; mm4 = ______Y3______Y2
- paddw mm1,mm1 ; double: low words 2*(C-128), upper words -256
- pmaddwd mm4, qword ptr [edx+ofs_cy] ; pipe 2 scaled luma
- paddw mm5,mm5 ; pipe 2: same doubling
- paddw mm1,mm2 ; mm1 = __V1__U1__V0__U0 * 2 (V1,U1 = centred sum of this and the next chroma pair)
- paddd mm0,[edx+ofs_x00002000_00002000] ; add rounding term before the later shift by 14
- paddw mm5,mm6 ; pipe 2: doubled chroma for both of its pixels
- movq mm2,mm1 ; chroma copy for the green product
- paddd mm4,[edx+ofs_x00002000_00002000] ; pipe 2 rounding term
- movq mm3,mm1 ; chroma copy for the blue product
- movq mm6,mm5 ; pipe 2 chroma copy for green
- pmaddwd mm1,[edx+ofs_crv] ; per pixel: V * crv (U weight is 0)
- movq mm7,mm5 ; pipe 2 chroma copy for blue
- paddd mm1,mm0 ; add scaled luma
- pmaddwd mm5,[edx+ofs_crv] ; pipe 2 red product
- psrad mm1,14 ; mm1 = RRRRRRRRrrrrrrrr
- paddd mm5,mm4 ; pipe 2 red plus luma
- pmaddwd mm2,[edx+ofs_cgu_cgv] ; per pixel: U * cgu + V * cgv
- psrad mm5,14 ; pipe 2 red as two signed dwords
- paddd mm2,mm0 ; add scaled luma
- pmaddwd mm6,[edx+ofs_cgu_cgv] ; pipe 2 green product
- psrad mm2,14 ; mm2 = GGGGGGGGgggggggg
- paddd mm6,mm4 ; pipe 2 green plus luma
- pmaddwd mm3,[edx+ofs_cbu] ; per pixel: U * cbu (V weight is 0)
- psrad mm6,14 ; pipe 2 green as two signed dwords
- paddd mm3,mm0 ; add scaled luma
- pmaddwd mm7,[edx+ofs_cbu] ; pipe 2 blue product
- add esi,8 ; source advances past the two dwords consumed
- add edi,12+4*rgb32 ; dest advances 12 (RGB24) or 16 (RGB32) bytes; the stores below use negative offsets
- IFE &no_next_pixel
- cmp esi,ecx ; flags are consumed by the jb that follows the macro; MMX instructions do not modify EFLAGS
- ENDIF
- psrad mm3,14 ; mm3 = BBBBBBBBbbbbbbbb
- paddd mm7,mm4 ; pipe 2 blue plus luma
- pxor mm0,mm0 ; zero source for the upper half of both red packs
- psrad mm7,14 ; pipe 2 blue as two signed dwords
- packssdw mm3,mm2 ; mm3 = GGGGggggBBBBbbbb
- packssdw mm7,mm6 ; pipe 2: green and blue as four signed words
- packssdw mm1,mm0 ; mm1 = ________RRRRrrrr
- packssdw mm5,mm0 ; *** pipe 2 reuses the zero in mm0; avoid pxor mm4,mm4
- movq mm2,mm3 ; copy of green/blue words for interleaving
- movq mm6,mm7 ; pipe 2 copy
- punpcklwd mm2,mm1 ; mm2 = RRRRBBBBrrrrbbbb
- punpcklwd mm6,mm5 ; pipe 2: interleave blue with red
- punpckhwd mm3,mm1 ; mm3 = ____GGGG____gggg
- punpckhwd mm7,mm5 ; pipe 2: green words spaced by zero words
- movq mm0,mm2 ; copy so both pixels can be assembled
- movq mm4,mm6 ; pipe 2 copy
- punpcklwd mm0,mm3 ; mm0 = ____rrrrggggbbbb
- punpcklwd mm4,mm7 ; pipe 2: first pixel as words 0,r,g,b
- IFE &rgb32
- psllq mm0,16 ; RGB24 only: shift the first pixel up one word so the zero word lands at the bottom
- psllq mm4,16 ; pipe 2: same shift
- ENDIF
- punpckhwd mm2,mm3 ; mm2 = ____RRRRGGGGBBBB
- punpckhwd mm6,mm7 ; pipe 2: second pixel as words 0,R,G,B
- packuswb mm0,mm2 ; mm0 = __RRGGBB__rrggbb <- ta dah!
- packuswb mm4,mm6 ; pipe 2: saturate both pixels to unsigned bytes
- IF &rgb32
- por mm0, [edx+ofs_xFF000000_FF000000] ; set alpha channels "on"
- por mm4, [edx+ofs_xFF000000_FF000000] ; pipe 2 alpha bytes
- movq [edi-16],mm0 ; store the quadwords independently
- movq [edi-8],mm4 ; pixels 2 and 3
- ELSE
- psrlq mm0,8 ; pack the two quadwords into 12 bytes
- psllq mm4,8 ; (note: the two shifts above leave
- movd dword ptr [edi-12],mm0 ; mm0,4 = __RRGGBBrrggbb__)
- psrlq mm0,32 ; bring the remaining G,R bytes of pixel 1 to the bottom
- por mm4,mm0 ; merge them into the two low bytes left clear in mm4
- movd dword ptr [edi-8],mm4 ; middle four bytes of the 12-byte group
- psrlq mm4,32 ; expose the last four bytes
- movd dword ptr [edi-4],mm4 ; final four bytes of the 12-byte group
- ENDIF
- ENDM
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- YUV2RGB_PROC MACRO procname,uyvy,rgb32
- PUBLIC C _&procname
- ;; Emits one complete conversion routine built around YUV2RGB_INNER_LOOP.
- ;;
- ;; void __cdecl procname(
- ;; [esp+ 4] const BYTE* src,       first source row; conversion stops once the row pointer is no longer above it
- ;; [esp+ 8] BYTE* dst,             written forward continuously; no destination pitch is applied
- ;; [esp+12] const BYTE* src_end,   one pitch past the start of the last source row
- ;; [esp+16] int src_pitch,         bytes between consecutive source rows
- ;; [esp+20] int row_size,          source bytes converted per row
- ;; [esp+24] bool rec709);          bit 0 set selects yuv2rgb_constants_rec709
- ;;
- ;; Registers while running: esi = source cursor, edi = dest cursor, eax = src_pitch,
- ;; ebx = row_size, ecx = address at which the per-row loop stops, edx = constants table.
- ;; esi, edi and ebx are saved and restored; the three pushes account for the +12 on every
- ;; argument offset below.
- ;; NOTE(review): every row runs one full loop step plus the tail step, i.e. 16 source bytes
- ;; at minimum and 8 bytes per step; presumably callers guarantee row_size is a multiple of 8
- ;; and at least 16 - confirm.
- _&procname PROC
- push esi
- push edi
- push ebx
- mov esi,[esp+12+12] ; start from src_end: rows are read bottom-up
- mov edi,[esp+8+12]
- mov eax,[esp+16+12]
- mov ebx,[esp+20+12]
- mov edx,offset yuv2rgb_constants_rec709
- test byte ptr [esp+24+12],1
- jnz next_row ; rec709 requested: keep the table just loaded
- mov edx,offset yuv2rgb_constants
- next_row:
- sub esi,eax ; step back one pitch to the start of the row to convert
- lea ecx,[esi+ebx-8] ; start of the final 8-byte group of this row
- align 32
- pixel_loop:
- YUV2RGB_INNER_LOOP uyvy,rgb32,0
- jb pixel_loop ; uses the cmp esi,ecx issued inside the macro
- YUV2RGB_INNER_LOOP uyvy,rgb32,1 ; final group: must not read beyond the row
- sub esi,ebx ; rewind to the start of the row just converted
- cmp esi,[esp+4+12]
- ja next_row ; continue while the row start is still above src
- emms ; leave MMX state so the FPU is usable again
- pop ebx
- pop edi
- pop esi
- retn
- _&procname ENDP
- ENDM
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- YUV2RGB_PROC mmx_YUY2toRGB24,0,0 ; uyvy=0 (luma in the low byte of each word), rgb32=0: 12 output bytes per 4 pixels
- YUV2RGB_PROC mmx_YUY2toRGB32,0,1 ; uyvy=0, rgb32=1: 16 output bytes per 4 pixels with alpha set to 0FFh
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- END