idct_mmx.asm
资源名称:tcpmp.rar [点击查看]
上传用户:wstnjxml
上传日期:2014-04-03
资源大小:7248k
文件大小:5k
源码类别:
Windows CE
开发平台:
C/C++
- ;*****************************************************************************
- ;*
- ;* This program is free software ; you can redistribute it and/or modify
- ;* it under the terms of the GNU General Public License as published by
- ;* the Free Software Foundation; either version 2 of the License, or
- ;* (at your option) any later version.
- ;*
- ;* This program is distributed in the hope that it will be useful,
- ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
- ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- ;* GNU General Public License for more details.
- ;*
- ;* You should have received a copy of the GNU General Public License
- ;* along with this program; if not, write to the Free Software
- ;* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- ;*
- ;* $Id: idct_mmx.asm 432 2005-12-28 16:39:13Z picard $
- ;*
- ;* The Core Pocket Media Player
- ;* Copyright (c) 2004-2005 Gabor Kovacs
- ;*
- ;*****************************************************************************
- ;******************
- ;* NOT FINISHED *
- ;******************
- BITS 32 ; generate 32-bit x86 code
- ROW_SHIFT equ 11 ; not referenced in the visible code; same value as the "<<= 11" steps in the Row macro notes
- COL_SHIFT equ 6 ; not referenced in the visible code; NOTE(review): presumably the column-pass shift - confirm
- SECTION .data ; nothing is defined in this section in the visible file
- ALIGN 16
- SECTION .text ; all macros and routines below are emitted here
- %macro cglobal 2 ; cglobal name,argbytes: make "name" stand for the decorated symbol _name@argbytes and export it
- %define %1 _%1@%2
- global %1 ; must stay after the define above: the name has already been rewritten, so the decorated symbol is exported
- %endmacro
- cglobal IDCT_Const8x8,16 ; 16 = bytes of arguments, matching the "ret 16" in the routine
- cglobal IDCT_Const4x4,16 ; 16 = bytes of arguments, matching the "ret 16" in the routine
- ;cglobal IDCT_Block8x8,16 ; left unexported: the routine is inside the "if 0" region below
- ;cglobal IDCT_Block8x4,16 ; left unexported: the routine is inside the "if 0" region below
- ;-----------------------------------------------------------------------
- ; Row n - row pass over row n of the block (placeholder, not finished)
- ; In:   ecx = block; row n is the 16 bytes starting at ecx + n*16
- ; Uses: mm0, mm1
- ; As written, the macro loads the row into mm0/mm1 and stores it back
- ; unchanged.  The arithmetic that is meant to go between the load and
- ; the store is kept below as notes.
- ;-----------------------------------------------------------------------
- %macro Row 1
-         movq    mm0, [ecx + %1*16]          ; first 8 bytes of the row
-         movq    mm1, [ecx + %1*16 + 8]      ; last 8 bytes of the row
- ; NOTE(review): the next line is presumably the order in which the eight
- ; row values are expected in mm0:mm1 - confirm before implementing.
- ; x0 x4 x3 x7 x1 x6 x2 x5
- ;
- ; Planned steps (a primed name is the new value of that variable):
- ; x4' = W7 * x5 + W1 * x4;
- ; x5' = W7 * x4 - W1 * x5;
- ; x6' = W3 * x7 + W5 * x6;
- ; x7' = W3 * x6 - W5 * x7;
- ;
- ; x6' = x4 + x6;
- ; x4' = x4 - x6;
- ; x7' = x5 + x7;
- ; x5' = x5 - x7;
- ;
- ; x5' = (181 * (x4 + x5) + 128) >> 8;
- ; x4' = (181 * (x4 - x5) + 128) >> 8;
- ;
- ; x3' = W6 * x2 + W2 * x3;
- ; x2' = W6 * x3 - W2 * x2;
- ;
- ; x1 <<= 11;
- ; x0 <<= 11;
- ;
- ; x1' = x0 + x1;
- ; x0' = x0 - x1;
- ;
- ; x3' = x1 + x3;
- ; x1' = x1 - x3;
- ; x2' = x0 + x2;
- ; x0' = x0 - x2;
-         movq    [ecx + %1*16], mm0          ; write the row back
-         movq    [ecx + %1*16 + 8], mm1
- %endmacro
- ; Registers the column macros are given by the IDCT_Block routines: ecx:block
- ; edi:dest edx:dest pitch
- ; esi:src (the routines test it for zero to choose the second macro argument) eax:src pitch (the routines load 8)
- %macro Col4x4 2 ; empty placeholder, expands to nothing; invoked only by the disabled IDCT_Block8x4 with first argument 0 or 8 and second argument 0 or 1
- %endmacro
- %macro Col4x8 2 ; empty placeholder, expands to nothing; invoked only by the disabled IDCT_Block8x8 with first argument 0 or 8 and second argument 0 or 1
- %endmacro
- %if 0
- ALIGN 16
- ;-----------------------------------------------------------------------
- ; IDCT_Block8x8 (disabled: this routine is inside the %if 0 region)
- ; Stack arguments, 16 bytes, removed by "ret 16":
- ;   block, dst, dst pitch, src
- ; Applies the Row macro to rows 0..7 of block, then the Col4x8 macro
- ; twice (first argument 0, then 8).  The second Col4x8 argument is 0
- ; when src is zero and 1 otherwise.
- ; esi and edi are pushed on entry and popped before either return.
- ;-----------------------------------------------------------------------
- IDCT_Block8x8:
-         push    esi
-         push    edi
-         ; return address + two pushes: first argument is at [esp+12]
-         mov     esi, [esp+12+12]        ; src
-         mov     edx, [esp+12+8]         ; dst pitch
-         mov     edi, [esp+12+4]         ; dst
-         mov     ecx, [esp+12]           ; block
-         mov     eax, 8                  ; src pitch
- %assign row_index 0
- %rep 8                                  ; Row 0 .. Row 7
-         Row     row_index
- %assign row_index row_index+1
- %endrep
-         test    esi, esi                ; src given?
-         jnz     .Add
-         Col4x8  0, 0
-         Col4x8  8, 0
-         pop     edi
-         pop     esi
-         ret     16
- .Add:
-         Col4x8  0, 1
-         Col4x8  8, 1
-         pop     edi
-         pop     esi
-         ret     16
- ALIGN 16
- ;-----------------------------------------------------------------------
- ; IDCT_Block8x4 (disabled: this routine is inside the %if 0 region)
- ; Stack arguments, 16 bytes, removed by "ret 16":
- ;   block, dst, dst pitch, src
- ; Applies the Row macro to rows 0..3 of block, then the Col4x4 macro
- ; twice (first argument 0, then 8).  The second Col4x4 argument is 0
- ; when src is zero and 1 otherwise.
- ; esi and edi are pushed on entry and popped before either return.
- ;-----------------------------------------------------------------------
- IDCT_Block8x4:
-         push    esi
-         push    edi
-         ; return address + two pushes: first argument is at [esp+12]
-         mov     esi, [esp+12+12]        ; src
-         mov     edx, [esp+12+8]         ; dst pitch
-         mov     edi, [esp+12+4]         ; dst
-         mov     ecx, [esp+12]           ; block (the Row macro addresses it through ecx)
-         mov     eax, 8                  ; src pitch
- %assign row_index 0
- %rep 4                                  ; Row 0 .. Row 3
-         Row     row_index
- %assign row_index row_index+1
- %endrep
-         test    esi, esi                ; src given?
-         jnz     .Add
-         Col4x4  0, 0
-         Col4x4  8, 0
-         pop     edi
-         pop     esi
-         ret     16
- .Add:
-         Col4x4  0, 1
-         Col4x4  8, 1
-         pop     edi
-         pop     esi
-         ret     16
- %endif
- ALIGN 16
- ;-----------------------------------------------------------------------
- ; IDCT_Const8x8 - add the signed constant v to an 8x8 block of bytes
- ;
- ; Stack arguments at entry (16 bytes, removed by "ret 16"):
- ;   [esp+4]  v          signed constant
- ;   [esp+8]  dst        destination block
- ;   [esp+12] dst pitch  byte distance between destination rows
- ;   [esp+16] src        source block; its rows are 8 bytes apart
- ;
- ; For each of the 8 rows, the 8 source bytes are read, v is added with
- ; unsigned byte saturation (PADDUSB of v when v >= 0, PSUBUSB of -v
- ; when v < 0) and the result is written to the destination row.
- ;
- ; |v| is clamped to 255 before it is replicated into the byte lanes.
- ; The replication keeps only the low byte of its input, so without the
- ; clamp v = 256 would act like v = 0 instead of saturating every byte.
- ; Callers that already keep |v| <= 255 see no change.
- ;
- ; Clobbers eax, ecx, edx, mm0, mm1, mm7 and the flags; esi and edi are
- ; saved and restored.  No EMMS is executed here.
- ; NOTE(review): presumably the caller issues EMMS before x87 code - confirm.
- ;-----------------------------------------------------------------------
- IDCT_Const8x8:
-         push    esi
-         push    edi
-         ; return address + two pushes: first argument is at [esp+12]
-         mov     ecx, [esp+12]           ; v
-         mov     edi, [esp+12+4]         ; dst
-         mov     edx, [esp+12+8]         ; dst pitch
-         mov     esi, [esp+12+12]        ; src
-         mov     eax, 8                  ; src pitch
-         test    ecx, ecx
-         js      .Sub                    ; negative v: subtract |v| instead
- .Add:
-         cmp     ecx, 255
-         jbe     .AddSplat
-         mov     ecx, 255                ; anything larger saturates every byte anyway
- .AddSplat:
-         movd    mm7, ecx
-         punpcklbw mm7, mm7
-         punpcklwd mm7, mm7
-         punpckldq mm7, mm7              ; mm7 = low byte of ecx in all 8 lanes
- %rep 4                                  ; two rows per repetition = 8 rows
-         movq    mm0, [esi]
-         movq    mm1, [esi+eax]
-         paddusb mm0, mm7
-         lea     esi, [esi+eax*2]
-         paddusb mm1, mm7
-         movq    [edi], mm0
-         movq    [edi+edx], mm1
-         lea     edi, [edi+edx*2]
- %endrep
-         pop     edi
-         pop     esi
-         ret     16
- .Sub:
-         neg     ecx                     ; ecx = |v| (0x80000000 negates to itself)
-         cmp     ecx, 255                ; unsigned compare, so 0x80000000 is clamped too
-         jbe     .SubSplat
-         mov     ecx, 255
- .SubSplat:
-         movd    mm7, ecx
-         punpcklbw mm7, mm7
-         punpcklwd mm7, mm7
-         punpckldq mm7, mm7              ; mm7 = low byte of ecx in all 8 lanes
- %rep 4                                  ; two rows per repetition = 8 rows
-         movq    mm0, [esi]
-         movq    mm1, [esi+eax]
-         psubusb mm0, mm7
-         lea     esi, [esi+eax*2]
-         psubusb mm1, mm7
-         movq    [edi], mm0
-         movq    [edi+edx], mm1
-         lea     edi, [edi+edx*2]
- %endrep
-         pop     edi
-         pop     esi
-         ret     16
- ALIGN 16
- ;-----------------------------------------------------------------------
- ; IDCT_Const4x4 - add the signed constant v to a 4x4 block of bytes
- ;
- ; Stack arguments at entry (16 bytes, removed by "ret 16"):
- ;   [esp+4]  v          signed constant
- ;   [esp+8]  dst        destination block
- ;   [esp+12] dst pitch  byte distance between destination rows
- ;   [esp+16] src        source block; its rows are 8 bytes apart
- ;
- ; For each of the 4 rows, 4 source bytes are read, v is added with
- ; unsigned byte saturation (PADDUSB of v when v >= 0, PSUBUSB of -v
- ; when v < 0) and 4 bytes are written to the destination row.
- ;
- ; |v| is clamped to 255 before it is replicated into the byte lanes.
- ; The replication keeps only the low byte of its input, so without the
- ; clamp v = 256 would act like v = 0 instead of saturating every byte.
- ; Callers that already keep |v| <= 255 see no change.
- ;
- ; Clobbers eax, ecx, edx, mm0, mm1, mm7 and the flags; esi and edi are
- ; saved and restored.  No EMMS is executed here.
- ; NOTE(review): presumably the caller issues EMMS before x87 code - confirm.
- ;-----------------------------------------------------------------------
- IDCT_Const4x4:
-         push    esi
-         push    edi
-         ; return address + two pushes: first argument is at [esp+12]
-         mov     ecx, [esp+12]           ; v
-         mov     edi, [esp+12+4]         ; dst
-         mov     edx, [esp+12+8]         ; dst pitch
-         mov     esi, [esp+12+12]        ; src
-         mov     eax, 8                  ; src pitch
-         test    ecx, ecx
-         js      .Sub                    ; negative v: subtract |v| instead
- .Add:
-         cmp     ecx, 255
-         jbe     .AddSplat
-         mov     ecx, 255                ; anything larger saturates every byte anyway
- .AddSplat:
-         movd    mm7, ecx
-         punpcklbw mm7, mm7
-         punpcklwd mm7, mm7
-         punpckldq mm7, mm7              ; mm7 = low byte of ecx in all 8 lanes
- %rep 2                                  ; two rows per repetition = 4 rows
-         movd    mm0, [esi]              ; 4 bytes per row
-         movd    mm1, [esi+eax]
-         paddusb mm0, mm7
-         lea     esi, [esi+eax*2]
-         paddusb mm1, mm7
-         movd    [edi], mm0
-         movd    [edi+edx], mm1
-         lea     edi, [edi+edx*2]
- %endrep
-         pop     edi
-         pop     esi
-         ret     16
- .Sub:
-         neg     ecx                     ; ecx = |v| (0x80000000 negates to itself)
-         cmp     ecx, 255                ; unsigned compare, so 0x80000000 is clamped too
-         jbe     .SubSplat
-         mov     ecx, 255
- .SubSplat:
-         movd    mm7, ecx
-         punpcklbw mm7, mm7
-         punpcklwd mm7, mm7
-         punpckldq mm7, mm7              ; mm7 = low byte of ecx in all 8 lanes
- %rep 2                                  ; two rows per repetition = 4 rows
-         movd    mm0, [esi]              ; 4 bytes per row
-         movd    mm1, [esi+eax]
-         psubusb mm0, mm7
-         lea     esi, [esi+eax*2]
-         psubusb mm1, mm7
-         movd    [edi], mm0
-         movd    [edi+edx], mm1
-         lea     edi, [edi+edx*2]
- %endrep
-         pop     edi
-         pop     esi
-         ret     16