- Visual C++源码
- Visual Basic源码
- C++ Builder源码
- Java源码
- Delphi源码
- C/C++源码
- PHP源码
- Perl源码
- Python源码
- Asm源码
- Pascal源码
- Borland C++源码
- Others源码
- SQL源码
- VBScript源码
- JavaScript源码
- ASP/ASPX源码
- C#源码
- Flash/ActionScript源码
- matlab源码
- PowerBuilder源码
- LabView源码
- Flex源码
- MathCAD源码
- VBA源码
- IDL源码
- Lisp/Scheme源码
- VHDL源码
- Objective-C源码
- Fortran源码
- tcl/tk源码
- QT源码
idct_arm.asm
资源名称:tcpmp.rar [点击查看]
上传用户:wstnjxml
上传日期:2014-04-03
资源大小:7248k
文件大小:19k
源码类别:
Windows CE
开发平台:
C/C++
- ;*****************************************************************************
- ;*
- ;* This program is free software ; you can redistribute it and/or modify
- ;* it under the terms of the GNU General Public License as published by
- ;* the Free Software Foundation; either version 2 of the License, or
- ;* (at your option) any later version.
- ;*
- ;* This program is distributed in the hope that it will be useful,
- ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
- ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- ;* GNU General Public License for more details.
- ;*
- ;* You should have received a copy of the GNU General Public License
- ;* along with this program; if not, write to the Free Software
- ;* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- ;*
- ;* $Id: idct_arm.asm 284 2005-10-04 08:54:26Z picard $
- ;*
- ;* The Core Pocket Media Player
- ;* Copyright (c) 2004-2005 Gabor Kovacs
- ;*
- ;*****************************************************************************
- AREA |.text|, CODE ; everything below (code and the inline constant tables) is assembled into the |.text| code area
- EXPORT IDCT_Block4x8 ; column pass over 4 columns in place (Col8), then the shared Row4 loop
- EXPORT IDCT_Block8x8 ; column pass over 8 columns in place (Col8), then the shared Row8 loop
- EXPORT IDCT_Block4x8Swap ; as IDCT_Block4x8, but the first pass goes through Col8Swap
- EXPORT IDCT_Block8x8Swap ; as IDCT_Block8x8, but the first pass goes through Col8Swap
- ;-----------------------------------------------------------------------------
- ; MCol8 $Name,$Rotate,$Pitch
- ;
- ; Emits a procedure $Name that performs one 8-point IDCT pass over eight
- ; signed 16-bit values and stores eight 16-bit results.
- ;
- ; $Pitch  : byte distance between consecutive inputs; input k is loaded
- ;           from [r6 + k*$Pitch], k = 0..7.
- ; $Rotate : 0 = results are stored at [r6 + k*16] (in place when $Pitch=16).
- ;           1 = r9 is added to r6 after the loads, so results are stored at
- ;               [r6 + r9 + k*16] and r6 leaves the procedure advanced by r9.
- ;
- ; In:    r6 = Block, r9 = byte offset for the stores (read only if $Rotate=1)
- ; Out:   eight halfwords stored 16 bytes apart
- ; Clobb: r0-r5, r9-r12, flags (and r6 when $Rotate=1)
- ;        r7,r8 are never touched: callers keep Src/Dst there.
- ; Returns with "mov pc,lr"; the stack is not used.
- ;
- ; Three shortcuts are taken before the full transform (Mode3):
- ;   only x0 non-zero              -> all outputs = x0 << 3
- ;   only x0,x4 non-zero (Mode1)   -> four multiplies
- ;   only x0,x1,x2,x3 (Mode2)      -> even part only, outputs are mirrored
- ;
- ; Every PROC opened by an expansion is closed with ENDP at the end of the
- ; macro, so PROC/ENDP pairs stay balanced and never nest.
- ;-----------------------------------------------------------------------------
- macro
- MCol8 $Name,$Rotate,$Pitch
- $Name PROC
- ; r10 = x0
- ; r4 = x1
- ; r2 = x2
- ; r1 = x3
- ; r3 = x4
- ; r12 = x5
- ; r0 = x6
- ; r5 = x7
- ; r11 = x8
- ; r9 = tmp (x567)
- ldrsh r4, [r6, #4*$Pitch]
- ldrsh r0, [r6, #5*$Pitch]
- ldrsh r12,[r6, #7*$Pitch]
- ldrsh r5, [r6, #3*$Pitch]
- ldrsh r2, [r6, #6*$Pitch]
- ldrsh r1, [r6, #2*$Pitch]
- ldrsh r3, [r6, #1*$Pitch]
- ldrsh r10,[r6]
- if $Rotate
- add r6,r6,r9 ; all stores below now go to r6 + r9; r9 is free to be reused as tmp
- endif
- orr r9, r12, r0
- orr r9, r9, r5 ; r9 = x5 | x6 | x7
- orr r11, r9, r2
- orr r11, r11, r4
- orrs r11, r11, r1 ; r11 = x1 | x2 | x3 | x5 | x6 | x7
- bne $Name.Mode2 ; something other than x0/x4 is non-zero
- cmp r3, #0
- bne $Name.Mode1 ; only x0 and x4 can be non-zero
- if $Rotate=0
- cmp r10, #0
- beq $Name.Zero ; storing in place and all eight inputs are zero: nothing to write
- endif
- mov r10, r10, lsl #3 ; only x0 is non-zero: every output is x0 << 3
- strh r10, [r6]
- strh r10, [r6, #0x10]
- strh r10, [r6, #0x20]
- strh r10, [r6, #0x30]
- strh r10, [r6, #0x40]
- strh r10, [r6, #0x50]
- strh r10, [r6, #0x60]
- strh r10, [r6, #0x70]
- $Name.Zero
- mov pc,lr
- $Name.Mode1 ;x0,x4
- mov r11, r3 ; r11 = x4 (copy used as a multiply operand)
- mov r2, #0x8D, 30 ; 0x234 = 564
- orr r2, r2, #1 ; r2 = 565 (W7)
- mov r9, r3 ; r9 = x4 (second copy)
- mul r2, r11, r2 ; r2 = W7 * x4
- mov r11, #0xB1, 28 ; 0xB10 = 2832
- orr r11, r11, #9 ; r11 = 2841 (W1)
- mul r4, r9, r11 ; r4 = W1 * x4
- mov r11, #0x96, 28 ; 0x960 = 2400
- orr r11, r11, #8 ; r11 = 2408 (W3)
- mul r5, r9, r11 ; r5 = W3 * x4
- mov r11, #0x19, 26 ; 0x640 = 1600
- mov r1, r10, lsl #11 ; r1 = x0 << 11
- orr r11, r11, #9 ; r11 = 1609 (W5)
- mul r0, r3, r11 ; r0 = W5 * x4
- add r1, r1, #0x80 ; 0x80 = 128 (rounding for the >> 8 below)
- add r3, r4, r1 ; out0 = x0 + W1*x4
- add r11, r5, r1 ; out1 = x0 + W3*x4
- mov r3, r3, asr #8
- mov r11, r11, asr #8
- strh r3, [r6]
- strh r11, [r6, #0x10] ; 0x10 = 16
- add r3, r0, r1 ; out2 = x0 + W5*x4
- add r11, r2, r1 ; out3 = x0 + W7*x4
- mov r3, r3, asr #8
- mov r11, r11, asr #8
- strh r3, [r6, #0x20] ; 0x20 = 32
- strh r11, [r6, #0x30] ; 0x30 = 48
- sub r3, r1, r2 ; out4 = x0 - W7*x4
- sub r11, r1, r0 ; out5 = x0 - W5*x4
- mov r3, r3, asr #8
- mov r11, r11, asr #8
- strh r3, [r6, #0x40] ; 0x40 = 64
- strh r11, [r6, #0x50] ; 0x50 = 80
- sub r3, r1, r5 ; out6 = x0 - W3*x4
- sub r11, r1, r4 ; out7 = x0 - W1*x4
- mov r3, r3, asr #8
- mov r11, r11, asr #8
- strh r3, [r6, #0x60] ; 0x60 = 96
- strh r11, [r6, #0x70] ; 0x70 = 112
- mov pc,lr
- $Name.Mode2 ;x0,x1,x2,x3
- orrs r11, r9, r3 ; x4 | x5 | x6 | x7
- bne $Name.Mode3 ; an odd-part input is non-zero: run the full transform
- mov r3, r10, lsl #11
- add r3, r3, #128 ; r3 = (x0 << 11) + 128
- mov r9, #0x45, 28 ; 0x450 = 1104
- add r5, r3, r4, lsl #11 ; r5 = x8 = x0 + (x1 << 11)
- add r11, r2, r1 ; r11 = x2 + x3
- orr r9, r9, #4 ; r9 = 1108 (W6)
- sub r3, r3, r4, lsl #11 ; r3 = x0 = x0 - (x1 << 11)
- mul r4, r11, r9 ; r4 = W6 * (x3 + x2)
- mov r11, #0x3B, 26 ; 0xEC0 = 3776
- orr r11, r11, #8 ; r11 = 3784 (W2+W6)
- mul r11, r2, r11 ; r11 = (W2+W6) * x2
- sub r2, r4, r11 ; r2 = x2 = W6*(x3+x2) - (W2+W6)*x2
- mov r11, #0x62, 28 ; 0x620 = 1568 (W2-W6)
- mul r11, r1, r11 ; r11 = (W2-W6) * x3
- add r0, r2, r3 ; r0 = x0 + x2
- add r1, r11, r4 ; r1 = x3 = W6*(x3+x2) + (W2-W6)*x3
- add r4, r5, r1 ; r4 = x8 + x3
- sub r3, r3, r2 ; r3 = x0 - x2
- sub r5, r5, r1 ; r5 = x8 - x3
- mov r1, r4, asr #8
- mov r3, r3, asr #8
- mov r2, r0, asr #8
- mov r4, r5, asr #8
- strh r1, [r6,#0x00] ; the same four values are stored mirrored: out[k] = out[7-k]
- strh r2, [r6,#0x10]
- strh r3, [r6,#0x20]
- strh r4, [r6,#0x30]
- strh r4, [r6,#0x40]
- strh r3, [r6,#0x50]
- strh r2, [r6,#0x60]
- strh r1, [r6,#0x70]
- mov pc,lr
- $Name.Mode3 ;x0,x1,x2,x3,x4,x5,x6,x7
- mov r9, #0x8D, 30 ; 0x234 = 564
- orr r9, r9, #1 ;W7
- add r11, r12, r3
- mul r11, r9, r11 ;x8 = W7 * (x5 + x4)
- mov r9, #0x8E, 28 ; 0x8E0 = 2272
- orr r9, r9, #4 ;W1-W7
- mla r3, r9, r3, r11 ;x4 = x8 + (W1-W7) * x4
- mvn r9, #0xD40 ; ~0xD40; the eor below turns it into -3406
- eor r9, r9, #0xD ;-W1-W7
- mla r12, r9, r12, r11 ;x5 = x8 + (-W1-W7) * x5
- mov r9, #0x96, 28 ; 0x960 = 2400
- orr r9, r9, #8 ;W3
- add r11, r0, r5
- mul r11, r9, r11 ;x8 = W3 * (x6 + x7)
- mvn r9, #0x310 ; ~0x310; the eor below turns it into -799
- eor r9, r9, #0xE ;W5-W3
- mla r0, r9, r0, r11 ;x6 = x8 + (W5-W3) * x6
- mvn r9, #0xFB0 ;-W3-W5
- mla r5, r9, r5, r11 ;x7 = x8 + (-W3-W5) * x7
- mov r10, r10, lsl #11
- add r10, r10, #128 ;x0 = (x0 << 11) + 128
- add r11, r10,r4,lsl #11 ;x8 = x0 + (x1 << 11)
- sub r10, r10,r4,lsl #11 ;x0 = x0 - (x1 << 11)
- mov r9, #0x45, 28 ; 0x450 = 1104
- orr r9, r9, #4 ;W6
- add r4, r1, r2
- mul r4, r9, r4 ;x1 = W6 * (x3 + x2)
- mvn r9, #0xEC0 ; ~0xEC0; the eor below turns it into -3784
- eor r9, r9, #0x7 ;-W2-W6
- mla r2, r9, r2, r4 ;x2 = x1 + (-W2-W6) * x2
- mov r9, #0x620 ;W2-W6
- mla r1, r9, r1, r4 ;x3 = x1 + (W2-W6) * x3
- add r4, r3, r0 ;x1 = x4 + x6
- sub r3, r3, r0 ;x4 -= x6
- add r0, r12,r5 ;x6 = x5 + x7
- sub r12,r12,r5 ;x5 -= x7
- add r5, r11,r1 ;x7 = x8 + x3
- sub r11,r11,r1 ;x8 -= x3
- add r1, r10,r2 ;x3 = x0 + x2
- sub r10,r10,r2 ;x0 -= x2
- add r9, r3, r12 ;x4 + x5
- sub r3, r3, r12 ;x4 - x5
- mov r12, #181
- mul r2, r9, r12 ;181 * (x4 + x5)
- mul r9, r3, r12 ;181 * (x4 - x5)
- add r2, r2, #128 ;x2 = 181 * (x4 + x5) + 128
- add r3, r9, #128 ;x4 = 181 * (x4 - x5) + 128
- add r9,r5,r4
- sub r5,r5,r4
- mov r9,r9,asr #8 ;(x7 + x1) >> 8
- mov r5,r5,asr #8 ;(x7 - x1) >> 8
- strh r9,[r6,#0x00]
- strh r5,[r6,#0x70]
- add r9,r1,r2,asr #8
- sub r1,r1,r2,asr #8
- mov r9,r9,asr #8 ;(x3 + x2) >> 8
- mov r1,r1,asr #8 ;(x3 - x2) >> 8
- strh r9,[r6,#0x10]
- strh r1,[r6,#0x60]
- add r9,r10,r3,asr #8
- sub r10,r10,r3,asr #8
- mov r9,r9,asr #8 ;(x0 + x4) >> 8
- mov r10,r10,asr #8 ;(x0 - x4) >> 8
- strh r9,[r6,#0x20]
- strh r10,[r6,#0x50]
- add r9,r11,r0
- sub r11,r11,r0
- mov r9,r9,asr #8 ;(x8 + x6) >> 8
- mov r11,r11,asr #8 ;(x8 - x6) >> 8
- strh r9,[r6,#0x30]
- strh r11,[r6,#0x40]
- mov pc,lr
- ENDP
- mend
- MCol8 Col8,0,16 ; Col8: inputs 16 bytes apart, results stored back over the same eight locations
- MCol8 Col8Swap,1,2 ; Col8Swap: eight consecutive halfwords in, results stored 16 bytes apart starting at r6 + r9
- ; r0 Block[0] -- RowConst: row pass for a row whose only non-zero input is r0; every output pixel is derived from v = (r0 + 32) >> 6
- ; r6 Block (neither read nor written here)
- ; r7 Src: 0 = none (the 8 Dst bytes are v clamped to 0..255); otherwise 8 bytes are read as two words, v is added with per-byte saturation, and r7 is advanced by 8
- ; r8 Dst: receives 8 bytes as two word stores and is not advanced. Clobbers r0-r5, r10-r12, flags; returns with mov pc,lr
- ALIGN 16
- RowConst PROC
- add r0, r0, #0x20 ; 0x20 = 32 (rounding for the >> 6)
- cmp r7, #0
- mov r3, r0, asr #6 ; r3 = v; does not disturb the flags from the cmp above
- beq RowConst_NoSrc
- cmp r3, #0
- beq RowConst_Zero ; v == 0: plain copy Src -> Dst
- blt RowConst_Sub ; v < 0: saturating subtract of -v
- RowConst_Add ; v > 0: Dst = min(255, Src + v) on each of the 8 bytes
- ldr r0, CarryMask
- ldr r2, [r7] ; Src bytes 0..3 (word access -- presumably Src is 4-byte aligned; confirm with callers)
- orr r3, r3, r3, lsl #8 ; NOTE(review): v is not clamped before replication, so this assumes v <= 255 -- confirm
- orr r3, r3, r3, lsl #16 ; r3 = v in all four byte lanes
- add r4, r2, r3 ; plain 32-bit sum; carries leak into the next byte lane
- eor r11, r2, r3
- and r2, r3, r2
- bic r11, r11, r4
- orr r11, r11, r2 ; r11 = (a & b) | ((a ^ b) & ~sum): bit 7 of each lane is that lane's carry-out
- and r5, r11, r0 ; keep only the per-lane carry bits
- mov r12, r5, lsl #1 ; the same carries at the bit position where they entered the next lane
- sub r10, r4, r12 ; remove the leaked carries from the sum
- sub r11, r12, r5, lsr #7 ; (c << 1) - (c >> 7) = 0xFF in every lane that overflowed
- ldr r2, [r7, #4] ; Src bytes 4..7 (loaded early, used after the store)
- orr r11, r11, r10 ; overflowed lanes become 0xFF, others keep their sum
- str r11, [r8]
- add r4, r2, r3 ; same sequence for bytes 4..7
- eor r11, r2, r3
- and r2, r3, r2
- bic r11, r11, r4
- orr r11, r11, r2
- and r5, r11, r0
- mov r12, r5, lsl #1
- sub r10, r4, r12
- sub r11, r12, r5, lsr #7
- orr r11, r11, r10
- str r11, [r8, #4]
- add r7, r7, #8 ;source stride
- mov pc,lr
- RowConst_Sub ; v < 0: Dst = ~sat_add(~Src, -v), i.e. max(0, Src + v) on each byte
- ldr r0, CarryMask
- ldr r2, [r7]
- rsb r3, r3, #0 ; r3 = -v (NOTE(review): assumes -v <= 255, not clamped -- confirm)
- orr r3, r3, r3, lsl #8
- orr r3, r3, r3, lsl #16 ; r3 = -v in all four byte lanes
- mvn r2, r2 ; 255 - Src in every lane
- add r4, r2, r3
- eor r11, r2, r3
- and r2, r3, r2
- bic r11, r11, r4
- orr r11, r11, r2 ; per-lane carry-out in bit 7, as in RowConst_Add
- and r5, r11, r0
- mov r12, r5, lsl #1
- sub r10, r4, r12
- sub r11, r12, r5, lsr #7
- ldr r2, [r7, #4]
- orr r11, r11, r10
- mvn r11, r11 ; complement back: saturated lanes become 0
- str r11, [r8]
- mvn r2, r2 ; same sequence for bytes 4..7
- add r4, r2, r3
- eor r11, r2, r3
- and r2, r3, r2
- bic r11, r11, r4
- orr r11, r11, r2
- and r5, r11, r0
- mov r12, r5, lsl #1
- sub r10, r4, r12
- sub r11, r12, r5, lsr #7
- orr r11, r11, r10
- mvn r11, r11
- str r11, [r8, #4]
- add r7, r7, #8 ;source stride
- mov pc,lr
- RowConst_Zero ; v == 0: copy 8 bytes Src -> Dst
- ldr r1, [r7]
- ldr r2, [r7, #4]
- str r1, [r8]
- str r2, [r8, #4]
- add r7, r7, #8 ;source stride
- mov pc,lr
- RowConst_NoSrc ; no Src: fill 8 bytes with clamp(v, 0, 255)
- cmp r3, #0
- movmi r3, #0 ; negative -> 0
- cmppl r3, #255 ; only compared when v >= 0; otherwise the flags of the first cmp are kept
- movgt r3, #255 ; above 255 -> 255 (GT is false when the first cmp saw a negative v)
- orr r3, r3, r3, lsl #8
- orr r3, r3, r3, lsl #16
- str r3, [r8]
- str r3, [r8, #4]
- mov pc,lr
- ENDP
- CarryMask DCD 0x80808080 ; bit 7 of each byte lane; RowConst uses it to isolate the per-byte carry-out bits
- W1 DCW 2841 ; 2048*sqrt(2)*cos(1*pi/16)
- W3 DCW 2408 ; 2048*sqrt(2)*cos(3*pi/16)
- nW5 DCW 0xF9B7 ;-1609 ; -(2048*sqrt(2)*cos(5*pi/16)), stored as a 16-bit two's complement value and loaded with ldrsh
- W6 DCW 1108 ; 2048*sqrt(2)*cos(6*pi/16)
- W7 DCW 565 ; 2048*sqrt(2)*cos(7*pi/16)
- W2 DCW 2676 ; 2048*sqrt(2)*cos(2*pi/16)
- ;-----------------------------------------------------------------------------
- ; IDCT_Block4x8Swap
- ;
- ; In:  r0 = Block, r1 = Dst, r2 = DstStride, r3 = Src (0 = no source)
- ;
- ; First pass: rows 0..3 of Block (8 consecutive halfwords each, 16 bytes
- ; apart) are run through Col8Swap. Row i is read at Block + 16*i and its
- ; eight results are stored at Block + 128 + 2*i + 16*k (k = 0..7), i.e. the
- ; transposed result is built in the second 128 bytes of the buffer, so
- ; Block must be at least 256 bytes long.
- ; Second pass: tail-branches into Row4_Loop (shared with IDCT_Block4x8),
- ; which walks r6 from Block+128 up to BlockEnd = Block+256 and pops the
- ; frame pushed here.
- ;
- ; Frame: {r0=BlockEnd, r2=DstStride, r4-r12, lr}; Row4_Next reads [sp,#0]
- ; and [sp,#4].
- ;
- ; The PROC is closed with its own ENDP so that IDCT_Block4x8 PROC, which
- ; follows, is not nested inside it.
- ;-----------------------------------------------------------------------------
- ; r6 Block
- ; r7 Src
- ; r8 Dst
- ALIGN 16
- IDCT_Block4x8Swap PROC
- add r0, r0, #256 ; r0 = Block + 256 = BlockEnd
- stmdb sp!, {r0, r2, r4 - r12, lr} ; r0=BlockEnd r2=DstStride
- sub r6, r0, #256 ;Block
- mov r7, r3 ;Src
- mov r8, r1 ;Dst
- mov r9,#128-0*16+0*2 ; row 0 -> Block+128+0
- bl Col8Swap ; returns with r6 = r6 + r9
- mov r9,#128-1*16+1*2
- add r6, r6, #1*16-0*2-128 ; r6 = Block + 1*16 (next input row)
- bl Col8Swap
- mov r9,#128-2*16+2*2
- add r6, r6, #2*16-1*2-128 ; r6 = Block + 2*16
- bl Col8Swap
- mov r9,#128-3*16+3*2
- add r6, r6, #3*16-2*2-128 ; r6 = Block + 3*16
- bl Col8Swap
- sub r6, r6, #6 ; r6 = Block + 128: first row for the row pass
- b Row4_Loop
- ENDP
- ALIGN 16
- IDCT_Block4x8 PROC ; In: r0 = Block, r1 = Dst, r2 = DstStride, r3 = Src (0 = none). Col8 on columns 0..3 in place, then 8 rows of 8 output bytes
- add r0, r0, #128 ; r0 = Block + 128 = BlockEnd
- stmdb sp!, {r0, r2, r4 - r12, lr} ; r0=BlockEnd r2=DstStride (read back at [sp,#0] / [sp,#4] in Row4_Next)
- sub r6, r0, #128 ;Block
- mov r7, r3 ;Src
- mov r8, r1 ;Dst
- bl Col8 ; column 0
- add r6, r6, #2
- bl Col8 ; column 1
- add r6, r6, #2
- bl Col8 ; column 2
- add r6, r6, #2
- bl Col8 ; column 3
- sub r6, r6, #6 ; back to the first row
- Row4_Loop ; one iteration per output row; also entered from IDCT_Block4x8Swap with the same stack frame
- ldrsh r4, [r6, #4] ;x3
- ldrsh r5, [r6, #6] ;x7
- ldrsh r3, [r6, #2] ;x4
- ldrsh r0, [r6] ;x0
- orr r11, r5, r4
- orrs r11, r11, r3 ; x3 | x7 | x4
- bne Row4_NoConst
- bl RowConst ; only x0 can be non-zero: constant row (overwrites lr, which is already saved on the stack)
- b Row4_Next
- Row4_NoConst
- cmp r7, #0 ; Z is consumed by "beq Row4_NoSrc" far below; no instruction in between sets flags
- ldrsh r10, W7 ; PC-relative load from the table that follows RowConst
- ldrsh r11, W1
- mov r2, #4 ; rounding term for the >> 3 applied to each product
- add r0, r0, #32
- mov r0, r0, lsl #8 ;x0 = (x0 + 32) << 8
- mla r14, r3, r10, r2 ;x5 = x4 * W7 + 4 (r14/lr is used as scratch; the return address is on the stack)
- ldrsh r10, W3
- mla r3, r11, r3, r2 ;x4 = x4 * W1 + 4
- mov r14, r14, asr #3 ;x5 >>= 3
- ldrsh r11, nW5
- mla r12, r5, r10, r2 ;x6 = x7 * W3 + 4
- mov r3, r3, asr #3 ;x4 >>= 3
- ldrsh r10, W6
- mla r5, r11, r5, r2 ;x7 = x7 * -W5 + 4
- ldrsh r11, W2
- add r9, r3, r12, asr #3 ;x1 = x4 + (x6 >> 3)
- sub r3, r3, r12, asr #3 ;x4 = x4 - (x6 >> 3)
- mla r12, r4, r10, r2 ;x2 = x3 * W6 + 4
- mla r4, r11, r4, r2 ;x3 = x3 * W2 + 4
- add r2, r14, r5, asr #3 ;x6 = x5 + (x7 >> 3)
- sub r5, r14, r5, asr #3 ;x5 = x5 - (x7 >> 3)
- add r14, r0, r4, asr #3 ;x7 = x0 + (x3 >> 3)
- sub r4, r0, r4, asr #3 ;x8 = x0 - (x3 >> 3)
- add r10, r0, r12, asr #3;x3 = x0 + (x2 >> 3)
- sub r0, r0, r12, asr #3 ;x0 = x0 - (x2 >> 3)
- add r1, r5, r3
- mov r11, #181
- mul r12, r1, r11 ;x2 = 181 * (x5 + x4)
- sub r3, r3, r5
- mul r1, r3, r11 ;x4 = 181 * (x4 - x5)
- add r12, r12, #128 ;x2 += 128
- add r3, r1, #128 ;x4 += 128
- add r1, r14, r9 ;x5 = x7 + x1
- sub r5, r14, r9 ;x1 = x7 - x1
- add r11, r10, r12, asr #8 ;x7 = x3 + (x2 >> 8)
- sub r14, r10, r12, asr #8 ;x2 = x3 - (x2 >> 8)
- add r9, r0, r3, asr #8 ;x3 = x0 + (x4 >> 8)
- sub r3, r0, r3, asr #8 ;x4 = x0 - (x4 >> 8)
- add r12, r4, r2 ;x0 = x8 + x6
- sub r4, r4, r2 ;x6 = x8 - x6
- beq Row4_NoSrc ; Src == 0 (flags still from the cmp at Row4_NoConst)
- ldrb r0, [r7] ; Src[0]
- ldrb r2, [r7, #7] ; Src[7]
- ldrb r10, [r7, #1] ; Src[1]
- add r1, r0, r1, asr #14 ; out0 = Src[0] + (x5 >> 14)
- add r5, r2, r5, asr #14 ; out7 = Src[7] + (x1 >> 14)
- add r11, r10, r11, asr #14 ; out1 = Src[1] + (x7 >> 14)
- ldrb r2, [r7, #6] ; Src[6]
- ldrb r0, [r7, #2] ; Src[2]
- ldrb r10, [r7, #5] ; Src[5]
- add r14, r2, r14, asr #14 ; out6 = Src[6] + (x2 >> 14)
- add r9, r0, r9, asr #14 ; out2 = Src[2] + (x3 >> 14)
- ldrb r0, [r7, #3] ; Src[3]
- ldrb r2, [r7, #4] ; Src[4]
- add r3, r10, r3, asr #14 ; out5 = Src[5] + (x4 >> 14)
- add r12, r0, r12, asr #14 ; out3 = Src[3] + (x0 >> 14)
- add r4, r2, r4, asr #14 ; out4 = Src[4] + (x6 >> 14)
- add r7, r7, #8 ;source stride
- Row4_Sat ; clamp the eight results (r1,r11,r9,r12,r4,r3,r14,r5) to 0..255
- orr r0, r5, r14
- orr r0, r0, r4
- orr r0, r0, r1
- orr r0, r0, r12
- orr r0, r0, r11
- orr r0, r0, r9
- orr r0, r0, r3
- bics r0, r0, #0xFF ; 0xFF = 255; result is zero when all eight values already fit in 0..255
- beq Row4_Write ; fast path: nothing to clamp
- mov r0, #0xFFFFFF00 ; bits that must all be clear for a value in 0..255
- tst r1, r0 ; Z = in range, N = value is negative
- movne r1, #0xFF ; out of range: take 255 ...
- movmi r1, #0x00 ; ... unless negative, then 0
- tst r11, r0
- movne r11, #0xFF
- movmi r11, #0x00
- tst r9, r0
- movne r9, #0xFF
- movmi r9, #0x00
- tst r12, r0
- movne r12, #0xFF
- movmi r12, #0x00
- tst r4, r0
- movne r4, #0xFF
- movmi r4, #0x00
- tst r3, r0
- movne r3, #0xFF
- movmi r3, #0x00
- tst r14, r0
- movne r14, #0xFF
- movmi r14, #0x00
- tst r5, r0
- movne r5, #0xFF
- movmi r5, #0x00
- Row4_Write ; store the 8 output bytes of this row
- strb r1, [r8]
- strb r11,[r8, #1]
- strb r9, [r8, #2]
- strb r12,[r8, #3]
- strb r4, [r8, #4]
- strb r3, [r8, #5]
- strb r14,[r8, #6]
- strb r5, [r8, #7]
- Row4_Next
- ldr r2, [sp, #4] ;DstStride
- ldr r1, [sp, #0] ;BlockEnd
- add r6,r6,#16 ;Block += 16
- add r8,r8,r2 ;Dst += DstStride
- cmp r6,r1
- bne Row4_Loop ; until r6 reaches BlockEnd
- ldmia sp!, {r0,r2,r4 - r12, pc} ; pop the frame pushed at entry; loading pc returns to the caller
- Row4_NoSrc ; no Src: just scale the eight results down before clamping
- mov r5, r5, asr #14
- mov r14, r14, asr #14
- mov r12, r12, asr #14
- mov r1, r1, asr #14
- mov r11, r11, asr #14
- mov r9, r9, asr #14
- mov r3, r3, asr #14
- mov r4, r4, asr #14
- b Row4_Sat
- ENDP
- ;-----------------------------------------------------------------------------
- ; IDCT_Block8x8Swap
- ;
- ; In:  r0 = Block, r1 = Dst, r2 = DstStride, r3 = Src (0 = no source)
- ;
- ; First pass: rows 0..7 of Block (8 consecutive halfwords each, 16 bytes
- ; apart) are run through Col8Swap. Row i is read at Block + 16*i and its
- ; eight results are stored at Block + 128 + 2*i + 16*k (k = 0..7), i.e. the
- ; transposed result is built in the second 128 bytes of the buffer, so
- ; Block must be at least 256 bytes long.
- ; Second pass: tail-branches into Row8_Loop (shared with IDCT_Block8x8),
- ; which walks r6 from Block+128 up to BlockEnd = Block+256 and pops the
- ; frame pushed here.
- ;
- ; Frame: {r0=BlockEnd, r2=DstStride, r4-r12, lr}; Row8_Next reads [sp,#0]
- ; and [sp,#4].
- ;
- ; The PROC is closed with its own ENDP so that IDCT_Block8x8 PROC, which
- ; follows, is not nested inside it.
- ;-----------------------------------------------------------------------------
- ; r6 Block
- ; r7 Src
- ; r8 Dst
- ALIGN 16
- IDCT_Block8x8Swap PROC
- add r0, r0, #256 ; r0 = Block + 256 = BlockEnd
- stmdb sp!, {r0, r2, r4 - r12, lr} ; r0=BlockEnd r2=DstStride
- sub r6, r0, #256 ;Block
- mov r7, r3 ;Src
- mov r8, r1 ;Dst
- mov r9,#128-0*16+0*2 ; row 0 -> Block+128+0
- bl Col8Swap ; returns with r6 = r6 + r9
- mov r9,#128-1*16+1*2
- add r6, r6, #1*16-0*2-128 ; r6 = Block + 1*16 (next input row)
- bl Col8Swap
- mov r9,#128-2*16+2*2
- add r6, r6, #2*16-1*2-128 ; r6 = Block + 2*16
- bl Col8Swap
- mov r9,#128-3*16+3*2
- add r6, r6, #3*16-2*2-128 ; r6 = Block + 3*16
- bl Col8Swap
- mov r9,#128-4*16+4*2
- add r6, r6, #4*16-3*2-128 ; r6 = Block + 4*16
- bl Col8Swap
- mov r9,#128-5*16+5*2
- add r6, r6, #5*16-4*2-128 ; r6 = Block + 5*16
- bl Col8Swap
- mov r9,#128-6*16+6*2
- add r6, r6, #6*16-5*2-128 ; r6 = Block + 6*16
- bl Col8Swap
- mov r9,#128-7*16+7*2
- add r6, r6, #7*16-6*2-128 ; r6 = Block + 7*16
- bl Col8Swap
- sub r6, r6, #14 ; r6 = Block + 128: first row for the row pass
- b Row8_Loop
- ENDP
- ALIGN 16
- IDCT_Block8x8 PROC ; In: r0 = Block, r1 = Dst, r2 = DstStride, r3 = Src (0 = none). Col8 on all 8 columns in place, then 8 rows of 8 output bytes
- add r0, r0, #128 ; r0 = Block + 128 = BlockEnd
- stmdb sp!, {r0, r2, r4 - r12, lr} ; r0=BlockEnd r2=DstStride (read back at [sp,#0] / [sp,#4] in Row8_Next)
- sub r6, r0, #128 ;Block
- mov r7, r3 ;Src
- mov r8, r1 ;Dst
- bl Col8 ; column 0
- add r6, r6, #2
- bl Col8 ; column 1
- add r6, r6, #2
- bl Col8 ; column 2
- add r6, r6, #2
- bl Col8 ; column 3
- add r6, r6, #2
- bl Col8 ; column 4
- add r6, r6, #2
- bl Col8 ; column 5
- add r6, r6, #2
- bl Col8 ; column 6
- add r6, r6, #2
- bl Col8 ; column 7
- sub r6, r6, #14 ; back to the first row
- Row8_Loop ; one iteration per output row; also entered from IDCT_Block8x8Swap with the same stack frame
- ldrsh r0, [r6] ;x0
- ldrsh r3, [r6, #2] ;x4
- ldrsh r4, [r6, #4] ;x3
- ldrsh r5, [r6, #6] ;x7
- ldrsh r9, [r6, #8] ;x1
- ldrsh r2, [r6, #10] ;x6
- ldrsh r14,[r6, #12] ;x2 (r14/lr is scratch; the return address is on the stack)
- ldrsh r1, [r6, #14] ;x5
- orr r11, r3, r4
- orr r11, r11, r5
- orr r11, r11, r9
- orr r11, r11, r2
- orr r11, r11, r14
- orrs r11, r11, r1 ; OR of the seven inputs other than x0
- bne Row8_NoConst
- bl RowConst ; only x0 can be non-zero: constant row
- b Row8_Next ; unconditional, so the constant data below is never executed
- _W3 DCW 2408 ; 2048*sqrt(2)*cos(3*pi/16)
- _W6 DCW 1108 ; 2048*sqrt(2)*cos(6*pi/16)
- _W7 DCW 565 ; 2048*sqrt(2)*cos(7*pi/16)
- _W1_nW7 DCW 2276 ; W1 - W7
- _nW1_nW7 DCW 0xF2B2 ;-3406 = -(W1 + W7)
- _W5_nW3 DCW 0xFCE1 ;-799 = W5 - W3
- _nW2_nW6 DCW 0xF138 ;-3784 = -(W2 + W6)
- ALIGN 4 ; seven halfwords were emitted; realign before the next instruction
- Row8_NoConst
- cmp r7, #0 ; Z is consumed by "beq Row8_NoSrc" far below; no instruction in between sets flags
- add r0, r0, #32
- ldrsh r10, _W7
- mov r0, r0, lsl #11 ;x0 = (x0 + 32) << 11
- ldrsh r12, _W1_nW7
- add r11,r3,r1
- mul r11,r10,r11 ;x8 = W7 * (x4 + x5)
- ldrsh r10, _nW1_nW7
- mla r3, r12, r3, r11 ;x4 = x8 + (W1-W7) * x4
- ldrsh r12, _W3
- mla r1, r10, r1, r11 ;x5 = x8 + (-W1-W7) * x5
- ldrsh r10, _W5_nW3
- add r11,r2,r5 ;x6 + x7
- mul r11,r12,r11 ;x8 = W3 * (x6 + x7)
- mvn r12, #0xFB0 ;-W3-W5 (~0xFB0 = -4017)
- mla r2,r10,r2,r11 ;x6 = x8 + (W5-W3) * x6
- ldrsh r10, _W6
- mla r5,r12,r5,r11 ;x7 = x8 + (-W3-W5) * x7
- ldrsh r12, _nW2_nW6
- add r11, r0, r9, lsl #11;x8 = x0 + (x1 << 11)
- sub r0, r0, r9, lsl #11 ;x0 = x0 - (x1 << 11)
- add r9, r4, r14
- mul r9, r10, r9 ;x1 = W6 * (x3 + x2)
- mov r10, #0x620 ;W2-W6
- mla r14, r12, r14, r9 ;x2 = x1 + (-W2-W6) * x2
- mov r12, #181
- mla r4, r10, r4, r9 ;x3 = x1 + (W2-W6) * x3
- add r9, r3, r2 ;x1 = x4 + x6
- sub r3, r3, r2 ;x4 = x4 - x6
- add r2, r1, r5 ;x6 = x5 + x7
- sub r1, r1, r5 ;x5 = x5 - x7
- add r5, r11, r4 ;x7 = x8 + x3
- sub r11, r11, r4 ;x8 = x8 - x3
- add r4, r0, r14 ;x3 = x0 + x2
- sub r0, r0, r14 ;x0 = x0 - x2
- add r3, r3, #4 ; rounding for the >> 3 below
- add r14, r3, r1 ;x2 = x4 + x5 + 4
- sub r3, r3, r1 ;x4 = x4 - x5 + 4
- mov r10, #16 ; rounding for the >> 5 below
- mov r14, r14, asr #3
- mov r3, r3, asr #3
- mla r14, r12, r14, r10 ;x2 = 181 * ((x4 + x5 + 4) >> 3) + 16
- mla r3, r12, r3, r10 ;x4 = 181 * ((x4 - x5 + 4) >> 3) + 16
- add r1, r5, r9 ;x5 = x7 + x1
- sub r9, r5, r9 ;x1 = x7 - x1
- add r5, r4, r14, asr #5 ;x7 = x3 + (x2 >> 5)
- sub r14,r4, r14, asr #5 ;x2 = x3 - (x2 >> 5)
- add r4, r0, r3, asr #5 ;x3 = x0 + (x4 >> 5)
- sub r3, r0, r3, asr #5 ;x4 = x0 - (x4 >> 5)
- add r0, r11, r2 ;x0 = x8 + x6
- sub r2, r11, r2 ;x6 = x8 - x6
- beq Row8_NoSrc ; Src == 0 (flags still from the cmp at Row8_NoConst)
- ldrb r10, [r7] ; Src[0]
- ldrb r12, [r7, #7] ; Src[7]
- ldrb r11, [r7, #1] ; Src[1]
- add r1, r10, r1, asr #17 ; out0 = Src[0] + (x5 >> 17)
- add r9, r12, r9, asr #17 ; out7 = Src[7] + (x1 >> 17)
- add r5, r11, r5, asr #17 ; out1 = Src[1] + (x7 >> 17)
- ldrb r10, [r7, #6] ; Src[6]
- ldrb r12, [r7, #2] ; Src[2]
- ldrb r11, [r7, #5] ; Src[5]
- add r14, r10, r14, asr #17 ; out6 = Src[6] + (x2 >> 17)
- add r4, r12, r4, asr #17 ; out2 = Src[2] + (x3 >> 17)
- ldrb r10, [r7, #3] ; Src[3]
- ldrb r12, [r7, #4] ; Src[4]
- add r3, r11, r3, asr #17 ; out5 = Src[5] + (x4 >> 17)
- add r0, r10, r0, asr #17 ; out3 = Src[3] + (x0 >> 17)
- add r2, r12, r2, asr #17 ; out4 = Src[4] + (x6 >> 17)
- add r7, r7, #8 ;source stride
- Row8_Sat ; clamp the eight results (r1,r5,r4,r0,r2,r3,r14,r9) to 0..255
- orr r10, r1, r9
- orr r10, r10, r5
- orr r10, r10, r14
- orr r10, r10, r4
- orr r10, r10, r3
- orr r10, r10, r0
- orr r10, r10, r2
- bics r10, r10, #0xFF ; 0xFF = 255; result is zero when all eight values already fit in 0..255
- beq Row8_Write ; fast path: nothing to clamp
- mov r10, #0xFFFFFF00 ; bits that must all be clear for a value in 0..255
- tst r1, r10 ; Z = in range, N = value is negative
- movne r1, #0xFF ; out of range: take 255 ...
- movmi r1, #0x00 ; ... unless negative, then 0
- tst r9, r10
- movne r9, #0xFF
- movmi r9, #0x00
- tst r5, r10
- movne r5, #0xFF
- movmi r5, #0x00
- tst r14, r10
- movne r14, #0xFF
- movmi r14, #0x00
- tst r4, r10
- movne r4, #0xFF
- movmi r4, #0x00
- tst r3, r10
- movne r3, #0xFF
- movmi r3, #0x00
- tst r0, r10
- movne r0, #0xFF
- movmi r0, #0x00
- tst r2, r10
- movne r2, #0xFF
- movmi r2, #0x00
- Row8_Write ; store the 8 output bytes of this row
- strb r1, [r8]
- strb r5, [r8, #1]
- strb r4, [r8, #2]
- strb r0, [r8, #3]
- strb r2, [r8, #4]
- strb r3, [r8, #5]
- strb r14,[r8, #6]
- strb r9, [r8, #7]
- Row8_Next
- ldr r2, [sp, #4] ;DstStride
- ldr r1, [sp, #0] ;BlockEnd
- add r6,r6,#16 ;Block += 16
- add r8,r8,r2 ;Dst += DstStride
- cmp r6,r1
- bne Row8_Loop ; until r6 reaches BlockEnd
- ldmia sp!, {r0,r2,r4 - r12, pc} ; pop the frame pushed at entry; loading pc returns to the caller
- Row8_NoSrc ; no Src: just scale the eight results down before clamping
- mov r1, r1, asr #17
- mov r9, r9, asr #17
- mov r5, r5, asr #17
- mov r14, r14, asr #17
- mov r4, r4, asr #17
- mov r3, r3, asr #17
- mov r0, r0, asr #17
- mov r2, r2, asr #17
- b Row8_Sat
- ENDP
- END