mcomp_mips64.c
资源名称:tcpmp.rar [点击查看]
上传用户:wstnjxml
上传日期:2014-04-03
资源大小:7248k
文件大小:12k
源码类别:
Windows CE
开发平台:
C/C++
- /*****************************************************************************
- *
- * This program is free software ; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * $Id: mcomp_mips64.c 284 2005-10-04 08:54:26Z picard $
- *
- * The Core Pocket Media Player
- * Copyright (c) 2004-2005 Gabor Kovacs
- *
- ****************************************************************************/
- #include "../common.h"
- #include "softidct.h"
- #if defined(MIPS64)
- // Important: interrupts must be disabled before the 64-bit registers are used (but not for too long, or the system could freeze).
- // Important: $8 can't be used as a 64-bit register (it is trashed by some kernel routine).
- // $8 src end pointer
- // $4 src pointer
- // $5 dst pointer
- // $6 src stride
- // $7 dst stride
- // $2,$9 first item lower 8 bytes (in two forms)
- // $10,$11 first item upper 8 bytes (in two forms) - for 16x16 macroblocks
- // $12,$13 second item lower 8 bytes (in two forms)
- // $14,$15 second item upper 8 bytes (in two forms) - for 16x16 macroblocks
- // $24 0x0101 0101 0101 0101 - for non horver
- // $25 0xFEFE FEFE FEFE FEFE - for non horver
- // $24 rounding - for horver
- // $25 temporary - for 16x16 horver
- // $3 0x0303 0303 0303 0303 - for horver
- // $1 0xFCFC FCFC FCFC FCFC - for horver
- // $16 temporary - for 16x16 horver (must be saved/restored)
- #define SET_SRCEND8 /* $8 = $4 + ($6 << 3): the source pointer after 8 rows; the loops below stop when $4 reaches it. NOTE(review): no line-continuation backslashes are visible on these multi-line macros in this listing - confirm against the real file. */
- "sll $8,$6,3;" /* $8 = src stride * 8 */
- "addu $8,$4,$8;" /* $8 = src pointer + 8 rows */
- #define SET_SRCEND16 /* same end marker for 16 rows: $8 = $4 + ($6 << 4) */
- "sll $8,$6,4;" /* $8 = src stride * 16 */
- "addu $8,$4,$8;" /* $8 = src pointer + 16 rows */
- #define SET_MASKS /* per-byte masks for the two-sample averages: $24 = 0x0101010101010101, $25 = 0xFEFEFEFEFEFEFEFE */
- "li $24,0x01010101;" /* pattern for the low 32 bits */
- "dsll $25,$24,32;" /* same pattern shifted into the high 32 bits */
- "or $24,$24,$25;" /* $24 = 0x01 in every byte */
- "nor $25,$24,$0;" /* $25 = ~$24 = 0xFE in every byte */
- #define SET_MASKS2 /* per-byte masks for the four-sample (horver) averages: $3 = 0x0303030303030303, $1 = 0xFCFCFCFCFCFCFCFC */
- ".set noat;" /* $1 is used explicitly below, so the assembler is told not to use it as its own temporary */
- "li $3,0x03030303;" /* pattern for the low 32 bits */
- "dsll $1,$3,32;" /* same pattern shifted into the high 32 bits */
- "or $3,$3,$1;" /* $3 = 0x03 in every byte */
- "nor $1,$3,$0;" /* $1 = ~$3 = 0xFC in every byte */
- #define LOAD_FIRST8(ofs) /* first item, 8 bytes: $2 = raw bytes at ofs($4), $9 = every byte of $2 halved (needs $25 from SET_MASKS) */
- "uld $2, " #ofs "($4);" /* unaligned 8-byte load */
- "and $9,$2,$25;" /* clear the low bit of every byte so the shift cannot leak into the neighbouring byte */
- "dsrl $9,$9,1;" /* $9 = per-byte value >> 1 */
- #define LOAD_FIRST16(ofs) /* first item, 16 bytes: $2/$9 hold the lower 8 bytes (raw/halved), $10/$11 the upper 8 bytes (raw/halved) */
- "uld $2, " #ofs "($4);" /* lower 8 bytes */
- "uld $10," #ofs "+8($4);" /* upper 8 bytes */
- "and $9,$2,$25;"
- "and $11,$10,$25;"
- "dsrl $9,$9,1;" /* $9 = lower bytes >> 1 */
- "dsrl $11,$11,1;" /* $11 = upper bytes >> 1 */
- #define LOAD_SECOND8(ofs) /* second item, 8 bytes: $12 = raw bytes at ofs($4), $13 = every byte of $12 halved */
- "uld $12," #ofs "($4);" /* unaligned 8-byte load */
- "and $13,$12,$25;"
- "dsrl $13,$13,1;" /* $13 = per-byte value >> 1 */
- #define LOAD_SECOND16(ofs) /* second item, 16 bytes: $12/$13 hold the lower 8 bytes (raw/halved), $14/$15 the upper 8 bytes (raw/halved) */
- "uld $12," #ofs "($4);" /* lower 8 bytes */
- "uld $14," #ofs "+8($4);" /* upper 8 bytes */
- "and $13,$12,$25;"
- "and $15,$14,$25;"
- "dsrl $13,$13,1;" /* $13 = lower bytes >> 1 */
- "dsrl $15,$15,1;" /* $15 = upper bytes >> 1 */
- #define LOAD_FIRST8_HV /* first row, 8 bytes, horizontal pair pre-summed: with a = bytes at 0($4) and b = bytes at 1($4), $2 = (a & 3) + (b & 3) and $9 = (a >> 2) + (b >> 2) per byte (needs $1/$3 from SET_MASKS2); clobbers $10,$11 */
- "uld $2,0($4);" /* a */
- "uld $9,1($4);" /* b: same row, one byte to the right */
- "and $10,$2,$1;" /* upper 6 bits of every byte of a */
- "and $11,$9,$1;" /* upper 6 bits of every byte of b */
- "and $2,$2,$3;" /* low 2 bits of every byte of a */
- "and $9,$9,$3;" /* low 2 bits of every byte of b */
- "dsrl $10,$10,2;" /* a >> 2 per byte */
- "dsrl $11,$11,2;" /* b >> 2 per byte */
- "daddu $2,$2,$9;" /* $2 = sum of the low 2-bit parts */
- "daddu $9,$10,$11;" /* $9 = sum of the >>2 parts */
- #define LOAD_FIRST16_HV /* first row, 16 bytes: same split as LOAD_FIRST8_HV; results in $2/$9 (lower 8 bytes) and $10/$11 (upper 8 bytes); $16 and $25 are temporaries */
- "uld $2,0($4);" /* lower half: a */
- "uld $9,1($4);" /* lower half: b */
- "and $16,$2,$1;"
- "and $25,$9,$1;"
- "and $2,$2,$3;"
- "and $9,$9,$3;"
- "dsrl $16,$16,2;"
- "dsrl $25,$25,2;"
- "daddu $2,$2,$9;" /* $2 = low 2-bit sum, lower half */
- "daddu $9,$16,$25;" /* $9 = >>2 sum, lower half */
- "uld $10,8($4);" /* upper half: a */
- "uld $11,9($4);" /* upper half: b */
- "and $16,$10,$1;"
- "and $25,$11,$1;"
- "and $10,$10,$3;"
- "and $11,$11,$3;"
- "dsrl $16,$16,2;"
- "dsrl $25,$25,2;"
- "daddu $10,$10,$11;" /* $10 = low 2-bit sum, upper half */
- "daddu $11,$16,$25;" /* $11 = >>2 sum, upper half */
- #define LOAD_SECOND8_HV /* second row, 8 bytes: same split as LOAD_FIRST8_HV; results in $12 (low 2-bit sum) and $13 (>>2 sum); clobbers $14,$15 */
- "uld $12,0($4);" /* a */
- "uld $13,1($4);" /* b */
- "and $14,$12,$1;"
- "and $15,$13,$1;"
- "and $12,$12,$3;"
- "and $13,$13,$3;"
- "dsrl $14,$14,2;"
- "dsrl $15,$15,2;"
- "daddu $12,$12,$13;" /* $12 = low 2-bit sum */
- "daddu $13,$14,$15;" /* $13 = >>2 sum */
- #define LOAD_SECOND16_HV /* second row, 16 bytes: results in $12/$13 (lower 8 bytes) and $14/$15 (upper 8 bytes); $16 and $25 are temporaries */
- "uld $12,0($4);" /* lower half: a */
- "uld $13,1($4);" /* lower half: b */
- "and $16,$12,$1;"
- "and $25,$13,$1;"
- "and $12,$12,$3;"
- "and $13,$13,$3;"
- "dsrl $16,$16,2;"
- "dsrl $25,$25,2;"
- "daddu $12,$12,$13;" /* $12 = low 2-bit sum, lower half */
- "daddu $13,$16,$25;" /* $13 = >>2 sum, lower half */
- "uld $14,8($4);" /* upper half: a */
- "uld $15,9($4);" /* upper half: b */
- "and $16,$14,$1;"
- "and $25,$15,$1;"
- "and $14,$14,$3;"
- "and $15,$15,$3;"
- "dsrl $16,$16,2;"
- "dsrl $25,$25,2;"
- "daddu $14,$14,$15;" /* $14 = low 2-bit sum, upper half */
- "daddu $15,$16,$25;" /* $15 = >>2 sum, upper half */
- #define AVG8 /* per-byte average of first and second item, rounding up: $2 = ((a | b) & 1) + (a >> 1) + (b >> 1), i.e. (a + b + 1) >> 1; expects $9/$13 = halved items and $24 = 0x01 per byte */
- "or $2,$2,$12;" /* a | b */
- "and $2,$2,$24;" /* keep only the low bit of every byte: the rounding carry */
- "daddu $2,$2,$9;" /* + (a >> 1) */
- "daddu $2,$2,$13;" /* + (b >> 1) */
- #define AVG16 /* AVG8 applied to both halves of a 16-byte row: results in $2 (lower 8 bytes) and $10 (upper 8 bytes) */
- "or $2,$2,$12;"
- "or $10,$10,$14;"
- "and $2,$2,$24;"
- "and $10,$10,$24;"
- "daddu $2,$2,$9;"
- "daddu $10,$10,$11;"
- "daddu $2,$2,$13;"
- "daddu $10,$10,$15;"
- #define AVGROUND8 /* per-byte average without the +1 bias: $2 = ((a & b) & 1) + (a >> 1) + (b >> 1), i.e. (a + b) >> 1 */
- "and $2,$2,$12;" /* a & b: a carry only when both low bits are set */
- "and $2,$2,$24;" /* keep only the low bit of every byte */
- "daddu $2,$2,$9;" /* + (a >> 1) */
- "daddu $2,$2,$13;" /* + (b >> 1) */
- #define AVGROUND16 /* AVGROUND8 applied to both halves of a 16-byte row: results in $2 (lower 8 bytes) and $10 (upper 8 bytes) */
- "and $2,$2,$12;"
- "and $10,$10,$14;"
- "and $2,$2,$24;"
- "and $10,$10,$24;"
- "daddu $2,$2,$9;"
- "daddu $10,$10,$11;"
- "daddu $2,$2,$13;"
- "daddu $10,$10,$15;"
- #define SWAPSET8 /* the second item becomes the first item of the next iteration (8-byte case), so each source row is loaded only once in the vertical loops */
- "move $2,$12;"
- "move $9,$13;"
- #define SWAPSET16 /* same as SWAPSET8 for both halves of a 16-byte row */
- "move $2,$12;"
- "move $9,$13;"
- "move $10,$14;"
- "move $11,$15;"
- #define WRITE8 /* stores $2 to the current destination row and advances $5 by the dst stride $7. NOTE(review): only sdr is issued (no sdl), presumably relying on $5 being 8-byte aligned - confirm. */
- "sdr $2,0($5);"
- "addu $5,$5,$7;" /* next destination row */
- #define WRITE16 /* stores $2 (lower 8 bytes) and $10 (upper 8 bytes) to the destination row and advances $5 by $7; same sdr-only note as WRITE8 */
- "sdr $2,0($5);"
- "sdr $10,8($5);"
- "addu $5,$5,$7;" /* next destination row */
- #define SAVE /* pushes $16 on the stack; the 16x16 horver loaders use $16 as a temporary, so it must be saved/restored */
- "addiu $sp,$sp,-4;" /* reserve 4 bytes of stack */
- "sw $16,0(sp);" /* stores the low 32 bits of $16. NOTE(review): 'sp' is written without '$' here, unlike the addiu line - confirm the assembler accepts it. */
- #define RESTORE /* pops $16 again; must mirror SAVE */
- "lw $16,0(sp);" /* reload the saved 32-bit value */
- "addiu $sp,$sp,4;" /* release the stack slot */
- #ifdef MIPSVR41XX
- // cache without loading: CACHE16 issues a cache instruction (operation 13) on the destination row at 0($5) before it is written
- #define CACHE16 /* NOTE(review): presumably allocates the destination cache line without fetching it from memory - confirm operation 13 against the VR41xx manual */
- ".set noreorder;" /* keep the assembler from moving the cache instruction */
- "cache 13,0($5);"
- ".set reorder;"
- #else
- #define CACHE16 /* expands to nothing when MIPSVR41XX is not defined */
- #endif
- void CopyBlock(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) // Copies 8 rows of 8 bytes from Src to Dst. The asm reads the arguments straight from $4..$7 (src, dst, src stride, dst stride, per the register table at the top of this file).
- {
- __asm( SET_SRCEND8 // $8 = Src + 8*SrcStride, the loop end marker
- "loop:"
- "uld $2,0($4);" // unaligned load of the 8 source bytes of this row
- "addu $4,$4,$6;" // advance the source pointer by one row
- "sdr $2,0($5);" // store the 8 bytes to the destination row
- "addu $5,$5,$7;" // advance the destination pointer by one row
- "bne $4,$8,loop;"); // repeat until the source pointer reaches the end marker
- }
- void CopyBlockHor(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) // 8x8 horizontal half-sample copy: each output byte is (Src[x] + Src[x+1] + 1) >> 1; arguments are taken from $4..$7.
- {
- __asm ( SET_SRCEND8 // $8 = Src + 8*SrcStride, the loop end marker
- SET_MASKS // $24 = 0x01 per byte, $25 = 0xFE per byte
- "loophor:"
- LOAD_FIRST8(0) // bytes x..x+7 of the current row
- LOAD_SECOND8(1) // bytes x+1..x+8 of the same row
- "addu $4,$4,$6;" // advance the source pointer by one row
- AVG8 // per-byte average, rounding up
- WRITE8 // store the row and advance the destination pointer
- "bne $4,$8,loophor;"); // repeat for all 8 rows
- }
- void CopyBlockHorRound(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) // 8x8 horizontal half-sample copy without the +1 bias: each output byte is (Src[x] + Src[x+1]) >> 1; arguments are taken from $4..$7.
- {
- __asm ( SET_SRCEND8 // $8 = Src + 8*SrcStride, the loop end marker
- SET_MASKS // $24 = 0x01 per byte, $25 = 0xFE per byte
- "loophorround:"
- LOAD_FIRST8(0) // bytes x..x+7 of the current row
- LOAD_SECOND8(1) // bytes x+1..x+8 of the same row
- "addu $4,$4,$6;" // advance the source pointer by one row
- AVGROUND8 // per-byte average, truncating
- WRITE8 // store the row and advance the destination pointer
- "bne $4,$8,loophorround;"); // repeat for all 8 rows
- }
- void CopyBlockVer(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) // 8x8 vertical half-sample copy: each output row is the per-byte (a + b + 1) >> 1 of a source row and the row below it (9 source rows are read); arguments are taken from $4..$7.
- {
- __asm ( SET_SRCEND8 // $8 = Src + 8*SrcStride, the loop end marker
- SET_MASKS // $24 = 0x01 per byte, $25 = 0xFE per byte
- LOAD_FIRST8(0) // load row 0 once, before the loop
- "loopver:"
- "addu $4,$4,$6;" // step to the next source row
- LOAD_SECOND8(0) // load that row as the second item
- AVG8 // per-byte average of the two rows, rounding up
- WRITE8 // store the row and advance the destination pointer
- SWAPSET8 // the row just loaded becomes the first item of the next iteration
- "bne $4,$8,loopver;"); // repeat until the source pointer reaches the end marker
- }
- void CopyBlockVerRound(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) // 8x8 vertical half-sample copy without the +1 bias: each output row is the per-byte (a + b) >> 1 of a source row and the row below it; arguments are taken from $4..$7.
- {
- __asm ( SET_SRCEND8 // $8 = Src + 8*SrcStride, the loop end marker
- SET_MASKS // $24 = 0x01 per byte, $25 = 0xFE per byte
- LOAD_FIRST8(0) // load row 0 once, before the loop
- "loopverround:"
- "addu $4,$4,$6;" // step to the next source row
- LOAD_SECOND8(0) // load that row as the second item
- AVGROUND8 // per-byte average of the two rows, truncating
- WRITE8 // store the row and advance the destination pointer
- SWAPSET8 // the row just loaded becomes the first item of the next iteration
- "bne $4,$8,loopverround;"); // repeat until the source pointer reaches the end marker
- }
- void CopyBlockHorVer(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) // 8x8 diagonal half-sample copy: each output byte is (a + b + c + d + 2) >> 2 of the 2x2 source neighbourhood; arguments are taken from $4..$7.
- {
- __asm ( SET_SRCEND8 // $8 = Src + 8*SrcStride, the loop end marker
- SET_MASKS2 // $3 = 0x03 per byte, $1 = 0xFC per byte
- "dsll $24,$3,1;" // 0x06 per byte
- "and $24,$24,$3;" // $24 = 0x0202 0202 0202 0202, the rounding term
- // preprocessing: horizontal pair sums of row 0, computed once before the loop
- LOAD_FIRST8_HV // $2 = low 2-bit sum, $9 = >>2 sum of the first row
- "loophorver:"
- "addu $4,$4,$6;" // step to the next source row
- LOAD_SECOND8_HV // $12 = low 2-bit sum, $13 = >>2 sum of that row
- "daddu $2,$2,$12;" // low 2-bit parts of all four samples
- "daddu $9,$9,$13;" // >>2 parts of all four samples
- "daddu $2,$2,$24;" // add the rounding term
- "and $2,$2,$1;" // drop the bits that the shift below would push into the neighbouring byte
- "dsrl $2,$2,2;" // (low sum + rounding) >> 2 per byte
- "daddu $2,$2,$9;" // final per-byte result
- WRITE8 // store the row and advance the destination pointer
- SWAPSET8 // this row's pair sums become the first item of the next iteration
- "bne $4,$8,loophorver;"); // repeat until the source pointer reaches the end marker
- }
- void CopyBlockHorVerRound(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) // 8x8 diagonal half-sample copy with the smaller rounding term: each output byte is (a + b + c + d + 1) >> 2 of the 2x2 source neighbourhood; arguments are taken from $4..$7.
- {
- __asm ( SET_SRCEND8 // $8 = Src + 8*SrcStride, the loop end marker
- SET_MASKS2 // $3 = 0x03 per byte, $1 = 0xFC per byte
- "dsrl $24,$3,1;" // 0x0303.. shifted right by one bit
- "and $24,$24,$3;" // $24 = 0x0101 0101 0101 0101, the rounding term
- // preprocessing: horizontal pair sums of row 0, computed once before the loop
- LOAD_FIRST8_HV // $2 = low 2-bit sum, $9 = >>2 sum of the first row
- "loophorverround:"
- "addu $4,$4,$6;" // step to the next source row
- LOAD_SECOND8_HV // $12 = low 2-bit sum, $13 = >>2 sum of that row
- "daddu $2,$2,$12;" // low 2-bit parts of all four samples
- "daddu $9,$9,$13;" // >>2 parts of all four samples
- "daddu $2,$2,$24;" // add the rounding term
- "and $2,$2,$1;" // drop the bits that the shift below would push into the neighbouring byte
- "dsrl $2,$2,2;" // (low sum + rounding) >> 2 per byte
- "daddu $2,$2,$9;" // final per-byte result
- WRITE8 // store the row and advance the destination pointer
- SWAPSET8 // this row's pair sums become the first item of the next iteration
- "bne $4,$8,loophorverround;"); // repeat until the source pointer reaches the end marker
- }
- void CopyMBlock(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) // Copies 16 rows of 16 bytes from Src to Dst; arguments are taken from $4..$7 (src, dst, src stride, dst stride).
- {
- __asm( SET_SRCEND16 // $8 = Src + 16*SrcStride, the loop end marker
- "loopm:"
- CACHE16 // optional cache instruction on the destination row (only with MIPSVR41XX)
- "uld $2,0($4);" // lower 8 source bytes (unaligned load)
- "uld $10,8($4);" // upper 8 source bytes
- "addu $4,$4,$6;" // advance the source pointer by one row
- WRITE16 // store both halves and advance the destination pointer
- "bne $4,$8,loopm;"); // repeat for all 16 rows
- }
- void CopyMBlockHor(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) // 16x16 horizontal half-sample copy: each output byte is (Src[x] + Src[x+1] + 1) >> 1; arguments are taken from $4..$7.
- {
- __asm ( SET_SRCEND16 // $8 = Src + 16*SrcStride, the loop end marker
- SET_MASKS // $24 = 0x01 per byte, $25 = 0xFE per byte
- "loopmhor:"
- LOAD_FIRST16(0) // bytes x..x+15 of the current row
- LOAD_SECOND16(1) // bytes x+1..x+16 of the same row
- "addu $4,$4,$6;" // advance the source pointer by one row
- CACHE16 // optional cache instruction on the destination row (only with MIPSVR41XX)
- AVG16 // per-byte average, rounding up
- WRITE16 // store both halves and advance the destination pointer
- "bne $4,$8,loopmhor;"); // repeat for all 16 rows
- }
- void CopyMBlockHorRound(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) // 16x16 horizontal half-sample copy without the +1 bias: each output byte is (Src[x] + Src[x+1]) >> 1; arguments are taken from $4..$7.
- {
- __asm ( SET_SRCEND16 // $8 = Src + 16*SrcStride, the loop end marker
- SET_MASKS // $24 = 0x01 per byte, $25 = 0xFE per byte
- "loopmhorround:"
- LOAD_FIRST16(0) // bytes x..x+15 of the current row
- LOAD_SECOND16(1) // bytes x+1..x+16 of the same row
- "addu $4,$4,$6;" // advance the source pointer by one row
- CACHE16 // optional cache instruction on the destination row (only with MIPSVR41XX)
- AVGROUND16 // per-byte average, truncating
- WRITE16 // store both halves and advance the destination pointer
- "bne $4,$8,loopmhorround;"); // repeat for all 16 rows
- }
- void CopyMBlockVer(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) // 16x16 vertical half-sample copy: each output row is the per-byte (a + b + 1) >> 1 of a source row and the row below it (17 source rows are read); arguments are taken from $4..$7.
- {
- __asm ( SET_SRCEND16 // $8 = Src + 16*SrcStride, the loop end marker
- SET_MASKS // $24 = 0x01 per byte, $25 = 0xFE per byte
- LOAD_FIRST16(0) // load row 0 once, before the loop
- "loopmver:"
- "addu $4,$4,$6;" // step to the next source row
- LOAD_SECOND16(0) // load that row as the second item
- CACHE16 // optional cache instruction on the destination row (only with MIPSVR41XX)
- AVG16 // per-byte average of the two rows, rounding up
- WRITE16 // store both halves and advance the destination pointer
- SWAPSET16 // the row just loaded becomes the first item of the next iteration
- "bne $4,$8,loopmver;" // repeat until the source pointer reaches the end marker
- );
- }
- void CopyMBlockVerRound(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) // 16x16 vertical half-sample copy without the +1 bias: each output row is the per-byte (a + b) >> 1 of a source row and the row below it; arguments are taken from $4..$7.
- {
- __asm ( SET_SRCEND16 // $8 = Src + 16*SrcStride, the loop end marker
- SET_MASKS // $24 = 0x01 per byte, $25 = 0xFE per byte
- LOAD_FIRST16(0) // load row 0 once, before the loop
- "loopmverround:"
- "addu $4,$4,$6;" // step to the next source row
- LOAD_SECOND16(0) // load that row as the second item
- CACHE16 // optional cache instruction on the destination row (only with MIPSVR41XX)
- AVGROUND16 // per-byte average of the two rows, truncating
- WRITE16 // store both halves and advance the destination pointer
- SWAPSET16 // the row just loaded becomes the first item of the next iteration
- "bne $4,$8,loopmverround;"); // repeat until the source pointer reaches the end marker
- }
- void CopyMBlockHorVer(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) // 16x16 diagonal half-sample copy: each output byte is (a + b + c + d + 2) >> 2 of the 2x2 source neighbourhood; arguments are taken from $4..$7.
- {
- __asm ( SAVE // push $16, which the 16-byte HV loaders use as a temporary
- SET_SRCEND16 // $8 = Src + 16*SrcStride, the loop end marker
- SET_MASKS2 // $3 = 0x03 per byte, $1 = 0xFC per byte
- "dsll $24,$3,1;" // 0x06 per byte
- "and $24,$24,$3;" // $24 = 0x0202 0202 0202 0202, the rounding term
- // preprocessing: horizontal pair sums of row 0, computed once before the loop
- LOAD_FIRST16_HV // $2/$9 = lower half sums, $10/$11 = upper half sums of the first row
- "loopmhorver:"
- "addu $4,$4,$6;" // step to the next source row
- LOAD_SECOND16_HV // $12/$13 = lower half sums, $14/$15 = upper half sums of that row
- CACHE16); // optional cache instruction on the destination row (only with MIPSVR41XX)
- __asm ( "daddu $2,$2,$12;" // NOTE(review): the loop body continues in a second __asm statement; this presumably relies on the compiler emitting nothing in between - confirm
- "daddu $9,$9,$13;" // >>2 parts of all four samples, lower half
- "daddu $10,$10,$14;" // low 2-bit parts of all four samples, upper half
- "daddu $11,$11,$15;" // >>2 parts of all four samples, upper half
- "daddu $2,$2,$24;" // add the rounding term, lower half
- "daddu $10,$10,$24;" // add the rounding term, upper half
- "and $2,$2,$1;" // drop the bits that the shift below would push into the neighbouring byte
- "and $10,$10,$1;"
- "dsrl $2,$2,2;" // (low sum + rounding) >> 2 per byte
- "dsrl $10,$10,2;"
- "daddu $2,$2,$9;" // final result, lower 8 bytes
- "daddu $10,$10,$11;" // final result, upper 8 bytes
- WRITE16 // store both halves and advance the destination pointer
- SWAPSET16 // this row's pair sums become the first item of the next iteration
- "bne $4,$8,loopmhorver;" // repeat until the source pointer reaches the end marker
- RESTORE); // pop $16
- }
- void CopyMBlockHorVerRound(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) // 16x16 diagonal half-sample copy with the smaller rounding term: each output byte is (a + b + c + d + 1) >> 2 of the 2x2 source neighbourhood; arguments are taken from $4..$7.
- {
- __asm ( SAVE // push $16, which the 16-byte HV loaders use as a temporary
- SET_SRCEND16 // $8 = Src + 16*SrcStride, the loop end marker
- SET_MASKS2 // $3 = 0x03 per byte, $1 = 0xFC per byte
- "dsrl $24,$3,1;" // 0x0303.. shifted right by one bit
- "and $24,$24,$3;" // $24 = 0x0101 0101 0101 0101, the rounding term
- // preprocessing: horizontal pair sums of row 0, computed once before the loop
- LOAD_FIRST16_HV // $2/$9 = lower half sums, $10/$11 = upper half sums of the first row
- "loopmhorverround:"
- "addu $4,$4,$6;"); // step to the next source row
- __asm ( LOAD_SECOND16_HV // NOTE(review): the loop body continues in a second __asm statement; this presumably relies on the compiler emitting nothing in between - confirm
- CACHE16 // optional cache instruction on the destination row (only with MIPSVR41XX)
- "daddu $2,$2,$12;" // low 2-bit parts of all four samples, lower half
- "daddu $9,$9,$13;" // >>2 parts of all four samples, lower half
- "daddu $10,$10,$14;" // low 2-bit parts of all four samples, upper half
- "daddu $11,$11,$15;" // >>2 parts of all four samples, upper half
- "daddu $2,$2,$24;" // add the rounding term, lower half
- "daddu $10,$10,$24;" // add the rounding term, upper half
- "and $2,$2,$1;" // drop the bits that the shift below would push into the neighbouring byte
- "and $10,$10,$1;"
- "dsrl $2,$2,2;" // (low sum + rounding) >> 2 per byte
- "dsrl $10,$10,2;"
- "daddu $2,$2,$9;" // final result, lower 8 bytes
- "daddu $10,$10,$11;" // final result, upper 8 bytes
- WRITE16 // store both halves and advance the destination pointer
- SWAPSET16 // this row's pair sums become the first item of the next iteration
- "bne $4,$8,loopmhorverround;" // repeat until the source pointer reaches the end marker
- RESTORE); // pop $16
- }
- void AddBlock8x8(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) // Blends an 8x8 block into Dst in place: each Dst byte becomes (Src + Dst + 1) >> 1; arguments are taken from $4..$7.
- {
- __asm( SET_SRCEND8 // $8 = Src + 8*SrcStride, the loop end marker
- SET_MASKS // $24 = 0x01 per byte, $25 = 0xFE per byte
- "loopadd:"
- "ldr $2,0($4);" // load 8 source bytes. NOTE(review): ldr is used without ldl, presumably relying on 8-byte alignment - confirm
- "addu $4,$4,$6;" // advance the source pointer by one row
- "ldr $9,0($5);" // load the 8 bytes currently at the destination
- "and $11,$2,$25;" // source with the low bit of every byte cleared
- "or $2,$2,$9;" // src | dst
- "and $2,$2,$24;" // low bit of every byte: the rounding carry
- "dsrl $11,$11,1;" // src >> 1 per byte
- "daddu $2,$2,$11;" // carry + (src >> 1)
- "and $9,$9,$25;" // destination with the low bit of every byte cleared
- "dsrl $9,$9,1;" // dst >> 1 per byte
- "daddu $2,$2,$9;" // + (dst >> 1): the per-byte average, rounding up
- "sdr $2,0($5);" // write the blended row back to the destination
- "addu $5,$5,$7;" // advance the destination pointer by one row
- "bne $4,$8,loopadd;"); // repeat for all 8 rows
- }
- void AddBlock16x16(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) // Blends a 16x16 block into Dst in place: each Dst byte becomes (Src + Dst + 1) >> 1; arguments are taken from $4..$7.
- {
- __asm( SET_SRCEND16 // $8 = Src + 16*SrcStride, the loop end marker
- SET_MASKS // $24 = 0x01 per byte, $25 = 0xFE per byte
- "loopadd16:"
- "ldr $2,0($4);" // lower 8 source bytes. NOTE(review): ldr is used without ldl, presumably relying on 8-byte alignment - confirm
- "ldr $10,8($4);" // upper 8 source bytes
- #ifdef MIPSVR41XX
- ".set noreorder;" // keep the assembler from moving the cache instruction
- "cache 17,0($4);" // hit invalidate (lose changes) on the source row that was just read
- ".set reorder;"
- #endif
- "addu $4,$4,$6;" // advance the source pointer by one row
- "ldr $9,0($5);" // lower 8 bytes currently at the destination
- "and $11,$2,$25;" // lower source half with the low bit of every byte cleared
- "or $2,$2,$9;" // src | dst
- "and $2,$2,$24;" // low bit of every byte: the rounding carry
- "dsrl $11,$11,1;" // src >> 1 per byte
- "daddu $2,$2,$11;" // carry + (src >> 1)
- "and $9,$9,$25;" // lower destination half with the low bit of every byte cleared
- "dsrl $9,$9,1;" // dst >> 1 per byte
- "daddu $2,$2,$9;" // lower half result: the per-byte average, rounding up
- "ldr $11,8($5);" // upper 8 bytes currently at the destination
- "and $9,$10,$25;" // upper source half with the low bit of every byte cleared
- "or $10,$10,$11;" // src | dst
- "and $10,$10,$24;" // low bit of every byte: the rounding carry
- "dsrl $9,$9,1;" // src >> 1 per byte
- "daddu $10,$10,$9;" // carry + (src >> 1)
- "and $11,$11,$25;" // upper destination half with the low bit of every byte cleared
- "dsrl $11,$11,1;" // dst >> 1 per byte
- "daddu $10,$10,$11;" // upper half result: the per-byte average, rounding up
- "sdr $2,0($5);" // write the lower half back to the destination
- "sdr $10,8($5);" // write the upper half back to the destination
- "addu $5,$5,$7;" // advance the destination pointer by one row
- "bne $4,$8,loopadd16;"); // repeat for all 16 rows
- }
- #endif