mcomp_mips32.c
上传用户:wstnjxml
上传日期:2014-04-03
资源大小:7248k
文件大小:10k
源码类别:

Windows CE

开发平台:

C/C++

  1. /*****************************************************************************
  2.  *
  3.  * This program is free software ; you can redistribute it and/or modify
  4.  * it under the terms of the GNU General Public License as published by
  5.  * the Free Software Foundation; either version 2 of the License, or
  6.  * (at your option) any later version.
  7.  *
  8.  * This program is distributed in the hope that it will be useful,
  9.  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10.  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11.  * GNU General Public License for more details.
  12.  *
  13.  * You should have received a copy of the GNU General Public License
  14.  * along with this program; if not, write to the Free Software
  15.  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  16.  *
  17.  * $Id: mcomp_mips32.c 323 2005-11-01 20:52:32Z picard $
  18.  *
  19.  * The Core Pocket Media Player
  20.  * Copyright (c) 2004-2005 Gabor Kovacs
  21.  *
  22.  ****************************************************************************/
  23. #include "../common.h"
  24. #include "softidct.h"
  25. #if defined(MIPS32)
  26. // $8 src end pointer
  27. // $4 src pointer
  28. // $5 dst pointer
  29. // $6 src stride
  30. // $7 dst stride
  31. // $2,$9 first item lower 4 bytes (in two forms)
  32. // $10,$11 first item upper 4 bytes (in two forms)
  33. // $12,$13 second item lower 4 bytes (in two forms)
  34. // $14,$15 second item upper 4 bytes (in two forms)
  35. // $24 0x0101 0101 - for non horver
  36. // $25 0xFEFE FEFE - for non horver
  37. // $24 rounding - for horver
  38. // $25 temporary - for horver
  39. // $3 0x0303 0303 - for horver
  40. // $1 0xFCFC FCFC - for horver
  41. // $16 temporary - for horver (must be saved/restored)
  // Loop end pointer for an 8-row block:
  // $8 = $4 (src pointer) + ($6 (src stride) << 3).
  // The row loops terminate when the advancing src pointer $4 equals $8.
  #define SET_SRCEND8 \
      "sll $8,$6,3;" \
      "addu $8,$4,$8;"
  // Loop end pointer for a 16-row block:
  // $8 = $4 (src pointer) + ($6 (src stride) << 4).
  #define SET_SRCEND16 \
      "sll $8,$6,4;" \
      "addu $8,$4,$8;"
  // Per-byte masks for the two-sample averaging paths:
  //   $24 = 0x01010101 (lowest bit of every byte)
  //   $25 = ~$24 = 0xFEFEFEFE (upper seven bits of every byte)
  #define SET_MASKS \
      "li $24,0x01010101;" \
      "nor $25,$24,$0;"
  // Per-byte masks for the four-sample (hor+ver) averaging paths:
  //   $3 = 0x03030303 (lowest two bits of every byte)
  //   $1 = ~$3 = 0xFCFCFCFC (upper six bits of every byte)
  // ".set noat" is issued first because $1 is the assembler temporary
  // register and is used here as an ordinary register.
  // NOTE(review): nothing in this file switches back with ".set at" -
  // confirm the toolchain does not need it restored after these routines.
  #define SET_MASKS2 \
      ".set noat;" \
      "li $3,0x03030303;" \
      "nor $1,$3,$0;"
  // Loads the "first" 8 source bytes from ofs($4) with unaligned word loads
  // and prepares their halved form for averaging:
  //   $2  = bytes 0..3,  $9  = ($2  & $25) >> 1
  //   $10 = bytes 4..7,  $11 = ($10 & $25) >> 1
  // Expects $25 to hold 0xFEFEFEFE (see SET_MASKS) so that the shift cannot
  // move a bit from one byte into its neighbour.
  // ofs is pasted into the instruction text, so it must be a literal offset.
  #define LOAD_FIRST8(ofs) \
      "ulw $2, " #ofs "($4);" \
      "ulw $10," #ofs "+4($4);" \
      "and $9,$2,$25;" \
      "and $11,$10,$25;" \
      "srl $9,$9,1;" \
      "srl $11,$11,1;"
  // Loads the "second" 8 source bytes from ofs($4) with unaligned word loads
  // and prepares their halved form for averaging:
  //   $12 = bytes 0..3,  $13 = ($12 & $25) >> 1
  //   $14 = bytes 4..7,  $15 = ($14 & $25) >> 1
  // Expects $25 to hold 0xFEFEFEFE (see SET_MASKS).
  // ofs is pasted into the instruction text, so it must be a literal offset.
  #define LOAD_SECOND8(ofs) \
      "ulw $12," #ofs "($4);" \
      "ulw $14," #ofs "+4($4);" \
      "and $13,$12,$25;" \
      "and $15,$14,$25;" \
      "srl $13,$13,1;" \
      "srl $15,$15,1;"
  // Horizontal pre-sum of the "first" row for the hor+ver paths.
  // For each byte x at 0($4) and its right neighbour y at 1($4):
  //   $2  = (x & 3) + (y & 3)      bytes 0..3, low two bits summed
  //   $9  = (x >> 2) + (y >> 2)    bytes 0..3, upper six bits summed
  //   $10 / $11 hold the same pair for bytes 4..7 (loads at 4($4) and 5($4)).
  // Expects $3 = 0x03030303 and $1 = 0xFCFCFCFC (see SET_MASKS2).
  // $16 and $25 are used as scratch registers and are overwritten.
  #define LOAD_FIRST8_HV \
      "ulw $2,0($4);" \
      "ulw $9,1($4);" \
      "and $16,$2,$1;" \
      "and $25,$9,$1;" \
      "and $2,$2,$3;" \
      "and $9,$9,$3;" \
      "srl $16,$16,2;" \
      "srl $25,$25,2;" \
      "addu $2,$2,$9;" \
      "addu $9,$16,$25;" \
      "ulw $10,4($4);" \
      "ulw $11,5($4);" \
      "and $16,$10,$1;" \
      "and $25,$11,$1;" \
      "and $10,$10,$3;" \
      "and $11,$11,$3;" \
      "srl $16,$16,2;" \
      "srl $25,$25,2;" \
      "addu $10,$10,$11;" \
      "addu $11,$16,$25;"
  // Horizontal pre-sum of the "second" row for the hor+ver paths.
  // For each byte x at 0($4) and its right neighbour y at 1($4):
  //   $12 = (x & 3) + (y & 3)      bytes 0..3, low two bits summed
  //   $13 = (x >> 2) + (y >> 2)    bytes 0..3, upper six bits summed
  //   $14 / $15 hold the same pair for bytes 4..7 (loads at 4($4) and 5($4)).
  // Expects $3 = 0x03030303 and $1 = 0xFCFCFCFC (see SET_MASKS2).
  // $16 and $25 are used as scratch registers and are overwritten.
  #define LOAD_SECOND8_HV \
      "ulw $12,0($4);" \
      "ulw $13,1($4);" \
      "and $16,$12,$1;" \
      "and $25,$13,$1;" \
      "and $12,$12,$3;" \
      "and $13,$13,$3;" \
      "srl $16,$16,2;" \
      "srl $25,$25,2;" \
      "addu $12,$12,$13;" \
      "addu $13,$16,$25;" \
      "ulw $14,4($4);" \
      "ulw $15,5($4);" \
      "and $16,$14,$1;" \
      "and $25,$15,$1;" \
      "and $14,$14,$3;" \
      "and $15,$15,$3;" \
      "srl $16,$16,2;" \
      "srl $25,$25,2;" \
      "addu $14,$14,$15;" \
      "addu $15,$16,$25;"
  // Per-byte average of the first and second 8-byte items, rounding up:
  //   result = ((a | b) & 1) + (a >> 1) + (b >> 1)   ==  (a + b + 1) >> 1
  // Inputs: $2/$10 and $12/$14 raw words, $9/$11 and $13/$15 their halved
  // forms (see LOAD_FIRST8 / LOAD_SECOND8), $24 = 0x01010101.
  // Outputs: $2 (bytes 0..3) and $10 (bytes 4..7).
  #define AVG8 \
      "or $2,$2,$12;" \
      "or $10,$10,$14;" \
      "and $2,$2,$24;" \
      "and $10,$10,$24;" \
      "addu $2,$2,$9;" \
      "addu $10,$10,$11;" \
      "addu $2,$2,$13;" \
      "addu $10,$10,$15;"
  // Per-byte average of the first and second 8-byte items, rounding down:
  //   result = ((a & b) & 1) + (a >> 1) + (b >> 1)   ==  (a + b) >> 1
  // Differs from AVG8 only in combining the low bits with "and" instead of
  // "or". Inputs and outputs use the same registers as AVG8.
  #define AVGROUND8 \
      "and $2,$2,$12;" \
      "and $10,$10,$14;" \
      "and $2,$2,$24;" \
      "and $10,$10,$24;" \
      "addu $2,$2,$9;" \
      "addu $10,$10,$11;" \
      "addu $2,$2,$13;" \
      "addu $10,$10,$15;"
  // Makes the "second" item the new "first" item for the next loop pass:
  // copies $12,$13,$14,$15 into $2,$9,$10,$11 respectively.
  #define SWAPSET8 \
      "move $2,$12;" \
      "move $9,$13;" \
      "move $10,$14;" \
      "move $11,$15;"
  // Stores the 8 result bytes ($2 = bytes 0..3, $10 = bytes 4..7) at the
  // dst pointer $5, then advances $5 by the dst stride $7.
  #define WRITE8 \
      "sw $2,0($5);" \
      "sw $10,4($5);" \
      "addu $5,$5,$7;"
  // Pushes $16 on the stack; the hor+ver routines use $16 as scratch and
  // must hand it back unchanged (see RESTORE).
  // The base register is written "$sp", matching the addiu on the line
  // before it; the bare "sp" spelling is not a register name to assemblers
  // that require the "$" prefix.
  #define SAVE \
      "addiu $sp,$sp,-4;" \
      "sw $16,0($sp);"
  // Pops $16 from the stack, undoing SAVE.
  // The base register is written "$sp", matching the addiu on the line
  // after it; the bare "sp" spelling is not a register name to assemblers
  // that require the "$" prefix.
  #define RESTORE \
      "lw $16,0($sp);" \
      "addiu $sp,$sp,4;"
  144. void CopyBlock(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) 
  145. __asm( SET_SRCEND8
  146. "loop:"
  147. "ulw $2,0($4);"
  148. "ulw $10,4($4);" 
  149. "addu $4,$4,$6;"
  150. "sw $2,0($5);"
  151. "sw $10,4($5);"
  152. "addu $5,$5,$7;" 
  153. "bne $4,$8,loop;"); 
  154. void CopyBlockHor(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) 
  155. __asm ( SET_SRCEND8
  156. SET_MASKS
  157. "loophor:"
  158. LOAD_FIRST8(0)
  159. LOAD_SECOND8(1)
  160. "addu $4,$4,$6;" 
  161. AVG8
  162. WRITE8
  163. "bne $4,$8,loophor;");
  164. void CopyBlockHorRound(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) 
  165. __asm ( SET_SRCEND8
  166. SET_MASKS
  167. "loophorround:"
  168. LOAD_FIRST8(0)
  169. LOAD_SECOND8(1)
  170. "addu $4,$4,$6;" 
  171. AVGROUND8
  172. WRITE8
  173. "bne $4,$8,loophorround;");
  174. void CopyBlockVer(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) 
  175. __asm ( SET_SRCEND8
  176. SET_MASKS
  177. LOAD_FIRST8(0)
  178. "loopver:"
  179. "addu $4,$4,$6;" 
  180. LOAD_SECOND8(0)
  181. AVG8
  182. WRITE8
  183. SWAPSET8
  184. "bne $4,$8,loopver;");
  185. void CopyBlockVerRound(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) 
  186. __asm ( SET_SRCEND8
  187. SET_MASKS
  188. LOAD_FIRST8(0)
  189. "loopverround:"
  190. "addu $4,$4,$6;" 
  191. LOAD_SECOND8(0)
  192. AVGROUND8
  193. WRITE8
  194. SWAPSET8
  195. "bne $4,$8,loopverround;");
  196. }
  197. void CopyBlockHorVer(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) 
  198. __asm ( SAVE
  199.     SET_SRCEND8
  200. SET_MASKS2
  201. "sll $24,$3,1;"  
  202. "and $24,$24,$3;" // 0x0202 0202
  203. //preprocessing
  204. LOAD_FIRST8_HV
  205. "loophorver:"
  206. "addu $4,$4,$6;"
  207. LOAD_SECOND8_HV);
  208. __asm ( "addu $2,$2,$12;"
  209. "addu $9,$9,$13;"
  210. "addu $10,$10,$14;"
  211. "addu $11,$11,$15;"
  212. "addu $2,$2,$24;"
  213. "addu $10,$10,$24;"
  214. "and $2,$2,$1;"
  215. "and $10,$10,$1;"
  216. "srl $2,$2,2;"
  217. "srl $10,$10,2;"
  218. "addu $2,$2,$9;"
  219. "addu $10,$10,$11;"
  220. WRITE8
  221. SWAPSET8
  222. "bne $4,$8,loophorver;"
  223. RESTORE);
  224. void CopyBlockHorVerRound(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) 
  225. __asm ( SAVE
  226.     SET_SRCEND8
  227. SET_MASKS2
  228. "srl $24,$3,1;"  
  229. "and $24,$24,$3;" // 0x0101 0101
  230. //preprocessing
  231. LOAD_FIRST8_HV
  232. "loophorverround:"
  233. "addu $4,$4,$6;"
  234. LOAD_SECOND8_HV);
  235. __asm ( "addu $2,$2,$12;"
  236. "addu $9,$9,$13;"
  237. "addu $10,$10,$14;"
  238. "addu $11,$11,$15;"
  239. "addu $2,$2,$24;"
  240. "addu $10,$10,$24;"
  241. "and $2,$2,$1;"
  242. "and $10,$10,$1;"
  243. "srl $2,$2,2;"
  244. "srl $10,$10,2;"
  245. "addu $2,$2,$9;"
  246. "addu $10,$10,$11;"
  247. WRITE8
  248. SWAPSET8
  249. "bne $4,$8,loophorverround;"
  250. RESTORE);
  251. }
  252. void AddBlock8x8(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) 
  253. __asm( SET_SRCEND8
  254. SET_MASKS
  255. "loopadd:"
  256. "lw $2,0($4);" 
  257. "lw $10,4($4);" 
  258. "addu $4,$4,$6;" 
  259. "lw $9,0($5);"
  260. "and $11,$2,$25;"
  261. "or $2,$2,$9;"
  262. "and $2,$2,$24;"
  263. "srl $11,$11,1;"
  264. "addu $2,$2,$11;"
  265. "and $9,$9,$25;"
  266. "srl $9,$9,1;"
  267. "addu $2,$2,$9;"
  268. "lw $11,4($5);"
  269. "and $9,$10,$25;"
  270. "or $10,$10,$11;"
  271. "and $10,$10,$24;"
  272. "srl $9,$9,1;"
  273. "addu $10,$10,$9;"
  274. "and $11,$11,$25;"
  275. "srl $11,$11,1;"
  276. "addu $10,$10,$11;"
  277. "sw $2,0($5);"
  278. "sw $10,4($5);"
  279. "addu $5,$5,$7;" 
  280. "bne $4,$8,loopadd;"); 
  281. void AddBlock4x4(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) 
  282. __asm( SET_MASKS
  283. "lw $2,0($4);" 
  284. "addu $4,$4,$6;" 
  285. "lw $10,0($4);" 
  286. "addu $4,$4,$6;" 
  287. "lw $9,0($5);"
  288. "and $11,$2,$25;"
  289. "or $2,$2,$9;"
  290. "and $2,$2,$24;"
  291. "srl $11,$11,1;"
  292. "addu $2,$2,$11;"
  293. "and $9,$9,$25;"
  294. "srl $9,$9,1;"
  295. "addu $2,$2,$9;"
  296. "sw $2,0($5);"
  297. "addu $5,$5,$7;"
  298. "lw $11,0($5);"
  299. "and $9,$10,$25;"
  300. "or $10,$10,$11;"
  301. "and $10,$10,$24;"
  302. "srl $9,$9,1;"
  303. "addu $10,$10,$9;"
  304. "and $11,$11,$25;"
  305. "srl $11,$11,1;"
  306. "addu $10,$10,$11;"
  307. "sw $10,0($5);"
  308. "addu $5,$5,$7;" 
  309. "lw $2,0($4);" 
  310. "addu $4,$4,$6;" 
  311. "lw $10,0($4);" 
  312. "addu $4,$4,$6;" 
  313. "lw $9,0($5);"
  314. "and $11,$2,$25;"
  315. "or $2,$2,$9;"
  316. "and $2,$2,$24;"
  317. "srl $11,$11,1;"
  318. "addu $2,$2,$11;"
  319. "and $9,$9,$25;"
  320. "srl $9,$9,1;"
  321. "addu $2,$2,$9;"
  322. "sw $2,0($5);"
  323. "addu $5,$5,$7;"
  324. "lw $11,0($5);"
  325. "and $9,$10,$25;"
  326. "or $10,$10,$11;"
  327. "and $10,$10,$24;"
  328. "srl $9,$9,1;"
  329. "addu $10,$10,$9;"
  330. "and $11,$11,$25;"
  331. "srl $11,$11,1;"
  332. "addu $10,$10,$11;"
  333. "sw $10,0($5);"
  334. "addu $5,$5,$7;" ); 
  335. void CopyBlock16x16(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) 
  336. __asm( SET_SRCEND16
  337. "loopcopy16:"
  338. "lw $2,0($4);" 
  339. "lw $10,4($4);" 
  340. "lw $12,8($4);" 
  341. "lw $14,12($4);" 
  342. "addu $4,$4,$6;"
  343. "sw $2,0($5);"
  344. "sw $10,4($5);"
  345. "sw $12,8($5);"
  346. "sw $14,12($5);"
  347. "addu $5,$5,$7;" 
  348. "bne $4,$8,loopcopy16;"); 
  349. void CopyBlock8x8(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) 
  350. __asm( SET_SRCEND8
  351. "loopcopy8:"
  352. "lw $2,0($4);" 
  353. "lw $10,4($4);" 
  354. "addu $4,$4,$6;"
  355. "lw $12,0($4);" 
  356. "lw $14,4($4);" 
  357. "addu $4,$4,$6;"
  358. "sw $2,0($5);"
  359. "sw $10,4($5);"
  360. "addu $5,$5,$7;" 
  361. "sw $12,0($5);"
  362. "sw $14,4($5);"
  363. "addu $5,$5,$7;" 
  364. "bne $4,$8,loopcopy8;"); 
  365. void CopyBlock4x4(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) 
  366. __asm( "lw $2,0($4);" 
  367. "addu $4,$4,$6;"
  368. "lw $10,0($4);" 
  369. "addu $4,$4,$6;"
  370. "lw $12,0($4);" 
  371. "addu $4,$4,$6;"
  372. "lw $14,0($4);" 
  373. "sw $2,0($5);"
  374. "addu $5,$5,$7;" 
  375. "sw $10,0($5);"
  376. "addu $5,$5,$7;" 
  377. "sw $12,0($5);"
  378. "addu $5,$5,$7;" 
  379. "sw $14,0($5);"); 
  380. void CopyBlockM(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) 
  381. __asm( SET_SRCEND16
  382. "loopm:"
  383. "ulw $2,0($4);"
  384. "ulw $10,4($4);" 
  385. "ulw $12,8($4);"
  386. "ulw $14,12($4);" 
  387. "addu $4,$4,$6;"
  388. "sw $2,0($5);"
  389. "sw $10,4($5);"
  390. "sw $12,8($5);"
  391. "sw $14,12($5);"
  392. "addu $5,$5,$7;" 
  393. "bne $4,$8,loopm;"); 
  394. #endif