mcomp_mips64.c
上传用户:wstnjxml
上传日期:2014-04-03
资源大小:7248k
文件大小:12k
源码类别:

Windows CE

开发平台:

C/C++

  1. /*****************************************************************************
  2.  *
  3.  * This program is free software ; you can redistribute it and/or modify
  4.  * it under the terms of the GNU General Public License as published by
  5.  * the Free Software Foundation; either version 2 of the License, or
  6.  * (at your option) any later version.
  7.  *
  8.  * This program is distributed in the hope that it will be useful,
  9.  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10.  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11.  * GNU General Public License for more details.
  12.  *
  13.  * You should have received a copy of the GNU General Public License
  14.  * along with this program; if not, write to the Free Software
  15.  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  16.  *
  17.  * $Id: mcomp_mips64.c 284 2005-10-04 08:54:26Z picard $
  18.  *
  19.  * The Core Pocket Media Player
  20.  * Copyright (c) 2004-2005 Gabor Kovacs
  21.  *
  22.  ****************************************************************************/
  23. #include "../common.h"
  24. #include "softidct.h"
  25. #if defined(MIPS64)
  26. // important: disable interrupts before using 64bit registers (but not too long, it could freeze)
  27. // important: $8 can't be used as 64bit register (trashed by some kernel routine)
  28. // $8 src end pointer
  29. // $4 src pointer
  30. // $5 dst pointer
  31. // $6 src stride
  32. // $7 dst stride
  33. // $2,$9 first item lower 8 bytes (in two forms)
  34. // $10,$11 first item upper 8 bytes (in two forms) - for 16x16 macroblocks
  35. // $12,$13 second item lower 8 bytes (in two forms)
  36. // $14,$15 second item upper 8 bytes (in two forms) - for 16x16 macroblocks
  37. // $24 0x0101 0101 0101 0101 - for non horver
  38. // $25 0xFEFE FEFE FEFE FEFE - for non horver
  39. // $24 rounding   - for horver
  40. // $25 temporary   - for 16x16 horver
  41. // $3 0x0303 0303 0303 0303 - for horver
  42. // $1 0xFCFC FCFC FCFC FCFC - for horver
  43. // $16 temporary   - for 16x16 horver (must be saved/restored)
  // Loop sentinel for 8-row blocks: $8 = $4 + ($6 << 3), i.e. the source
  // pointer advanced by eight source strides. The row loops run until $4 == $8.
  #define SET_SRCEND8 \
      "sll $8,$6,3;" \
      "addu $8,$4,$8;"
  // Loop sentinel for 16-row blocks: $8 = $4 + ($6 << 4), i.e. the source
  // pointer advanced by sixteen source strides.
  #define SET_SRCEND16 \
      "sll $8,$6,4;" \
      "addu $8,$4,$8;"
  // Byte masks for the two-tap averages:
  //   $24 = 0x0101010101010101 (low bit of every byte)
  //   $25 = ~$24 = 0xFEFEFEFEFEFEFEFE (clears the low bit before a shift by 1)
  // The 32-bit pattern is loaded once and replicated into the upper half.
  #define SET_MASKS \
      "li $24,0x01010101;" \
      "dsll $25,$24,32;" \
      "or $24,$24,$25;" \
      "nor $25,$24,$0;"
  // Byte masks for the four-tap (horizontal+vertical) averages:
  //   $3 = 0x0303030303030303 (low two bits of every byte)
  //   $1 = ~$3 = 0xFCFCFCFCFCFCFCFC (clears the low two bits before a shift by 2)
  // $1 is the assembler temporary, hence ".set noat".
  // NOTE(review): nothing in this file switches back with ".set at" - confirm
  // that the toolchain does not need it restored after the routine.
  #define SET_MASKS2 \
      ".set noat;" \
      "li $3,0x03030303;" \
      "dsll $1,$3,32;" \
      "or $3,$3,$1;" \
      "nor $1,$3,$0;"
  // Load the first 8-byte item from ofs($4) with an unaligned load.
  //   $2 = raw bytes
  //   $9 = every byte halved (low bit masked off with $25, then shifted right)
  // Requires $25 from SET_MASKS.
  #define LOAD_FIRST8(ofs) \
      "uld $2, " #ofs "($4);" \
      "and $9,$2,$25;" \
      "dsrl $9,$9,1;"
  // Load the first 16-byte item from ofs($4) as two unaligned doublewords.
  //   $2 / $10 = raw lower / upper 8 bytes
  //   $9 / $11 = the same bytes halved (low bit masked with $25, shifted right)
  // Requires $25 from SET_MASKS.
  #define LOAD_FIRST16(ofs) \
      "uld $2, " #ofs "($4);" \
      "uld $10," #ofs "+8($4);" \
      "and $9,$2,$25;" \
      "and $11,$10,$25;" \
      "dsrl $9,$9,1;" \
      "dsrl $11,$11,1;"
  // Load the second 8-byte item from ofs($4) with an unaligned load.
  //   $12 = raw bytes
  //   $13 = every byte halved (low bit masked off with $25, then shifted right)
  // Requires $25 from SET_MASKS.
  #define LOAD_SECOND8(ofs) \
      "uld $12," #ofs "($4);" \
      "and $13,$12,$25;" \
      "dsrl $13,$13,1;"
  // Load the second 16-byte item from ofs($4) as two unaligned doublewords.
  //   $12 / $14 = raw lower / upper 8 bytes
  //   $13 / $15 = the same bytes halved (low bit masked with $25, shifted right)
  // Requires $25 from SET_MASKS.
  #define LOAD_SECOND16(ofs) \
      "uld $12," #ofs "($4);" \
      "uld $14," #ofs "+8($4);" \
      "and $13,$12,$25;" \
      "and $15,$14,$25;" \
      "dsrl $13,$13,1;" \
      "dsrl $15,$15,1;"
  // Horizontal pre-sum of the first row for the four-tap average (8 bytes).
  // Loads the bytes at 0($4) and 1($4) and leaves, per byte lane:
  //   $2 = (a & 3) + (b & 3)      sum of the low two bits
  //   $9 = (a >> 2) + (b >> 2)    sum of the upper six bits
  // $10 and $11 are used as scratch. Requires $1/$3 from SET_MASKS2.
  #define LOAD_FIRST8_HV \
      "uld $2,0($4);" \
      "uld $9,1($4);" \
      "and $10,$2,$1;" \
      "and $11,$9,$1;" \
      "and $2,$2,$3;" \
      "and $9,$9,$3;" \
      "dsrl $10,$10,2;" \
      "dsrl $11,$11,2;" \
      "daddu $2,$2,$9;" \
      "daddu $9,$10,$11;"
  // Horizontal pre-sum of the first row for the four-tap average (16 bytes).
  // Lower half (bytes at 0/1($4)):  $2  = low-two-bit sums, $9  = (x >> 2) sums.
  // Upper half (bytes at 8/9($4)):  $10 = low-two-bit sums, $11 = (x >> 2) sums.
  // $16 and $25 are scratch; $16 is callee-saved, so callers wrap the routine
  // in SAVE/RESTORE. Requires $1/$3 from SET_MASKS2.
  #define LOAD_FIRST16_HV \
      "uld $2,0($4);" \
      "uld $9,1($4);" \
      "and $16,$2,$1;" \
      "and $25,$9,$1;" \
      "and $2,$2,$3;" \
      "and $9,$9,$3;" \
      "dsrl $16,$16,2;" \
      "dsrl $25,$25,2;" \
      "daddu $2,$2,$9;" \
      "daddu $9,$16,$25;" \
      "uld $10,8($4);" \
      "uld $11,9($4);" \
      "and $16,$10,$1;" \
      "and $25,$11,$1;" \
      "and $10,$10,$3;" \
      "and $11,$11,$3;" \
      "dsrl $16,$16,2;" \
      "dsrl $25,$25,2;" \
      "daddu $10,$10,$11;" \
      "daddu $11,$16,$25;"
  // Horizontal pre-sum of the second row for the four-tap average (8 bytes).
  // Loads the bytes at 0($4) and 1($4) and leaves, per byte lane:
  //   $12 = (a & 3) + (b & 3)     sum of the low two bits
  //   $13 = (a >> 2) + (b >> 2)   sum of the upper six bits
  // $14 and $15 are used as scratch. Requires $1/$3 from SET_MASKS2.
  #define LOAD_SECOND8_HV \
      "uld $12,0($4);" \
      "uld $13,1($4);" \
      "and $14,$12,$1;" \
      "and $15,$13,$1;" \
      "and $12,$12,$3;" \
      "and $13,$13,$3;" \
      "dsrl $14,$14,2;" \
      "dsrl $15,$15,2;" \
      "daddu $12,$12,$13;" \
      "daddu $13,$14,$15;"
  // Horizontal pre-sum of the second row for the four-tap average (16 bytes).
  // Lower half (bytes at 0/1($4)):  $12 = low-two-bit sums, $13 = (x >> 2) sums.
  // Upper half (bytes at 8/9($4)):  $14 = low-two-bit sums, $15 = (x >> 2) sums.
  // $16 and $25 are scratch; $16 is callee-saved, so callers wrap the routine
  // in SAVE/RESTORE. Requires $1/$3 from SET_MASKS2.
  #define LOAD_SECOND16_HV \
      "uld $12,0($4);" \
      "uld $13,1($4);" \
      "and $16,$12,$1;" \
      "and $25,$13,$1;" \
      "and $12,$12,$3;" \
      "and $13,$13,$3;" \
      "dsrl $16,$16,2;" \
      "dsrl $25,$25,2;" \
      "daddu $12,$12,$13;" \
      "daddu $13,$16,$25;" \
      "uld $14,8($4);" \
      "uld $15,9($4);" \
      "and $16,$14,$1;" \
      "and $25,$15,$1;" \
      "and $14,$14,$3;" \
      "and $15,$15,$3;" \
      "dsrl $16,$16,2;" \
      "dsrl $25,$25,2;" \
      "daddu $14,$14,$15;" \
      "daddu $15,$16,$25;"
  // Per-byte average of the first and second 8-byte items, rounding up:
  //   $2 = ((a | b) & 1) + (a >> 1) + (b >> 1)  ==  (a + b + 1) >> 1
  // Expects $2/$12 raw, $9/$13 halved (LOAD_FIRST8/LOAD_SECOND8) and
  // $24 = 0x01 in every byte (SET_MASKS).
  #define AVG8 \
      "or $2,$2,$12;" \
      "and $2,$2,$24;" \
      "daddu $2,$2,$9;" \
      "daddu $2,$2,$13;"
  // Per-byte average of the first and second 16-byte items, rounding up:
  //   result = ((a | b) & 1) + (a >> 1) + (b >> 1)  ==  (a + b + 1) >> 1
  // Lower half ends up in $2, upper half in $10. Expects the raw/halved
  // register pairs from LOAD_FIRST16/LOAD_SECOND16 and $24 from SET_MASKS.
  #define AVG16 \
      "or $2,$2,$12;" \
      "or $10,$10,$14;" \
      "and $2,$2,$24;" \
      "and $10,$10,$24;" \
      "daddu $2,$2,$9;" \
      "daddu $10,$10,$11;" \
      "daddu $2,$2,$13;" \
      "daddu $10,$10,$15;"
  // Per-byte average of the first and second 8-byte items, rounding down:
  //   $2 = ((a & b) & 1) + (a >> 1) + (b >> 1)  ==  (a + b) >> 1
  // Expects $2/$12 raw, $9/$13 halved (LOAD_FIRST8/LOAD_SECOND8) and
  // $24 = 0x01 in every byte (SET_MASKS).
  #define AVGROUND8 \
      "and $2,$2,$12;" \
      "and $2,$2,$24;" \
      "daddu $2,$2,$9;" \
      "daddu $2,$2,$13;"
  // Per-byte average of the first and second 16-byte items, rounding down:
  //   result = ((a & b) & 1) + (a >> 1) + (b >> 1)  ==  (a + b) >> 1
  // Lower half ends up in $2, upper half in $10. Expects the raw/halved
  // register pairs from LOAD_FIRST16/LOAD_SECOND16 and $24 from SET_MASKS.
  #define AVGROUND16 \
      "and $2,$2,$12;" \
      "and $10,$10,$14;" \
      "and $2,$2,$24;" \
      "and $10,$10,$24;" \
      "daddu $2,$2,$9;" \
      "daddu $10,$10,$11;" \
      "daddu $2,$2,$13;" \
      "daddu $10,$10,$15;"
  // Make the current "second" 8-byte item the "first" item of the next loop
  // iteration ($12 -> $2, $13 -> $9), so each source row is loaded only once.
  #define SWAPSET8 \
      "move $2,$12;" \
      "move $9,$13;"
  // Make the current "second" 16-byte item the "first" item of the next loop
  // iteration ($12..$15 -> $2, $9, $10, $11), so each source row is loaded
  // only once.
  #define SWAPSET16 \
      "move $2,$12;" \
      "move $9,$13;" \
      "move $10,$14;" \
      "move $11,$15;"
  // Store the 8 result bytes in $2 at the destination pointer ($5) and advance
  // it by the destination stride ($7).
  // NOTE(review): only "sdr" is issued (no matching "sdl"), so this presumably
  // relies on the destination being 8-byte aligned - confirm with callers.
  #define WRITE8 \
      "sdr $2,0($5);" \
      "addu $5,$5,$7;"
  // Store the 16 result bytes ($2 = lower 8, $10 = upper 8) at the destination
  // pointer ($5) and advance it by the destination stride ($7).
  // NOTE(review): only "sdr" is issued (no matching "sdl"), so this presumably
  // relies on the destination being 8-byte aligned - confirm with callers.
  #define WRITE16 \
      "sdr $2,0($5);" \
      "sdr $10,8($5);" \
      "addu $5,$5,$7;"
  // Push callee-saved register $16 (scratch in the 16x16 horver loaders) onto
  // the stack. Only the low 32 bits are stored ("sw").
  // The base register is spelled "$sp" like in the addiu before it; the bare
  // "sp" used previously is not a register name for every assembler.
  #define SAVE \
      "addiu $sp,$sp,-4;" \
      "sw $16,0($sp);"
  // Pop register $16 pushed by SAVE and release its stack slot.
  // The base register is spelled "$sp" like in the addiu after it; the bare
  // "sp" used previously is not a register name for every assembler.
  #define RESTORE \
      "lw $16,0($sp);" \
      "addiu $sp,$sp,4;"
  // CACHE16: optional cache hint issued on the destination row ($5) before a
  // 16-byte write. On MIPSVR41XX builds it emits "cache 13" (per the original
  // note: cache the line without loading it); on every other build it expands
  // to nothing.
  #ifdef MIPSVR41XX
  #define CACHE16 \
      ".set noreorder;" \
      "cache 13,0($5);" \
      ".set reorder;"
  #else
  #define CACHE16
  #endif
  205. void CopyBlock(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) 
  206. __asm( SET_SRCEND8
  207. "loop:"
  208. "uld $2,0($4);" 
  209. "addu $4,$4,$6;"
  210. "sdr $2,0($5);"
  211. "addu $5,$5,$7;" 
  212. "bne $4,$8,loop;"); 
  213. void CopyBlockHor(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) 
  214. __asm ( SET_SRCEND8
  215. SET_MASKS
  216. "loophor:"
  217. LOAD_FIRST8(0)
  218. LOAD_SECOND8(1)
  219. "addu $4,$4,$6;" 
  220. AVG8
  221. WRITE8
  222. "bne $4,$8,loophor;");
  223. void CopyBlockHorRound(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) 
  224. __asm ( SET_SRCEND8
  225. SET_MASKS
  226. "loophorround:"
  227. LOAD_FIRST8(0)
  228. LOAD_SECOND8(1)
  229. "addu $4,$4,$6;" 
  230. AVGROUND8
  231. WRITE8
  232. "bne $4,$8,loophorround;");
  233. void CopyBlockVer(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) 
  234. __asm ( SET_SRCEND8
  235. SET_MASKS
  236. LOAD_FIRST8(0)
  237. "loopver:"
  238. "addu $4,$4,$6;" 
  239. LOAD_SECOND8(0)
  240. AVG8
  241. WRITE8
  242. SWAPSET8
  243. "bne $4,$8,loopver;");
  244. void CopyBlockVerRound(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) 
  245. __asm ( SET_SRCEND8
  246. SET_MASKS
  247. LOAD_FIRST8(0)
  248. "loopverround:"
  249. "addu $4,$4,$6;" 
  250. LOAD_SECOND8(0)
  251. AVGROUND8
  252. WRITE8
  253. SWAPSET8
  254. "bne $4,$8,loopverround;");
  255. }
  256. void CopyBlockHorVer(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) 
  257. __asm ( SET_SRCEND8
  258. SET_MASKS2
  259. "dsll $24,$3,1;"  
  260. "and $24,$24,$3;" // 0x0202 0202 0202 0202
  261. //preprocessing
  262. LOAD_FIRST8_HV
  263. "loophorver:"
  264. "addu $4,$4,$6;"
  265. LOAD_SECOND8_HV
  266. "daddu $2,$2,$12;"
  267. "daddu $9,$9,$13;"
  268. "daddu $2,$2,$24;"
  269. "and $2,$2,$1;"
  270. "dsrl $2,$2,2;"
  271. "daddu $2,$2,$9;"
  272. WRITE8
  273. SWAPSET8
  274. "bne $4,$8,loophorver;");
  275. void CopyBlockHorVerRound(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) 
  276. __asm ( SET_SRCEND8
  277. SET_MASKS2
  278. "dsrl $24,$3,1;"  
  279. "and $24,$24,$3;" // 0x0101 0101 0101 0101
  280. //preprocessing
  281. LOAD_FIRST8_HV
  282. "loophorverround:"
  283. "addu $4,$4,$6;"
  284. LOAD_SECOND8_HV
  285. "daddu $2,$2,$12;"
  286. "daddu $9,$9,$13;"
  287. "daddu $2,$2,$24;"
  288. "and $2,$2,$1;"
  289. "dsrl $2,$2,2;"
  290. "daddu $2,$2,$9;"
  291. WRITE8
  292. SWAPSET8
  293. "bne $4,$8,loophorverround;");
  294. }
  295. void CopyMBlock(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) 
  296. __asm( SET_SRCEND16
  297. "loopm:"
  298. CACHE16
  299. "uld $2,0($4);" 
  300. "uld $10,8($4);" 
  301. "addu $4,$4,$6;" 
  302. WRITE16
  303. "bne $4,$8,loopm;"); 
  304. void CopyMBlockHor(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) 
  305. __asm ( SET_SRCEND16
  306. SET_MASKS
  307. "loopmhor:"
  308. LOAD_FIRST16(0)
  309. LOAD_SECOND16(1)
  310. "addu $4,$4,$6;" 
  311. CACHE16
  312. AVG16
  313. WRITE16
  314. "bne $4,$8,loopmhor;");
  315. void CopyMBlockHorRound(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) 
  316. __asm ( SET_SRCEND16
  317. SET_MASKS
  318. "loopmhorround:"
  319. LOAD_FIRST16(0)
  320. LOAD_SECOND16(1)
  321. "addu $4,$4,$6;" 
  322. CACHE16
  323. AVGROUND16
  324. WRITE16
  325. "bne $4,$8,loopmhorround;");
  326. void CopyMBlockVer(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) 
  327. __asm ( SET_SRCEND16
  328. SET_MASKS
  329. LOAD_FIRST16(0)
  330. "loopmver:"
  331. "addu $4,$4,$6;" 
  332. LOAD_SECOND16(0)
  333. CACHE16
  334. AVG16
  335. WRITE16
  336. SWAPSET16
  337. "bne $4,$8,loopmver;"
  338. );
  339. void CopyMBlockVerRound(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) 
  340. __asm ( SET_SRCEND16
  341. SET_MASKS
  342. LOAD_FIRST16(0)
  343. "loopmverround:"
  344. "addu $4,$4,$6;" 
  345. LOAD_SECOND16(0)
  346. CACHE16
  347. AVGROUND16
  348. WRITE16
  349. SWAPSET16
  350. "bne $4,$8,loopmverround;");
  351. void CopyMBlockHorVer(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) 
  352. __asm ( SAVE
  353.     SET_SRCEND16
  354. SET_MASKS2
  355. "dsll $24,$3,1;"  
  356. "and $24,$24,$3;" // 0x0202 0202 0202 0202
  357. //preprocessing
  358. LOAD_FIRST16_HV
  359. "loopmhorver:"
  360. "addu $4,$4,$6;"
  361. LOAD_SECOND16_HV 
  362. CACHE16);
  363. __asm ( "daddu $2,$2,$12;"
  364. "daddu $9,$9,$13;"
  365. "daddu $10,$10,$14;"
  366. "daddu $11,$11,$15;"
  367. "daddu $2,$2,$24;"
  368. "daddu $10,$10,$24;"
  369. "and $2,$2,$1;"
  370. "and $10,$10,$1;"
  371. "dsrl $2,$2,2;"
  372. "dsrl $10,$10,2;"
  373. "daddu $2,$2,$9;"
  374. "daddu $10,$10,$11;"
  375. WRITE16
  376. SWAPSET16
  377. "bne $4,$8,loopmhorver;"
  378. RESTORE);
  379. void CopyMBlockHorVerRound(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) 
  380. __asm ( SAVE
  381.     SET_SRCEND16
  382. SET_MASKS2
  383. "dsrl $24,$3,1;"  
  384. "and $24,$24,$3;" // 0x0101 0101 0101 0101
  385. //preprocessing
  386. LOAD_FIRST16_HV
  387. "loopmhorverround:"
  388. "addu $4,$4,$6;");
  389. __asm ( LOAD_SECOND16_HV
  390. CACHE16
  391. "daddu $2,$2,$12;"
  392. "daddu $9,$9,$13;"
  393. "daddu $10,$10,$14;"
  394. "daddu $11,$11,$15;"
  395. "daddu $2,$2,$24;"
  396. "daddu $10,$10,$24;"
  397. "and $2,$2,$1;"
  398. "and $10,$10,$1;"
  399. "dsrl $2,$2,2;"
  400. "dsrl $10,$10,2;"
  401. "daddu $2,$2,$9;"
  402. "daddu $10,$10,$11;"
  403. WRITE16
  404. SWAPSET16
  405. "bne $4,$8,loopmhorverround;"
  406. RESTORE);
  407. }
  408. void AddBlock8x8(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) 
  409. __asm( SET_SRCEND8
  410. SET_MASKS
  411. "loopadd:"
  412. "ldr $2,0($4);"
  413. "addu $4,$4,$6;"
  414. "ldr $9,0($5);"
  415. "and $11,$2,$25;"
  416. "or $2,$2,$9;"
  417. "and $2,$2,$24;"
  418. "dsrl $11,$11,1;"
  419. "daddu $2,$2,$11;"
  420. "and $9,$9,$25;"
  421. "dsrl $9,$9,1;"
  422. "daddu $2,$2,$9;"
  423. "sdr $2,0($5);"
  424. "addu $5,$5,$7;" 
  425. "bne $4,$8,loopadd;"); 
  426. void AddBlock16x16(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride) 
  427. __asm( SET_SRCEND16
  428. SET_MASKS
  429. "loopadd16:"
  430. "ldr $2,0($4);" 
  431. "ldr $10,8($4);" 
  432. #ifdef MIPSVR41XX
  433. ".set noreorder;"
  434. "cache 17,0($4);" // hit invalidate (lose changes)
  435. ".set reorder;"
  436. #endif
  437. "addu $4,$4,$6;" 
  438. "ldr $9,0($5);"
  439. "and $11,$2,$25;"
  440. "or $2,$2,$9;"
  441. "and $2,$2,$24;"
  442. "dsrl $11,$11,1;"
  443. "daddu $2,$2,$11;"
  444. "and $9,$9,$25;"
  445. "dsrl $9,$9,1;"
  446. "daddu $2,$2,$9;"
  447. "ldr $11,8($5);"
  448. "and $9,$10,$25;"
  449. "or $10,$10,$11;"
  450. "and $10,$10,$24;"
  451. "dsrl $9,$9,1;"
  452. "daddu $10,$10,$9;"
  453. "and $11,$11,$25;"
  454. "dsrl $11,$11,1;"
  455. "daddu $10,$10,$11;"
  456. "sdr $2,0($5);"
  457. "sdr $10,8($5);"
  458. "addu $5,$5,$7;" 
  459. "bne $4,$8,loopadd16;"); 
  460. #endif