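/*
 * File: arm_colorconv.S
 * Resource name: vlc-1.0.5.zip [click to view]
 * Uploaded by: kjfoods
 * Upload date: 2020-07-06
 * Resource size: 29949k
 * File size: 9k
 * Source category: midi
 * Development platform: Unix_Linux
 */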
/*
 * ARM assembly optimized color format conversion functions
 * (YV12 -> YUY2, YV12 -> some custom YUV420 format used by
 * Epson graphics chip in Nokia N800)
 *
 * Copyright (C) 2007 Siarhei Siamashka <ssvb@users.sourceforge.net>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * version 2.1 as published by the Free Software Foundation.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA
 */
- .text
- /*******************************************************/
- .align
- .global yv12_to_yuy2_line_arm
- .func yv12_to_yuy2_line_arm
- yv12_to_yuy2_line_arm:
- #define DST r0
- #define SRC_Y r1
- #define SRC_U r2
- #define SRC_V r3
- #define WIDTH ip
- ldr ip, [sp], #0
- stmfd sp!, {r4-r8, r10, lr}
- #define TMP1 r8
- #define TMP2 r10
- #define TMP3 lr
- bic WIDTH, #1
- subs WIDTH, #8
- blt 2f
- 1:
- ldrb r4, [SRC_Y], #1
- ldrb TMP1, [SRC_U], #1
- ldrb TMP2, [SRC_Y], #1
- ldrb TMP3, [SRC_V], #1
- add r4, r4, TMP1, lsl #8
- add r4, r4, TMP2, lsl #16
- add r4, r4, TMP3, lsl #24
- ldrb r5, [SRC_Y], #1
- ldrb TMP1, [SRC_U], #1
- ldrb TMP2, [SRC_Y], #1
- ldrb TMP3, [SRC_V], #1
- add r5, r5, TMP1, lsl #8
- add r5, r5, TMP2, lsl #16
- add r5, r5, TMP3, lsl #24
- ldrb r6, [SRC_Y], #1
- ldrb TMP1, [SRC_U], #1
- ldrb TMP2, [SRC_Y], #1
- ldrb TMP3, [SRC_V], #1
- add r6, r6, TMP1, lsl #8
- add r6, r6, TMP2, lsl #16
- add r6, r6, TMP3, lsl #24
- ldrb r7, [SRC_Y], #1
- ldrb TMP1, [SRC_U], #1
- ldrb TMP2, [SRC_Y], #1
- ldrb TMP3, [SRC_V], #1
- add r7, r7, TMP1, lsl #8
- add r7, r7, TMP2, lsl #16
- add r7, r7, TMP3, lsl #24
- stmia DST!, {r4-r7}
- subs WIDTH, WIDTH, #8
- bge 1b
- 2:
- adds WIDTH, WIDTH, #8
- ble 4f
- 3:
- ldrb r4, [SRC_Y], #1
- ldrb TMP1, [SRC_U], #1
- ldrb TMP2, [SRC_Y], #1
- ldrb TMP3, [SRC_V], #1
- add r4, r4, TMP1, lsl #8
- add r4, r4, TMP2, lsl #16
- add r4, r4, TMP3, lsl #24
- str r4, [DST], #4
- subs WIDTH, WIDTH, #2
- bgt 3b
- 4:
- ldmfd sp!, {r4-r8, r10, pc}
- #undef DST
- #undef SRC_Y
- #undef SRC_U
- #undef SRC_V
- #undef WIDTH
- #undef TMP1
- #undef TMP2
- #undef TMP3
- .endfunc
/*******************************************************/
/*
 * YUV420_function_template function_name, USE_PLD, USE_ARMV6
 *
 * Expands to one global function called <function_name> that converts a
 * line of pixels into a packed 12-bits-per-pixel layout: every 4 bytes
 * read through SRC_Y are combined with 2 bytes read through SRC_U into
 * 6 output bytes.  With Yn the n-th SRC_Y byte and Cn the n-th SRC_U byte
 * of a group, the bytes stored (little-endian) are
 *
 *     Y0 C0 C1 Y1 Y3 Y2
 *
 * Template arguments:
 *   function_name  name of the emitted global symbol
 *   USE_PLD        non-zero: emit pld cache-preload hints
 *   USE_ARMV6      non-zero: build the words with ldrh + rev16 and
 *                  overlap the loads for the next word with the current
 *                  one (see the FLAG1/FLAG2 description below)
 *
 * Interface of every generated function:
 *   In:    r0 = DST    destination; bit 0 is cleared on entry
 *          r1 = SRC_Y  one byte consumed per pixel
 *          r2 = SRC_U  one byte consumed per two pixels
 *          r3 = WIDTH  pixel count; rounded down to a multiple of 4
 *   Out:   no value is returned
 *   Saved: r4-r8, r10, r11 (lr is pushed and later popped into pc)
 *   Clobb: r0-r3, flags
 *
 * NOTE(review): what the bytes behind SRC_U represent (U only, V only,
 * alternating per line, ...) is decided by the caller and cannot be told
 * from this file.
 * NOTE(review): pld and rev16 are emitted without any .arch directive in
 * this file, so the build presumably passes a suitable -march - confirm.
 * NOTE(review): the USE_ARMV6 variant issues ldrh on SRC_Y at whatever
 * alignment the pointer currently has (including right after a single
 * ldrb), so it relies on the CPU accepting unaligned halfword loads -
 * confirm for the target.
 */

#define DST     r0
#define SRC_Y   r1
#define SRC_U   r2
#define WIDTH   r3
#define TMP1    r10
#define TMP2    r11
#define TMP3    lr

.macro  YUV420_function_template function_name, USE_PLD, USE_ARMV6

        .align
        .global \function_name
        .func   \function_name
#ifdef __ELF__
        /* Type the symbol as a function for ELF tools (linker, debugger). */
        .type   \function_name, %function
#endif
\function_name:

/* Read information about 4 pixels, convert them to YUV420 and store into 6 bytes using 16-bit writes */
.macro  CONVERT_4_PIXELS_MACROBLOCK
        ldrb    r4, [SRC_Y], #1
        ldrb    TMP1, [SRC_U], #1
        ldrb    r5, [SRC_U], #1
        ldrb    TMP2, [SRC_Y], #1
        ldrb    r6, [SRC_Y, #1]
        ldrb    TMP3, [SRC_Y], #2
        add     r4, r4, TMP1, lsl #8
        add     r5, r5, TMP2, lsl #8
        add     r6, r6, TMP3, lsl #8
        strh    r4, [DST], #2
        strh    r5, [DST], #2
        strh    r6, [DST], #2
.endm

.if \USE_ARMV6

/*
 * ARMv6 flavour of the three word builders.  Common argument meaning:
 *   DST_REG1  register that receives the finished 32-bit word
 *   DST_REG2  register preloaded for the NEXT builder (when FLAG2 == 1)
 *   FLAG1     0: load this word's inputs here;
 *             otherwise DST_REG1/TMP1/TMP2 were already filled by the
 *             previous builder
 *   FLAG2     1: start loading the next builder's inputs into
 *             DST_REG2/TMP1/TMP2
 * Each builder assembles the bytes pairwise swapped and fixes the order
 * with a final rev16.
 */

/* First word of an 8-pixel group; PLD_FLAG == 1 adds a preload of SRC_Y */
.macro  CONVERT_8_PIXELS_MACROBLOCK_1 DST_REG1, DST_REG2, FLAG1, FLAG2, PLD_FLAG
.if \FLAG1 == 0
        ldrb    \DST_REG1, [SRC_U], #1
        ldrh    TMP1, [SRC_Y], #2
        ldrb    TMP2, [SRC_U], #1
.endif
.if \FLAG2 == 1
        ldrh    \DST_REG2, [SRC_Y], #2
.endif
.if \PLD_FLAG == 1
        pld     [SRC_Y, #48]
.endif
        add     \DST_REG1, \DST_REG1, TMP1, lsl #8
        add     \DST_REG1, \DST_REG1, TMP2, lsl #24
.if \FLAG2 == 1
        /* TMP1/TMP2 are free again only after the two adds above */
        ldrb    TMP1, [SRC_U], #1
        ldrb    TMP2, [SRC_Y], #1
.endif
        rev16   \DST_REG1, \DST_REG1
.endm

/* Second word of an 8-pixel group */
.macro  CONVERT_8_PIXELS_MACROBLOCK_2 DST_REG1, DST_REG2, FLAG1, FLAG2, DUMMY1
.if \FLAG1 == 0
        ldrh    \DST_REG1, [SRC_Y], #2
        ldrb    TMP1, [SRC_U], #1
        ldrb    TMP2, [SRC_Y], #1
.endif
.if \FLAG2 == 1
        ldrb    \DST_REG2, [SRC_Y], #1
.endif
        add     \DST_REG1, \DST_REG1, TMP1, lsl #16
        add     \DST_REG1, \DST_REG1, TMP2, lsl #24
.if \FLAG2 == 1
        ldrb    TMP1, [SRC_U], #1
        ldrh    TMP2, [SRC_Y], #2
.endif
        rev16   \DST_REG1, \DST_REG1
.endm

/* Third word of an 8-pixel group */
.macro  CONVERT_8_PIXELS_MACROBLOCK_3 DST_REG1, DST_REG2, FLAG1, FLAG2, DUMMY1
.if \FLAG1 == 0
        ldrb    \DST_REG1, [SRC_Y], #1
        ldrb    TMP1, [SRC_U], #1
        ldrh    TMP2, [SRC_Y], #2
.endif
.if \FLAG2 == 1
        ldrb    \DST_REG2, [SRC_U], #1
.endif
        add     \DST_REG1, \DST_REG1, TMP1, lsl #8
        add     \DST_REG1, \DST_REG1, TMP2, lsl #16
.if \FLAG2 == 1
        ldrh    TMP1, [SRC_Y], #2
        ldrb    TMP2, [SRC_U], #1
.endif
        rev16   \DST_REG1, \DST_REG1
.endm

.else

/*
 * Generic flavour: every builder loads its own four bytes, so only the
 * first argument (and PLD_FLAG for the first builder) is used; the other
 * arguments exist to keep the call sites identical for both flavours.
 */

/* Prepare the first 32-bit output value for 8 pixels macroblock */
.macro  CONVERT_8_PIXELS_MACROBLOCK_1 DST_REG, DUMMY1, DUMMY2, DUMMY3, PLD_FLAG
        ldrb    \DST_REG, [SRC_Y], #1
        ldrb    TMP1, [SRC_U], #1
        ldrb    TMP2, [SRC_U], #1
        ldrb    TMP3, [SRC_Y], #1
.if \USE_PLD && (\PLD_FLAG == 1)
        pld     [SRC_Y, #48]
.endif
        add     \DST_REG, \DST_REG, TMP1, lsl #8
        add     \DST_REG, \DST_REG, TMP2, lsl #16
        add     \DST_REG, \DST_REG, TMP3, lsl #24
.endm

/* Prepare the second 32-bit output value for 8 pixels macroblock */
.macro  CONVERT_8_PIXELS_MACROBLOCK_2 DST_REG, DUMMY1, DUMMY2, DUMMY3, DUMMY4
        ldrb    \DST_REG, [SRC_Y, #1]
        ldrb    TMP1, [SRC_Y], #2
        ldrb    TMP2, [SRC_Y], #1
        ldrb    TMP3, [SRC_U], #1
        add     \DST_REG, \DST_REG, TMP1, lsl #8
        add     \DST_REG, \DST_REG, TMP2, lsl #16
        add     \DST_REG, \DST_REG, TMP3, lsl #24
.endm

/* Prepare the third 32-bit output value for 8 pixels macroblock */
.macro  CONVERT_8_PIXELS_MACROBLOCK_3 DST_REG, DUMMY1, DUMMY2, DUMMY3, DUMMY4
        ldrb    \DST_REG, [SRC_U], #1
        ldrb    TMP1, [SRC_Y], #1
        ldrb    TMP2, [SRC_Y, #1]
        ldrb    TMP3, [SRC_Y], #2
        add     \DST_REG, \DST_REG, TMP1, lsl #8
        add     \DST_REG, \DST_REG, TMP2, lsl #16
        add     \DST_REG, \DST_REG, TMP3, lsl #24
.endm

.endif

.if \USE_PLD
        pld     [SRC_Y]
.endif
        stmfd   sp!, {r4-r8, r10-r11, lr}

        /* Destination buffer should be at least 16-bit aligned, image width should be multiple of 4 */
        bic     DST, DST, #1
        bic     WIDTH, WIDTH, #3

        /* Ensure 32-bit alignment of the destination buffer */
        tst     DST, #2
        beq     1f
        subs    WIDTH, WIDTH, #4
        blt     6f
        CONVERT_4_PIXELS_MACROBLOCK             /* 6 bytes: DST becomes 32-bit aligned */
1:
        subs    WIDTH, WIDTH, #32               /* WIDTH is kept biased by -32 in the main loop */
        blt     3f
2:      /* Convert 32 pixels per loop iteration */
        CONVERT_8_PIXELS_MACROBLOCK_1 r4, r6, 0, 1, 1 /* Also do cache preload for SRC_Y */
        CONVERT_8_PIXELS_MACROBLOCK_2 r6, r7, 1, 1, 0
        CONVERT_8_PIXELS_MACROBLOCK_3 r7, r8, 1, 1, 0
        CONVERT_8_PIXELS_MACROBLOCK_1 r8, r5, 1, 1, 0
        stmia   DST!, {r4, r6, r7, r8}

        /* Flags set here are consumed by the bge at the end of the loop;
           nothing in between writes the flags */
        subs    WIDTH, WIDTH, #32

        CONVERT_8_PIXELS_MACROBLOCK_2 r5, r6, 1, 1, 0
        CONVERT_8_PIXELS_MACROBLOCK_3 r6, r7, 1, 1, 0
        CONVERT_8_PIXELS_MACROBLOCK_1 r7, r8, 1, 1, 0
        CONVERT_8_PIXELS_MACROBLOCK_2 r8, r4, 1, 1, 0
        stmia   DST!, {r5, r6, r7, r8}
.if \USE_PLD
        /* Do cache preload for SRC_U */
        pld     [SRC_U, #48]
.endif
        CONVERT_8_PIXELS_MACROBLOCK_3 r4, r6, 1, 1, 0
        CONVERT_8_PIXELS_MACROBLOCK_1 r6, r7, 1, 1, 0
        CONVERT_8_PIXELS_MACROBLOCK_2 r7, r8, 1, 1, 0
        CONVERT_8_PIXELS_MACROBLOCK_3 r8, r4, 1, 0, 0 /* last word: nothing to preload */
        stmia   DST!, {r4, r6, r7, r8}

        bge     2b
3:
        adds    WIDTH, WIDTH, #32               /* undo the bias: WIDTH = pixels still to do */
        ble     6f

        subs    WIDTH, WIDTH, #8
        blt     5f
4:      /* Convert remaining pixels processing them 8 per iteration */
        CONVERT_8_PIXELS_MACROBLOCK_1 r4, r5, 0, 1, 0
        CONVERT_8_PIXELS_MACROBLOCK_2 r5, r6, 1, 1, 0
        CONVERT_8_PIXELS_MACROBLOCK_3 r6, r7, 1, 0, 0
        stmia   DST!, {r4-r6}
        subs    WIDTH, WIDTH, #8
        bge     4b
5:      /* Convert the last 4 pixels if needed */
        adds    WIDTH, WIDTH, #8
        ble     6f
        /* WIDTH is a multiple of 4 below 8 here, i.e. exactly 4, so one
           4-pixel block finishes the line and no further loop is needed */
        CONVERT_4_PIXELS_MACROBLOCK
6:      /* Restore all registers and return */
        ldmfd   sp!, {r4-r8, r10-r11, pc}
#ifdef __ELF__
        .size   \function_name, . - \function_name
#endif

        .purgem CONVERT_4_PIXELS_MACROBLOCK
        .purgem CONVERT_8_PIXELS_MACROBLOCK_1
        .purgem CONVERT_8_PIXELS_MACROBLOCK_2
        .purgem CONVERT_8_PIXELS_MACROBLOCK_3

        .endfunc

.endm

/* The preprocessor has already substituted the register aliases into the
   template text above, so they can be dropped before it is instantiated */
#undef  DST
#undef  SRC_Y
#undef  SRC_U
#undef  WIDTH
#undef  TMP1
#undef  TMP2
#undef  TMP3

/*                       function name               USE_PLD  USE_ARMV6 */
YUV420_function_template yv12_to_yuv420_line_arm,    0,       0
YUV420_function_template yv12_to_yuv420_line_armv5,  1,       0
YUV420_function_template yv12_to_yuv420_line_armv6,  1,       1