资源名称:vlc-1.0.5.zip [点击查看]
- /*****************************************************************************
- * i420_yuy2.c : YUV to YUV conversion module for vlc
- *****************************************************************************
- * Copyright (C) 2000, 2001 the VideoLAN team
- * $Id: 15f5ac2fee6d469c27339e27e161b761f1ba043c $
- *
- * Authors: Samuel Hocevar <sam@zoy.org>
- * Damien Fouilleul <damien@videolan.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
- *****************************************************************************/
- /*****************************************************************************
- * Preamble
- *****************************************************************************/
- #ifdef HAVE_CONFIG_H
- # include "config.h"
- #endif
- #include <vlc_common.h>
- #include <vlc_plugin.h>
- #include <vlc_filter.h>
- #include <vlc_vout.h>
- #if defined (MODULE_NAME_IS_i420_yuy2_altivec) && defined(HAVE_ALTIVEC_H)
- # include <altivec.h>
- #endif
- #include "i420_yuy2.h"
/* Source chromas handled by every build variant of this module. */
#define SRC_FOURCC "I420,IYUV,YV12"

/*
 * Destination chromas advertised in the module description, one list per
 * build variant.  Each list names exactly the output chromas that Activate()
 * accepts for that variant: the packed 4:2:2 family everywhere, IUYV and cyuv
 * in every variant except AltiVec, and Y211 in the plain C variant only.
 * The #ifndef keeps an existing definition (e.g. one supplied by a header)
 * untouched.
 */
#ifndef DEST_FOURCC
# if defined (MODULE_NAME_IS_i420_yuy2)
#  define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv,Y211"
# elif defined (MODULE_NAME_IS_i420_yuy2_mmx)
#  define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv"
# elif defined (MODULE_NAME_IS_i420_yuy2_sse2)
#  define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv"
# elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
#  define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422"
# endif
#endif
- /*****************************************************************************
- * Local and extern prototypes.
- *****************************************************************************/
- static int Activate ( vlc_object_t * );
- static void I420_YUY2 ( filter_t *, picture_t *, picture_t * );
- static void I420_YVYU ( filter_t *, picture_t *, picture_t * );
- static void I420_UYVY ( filter_t *, picture_t *, picture_t * );
- static picture_t *I420_YUY2_Filter ( filter_t *, picture_t * );
- static picture_t *I420_YVYU_Filter ( filter_t *, picture_t * );
- static picture_t *I420_UYVY_Filter ( filter_t *, picture_t * );
- #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
- static void I420_IUYV ( filter_t *, picture_t *, picture_t * );
- static void I420_cyuv ( filter_t *, picture_t *, picture_t * );
- static picture_t *I420_IUYV_Filter ( filter_t *, picture_t * );
- static picture_t *I420_cyuv_Filter ( filter_t *, picture_t * );
- #endif
- #if defined (MODULE_NAME_IS_i420_yuy2)
- static void I420_Y211 ( filter_t *, picture_t *, picture_t * );
- static picture_t *I420_Y211_Filter ( filter_t *, picture_t * );
- #endif
- #ifdef MODULE_NAME_IS_i420_yuy2_mmx
- /* Initialize MMX-specific constants */
- static const uint64_t i_00ffw = 0x00ff00ff00ff00ffULL;
- static const uint64_t i_80w = 0x0000000080808080ULL;
- #endif
- /*****************************************************************************
- * Module descriptor.
- *****************************************************************************/
- vlc_module_begin ()
- #if defined (MODULE_NAME_IS_i420_yuy2)
- set_description( N_("Conversions from " SRC_FOURCC " to " DEST_FOURCC) )
- set_capability( "video filter2", 80 )
- #elif defined (MODULE_NAME_IS_i420_yuy2_mmx)
- set_description( N_("MMX conversions from " SRC_FOURCC " to " DEST_FOURCC) )
- set_capability( "video filter2", 160 )
- add_requirement( MMX )
- #elif defined (MODULE_NAME_IS_i420_yuy2_sse2)
- set_description( N_("SSE2 conversions from " SRC_FOURCC " to " DEST_FOURCC) )
- set_capability( "video filter2", 250 )
- add_requirement( SSE2 )
- #elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
- set_description(
- _("AltiVec conversions from " SRC_FOURCC " to " DEST_FOURCC) );
- set_capability( "video filter2", 250 )
- add_requirement( ALTIVEC )
- #endif
- set_callbacks( Activate, NULL )
- vlc_module_end ()
- /*****************************************************************************
- * Activate: allocate a chroma function
- *****************************************************************************
- * This function allocates and initializes a chroma function
- *****************************************************************************/
- static int Activate( vlc_object_t *p_this )
- {
- filter_t *p_filter = (filter_t *)p_this;
- if( p_filter->fmt_in.video.i_width & 1
- || p_filter->fmt_in.video.i_height & 1 )
- {
- return -1;
- }
- if( p_filter->fmt_in.video.i_width != p_filter->fmt_out.video.i_width
- || p_filter->fmt_in.video.i_height != p_filter->fmt_out.video.i_height )
- return -1;
- switch( p_filter->fmt_in.video.i_chroma )
- {
- case VLC_FOURCC('Y','V','1','2'):
- case VLC_FOURCC('I','4','2','0'):
- case VLC_FOURCC('I','Y','U','V'):
- switch( p_filter->fmt_out.video.i_chroma )
- {
- case VLC_FOURCC('Y','U','Y','2'):
- case VLC_FOURCC('Y','U','N','V'):
- p_filter->pf_video_filter = I420_YUY2_Filter;
- break;
- case VLC_FOURCC('Y','V','Y','U'):
- p_filter->pf_video_filter = I420_YVYU_Filter;
- break;
- case VLC_FOURCC('U','Y','V','Y'):
- case VLC_FOURCC('U','Y','N','V'):
- case VLC_FOURCC('Y','4','2','2'):
- p_filter->pf_video_filter = I420_UYVY_Filter;
- break;
- #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
- case VLC_FOURCC('I','U','Y','V'):
- p_filter->pf_video_filter = I420_IUYV_Filter;
- break;
- case VLC_FOURCC('c','y','u','v'):
- p_filter->pf_video_filter = I420_cyuv_Filter;
- break;
- #endif
- #if defined (MODULE_NAME_IS_i420_yuy2)
- case VLC_FOURCC('Y','2','1','1'):
- p_filter->pf_video_filter = I420_Y211_Filter;
- break;
- #endif
- default:
- return -1;
- }
- break;
- default:
- return -1;
- }
- return 0;
- }
- #if 0
/*
 * Read the x86 time-stamp counter with RDTSC and return it as one 64-bit
 * value.
 *
 * RDTSC leaves the low 32 bits in EAX and the high 32 bits in EDX.  The two
 * halves are fetched through separate "=a" and "=d" outputs and combined by
 * hand: the single "=A" constraint only denotes the EDX:EAX pair on 32-bit
 * x86, so on x86-64 it would drop the upper half of the counter.
 */
static inline unsigned long long read_cycles(void)
{
    unsigned int lo, hi;

    __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));
    return ( (unsigned long long)hi << 32 ) | lo;
}
- #endif
- /* Following functions are local.
- * NOTE(review): both conditional blocks below are empty in this copy, and
- * the I420_xxx_Filter functions that Activate() installs are declared static
- * above but are not defined anywhere in view; presumably their definitions
- * (or the macro invocations generating them) were lost here -- confirm
- * against the upstream file. */
- #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
- #endif
- #if defined (MODULE_NAME_IS_i420_yuy2)
- #endif
- /*****************************************************************************
- * I420_YUY2: planar YUV 4:2:0 to packed YUYV 4:2:2
- *****************************************************************************
- * p_filter supplies the picture width and height (fmt_in.video), p_source is
- * the planar input and p_dest the packed output.
- * The C path handles two luma lines per pass: p_y1 and p_y2 walk the source
- * pair, p_line1 and p_line2 the matching destination pair, and at the end of
- * a pass every pointer skips its margin (pitch minus visible pitch).
- * The code that is compiled depends on the MODULE_NAME_IS_xxx macro: AltiVec
- * tries two vector layouts before falling back to the C loop, SSE2 picks an
- * aligned or unaligned loop from the pitches and start addresses, and the
- * remaining variants use the 8-pixel loop plus a 2-pixel remainder loop.
- * C_YUV420_YUYV( ) is not defined in this file; presumably it comes from
- * i420_yuy2.h and advances the pointers it uses -- TODO confirm.
- * NOTE(review): the VEC_xxx #define lines carry no line-continuation
- * backslashes in this copy, so the statements after each #define are not
- * part of the macro, and VEC_NEXT_LINES and VEC_LOAD_UV are never invoked.
- * This looks like extraction damage -- confirm against the upstream file.
- * NOTE(review): the MMX 8-pixel loop body and both SSE2 16-pixel loop bodies
- * are empty here, and no statement follows the "re-enable FPU registers" and
- * "make sure all SSE2 stores are visible" comments; the SIMD statements
- * appear to be missing -- confirm.
- *****************************************************************************/
- static void I420_YUY2( filter_t *p_filter, picture_t *p_source,
- picture_t *p_dest )
- {
- uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
- uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
- uint8_t *p_u = p_source->U_PIXELS;
- uint8_t *p_v = p_source->V_PIXELS;
- int i_x, i_y;
- #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
- #define VEC_NEXT_LINES( )
- p_line1 = p_line2;
- p_line2 += p_dest->p->i_pitch;
- p_y1 = p_y2;
- p_y2 += p_source->p[Y_PLANE].i_pitch;
- #define VEC_LOAD_UV( )
- u_vec = vec_ld( 0, p_u ); p_u += 16;
- v_vec = vec_ld( 0, p_v ); p_v += 16;
- #define VEC_MERGE( a )
- uv_vec = a( u_vec, v_vec );
- y_vec = vec_ld( 0, p_y1 ); p_y1 += 16;
- vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16;
- vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16;
- y_vec = vec_ld( 0, p_y2 ); p_y2 += 16;
- vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16;
- vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16;
- vector unsigned char u_vec;
- vector unsigned char v_vec;
- vector unsigned char uv_vec;
- vector unsigned char y_vec;
- if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
- ( p_filter->fmt_in.video.i_height % 2 ) ) )
- {
- /* Width is a multiple of 32, we take 2 lines at a time */
- for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
- {
- for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
- {
- VEC_MERGE( vec_mergeh );
- VEC_MERGE( vec_mergel );
- }
- }
- }
- else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
- ( p_filter->fmt_in.video.i_height % 4 ) ) )
- {
- /* Width is only a multiple of 16, we take 4 lines at a time */
- for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
- {
- /* Line 1 and 2, pixels 0 to ( width - 16 ) */
- for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
- {
- VEC_MERGE( vec_mergeh );
- VEC_MERGE( vec_mergel );
- }
- /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
- VEC_MERGE( vec_mergeh );
- /* Line 3 and 4, pixels 0 to 16 */
- VEC_MERGE( vec_mergel );
- /* Line 3 and 4, pixels 16 to ( width ) */
- for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
- {
- VEC_MERGE( vec_mergeh );
- VEC_MERGE( vec_mergel );
- }
- }
- }
- else
- {
- /* Crap, use the C version */
- #undef VEC_LOAD_UV
- #undef VEC_MERGE
- #endif
- const int i_source_margin = p_source->p[0].i_pitch
- - p_source->p[0].i_visible_pitch;
- const int i_source_margin_c = p_source->p[1].i_pitch
- - p_source->p[1].i_visible_pitch;
- const int i_dest_margin = p_dest->p->i_pitch
- - p_dest->p->i_visible_pitch;
- #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
- for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
- {
- p_line1 = p_line2;
- p_line2 += p_dest->p->i_pitch;
- p_y1 = p_y2;
- p_y2 += p_source->p[Y_PLANE].i_pitch;
- #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
- for( i_x = p_filter->fmt_in.video.i_width / 8; i_x-- ; )
- {
- C_YUV420_YUYV( );
- C_YUV420_YUYV( );
- C_YUV420_YUYV( );
- C_YUV420_YUYV( );
- }
- #else
- for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
- {
- }
- #endif
- for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; )
- {
- C_YUV420_YUYV( );
- }
- p_y1 += i_source_margin;
- p_y2 += i_source_margin;
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- p_line1 += i_dest_margin;
- p_line2 += i_dest_margin;
- }
- #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
- /* re-enable FPU registers */
- #endif
- #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
- }
- #endif
- #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
- /*
- ** SSE2 128 bits fetch/store instructions are faster
- ** if memory access is 16 bytes aligned
- */
- if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
- ((intptr_t)p_line2|(intptr_t)p_y2))) )
- {
- /* use faster SSE2 aligned fetch and store */
- for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
- {
- p_line1 = p_line2;
- p_line2 += p_dest->p->i_pitch;
- p_y1 = p_y2;
- p_y2 += p_source->p[Y_PLANE].i_pitch;
- for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
- {
- }
- for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
- {
- C_YUV420_YUYV( );
- }
- p_y1 += i_source_margin;
- p_y2 += i_source_margin;
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- p_line1 += i_dest_margin;
- p_line2 += i_dest_margin;
- }
- }
- else
- {
- /* use slower SSE2 unaligned fetch and store */
- for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
- {
- p_line1 = p_line2;
- p_line2 += p_dest->p->i_pitch;
- p_y1 = p_y2;
- p_y2 += p_source->p[Y_PLANE].i_pitch;
- for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
- {
- }
- for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
- {
- C_YUV420_YUYV( );
- }
- p_y1 += i_source_margin;
- p_y2 += i_source_margin;
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- p_line1 += i_dest_margin;
- p_line2 += i_dest_margin;
- }
- }
- /* make sure all SSE2 stores are visible thereafter */
- #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
- }
- /*****************************************************************************
- * I420_YVYU: planar YUV 4:2:0 to packed YVYU 4:2:2
- *****************************************************************************
- * Same layout as I420_YUY2, with the chroma order swapped: the AltiVec
- * merge builds vu_vec from ( v_vec, u_vec ) and the C loops call
- * C_YUV420_YVYU( ).
- * p_filter supplies the picture width and height (fmt_in.video), p_source is
- * the planar input and p_dest the packed output.  The C path handles two
- * luma lines per pass through p_y1/p_y2 and p_line1/p_line2, and at the end
- * of a pass every pointer skips its margin (pitch minus visible pitch).
- * C_YUV420_YVYU( ) is not defined in this file; presumably it comes from
- * i420_yuy2.h and advances the pointers it uses -- TODO confirm.
- * NOTE(review): the VEC_xxx #define lines carry no line-continuation
- * backslashes in this copy, so the statements after each #define are not
- * part of the macro, and VEC_NEXT_LINES and VEC_LOAD_UV are never invoked.
- * This looks like extraction damage -- confirm against the upstream file.
- * NOTE(review): the MMX branch of the 8-pixel loop and both SSE2 16-pixel
- * loop bodies are empty here, and no statement follows the "re-enable FPU
- * registers" and "make sure all SSE2 stores are visible" comments; the SIMD
- * statements appear to be missing -- confirm.
- *****************************************************************************/
- static void I420_YVYU( filter_t *p_filter, picture_t *p_source,
- picture_t *p_dest )
- {
- uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
- uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
- uint8_t *p_u = p_source->U_PIXELS;
- uint8_t *p_v = p_source->V_PIXELS;
- int i_x, i_y;
- #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
- #define VEC_NEXT_LINES( )
- p_line1 = p_line2;
- p_line2 += p_dest->p->i_pitch;
- p_y1 = p_y2;
- p_y2 += p_source->p[Y_PLANE].i_pitch;
- #define VEC_LOAD_UV( )
- u_vec = vec_ld( 0, p_u ); p_u += 16;
- v_vec = vec_ld( 0, p_v ); p_v += 16;
- #define VEC_MERGE( a )
- vu_vec = a( v_vec, u_vec );
- y_vec = vec_ld( 0, p_y1 ); p_y1 += 16;
- vec_st( vec_mergeh( y_vec, vu_vec ), 0, p_line1 ); p_line1 += 16;
- vec_st( vec_mergel( y_vec, vu_vec ), 0, p_line1 ); p_line1 += 16;
- y_vec = vec_ld( 0, p_y2 ); p_y2 += 16;
- vec_st( vec_mergeh( y_vec, vu_vec ), 0, p_line2 ); p_line2 += 16;
- vec_st( vec_mergel( y_vec, vu_vec ), 0, p_line2 ); p_line2 += 16;
- vector unsigned char u_vec;
- vector unsigned char v_vec;
- vector unsigned char vu_vec;
- vector unsigned char y_vec;
- if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
- ( p_filter->fmt_in.video.i_height % 2 ) ) )
- {
- /* Width is a multiple of 32, we take 2 lines at a time */
- for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
- {
- for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
- {
- VEC_MERGE( vec_mergeh );
- VEC_MERGE( vec_mergel );
- }
- }
- }
- else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
- ( p_filter->fmt_in.video.i_height % 4 ) ) )
- {
- /* Width is only a multiple of 16, we take 4 lines at a time */
- for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
- {
- /* Line 1 and 2, pixels 0 to ( width - 16 ) */
- for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
- {
- VEC_MERGE( vec_mergeh );
- VEC_MERGE( vec_mergel );
- }
- /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
- VEC_MERGE( vec_mergeh );
- /* Line 3 and 4, pixels 0 to 16 */
- VEC_MERGE( vec_mergel );
- /* Line 3 and 4, pixels 16 to ( width ) */
- for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
- {
- VEC_MERGE( vec_mergeh );
- VEC_MERGE( vec_mergel );
- }
- }
- }
- else
- {
- /* Crap, use the C version */
- #undef VEC_LOAD_UV
- #undef VEC_MERGE
- #endif
- const int i_source_margin = p_source->p[0].i_pitch
- - p_source->p[0].i_visible_pitch;
- const int i_source_margin_c = p_source->p[1].i_pitch
- - p_source->p[1].i_visible_pitch;
- const int i_dest_margin = p_dest->p->i_pitch
- - p_dest->p->i_visible_pitch;
- #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
- for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
- {
- p_line1 = p_line2;
- p_line2 += p_dest->p->i_pitch;
- p_y1 = p_y2;
- p_y2 += p_source->p[Y_PLANE].i_pitch;
- for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
- {
- #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
- C_YUV420_YVYU( );
- C_YUV420_YVYU( );
- C_YUV420_YVYU( );
- C_YUV420_YVYU( );
- #else
- #endif
- }
- for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; )
- {
- C_YUV420_YVYU( );
- }
- p_y1 += i_source_margin;
- p_y2 += i_source_margin;
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- p_line1 += i_dest_margin;
- p_line2 += i_dest_margin;
- }
- #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
- /* re-enable FPU registers */
- #endif
- #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
- }
- #endif
- #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
- /*
- ** SSE2 128 bits fetch/store instructions are faster
- ** if memory access is 16 bytes aligned
- */
- if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
- ((intptr_t)p_line2|(intptr_t)p_y2))) )
- {
- /* use faster SSE2 aligned fetch and store */
- for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
- {
- p_line1 = p_line2;
- p_line2 += p_dest->p->i_pitch;
- p_y1 = p_y2;
- p_y2 += p_source->p[Y_PLANE].i_pitch;
- for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
- {
- }
- for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
- {
- C_YUV420_YVYU( );
- }
- p_y1 += i_source_margin;
- p_y2 += i_source_margin;
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- p_line1 += i_dest_margin;
- p_line2 += i_dest_margin;
- }
- }
- else
- {
- /* use slower SSE2 unaligned fetch and store */
- for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
- {
- p_line1 = p_line2;
- p_line2 += p_dest->p->i_pitch;
- p_y1 = p_y2;
- p_y2 += p_source->p[Y_PLANE].i_pitch;
- for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
- {
- }
- for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
- {
- C_YUV420_YVYU( );
- }
- p_y1 += i_source_margin;
- p_y2 += i_source_margin;
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- p_line1 += i_dest_margin;
- p_line2 += i_dest_margin;
- }
- }
- /* make sure all SSE2 stores are visible thereafter */
- #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
- }
- /*****************************************************************************
- * I420_UYVY: planar YUV 4:2:0 to packed UYVY 4:2:2
- *****************************************************************************
- * Same layout as I420_YUY2, with chroma placed before luma: the AltiVec
- * stores merge ( uv_vec, y_vec ) instead of ( y_vec, uv_vec ) and the C
- * loops call C_YUV420_UYVY( ).
- * p_filter supplies the picture width and height (fmt_in.video), p_source is
- * the planar input and p_dest the packed output.  The C path handles two
- * luma lines per pass through p_y1/p_y2 and p_line1/p_line2, and at the end
- * of a pass every pointer skips its margin (pitch minus visible pitch).
- * C_YUV420_UYVY( ) is not defined in this file; presumably it comes from
- * i420_yuy2.h and advances the pointers it uses -- TODO confirm.
- * NOTE(review): the VEC_xxx #define lines carry no line-continuation
- * backslashes in this copy, so the statements after each #define are not
- * part of the macro, and VEC_NEXT_LINES and VEC_LOAD_UV are never invoked.
- * This looks like extraction damage -- confirm against the upstream file.
- * NOTE(review): the MMX branch of the 8-pixel loop and both SSE2 16-pixel
- * loop bodies are empty here, and no statement follows the "re-enable FPU
- * registers" and "make sure all SSE2 stores are visible" comments; the SIMD
- * statements appear to be missing -- confirm.
- *****************************************************************************/
- static void I420_UYVY( filter_t *p_filter, picture_t *p_source,
- picture_t *p_dest )
- {
- uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
- uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
- uint8_t *p_u = p_source->U_PIXELS;
- uint8_t *p_v = p_source->V_PIXELS;
- int i_x, i_y;
- #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
- #define VEC_NEXT_LINES( )
- p_line1 = p_line2;
- p_line2 += p_dest->p->i_pitch;
- p_y1 = p_y2;
- p_y2 += p_source->p[Y_PLANE].i_pitch;
- #define VEC_LOAD_UV( )
- u_vec = vec_ld( 0, p_u ); p_u += 16;
- v_vec = vec_ld( 0, p_v ); p_v += 16;
- #define VEC_MERGE( a )
- uv_vec = a( u_vec, v_vec );
- y_vec = vec_ld( 0, p_y1 ); p_y1 += 16;
- vec_st( vec_mergeh( uv_vec, y_vec ), 0, p_line1 ); p_line1 += 16;
- vec_st( vec_mergel( uv_vec, y_vec ), 0, p_line1 ); p_line1 += 16;
- y_vec = vec_ld( 0, p_y2 ); p_y2 += 16;
- vec_st( vec_mergeh( uv_vec, y_vec ), 0, p_line2 ); p_line2 += 16;
- vec_st( vec_mergel( uv_vec, y_vec ), 0, p_line2 ); p_line2 += 16;
- vector unsigned char u_vec;
- vector unsigned char v_vec;
- vector unsigned char uv_vec;
- vector unsigned char y_vec;
- if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
- ( p_filter->fmt_in.video.i_height % 2 ) ) )
- {
- /* Width is a multiple of 32, we take 2 lines at a time */
- for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
- {
- for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
- {
- VEC_MERGE( vec_mergeh );
- VEC_MERGE( vec_mergel );
- }
- }
- }
- else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
- ( p_filter->fmt_in.video.i_height % 4 ) ) )
- {
- /* Width is only a multiple of 16, we take 4 lines at a time */
- for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
- {
- /* Line 1 and 2, pixels 0 to ( width - 16 ) */
- for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
- {
- VEC_MERGE( vec_mergeh );
- VEC_MERGE( vec_mergel );
- }
- /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
- VEC_MERGE( vec_mergeh );
- /* Line 3 and 4, pixels 0 to 16 */
- VEC_MERGE( vec_mergel );
- /* Line 3 and 4, pixels 16 to ( width ) */
- for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
- {
- VEC_MERGE( vec_mergeh );
- VEC_MERGE( vec_mergel );
- }
- }
- }
- else
- {
- /* Crap, use the C version */
- #undef VEC_LOAD_UV
- #undef VEC_MERGE
- #endif
- const int i_source_margin = p_source->p[0].i_pitch
- - p_source->p[0].i_visible_pitch;
- const int i_source_margin_c = p_source->p[1].i_pitch
- - p_source->p[1].i_visible_pitch;
- const int i_dest_margin = p_dest->p->i_pitch
- - p_dest->p->i_visible_pitch;
- #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
- for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
- {
- p_line1 = p_line2;
- p_line2 += p_dest->p->i_pitch;
- p_y1 = p_y2;
- p_y2 += p_source->p[Y_PLANE].i_pitch;
- for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
- {
- #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
- C_YUV420_UYVY( );
- C_YUV420_UYVY( );
- C_YUV420_UYVY( );
- C_YUV420_UYVY( );
- #else
- #endif
- }
- for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x--; )
- {
- C_YUV420_UYVY( );
- }
- p_y1 += i_source_margin;
- p_y2 += i_source_margin;
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- p_line1 += i_dest_margin;
- p_line2 += i_dest_margin;
- }
- #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
- /* re-enable FPU registers */
- #endif
- #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
- }
- #endif
- #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
- /*
- ** SSE2 128 bits fetch/store instructions are faster
- ** if memory access is 16 bytes aligned
- */
- if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
- ((intptr_t)p_line2|(intptr_t)p_y2))) )
- {
- /* use faster SSE2 aligned fetch and store */
- for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
- {
- p_line1 = p_line2;
- p_line2 += p_dest->p->i_pitch;
- p_y1 = p_y2;
- p_y2 += p_source->p[Y_PLANE].i_pitch;
- for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
- {
- }
- for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
- {
- C_YUV420_UYVY( );
- }
- p_y1 += i_source_margin;
- p_y2 += i_source_margin;
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- p_line1 += i_dest_margin;
- p_line2 += i_dest_margin;
- }
- }
- else
- {
- /* use slower SSE2 unaligned fetch and store */
- for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
- {
- p_line1 = p_line2;
- p_line2 += p_dest->p->i_pitch;
- p_y1 = p_y2;
- p_y2 += p_source->p[Y_PLANE].i_pitch;
- for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
- {
- }
- for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
- {
- C_YUV420_UYVY( );
- }
- p_y1 += i_source_margin;
- p_y2 += i_source_margin;
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- p_line1 += i_dest_margin;
- p_line2 += i_dest_margin;
- }
- }
- /* make sure all SSE2 stores are visible thereafter */
- #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
- }
- #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
- /*****************************************************************************
- * I420_IUYV: planar YUV 4:2:0 to interleaved packed UYVY 4:2:2
- *****************************************************************************/
- static void I420_IUYV( filter_t *p_filter, picture_t *p_source,
- picture_t *p_dest )
- {
- VLC_UNUSED(p_source); VLC_UNUSED(p_dest);
- /* FIXME: TODO ! */
- msg_Err( p_filter, "I420_IUYV unimplemented, please harass <sam@zoy.org>" );
- }
- /*****************************************************************************
- * I420_cyuv: planar YUV 4:2:0 to upside-down packed UYVY 4:2:2
- *****************************************************************************/
- static void I420_cyuv( filter_t *p_filter, picture_t *p_source,
- picture_t *p_dest )
- {
- uint8_t *p_line1 = p_dest->p->p_pixels +
- p_dest->p->i_visible_lines * p_dest->p->i_pitch
- + p_dest->p->i_pitch;
- uint8_t *p_line2 = p_dest->p->p_pixels +
- p_dest->p->i_visible_lines * p_dest->p->i_pitch;
- uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
- uint8_t *p_u = p_source->U_PIXELS;
- uint8_t *p_v = p_source->V_PIXELS;
- int i_x, i_y;
- const int i_source_margin = p_source->p[0].i_pitch
- - p_source->p[0].i_visible_pitch;
- const int i_source_margin_c = p_source->p[1].i_pitch
- - p_source->p[1].i_visible_pitch;
- const int i_dest_margin = p_dest->p->i_pitch
- - p_dest->p->i_visible_pitch;
- #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
- for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
- {
- p_line1 -= 3 * p_dest->p->i_pitch;
- p_line2 -= 3 * p_dest->p->i_pitch;
- p_y1 = p_y2;
- p_y2 += p_source->p[Y_PLANE].i_pitch;
- for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
- {
- #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
- C_YUV420_UYVY( );
- C_YUV420_UYVY( );
- C_YUV420_UYVY( );
- C_YUV420_UYVY( );
- #else
- #endif
- }
- for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; )
- {
- C_YUV420_UYVY( );
- }
- p_y1 += i_source_margin;
- p_y2 += i_source_margin;
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- p_line1 += i_dest_margin;
- p_line2 += i_dest_margin;
- }
- #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
- /* re-enable FPU registers */
- #endif
- #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
- /*
- ** SSE2 128 bits fetch/store instructions are faster
- ** if memory access is 16 bytes aligned
- */
- if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
- ((intptr_t)p_line2|(intptr_t)p_y2))) )
- {
- /* use faster SSE2 aligned fetch and store */
- for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
- {
- p_line1 = p_line2;
- p_line2 += p_dest->p->i_pitch;
- p_y1 = p_y2;
- p_y2 += p_source->p[Y_PLANE].i_pitch;
- for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
- {
- }
- for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
- {
- C_YUV420_UYVY( );
- }
- p_y1 += i_source_margin;
- p_y2 += i_source_margin;
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- p_line1 += i_dest_margin;
- p_line2 += i_dest_margin;
- }
- }
- else
- {
- /* use slower SSE2 unaligned fetch and store */
- for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
- {
- p_line1 = p_line2;
- p_line2 += p_dest->p->i_pitch;
- p_y1 = p_y2;
- p_y2 += p_source->p[Y_PLANE].i_pitch;
- for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
- {
- }
- for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
- {
- C_YUV420_UYVY( );
- }
- p_y1 += i_source_margin;
- p_y2 += i_source_margin;
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- p_line1 += i_dest_margin;
- p_line2 += i_dest_margin;
- }
- }
- /* make sure all SSE2 stores are visible thereafter */
- #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
- }
- #endif // !defined (MODULE_NAME_IS_i420_yuy2_altivec)
/*****************************************************************************
 * I420_Y211: planar YUV 4:2:0 to packed YUYV 2:1:1
 *****************************************************************************
 * Only built into the plain C variant.  p_filter supplies the picture width
 * and height (fmt_in.video), p_source is the planar input and p_dest the
 * packed output.  Two source lines are converted per pass, in groups of
 * eight pixels (two C_YUV420_Y211( ) invocations per group); pixels beyond
 * the last full group of eight are not processed.
 * C_YUV420_Y211( ) is not defined in this file; presumably it comes from
 * i420_yuy2.h and advances the pointers it uses -- TODO confirm.
 *****************************************************************************/
#if defined (MODULE_NAME_IS_i420_yuy2)
static void I420_Y211( filter_t *p_filter, picture_t *p_source,
                                           picture_t *p_dest )
{
    uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
    uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
    uint8_t *p_u = p_source->U_PIXELS;
    uint8_t *p_v = p_source->V_PIXELS;

    int i_x, i_y;

    /* Bytes to skip at the end of each line: pitch minus visible pitch. */
    const int i_source_margin = p_source->p[0].i_pitch
                                 - p_source->p[0].i_visible_pitch;
    const int i_source_margin_c = p_source->p[1].i_pitch
                                 - p_source->p[1].i_visible_pitch;
    const int i_dest_margin = p_dest->p->i_pitch
                               - p_dest->p->i_visible_pitch;

    const int i_line_pairs = p_filter->fmt_in.video.i_height / 2;

    for( i_y = 0 ; i_y < i_line_pairs ; i_y++ )
    {
        /* Destination: the line reached by the previous pass, plus the
         * one right below it. */
        p_line1 = p_line2;
        p_line2 += p_dest->p->i_pitch;

        /* Source: same stepping on the luma plane. */
        p_y1 = p_y2;
        p_y2 += p_source->p[Y_PLANE].i_pitch;

        /* Two macro invocations for every full group of eight pixels. */
        for( i_x = p_filter->fmt_in.video.i_width / 8 * 2 ; i_x-- ; )
        {
            C_YUV420_Y211( );
        }

        p_line1 += i_dest_margin;
        p_line2 += i_dest_margin;
        p_y1 += i_source_margin;
        p_y2 += i_source_margin;
        p_u += i_source_margin_c;
        p_v += i_source_margin_c;
    }
}
#endif