i420_rgb16.c
资源名称:vlc-1.0.5.zip [点击查看]
上传用户:kjfoods
上传日期:2020-07-06
资源大小:29949k
文件大小:61k
源码类别:
midi
开发平台:
Unix_Linux
- /*****************************************************************************
- * i420_rgb16.c : YUV to bitmap RGB conversion module for vlc
- *****************************************************************************
- * Copyright (C) 2000 the VideoLAN team
- * $Id: 3f0c6734f5d3a20d09399c397a27a860edc1ce45 $
- *
- * Authors: Samuel Hocevar <sam@zoy.org>
- * Damien Fouilleul <damienf@videolan.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
- *****************************************************************************/
- /*****************************************************************************
- * Preamble
- *****************************************************************************/
- #ifdef HAVE_CONFIG_H
- # include "config.h"
- #endif
- #include <vlc/vlc.h>
- #include <vlc_filter.h>
- #include <vlc_vout.h>
- #include "i420_rgb.h"
- #if defined (MODULE_NAME_IS_i420_rgb)
- # include "i420_rgb_c.h"
- #elif defined (MODULE_NAME_IS_i420_rgb_mmx)
- # include "i420_rgb_mmx.h"
- #elif defined (MODULE_NAME_IS_i420_rgb_sse2)
- # include "i420_rgb_mmx.h"
- #endif
- static void SetOffset( int, int, int, int, bool *, /* callers pass: input width, input height, output width, output height, &b_hscale */
- unsigned int *, int * ); /* callers pass: &i_vscale and the p_sys->p_offset table; static, so the definition lives later in this file */
- #if defined (MODULE_NAME_IS_i420_rgb)
- /*****************************************************************************
- * I420_RGB16_dither: color YUV 4:2:0 to RGB 16 bpp with dithering
- *****************************************************************************
- * Horizontal alignment needed:
- * - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
- * - output: 1 pixel (2 bytes), margins allowed
- * Vertical alignment needed:
- * - input: 2 lines (2 Y lines, 1 U/V line)
- * - output: 1 line
- *
- * Parameters:
- * - p_filter: supplies the input/output dimensions (fmt_in / fmt_out), the
- *   output i_rrshift used to scale the dither matrices, and the private
- *   p_sys data (p_rgb16 table, p_buffer scratch line, p_offset table)
- * - p_src: source picture; its Y, U and V planes are read
- * - p_dest: destination picture; its first plane is written as uint16_t
- *
- * The per-pixel work is done by the CONVERT_*_DITHER, SCALE_WIDTH and
- * SCALE_HEIGHT macros, which are defined outside this file section.
- * NOTE(review): several locals below are never referenced directly here and
- * are presumably used by name inside those macros - confirm before renaming
- * or removing any of them.
- *****************************************************************************/
- void I420_RGB16_dither( filter_t *p_filter, picture_t *p_src,
- picture_t *p_dest )
- {
- /* Destination is addressed as 16-bit pixels, the three source planes as
- * bytes */
- uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
- uint8_t *p_y = p_src->Y_PIXELS;
- uint8_t *p_u = p_src->U_PIXELS;
- uint8_t *p_v = p_src->V_PIXELS;
- bool b_hscale; /* horizontal scaling type, set through SetOffset() */
- unsigned int i_vscale; /* vertical scaling type, set through SetOffset() */
- unsigned int i_x, i_y; /* horizontal and vertical indexes */
- unsigned int i_real_y; /* y % 4 (row inside the 4-entry dither matrices, presumably) */
- int i_right_margin; /* destination pitch minus visible pitch, in bytes */
- int i_rewind; /* how far to step back to convert the last partial group of 8 */
- int i_scale_count; /* scale modulo counter */
- int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
- uint16_t * p_pic_start; /* beginning of the current line for copy */
- int i_uval, i_vval; /* U and V samples */
- int i_red, i_green, i_blue; /* presumably the chroma contributions to R, G and B - set only inside the macros */
- uint16_t * p_yuv = p_filter->p_sys->p_rgb16; /* 16 bpp conversion table */
- uint16_t * p_ybase; /* Y-dependent conversion table */
- /* Conversion buffer pointer: scratch line used when scaling horizontally */
- uint16_t * p_buffer_start = (uint16_t*)p_filter->p_sys->p_buffer;
- uint16_t * p_buffer;
- /* Offset array pointer: table handed to SetOffset() below */
- int * p_offset_start = p_filter->p_sys->p_offset;
- int * p_offset;
- const int i_source_margin = p_src->p[0].i_pitch
- - p_src->p[0].i_visible_pitch; /* luma bytes to skip at the end of each line */
- const int i_source_margin_c = p_src->p[1].i_pitch
- - p_src->p[1].i_visible_pitch; /* same for plane 1; also applied to p_v below */
- /* The dithering matrices: four rows of four values each, one row per
- * horizontal pixel position modulo 4 (see the p_dither assignments) */
- int dither10[4] = { 0x0, 0x8, 0x2, 0xa };
- int dither11[4] = { 0xc, 0x4, 0xe, 0x6 };
- int dither12[4] = { 0x3, 0xb, 0x1, 0x9 };
- int dither13[4] = { 0xf, 0x7, 0xd, 0x5 };
- for(i_x = 0; i_x < 4; i_x++) /* pre-shift every matrix entry once, before the conversion loop */
- {
- dither10[i_x] = dither10[i_x] << (SHIFT - 4 + p_filter->fmt_out.video.i_rrshift);
- dither11[i_x] = dither11[i_x] << (SHIFT - 4 + p_filter->fmt_out.video.i_rrshift);
- dither12[i_x] = dither12[i_x] << (SHIFT - 4 + p_filter->fmt_out.video.i_rrshift);
- dither13[i_x] = dither13[i_x] << (SHIFT - 4 + p_filter->fmt_out.video.i_rrshift);
- }
- i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
- if( p_filter->fmt_in.video.i_width & 7 ) /* width is not a multiple of 8 */
- {
- i_rewind = 8 - ( p_filter->fmt_in.video.i_width & 7 );
- }
- else
- {
- i_rewind = 0;
- }
- /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
- * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
- * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
- SetOffset( p_filter->fmt_in.video.i_width,
- p_filter->fmt_in.video.i_height,
- p_filter->fmt_out.video.i_width,
- p_filter->fmt_out.video.i_height,
- &b_hscale, &i_vscale, p_offset_start );
- /*
- * Perform conversion
- */
- i_scale_count = ( i_vscale == 1 ) ?
- p_filter->fmt_out.video.i_height :
- p_filter->fmt_in.video.i_height;
- for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ ) /* one pass per source line */
- {
- i_real_y = i_y & 0x3;
- p_pic_start = p_pic;
- p_buffer = b_hscale ? p_buffer_start : p_pic; /* convert into the scratch line only when scaling horizontally */
- for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; ) /* whole groups of 8 pixels */
- {
- int *p_dither = dither10; /* the matrix row cycles 10, 11, 12, 13 with each macro call */
- CONVERT_YUV_PIXEL_DITHER(2);
- p_dither = dither11;
- CONVERT_Y_PIXEL_DITHER(2);
- p_dither = dither12;
- CONVERT_YUV_PIXEL_DITHER(2);
- p_dither = dither13;
- CONVERT_Y_PIXEL_DITHER(2);
- p_dither = dither10;
- CONVERT_YUV_PIXEL_DITHER(2);
- p_dither = dither11;
- CONVERT_Y_PIXEL_DITHER(2);
- p_dither = dither12;
- CONVERT_YUV_PIXEL_DITHER(2);
- p_dither = dither13;
- CONVERT_Y_PIXEL_DITHER(2);
- }
- /* Here we do some unaligned reads and duplicate conversions, but
- * at least we have all the pixels: the pointers are stepped back by
- * i_rewind (half of that for chroma) and one more full group is
- * converted so that it ends on the last pixel of the line */
- if( i_rewind )
- {
- int *p_dither = dither10;
- p_y -= i_rewind;
- p_u -= i_rewind >> 1;
- p_v -= i_rewind >> 1;
- p_buffer -= i_rewind;
- CONVERT_YUV_PIXEL_DITHER(2);
- p_dither = dither11;
- CONVERT_Y_PIXEL_DITHER(2);
- p_dither = dither12;
- CONVERT_YUV_PIXEL_DITHER(2);
- p_dither = dither13;
- CONVERT_Y_PIXEL_DITHER(2);
- p_dither = dither10;
- CONVERT_YUV_PIXEL_DITHER(2);
- p_dither = dither11;
- CONVERT_Y_PIXEL_DITHER(2);
- p_dither = dither12;
- CONVERT_YUV_PIXEL_DITHER(2);
- p_dither = dither13;
- CONVERT_Y_PIXEL_DITHER(2);
- }
- SCALE_WIDTH;
- SCALE_HEIGHT( 420, 2 );
- p_y += i_source_margin; /* skip the luma padding of the source line */
- if( i_y % 2 ) /* chroma padding is skipped on odd lines only: one U/V line serves two Y lines */
- {
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- }
- }
- }
- #endif
- /*****************************************************************************
- * I420_RGB16: color YUV 4:2:0 to RGB 16 bpp
- *****************************************************************************
- * Horizontal alignment needed:
- * - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
- * - output: 1 pixel (2 bytes), margins allowed
- * Vertical alignment needed:
- * - input: 2 lines (2 Y lines, 1 U/V line)
- * - output: 1 line
- *****************************************************************************/
- #if defined (MODULE_NAME_IS_i420_rgb)
- void I420_RGB16( filter_t *p_filter, picture_t *p_src,
- picture_t *p_dest )
- {
- /* Plain C conversion of a planar YUV 4:2:0 picture (p_src) into a 16 bpp
- * RGB picture (p_dest), without dithering. p_filter supplies the input and
- * output dimensions and the private p_sys data (p_rgb16 table, p_buffer
- * scratch line, p_offset table).
- * The per-pixel work is done by the CONVERT_*, SCALE_WIDTH and SCALE_HEIGHT
- * macros, which are defined outside this file section.
- * NOTE(review): several locals below are never referenced directly here and
- * are presumably used by name inside those macros - confirm before renaming
- * or removing any of them.
- * Destination is addressed as 16-bit pixels, the source planes as bytes */
- uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
- uint8_t *p_y = p_src->Y_PIXELS;
- uint8_t *p_u = p_src->U_PIXELS;
- uint8_t *p_v = p_src->V_PIXELS;
- bool b_hscale; /* horizontal scaling type, set through SetOffset() */
- unsigned int i_vscale; /* vertical scaling type, set through SetOffset() */
- unsigned int i_x, i_y; /* horizontal and vertical indexes */
- int i_right_margin; /* destination pitch minus visible pitch, in bytes */
- int i_rewind; /* how far to step back to convert the last partial group of 8 */
- int i_scale_count; /* scale modulo counter */
- int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
- uint16_t * p_pic_start; /* beginning of the current line for copy */
- int i_uval, i_vval; /* U and V samples */
- int i_red, i_green, i_blue; /* presumably the chroma contributions to R, G and B - set only inside the macros */
- uint16_t * p_yuv = p_filter->p_sys->p_rgb16; /* 16 bpp conversion table */
- uint16_t * p_ybase; /* Y-dependent conversion table */
- /* Conversion buffer pointer: scratch line used when scaling horizontally */
- uint16_t * p_buffer_start = (uint16_t*)p_filter->p_sys->p_buffer;
- uint16_t * p_buffer;
- /* Offset array pointer: table handed to SetOffset() below */
- int * p_offset_start = p_filter->p_sys->p_offset;
- int * p_offset;
- const int i_source_margin = p_src->p[0].i_pitch
- - p_src->p[0].i_visible_pitch; /* luma bytes to skip at the end of each line */
- const int i_source_margin_c = p_src->p[1].i_pitch
- - p_src->p[1].i_visible_pitch; /* same for plane 1; also applied to p_v below */
- i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
- if( p_filter->fmt_in.video.i_width & 7 ) /* width is not a multiple of 8 */
- {
- i_rewind = 8 - ( p_filter->fmt_in.video.i_width & 7 );
- }
- else
- {
- i_rewind = 0;
- }
- /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
- * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
- * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
- SetOffset( p_filter->fmt_in.video.i_width,
- p_filter->fmt_in.video.i_height,
- p_filter->fmt_out.video.i_width,
- p_filter->fmt_out.video.i_height,
- &b_hscale, &i_vscale, p_offset_start );
- /*
- * Perform conversion
- */
- i_scale_count = ( i_vscale == 1 ) ?
- p_filter->fmt_out.video.i_height :
- p_filter->fmt_in.video.i_height;
- for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ ) /* one pass per source line */
- {
- p_pic_start = p_pic;
- p_buffer = b_hscale ? p_buffer_start : p_pic; /* convert into the scratch line only when scaling horizontally */
- for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; ) /* whole groups of 8 pixels */
- {
- CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
- CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
- CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
- CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
- }
- /* Here we do some unaligned reads and duplicate conversions, but
- * at least we have all the pixels: the pointers are stepped back by
- * i_rewind (half of that for chroma) and one more full group is
- * converted so that it ends on the last pixel of the line */
- if( i_rewind )
- {
- p_y -= i_rewind;
- p_u -= i_rewind >> 1;
- p_v -= i_rewind >> 1;
- p_buffer -= i_rewind;
- CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
- CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
- CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
- CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
- }
- SCALE_WIDTH;
- SCALE_HEIGHT( 420, 2 );
- p_y += i_source_margin; /* skip the luma padding of the source line */
- if( i_y % 2 ) /* chroma padding is skipped on odd lines only: one U/V line serves two Y lines */
- {
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- }
- }
- }
- #else // ! defined (MODULE_NAME_IS_i420_rgb)
- void I420_R5G5B5( filter_t *p_filter, picture_t *p_src,
- picture_t *p_dest )
- {
- /* MMX / SSE2 conversion of a planar YUV 4:2:0 picture (p_src) into a
- * 16-bit-per-pixel RGB picture (p_dest) using the *_UNPACK_15 macros
- * (presumably a 5-5-5 layout, going by the function name). p_filter supplies
- * the input and output dimensions and the private p_sys data (p_buffer
- * scratch line, p_offset table).
- * The SSE2_*, MMX_*, SCALE_WIDTH and SCALE_HEIGHT macros are defined outside
- * this file section.
- * NOTE(review): several locals below are never referenced directly here and
- * are presumably used by name inside those macros - confirm before renaming
- * or removing any of them.
- * Destination is addressed as 16-bit pixels, the source planes as bytes */
- uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
- uint8_t *p_y = p_src->Y_PIXELS;
- uint8_t *p_u = p_src->U_PIXELS;
- uint8_t *p_v = p_src->V_PIXELS;
- bool b_hscale; /* horizontal scaling type, set through SetOffset() */
- unsigned int i_vscale; /* vertical scaling type, set through SetOffset() */
- unsigned int i_x, i_y; /* horizontal and vertical indexes */
- int i_right_margin; /* destination pitch minus visible pitch, in bytes */
- int i_rewind; /* how far to step back to convert the last partial group */
- int i_scale_count; /* scale modulo counter */
- int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
- uint16_t * p_pic_start; /* beginning of the current line for copy */
- /* Conversion buffer pointer: scratch line used when scaling horizontally */
- uint16_t * p_buffer_start = (uint16_t*)p_filter->p_sys->p_buffer;
- uint16_t * p_buffer;
- /* Offset array pointer: table handed to SetOffset() below */
- int * p_offset_start = p_filter->p_sys->p_offset;
- int * p_offset;
- const int i_source_margin = p_src->p[0].i_pitch
- - p_src->p[0].i_visible_pitch; /* luma bytes to skip at the end of each line */
- const int i_source_margin_c = p_src->p[1].i_pitch
- - p_src->p[1].i_visible_pitch; /* same for plane 1; also applied to p_v below */
- i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
- /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
- * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
- * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
- SetOffset( p_filter->fmt_in.video.i_width,
- p_filter->fmt_in.video.i_height,
- p_filter->fmt_out.video.i_width,
- p_filter->fmt_out.video.i_height,
- &b_hscale, &i_vscale, p_offset_start );
- /*
- * Perform conversion
- */
- i_scale_count = ( i_vscale == 1 ) ?
- p_filter->fmt_out.video.i_height :
- p_filter->fmt_in.video.i_height;
- #if defined (MODULE_NAME_IS_i420_rgb_sse2)
- if( p_filter->fmt_in.video.i_width & 15 ) /* SSE2 works on groups of 16 pixels */
- {
- i_rewind = 16 - ( p_filter->fmt_in.video.i_width & 15 );
- }
- else
- {
- i_rewind = 0;
- }
- /*
- ** SSE2 128 bits fetch/store instructions are faster
- ** if memory access is 16 bytes aligned
- */
- p_buffer = b_hscale ? p_buffer_start : p_pic; /* needed before the loop for the alignment test below */
- if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
- p_dest->p->i_pitch|
- ((intptr_t)p_y)|
- ((intptr_t)p_buffer))) )
- {
- /* use faster SSE2 aligned fetch and store */
- for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
- {
- p_pic_start = p_pic;
- for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; )
- {
- SSE2_CALL (
- SSE2_INIT_16_ALIGNED
- SSE2_YUV_MUL
- SSE2_YUV_ADD
- SSE2_UNPACK_15_ALIGNED
- );
- p_y += 16; /* 16 luma samples consumed ... */
- p_u += 8; /* ... and half as many of each chroma */
- p_v += 8;
- p_buffer += 16;
- }
- /* Here we do some unaligned reads and duplicate conversions, but
- * at least we have all the pixels; stepping back by i_rewind breaks
- * the 16-byte alignment, hence the UNALIGNED macros */
- if( i_rewind )
- {
- p_y -= i_rewind;
- p_u -= i_rewind >> 1;
- p_v -= i_rewind >> 1;
- p_buffer -= i_rewind;
- SSE2_CALL (
- SSE2_INIT_16_UNALIGNED
- SSE2_YUV_MUL
- SSE2_YUV_ADD
- SSE2_UNPACK_15_UNALIGNED
- );
- p_y += 16;
- p_u += 8;
- p_v += 8;
- }
- SCALE_WIDTH;
- SCALE_HEIGHT( 420, 2 );
- p_y += i_source_margin; /* skip the luma padding of the source line */
- if( i_y % 2 ) /* chroma padding is skipped on odd lines only: one U/V line serves two Y lines */
- {
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- }
- p_buffer = b_hscale ? p_buffer_start : p_pic; /* re-arm the output pointer for the next line */
- }
- }
- else
- {
- /* use slower SSE2 unaligned fetch and store */
- for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
- {
- p_pic_start = p_pic;
- p_buffer = b_hscale ? p_buffer_start : p_pic;
- for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; )
- {
- SSE2_CALL (
- SSE2_INIT_16_UNALIGNED
- SSE2_YUV_MUL
- SSE2_YUV_ADD
- SSE2_UNPACK_15_UNALIGNED
- );
- p_y += 16;
- p_u += 8;
- p_v += 8;
- p_buffer += 16;
- }
- /* Here we do some unaligned reads and duplicate conversions, but
- * at least we have all the pixels */
- if( i_rewind )
- {
- p_y -= i_rewind;
- p_u -= i_rewind >> 1;
- p_v -= i_rewind >> 1;
- p_buffer -= i_rewind;
- SSE2_CALL (
- SSE2_INIT_16_UNALIGNED
- SSE2_YUV_MUL
- SSE2_YUV_ADD
- SSE2_UNPACK_15_UNALIGNED
- );
- p_y += 16;
- p_u += 8;
- p_v += 8;
- }
- SCALE_WIDTH;
- SCALE_HEIGHT( 420, 2 );
- p_y += i_source_margin;
- if( i_y % 2 )
- {
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- }
- p_buffer = b_hscale ? p_buffer_start : p_pic;
- }
- }
- /* make sure all SSE2 stores are visible thereafter */
- SSE2_END;
- #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
- if( p_filter->fmt_in.video.i_width & 7 ) /* MMX works on groups of 8 pixels */
- {
- i_rewind = 8 - ( p_filter->fmt_in.video.i_width & 7 );
- }
- else
- {
- i_rewind = 0;
- }
- for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
- {
- p_pic_start = p_pic;
- p_buffer = b_hscale ? p_buffer_start : p_pic;
- for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
- {
- MMX_CALL (
- MMX_INIT_16
- MMX_YUV_MUL
- MMX_YUV_ADD
- MMX_UNPACK_15
- );
- p_y += 8; /* 8 luma samples consumed ... */
- p_u += 4; /* ... and half as many of each chroma */
- p_v += 4;
- p_buffer += 8;
- }
- /* Here we do some unaligned reads and duplicate conversions, but
- * at least we have all the pixels */
- if( i_rewind )
- {
- p_y -= i_rewind;
- p_u -= i_rewind >> 1;
- p_v -= i_rewind >> 1;
- p_buffer -= i_rewind;
- MMX_CALL (
- MMX_INIT_16
- MMX_YUV_MUL
- MMX_YUV_ADD
- MMX_UNPACK_15
- );
- p_y += 8;
- p_u += 4;
- p_v += 4;
- p_buffer += 8;
- }
- SCALE_WIDTH;
- SCALE_HEIGHT( 420, 2 );
- p_y += i_source_margin;
- if( i_y % 2 )
- {
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- }
- }
- /* re-enable FPU registers */
- MMX_END;
- #endif
- }
- void I420_R5G6B5( filter_t *p_filter, picture_t *p_src,
- picture_t *p_dest )
- {
- /* MMX / SSE2 conversion of a planar YUV 4:2:0 picture (p_src) into a
- * 16-bit-per-pixel RGB picture (p_dest) using the *_UNPACK_16 macros
- * (presumably a 5-6-5 layout, going by the function name). p_filter supplies
- * the input and output dimensions and the private p_sys data (p_buffer
- * scratch line, p_offset table).
- * The SSE2_*, MMX_*, SCALE_WIDTH and SCALE_HEIGHT macros are defined outside
- * this file section.
- * NOTE(review): several locals below are never referenced directly here and
- * are presumably used by name inside those macros - confirm before renaming
- * or removing any of them.
- * Destination is addressed as 16-bit pixels, the source planes as bytes */
- uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
- uint8_t *p_y = p_src->Y_PIXELS;
- uint8_t *p_u = p_src->U_PIXELS;
- uint8_t *p_v = p_src->V_PIXELS;
- bool b_hscale; /* horizontal scaling type, set through SetOffset() */
- unsigned int i_vscale; /* vertical scaling type, set through SetOffset() */
- unsigned int i_x, i_y; /* horizontal and vertical indexes */
- int i_right_margin; /* destination pitch minus visible pitch, in bytes */
- int i_rewind; /* how far to step back to convert the last partial group */
- int i_scale_count; /* scale modulo counter */
- int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
- uint16_t * p_pic_start; /* beginning of the current line for copy */
- /* Conversion buffer pointer: scratch line used when scaling horizontally */
- uint16_t * p_buffer_start = (uint16_t*)p_filter->p_sys->p_buffer;
- uint16_t * p_buffer;
- /* Offset array pointer: table handed to SetOffset() below */
- int * p_offset_start = p_filter->p_sys->p_offset;
- int * p_offset;
- const int i_source_margin = p_src->p[0].i_pitch
- - p_src->p[0].i_visible_pitch; /* luma bytes to skip at the end of each line */
- const int i_source_margin_c = p_src->p[1].i_pitch
- - p_src->p[1].i_visible_pitch; /* same for plane 1; also applied to p_v below */
- i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
- /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
- * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
- * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
- SetOffset( p_filter->fmt_in.video.i_width,
- p_filter->fmt_in.video.i_height,
- p_filter->fmt_out.video.i_width,
- p_filter->fmt_out.video.i_height,
- &b_hscale, &i_vscale, p_offset_start );
- /*
- * Perform conversion
- */
- i_scale_count = ( i_vscale == 1 ) ?
- p_filter->fmt_out.video.i_height :
- p_filter->fmt_in.video.i_height;
- #if defined (MODULE_NAME_IS_i420_rgb_sse2)
- if( p_filter->fmt_in.video.i_width & 15 ) /* SSE2 works on groups of 16 pixels */
- {
- i_rewind = 16 - ( p_filter->fmt_in.video.i_width & 15 );
- }
- else
- {
- i_rewind = 0;
- }
- /*
- ** SSE2 128 bits fetch/store instructions are faster
- ** if memory access is 16 bytes aligned
- */
- p_buffer = b_hscale ? p_buffer_start : p_pic; /* needed before the loop for the alignment test below */
- if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
- p_dest->p->i_pitch|
- ((intptr_t)p_y)|
- ((intptr_t)p_buffer))) )
- {
- /* use faster SSE2 aligned fetch and store */
- for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
- {
- p_pic_start = p_pic;
- for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; )
- {
- SSE2_CALL (
- SSE2_INIT_16_ALIGNED
- SSE2_YUV_MUL
- SSE2_YUV_ADD
- SSE2_UNPACK_16_ALIGNED
- );
- p_y += 16; /* 16 luma samples consumed ... */
- p_u += 8; /* ... and half as many of each chroma */
- p_v += 8;
- p_buffer += 16;
- }
- /* Here we do some unaligned reads and duplicate conversions, but
- * at least we have all the pixels; stepping back by i_rewind breaks
- * the 16-byte alignment, hence the UNALIGNED macros */
- if( i_rewind )
- {
- p_y -= i_rewind;
- p_u -= i_rewind >> 1;
- p_v -= i_rewind >> 1;
- p_buffer -= i_rewind;
- SSE2_CALL (
- SSE2_INIT_16_UNALIGNED
- SSE2_YUV_MUL
- SSE2_YUV_ADD
- SSE2_UNPACK_16_UNALIGNED
- );
- p_y += 16;
- p_u += 8;
- p_v += 8;
- }
- SCALE_WIDTH;
- SCALE_HEIGHT( 420, 2 );
- p_y += i_source_margin; /* skip the luma padding of the source line */
- if( i_y % 2 ) /* chroma padding is skipped on odd lines only: one U/V line serves two Y lines */
- {
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- }
- p_buffer = b_hscale ? p_buffer_start : p_pic; /* re-arm the output pointer for the next line */
- }
- }
- else
- {
- /* use slower SSE2 unaligned fetch and store */
- for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
- {
- p_pic_start = p_pic;
- p_buffer = b_hscale ? p_buffer_start : p_pic;
- for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; )
- {
- SSE2_CALL(
- SSE2_INIT_16_UNALIGNED
- SSE2_YUV_MUL
- SSE2_YUV_ADD
- SSE2_UNPACK_16_UNALIGNED
- );
- p_y += 16;
- p_u += 8;
- p_v += 8;
- p_buffer += 16;
- }
- /* Here we do some unaligned reads and duplicate conversions, but
- * at least we have all the pixels */
- if( i_rewind )
- {
- p_y -= i_rewind;
- p_u -= i_rewind >> 1;
- p_v -= i_rewind >> 1;
- p_buffer -= i_rewind;
- SSE2_CALL(
- SSE2_INIT_16_UNALIGNED
- SSE2_YUV_MUL
- SSE2_YUV_ADD
- SSE2_UNPACK_16_UNALIGNED
- );
- p_y += 16;
- p_u += 8;
- p_v += 8;
- }
- SCALE_WIDTH;
- SCALE_HEIGHT( 420, 2 );
- p_y += i_source_margin;
- if( i_y % 2 )
- {
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- }
- p_buffer = b_hscale ? p_buffer_start : p_pic;
- }
- }
- /* make sure all SSE2 stores are visible thereafter */
- SSE2_END;
- #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
- if( p_filter->fmt_in.video.i_width & 7 ) /* MMX works on groups of 8 pixels */
- {
- i_rewind = 8 - ( p_filter->fmt_in.video.i_width & 7 );
- }
- else
- {
- i_rewind = 0;
- }
- for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
- {
- p_pic_start = p_pic;
- p_buffer = b_hscale ? p_buffer_start : p_pic;
- for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
- {
- MMX_CALL (
- MMX_INIT_16
- MMX_YUV_MUL
- MMX_YUV_ADD
- MMX_UNPACK_16
- );
- p_y += 8; /* 8 luma samples consumed ... */
- p_u += 4; /* ... and half as many of each chroma */
- p_v += 4;
- p_buffer += 8;
- }
- /* Here we do some unaligned reads and duplicate conversions, but
- * at least we have all the pixels */
- if( i_rewind )
- {
- p_y -= i_rewind;
- p_u -= i_rewind >> 1;
- p_v -= i_rewind >> 1;
- p_buffer -= i_rewind;
- MMX_CALL (
- MMX_INIT_16
- MMX_YUV_MUL
- MMX_YUV_ADD
- MMX_UNPACK_16
- );
- p_y += 8;
- p_u += 4;
- p_v += 4;
- p_buffer += 8;
- }
- SCALE_WIDTH;
- SCALE_HEIGHT( 420, 2 );
- p_y += i_source_margin;
- if( i_y % 2 )
- {
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- }
- }
- /* re-enable FPU registers */
- MMX_END;
- #endif
- }
- #endif
- /*****************************************************************************
- * I420_RGB32: color YUV 4:2:0 to RGB 32 bpp
- *****************************************************************************
- * Horizontal alignment needed:
- * - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
- * - output: 1 pixel (4 bytes), margins allowed
- * Vertical alignment needed:
- * - input: 2 lines (2 Y lines, 1 U/V line)
- * - output: 1 line
- *****************************************************************************/
- #if defined (MODULE_NAME_IS_i420_rgb)
- void I420_RGB32( filter_t *p_filter, picture_t *p_src,
- picture_t *p_dest )
- {
- /* Plain C conversion of a planar YUV 4:2:0 picture (p_src) into a 32 bpp
- * RGB picture (p_dest). p_filter supplies the input and output dimensions
- * and the private p_sys data (p_rgb32 table, p_buffer scratch line,
- * p_offset table).
- * The per-pixel work is done by the CONVERT_*, SCALE_WIDTH and SCALE_HEIGHT
- * macros, which are defined outside this file section.
- * NOTE(review): several locals below are never referenced directly here and
- * are presumably used by name inside those macros - confirm before renaming
- * or removing any of them.
- * Destination is addressed as 32-bit pixels, the source planes as bytes */
- uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
- uint8_t *p_y = p_src->Y_PIXELS;
- uint8_t *p_u = p_src->U_PIXELS;
- uint8_t *p_v = p_src->V_PIXELS;
- bool b_hscale; /* horizontal scaling type, set through SetOffset() */
- unsigned int i_vscale; /* vertical scaling type, set through SetOffset() */
- unsigned int i_x, i_y; /* horizontal and vertical indexes */
- int i_right_margin; /* destination pitch minus visible pitch, in bytes */
- int i_rewind; /* how far to step back to convert the last partial group of 8 */
- int i_scale_count; /* scale modulo counter */
- int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
- uint32_t * p_pic_start; /* beginning of the current line for copy */
- int i_uval, i_vval; /* U and V samples */
- int i_red, i_green, i_blue; /* presumably the chroma contributions to R, G and B - set only inside the macros */
- uint32_t * p_yuv = p_filter->p_sys->p_rgb32; /* 32 bpp conversion table */
- uint32_t * p_ybase; /* Y-dependent conversion table */
- /* Conversion buffer pointer: scratch line used when scaling horizontally */
- uint32_t * p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
- uint32_t * p_buffer;
- /* Offset array pointer: table handed to SetOffset() below */
- int * p_offset_start = p_filter->p_sys->p_offset;
- int * p_offset;
- const int i_source_margin = p_src->p[0].i_pitch
- - p_src->p[0].i_visible_pitch; /* luma bytes to skip at the end of each line */
- const int i_source_margin_c = p_src->p[1].i_pitch
- - p_src->p[1].i_visible_pitch; /* same for plane 1; also applied to p_v below */
- i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
- if( p_filter->fmt_in.video.i_width & 7 ) /* width is not a multiple of 8 */
- {
- i_rewind = 8 - ( p_filter->fmt_in.video.i_width & 7 );
- }
- else
- {
- i_rewind = 0;
- }
- /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
- * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
- * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
- SetOffset( p_filter->fmt_in.video.i_width,
- p_filter->fmt_in.video.i_height,
- p_filter->fmt_out.video.i_width,
- p_filter->fmt_out.video.i_height,
- &b_hscale, &i_vscale, p_offset_start );
- /*
- * Perform conversion
- */
- i_scale_count = ( i_vscale == 1 ) ?
- p_filter->fmt_out.video.i_height :
- p_filter->fmt_in.video.i_height;
- for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ ) /* one pass per source line */
- {
- p_pic_start = p_pic;
- p_buffer = b_hscale ? p_buffer_start : p_pic; /* convert into the scratch line only when scaling horizontally */
- for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; ) /* whole groups of 8 pixels */
- {
- CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
- CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
- CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
- CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
- }
- /* Here we do some unaligned reads and duplicate conversions, but
- * at least we have all the pixels: the pointers are stepped back by
- * i_rewind (half of that for chroma) and one more full group is
- * converted so that it ends on the last pixel of the line */
- if( i_rewind )
- {
- p_y -= i_rewind;
- p_u -= i_rewind >> 1;
- p_v -= i_rewind >> 1;
- p_buffer -= i_rewind;
- CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
- CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
- CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
- CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
- }
- SCALE_WIDTH;
- SCALE_HEIGHT( 420, 4 );
- p_y += i_source_margin; /* skip the luma padding of the source line */
- if( i_y % 2 ) /* chroma padding is skipped on odd lines only: one U/V line serves two Y lines */
- {
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- }
- }
- }
- #else // defined (MODULE_NAME_IS_i420_rgb_mmx) || defined (MODULE_NAME_IS_i420_rgb_sse2)
- void I420_A8R8G8B8( filter_t *p_filter, picture_t *p_src,
- picture_t *p_dest )
- {
- /* MMX / SSE2 conversion of a planar YUV 4:2:0 picture (p_src) into a
- * 32-bit-per-pixel picture (p_dest) using the *_UNPACK_32_ARGB macros.
- * p_filter supplies the input and output dimensions and the private p_sys
- * data (p_buffer scratch line, p_offset table).
- * The SSE2_*, MMX_*, SCALE_WIDTH and SCALE_HEIGHT macros are defined outside
- * this file section.
- * NOTE(review): several locals below are never referenced directly here and
- * are presumably used by name inside those macros - confirm before renaming
- * or removing any of them.
- * Destination is addressed as 32-bit pixels, the source planes as bytes */
- uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
- uint8_t *p_y = p_src->Y_PIXELS;
- uint8_t *p_u = p_src->U_PIXELS;
- uint8_t *p_v = p_src->V_PIXELS;
- bool b_hscale; /* horizontal scaling type, set through SetOffset() */
- unsigned int i_vscale; /* vertical scaling type, set through SetOffset() */
- unsigned int i_x, i_y; /* horizontal and vertical indexes */
- int i_right_margin; /* destination pitch minus visible pitch, in bytes */
- int i_rewind; /* how far to step back to convert the last partial group */
- int i_scale_count; /* scale modulo counter */
- int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
- uint32_t * p_pic_start; /* beginning of the current line for copy */
- /* Conversion buffer pointer: scratch line used when scaling horizontally */
- uint32_t * p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
- uint32_t * p_buffer;
- /* Offset array pointer: table handed to SetOffset() below */
- int * p_offset_start = p_filter->p_sys->p_offset;
- int * p_offset;
- const int i_source_margin = p_src->p[0].i_pitch
- - p_src->p[0].i_visible_pitch; /* luma bytes to skip at the end of each line */
- const int i_source_margin_c = p_src->p[1].i_pitch
- - p_src->p[1].i_visible_pitch; /* same for plane 1; also applied to p_v below */
- i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
- /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
- * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
- * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
- SetOffset( p_filter->fmt_in.video.i_width,
- p_filter->fmt_in.video.i_height,
- p_filter->fmt_out.video.i_width,
- p_filter->fmt_out.video.i_height,
- &b_hscale, &i_vscale, p_offset_start );
- /*
- * Perform conversion
- */
- i_scale_count = ( i_vscale == 1 ) ?
- p_filter->fmt_out.video.i_height :
- p_filter->fmt_in.video.i_height;
- #if defined (MODULE_NAME_IS_i420_rgb_sse2)
- if( p_filter->fmt_in.video.i_width & 15 ) /* SSE2 works on groups of 16 pixels */
- {
- i_rewind = 16 - ( p_filter->fmt_in.video.i_width & 15 );
- }
- else
- {
- i_rewind = 0;
- }
- /*
- ** SSE2 128 bits fetch/store instructions are faster
- ** if memory access is 16 bytes aligned
- */
- p_buffer = b_hscale ? p_buffer_start : p_pic; /* needed before the loop for the alignment test below */
- if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
- p_dest->p->i_pitch|
- ((intptr_t)p_y)|
- ((intptr_t)p_buffer))) )
- {
- /* use faster SSE2 aligned fetch and store */
- for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
- {
- p_pic_start = p_pic;
- for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
- {
- SSE2_CALL (
- SSE2_INIT_32_ALIGNED
- SSE2_YUV_MUL
- SSE2_YUV_ADD
- SSE2_UNPACK_32_ARGB_ALIGNED
- );
- p_y += 16; /* 16 luma samples consumed ... */
- p_u += 8; /* ... and half as many of each chroma */
- p_v += 8;
- p_buffer += 16;
- }
- /* Here we do some unaligned reads and duplicate conversions, but
- * at least we have all the pixels; stepping back by i_rewind breaks
- * the 16-byte alignment, hence the UNALIGNED macros */
- if( i_rewind )
- {
- p_y -= i_rewind;
- p_u -= i_rewind >> 1;
- p_v -= i_rewind >> 1;
- p_buffer -= i_rewind;
- SSE2_CALL (
- SSE2_INIT_32_UNALIGNED
- SSE2_YUV_MUL
- SSE2_YUV_ADD
- SSE2_UNPACK_32_ARGB_UNALIGNED
- );
- p_y += 16;
- p_u += 8; /* was 4: a 16-pixel group uses 8 chroma samples, as in every other SSE2 group of this file */
- p_v += 8; /* was 4: otherwise the chroma pointers end each line 4 bytes short and drift further on every line */
- }
- SCALE_WIDTH;
- SCALE_HEIGHT( 420, 4 );
- p_y += i_source_margin; /* skip the luma padding of the source line */
- if( i_y % 2 ) /* chroma padding is skipped on odd lines only: one U/V line serves two Y lines */
- {
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- }
- p_buffer = b_hscale ? p_buffer_start : p_pic; /* re-arm the output pointer for the next line */
- }
- }
- else
- {
- /* use slower SSE2 unaligned fetch and store */
- for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
- {
- p_pic_start = p_pic;
- p_buffer = b_hscale ? p_buffer_start : p_pic;
- for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
- {
- SSE2_CALL (
- SSE2_INIT_32_UNALIGNED
- SSE2_YUV_MUL
- SSE2_YUV_ADD
- SSE2_UNPACK_32_ARGB_UNALIGNED
- );
- p_y += 16;
- p_u += 8;
- p_v += 8;
- p_buffer += 16;
- }
- /* Here we do some unaligned reads and duplicate conversions, but
- * at least we have all the pixels */
- if( i_rewind )
- {
- p_y -= i_rewind;
- p_u -= i_rewind >> 1;
- p_v -= i_rewind >> 1;
- p_buffer -= i_rewind;
- SSE2_CALL (
- SSE2_INIT_32_UNALIGNED
- SSE2_YUV_MUL
- SSE2_YUV_ADD
- SSE2_UNPACK_32_ARGB_UNALIGNED
- );
- p_y += 16;
- p_u += 8;
- p_v += 8;
- }
- SCALE_WIDTH;
- SCALE_HEIGHT( 420, 4 );
- p_y += i_source_margin;
- if( i_y % 2 )
- {
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- }
- p_buffer = b_hscale ? p_buffer_start : p_pic;
- }
- }
- /* make sure all SSE2 stores are visible thereafter */
- SSE2_END;
- #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
- if( p_filter->fmt_in.video.i_width & 7 ) /* MMX works on groups of 8 pixels */
- {
- i_rewind = 8 - ( p_filter->fmt_in.video.i_width & 7 );
- }
- else
- {
- i_rewind = 0;
- }
- for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
- {
- p_pic_start = p_pic;
- p_buffer = b_hscale ? p_buffer_start : p_pic;
- for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
- {
- MMX_CALL (
- MMX_INIT_32
- MMX_YUV_MUL
- MMX_YUV_ADD
- MMX_UNPACK_32_ARGB
- );
- p_y += 8; /* 8 luma samples consumed ... */
- p_u += 4; /* ... and half as many of each chroma */
- p_v += 4;
- p_buffer += 8;
- }
- /* Here we do some unaligned reads and duplicate conversions, but
- * at least we have all the pixels */
- if( i_rewind )
- {
- p_y -= i_rewind;
- p_u -= i_rewind >> 1;
- p_v -= i_rewind >> 1;
- p_buffer -= i_rewind;
- MMX_CALL (
- MMX_INIT_32
- MMX_YUV_MUL
- MMX_YUV_ADD
- MMX_UNPACK_32_ARGB
- );
- p_y += 8;
- p_u += 4;
- p_v += 4;
- p_buffer += 8;
- }
- SCALE_WIDTH;
- SCALE_HEIGHT( 420, 4 );
- p_y += i_source_margin;
- if( i_y % 2 )
- {
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- }
- }
- /* re-enable FPU registers */
- MMX_END;
- #endif
- }
- void I420_R8G8B8A8( filter_t *p_filter, picture_t *p_src,
- picture_t *p_dest )
- {
- /* We got this one from the old arguments */
- uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
- uint8_t *p_y = p_src->Y_PIXELS;
- uint8_t *p_u = p_src->U_PIXELS;
- uint8_t *p_v = p_src->V_PIXELS;
- bool b_hscale; /* horizontal scaling type */
- unsigned int i_vscale; /* vertical scaling type */
- unsigned int i_x, i_y; /* horizontal and vertical indexes */
- int i_right_margin;
- int i_rewind;
- int i_scale_count; /* scale modulo counter */
- int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
- uint32_t * p_pic_start; /* beginning of the current line for copy */
- /* Conversion buffer pointer */
- uint32_t * p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
- uint32_t * p_buffer;
- /* Offset array pointer */
- int * p_offset_start = p_filter->p_sys->p_offset;
- int * p_offset;
- const int i_source_margin = p_src->p[0].i_pitch
- - p_src->p[0].i_visible_pitch;
- const int i_source_margin_c = p_src->p[1].i_pitch
- - p_src->p[1].i_visible_pitch;
- i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
- /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
- * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
- * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
- SetOffset( p_filter->fmt_in.video.i_width,
- p_filter->fmt_in.video.i_height,
- p_filter->fmt_out.video.i_width,
- p_filter->fmt_out.video.i_height,
- &b_hscale, &i_vscale, p_offset_start );
- /*
- * Perform conversion
- */
- i_scale_count = ( i_vscale == 1 ) ?
- p_filter->fmt_out.video.i_height :
- p_filter->fmt_in.video.i_height;
- #if defined (MODULE_NAME_IS_i420_rgb_sse2)
- if( p_filter->fmt_in.video.i_width & 15 )
- {
- i_rewind = 16 - ( p_filter->fmt_in.video.i_width & 15 );
- }
- else
- {
- i_rewind = 0;
- }
- /*
- ** SSE2 128 bits fetch/store instructions are faster
- ** if memory access is 16 bytes aligned
- */
- p_buffer = b_hscale ? p_buffer_start : p_pic;
- if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
- p_dest->p->i_pitch|
- ((intptr_t)p_y)|
- ((intptr_t)p_buffer))) )
- {
- /* use faster SSE2 aligned fetch and store */
- for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
- {
- p_pic_start = p_pic;
- for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
- {
- SSE2_CALL (
- SSE2_INIT_32_ALIGNED
- SSE2_YUV_MUL
- SSE2_YUV_ADD
- SSE2_UNPACK_32_RGBA_ALIGNED
- );
- p_y += 16;
- p_u += 8;
- p_v += 8;
- p_buffer += 16;
- }
- /* Here we do some unaligned reads and duplicate conversions, but
- * at least we have all the pixels */
- if( i_rewind )
- {
- p_y -= i_rewind;
- p_u -= i_rewind >> 1;
- p_v -= i_rewind >> 1;
- p_buffer -= i_rewind;
- SSE2_CALL (
- SSE2_INIT_32_UNALIGNED
- SSE2_YUV_MUL
- SSE2_YUV_ADD
- SSE2_UNPACK_32_RGBA_UNALIGNED
- );
- p_y += 16;
- p_u += 4;
- p_v += 4;
- }
- SCALE_WIDTH;
- SCALE_HEIGHT( 420, 4 );
- p_y += i_source_margin;
- if( i_y % 2 )
- {
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- }
- p_buffer = b_hscale ? p_buffer_start : p_pic;
- }
- }
- else
- {
- /* use slower SSE2 unaligned fetch and store */
- for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
- {
- p_pic_start = p_pic;
- p_buffer = b_hscale ? p_buffer_start : p_pic;
- for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
- {
- SSE2_CALL (
- SSE2_INIT_32_UNALIGNED
- SSE2_YUV_MUL
- SSE2_YUV_ADD
- SSE2_UNPACK_32_RGBA_UNALIGNED
- );
- p_y += 16;
- p_u += 8;
- p_v += 8;
- p_buffer += 16;
- }
- /* Here we do some unaligned reads and duplicate conversions, but
- * at least we have all the pixels */
- if( i_rewind )
- {
- p_y -= i_rewind;
- p_u -= i_rewind >> 1;
- p_v -= i_rewind >> 1;
- p_buffer -= i_rewind;
- SSE2_CALL (
- SSE2_INIT_32_UNALIGNED
- SSE2_YUV_MUL
- SSE2_YUV_ADD
- SSE2_UNPACK_32_RGBA_UNALIGNED
- );
- p_y += 16;
- p_u += 8;
- p_v += 8;
- }
- SCALE_WIDTH;
- SCALE_HEIGHT( 420, 4 );
- p_y += i_source_margin;
- if( i_y % 2 )
- {
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- }
- p_buffer = b_hscale ? p_buffer_start : p_pic;
- }
- }
- /* make sure all SSE2 stores are visible thereafter */
- SSE2_END;
- #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
- if( p_filter->fmt_in.video.i_width & 7 )
- {
- i_rewind = 8 - ( p_filter->fmt_in.video.i_width & 7 );
- }
- else
- {
- i_rewind = 0;
- }
- for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
- {
- p_pic_start = p_pic;
- p_buffer = b_hscale ? p_buffer_start : p_pic;
- for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
- {
- MMX_CALL (
- MMX_INIT_32
- MMX_YUV_MUL
- MMX_YUV_ADD
- MMX_UNPACK_32_RGBA
- );
- p_y += 8;
- p_u += 4;
- p_v += 4;
- p_buffer += 8;
- }
- /* Here we do some unaligned reads and duplicate conversions, but
- * at least we have all the pixels */
- if( i_rewind )
- {
- p_y -= i_rewind;
- p_u -= i_rewind >> 1;
- p_v -= i_rewind >> 1;
- p_buffer -= i_rewind;
- MMX_CALL (
- MMX_INIT_32
- MMX_YUV_MUL
- MMX_YUV_ADD
- MMX_UNPACK_32_RGBA
- );
- p_y += 8;
- p_u += 4;
- p_v += 4;
- p_buffer += 8;
- }
- SCALE_WIDTH;
- SCALE_HEIGHT( 420, 4 );
- p_y += i_source_margin;
- if( i_y % 2 )
- {
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- }
- }
- /* re-enable FPU registers */
- MMX_END;
- #endif
- }
- void I420_B8G8R8A8( filter_t *p_filter, picture_t *p_src,
- picture_t *p_dest )
- {
- /* We got this one from the old arguments */
- uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
- uint8_t *p_y = p_src->Y_PIXELS;
- uint8_t *p_u = p_src->U_PIXELS;
- uint8_t *p_v = p_src->V_PIXELS;
- bool b_hscale; /* horizontal scaling type */
- unsigned int i_vscale; /* vertical scaling type */
- unsigned int i_x, i_y; /* horizontal and vertical indexes */
- int i_right_margin;
- int i_rewind;
- int i_scale_count; /* scale modulo counter */
- int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
- uint32_t * p_pic_start; /* beginning of the current line for copy */
- /* Conversion buffer pointer */
- uint32_t * p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
- uint32_t * p_buffer;
- /* Offset array pointer */
- int * p_offset_start = p_filter->p_sys->p_offset;
- int * p_offset;
- const int i_source_margin = p_src->p[0].i_pitch
- - p_src->p[0].i_visible_pitch;
- const int i_source_margin_c = p_src->p[1].i_pitch
- - p_src->p[1].i_visible_pitch;
- i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
- /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
- * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
- * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
- SetOffset( p_filter->fmt_in.video.i_width,
- p_filter->fmt_in.video.i_height,
- p_filter->fmt_out.video.i_width,
- p_filter->fmt_out.video.i_height,
- &b_hscale, &i_vscale, p_offset_start );
- /*
- * Perform conversion
- */
- i_scale_count = ( i_vscale == 1 ) ?
- p_filter->fmt_out.video.i_height :
- p_filter->fmt_in.video.i_height;
- #if defined (MODULE_NAME_IS_i420_rgb_sse2)
- if( p_filter->fmt_in.video.i_width & 15 )
- {
- i_rewind = 16 - ( p_filter->fmt_in.video.i_width & 15 );
- }
- else
- {
- i_rewind = 0;
- }
- /*
- ** SSE2 128 bits fetch/store instructions are faster
- ** if memory access is 16 bytes aligned
- */
- p_buffer = b_hscale ? p_buffer_start : p_pic;
- if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
- p_dest->p->i_pitch|
- ((intptr_t)p_y)|
- ((intptr_t)p_buffer))) )
- {
- /* use faster SSE2 aligned fetch and store */
- for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
- {
- p_pic_start = p_pic;
- for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
- {
- SSE2_CALL (
- SSE2_INIT_32_ALIGNED
- SSE2_YUV_MUL
- SSE2_YUV_ADD
- SSE2_UNPACK_32_BGRA_ALIGNED
- );
- p_y += 16;
- p_u += 8;
- p_v += 8;
- p_buffer += 16;
- }
- /* Here we do some unaligned reads and duplicate conversions, but
- * at least we have all the pixels */
- if( i_rewind )
- {
- p_y -= i_rewind;
- p_u -= i_rewind >> 1;
- p_v -= i_rewind >> 1;
- p_buffer -= i_rewind;
- SSE2_CALL (
- SSE2_INIT_32_UNALIGNED
- SSE2_YUV_MUL
- SSE2_YUV_ADD
- SSE2_UNPACK_32_BGRA_UNALIGNED
- );
- p_y += 16;
- p_u += 4;
- p_v += 4;
- }
- SCALE_WIDTH;
- SCALE_HEIGHT( 420, 4 );
- p_y += i_source_margin;
- if( i_y % 2 )
- {
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- }
- p_buffer = b_hscale ? p_buffer_start : p_pic;
- }
- }
- else
- {
- /* use slower SSE2 unaligned fetch and store */
- for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
- {
- p_pic_start = p_pic;
- p_buffer = b_hscale ? p_buffer_start : p_pic;
- for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
- {
- SSE2_CALL (
- SSE2_INIT_32_UNALIGNED
- SSE2_YUV_MUL
- SSE2_YUV_ADD
- SSE2_UNPACK_32_BGRA_UNALIGNED
- );
- p_y += 16;
- p_u += 8;
- p_v += 8;
- p_buffer += 16;
- }
- /* Here we do some unaligned reads and duplicate conversions, but
- * at least we have all the pixels */
- if( i_rewind )
- {
- p_y -= i_rewind;
- p_u -= i_rewind >> 1;
- p_v -= i_rewind >> 1;
- p_buffer -= i_rewind;
- SSE2_CALL (
- SSE2_INIT_32_UNALIGNED
- SSE2_YUV_MUL
- SSE2_YUV_ADD
- SSE2_UNPACK_32_BGRA_UNALIGNED
- );
- p_y += 16;
- p_u += 8;
- p_v += 8;
- }
- SCALE_WIDTH;
- SCALE_HEIGHT( 420, 4 );
- p_y += i_source_margin;
- if( i_y % 2 )
- {
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- }
- p_buffer = b_hscale ? p_buffer_start : p_pic;
- }
- }
- #else
- if( p_filter->fmt_in.video.i_width & 7 )
- {
- i_rewind = 8 - ( p_filter->fmt_in.video.i_width & 7 );
- }
- else
- {
- i_rewind = 0;
- }
- for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
- {
- p_pic_start = p_pic;
- p_buffer = b_hscale ? p_buffer_start : p_pic;
- for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
- {
- MMX_CALL (
- MMX_INIT_32
- MMX_YUV_MUL
- MMX_YUV_ADD
- MMX_UNPACK_32_BGRA
- );
- p_y += 8;
- p_u += 4;
- p_v += 4;
- p_buffer += 8;
- }
- /* Here we do some unaligned reads and duplicate conversions, but
- * at least we have all the pixels */
- if( i_rewind )
- {
- p_y -= i_rewind;
- p_u -= i_rewind >> 1;
- p_v -= i_rewind >> 1;
- p_buffer -= i_rewind;
- MMX_CALL (
- MMX_INIT_32
- MMX_YUV_MUL
- MMX_YUV_ADD
- MMX_UNPACK_32_BGRA
- );
- p_y += 8;
- p_u += 4;
- p_v += 4;
- p_buffer += 8;
- }
- SCALE_WIDTH;
- SCALE_HEIGHT( 420, 4 );
- p_y += i_source_margin;
- if( i_y % 2 )
- {
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- }
- }
- /* re-enable FPU registers */
- MMX_END;
- #endif
- }
- void I420_A8B8G8R8( filter_t *p_filter, picture_t *p_src,
- picture_t *p_dest )
- {
- /* We got this one from the old arguments */
- uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
- uint8_t *p_y = p_src->Y_PIXELS;
- uint8_t *p_u = p_src->U_PIXELS;
- uint8_t *p_v = p_src->V_PIXELS;
- bool b_hscale; /* horizontal scaling type */
- unsigned int i_vscale; /* vertical scaling type */
- unsigned int i_x, i_y; /* horizontal and vertical indexes */
- int i_right_margin;
- int i_rewind;
- int i_scale_count; /* scale modulo counter */
- int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
- uint32_t * p_pic_start; /* beginning of the current line for copy */
- /* Conversion buffer pointer */
- uint32_t * p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
- uint32_t * p_buffer;
- /* Offset array pointer */
- int * p_offset_start = p_filter->p_sys->p_offset;
- int * p_offset;
- const int i_source_margin = p_src->p[0].i_pitch
- - p_src->p[0].i_visible_pitch;
- const int i_source_margin_c = p_src->p[1].i_pitch
- - p_src->p[1].i_visible_pitch;
- i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
- /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
- * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
- * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
- SetOffset( p_filter->fmt_in.video.i_width,
- p_filter->fmt_in.video.i_height,
- p_filter->fmt_out.video.i_width,
- p_filter->fmt_out.video.i_height,
- &b_hscale, &i_vscale, p_offset_start );
- /*
- * Perform conversion
- */
- i_scale_count = ( i_vscale == 1 ) ?
- p_filter->fmt_out.video.i_height :
- p_filter->fmt_in.video.i_height;
- #if defined (MODULE_NAME_IS_i420_rgb_sse2)
- if( p_filter->fmt_in.video.i_width & 15 )
- {
- i_rewind = 16 - ( p_filter->fmt_in.video.i_width & 15 );
- }
- else
- {
- i_rewind = 0;
- }
- /*
- ** SSE2 128 bits fetch/store instructions are faster
- ** if memory access is 16 bytes aligned
- */
- p_buffer = b_hscale ? p_buffer_start : p_pic;
- if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
- p_dest->p->i_pitch|
- ((intptr_t)p_y)|
- ((intptr_t)p_buffer))) )
- {
- /* use faster SSE2 aligned fetch and store */
- for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
- {
- p_pic_start = p_pic;
- for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
- {
- SSE2_CALL (
- SSE2_INIT_32_ALIGNED
- SSE2_YUV_MUL
- SSE2_YUV_ADD
- SSE2_UNPACK_32_ABGR_ALIGNED
- );
- p_y += 16;
- p_u += 8;
- p_v += 8;
- p_buffer += 16;
- }
- /* Here we do some unaligned reads and duplicate conversions, but
- * at least we have all the pixels */
- if( i_rewind )
- {
- p_y -= i_rewind;
- p_u -= i_rewind >> 1;
- p_v -= i_rewind >> 1;
- p_buffer -= i_rewind;
- SSE2_CALL (
- SSE2_INIT_32_UNALIGNED
- SSE2_YUV_MUL
- SSE2_YUV_ADD
- SSE2_UNPACK_32_ABGR_UNALIGNED
- );
- p_y += 16;
- p_u += 4;
- p_v += 4;
- }
- SCALE_WIDTH;
- SCALE_HEIGHT( 420, 4 );
- p_y += i_source_margin;
- if( i_y % 2 )
- {
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- }
- p_buffer = b_hscale ? p_buffer_start : p_pic;
- }
- }
- else
- {
- /* use slower SSE2 unaligned fetch and store */
- for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
- {
- p_pic_start = p_pic;
- p_buffer = b_hscale ? p_buffer_start : p_pic;
- for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
- {
- SSE2_CALL (
- SSE2_INIT_32_UNALIGNED
- SSE2_YUV_MUL
- SSE2_YUV_ADD
- SSE2_UNPACK_32_ABGR_UNALIGNED
- );
- p_y += 16;
- p_u += 8;
- p_v += 8;
- p_buffer += 16;
- }
- /* Here we do some unaligned reads and duplicate conversions, but
- * at least we have all the pixels */
- if( i_rewind )
- {
- p_y -= i_rewind;
- p_u -= i_rewind >> 1;
- p_v -= i_rewind >> 1;
- p_buffer -= i_rewind;
- SSE2_CALL (
- SSE2_INIT_32_UNALIGNED
- SSE2_YUV_MUL
- SSE2_YUV_ADD
- SSE2_UNPACK_32_ABGR_UNALIGNED
- );
- p_y += 16;
- p_u += 8;
- p_v += 8;
- }
- SCALE_WIDTH;
- SCALE_HEIGHT( 420, 4 );
- p_y += i_source_margin;
- if( i_y % 2 )
- {
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- }
- p_buffer = b_hscale ? p_buffer_start : p_pic;
- }
- }
- #else
- if( p_filter->fmt_in.video.i_width & 7 )
- {
- i_rewind = 8 - ( p_filter->fmt_in.video.i_width & 7 );
- }
- else
- {
- i_rewind = 0;
- }
- for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
- {
- p_pic_start = p_pic;
- p_buffer = b_hscale ? p_buffer_start : p_pic;
- for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
- {
- MMX_CALL (
- MMX_INIT_32
- MMX_YUV_MUL
- MMX_YUV_ADD
- MMX_UNPACK_32_ABGR
- );
- p_y += 8;
- p_u += 4;
- p_v += 4;
- p_buffer += 8;
- }
- /* Here we do some unaligned reads and duplicate conversions, but
- * at least we have all the pixels */
- if( i_rewind )
- {
- p_y -= i_rewind;
- p_u -= i_rewind >> 1;
- p_v -= i_rewind >> 1;
- p_buffer -= i_rewind;
- MMX_CALL (
- MMX_INIT_32
- MMX_YUV_MUL
- MMX_YUV_ADD
- MMX_UNPACK_32_ABGR
- );
- p_y += 8;
- p_u += 4;
- p_v += 4;
- p_buffer += 8;
- }
- SCALE_WIDTH;
- SCALE_HEIGHT( 420, 4 );
- p_y += i_source_margin;
- if( i_y % 2 )
- {
- p_u += i_source_margin_c;
- p_v += i_source_margin_c;
- }
- }
- /* re-enable FPU registers */
- MMX_END;
- #endif
- }
- #endif
- /* Following functions are local */
/*****************************************************************************
 * SetOffset: fill the horizontal offset table and report scaling modes
 *****************************************************************************
 * i_width, i_height:         source dimensions
 * i_pic_width, i_pic_height: destination dimensions
 * pb_hscale: set to false when both widths are equal, true otherwise
 * pi_vscale: set to 0 (same height), 1 (destination taller) or -1 stored
 *            into the unsigned value (destination shorter)
 * p_offset:  table to fill; left untouched when the widths are equal.
 *            - enlargement: one entry per destination pixel, 1 when the
 *              source position advances after that pixel, 0 otherwise
 *            - reduction: one entry per destination pixel, holding the
 *              number of source pixels to advance (always >= 1)
 *****************************************************************************/
static void SetOffset( int i_width, int i_height, int i_pic_width,
                       int i_pic_height, bool *pb_hscale,
                       unsigned int *pi_vscale, int *p_offset )
{
    int *p_out = p_offset;                       /* write cursor in the table */
    int  i_remaining;                         /* entries/pixels left to emit */
    int  i_acc;                                  /* running modulo accumulator */

    /*
     * Horizontal part
     */
    if( i_pic_width == i_width )
    {
        /* Same width: conversion goes straight to the picture, no table */
        *pb_hscale = false;
    }
    else if( i_pic_width > i_width )
    {
        /* Enlargement: emit zeroes until it is time to step the source */
        *pb_hscale = true;
        i_acc = i_pic_width;
        for( i_remaining = i_width; i_remaining != 0; i_remaining-- )
        {
            for( i_acc -= i_width; i_acc > 0; i_acc -= i_width )
            {
                *p_out++ = 0;
            }
            *p_out++ = 1;
            i_acc += i_pic_width;
        }
    }
    else
    {
        /* Reduction: each destination pixel skips one or more source pixels */
        *pb_hscale = true;
        i_acc = i_width;
        for( i_remaining = i_pic_width; i_remaining != 0; i_remaining-- )
        {
            int i_step = 1;

            for( i_acc -= i_pic_width; i_acc > 0; i_acc -= i_pic_width )
            {
                i_step++;
            }
            *p_out++ = i_step;
            i_acc += i_width;
        }
    }

    /*
     * Vertical part: only a direction indicator is produced
     */
    if( i_pic_height == i_height )
    {
        *pi_vscale = 0;
    }
    else
    {
        *pi_vscale = ( i_pic_height > i_height ) ? 1 : -1;
    }
}