pixel.c
上传用户:hjq518
上传日期:2021-12-09
资源大小:5084k
文件大小:13k
源码类别:

Audio

开发平台:

Visual C++

  1. /*****************************************************************************
  2.  * pixel.c: h264 encoder
  3.  *****************************************************************************
  4.  * Copyright (C) 2003 Laurent Aimar
  5.  * $Id: pixel.c,v 1.1 2004/06/03 19:27:07 fenrir Exp $
  6.  *
  7.  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
  8.  *
  9.  * This program is free software; you can redistribute it and/or modify
  10.  * it under the terms of the GNU General Public License as published by
  11.  * the Free Software Foundation; either version 2 of the License, or
  12.  * (at your option) any later version.
  13.  *
  14.  * This program is distributed in the hope that it will be useful,
  15.  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16.  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17.  * GNU General Public License for more details.
  18.  *
  19.  * You should have received a copy of the GNU General Public License
  20.  * along with this program; if not, write to the Free Software
  21.  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  22.  *****************************************************************************/
  23. #include <string.h>
  24. #include "common.h"
  25. #include "clip1.h"
  26. #ifdef HAVE_MMXEXT
  27. #   include "i386/pixel.h"
  28. #endif
  29. #ifdef ARCH_PPC
  30. #   include "ppc/pixel.h"
  31. #endif
  32. #ifdef ARCH_UltraSparc
  33. #   include "sparc/pixel.h"
  34. #endif
  /****************************************************************************
   * pixel_sad_WxH: sum of absolute differences over a W x H pixel block
   ****************************************************************************/
  /* PIXEL_SAD_C( name, lx, ly ) defines
   *     static int name( pix1, i_stride_pix1, pix2, i_stride_pix2 )
   * which visits ly rows of lx pixels in both planes, advancing each plane by
   * its own stride after every row, and returns the sum of |pix1 - pix2|.
   *
   * Every line of the definition except the last has to end in a backslash.
   * Without the continuations the macro expands to nothing and the function
   * body is left dangling at file scope. */
  #define PIXEL_SAD_C( name, lx, ly ) \
  static int name( uint8_t *pix1, int i_stride_pix1,  \
                   uint8_t *pix2, int i_stride_pix2 ) \
  {                                                   \
      int i_sum = 0;                                  \
      int x, y;                                       \
      for( y = 0; y < ly; y++ )                       \
      {                                               \
          for( x = 0; x < lx; x++ )                   \
          {                                           \
              i_sum += abs( pix1[x] - pix2[x] );      \
          }                                           \
          pix1 += i_stride_pix1;                      \
          pix2 += i_stride_pix2;                      \
      }                                               \
      return i_sum;                                   \
  }

  PIXEL_SAD_C( pixel_sad_16x16, 16, 16 )
  PIXEL_SAD_C( pixel_sad_16x8,  16,  8 )
  PIXEL_SAD_C( pixel_sad_8x16,   8, 16 )
  PIXEL_SAD_C( pixel_sad_8x8,    8,  8 )
  PIXEL_SAD_C( pixel_sad_8x4,    8,  4 )
  PIXEL_SAD_C( pixel_sad_4x8,    4,  8 )
  PIXEL_SAD_C( pixel_sad_4x4,    4,  4 )
  /****************************************************************************
   * pixel_ssd_WxH: sum of squared differences over a W x H pixel block
   ****************************************************************************/
  /* PIXEL_SSD_C( name, lx, ly ) defines
   *     static int name( pix1, i_stride_pix1, pix2, i_stride_pix2 )
   * which visits ly rows of lx pixels in both planes, advancing each plane by
   * its own stride after every row, and returns the sum of (pix1 - pix2)^2.
   *
   * Every line of the definition except the last has to end in a backslash.
   * Without the continuations the macro expands to nothing and the function
   * body is left dangling at file scope. */
  #define PIXEL_SSD_C( name, lx, ly ) \
  static int name( uint8_t *pix1, int i_stride_pix1,  \
                   uint8_t *pix2, int i_stride_pix2 ) \
  {                                                   \
      int i_sum = 0;                                  \
      int x, y;                                       \
      for( y = 0; y < ly; y++ )                       \
      {                                               \
          for( x = 0; x < lx; x++ )                   \
          {                                           \
              int d = pix1[x] - pix2[x];              \
              i_sum += d*d;                           \
          }                                           \
          pix1 += i_stride_pix1;                      \
          pix2 += i_stride_pix2;                      \
      }                                               \
      return i_sum;                                   \
  }

  PIXEL_SSD_C( pixel_ssd_16x16, 16, 16 )
  PIXEL_SSD_C( pixel_ssd_16x8,  16,  8 )
  PIXEL_SSD_C( pixel_ssd_8x16,   8, 16 )
  PIXEL_SSD_C( pixel_ssd_8x8,    8,  8 )
  PIXEL_SSD_C( pixel_ssd_8x4,    8,  4 )
  PIXEL_SSD_C( pixel_ssd_4x8,    4,  8 )
  PIXEL_SSD_C( pixel_ssd_4x4,    4,  4 )
  90. int64_t x264_pixel_ssd_wxh( x264_pixel_function_t *pf, uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2, int i_width, int i_height )
  91. {
  92.     int64_t i_ssd = 0;
  93.     int x, y;
  94. #define SSD(size) i_ssd += pf->ssd[size]( pix1 + y*i_pix1 + x, i_pix1, 
  95.                                           pix2 + y*i_pix2 + x, i_pix2 );
  96.     for( y = 0; y < i_height-15; y += 16 )
  97.     {
  98.         for( x = 0; x < i_width-15; x += 16 )
  99.             SSD(PIXEL_16x16);
  100.         if( x < i_width-7 )
  101.             SSD(PIXEL_8x16);
  102.     }
  103.     if( y < i_height-7 )
  104.         for( x = 0; x < i_width-7; x += 8 )
  105.             SSD(PIXEL_8x8);
  106. #undef SSD
  107. #define SSD1 { int d = pix1[y*i_pix1+x] - pix2[y*i_pix2+x]; i_ssd += d*d; }
  108.     if( i_width % 8 != 0 )
  109.     {
  110.         for( y = 0; y < (i_height & ~7); y++ )
  111.             for( x = i_width & ~7; x < i_width; x++ )
  112.                 SSD1;
  113.     }
  114.     if( i_height % 8 != 0 )
  115.     {
  116.         for( y = i_height & ~7; y < i_height; y++ )
  117.             for( x = 0; x < i_width; x++ )
  118.                 SSD1;
  119.     }
  120. #undef SSD1
  121.     return i_ssd;
  122. }
  /* Writes the i_size x i_size block of differences pix1 - pix2 into diff,
   * row after row with no padding (entry (x, y) lands at diff[x + y*i_size]).
   * pix1 and pix2 are read with row strides i_pix1 and i_pix2. */
  static inline void pixel_sub_wxh( int16_t *diff, int i_size,
                                    uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
  {
      int16_t *p_out = diff;
      int i_row, i_col;

      for( i_row = 0; i_row < i_size; i_row++, pix1 += i_pix1, pix2 += i_pix2 )
          for( i_col = 0; i_col < i_size; i_col++ )
              *p_out++ = pix1[i_col] - pix2[i_col];
  }
  /****************************************************************************
   * pixel_satd_WxH: sum of 4x4 Hadamard transformed differences
   ****************************************************************************/
  /* Splits the i_width x i_height area into 4x4 blocks. For each block the
   * pixel differences go through a 4-point butterfly along the rows, then
   * along the columns, and the absolute values of the 16 results are added
   * up. Returns half of the grand total. */
  static int pixel_satd_wxh( uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2, int i_width, int i_height )
  {
      int16_t diff[4][4];
      int16_t tmp[4][4];
      int i_sum = 0;
      int i_col, i_row, i;

      for( i_row = 0; i_row < i_height; i_row += 4, pix1 += 4 * i_pix1, pix2 += 4 * i_pix2 )
      {
          for( i_col = 0; i_col < i_width; i_col += 4 )
          {
              pixel_sub_wxh( (int16_t*)diff, 4, pix1 + i_col, i_pix1, pix2 + i_col, i_pix2 );

              /* first pass: transform every row of the difference block */
              for( i = 0; i < 4; i++ )
              {
                  const int a0 = diff[i][0] + diff[i][1];
                  const int a1 = diff[i][0] - diff[i][1];
                  const int a2 = diff[i][2] + diff[i][3];
                  const int a3 = diff[i][2] - diff[i][3];

                  tmp[i][0] = a0 + a2;
                  tmp[i][1] = a0 - a2;
                  tmp[i][2] = a1 - a3;
                  tmp[i][3] = a1 + a3;
              }

              /* second pass: transform every column and accumulate magnitudes */
              for( i = 0; i < 4; i++ )
              {
                  const int a0 = tmp[0][i] + tmp[1][i];
                  const int a1 = tmp[0][i] - tmp[1][i];
                  const int a2 = tmp[2][i] + tmp[3][i];
                  const int a3 = tmp[2][i] - tmp[3][i];

                  i_sum += abs( a0 + a2 ) + abs( a0 - a2 ) + abs( a1 - a3 ) + abs( a1 + a3 );
              }
          }
      }

      return i_sum / 2;
  }
  /* PIXEL_SATD_C( name, width, height ) defines
   *     static int name( pix1, i_stride_pix1, pix2, i_stride_pix2 )
   * as a fixed-size front end that forwards to pixel_satd_wxh() with the given
   * width and height.
   *
   * The generated function needs its opening brace, and every line of the
   * definition except the last has to end in a backslash. */
  #define PIXEL_SATD_C( name, width, height ) \
  static int name( uint8_t *pix1, int i_stride_pix1, \
                   uint8_t *pix2, int i_stride_pix2 ) \
  { \
      return pixel_satd_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, width, height ); \
  }

  PIXEL_SATD_C( pixel_satd_16x16, 16, 16 )
  PIXEL_SATD_C( pixel_satd_16x8,  16, 8 )
  PIXEL_SATD_C( pixel_satd_8x16,  8, 16 )
  PIXEL_SATD_C( pixel_satd_8x8,   8, 8 )
  PIXEL_SATD_C( pixel_satd_8x4,   8, 4 )
  PIXEL_SATD_C( pixel_satd_4x8,   4, 8 )
  PIXEL_SATD_C( pixel_satd_4x4,   4, 4 )
  /****************************************************************************
   * pixel_sa8d_WxH: sum of 8x8 Hadamard transformed differences
   ****************************************************************************/
  /* One 8-point butterfly pass, written as a brace-enclosed statement.
   * The user defines SRC(k) to read input k and DST(k, value) to consume
   * output k before expanding SA8D_1D. All eight inputs are read into
   * temporaries before the first DST, so SRC and DST may refer to the same
   * storage.
   *
   * Every line of the definition except the last has to end in a backslash;
   * otherwise only the lone `{` belongs to the macro. */
  #define SA8D_1D {\
      const int a0 = SRC(0) + SRC(4);\
      const int a4 = SRC(0) - SRC(4);\
      const int a1 = SRC(1) + SRC(5);\
      const int a5 = SRC(1) - SRC(5);\
      const int a2 = SRC(2) + SRC(6);\
      const int a6 = SRC(2) - SRC(6);\
      const int a3 = SRC(3) + SRC(7);\
      const int a7 = SRC(3) - SRC(7);\
      const int b0 = a0 + a2;\
      const int b2 = a0 - a2;\
      const int b1 = a1 + a3;\
      const int b3 = a1 - a3;\
      const int b4 = a4 + a6;\
      const int b6 = a4 - a6;\
      const int b5 = a5 + a7;\
      const int b7 = a5 - a7;\
      DST(0, b0 + b1);\
      DST(1, b0 - b1);\
      DST(2, b2 + b3);\
      DST(3, b2 - b3);\
      DST(4, b4 + b5);\
      DST(5, b4 - b5);\
      DST(6, b6 + b7);\
      DST(7, b6 - b7);\
  }
  218. static inline int pixel_sa8d_wxh( uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2,
  219.                                   int i_width, int i_height )
  220. {
  221.     int16_t diff[8][8];
  222.     int i_satd = 0;
  223.     int x, y;
  224.     for( y = 0; y < i_height; y += 8 )
  225.     {
  226.         for( x = 0; x < i_width; x += 8 )
  227.         {
  228.             int i;
  229.             pixel_sub_wxh( (int16_t*)diff, 8, pix1+x, i_pix1, pix2+x, i_pix2 );
  230. #define SRC(x)     diff[i][x]
  231. #define DST(x,rhs) diff[i][x] = (rhs)
  232.             for( i = 0; i < 8; i++ )
  233.                 SA8D_1D
  234. #undef SRC
  235. #undef DST
  236. #define SRC(x)     diff[x][i]
  237. #define DST(x,rhs) i_satd += abs(rhs)
  238.             for( i = 0; i < 8; i++ )
  239.                 SA8D_1D
  240. #undef SRC
  241. #undef DST
  242.         }
  243.         pix1 += 8 * i_pix1;
  244.         pix2 += 8 * i_pix2;
  245.     }
  246.     return i_satd;
  247. }
  /* PIXEL_SA8D_C( width, height ) defines
   *     static int pixel_sa8d_<width>x<height>( pix1, i_stride_pix1,
   *                                             pix2, i_stride_pix2 )
   * which returns the pixel_sa8d_wxh() total for that block size divided by 4,
   * rounded to nearest ((total + 2) >> 2).
   *
   * The generated function needs its opening brace, and every line of the
   * definition except the last has to end in a backslash. */
  #define PIXEL_SA8D_C( width, height ) \
  static int pixel_sa8d_##width##x##height( uint8_t *pix1, int i_stride_pix1, \
                   uint8_t *pix2, int i_stride_pix2 ) \
  { \
      return ( pixel_sa8d_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, width, height ) + 2 ) >> 2; \
  }

  PIXEL_SA8D_C( 16, 16 )
  PIXEL_SA8D_C( 16, 8 )
  PIXEL_SA8D_C( 8, 16 )
  PIXEL_SA8D_C( 8, 8 )
  257. /****************************************************************************
  258.  * x264_pixel_init:
  259.  ****************************************************************************/
  260. void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
  261. {
  262.     pixf->sad[PIXEL_16x16] = pixel_sad_16x16;
  263.     pixf->sad[PIXEL_16x8]  = pixel_sad_16x8;
  264.     pixf->sad[PIXEL_8x16]  = pixel_sad_8x16;
  265.     pixf->sad[PIXEL_8x8]   = pixel_sad_8x8;
  266.     pixf->sad[PIXEL_8x4]   = pixel_sad_8x4;
  267.     pixf->sad[PIXEL_4x8]   = pixel_sad_4x8;
  268.     pixf->sad[PIXEL_4x4]   = pixel_sad_4x4;
  269.     pixf->ssd[PIXEL_16x16] = pixel_ssd_16x16;
  270.     pixf->ssd[PIXEL_16x8]  = pixel_ssd_16x8;
  271.     pixf->ssd[PIXEL_8x16]  = pixel_ssd_8x16;
  272.     pixf->ssd[PIXEL_8x8]   = pixel_ssd_8x8;
  273.     pixf->ssd[PIXEL_8x4]   = pixel_ssd_8x4;
  274.     pixf->ssd[PIXEL_4x8]   = pixel_ssd_4x8;
  275.     pixf->ssd[PIXEL_4x4]   = pixel_ssd_4x4;
  276.     pixf->satd[PIXEL_16x16]= pixel_satd_16x16;
  277.     pixf->satd[PIXEL_16x8] = pixel_satd_16x8;
  278.     pixf->satd[PIXEL_8x16] = pixel_satd_8x16;
  279.     pixf->satd[PIXEL_8x8]  = pixel_satd_8x8;
  280.     pixf->satd[PIXEL_8x4]  = pixel_satd_8x4;
  281.     pixf->satd[PIXEL_4x8]  = pixel_satd_4x8;
  282.     pixf->satd[PIXEL_4x4]  = pixel_satd_4x4;
  283.     pixf->sa8d[PIXEL_16x16]= pixel_sa8d_16x16;
  284.     pixf->sa8d[PIXEL_16x8] = pixel_sa8d_16x8;
  285.     pixf->sa8d[PIXEL_8x16] = pixel_sa8d_8x16;
  286.     pixf->sa8d[PIXEL_8x8]  = pixel_sa8d_8x8;
  287. #ifdef HAVE_MMXEXT
  288.     if( cpu&X264_CPU_MMXEXT )
  289.     {
  290.         pixf->sad[PIXEL_16x16] = x264_pixel_sad_16x16_mmxext;
  291.         pixf->sad[PIXEL_16x8 ] = x264_pixel_sad_16x8_mmxext;
  292.         pixf->sad[PIXEL_8x16 ] = x264_pixel_sad_8x16_mmxext;
  293.         pixf->sad[PIXEL_8x8  ] = x264_pixel_sad_8x8_mmxext;
  294.         pixf->sad[PIXEL_8x4  ] = x264_pixel_sad_8x4_mmxext;
  295.         pixf->sad[PIXEL_4x8  ] = x264_pixel_sad_4x8_mmxext;
  296.         pixf->sad[PIXEL_4x4]   = x264_pixel_sad_4x4_mmxext;
  297.         pixf->sad_pde[PIXEL_16x16] = x264_pixel_sad_pde_16x16_mmxext;
  298.         pixf->sad_pde[PIXEL_16x8 ] = x264_pixel_sad_pde_16x8_mmxext;
  299.         pixf->sad_pde[PIXEL_8x16 ] = x264_pixel_sad_pde_8x16_mmxext;
  300.         pixf->ssd[PIXEL_16x16] = x264_pixel_ssd_16x16_mmxext;
  301.         pixf->ssd[PIXEL_16x8]  = x264_pixel_ssd_16x8_mmxext;
  302.         pixf->ssd[PIXEL_8x16]  = x264_pixel_ssd_8x16_mmxext;
  303.         pixf->ssd[PIXEL_8x8]   = x264_pixel_ssd_8x8_mmxext;
  304.         pixf->ssd[PIXEL_8x4]   = x264_pixel_ssd_8x4_mmxext;
  305.         pixf->ssd[PIXEL_4x8]   = x264_pixel_ssd_4x8_mmxext;
  306.         pixf->ssd[PIXEL_4x4]   = x264_pixel_ssd_4x4_mmxext;
  307.   
  308.         pixf->satd[PIXEL_16x16]= x264_pixel_satd_16x16_mmxext;
  309.         pixf->satd[PIXEL_16x8] = x264_pixel_satd_16x8_mmxext;
  310.         pixf->satd[PIXEL_8x16] = x264_pixel_satd_8x16_mmxext;
  311.         pixf->satd[PIXEL_8x8]  = x264_pixel_satd_8x8_mmxext;
  312.         pixf->satd[PIXEL_8x4]  = x264_pixel_satd_8x4_mmxext;
  313.         pixf->satd[PIXEL_4x8]  = x264_pixel_satd_4x8_mmxext;
  314.         pixf->satd[PIXEL_4x4]  = x264_pixel_satd_4x4_mmxext;
  315.     }
  316. #endif
  317. #ifdef HAVE_SSE2
  318.     // disable on AMD processors since it is slower
  319.     if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_3DNOW) )
  320.     {
  321.         pixf->sad[PIXEL_16x16] = x264_pixel_sad_16x16_sse2;
  322.         pixf->sad[PIXEL_16x8 ] = x264_pixel_sad_16x8_sse2;
  323.         pixf->satd[PIXEL_16x16]= x264_pixel_satd_16x16_sse2;
  324.         pixf->satd[PIXEL_16x8] = x264_pixel_satd_16x8_sse2;
  325.         pixf->satd[PIXEL_8x16] = x264_pixel_satd_8x16_sse2;
  326.         pixf->satd[PIXEL_8x8]  = x264_pixel_satd_8x8_sse2;
  327.         pixf->satd[PIXEL_8x4]  = x264_pixel_satd_8x4_sse2;
  328.     }
  329.     // these are faster on both Intel and AMD
  330.     if( cpu&X264_CPU_SSE2 )
  331.     {
  332.         pixf->ssd[PIXEL_16x16] = x264_pixel_ssd_16x16_sse2;
  333.         pixf->ssd[PIXEL_16x8]  = x264_pixel_ssd_16x8_sse2;
  334.     }
  335. #endif
  336. #ifdef ARCH_PPC
  337.     if( cpu&X264_CPU_ALTIVEC )
  338.     {
  339.         x264_pixel_altivec_init( pixf );
  340.     }
  341. #endif
  342. #ifdef ARCH_UltraSparc
  343.       pixf->sad[PIXEL_8x8]   = x264_pixel_sad_8x8_vis;
  344.       pixf->sad[PIXEL_8x16]  = x264_pixel_sad_8x16_vis;
  345.       pixf->sad[PIXEL_16x8]  = x264_pixel_sad_16x8_vis;
  346.       pixf->sad[PIXEL_16x16] = x264_pixel_sad_16x16_vis;
  347. #endif
  348. }