quant.c
上传用户:lctgjx
上传日期:2022-06-04
资源大小:8887k
文件大小:14k
源码类别:

流媒体/Mpeg4/MP4

开发平台:

Visual C++

  1. /*****************************************************************************
  2.  * quant.c: h264 encoder library
  3.  *****************************************************************************
  4.  * Copyright (C) 2005-2008 x264 project
  5.  *
  6.  * Authors: Loren Merritt <lorenm@u.washington.edu>
  7.  *          Christian Heine <sennindemokrit@gmx.net>
  8.  *
  9.  * This program is free software; you can redistribute it and/or modify
  10.  * it under the terms of the GNU General Public License as published by
  11.  * the Free Software Foundation; either version 2 of the License, or
  12.  * (at your option) any later version.
  13.  *
  14.  * This program is distributed in the hope that it will be useful,
  15.  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16.  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17.  * GNU General Public License for more details.
  18.  *
  19.  * You should have received a copy of the GNU General Public License
  20.  * along with this program; if not, write to the Free Software
  21.  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  22.  *****************************************************************************/
  23. #include "common.h"
  24. #ifdef HAVE_MMX
  25. #include "x86/quant.h"
  26. #endif
  27. #ifdef ARCH_PPC
  28. #   include "ppc/quant.h"
  29. #endif
  30. #ifdef ARCH_ARM
  31. #   include "arm/quant.h"
  32. #endif
  /* Quantize one coefficient in place and fold the result into `nz`:
   *   coef >  0:  coef =    ( f + coef ) * mf >> 16
   *   coef <= 0:  coef = -( ( f - coef ) * mf >> 16 )
   * `nz` is not a parameter: it must be an integer variable declared in the
   * scope where the macro is expanded.  `coef` must be a side-effect-free
   * lvalue because it is evaluated several times.  The do/while(0) wrapper
   * makes `QUANT_ONE( ... );` a single statement, so it is safe as an
   * unbraced loop body or if/else arm. */
  #define QUANT_ONE( coef, mf, f ) \
  do { \
      if( (coef) > 0 ) \
          (coef) = ( (f) + (coef) ) * (mf) >> 16; \
      else \
          (coef) = - ( ( (f) - (coef) ) * (mf) >> 16 ); \
      nz |= (coef); \
  } while( 0 )

  /* Quantize an 8x8 block in place.  mf[] and bias[] hold one entry per
   * coefficient in raster order (index y*8+x).
   * Returns 1 if any quantized coefficient is nonzero, 0 otherwise. */
  static int quant_8x8( int16_t dct[8][8], uint16_t mf[64], uint16_t bias[64] )
  {
      int x, y, nz = 0;
      for( y = 0; y < 8; y++ )
          for( x = 0; x < 8; x++ )
              QUANT_ONE( dct[y][x], mf[y*8+x], bias[y*8+x] );
      return !!nz;
  }

  /* Quantize a 4x4 block in place.  mf[] and bias[] hold one entry per
   * coefficient in raster order (index y*4+x).
   * Returns 1 if any quantized coefficient is nonzero, 0 otherwise. */
  static int quant_4x4( int16_t dct[4][4], uint16_t mf[16], uint16_t bias[16] )
  {
      int x, y, nz = 0;
      for( y = 0; y < 4; y++ )
          for( x = 0; x < 4; x++ )
              QUANT_ONE( dct[y][x], mf[y*4+x], bias[y*4+x] );
      return !!nz;
  }

  /* Quantize a 4x4 block in place with a single multiplier and bias shared
   * by all 16 coefficients.
   * Returns 1 if any quantized coefficient is nonzero, 0 otherwise. */
  static int quant_4x4_dc( int16_t dct[4][4], int mf, int bias )
  {
      int x, y, nz = 0;
      for( y = 0; y < 4; y++ )
          for( x = 0; x < 4; x++ )
              QUANT_ONE( dct[y][x], mf, bias );
      return !!nz;
  }

  /* Quantize a 2x2 block in place with a single multiplier and bias shared
   * by all 4 coefficients.
   * Returns 1 if any quantized coefficient is nonzero, 0 otherwise. */
  static int quant_2x2_dc( int16_t dct[2][2], int mf, int bias )
  {
      int nz = 0;
      QUANT_ONE( dct[0][0], mf, bias );
      QUANT_ONE( dct[0][1], mf, bias );
      QUANT_ONE( dct[1][0], mf, bias );
      QUANT_ONE( dct[1][1], mf, bias );
      return !!nz;
  }
  /* Dequantize dct[y][x] in place.  Both macros are meant to be expanded
   * inside dequant_4x4()/dequant_8x8() and use those functions' locals by
   * name: dct, dequant_mf, i_mf, i_qbits and y (DEQUANT_SHR also uses f).
   *
   * DEQUANT_SHL (i_qbits >= 0): scale the product up by 2^i_qbits.  This is
   * written as a multiplication because left-shifting a negative product is
   * undefined behaviour in C.
   * DEQUANT_SHR (i_qbits < 0): add the rounding term f, then shift right by
   * -i_qbits.  Right-shifting a negative value is implementation-defined;
   * this code assumes an arithmetic shift. */
  #define DEQUANT_SHL( x ) \
      dct[y][x] = ( dct[y][x] * dequant_mf[i_mf][y][x] ) * ( 1 << i_qbits )
  #define DEQUANT_SHR( x ) \
      dct[y][x] = ( dct[y][x] * dequant_mf[i_mf][y][x] + f ) >> (-i_qbits)

  /* Dequantize a 4x4 block in place.
   *   dct        : coefficients, overwritten with the scaled values
   *   dequant_mf : per-coefficient multipliers; table i_qp%6 is used
   *   i_qp       : quantizer; the scale exponent is i_qp/6 - 4
   * No range check is done on i_qp. */
  static void dequant_4x4( int16_t dct[4][4], int dequant_mf[6][4][4], int i_qp )
  {
      const int i_mf = i_qp%6;
      const int i_qbits = i_qp/6 - 4;
      int x, y;

      if( i_qbits >= 0 )
      {
          for( y = 0; y < 4; y++ )
              for( x = 0; x < 4; x++ )
                  DEQUANT_SHL( x );
      }
      else
      {
          /* Half of the divisor, so the right shift rounds to nearest. */
          const int f = 1 << (-i_qbits-1);
          for( y = 0; y < 4; y++ )
              for( x = 0; x < 4; x++ )
                  DEQUANT_SHR( x );
      }
  }

  /* Dequantize an 8x8 block in place.
   *   dct        : coefficients, overwritten with the scaled values
   *   dequant_mf : per-coefficient multipliers; table i_qp%6 is used
   *   i_qp       : quantizer; the scale exponent is i_qp/6 - 6
   * No range check is done on i_qp. */
  static void dequant_8x8( int16_t dct[8][8], int dequant_mf[6][8][8], int i_qp )
  {
      const int i_mf = i_qp%6;
      const int i_qbits = i_qp/6 - 6;
      int x, y;

      if( i_qbits >= 0 )
      {
          for( y = 0; y < 8; y++ )
              for( x = 0; x < 8; x++ )
                  DEQUANT_SHL( x );
      }
      else
      {
          /* Half of the divisor, so the right shift rounds to nearest. */
          const int f = 1 << (-i_qbits-1);
          for( y = 0; y < 8; y++ )
              for( x = 0; x < 8; x++ )
                  DEQUANT_SHR( x );
      }
  }
  /* Dequantize a 4x4 block in place using one multiplier for every
   * coefficient: only dequant_mf[i_qp%6][0][0] is read.
   *   dct        : coefficients, overwritten with the scaled values
   *   dequant_mf : multiplier tables (only entry [0][0] of the selected one)
   *   i_qp       : quantizer; the scale exponent is i_qp/6 - 6
   * No range check is done on i_qp. */
  static void dequant_4x4_dc( int16_t dct[4][4], int dequant_mf[6][4][4], int i_qp )
  {
      const int i_qbits = i_qp/6 - 6;
      const int i_mf00 = dequant_mf[i_qp%6][0][0];
      int x, y;

      if( i_qbits >= 0 )
      {
          /* Fold the power-of-two scale into the multiplier once. */
          const int i_dmf = i_mf00 * ( 1 << i_qbits );
          for( y = 0; y < 4; y++ )
              for( x = 0; x < 4; x++ )
                  dct[y][x] *= i_dmf;
      }
      else
      {
          /* Half of the divisor, so the right shift rounds to nearest.
           * Right-shifting a negative value is implementation-defined;
           * this code assumes an arithmetic shift. */
          const int f = 1 << (-i_qbits-1);
          for( y = 0; y < 4; y++ )
              for( x = 0; x < 4; x++ )
                  dct[y][x] = ( dct[y][x] * i_mf00 + f ) >> (-i_qbits);
      }
  }
  /* Shrink every coefficient except dct[0] toward zero and accumulate
   * magnitude statistics.  For each i in [1, size):
   *   sum[i] += |dct[i]|
   *   dct[i]  = sign(dct[i]) * max( |dct[i]| - offset[i], 0 )
   * Index 0 is deliberately left untouched (the loop starts at 1), so
   * dct[0], sum[0] and offset[0] are never read or written. */
  static void x264_denoise_dct( int16_t *dct, uint32_t *sum, uint16_t *offset, int size )
  {
      int i;
      for( i = 1; i < size; i++ )
      {
          const int coef = dct[i];
          const int b_neg = coef < 0;
          /* Computed in int, so -32768 yields 32768 without overflow. */
          int level = b_neg ? -coef : coef;

          sum[i] += level;
          level -= offset[i];
          if( level < 0 )
              level = 0;
          dct[i] = b_neg ? -level : level;
      }
  }
  /* Decimation score tables (ref: JVT-B118).
   * A decimation score estimates whether a block's DCT coefficients are cheap
   * enough to be zeroed out entirely; a low score means "set the block to
   * null".  It is used for inter macroblocks (luma and chroma):
   *   luma:   for an 8x8 block:    if score < 4 -> null
   *           for the complete mb: if score < 6 -> null
   *   chroma: for the complete mb: if score < 7 -> null
   * x264_decimate_score_internal() indexes these tables by the length of the
   * run of zeros found below each nonzero coefficient: table8 is used for
   * 64-coefficient blocks, table4 for all other sizes. */
  const uint8_t x264_decimate_table4[16] = {
      3,2,2,1,1,1,0,0,0,0,0,0,0,0,0,0 };
  const uint8_t x264_decimate_table8[64] = {
      3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,
      1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
  192. static int ALWAYS_INLINE x264_decimate_score_internal( int16_t *dct, int i_max )
  193. {
  194.     const uint8_t *ds_table = (i_max == 64) ? x264_decimate_table8 : x264_decimate_table4;
  195.     int i_score = 0;
  196.     int idx = i_max - 1;
  197.     /* Yes, dct[idx-1] is guaranteed to be 32-bit aligned.  idx>=0 instead of 1 works correctly for the same reason */
  198.     while( idx >= 0 && *(uint32_t*)&dct[idx-1] == 0 )
  199.         idx -= 2;
  200.     if( idx >= 0 && dct[idx] == 0 )
  201.         idx--;
  202.     while( idx >= 0 )
  203.     {
  204.         int i_run;
  205.         if( (unsigned)(dct[idx--] + 1) > 2 )
  206.             return 9;
  207.         i_run = 0;
  208.         while( idx >= 0 && dct[idx] == 0 )
  209.         {
  210.             idx--;
  211.             i_run++;
  212.         }
  213.         i_score += ds_table[i_run];
  214.     }
  215.     return i_score;
  216. }
  /* Decimation score of the 15 coefficients dct[1..15]; dct[0] does not
   * contribute to the score. */
  static int x264_decimate_score15( int16_t *dct )
  {
      return x264_decimate_score_internal( dct+1, 15 );
  }
  /* Decimation score of the 16 coefficients dct[0..15]. */
  static int x264_decimate_score16( int16_t *dct )
  {
      return x264_decimate_score_internal( dct, 16 );
  }
  /* Decimation score of the 64 coefficients dct[0..63]. */
  static int x264_decimate_score64( int16_t *dct )
  {
      return x264_decimate_score_internal( dct, 64 );
  }
  229. static int ALWAYS_INLINE x264_coeff_last_internal( int16_t *l, int i_count )
  230. {
  231.     int i_last;
  232.     for( i_last = i_count-1; i_last >= 3; i_last -= 4 )
  233.         if( *(uint64_t*)(l+i_last-3) )
  234.             break;
  235.     while( i_last >= 0 && l[i_last] == 0 )
  236.         i_last--;
  237.     return i_last;
  238. }
  /* Index of the last nonzero coefficient in l[0..3], or -1 if none. */
  static int x264_coeff_last4( int16_t *l )
  {
      return x264_coeff_last_internal( l, 4 );
  }
  /* Index of the last nonzero coefficient in l[0..14], or -1 if none. */
  static int x264_coeff_last15( int16_t *l )
  {
      return x264_coeff_last_internal( l, 15 );
  }
  /* Index of the last nonzero coefficient in l[0..15], or -1 if none. */
  static int x264_coeff_last16( int16_t *l )
  {
      return x264_coeff_last_internal( l, 16 );
  }
  /* Index of the last nonzero coefficient in l[0..63], or -1 if none. */
  static int x264_coeff_last64( int16_t *l )
  {
      return x264_coeff_last_internal( l, 64 );
  }
  255. #define level_run(num)
  256. static int x264_coeff_level_run##num( int16_t *dct, x264_run_level_t *runlevel )
  257. {
  258.     int i_last = runlevel->last = x264_coeff_last##num(dct);
  259.     int i_total = 0;
  260.     do
  261.     {
  262.         int r = 0;
  263.         runlevel->level[i_total] = dct[i_last];
  264.         while( --i_last >= 0 && dct[i_last] == 0 )
  265.             r++;
  266.         runlevel->run[i_total++] = r;
  267.     } while( i_last >= 0 );
  268.     return i_total;
  269. }
  270. level_run(4)
  271. level_run(15)
  272. level_run(16)
  273. void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
  274. {
  275.     pf->quant_8x8 = quant_8x8;
  276.     pf->quant_4x4 = quant_4x4;
  277.     pf->quant_4x4_dc = quant_4x4_dc;
  278.     pf->quant_2x2_dc = quant_2x2_dc;
  279.     pf->dequant_4x4 = dequant_4x4;
  280.     pf->dequant_4x4_dc = dequant_4x4_dc;
  281.     pf->dequant_8x8 = dequant_8x8;
  282.     pf->denoise_dct = x264_denoise_dct;
  283.     pf->decimate_score15 = x264_decimate_score15;
  284.     pf->decimate_score16 = x264_decimate_score16;
  285.     pf->decimate_score64 = x264_decimate_score64;
  286.     pf->coeff_last[DCT_CHROMA_DC] = x264_coeff_last4;
  287.     pf->coeff_last[  DCT_LUMA_AC] = x264_coeff_last15;
  288.     pf->coeff_last[ DCT_LUMA_4x4] = x264_coeff_last16;
  289.     pf->coeff_last[ DCT_LUMA_8x8] = x264_coeff_last64;
  290.     pf->coeff_level_run[DCT_CHROMA_DC] = x264_coeff_level_run4;
  291.     pf->coeff_level_run[  DCT_LUMA_AC] = x264_coeff_level_run15;
  292.     pf->coeff_level_run[ DCT_LUMA_4x4] = x264_coeff_level_run16;
  293. #ifdef HAVE_MMX
  294.     if( cpu&X264_CPU_MMX )
  295.     {
  296. #ifdef ARCH_X86
  297.         pf->quant_4x4 = x264_quant_4x4_mmx;
  298.         pf->quant_8x8 = x264_quant_8x8_mmx;
  299.         pf->dequant_4x4 = x264_dequant_4x4_mmx;
  300.         pf->dequant_4x4_dc = x264_dequant_4x4dc_mmxext;
  301.         pf->dequant_8x8 = x264_dequant_8x8_mmx;
  302.         if( h->param.i_cqm_preset == X264_CQM_FLAT )
  303.         {
  304.             pf->dequant_4x4 = x264_dequant_4x4_flat16_mmx;
  305.             pf->dequant_8x8 = x264_dequant_8x8_flat16_mmx;
  306.         }
  307.         pf->denoise_dct = x264_denoise_dct_mmx;
  308. #endif
  309.     }
  310.     if( cpu&X264_CPU_MMXEXT )
  311.     {
  312.         pf->quant_2x2_dc = x264_quant_2x2_dc_mmxext;
  313. #ifdef ARCH_X86
  314.         pf->quant_4x4_dc = x264_quant_4x4_dc_mmxext;
  315.         pf->decimate_score15 = x264_decimate_score15_mmxext;
  316.         pf->decimate_score16 = x264_decimate_score16_mmxext;
  317.         pf->decimate_score64 = x264_decimate_score64_mmxext;
  318.         pf->coeff_last[  DCT_LUMA_AC] = x264_coeff_last15_mmxext;
  319.         pf->coeff_last[ DCT_LUMA_4x4] = x264_coeff_last16_mmxext;
  320.         pf->coeff_last[ DCT_LUMA_8x8] = x264_coeff_last64_mmxext;
  321.         pf->coeff_level_run[  DCT_LUMA_AC] = x264_coeff_level_run15_mmxext;
  322.         pf->coeff_level_run[ DCT_LUMA_4x4] = x264_coeff_level_run16_mmxext;
  323. #endif
  324.         pf->coeff_last[DCT_CHROMA_DC] = x264_coeff_last4_mmxext;
  325.         pf->coeff_level_run[DCT_CHROMA_DC] = x264_coeff_level_run4_mmxext;
  326.         if( cpu&X264_CPU_LZCNT )
  327.         {
  328.             pf->coeff_last[DCT_CHROMA_DC] = x264_coeff_last4_mmxext_lzcnt;
  329.             pf->coeff_level_run[DCT_CHROMA_DC] = x264_coeff_level_run4_mmxext_lzcnt;
  330.         }
  331.     }
  332.     if( cpu&X264_CPU_SSE2 )
  333.     {
  334.         pf->quant_4x4_dc = x264_quant_4x4_dc_sse2;
  335.         pf->quant_4x4 = x264_quant_4x4_sse2;
  336.         pf->quant_8x8 = x264_quant_8x8_sse2;
  337.         pf->dequant_4x4 = x264_dequant_4x4_sse2;
  338.         pf->dequant_4x4_dc = x264_dequant_4x4dc_sse2;
  339.         pf->dequant_8x8 = x264_dequant_8x8_sse2;
  340.         if( h->param.i_cqm_preset == X264_CQM_FLAT )
  341.         {
  342.             pf->dequant_4x4 = x264_dequant_4x4_flat16_sse2;
  343.             pf->dequant_8x8 = x264_dequant_8x8_flat16_sse2;
  344.         }
  345.         pf->denoise_dct = x264_denoise_dct_sse2;
  346.         pf->decimate_score15 = x264_decimate_score15_sse2;
  347.         pf->decimate_score16 = x264_decimate_score16_sse2;
  348.         pf->decimate_score64 = x264_decimate_score64_sse2;
  349.         pf->coeff_last[ DCT_LUMA_AC] = x264_coeff_last15_sse2;
  350.         pf->coeff_last[DCT_LUMA_4x4] = x264_coeff_last16_sse2;
  351.         pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_sse2;
  352.         pf->coeff_level_run[ DCT_LUMA_AC] = x264_coeff_level_run15_sse2;
  353.         pf->coeff_level_run[DCT_LUMA_4x4] = x264_coeff_level_run16_sse2;
  354.         if( cpu&X264_CPU_LZCNT )
  355.         {
  356.             pf->coeff_last[ DCT_LUMA_AC] = x264_coeff_last15_sse2_lzcnt;
  357.             pf->coeff_last[DCT_LUMA_4x4] = x264_coeff_last16_sse2_lzcnt;
  358.             pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_sse2_lzcnt;
  359.             pf->coeff_level_run[ DCT_LUMA_AC] = x264_coeff_level_run15_sse2_lzcnt;
  360.             pf->coeff_level_run[DCT_LUMA_4x4] = x264_coeff_level_run16_sse2_lzcnt;
  361.         }
  362.     }
  363.     if( cpu&X264_CPU_SSSE3 )
  364.     {
  365.         pf->quant_2x2_dc = x264_quant_2x2_dc_ssse3;
  366.         pf->quant_4x4_dc = x264_quant_4x4_dc_ssse3;
  367.         pf->quant_4x4 = x264_quant_4x4_ssse3;
  368.         pf->quant_8x8 = x264_quant_8x8_ssse3;
  369.         pf->denoise_dct = x264_denoise_dct_ssse3;
  370.         pf->decimate_score15 = x264_decimate_score15_ssse3;
  371.         pf->decimate_score16 = x264_decimate_score16_ssse3;
  372.         pf->decimate_score64 = x264_decimate_score64_ssse3;
  373.     }
  374.     if( cpu&X264_CPU_SSE4 )
  375.     {
  376.         pf->quant_4x4_dc = x264_quant_4x4_dc_sse4;
  377.         pf->quant_4x4 = x264_quant_4x4_sse4;
  378.         pf->quant_8x8 = x264_quant_8x8_sse4;
  379.     }
  380. #endif // HAVE_MMX
  381. #ifdef ARCH_PPC
  382.     if( cpu&X264_CPU_ALTIVEC ) {
  383.         pf->quant_2x2_dc = x264_quant_2x2_dc_altivec;
  384.         pf->quant_4x4_dc = x264_quant_4x4_dc_altivec;
  385.         pf->quant_4x4 = x264_quant_4x4_altivec;
  386.         pf->quant_8x8 = x264_quant_8x8_altivec;
  387.         pf->dequant_4x4 = x264_dequant_4x4_altivec;
  388.         pf->dequant_8x8 = x264_dequant_8x8_altivec;
  389.     }
  390. #endif
  391. #ifdef HAVE_ARMV6
  392.     if( cpu&X264_CPU_ARMV6 )
  393.         pf->coeff_last[DCT_CHROMA_DC] = x264_coeff_last4_arm;
  394.     if( cpu&X264_CPU_NEON )
  395.     {
  396.         pf->quant_2x2_dc   = x264_quant_2x2_dc_neon;
  397.         pf->quant_4x4      = x264_quant_4x4_neon;
  398.         pf->quant_4x4_dc   = x264_quant_4x4_dc_neon;
  399.         pf->quant_8x8      = x264_quant_8x8_neon;
  400.         pf->dequant_4x4    = x264_dequant_4x4_neon;
  401.         pf->dequant_4x4_dc = x264_dequant_4x4_dc_neon;
  402.         pf->dequant_8x8    = x264_dequant_8x8_neon;
  403.         pf->coeff_last[ DCT_LUMA_AC] = x264_coeff_last15_neon;
  404.         pf->coeff_last[DCT_LUMA_4x4] = x264_coeff_last16_neon;
  405.         pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_neon;
  406.     }
  407. #endif
  408.     pf->coeff_last[  DCT_LUMA_DC] = pf->coeff_last[DCT_LUMA_4x4];
  409.     pf->coeff_last[DCT_CHROMA_AC] = pf->coeff_last[ DCT_LUMA_AC];
  410.     pf->coeff_level_run[  DCT_LUMA_DC] = pf->coeff_level_run[DCT_LUMA_4x4];
  411.     pf->coeff_level_run[DCT_CHROMA_AC] = pf->coeff_level_run[ DCT_LUMA_AC];
  412. }