dsputil.c
资源名称:tcpmp.rar [点击查看]
上传用户:wstnjxml
上传日期:2014-04-03
资源大小:7248k
文件大小:140k
源码类别:
Windows CE
开发平台:
C/C++
- #else
- uint64_t __align8 aligned_bak[stride];
- #endif DCTELEM * const temp= (DCTELEM*)aligned_temp; uint8_t * const bak= (uint8_t*)aligned_bak; int i, last, run, bits, level, distoration, start_i; const int esc_length= s->ac_esc_length; uint8_t * length; uint8_t * last_length; assert(h==8); for(i=0; i<8; i++){ ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0]; ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1]; } s->dsp.diff_pixels(temp, src1, src2, stride); s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); bits=0; if (s->mb_intra) { start_i = 1; length = s->intra_ac_vlc_length; last_length= s->intra_ac_vlc_last_length; bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma } else { start_i = 0; length = s->inter_ac_vlc_length; last_length= s->inter_ac_vlc_last_length; } if(last>=start_i){ run=0; for(i=start_i; i<last; i++){ int j= scantable[i]; level= temp[j]; if(level){ level+=64; if((level&(~127)) == 0){ bits+= length[UNI_AC_ENC_INDEX(run, level)]; }else bits+= esc_length; run=0; }else run++; } i= scantable[last]; level= temp[i] + 64; assert(level - 64); if((level&(~127)) == 0){ bits+= last_length[UNI_AC_ENC_INDEX(run, level)]; }else bits+= esc_length; } if(last>=0){ if(s->mb_intra) s->dct_unquantize_intra(s, temp, 0, s->qscale); else s->dct_unquantize_inter(s, temp, 0, s->qscale); } s->dsp.idct_add(bak, stride, temp); distoration= s->dsp.sse[1](NULL, bak, src1, stride, 8); return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7); } static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ MpegEncContext * const s= (MpegEncContext *)c; const uint8_t *scantable= s->intra_scantable.permutated; uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8]; DCTELEM * const temp= (DCTELEM*)aligned_temp; int i, last, run, bits, level, start_i; const int esc_length= s->ac_esc_length; uint8_t * length; uint8_t * last_length; assert(h==8); s->dsp.diff_pixels(temp, src1, src2, stride); s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); bits=0; if (s->mb_intra) { start_i = 1; length = s->intra_ac_vlc_length; last_length= s->intra_ac_vlc_last_length; bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma } else { start_i = 0; length = s->inter_ac_vlc_length; last_length= s->inter_ac_vlc_last_length; } if(last>=start_i){ run=0; for(i=start_i; i<last; i++){ int j= scantable[i]; level= temp[j]; if(level){ level+=64; if((level&(~127)) == 0){ bits+= length[UNI_AC_ENC_INDEX(run, level)]; }else bits+= esc_length; run=0; }else run++; } i= scantable[last]; level= temp[i] + 64; assert(level - 64); if((level&(~127)) == 0){ bits+= last_length[UNI_AC_ENC_INDEX(run, level)]; }else bits+= esc_length; } return bits; } static int vsad_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ int score=0; int x,y; for(y=1; y<h; y++){ for(x=0; x<16; x+=4){ score+= ABS(s[x ] - s[x +stride]) + ABS(s[x+1] - s[x+1+stride]) +ABS(s[x+2] - s[x+2+stride]) + ABS(s[x+3] - s[x+3+stride]); } s+= stride; } return score; } static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){ int score=0; int x,y; for(y=1; y<h; y++){ for(x=0; x<16; x++){ score+= ABS(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]); } s1+= stride; s2+= stride; } return score; } #define SQ(a) ((a)*(a)) static int vsse_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ int score=0; int x,y; for(y=1; y<h; y++){ for(x=0; x<16; x+=4){ score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride]) +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); } s+= stride; } return score; } static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){ int score=0; int x,y; for(y=1; y<h; y++){ for(x=0; x<16; x++){ score+= SQ(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]); } s1+= stride; s2+= stride; } return score; } WARPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c) WARPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c) WARPER8_16_SQ(dct_sad8x8_c, dct_sad16_c) WARPER8_16_SQ(dct_max8x8_c, dct_max16_c) WARPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) WARPER8_16_SQ(rd8x8_c, rd16_c) WARPER8_16_SQ(bit8x8_c, bit16_c) /* XXX: those functions should be suppressed ASAP when all IDCTs are converted */ static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block) { j_rev_dct (block); put_pixels_clamped_c(block, dest, line_size); } static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block) { j_rev_dct (block); add_pixels_clamped_c(block, dest, line_size); } static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block) { j_rev_dct4 (block); put_pixels_clamped4_c(block, dest, line_size); } static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block) { j_rev_dct4 (block); add_pixels_clamped4_c(block, dest, line_size); } static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block) { j_rev_dct2 (block); put_pixels_clamped2_c(block, dest, line_size); } static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block) { j_rev_dct2 (block); add_pixels_clamped2_c(block, dest, line_size); } static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block) { uint8_t *cm = cropTbl + MAX_NEG_CROP; dest[0] = cm[(block[0] + 4)>>3]; } static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block) { uint8_t *cm = cropTbl + MAX_NEG_CROP; dest[0] = cm[dest[0] + ((block[0] + 4)>>3)]; } /* init static data */ void dsputil_static_init(void) { int i; for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i; for(i=0;i<MAX_NEG_CROP;i++) { cropTbl[i] = 0; cropTbl[i + MAX_NEG_CROP + 256] = 255; } for(i=0;i<512;i++) { squareTbl[i] = (i - 256) * (i - 256); } for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1; } void dsputil_init(DSPContext* c, AVCodecContext *avctx) { int i; #ifdef CONFIG_ENCODERS if(avctx->dct_algo==FF_DCT_FASTINT) { c->fdct = fdct_ifast; c->fdct248 = fdct_ifast248; } else if(avctx->dct_algo==FF_DCT_FAAN) { c->fdct = ff_faandct; c->fdct248 = ff_faandct248; } else { c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default c->fdct248 = ff_fdct248_islow; } #endif //CONFIG_ENCODERS if(avctx->lowres==1){ if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO){ c->idct_put= ff_jref_idct4_put; c->idct_add= ff_jref_idct4_add; }else{ c->idct_put= ff_h264_lowres_idct_put_c; c->idct_add= ff_h264_lowres_idct_add_c; } c->idct = j_rev_dct4; c->idct_permutation_type= FF_NO_IDCT_PERM; }else if(avctx->lowres==2){ c->idct_put= ff_jref_idct2_put; c->idct_add= ff_jref_idct2_add; c->idct = j_rev_dct2; c->idct_permutation_type= FF_NO_IDCT_PERM; }else if(avctx->lowres==3){ c->idct_put= ff_jref_idct1_put; c->idct_add= ff_jref_idct1_add; c->idct = j_rev_dct1; c->idct_permutation_type= FF_NO_IDCT_PERM; }else{ if(avctx->idct_algo==FF_IDCT_INT){ c->idct_put= ff_jref_idct_put; c->idct_add= ff_jref_idct_add; c->idct = j_rev_dct; c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; }else if(avctx->idct_algo==FF_IDCT_VP3){ c->idct_put= ff_vp3_idct_put_c; c->idct_add= ff_vp3_idct_add_c; c->idct = ff_vp3_idct_c; c->idct_permutation_type= FF_NO_IDCT_PERM; }else{ //accurate/default c->idct_put= simple_idct_put; c->idct_add= simple_idct_add; c->idct = simple_idct; c->idct_permutation_type= FF_NO_IDCT_PERM; } } c->h264_idct_add= ff_h264_idct_add_c; c->h264_idct8_add= ff_h264_idct8_add_c; c->get_pixels = get_pixels_c; c->diff_pixels = diff_pixels_c; c->put_pixels_clamped = put_pixels_clamped_c; c->put_signed_pixels_clamped = put_signed_pixels_clamped_c; c->add_pixels_clamped = add_pixels_clamped_c; c->add_pixels8 = add_pixels8_c; c->add_pixels4 = add_pixels4_c; c->gmc1 = gmc1_c; c->gmc = gmc_c; c->clear_blocks = clear_blocks_c; c->pix_sum = pix_sum_c; c->pix_norm1 = pix_norm1_c; /* TODO [0] 16 [1] 8 */ c->pix_abs[0][0] = pix_abs16_c; c->pix_abs[0][1] = pix_abs16_x2_c; c->pix_abs[0][2] = pix_abs16_y2_c; c->pix_abs[0][3] = pix_abs16_xy2_c; c->pix_abs[1][0] = pix_abs8_c; c->pix_abs[1][1] = pix_abs8_x2_c; c->pix_abs[1][2] = pix_abs8_y2_c; c->pix_abs[1][3] = pix_abs8_xy2_c; #define dspfunc(PFX, IDX, NUM) c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c dspfunc(put, 0, 16); dspfunc(put_no_rnd, 0, 16); dspfunc(put, 1, 8); dspfunc(put_no_rnd, 1, 8); dspfunc(put, 2, 4); dspfunc(put, 3, 2); dspfunc(avg, 0, 16); dspfunc(avg_no_rnd, 0, 16); dspfunc(avg, 1, 8); dspfunc(avg_no_rnd, 1, 8); dspfunc(avg, 2, 4); dspfunc(avg, 3, 2); #undef dspfunc c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c; c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c; c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c; c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c; c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c; c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c; c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c; c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c; c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c; c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c; c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c; c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c; c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c; c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c; c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c; c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c; c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c; c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c; c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c; c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c; #define dspfunc(PFX, IDX, NUM) c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c dspfunc(put_qpel, 0, 16); dspfunc(put_no_rnd_qpel, 0, 16); dspfunc(avg_qpel, 0, 16); /* dspfunc(avg_no_rnd_qpel, 0, 16); */ dspfunc(put_qpel, 1, 8); dspfunc(put_no_rnd_qpel, 1, 8); dspfunc(avg_qpel, 1, 8); /* dspfunc(avg_no_rnd_qpel, 1, 8); */ dspfunc(put_h264_qpel, 0, 16); dspfunc(put_h264_qpel, 1, 8); dspfunc(put_h264_qpel, 2, 4); dspfunc(avg_h264_qpel, 0, 16); dspfunc(avg_h264_qpel, 1, 8); dspfunc(avg_h264_qpel, 2, 4); #undef dspfunc c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c; c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c; c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c; c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c; c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c; c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c; c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c; c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c; c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c; c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c; c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c; c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c; c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c; c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c; c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c; c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c; c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c; c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c; c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c; c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c; c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c; c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c; c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c; c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c; c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c; c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c; c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c; c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c; c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c; c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c; c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c; c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c; c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c; c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c; #define SET_CMP_FUNC(name) c->name[0]= name ## 16_c; c->name[1]= name ## 8x8_c; SET_CMP_FUNC(hadamard8_diff) c->hadamard8_diff[4]= hadamard8_intra16_c; SET_CMP_FUNC(dct_sad) SET_CMP_FUNC(dct_max) c->sad[0]= pix_abs16_c; c->sad[1]= pix_abs8_c; c->sse[0]= sse16_c; c->sse[1]= sse8_c; c->sse[2]= sse4_c; SET_CMP_FUNC(quant_psnr) SET_CMP_FUNC(rd) SET_CMP_FUNC(bit) c->vsad[0]= vsad16_c; c->vsad[4]= vsad_intra16_c; c->vsse[0]= vsse16_c; c->vsse[4]= vsse_intra16_c; c->nsse[0]= nsse16_c; c->nsse[1]= nsse8_c; c->w53[0]= w53_16_c; c->w53[1]= w53_8_c; c->w97[0]= w97_16_c; c->w97[1]= w97_8_c; c->add_bytes= add_bytes_c; c->diff_bytes= diff_bytes_c; c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c; c->bswap_buf= bswap_buf; c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c; c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c; c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c; c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c; c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c; c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c; c->h263_h_loop_filter= h263_h_loop_filter_c; c->h263_v_loop_filter= h263_v_loop_filter_c; c->h261_loop_filter= h261_loop_filter_c; c->try_8x8basis= try_8x8basis_c; c->add_8x8basis= add_8x8basis_c; #ifdef HAVE_MMX dsputil_init_mmx(c, avctx); #endif #ifdef ARCH_ARMV4L dsputil_init_armv4l(c, avctx); #endif #ifdef HAVE_MLIB dsputil_init_mlib(c, avctx); #endif #ifdef ARCH_SPARC dsputil_init_vis(c,avctx); #endif #ifdef ARCH_ALPHA dsputil_init_alpha(c, avctx); #endif #ifdef ARCH_POWERPC dsputil_init_ppc(c, avctx); #endif #ifdef HAVE_MMI dsputil_init_mmi(c, avctx); #endif #ifdef ARCH_SH4 dsputil_init_sh4(c,avctx); #endif switch(c->idct_permutation_type){ case FF_NO_IDCT_PERM: for(i=0; i<64; i++) c->idct_permutation[i]= i; break; case FF_LIBMPEG2_IDCT_PERM: for(i=0; i<64; i++) c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); break; case FF_SIMPLE_IDCT_PERM: for(i=0; i<64; i++) c->idct_permutation[i]= simple_mmx_permutation[i]; break; case FF_TRANSPOSE_IDCT_PERM: for(i=0; i<64; i++) c->idct_permutation[i]= ((i&7)<<3) | (i>>3); break; case FF_PARTTRANS_IDCT_PERM: for(i=0; i<64; i++) c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3); break; default: av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not setn"); } }