me_fullfast.c
资源名称:chapter15.rar [点击查看]
上传用户:hjq518
上传日期:2021-12-09
资源大小:5084k
文件大小:33k
源码类别:
Audio
开发平台:
Visual C++
- /*!
- *************************************************************************************
- * \file me_fullfast.c
- *
- * \brief
- * Motion Estimation using Full Search Fast
- *
- * \author
- * Main contributors (see contributors.h for copyright, address and affiliation details)
- * - Alexis Michael Tourapis <alexismt@ieee.org>
- *
- *************************************************************************************
- */
- // Includes
- #include "contributors.h"
- #include <limits.h>
- #include "global.h"
- #include "image.h"
- #include "memalloc.h"
- #include "mb_access.h"
- #include "refbuf.h"
- #include "me_distortion.h"
- #include "me_fullsearch.h"
- #include "conformance.h"
- #include "mv-search.h"
- // External tables: x / y offset of every search position, indexed by spiral position number
- extern short* spiral_search_x;
- extern short* spiral_search_y;
- /*****
- ***** static variables for fast integer motion estimation
- *****
- */
- static int **search_setup_done; //!< [list][ref] flag: 1 once SetupFastFullPelSearch has filled BlockSAD for that reference; cleared by ResetFastFullIntegerSearch
- static int **search_center_x; //!< [list][ref] absolute x of the search center for fast full motion search
- static int **search_center_y; //!< [list][ref] absolute y of the search center for fast full motion search
- static int **pos_00; //!< [list][ref] spiral position of the (0,0) vector (only written when params->rdopt is 0)
- static distpel *****BlockSAD; //!< [list][ref][blocktype 1..7][block 0..15][pos] SAD values; blocktype index 0 is never allocated
- static int **max_search_range; //!< [list][ref] search range per reference, assigned in InitializeFastFullIntegerSearch
- extern void SetMotionVectorPredictor (Macroblock *currMB, short pmv[2], char **refPic,
- short ***tmp_mv, short ref_frame,
- int list, int mb_x, int mb_y,
- int blockshape_x, int blockshape_y);
- // Functions
- /*!
- ***********************************************************************
- * \brief
- * Full pixel block motion search
- ***********************************************************************
- */
- /*!
- ***********************************************************************
- * \brief
- * function creating arrays for fast integer motion estimation
- ***********************************************************************
- */
- void InitializeFastFullIntegerSearch (void)
- {
- int i, j, k, list;
- int search_range = params->search_range;
- int max_pos = (2*search_range+1) * (2*search_range+1);
- if ((BlockSAD = (distpel*****)malloc (2 * sizeof(distpel****))) == NULL)
- no_mem_exit ("InitializeFastFullIntegerSearch: BlockSAD");
- for (list=0; list<2;list++)
- {
- if ((BlockSAD[list] = (distpel****)malloc ((img->max_num_references) * sizeof(distpel***))) == NULL)
- no_mem_exit ("InitializeFastFullIntegerSearch: BlockSAD");
- for (i = 0; i < img->max_num_references; i++)
- {
- if ((BlockSAD[list][i] = (distpel***)malloc (8 * sizeof(distpel**))) == NULL)
- no_mem_exit ("InitializeFastFullIntegerSearch: BlockSAD");
- for (j = 1; j < 8; j++)
- {
- if ((BlockSAD[list][i][j] = (distpel**)malloc (16 * sizeof(distpel*))) == NULL)
- no_mem_exit ("InitializeFastFullIntegerSearch: BlockSAD");
- for (k = 0; k < 16; k++)
- {
- if ((BlockSAD[list][i][j][k] = (distpel*)malloc (max_pos * sizeof(distpel))) == NULL)
- no_mem_exit ("InitializeFastFullIntegerSearch: BlockSAD");
- }
- }
- }
- }
- if ((search_setup_done = (int**)malloc (2*sizeof(int*)))==NULL)
- no_mem_exit ("InitializeFastFullIntegerSearch: search_setup_done");
- if ((search_center_x = (int**)malloc (2*sizeof(int*)))==NULL)
- no_mem_exit ("InitializeFastFullIntegerSearch: search_center_x");
- if ((search_center_y = (int**)malloc (2*sizeof(int*)))==NULL)
- no_mem_exit ("InitializeFastFullIntegerSearch: search_center_y");
- if ((pos_00 = (int**)malloc (2*sizeof(int*)))==NULL)
- no_mem_exit ("InitializeFastFullIntegerSearch: pos_00");
- if ((max_search_range = (int**)malloc (2*sizeof(int*)))==NULL)
- no_mem_exit ("InitializeFastFullIntegerSearch: max_search_range");
- for (list=0; list<2; list++)
- {
- if ((search_setup_done[list] = (int*)malloc ((img->max_num_references)*sizeof(int)))==NULL)
- no_mem_exit ("InitializeFastFullIntegerSearch: search_setup_done");
- if ((search_center_x[list] = (int*)malloc ((img->max_num_references)*sizeof(int)))==NULL)
- no_mem_exit ("InitializeFastFullIntegerSearch: search_center_x");
- if ((search_center_y[list] = (int*)malloc ((img->max_num_references)*sizeof(int)))==NULL)
- no_mem_exit ("InitializeFastFullIntegerSearch: search_center_y");
- if ((pos_00[list] = (int*)malloc ((img->max_num_references)*sizeof(int)))==NULL)
- no_mem_exit ("InitializeFastFullIntegerSearch: pos_00");
- if ((max_search_range[list] = (int*)malloc ((img->max_num_references)*sizeof(int)))==NULL)
- no_mem_exit ("InitializeFastFullIntegerSearch: max_search_range");
- }
- // assign max search ranges for reference frames
- if (params->full_search == 2)
- {
- for (list=0;list<2;list++)
- for (i=0; i<img->max_num_references; i++)
- max_search_range[list][i] = search_range;
- }
- else
- {
- for (list=0;list<2;list++)
- {
- max_search_range[list][0] = search_range;
- for (i=1; i< img->max_num_references; i++) max_search_range[list][i] = search_range / 2;
- }
- }
- }
- /*!
- ***********************************************************************
- * \brief
- * function for deleting the arrays for fast integer motion estimation
- ***********************************************************************
- */
- void
- ClearFastFullIntegerSearch ()
- {
- int i, j, k, list;
- for (list=0; list<2; list++)
- {
- for (i = 0; i < img->max_num_references; i++)
- {
- for (j = 1; j < 8; j++)
- {
- for (k = 0; k < 16; k++)
- {
- free (BlockSAD[list][i][j][k]);
- }
- free (BlockSAD[list][i][j]);
- }
- free (BlockSAD[list][i]);
- }
- free (BlockSAD[list]);
- }
- free (BlockSAD);
- for (list=0; list<2; list++)
- {
- free (search_setup_done[list]);
- free (search_center_x[list]);
- free (search_center_y[list]);
- free (pos_00[list]);
- free (max_search_range[list]);
- }
- free (search_setup_done);
- free (search_center_x);
- free (search_center_y);
- free (pos_00);
- free (max_search_range);
- }
- /*!
- ***********************************************************************
- * \brief
- * function resetting flags for fast integer motion estimation
- * (has to be called in start_macroblock())
- ***********************************************************************
- */
- void ResetFastFullIntegerSearch ()
- {
- int list;
- for (list=0; list<2; list++)
- memset(&search_setup_done [list][0], 0, img->max_num_references * sizeof(int));
- }
- /*!
- ***********************************************************************
- * \brief
- * calculation of SAD for larger blocks on the basis of 4x4 blocks
- ***********************************************************************
- */
- void
- SetupLargerBlocks (int list, int refindex, int max_pos)
- {
- #define ADD_UP_BLOCKS() _o=*_bo; _i=*_bi; _j=*_bj; for(pos=0;pos<max_pos;pos++) _o[pos] = _i[pos] + _j[pos];
- #define INCREMENT(inc) _bo+=inc; _bi+=inc; _bj+=inc;
- int pos;
- distpel **_bo, **_bi, **_bj;
- register distpel *_o, *_i, *_j;
- //--- blocktype 6 ---
- _bo = BlockSAD[list][refindex][6];
- _bi = BlockSAD[list][refindex][7];
- _bj = _bi + 4;
- ADD_UP_BLOCKS(); INCREMENT(1);
- ADD_UP_BLOCKS(); INCREMENT(1);
- ADD_UP_BLOCKS(); INCREMENT(1);
- ADD_UP_BLOCKS(); INCREMENT(5);
- ADD_UP_BLOCKS(); INCREMENT(1);
- ADD_UP_BLOCKS(); INCREMENT(1);
- ADD_UP_BLOCKS(); INCREMENT(1);
- ADD_UP_BLOCKS();
- //--- blocktype 5 ---
- _bo = BlockSAD[list][refindex][5];
- _bi = BlockSAD[list][refindex][7];
- _bj = _bi + 1;
- ADD_UP_BLOCKS(); INCREMENT(2);
- ADD_UP_BLOCKS(); INCREMENT(2);
- ADD_UP_BLOCKS(); INCREMENT(2);
- ADD_UP_BLOCKS(); INCREMENT(2);
- ADD_UP_BLOCKS(); INCREMENT(2);
- ADD_UP_BLOCKS(); INCREMENT(2);
- ADD_UP_BLOCKS(); INCREMENT(2);
- ADD_UP_BLOCKS();
- //--- blocktype 4 ---
- _bo = BlockSAD[list][refindex][4];
- _bi = BlockSAD[list][refindex][6];
- _bj = _bi + 1;
- ADD_UP_BLOCKS(); INCREMENT(2);
- ADD_UP_BLOCKS(); INCREMENT(6);
- ADD_UP_BLOCKS(); INCREMENT(2);
- ADD_UP_BLOCKS();
- //--- blocktype 3 ---
- _bo = BlockSAD[list][refindex][3];
- _bi = BlockSAD[list][refindex][4];
- _bj = _bi + 8;
- ADD_UP_BLOCKS(); INCREMENT(2);
- ADD_UP_BLOCKS();
- //--- blocktype 2 ---
- _bo = BlockSAD[list][refindex][2];
- _bi = BlockSAD[list][refindex][4];
- _bj = _bi + 2;
- ADD_UP_BLOCKS(); INCREMENT(8);
- ADD_UP_BLOCKS();
- //--- blocktype 1 ---
- _bo = BlockSAD[list][refindex][1];
- _bi = BlockSAD[list][refindex][3];
- _bj = _bi + 2;
- ADD_UP_BLOCKS();
- }
- /*!
- ***********************************************************************
- * \brief
- * Setup the fast search for a macroblock
- ***********************************************************************
- */
- #define GEN_ME 0
- #if GEN_ME
- void SetupFastFullPelSearch (Macroblock *currMb, short ref, int list) // <-- reference frame parameter, list0 or 1
- {
- short pmv[2], mv[2];
- static imgpel orig_pels[768];
- imgpel *srcptr = orig_pels;
- int offset_x, offset_y, range_partly_outside, ref_x, ref_y, pos, abs_x, abs_y, bindex, blky;
- int max_width, max_height;
- int img_width, img_height;
- StorablePicture *ref_picture;
- distpel** block_sad = BlockSAD[list][ref][7];
- int search_range = max_search_range[list][ref];
- int max_pos = (2*search_range+1) * (2*search_range+1);
- int list_offset = img->mb_data[img->current_mb_nr].list_offset;
- int apply_weights = ( (active_pps->weighted_pred_flag && (img->type == P_SLICE || img->type == SP_SLICE)) ||
- (active_pps->weighted_bipred_idc && (img->type == B_SLICE))) && params->UseWeightedReferenceME;
- int abs_y4, abs_x4;
- int i, j, k;
- int level_dist = F_PEL + apply_weights * 3;
- int pixel_x, pixel_y;
- for ( j= img->opix_y; j < img->opix_y + MB_BLOCK_SIZE; j +=BLOCK_SIZE)
- {
- for (pixel_y = j; pixel_y < j + BLOCK_SIZE; pixel_y++)
- {
- memcpy( srcptr, &pCurImg[pixel_y][img->opix_x], BLOCK_SIZE * sizeof(imgpel));
- memcpy( srcptr + 16, &pCurImg[pixel_y][img->opix_x + BLOCK_SIZE], BLOCK_SIZE * sizeof(imgpel));
- memcpy( srcptr + 32, &pCurImg[pixel_y][img->opix_x + 2 * BLOCK_SIZE], BLOCK_SIZE * sizeof(imgpel));
- memcpy( srcptr + 48, &pCurImg[pixel_y][img->opix_x + 3 * BLOCK_SIZE], BLOCK_SIZE * sizeof(imgpel));
- srcptr += BLOCK_SIZE;
- }
- srcptr += 48;
- }
- // storage format is different from that of orig_pic
- // for YUV 4:2:0 we have:
- // YYYY
- // YYYY
- // U U
- //
- // V V
- //
- if (ChromaMEEnable)
- {
- imgpel *auxptr;
- int uv;
- int bsx_c = BLOCK_SIZE >> (chroma_shift_x - 2);
- int bsy_c = BLOCK_SIZE >> (chroma_shift_y - 2);
- int pic_pix_x_c = img->opix_x >> (chroma_shift_x - 2);
- int pic_pix_y_c = img->opix_y >> (chroma_shift_y - 2);
- // copy the original cmp1 and cmp2 data to the orig_pic matrix
- // This seems to be wrong.
- for (uv = 1; uv < 3; uv++)
- {
- srcptr = auxptr = orig_pels + (256 << (uv - 1));
- for ( pixel_y = 0, i = 0; i < (BLOCK_SIZE >> (chroma_shift_y - 2)); i++, pixel_y += bsy_c )
- {
- for ( pixel_x = 0, k = 0; k < (BLOCK_SIZE >> (chroma_shift_x - 2)); k++, pixel_x += bsx_c )
- {
- srcptr = auxptr;
- for (j = 0; j < bsy_c; j++)
- {
- memcpy( srcptr, &pImgOrg[uv][pic_pix_y_c + pixel_y + j][pic_pix_x_c + pixel_x], bsx_c * sizeof(imgpel));
- srcptr += bsx_c;
- }
- auxptr += MB_BLOCK_SIZE;
- }
- }
- }
- }
- ref_picture = listX[list+list_offset][ref];
- ref_access_method = FAST_ACCESS;
- //===== Use weighted Reference for ME ====
- ref_pic_sub.luma = ref_picture->p_curr_img_sub;
- if (apply_weights)
- {
- weight_luma = wp_weight[list + list_offset][ref][0];
- offset_luma = wp_offset[list + list_offset][ref][0];
- }
- if ( ChromaMEEnable)
- {
- ref_pic_sub.crcb[0] = ref_picture->imgUV_sub[0];
- ref_pic_sub.crcb[1] = ref_picture->imgUV_sub[1];
- width_pad_cr = ref_picture->size_x_cr_pad;
- height_pad_cr = ref_picture->size_y_cr_pad;
- if (apply_weights)
- {
- weight_cr[0] = wp_weight[list + list_offset][ref][1];
- weight_cr[1] = wp_weight[list + list_offset][ref][2];
- offset_cr[0] = wp_offset[list + list_offset][ref][1];
- offset_cr[1] = wp_offset[list + list_offset][ref][2];
- }
- }
- max_width = ref_picture->size_x - 17;
- max_height = ref_picture->size_y - 17;
- img_width = ref_picture->size_x;
- img_height = ref_picture->size_y;
- width_pad = ref_picture->size_x_pad;
- height_pad = ref_picture->size_y_pad;
- //===== get search center: predictor of 16x16 block =====
- SetMotionVectorPredictor (currMB, pmv, enc_picture->motion.ref_idx[list], enc_picture->motion.mv[list], ref, list, 0, 0, 16, 16);
- mv[0] = pmv[0] / 4;
- mv[1] = pmv[1] / 4;
- if (!params->rdopt)
- {
- //--- correct center so that (0,0) vector is inside ---
- mv[0] = iClip3(-search_range, search_range, mv[0]);
- mv[1] = iClip3(-search_range, search_range, mv[1]);
- }
- clip_mv_range(img, search_range, mv, F_PEL);
- search_center_x[list][ref] = mv[0] + img->opix_x;
- search_center_y[list][ref] = mv[1] + img->opix_y;
- offset_x = search_center_x[list][ref];
- offset_y = search_center_y[list][ref];
- //===== check if whole search range is inside image =====
- if (offset_x >= search_range && offset_x <= max_width - search_range &&
- offset_y >= search_range && offset_y <= max_height - search_range )
- {
- range_partly_outside = 0;
- }
- else
- {
- range_partly_outside = 1;
- }
- //===== determine position of (0,0)-vector =====
- if (!params->rdopt)
- {
- ref_x = img->opix_x - offset_x;
- ref_y = img->opix_y - offset_y;
- for (pos = 0; pos < max_pos; pos++)
- {
- if (ref_x == spiral_search_x[pos] &&
- ref_y == spiral_search_y[pos])
- {
- pos_00[list][ref] = pos;
- break;
- }
- }
- }
- //===== loop over search range (spiral search): get blockwise SAD =====
- for (pos = 0; pos < max_pos; pos++)
- {
- abs_y = offset_y + spiral_search_y[pos];
- abs_x = offset_x + spiral_search_x[pos];
- abs_y4 = (abs_y + IMG_PAD_SIZE) << 2;
- abs_x4 = (abs_x + IMG_PAD_SIZE) << 2;
- if (range_partly_outside)
- {
- if (abs_y >= 0 && abs_y <= max_height &&
- abs_x >= 0 && abs_x <= max_width )
- {
- ref_access_method = FAST_ACCESS;
- }
- else
- {
- ref_access_method = UMV_ACCESS;
- }
- }
- srcptr = orig_pels;
- bindex = 0;
- for (blky = 0; blky < 4; blky++)
- {
- block_sad[bindex++][pos] = computeUniPred[level_dist](srcptr, 4, 4, INT_MAX, abs_x4, abs_y4);
- srcptr += 16;
- block_sad[bindex++][pos] = computeUniPred[level_dist](srcptr, 4, 4, INT_MAX, abs_x4 + 16, abs_y4);
- srcptr += 16;
- block_sad[bindex++][pos] = computeUniPred[level_dist](srcptr, 4, 4, INT_MAX, abs_x4 + 32, abs_y4);
- srcptr += 16;
- block_sad[bindex++][pos] = computeUniPred[level_dist](srcptr, 4, 4, INT_MAX, abs_x4 + 48, abs_y4);
- srcptr += 16;
- abs_y4 += 16;
- }
- }
- //===== combine SAD's for larger block types =====
- SetupLargerBlocks (list, ref, max_pos);
- //===== set flag marking that search setup have been done =====
- search_setup_done[list][ref] = 1;
- }
- #else
- void SetupFastFullPelSearch (Macroblock *currMB, short ref, int list) // ref: reference frame index, list: 0 or 1. Fills BlockSAD[list][ref][1..7] for the current macroblock and sets search_setup_done[list][ref]
- {
- short pmv[2];
- static imgpel orig_pels[768]; // copy of the current macroblock: 256 luma samples, followed by the chroma samples when ChromaMEEnable is set
- imgpel *srcptr = orig_pels, *refptr;
- int k, x, y;
- int abs_y4, abs_x4;
- int offset_x, offset_y, range_partly_outside, ref_x, ref_y, pos, abs_x, abs_y, bindex, blky;
- int LineSadBlk0, LineSadBlk1, LineSadBlk2, LineSadBlk3; // accumulators for the four 4x4 blocks of one block row
- int max_width, max_height;
- int img_width, img_height;
- StorablePicture *ref_picture;
- distpel** block_sad = BlockSAD[list][ref][7]; // 4x4 block SAD table: [block 0..15][pos]
- int search_range = max_search_range[list][ref];
- int max_pos = (2*search_range+1) * (2*search_range+1);
- int list_offset = img->mb_data[img->current_mb_nr].list_offset;
- int apply_weights = ( (active_pps->weighted_pred_flag && (img->type == P_SLICE || img->type == SP_SLICE)) ||
- (active_pps->weighted_bipred_idc && (img->type == B_SLICE))) && params->UseWeightedReferenceME;
- int weighted_pel;
- int *dist_method = params->MEErrorMetric[0] ? img->quad : byte_abs; // distortion lookup table, indexed by the pixel difference (reference - original)
- ref_picture = listX[list+list_offset][ref];
- ref_access_method = FAST_ACCESS;
- ref_pic_sub.luma = ref_picture->p_curr_img_sub;
- max_width = ref_picture->size_x - 17; // positions up to (max_width, max_height) are read with FAST_ACCESS below
- max_height = ref_picture->size_y - 17;
- img_width = ref_picture->size_x; // NOTE(review): img_width / img_height are assigned but not read in the visible body
- img_height = ref_picture->size_y;
- width_pad = ref_picture->size_x_pad;
- height_pad = ref_picture->size_y_pad;
- if (apply_weights)
- {
- weight_luma = wp_weight[list + list_offset][ref][0];
- offset_luma = wp_offset[list + list_offset][ref][0];
- }
- if ( ChromaMEEnable)
- {
- ref_pic_sub.crcb[0] = ref_picture->imgUV_sub[0];
- ref_pic_sub.crcb[1] = ref_picture->imgUV_sub[1];
- width_pad_cr = ref_picture->size_x_cr_pad;
- height_pad_cr = ref_picture->size_y_cr_pad;
- if (apply_weights)
- {
- weight_cr[0] = wp_weight[list + list_offset][ref][1];
- weight_cr[1] = wp_weight[list + list_offset][ref][2];
- offset_cr[0] = wp_offset[list + list_offset][ref][1];
- offset_cr[1] = wp_offset[list + list_offset][ref][2];
- }
- }
- //===== get search center: predictor of 16x16 block, divided by 4 to obtain full-pel units =====
- SetMotionVectorPredictor (currMB, pmv, enc_picture->motion.ref_idx[list], enc_picture->motion.mv[list], ref, list, 0, 0, 16, 16);
- search_center_x[list][ref] = pmv[0] / 4;
- search_center_y[list][ref] = pmv[1] / 4;
- if (!params->rdopt)
- {
- //--- correct center so that (0,0) vector is inside ---
- search_center_x[list][ref] = iClip3(-search_range, search_range, search_center_x[list][ref]);
- search_center_y[list][ref] = iClip3(-search_range, search_range, search_center_y[list][ref]);
- }
- search_center_x[list][ref] = iClip3(-2047 + search_range, 2047 - search_range, search_center_x[list][ref]); // keep center +/- search_range within [-2047, 2047]
- search_center_y[list][ref] = iClip3(LEVELMVLIMIT[img->LevelIndex][0] + search_range, LEVELMVLIMIT[img->LevelIndex][1] - search_range, search_center_y[list][ref]); // same for y, using the two LEVELMVLIMIT entries of the current level index
- search_center_x[list][ref] += img->opix_x; // from here on the center is an absolute picture position
- search_center_y[list][ref] += img->opix_y;
- offset_x = search_center_x[list][ref];
- offset_y = search_center_y[list][ref];
- //===== copy original block for fast access: 16 luma rows of 16 samples, stored row by row =====
- for (y = img->opix_y; y < img->opix_y+MB_BLOCK_SIZE; y++)
- {
- memcpy(srcptr, &pCurImg[y][img->opix_x], MB_BLOCK_SIZE * sizeof(imgpel));
- srcptr += MB_BLOCK_SIZE;
- }
- if ( ChromaMEEnable)
- {
- for (k = 1; k < 3; k++) // the two chroma planes are appended directly after the luma samples
- {
- for (y = img->opix_c_y; y < img->opix_c_y + img->mb_cr_size_y; y++)
- {
- memcpy(srcptr, &pImgOrg[k][y][img->opix_c_x], img->mb_cr_size_x * sizeof(imgpel));
- srcptr += img->mb_cr_size_x;
- }
- }
- }
- //===== check if whole search range is inside image =====
- if (offset_x >= search_range && offset_x <= max_width - search_range &&
- offset_y >= search_range && offset_y <= max_height - search_range )
- {
- range_partly_outside = 0;
- }
- else
- {
- range_partly_outside = 1;
- }
- //===== determine position of (0,0)-vector (pos_00 is left untouched if no spiral position matches) =====
- if (!params->rdopt)
- {
- ref_x = img->opix_x - offset_x;
- ref_y = img->opix_y - offset_y;
- for (pos = 0; pos < max_pos; pos++)
- {
- if (ref_x == spiral_search_x[pos] &&
- ref_y == spiral_search_y[pos])
- {
- pos_00[list][ref] = pos;
- break;
- }
- }
- }
- //===== loop over search range (spiral search): get blockwise SAD =====
- for (pos = 0; pos < max_pos; pos++)
- {
- abs_y = offset_y + spiral_search_y[pos];
- abs_x = offset_x + spiral_search_x[pos];
- abs_y4 = (abs_y + IMG_PAD_SIZE) << 2;
- abs_x4 = (abs_x + IMG_PAD_SIZE) << 2;
- if (range_partly_outside) // the access method only needs re-evaluating when part of the range lies outside
- {
- if (abs_y >= 0 && abs_y <= max_height&&
- abs_x >= 0 && abs_x <= max_width )
- {
- ref_access_method = FAST_ACCESS;
- }
- else
- {
- ref_access_method = UMV_ACCESS;
- }
- }
- if (apply_weights) // weighted path: every reference sample is weighted, rounded, shifted, offset and clipped before the lookup
- {
- srcptr = orig_pels;
- bindex = 0;
- refptr = get_line[ref_access_method] (ref_pic_sub.luma, abs_y4, abs_x4);
- for (blky = 0; blky < 4; blky++)
- {
- LineSadBlk0 = LineSadBlk1 = LineSadBlk2 = LineSadBlk3 = 0;
- for (y = 0; y < 4; y++) // one 16-sample row per iteration: 4 samples for each of the four blocks
- {
- weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *refptr++ + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
- LineSadBlk0 += dist_method [weighted_pel - *srcptr++];
- weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *refptr++ + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
- LineSadBlk0 += dist_method [weighted_pel - *srcptr++];
- weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *refptr++ + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
- LineSadBlk0 += dist_method [weighted_pel - *srcptr++];
- weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *refptr++ + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
- LineSadBlk0 += dist_method [weighted_pel - *srcptr++];
- weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *refptr++ + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
- LineSadBlk1 += dist_method [weighted_pel - *srcptr++];
- weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *refptr++ + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
- LineSadBlk1 += dist_method [weighted_pel - *srcptr++];
- weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *refptr++ + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
- LineSadBlk1 += dist_method [weighted_pel - *srcptr++];
- weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *refptr++ + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
- LineSadBlk1 += dist_method [weighted_pel - *srcptr++];
- weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *refptr++ + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
- LineSadBlk2 += dist_method [weighted_pel - *srcptr++];
- weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *refptr++ + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
- LineSadBlk2 += dist_method [weighted_pel - *srcptr++];
- weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *refptr++ + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
- LineSadBlk2 += dist_method [weighted_pel - *srcptr++];
- weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *refptr++ + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
- LineSadBlk2 += dist_method [weighted_pel - *srcptr++];
- weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *refptr++ + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
- LineSadBlk3 += dist_method [weighted_pel - *srcptr++];
- weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *refptr++ + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
- LineSadBlk3 += dist_method [weighted_pel - *srcptr++];
- weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *refptr++ + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
- LineSadBlk3 += dist_method [weighted_pel - *srcptr++];
- weighted_pel = iClip1( img->max_imgpel_value, ((weight_luma * *refptr++ + wp_luma_round) >> luma_log_weight_denom) + offset_luma);
- LineSadBlk3 += dist_method [weighted_pel - *srcptr++];
- refptr += img_padded_size_x - MB_BLOCK_SIZE; // skip the rest of the reference row (presumably img_padded_size_x is the row stride - TODO confirm)
- }
- block_sad[bindex++][pos] = LineSadBlk0;
- block_sad[bindex++][pos] = LineSadBlk1;
- block_sad[bindex++][pos] = LineSadBlk2;
- block_sad[bindex++][pos] = LineSadBlk3;
- }
- if (ChromaMEEnable)
- {
- int max_imgpel_value_uv = img->max_imgpel_value_comp[1];
- for (k = 0; k < 2; k ++) // srcptr is not reset: it continues past the 256 luma samples into the chroma part of orig_pels
- {
- bindex = 0;
- refptr = get_crline[ref_access_method] (ref_pic_sub.crcb[k], abs_y4, abs_x4); // NOTE(review): called with the luma coordinates abs_y4/abs_x4 - confirm this is what get_crline expects
- for (blky = 0; blky < 4; blky++)
- {
- LineSadBlk0 = LineSadBlk1 = LineSadBlk2 = LineSadBlk3 = 0;
- for (y = 0; y < img->mb_cr_size_y; y+=BLOCK_SIZE)
- {
- for (x = 0; x < img->mb_cr_size_x; x += BLOCK_SIZE)
- {
- weighted_pel = iClip1( max_imgpel_value_uv, ((weight_cr[k] * *refptr++ + wp_chroma_round) >> chroma_log_weight_denom) + offset_cr[k]);
- LineSadBlk0 += dist_method [weighted_pel - *srcptr++];
- }
- for (x = 0; x < img->mb_cr_size_x; x += BLOCK_SIZE)
- {
- weighted_pel = iClip1( max_imgpel_value_uv, ((weight_cr[k] * *refptr++ + wp_chroma_round) >> chroma_log_weight_denom) + offset_cr[k]);
- LineSadBlk1 += dist_method [weighted_pel - *srcptr++];
- }
- for (x = 0; x < img->mb_cr_size_x; x += BLOCK_SIZE)
- {
- weighted_pel = iClip1( max_imgpel_value_uv, ((weight_cr[k] * *refptr++ + wp_chroma_round) >> chroma_log_weight_denom) + offset_cr[k]);
- LineSadBlk2 += dist_method [weighted_pel - *srcptr++];
- }
- for (x = 0; x < img->mb_cr_size_x; x += BLOCK_SIZE)
- {
- weighted_pel = iClip1( max_imgpel_value_uv, ((weight_cr[k] * *refptr++ + wp_chroma_round) >> chroma_log_weight_denom) + offset_cr[k]);
- LineSadBlk3 += dist_method [weighted_pel - *srcptr++];
- }
- refptr += img_cr_padded_size_x - img->mb_cr_size_x;
- }
- block_sad[bindex++][pos] += LineSadBlk0; // chroma distortion is added on top of the luma value stored above
- block_sad[bindex++][pos] += LineSadBlk1;
- block_sad[bindex++][pos] += LineSadBlk2;
- block_sad[bindex++][pos] += LineSadBlk3;
- }
- }
- }
- }
- else // unweighted path: reference samples are used as they are
- {
- srcptr = orig_pels;
- bindex = 0;
- refptr = get_line[ref_access_method] (ref_pic_sub.luma, abs_y4, abs_x4);
- for (blky = 0; blky < 4; blky++)
- {
- LineSadBlk0 = LineSadBlk1 = LineSadBlk2 = LineSadBlk3 = 0;
- for (y = 0; y < 4; y++) // one 16-sample row per iteration: 4 samples for each of the four blocks
- {
- LineSadBlk0 += dist_method [*refptr++ - *srcptr++];
- LineSadBlk0 += dist_method [*refptr++ - *srcptr++];
- LineSadBlk0 += dist_method [*refptr++ - *srcptr++];
- LineSadBlk0 += dist_method [*refptr++ - *srcptr++];
- LineSadBlk1 += dist_method [*refptr++ - *srcptr++];
- LineSadBlk1 += dist_method [*refptr++ - *srcptr++];
- LineSadBlk1 += dist_method [*refptr++ - *srcptr++];
- LineSadBlk1 += dist_method [*refptr++ - *srcptr++];
- LineSadBlk2 += dist_method [*refptr++ - *srcptr++];
- LineSadBlk2 += dist_method [*refptr++ - *srcptr++];
- LineSadBlk2 += dist_method [*refptr++ - *srcptr++];
- LineSadBlk2 += dist_method [*refptr++ - *srcptr++];
- LineSadBlk3 += dist_method [*refptr++ - *srcptr++];
- LineSadBlk3 += dist_method [*refptr++ - *srcptr++];
- LineSadBlk3 += dist_method [*refptr++ - *srcptr++];
- LineSadBlk3 += dist_method [*refptr++ - *srcptr++];
- refptr += img_padded_size_x - MB_BLOCK_SIZE; // skip the rest of the reference row (presumably img_padded_size_x is the row stride - TODO confirm)
- }
- block_sad[bindex++][pos] = LineSadBlk0;
- block_sad[bindex++][pos] = LineSadBlk1;
- block_sad[bindex++][pos] = LineSadBlk2;
- block_sad[bindex++][pos] = LineSadBlk3;
- }
- if (ChromaMEEnable)
- {
- for (k = 0; k < 2; k ++) // srcptr is not reset: it continues past the 256 luma samples into the chroma part of orig_pels
- {
- bindex = 0;
- refptr = get_crline[ref_access_method] (ref_pic_sub.crcb[k], abs_y4, abs_x4); // NOTE(review): called with the luma coordinates abs_y4/abs_x4 - confirm this is what get_crline expects
- for (blky = 0; blky < 4; blky++)
- {
- LineSadBlk0 = LineSadBlk1 = LineSadBlk2 = LineSadBlk3 = 0;
- for (y = 0; y < img->mb_cr_size_y; y+=BLOCK_SIZE)
- {
- for (x = 0; x < img->mb_cr_size_x; x += BLOCK_SIZE)
- {
- LineSadBlk0 += dist_method [*refptr++ - *srcptr++];
- }
- for (x = 0; x < img->mb_cr_size_x; x += BLOCK_SIZE)
- {
- LineSadBlk1 += dist_method [*refptr++ - *srcptr++];
- }
- for (x = 0; x < img->mb_cr_size_x; x += BLOCK_SIZE)
- {
- LineSadBlk2 += dist_method [*refptr++ - *srcptr++];
- }
- for (x = 0; x < img->mb_cr_size_x; x += BLOCK_SIZE)
- {
- LineSadBlk3 += dist_method [*refptr++ - *srcptr++];
- }
- refptr += img_cr_padded_size_x - img->mb_cr_size_x;
- }
- block_sad[bindex++][pos] += LineSadBlk0; // chroma distortion is added on top of the luma value stored above
- block_sad[bindex++][pos] += LineSadBlk1;
- block_sad[bindex++][pos] += LineSadBlk2;
- block_sad[bindex++][pos] += LineSadBlk3;
- }
- }
- }
- }
- }
- //===== combine SAD's for larger block types =====
- SetupLargerBlocks (list, ref, max_pos);
- //===== set flag marking that search setup has been done =====
- search_setup_done[list][ref] = 1;
- }
- #endif
- /*!
- ***********************************************************************
- * \brief
- * Fast Full pixel block motion search
- ***********************************************************************
- */
- int // ==> minimum motion cost after search
- FastFullPelBlockMotionSearch (Macroblock *currMB, // <-- current Macroblock
- imgpel* orig_pic, // <-- not used
- short ref, // <-- reference frame (0... or -1 (backward))
- int list,
- int list_offset, // <-- MBAFF list offset
- char ***refPic, // <-- reference array
- short ****tmp_mv, // <-- mv array
- int pic_pix_x, // <-- absolute x-coordinate of regarded AxB block
- int pic_pix_y, // <-- absolute y-coordinate of regarded AxB block
- int blocktype, // <-- block type (1-16x16 ... 7-4x4)
- short pred_mv[2], // <-- motion vector predictor (x) in sub-pel units
- short mv[2], // <--> in: search center (x) / out: motion vector (x) - in pel units
- int search_range, // <-- 1-d search range in pel units
- int min_mcost, // <-- minimum motion cost (cost for center or huge value)
- int lambda_factor, // <-- lagrangian parameter for determining motion cost
- int apply_weights
- )
- {
- int pos, offset_x, offset_y, cand_x, cand_y, mcost;
- int max_pos = (2*search_range+1)*(2*search_range+1); // number of search positions
- int best_pos = 0; // position with minimum motion cost
- int block_index; // block index for indexing SAD array
- distpel* block_sad; // pointer to SAD array
- block_index = (pic_pix_y-img->opix_y)+((pic_pix_x-img->opix_x)>>2); // block index for indexing SAD array
- block_sad = BlockSAD[list][ref][blocktype][block_index]; // pointer to SAD array
- //===== set up fast full integer search if needed / set search center =====
- if (!search_setup_done[list][ref])
- {
- SetupFastFullPelSearch (currMB, ref, list);
- }
- offset_x = search_center_x[list][ref] - img->opix_x;
- offset_y = search_center_y[list][ref] - img->opix_y;
- //===== cost for (0,0)-vector: it is done before, because MVCost can be negative =====
- if (!params->rdopt)
- {
- mcost = block_sad[pos_00[list][ref]] + MV_COST_SMP (lambda_factor, 0, 0, pred_mv[0], pred_mv[1]);
- if (mcost < min_mcost)
- {
- min_mcost = mcost;
- best_pos = pos_00[list][ref];
- }
- }
- //===== loop over all search positions =====
- for (pos=0; pos<max_pos; pos++, block_sad++)
- {
- //--- check residual cost ---
- if (*block_sad < min_mcost)
- {
- //--- get motion vector cost ---
- cand_x = (offset_x + spiral_search_x[pos])<<2;
- cand_y = (offset_y + spiral_search_y[pos])<<2;
- mcost = *block_sad;
- mcost += MV_COST_SMP (lambda_factor, cand_x, cand_y, pred_mv[0], pred_mv[1]);
- //--- check motion cost ---
- if (mcost < min_mcost)
- {
- min_mcost = mcost;
- best_pos = pos;
- }
- }
- }
- //===== set best motion vector and return minimum motion cost =====
- mv[0] = offset_x + spiral_search_x[best_pos];
- mv[1] = offset_y + spiral_search_y[best_pos];
- return min_mcost;
- }