/*
 * Audio
 *
 * Development platform:
 * Visual C++
 *
 * me_umhexsmp.c: source code listing
 */
/*!
 *************************************************************************************
 *
 * \file me_umhexsmp.c
 *
 * \brief
 *   Fast integer pixel and sub pixel motion estimation
 *   Improved and simplified from the original UMHexagonS algorithms
 *   See JVT-P021 for details
 *
 * \author
 *    Main contributors: (see contributors.h for copyright, address and affiliation details)
 *    - Zhibo Chen                      <chenzhibo@tsinghua.org.cn>
 *    - JianFeng Xu                     <fenax@video.mdc.tsinghua.edu.cn>
 *    - Wenfang Fu                      <fwf@video.mdc.tsinghua.edu.cn>
 *
 *    - Xiaoquan Yi                     <xyi@engr.scu.edu>
 *    - Jun Zhang                       <jzhang2@engr.scu.edu>
 *
 * \date
 *    6. Nov. 2006
 *************************************************************************************
 */
#include <limits.h>
#include "global.h"
#include "memalloc.h"
#include "me_umhexsmp.h"
#include "refbuf.h"
#include "me_distortion.h"
#include "mv-search.h"
// Candidate offsets (in integer pels) of the search patterns, relative to the
// current pattern centre. X and Y tables are read pairwise with the same index.
// Small diamond: the four direct horizontal/vertical neighbours.
static const short Diamond_X[4]      = {-1, 1, 0, 0};
static const short Diamond_Y[4]      = { 0, 0,-1, 1};
// Hexagon: six points at distance 2 around the centre.
static const short Hexagon_X[6]      = {-2, 2,-1, 1,-1, 1};
static const short Hexagon_Y[6]      = { 0, 0,-2, 2, 2,-2};
// Big hexagon: sixteen points; the integer-pel search multiplies these offsets
// by a growing scale factor to cover the search window.
static const short Big_Hexagon_X[16] = {-4, 4, 0, 0,-4, 4,-4, 4,-4, 4,-4, 4,-2, 2,-2, 2};
static const short Big_Hexagon_Y[16] = { 0, 0,-4, 4,-1, 1, 1,-1,-2, 2, 2,-2,-3, 3, 3,-3};
// Right-shift applied to the cost thresholds, indexed by blocktype
// (thresholds are compared as "threshold >> block_type_shift_factor[blocktype]").
const short block_type_shift_factor[8] = {0, 0, 1, 1, 2, 3, 3, 1}; // last one relaxed to 1 instead of 4
// Reference picture pointer; assigned in smpUMHEXIntegerPelBlockMotionSearch.
static StorablePicture *ref_pic_ptr;
// Index into the computeUniPred[] function table; set at the start of every
// search as <pel accuracy> + 3 * apply_weights.
static int dist_method;
// Spiral search offset tables, defined in another translation unit.
// They are added directly to quarter-pel motion vectors in the full sub-pel search.
extern short*  spiral_hpel_search_x;
extern short*  spiral_hpel_search_y;
extern short*  spiral_search_x;
extern short*  spiral_search_y;
// Macro for motion estimation cost computation per match (uni-prediction).
//
// Evaluates the integer-pel candidate (cand_x, cand_y) if it lies within
// +/- search_range of (center_x, center_y): the motion vector cost is computed
// first, the distortion from computeUniPred[dist_method] is added, and
// best_x / best_y / min_mcost are updated when the total is smaller than the
// current min_mcost.
//
// The macro expands to a complete if-block and is used WITHOUT a trailing
// semicolon. It relies on the following names being in scope at the point of
// use: cand_x, cand_y, center_x, center_y, search_range, mcost, min_mcost,
// best_x, best_y, lambda_factor, mvshift, pred_x, pred_y, orig_pic,
// blocksize_x, blocksize_y and dist_method.
//
// Every line but the last must end with a backslash so that the whole block
// is part of the macro definition.
#define SEARCH_ONE_PIXEL_HELPER                                                           \
  if(iabs(cand_x - center_x) <= search_range && iabs(cand_y - center_y) <= search_range)  \
  {                                                                                       \
    mcost = MV_COST (lambda_factor, mvshift, cand_x, cand_y, pred_x, pred_y);             \
    mcost += computeUniPred[dist_method]( orig_pic, blocksize_y, blocksize_x,             \
          min_mcost - mcost, (cand_x + IMG_PAD_SIZE) << 2, (cand_y + IMG_PAD_SIZE) << 2); \
    if (mcost < min_mcost)                                                                \
    {                                                                                     \
      best_x    = cand_x;                                                                 \
      best_y    = cand_y;                                                                 \
      min_mcost = mcost;                                                                  \
    }                                                                                     \
  }
// Macro for motion estimation cost computation per match (bi-prediction).
//
// Evaluates the integer-pel candidate (cand_x, cand_y) for the second motion
// vector if it lies within +/- search_range of (center2_x, center2_y). The
// motion vector cost of the fixed first vector (center1_x, center1_y) and of
// the candidate are summed; the distortion from computeBiPred is only computed
// when that sum is already below min_mcost. best_x / best_y / min_mcost are
// updated when the total cost is smaller than the current min_mcost.
//
// The macro expands to a complete if-block and is used WITHOUT a trailing
// semicolon. It relies on the following names being in scope at the point of
// use: cand_x, cand_y, center1_x, center1_y, center2_x, center2_y,
// search_range, mcost, min_mcost, best_x, best_y, lambda_factor, mvshift,
// pred_x1, pred_y1, pred_x2, pred_y2, cur_pic, blocksize_x and blocksize_y.
//
// Every line but the last must end with a backslash so that the whole block
// is part of the macro definition.
#define SEARCH_ONE_PIXEL_BIPRED_HELPER                                                    \
if (iabs(cand_x - center2_x) <= search_range && iabs(cand_y - center2_y) <= search_range) \
{                                                                                         \
  mcost  = MV_COST (lambda_factor, mvshift, center1_x, center1_y, pred_x1, pred_y1);      \
  mcost += MV_COST (lambda_factor, mvshift, cand_x,    cand_y,    pred_x2, pred_y2);      \
  if (mcost < min_mcost)                                                                  \
  {                                                                                       \
    mcost  += computeBiPred(cur_pic, blocksize_y, blocksize_x,                            \
                           min_mcost - mcost,                                             \
                           (center1_x << 2) + IMG_PAD_SIZE_TIMES4,                        \
                           (center1_y << 2) + IMG_PAD_SIZE_TIMES4,                        \
                           (cand_x << 2) + IMG_PAD_SIZE_TIMES4,                           \
                           (cand_y << 2) + IMG_PAD_SIZE_TIMES4);                          \
    if (mcost < min_mcost)                                                                \
    {                                                                                     \
      best_x = cand_x;                                                                    \
      best_y = cand_y;                                                                    \
      min_mcost = mcost;                                                                  \
    }                                                                                     \
  }                                                                                       \
}
/*!
 ************************************************************************
 * brief
 *    Set thresholds for fast motion estimation
 *    Those thresholds may be adjusted to trade off rate-distortion
 *    performance and simplified UMHEX speed
 ************************************************************************
 */
void smpUMHEX_init()
{
  SymmetricalCrossSearchThreshold1 =  800;
  SymmetricalCrossSearchThreshold2 = 7000;
  ConvergeThreshold                = 1000;
  SubPelThreshold1                 = 1000;
  SubPelThreshold3                 =  400;
}
/*!
 ************************************************************************
 * \brief
 *    Allocate the working buffers of the simplified UMHEX search.
 *
 * \return
 *    Sum of the sizes reported by get_mem3Dint / get_mem2D for the two
 *    cost arrays and the sub-pel search-state map. The intra flag array
 *    is allocated here as well but is not included in the returned sum.
 ************************************************************************
 */
int smpUMHEX_get_mem()
{
  const int blk4_rows = img->height / 4;   // picture height in 4x4 block units
  const int blk4_cols = img->width  / 4;   // picture width  in 4x4 block units
  int bytes_allocated;

  // One zero-initialised flag per 16-pel column, plus one spare entry
  smpUMHEX_flag_intra = calloc((img->width >> 4) + 1, sizeof(byte));
  if (smpUMHEX_flag_intra == NULL)
  {
    no_mem_exit("smpUMHEX_get_mem: smpUMHEX_flag_intra");
  }

  // Per-list cost storage, addressed as [blocktype][y/4][x/4]
  bytes_allocated  = get_mem3Dint(&smpUMHEX_l0_cost, 9, blk4_rows, blk4_cols);
  bytes_allocated += get_mem3Dint(&smpUMHEX_l1_cost, 9, blk4_rows, blk4_cols);

  // 7x7 "already visited" map for the fast sub-pel search (window of +/-3)
  bytes_allocated += get_mem2D(&smpUMHEX_SearchState, 7, 7);

  return bytes_allocated;
}
/*!
 ************************************************************************
 * \brief
 *    Free space for fast motion estimation
 *
 *    Releases every buffer obtained in smpUMHEX_get_mem and resets the
 *    global pointers to NULL so that no dangling pointer is left behind
 *    after the buffers are gone.
 ************************************************************************
 */
void smpUMHEX_free_mem()
{
  free_mem3Dint(smpUMHEX_l0_cost );
  smpUMHEX_l0_cost = NULL;

  free_mem3Dint(smpUMHEX_l1_cost );
  smpUMHEX_l1_cost = NULL;

  free_mem2D(smpUMHEX_SearchState);
  smpUMHEX_SearchState = NULL;

  free (smpUMHEX_flag_intra);
  smpUMHEX_flag_intra = NULL;
}
/*!
************************************************************************
* brief
*    Fast integer pixel block motion estimation
************************************************************************
*/
int                                     //  ==> minimum motion cost after search
smpUMHEXIntegerPelBlockMotionSearch (Macroblock *currMB,      // <--  current Macroblock
                                     imgpel   *orig_pic,      // <--  not used
                                     short     ref,           // <--  reference frame (0... or -1 (backward))
                                     int       list,          // <--  reference picture list
                                     int       list_offset,   // <--  MBAFF list offset
                                     char   ***refPic,        // <--  reference array
                                     short ****tmp_mv,        // <--  mv array
                                     int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
                                     int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
                                     int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
                                     short     pred_mv[2],    // <--  motion vector predictor (x|y) in sub-pel units
                                     short     mv[2],         //  --> motion vector (x|y) - in pel units
                                     int       search_range,  // <--  1-d search range in pel units
                                     int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
                                     int       lambda_factor, // <--  lagrangian parameter for determining motion cost
                                     int       apply_weights
                                     )
{
  int   mvshift       = 2;                                        // motion vector shift for getting sub-pel units
  int   blocksize_y   = params->blc_size[blocktype][1];            // vertical block size
  int   blocksize_x   = params->blc_size[blocktype][0];            // horizontal block size
  int   pred_x        = (pic_pix_x << mvshift) + pred_mv[0];       // predicted position x (in sub-pel units)
  int   pred_y        = (pic_pix_y << mvshift) + pred_mv[1];       // predicted position y (in sub-pel units)
  int   center_x      = pic_pix_x + mv[0];                        // center position x (in pel units)
  int   center_y      = pic_pix_y + mv[1];                        // center position y (in pel units)
  int   best_x        = 0, best_y = 0;
  int   search_step, iYMinNow, iXMinNow;
  int   cand_x, cand_y, mcost;
  unsigned short  i, j, m;
  //===== Use weighted Reference for ME ====
  dist_method = F_PEL + 3 * apply_weights;
  ref_pic_ptr = listX[list+list_offset][ref];
  // Note that following seem to be universal for all functions and could be moved to a separate, clean public function in me_distortion.c
  ref_pic_sub.luma = ref_pic_ptr->p_curr_img_sub;
  img_width  = ref_pic_ptr->size_x;
  img_height = ref_pic_ptr->size_y;
  width_pad  = ref_pic_ptr->size_x_pad;
  height_pad = ref_pic_ptr->size_y_pad;
  if (apply_weights)
  {
    weight_luma = wp_weight[list + list_offset][ref][0];
    offset_luma = wp_offset[list + list_offset][ref][0];
  }
  if (ChromaMEEnable)
  {
    ref_pic_sub.crcb[0] = ref_pic_ptr->imgUV_sub[0];
    ref_pic_sub.crcb[1] = ref_pic_ptr->imgUV_sub[1];
    width_pad_cr  = ref_pic_ptr->size_x_cr_pad;
    height_pad_cr = ref_pic_ptr->size_y_cr_pad;
    if (apply_weights)
    {
      weight_cr[0] = wp_weight[list + list_offset][ref][1];
      weight_cr[1] = wp_weight[list + list_offset][ref][2];
      offset_cr[0] = wp_offset[list + list_offset][ref][1];
      offset_cr[1] = wp_offset[list + list_offset][ref][2];
    }
  }
  //===== set function for getting reference picture lines =====
  if ((center_x > search_range) && (center_x < img_width - 1 - search_range - blocksize_x) &&
    (center_y > search_range) && (center_y < img_height - 1 - search_range - blocksize_y))
  {
    ref_access_method = FAST_ACCESS;
  }
  else
  {
    ref_access_method = UMV_ACCESS;
  }
  //check the center median predictor
  cand_x = center_x ;
  cand_y = center_y ;
  mcost = MV_COST (lambda_factor, mvshift, cand_x, cand_y, pred_x, pred_y);
  mcost += computeUniPred[dist_method](orig_pic, blocksize_y,blocksize_x, min_mcost - mcost,
    (cand_x << 2) + IMG_PAD_SIZE_TIMES4,  (cand_y << 2) + IMG_PAD_SIZE_TIMES4);
  if (mcost < min_mcost)
  {
    min_mcost = mcost;
    best_x    = cand_x;
    best_y    = cand_y;
  }
  iXMinNow = best_x;
  iYMinNow = best_y;
  if ((0 != pred_mv[0]) || (0 != pred_mv[1]))
  {
    cand_x = pic_pix_x;
    cand_y = pic_pix_y;
    SEARCH_ONE_PIXEL_HELPER
  }
  // If the min_mcost is small enough, do a local search then terminate
  // Ihis is good for stationary or quasi-stationary areas
  if (min_mcost < (ConvergeThreshold>>block_type_shift_factor[blocktype]))
  {
    for (m = 0; m < 4; m++)
    {
      cand_x = iXMinNow + Diamond_X[m];
      cand_y = iYMinNow + Diamond_Y[m];
      SEARCH_ONE_PIXEL_HELPER
    }
    mv[0] = (short) (best_x - pic_pix_x);
    mv[1] = (short) (best_y - pic_pix_y);
    return min_mcost;
  }
  // Small local search
  for (m = 0; m < 4; m++)
  {
    cand_x = iXMinNow + Diamond_X[m];
    cand_y = iYMinNow + Diamond_Y[m];
    SEARCH_ONE_PIXEL_HELPER
  }
  // First_step: Symmetrical-cross search
  // If distortion is large, use large shapes. Otherwise, compact shapes are faster
  if ( (blocktype == 1 &&
    min_mcost > (SymmetricalCrossSearchThreshold1>>block_type_shift_factor[blocktype])) ||
    (min_mcost > (SymmetricalCrossSearchThreshold2>>block_type_shift_factor[blocktype])) )
  {
    iXMinNow = best_x;
    iYMinNow = best_y;
    for(i = 1; i <= search_range/2; i++)
    {
      search_step = (i<<1) - 1;
      cand_x = iXMinNow + search_step;
      cand_y = iYMinNow;
      SEARCH_ONE_PIXEL_HELPER
        cand_x = iXMinNow - search_step;
      SEARCH_ONE_PIXEL_HELPER
        cand_x = iXMinNow;
      cand_y = iYMinNow + search_step;
      SEARCH_ONE_PIXEL_HELPER
        cand_y = iYMinNow - search_step;
      SEARCH_ONE_PIXEL_HELPER
    }
    // Hexagon Search
    iXMinNow = best_x;
    iYMinNow = best_y;
    for (m = 0; m < 6; m++)
    {
      cand_x = iXMinNow + Hexagon_X[m];
      cand_y = iYMinNow + Hexagon_Y[m];
      SEARCH_ONE_PIXEL_HELPER
    }
    // Multi Big Hexagon Search
    iXMinNow = best_x;
    iYMinNow = best_y;
    for(i = 1; i <= search_range/4; i++)
    {
      for (m = 0; m < 16; m++)
      {
        cand_x = iXMinNow + Big_Hexagon_X[m]*i;
        cand_y = iYMinNow + Big_Hexagon_Y[m]*i;
        SEARCH_ONE_PIXEL_HELPER
      }
    }
  }
  // Search up_layer predictor for non 16x16 blocks
  if (blocktype > 1)
  {
    cand_x = pic_pix_x + (smpUMHEX_pred_MV_uplayer_X/4);
    cand_y = pic_pix_y + (smpUMHEX_pred_MV_uplayer_Y/4);
    SEARCH_ONE_PIXEL_HELPER
  }
  if(center_x != pic_pix_x || center_y != pic_pix_y)
  {
    cand_x = pic_pix_x;
    cand_y = pic_pix_y;
    SEARCH_ONE_PIXEL_HELPER
      iXMinNow = best_x;
    iYMinNow = best_y;
    // Local diamond search
    for (m = 0; m < 4; m++)
    {
      cand_x = iXMinNow + Diamond_X[m];
      cand_y = iYMinNow + Diamond_Y[m];
      SEARCH_ONE_PIXEL_HELPER
    }
  }
  // If the minimum cost is small enough, do a local search
  // and finish the search here
  if (min_mcost < (ConvergeThreshold>>block_type_shift_factor[blocktype]))
  {
    iXMinNow = best_x;
    iYMinNow = best_y;
    for (m = 0; m < 4; m++)
    {
      cand_x = iXMinNow + Diamond_X[m];
      cand_y = iYMinNow + Diamond_Y[m];
      SEARCH_ONE_PIXEL_HELPER
    }
    mv[0] = (short) (best_x - pic_pix_x);
    mv[1] = (short) (best_y - pic_pix_y);
    return min_mcost;
  }
  //second_step:  Extended Hexagon-based Search
  for(i = 0; i < search_range; i++)
  {
    iXMinNow = best_x;
    iYMinNow = best_y;
    for (m = 0; m < 6; m++)
    {
      cand_x = iXMinNow + Hexagon_X[m];
      cand_y = iYMinNow + Hexagon_Y[m];
      SEARCH_ONE_PIXEL_HELPER
    }
    // The minimum cost point happens in the center
    if (best_x == iXMinNow && best_y == iYMinNow)
    {
      break;
    }
  }
  //third_step: Small diamond search
  for(i = 0; i < search_range; i++)
  {
    iXMinNow = best_x;
    iYMinNow = best_y;
    for (m = 0; m < 4; m++)
    {
      cand_x = iXMinNow + Diamond_X[m];
      cand_y = iYMinNow + Diamond_Y[m];
      SEARCH_ONE_PIXEL_HELPER
    }
    // The minimum cost point happens in the center
    if (best_x == iXMinNow && best_y == iYMinNow)
    {
      break;
    }
  }
  mv[0] = (short) (best_x - pic_pix_x);
  mv[1] = (short) (best_y - pic_pix_y);
  for (j=(pic_pix_y>>2); j < (pic_pix_y>>2) + (blocksize_y>>2); j++)
  {
    for (i=(pic_pix_x>>2); i < (pic_pix_x>>2) + (blocksize_x>>2); i++)    
    {
      if(list == 0)
      {
        smpUMHEX_l0_cost[blocktype][j][i] = min_mcost;
      }
      else
      {
        smpUMHEX_l1_cost[blocktype][j][i] = min_mcost;
      }
    }
  }
  return min_mcost;
}
/*!
 ***********************************************************************
 * brief
 *    Sub pixel block motion search enhanced
 ***********************************************************************
 */
int                                               //  ==> minimum motion cost after search
smpUMHEXFullSubPelBlockMotionSearch (imgpel*   orig_pic,      // <--  original pixel values for the AxB block
                                     short     ref,           // <--  reference frame (0... or -1 (backward))
                                     int       list,          // <--  reference picture list
                                     int       list_offset,
                                     int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
                                     int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
                                     int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
                                     short     pred_mv[2],    // <--  motion vector predictor (x|y) in sub-pel units
                                     short     mv[2],         // <--> in: search center (x|y) / out: motion vector (x|y) - in pel units
                                     int       search_pos2,   // <--  search positions for    half-pel search  (default: 9)
                                     int       search_pos4,   // <--  search positions for quarter-pel search  (default: 9)
                                     int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
                                     int       lambda_factor, // <--  lagrangian parameter for determining motion cost
                                     int       apply_weights
                                     )
{
  int   pos, best_pos, mcost;
  int   cand_mv_x, cand_mv_y;
  int   check_position0 = (!params->rdopt && img->type!=B_SLICE && ref==0 && blocktype==1 && mv[0]==0 && mv[1]==0);
  int   blocksize_x     = params->blc_size[blocktype][0];
  int   blocksize_y     = params->blc_size[blocktype][1];
  int   pic4_pix_x      = ((pic_pix_x + IMG_PAD_SIZE)<< 2);
  int   pic4_pix_y      = ((pic_pix_y + IMG_PAD_SIZE)<< 2);
  int   max_pos2        = ( !start_me_refinement_hp ? imax(1,search_pos2) : search_pos2);
  int   cmv_x, cmv_y;
  StorablePicture *ref_picture = listX[list+list_offset][ref];
  int max_pos_x4 = ((ref_picture->size_x - blocksize_x + 2*IMG_PAD_SIZE)<<2);
  int max_pos_y4 = ((ref_picture->size_y - blocksize_y + 2*IMG_PAD_SIZE)<<2);
  dist_method = Q_PEL + 3 * apply_weights;
  ref_pic_sub.luma = ref_picture->p_curr_img_sub;
  img_width  = ref_picture->size_x;
  img_height = ref_picture->size_y;
  width_pad  = ref_picture->size_x_pad;
  height_pad = ref_picture->size_y_pad;
  if (apply_weights)
  {
    weight_luma = wp_weight[list + list_offset][ref][0];
    offset_luma = wp_offset[list + list_offset][ref][0];
  }
  if (ChromaMEEnable)
  {
    ref_pic_sub.crcb[0] = ref_picture->imgUV_sub[0];
    ref_pic_sub.crcb[1] = ref_picture->imgUV_sub[1];
    width_pad_cr  = ref_picture->size_x_cr_pad;
    height_pad_cr = ref_picture->size_y_cr_pad;
    if (apply_weights)
    {
      weight_cr[0] = wp_weight[list + list_offset][ref][1];
      weight_cr[1] = wp_weight[list + list_offset][ref][2];
      offset_cr[0] = wp_offset[list + list_offset][ref][1];
      offset_cr[1] = wp_offset[list + list_offset][ref][2];
    }
  }
  /*********************************
   *****                       *****
   *****  HALF-PEL REFINEMENT  *****
   *****                       *****
   *********************************/
  //===== set function for getting pixel values =====
  if ((pic4_pix_x + mv[0] > 1) && (pic4_pix_x + mv[0] < max_pos_x4 - 1) &&
    (pic4_pix_y + mv[1] > 1) && (pic4_pix_y + mv[1] < max_pos_y4 - 1)   )
  {
    ref_access_method = FAST_ACCESS;
  }
  else
  {
    ref_access_method = UMV_ACCESS;
  }
  //===== loop over search positions =====
  for (best_pos = 0, pos = start_me_refinement_hp; pos < max_pos2; pos++)
  {
    cand_mv_x = mv[0] + (spiral_hpel_search_x[pos]);    // quarter-pel units
    cand_mv_y = mv[1] + (spiral_hpel_search_y[pos]);    // quarter-pel units
    //----- set motion vector cost -----
    mcost = MV_COST_SMP (lambda_factor, cand_mv_x, cand_mv_y, pred_mv[0], pred_mv[1]);
    if (mcost >= min_mcost) continue;
    cmv_x = cand_mv_x + pic4_pix_x;
    cmv_y = cand_mv_y + pic4_pix_y;
    mcost += computeUniPred[dist_method]( orig_pic, blocksize_y, blocksize_x, min_mcost - mcost, cmv_x, cmv_y);
    if (pos==0 && check_position0)
    {
      mcost -= WEIGHTED_COST (lambda_factor, 16);
    }
    if (mcost < min_mcost)
    {
      min_mcost = mcost;
      best_pos  = pos;
    }
    if (min_mcost < (SubPelThreshold3>>block_type_shift_factor[blocktype]))
    {
      break;
    }
  }
  if (best_pos)
  {
    mv[0] += (spiral_hpel_search_x [best_pos]);
    mv[1] += (spiral_hpel_search_y [best_pos]);
  }
  if ((mv[0] == 0) && (mv[1] == 0) && (pred_mv[0] == 0 && pred_mv[1] == 0) &&
    (min_mcost < (SubPelThreshold1>>block_type_shift_factor[blocktype])) )
  {
    best_pos = 0;
    return min_mcost;
  }
  if ( !start_me_refinement_qp )
    min_mcost = INT_MAX;
  /************************************
   *****                          *****
   *****  QUARTER-PEL REFINEMENT  *****
   *****                          *****
   ************************************/
  //===== set function for getting pixel values =====
  if ((pic4_pix_x + mv[0] > 0) && (pic4_pix_x + mv[0] < max_pos_x4) &&
    (pic4_pix_y + mv[1] > 0) && (pic4_pix_y + mv[1] < max_pos_y4)   )
  {
    ref_access_method = FAST_ACCESS;
  }
  else
  {
    ref_access_method = UMV_ACCESS;
  }
  //===== loop over search positions =====
  for (best_pos = 0, pos = start_me_refinement_qp; pos < search_pos4; pos++)
  {
    cand_mv_x = mv[0] + spiral_search_x[pos];    // quarter-pel units
    cand_mv_y = mv[1] + spiral_search_y[pos];    // quarter-pel units
    //----- set motion vector cost -----
    mcost = MV_COST_SMP (lambda_factor, cand_mv_x, cand_mv_y, pred_mv[0], pred_mv[1]);
    if (mcost >= min_mcost) continue;
    cmv_x = cand_mv_x + pic4_pix_x;
    cmv_y = cand_mv_y + pic4_pix_y;
    mcost += computeUniPred[dist_method]( orig_pic, blocksize_y, blocksize_x, min_mcost - mcost, cmv_x, cmv_y);
    if (mcost < min_mcost)
    {
      min_mcost = mcost;
      best_pos  = pos;
    }
    if (min_mcost < (SubPelThreshold3>>block_type_shift_factor[blocktype]))
    {
      break;
    }
  }
  if (best_pos)
  {
    mv[0] += spiral_search_x [best_pos];
    mv[1] += spiral_search_y [best_pos];
  }
  //===== return minimum motion cost =====
  return min_mcost;
}
/*!
 ************************************************************************
 * brief
 *    Fast sub pixel block motion estimation
 ************************************************************************
 */
int                                     //  ==> minimum motion cost after search
smpUMHEXSubPelBlockMotionSearch  (
                                  imgpel* orig_pic,        // <--  original pixel values for the AxB block
                                  short     ref,           // <--  reference frame (0... or -1 (backward))
                                  int       list,          // <--  reference picture list
                                  int       list_offset,   // <--  MBAFF list offset
                                  int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
                                  int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
                                  int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
                                  short     pred_mv[2],    // <--  motion vector predictor (x|y) in sub-pel units
                                  short     mv[2],         // <--> in: search center (x|y) / out: MV (x|y) - in pel units
                                  int       search_pos2,   // <--  search positions for    half-pel search  (default: 9)
                                  int       search_pos4,   // <--  search positions for quarter-pel search  (default: 9)
                                  int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
                                  int       lambda_factor, // <--  lagrangian parameter for determining motion cost
                                  int       apply_weights
                                  )
{
  int   mcost;
  int   cand_mv_x, cand_mv_y;
  StorablePicture *ref_picture = listX[list+list_offset][ref];
  short mv_shift        = 0;
  short blocksize_x     = (short) params->blc_size[blocktype][0];
  short blocksize_y     = (short) params->blc_size[blocktype][1];
  int   pic4_pix_x      = ((pic_pix_x + IMG_PAD_SIZE)<<2);
  int   pic4_pix_y      = ((pic_pix_y + IMG_PAD_SIZE)<<2);
  short max_pos_x4      = (short) ((ref_picture->size_x - blocksize_x + 2*IMG_PAD_SIZE)<<2);
  short max_pos_y4      = (short) ((ref_picture->size_y - blocksize_y + 2*IMG_PAD_SIZE)<<2);
  int   iXMinNow, iYMinNow;
  short dynamic_search_range, i, m;
  int   currmv_x = 0, currmv_y = 0;
  int   pred_frac_mv_x,pred_frac_mv_y,abort_search;
  int   pred_frac_up_mv_x, pred_frac_up_mv_y;
  dist_method = Q_PEL + 3 * apply_weights;
  ref_pic_sub.luma = ref_picture->p_curr_img_sub;
  img_width  = ref_pic_ptr->size_x;
  img_height = ref_pic_ptr->size_y;
  width_pad  = ref_pic_ptr->size_x_pad;
  height_pad = ref_pic_ptr->size_y_pad;
  if (apply_weights)
  {
    weight_luma = wp_weight[list + list_offset][ref][0];
    offset_luma = wp_offset[list + list_offset][ref][0];
  }
  if (ChromaMEEnable)
  {
    ref_pic_sub.crcb[0] = ref_pic_ptr->imgUV_sub[0];
    ref_pic_sub.crcb[1] = ref_pic_ptr->imgUV_sub[1];
    width_pad_cr  = ref_pic_ptr->size_x_cr_pad;
    height_pad_cr = ref_pic_ptr->size_y_cr_pad;
    if (apply_weights)
    {
      weight_cr[0] = wp_weight[list + list_offset][ref][1];
      weight_cr[1] = wp_weight[list + list_offset][ref][2];
      offset_cr[0] = wp_offset[list + list_offset][ref][1];
      offset_cr[1] = wp_offset[list + list_offset][ref][2];
    }
  }
  if ((pic4_pix_x + mv[0] > 1) && (pic4_pix_x + mv[0] < max_pos_x4 - 1) &&
    (pic4_pix_y + mv[1] > 1) && (pic4_pix_y + mv[1] < max_pos_y4 - 1))
  {
    ref_access_method = FAST_ACCESS;
  }
  else
  {
    ref_access_method = UMV_ACCESS;
  }
  dynamic_search_range = 3;
  pred_frac_mv_x = (pred_mv[0] - mv[0]) % 4;
  pred_frac_mv_y = (pred_mv[1] - mv[1]) % 4;
  pred_frac_up_mv_x = (smpUMHEX_pred_MV_uplayer_X - mv[0]) % 4;
  pred_frac_up_mv_y = (smpUMHEX_pred_MV_uplayer_Y - mv[1]) % 4;
  memset(smpUMHEX_SearchState[0], 0,
    (2*dynamic_search_range+1)*(2*dynamic_search_range+1));
  smpUMHEX_SearchState[dynamic_search_range][dynamic_search_range] = 1;
  if( !start_me_refinement_hp )
  {
    cand_mv_x = mv[0];
    cand_mv_y = mv[1];
    mcost   = MV_COST (lambda_factor, mv_shift, cand_mv_x, cand_mv_y, pred_mv[0], pred_mv[1]);
    mcost   += computeUniPred[dist_method]( orig_pic, blocksize_y, blocksize_x,
      min_mcost - mcost, cand_mv_x + pic4_pix_x, cand_mv_y + pic4_pix_y);
    if (mcost < min_mcost)
    {
      min_mcost = mcost;
      currmv_x  = cand_mv_x;
      currmv_y  = cand_mv_y;
    }
  }
  else
  {
    currmv_x = mv[0];
    currmv_y = mv[1];
  }
  // If the min_mcost is small enough and other statistics are positive,
  // better to stop the search now
  if ( ((mv[0]) == 0) && ((mv[1]) == 0) &&
    (pred_frac_mv_x == 0 && pred_frac_up_mv_x == 0) &&
    (pred_frac_mv_y == 0 && pred_frac_up_mv_y == 0) &&
    (min_mcost < (SubPelThreshold1>>block_type_shift_factor[blocktype])) )
  {
    mv[0] = (short) currmv_x;
    mv[1] = (short) currmv_y;
    return min_mcost;
  }
  if(pred_frac_mv_x || pred_frac_mv_y)
  {
    cand_mv_x = mv[0] + pred_frac_mv_x;
    cand_mv_y = mv[1] + pred_frac_mv_y;
    mcost   = MV_COST (lambda_factor, mv_shift, cand_mv_x, cand_mv_y, pred_mv[0], pred_mv[1]);
    mcost   += computeUniPred[dist_method]( orig_pic, blocksize_y, blocksize_x,
      min_mcost - mcost, cand_mv_x + pic4_pix_x, cand_mv_y + pic4_pix_y);
    smpUMHEX_SearchState[cand_mv_y -mv[1] + dynamic_search_range][cand_mv_x - mv[0] + dynamic_search_range] = 1;
    if (mcost < min_mcost)
    {
      min_mcost = mcost;
      currmv_x  = cand_mv_x;
      currmv_y  = cand_mv_y;
    }
  }
  // Multiple small diamond search
  for(i = 0; i < dynamic_search_range; i++)
  {
    abort_search = 1;
    iXMinNow = currmv_x;
    iYMinNow = currmv_y;
    for (m = 0; m < 4; m++)
    {
      cand_mv_x = iXMinNow + Diamond_X[m];
      cand_mv_y = iYMinNow + Diamond_Y[m];
      if(iabs(cand_mv_x - mv[0]) <= dynamic_search_range && iabs(cand_mv_y - mv[1]) <= dynamic_search_range)
      {
        if(!smpUMHEX_SearchState[cand_mv_y - mv[1] + dynamic_search_range][cand_mv_x - mv[0] + dynamic_search_range])
        {
          mcost = MV_COST (lambda_factor, mv_shift, cand_mv_x, cand_mv_y, pred_mv[0], pred_mv[1]);
          mcost += computeUniPred[dist_method]( orig_pic, blocksize_y, blocksize_x,
            min_mcost - mcost, cand_mv_x + pic4_pix_x, cand_mv_y + pic4_pix_y);
          smpUMHEX_SearchState[cand_mv_y - mv[1] + dynamic_search_range][cand_mv_x - mv[0] + dynamic_search_range] = 1;
          if (mcost < min_mcost)
          {
            min_mcost    = mcost;
            currmv_x     = cand_mv_x;
            currmv_y     = cand_mv_y;
            abort_search = 0;
          }
          if (min_mcost < (SubPelThreshold3>>block_type_shift_factor[blocktype]))
          {
            mv[0] = (short) currmv_x;
            mv[1] = (short) currmv_y;
            return min_mcost;
          }
        }
      }
    }
    // If the minimum cost point is in the center, break out the loop
    if (abort_search)
    {
      break;
    }
  }
  mv[0] = (short) currmv_x;
  mv[1] = (short) currmv_y;
  return min_mcost;
}
int                                                   //  ==> minimum motion cost after search
smpUMHEXSubPelBlockME (imgpel*   orig_pic,      // <--  original pixel values for the AxB block
                    short     ref,           // <--  reference frame (0... or -1 (backward))
                    int       list,
                    int       list_offset,   // <--  MBAFF list offset
                    int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
                    int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
                    int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
                    short     pred_mv[2],    // <--  motion vector predictor (x|y) in sub-pel units
                    short     mv[2],         // <--> in: search center (x|y) / out: motion vector (x|y) - in sub-pel units
                    int       search_pos2,   // <--  search positions for    half-pel search  (default: 9)
                    int       search_pos4,   // <--  search positions for quarter-pel search  (default: 9)
                    int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
                    int*      lambda_factor,
                    int       apply_weights
                    )
{  
  if(blocktype > 1)
  {
    min_mcost =  smpUMHEXSubPelBlockMotionSearch (orig_pic, ref, list, list_offset, pic_pix_x, pic_pix_y,
      blocktype, pred_mv, mv, 9, 9, min_mcost, lambda_factor[Q_PEL], apply_weights);
  }
  else
  {
    min_mcost =  smpUMHEXFullSubPelBlockMotionSearch (orig_pic, ref, list, list_offset, pic_pix_x, pic_pix_y,
      blocktype, pred_mv, mv, 9, 9, min_mcost, lambda_factor[Q_PEL], apply_weights);
  }
  return min_mcost;
}
/*!
 ************************************************************************
 * brief
 *    smpUMHEXBipredIntegerPelBlockMotionSearch: fast pixel block motion search for bipred mode
 *
 *    Integer-pel search for one of the two bi-prediction motion vectors.
 *    The vector s_mv is held fixed (it only contributes to the cost),
 *    while mv is refined. Candidates are visited in this order:
 *    search center, zero displacement, small diamond, (optionally)
 *    symmetrical cross + hexagon + multi big hexagon, up-layer predictor,
 *    then iterative hexagon and diamond refinement. Two early exits are
 *    taken when the cost falls below ConvergeThreshold.
 *
 *    Besides the local search state, the function configures a number of
 *    variables that are not declared here (ref_pic1_sub, ref_pic2_sub,
 *    img_width, img_height, width_pad, height_pad, weight1/2, offsetBi,
 *    computeBiPred, bipred1/2_access_method and their chroma variants).
 *
 *    SEARCH_ONE_PIXEL_BIPRED_HELPER and MV_COST are macros defined
 *    elsewhere; the helper works directly on the locals of this function
 *    (presumably cand_x/cand_y, mcost, min_mcost, best_x/best_y and the
 *    center/search_range variables -- confirm against the macro
 *    definition before renaming anything here).
 *
 * return
 *    minimum motion cost found; mv[] is overwritten with the best
 *    displacement (best position minus pic_pix_x / pic_pix_y)
 *
 ************************************************************************
 */
int                                                           //  ==> minimum motion cost after search
smpUMHEXBipredIntegerPelBlockMotionSearch (Macroblock *currMB, // <--  current Macroblock
                                           imgpel* cur_pic,   // <--  original pixel values for the AxB block
                                           short     ref,           // <--  reference frame (0... or -1 (backward))
                                           int       list,          // <--  Current reference list
                                           int       list_offset,   // <--  MBAFF list offset
                                           char   ***refPic,        // <--  reference array
                                           short ****tmp_mv,        // <--  mv array
                                           int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
                                           int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
                                           int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
                                           short     pred_mv1[2],   // <--  motion vector predictor (x|y) in sub-pel units, costed against s_mv
                                           short     pred_mv2[2],   // <--  motion vector predictor (x|y) in sub-pel units, costed against mv
                                           short     mv[2],         // <--> in: search center (x|y) / out: motion vector (x|y) - in pel units
                                           short     s_mv[2],       // <--  fixed vector of the other prediction (x|y) - in pel units, not modified here
                                           int       search_range,  // <--  1-d search range in pel units
                                           int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
                                           int       iteration_no,  // <--  bi pred iteration number
                                           int       lambda_factor, // <--  lagrangian parameter for determining motion cost
                                           int       apply_weights  // <--  non-zero: use weighted prediction weights/offsets
                                           )
{
  int   mvshift       = 2;                              // motion vector shift for getting sub-pel units
  int   search_step, iYMinNow, iXMinNow;
  int   i, m;
  int   cand_x, cand_y, mcost;
  int   blocksize_y   = params->blc_size[blocktype][1];  // vertical block size
  int   blocksize_x   = params->blc_size[blocktype][0];  // horizontal block size
  int   pred_x1       = (pic_pix_x << 2) + pred_mv1[0];  // predicted position x (in sub-pel units)
  int   pred_y1       = (pic_pix_y << 2) + pred_mv1[1];  // predicted position y (in sub-pel units)
  int   pred_x2       = (pic_pix_x << 2) + pred_mv2[0];  // predicted position x (in sub-pel units)
  int   pred_y2       = (pic_pix_y << 2) + pred_mv2[1];  // predicted position y (in sub-pel units)
  short center2_x     = pic_pix_x + mv[0];               // center position x (in pel units)
  short center2_y     = pic_pix_y + mv[1];               // center position y (in pel units)
  short center1_x     = pic_pix_x + s_mv[0];             // mvx of second pred (in pel units)
  short center1_y     = pic_pix_y + s_mv[1];             // mvy of second pred (in pel units)
  int   best_x        = center2_x;
  int   best_y        = center2_y;
  // Luma weighted-prediction offsets for the two predictions (0 when weighting is off).
  // NOTE(review): the list != 0 branch indexes wp_offset as [..][0][ref] while the
  // list == 0 branch uses [..][ref][0] -- confirm the index order is intentional.
  short offset1 = (apply_weights ? (list == 0?  wp_offset[list_offset    ][ref][0]:  wp_offset[list_offset + 1][0  ][ref]) : 0);
  short offset2 = (apply_weights ? (list == 0?  wp_offset[list_offset + 1][ref][0]:  wp_offset[list_offset    ][0  ][ref]) : 0);
  // Reference 1 is picture 'ref' of the current list; reference 2 is picture 0
  // of the opposite list (list 1 when list == 0, otherwise list 0).
  ref_pic1_sub.luma = listX[list + list_offset][ref]->p_curr_img_sub;
  ref_pic2_sub.luma = listX[list == 0 ? 1 + list_offset: list_offset][ 0 ]->p_curr_img_sub;
  // Picture and padded dimensions are taken from reference 1
  img_width  = listX[list + list_offset][ref]->size_x;
  img_height = listX[list + list_offset][ref]->size_y;
  width_pad  = listX[list + list_offset][ref]->size_x_pad;
  height_pad = listX[list + list_offset][ref]->size_y_pad;
  // Select luma weights, the combined offset and the distortion function
  if (apply_weights)
  {
    // NOTE(review): same [0][ref] vs [ref][0] index-order asymmetry as for wp_offset above -- confirm
    weight1 = list == 0 ? wbp_weight[list_offset         ][ref][0][0] : wbp_weight[list_offset + LIST_1][0  ][ref][0];
    weight2 = list == 0 ? wbp_weight[list_offset + LIST_1][ref][0][0] : wbp_weight[list_offset         ][0  ][ref][0];
    offsetBi=(offset1 + offset2 + 1)>>1;   // rounded average of the two offsets
    computeBiPred = computeBiPredSAD2; //ME only supports SAD computations
  }
  else
  {
    // Unweighted case: both weights are 1 in the fixed-point scale of luma_log_weight_denom
    weight1 = 1<<luma_log_weight_denom;
    weight2 = 1<<luma_log_weight_denom;
    offsetBi = 0;
    computeBiPred = computeBiPredSAD1; //ME only supports SAD computations
  }
  // Same setup for the two chroma planes when chroma is part of the ME cost
  if (ChromaMEEnable )
  {
    ref_pic1_sub.crcb[0] = listX[list + list_offset][ref]->imgUV_sub[0];
    ref_pic1_sub.crcb[1] = listX[list + list_offset][ref]->imgUV_sub[1];
    ref_pic2_sub.crcb[0] = listX[list == 0 ? 1 + list_offset: list_offset][ 0 ]->imgUV_sub[0];
    ref_pic2_sub.crcb[1] = listX[list == 0 ? 1 + list_offset: list_offset][ 0 ]->imgUV_sub[1];
    width_pad_cr  = listX[list + list_offset][ref]->size_x_cr_pad;
    height_pad_cr = listX[list + list_offset][ref]->size_y_cr_pad;
    if (apply_weights)
    {
      weight1_cr[0] = list == 0 ? wbp_weight[list_offset         ][ref][0][1] : wbp_weight[list_offset + LIST_1][0  ][ref][1];
      weight1_cr[1] = list == 0 ? wbp_weight[list_offset         ][ref][0][2] : wbp_weight[list_offset + LIST_1][0  ][ref][2];
      weight2_cr[0] = list == 0 ? wbp_weight[list_offset + LIST_1][ref][0][1] : wbp_weight[list_offset         ][0  ][ref][1];
      weight2_cr[1] = list == 0 ? wbp_weight[list_offset + LIST_1][ref][0][2] : wbp_weight[list_offset         ][0  ][ref][2];
      // Rounded average of the two chroma offsets; the list != 0 branch
      // reads reference index 0 of both lists rather than 'ref'.
      offsetBi_cr[0] = (list == 0)
        ? (wp_offset[list_offset         ][ref][1] + wp_offset[list_offset + LIST_1][ref][1] + 1) >> 1
        : (wp_offset[list_offset + LIST_1][0  ][1] + wp_offset[list_offset         ][0  ][1] + 1) >> 1;
      offsetBi_cr[1] = (list == 0)
        ? (wp_offset[list_offset         ][ref][2] + wp_offset[list_offset + LIST_1][ref][2] + 1) >> 1
        : (wp_offset[list_offset + LIST_1][0  ][2] + wp_offset[list_offset         ][0  ][2] + 1) >> 1;
    }
    else
    {
      weight1_cr[0] = 1<<chroma_log_weight_denom;
      weight1_cr[1] = 1<<chroma_log_weight_denom;
      weight2_cr[0] = 1<<chroma_log_weight_denom;
      weight2_cr[1] = 1<<chroma_log_weight_denom;
      offsetBi_cr[0] = 0;
      offsetBi_cr[1] = 0;
    }
  }
  // Access method for the searched vector (center2): FAST_ACCESS only if the
  // whole search window plus the block lies strictly inside the picture,
  // otherwise UMV_ACCESS.
  if ((center2_x > search_range) && (center2_x < img_width -1-search_range-blocksize_x) &&
      (center2_y > search_range) && (center2_y < img_height-1-search_range-blocksize_y))
  {
    bipred2_access_method = FAST_ACCESS;
  }
  else
  {
    bipred2_access_method = UMV_ACCESS;
  }
  // Access method for the fixed vector (center1).
  // NOTE(review): only the vertical range is tested here, unlike the
  // center2 test above which also checks x -- confirm this is intended.
  if ((center1_y > search_range) && (center1_y < img_height-1-search_range-blocksize_y))
  {
    bipred1_access_method = FAST_ACCESS;
  }
  else
  {
    bipred1_access_method = UMV_ACCESS;
  }
  // Check the center median predictor
  // Cost = MV cost of the fixed vector + MV cost of the candidate + bi-pred distortion
  cand_x = center2_x ;
  cand_y = center2_y ;
  mcost  = MV_COST (lambda_factor, mvshift, center1_x, center1_y, pred_x1, pred_y1);
  mcost += MV_COST (lambda_factor, mvshift, cand_x,    cand_y,    pred_x2, pred_y2);
  mcost += computeBiPred(cur_pic,
                        blocksize_y, blocksize_x, INT_MAX,
                        (center1_x << 2) + IMG_PAD_SIZE_TIMES4,
                        (center1_y << 2) + IMG_PAD_SIZE_TIMES4,
                        (cand_x << 2) + IMG_PAD_SIZE_TIMES4,
                        (cand_y << 2) + IMG_PAD_SIZE_TIMES4);
  if (mcost < min_mcost)
  {
    min_mcost = mcost;
    best_x = cand_x;
    best_y = cand_y;
  }
  iXMinNow = best_x;
  iYMinNow = best_y;
  // If any predictor component is non-zero, also try the zero-displacement position
  if (0 != pred_mv1[0] || 0 != pred_mv1[1] || 0 != pred_mv2[0] || 0 != pred_mv2[1])
  {
    cand_x = pic_pix_x;
    cand_y = pic_pix_y;
    SEARCH_ONE_PIXEL_BIPRED_HELPER;
  }
  // If the min_mcost is small enough, do a local search then terminate
  // This is good for stationary or quasi-stationary areas
  // (threshold is scaled down per block type via block_type_shift_factor)
  if ((min_mcost<<3) < (ConvergeThreshold>>(block_type_shift_factor[blocktype])))
  {
    for (m = 0; m < 4; m++)
    {
      cand_x = iXMinNow + Diamond_X[m];
      cand_y = iYMinNow + Diamond_Y[m];
      SEARCH_ONE_PIXEL_BIPRED_HELPER;
    }
    // NOTE(review): stored without the (short) cast used by the other exits -- cosmetic inconsistency
    mv[0] = best_x - pic_pix_x;
    mv[1] = best_y - pic_pix_y;
    return min_mcost;
  }
  // Small local search
  // (the diamond is centred on iXMinNow/iYMinNow captured before the zero-displacement test)
  for (m = 0; m < 4; m++)
  {
    cand_x = iXMinNow + Diamond_X[m];
    cand_y = iYMinNow + Diamond_Y[m];
    SEARCH_ONE_PIXEL_BIPRED_HELPER;
  }
  // First_step: Symmetrical-cross search
  // If distortion is large, use large shapes. Otherwise, compact shapes are faster
  // Entered when 4*min_mcost exceeds threshold 1 (16x16 blocks only) or threshold 2 (any block type)
  if ((blocktype == 1 &&
    (min_mcost<<2) > (SymmetricalCrossSearchThreshold1>>block_type_shift_factor[blocktype])) ||
    ((min_mcost<<2) > (SymmetricalCrossSearchThreshold2>>block_type_shift_factor[blocktype])))
  {
    iXMinNow = best_x;
    iYMinNow = best_y;
    // Cross pattern: odd offsets 1, 3, 5, ... to the right, left, below and above the center
    for (i = 1; i <= search_range / 2; i++)
    {
      search_step = (i<<1) - 1;
      cand_x = iXMinNow + search_step;
      cand_y = iYMinNow;
      SEARCH_ONE_PIXEL_BIPRED_HELPER
      cand_x = iXMinNow - search_step;
      SEARCH_ONE_PIXEL_BIPRED_HELPER
      cand_x = iXMinNow;
      cand_y = iYMinNow + search_step;
      SEARCH_ONE_PIXEL_BIPRED_HELPER
      cand_y = iYMinNow - search_step;
      SEARCH_ONE_PIXEL_BIPRED_HELPER
    }
    // Hexagon Search
    iXMinNow = best_x;
    iYMinNow = best_y;
    for (m = 0; m < 6; m++)
    {
      cand_x = iXMinNow + Hexagon_X[m];
      cand_y = iYMinNow + Hexagon_Y[m];
      SEARCH_ONE_PIXEL_BIPRED_HELPER
    }
    // Multi Big Hexagon Search
    // The 16-point pattern is scaled by i = 1 .. search_range/4 around a fixed center
    iXMinNow = best_x;
    iYMinNow = best_y;
    for(i = 1; i <= search_range / 4; i++)
    {
      for (m = 0; m < 16; m++)
      {
        cand_x = iXMinNow + Big_Hexagon_X[m] * i;
        cand_y = iYMinNow + Big_Hexagon_Y[m] * i;
        SEARCH_ONE_PIXEL_BIPRED_HELPER
      }
    }
  }
  // Search up_layer predictor for non 16x16 blocks
  // (the stored up-layer MV is divided by 4 -- presumably sub-pel to integer-pel; confirm)
  if (blocktype > 1)
  {
    cand_x = pic_pix_x + (smpUMHEX_pred_MV_uplayer_X / 4);
    cand_y = pic_pix_y + (smpUMHEX_pred_MV_uplayer_Y / 4);
    SEARCH_ONE_PIXEL_BIPRED_HELPER
  }
  // When the search center is not the zero-displacement position, test
  // zero displacement and then a diamond around the current best
  if(center2_x != pic_pix_x || center2_y != pic_pix_y)
  {
    cand_x = pic_pix_x;
    cand_y = pic_pix_y;
    SEARCH_ONE_PIXEL_BIPRED_HELPER
    iXMinNow = best_x;
    iYMinNow = best_y;
    // Local diamond search
    for (m = 0; m < 4; m++)
    {
      cand_x = iXMinNow + Diamond_X[m];
      cand_y = iYMinNow + Diamond_Y[m];
      SEARCH_ONE_PIXEL_BIPRED_HELPER
    }
  }
  // If the minimum cost is small enough, do a local search
  // and finish the search here
  // (looser test than the first early exit: min_mcost<<2 instead of min_mcost<<3)
  if ((min_mcost<<2) < (ConvergeThreshold>>block_type_shift_factor[blocktype]))
  {
    iXMinNow = best_x;
    iYMinNow = best_y;
    for (m = 0; m < 4; m++)
    {
      cand_x = iXMinNow + Diamond_X[m];
      cand_y = iYMinNow + Diamond_Y[m];
      SEARCH_ONE_PIXEL_BIPRED_HELPER
    }
    mv[0] = (short) (best_x - pic_pix_x);
    mv[1] = (short) (best_y - pic_pix_y);
    return min_mcost;
  }
  // Second_step:  Extended Hexagon-based Search
  // Re-centre on the best point each pass; at most search_range passes
  for (i = 0; i < search_range; i++)
  {
    iXMinNow = best_x;
    iYMinNow = best_y;
    for (m = 0; m < 6; m++)
    {
      cand_x = iXMinNow + Hexagon_X[m];
      cand_y = iYMinNow + Hexagon_Y[m];
      SEARCH_ONE_PIXEL_BIPRED_HELPER
    }
    // The minimum cost point happens in the center
    if (best_x == iXMinNow && best_y == iYMinNow)
    {
      break;
    }
  }
  // Third_step: Small diamond search
  // Same re-centring scheme with the 4-point diamond
  for (i = 0; i < search_range; i++)
  {
    iXMinNow = best_x;
    iYMinNow = best_y;
    for (m = 0; m < 4; m++)
    {
      cand_x = iXMinNow + Diamond_X[m];
      cand_y = iYMinNow + Diamond_Y[m];
      SEARCH_ONE_PIXEL_BIPRED_HELPER
    }
    // The minimum cost point happens in the center
    if (best_x == iXMinNow && best_y == iYMinNow)
    {
      break;
    }
  }
  // Report the best displacement relative to the block position
  mv[0] = (short) (best_x - pic_pix_x);
  mv[1] = (short) (best_y - pic_pix_y);
  return min_mcost;
}
/*!
 ************************************************************************
 * brief
 *    Derive smpUMHEX_flag_intra_SAD for the current macroblock
 *    used for fast motion estimation
 *
 *    For non-I slices the flag is computed from the per-column entries
 *    of smpUMHEX_flag_intra around the current macroblock column
 *    (img->pix_x >> 4):
 *      - top-left macroblock  : flag cleared
 *      - left picture edge    : entry of the current column
 *      - top picture edge     : entry of the column to the left
 *      - everywhere else      : OR of current, left and right columns
 *    For I slices the flag is left unchanged.
 ************************************************************************
 */
void smpUMHEX_decide_intrabk_SAD()
{
  int mb_col;

  // Nothing to decide inside intra slices
  if (img->type == I_SLICE)
  {
    return;
  }

  mb_col = (img->pix_x) >> 4;

  if (img->pix_x == 0)
  {
    // First column: no left neighbour; the very first macroblock has no neighbour at all
    if (img->pix_y == 0)
    {
      smpUMHEX_flag_intra_SAD = 0;
    }
    else
    {
      smpUMHEX_flag_intra_SAD = smpUMHEX_flag_intra[mb_col];
    }
    return;
  }

  if (img->pix_y == 0)
  {
    // First row: only the column to the left is consulted
    smpUMHEX_flag_intra_SAD = smpUMHEX_flag_intra[mb_col - 1];
    return;
  }

  // Interior macroblock: current, left and right column entries
  smpUMHEX_flag_intra_SAD = ((smpUMHEX_flag_intra[mb_col]) ||
    (smpUMHEX_flag_intra[mb_col - 1]) ||
    (smpUMHEX_flag_intra[mb_col + 1]));
}
/*!
 ************************************************************************
 * brief
 *    Set cost to zero if neighbouring block is intra
 *    used for fast motion estimation
 ************************************************************************
 */
void smpUMHEX_skip_intrabk_SAD(int best_mode, int ref_max)
{
  short i, j, k;
  if (img->number > 0)
  {
    smpUMHEX_flag_intra[(img->pix_x)>>4] = (best_mode == 9 || best_mode == 10) ? 1 : 0;
  }
  if (img->type != I_SLICE  && (best_mode == 9 || best_mode == 10))
  {
    for (i=0; i < 4; i++)
    {
      for (j=0; j < 4; j++)
      {
        for (k=0; k < 9;k++)
        {
          smpUMHEX_l0_cost[k][j][i] = 0;
          smpUMHEX_l1_cost[k][j][i] = 0;
        }
      }
    }
  }
  return;
}
/*!
 ************************************************************************
 * brief
 *    Set up prediction MV and prediction up layer cost
 *    used for fast motion estimation
 ************************************************************************
 */
void smpUMHEX_setup(short ref,
                          int list,
                          int block_y,
                          int block_x,
                          int blocktype,
                          short ******all_mv)
{
  if (blocktype > 6)
  {
    smpUMHEX_pred_MV_uplayer_X = all_mv[list][ref][5][block_y][block_x][0];
    smpUMHEX_pred_MV_uplayer_Y = all_mv[list][ref][5][block_y][block_x][1];
  }
  else if (blocktype > 4)
  {
    smpUMHEX_pred_MV_uplayer_X = all_mv[list][ref][4][block_y][block_x][0];
    smpUMHEX_pred_MV_uplayer_Y = all_mv[list][ref][4][block_y][block_x][1];
  }
  else if (blocktype == 4)
  {
    smpUMHEX_pred_MV_uplayer_X = all_mv[list][ref][2][block_y][block_x][0];
    smpUMHEX_pred_MV_uplayer_Y = all_mv[list][ref][2][block_y][block_x][1];
  }
  else if (blocktype > 1)
  {
    smpUMHEX_pred_MV_uplayer_X = all_mv[list][ref][1][block_y][block_x][0];
    smpUMHEX_pred_MV_uplayer_Y = all_mv[list][ref][1][block_y][block_x][1];
  }
  if (blocktype > 1)
  {
    if (blocktype > 6)
    {
      smpUMHEX_pred_SAD_uplayer = (list==1) ?
        (smpUMHEX_l1_cost[5][(img->pix_y>>2)+block_y][(img->pix_x>>2)+block_x])
        : (smpUMHEX_l0_cost[5][(img->pix_y>>2)+block_y][(img->pix_x>>2)+block_x]);
      smpUMHEX_pred_SAD_uplayer /= 2;
    }
    else if (blocktype > 4)
    {
      smpUMHEX_pred_SAD_uplayer = (list==1) ?
        (smpUMHEX_l1_cost[4][(img->pix_y>>2)+block_y][(img->pix_x>>2)+block_x])
        : (smpUMHEX_l0_cost[4][(img->pix_y>>2)+block_y][(img->pix_x>>2)+block_x]);
      smpUMHEX_pred_SAD_uplayer /= 2;
    }
    else if (blocktype == 4)
    {
      smpUMHEX_pred_SAD_uplayer = (list==1) ?
        (smpUMHEX_l1_cost[2][(img->pix_y>>2)+block_y][(img->pix_x>>2)+block_x])
        : (smpUMHEX_l0_cost[2][(img->pix_y>>2)+block_y][(img->pix_x>>2)+block_x]);
      smpUMHEX_pred_SAD_uplayer /= 2;
    }
    else
    {
      smpUMHEX_pred_SAD_uplayer = (list==1) ?
        (smpUMHEX_l1_cost[1][(img->pix_y>>2)+block_y][(img->pix_x>>2)+block_x])
        : (smpUMHEX_l0_cost[1][(img->pix_y>>2)+block_y][(img->pix_x>>2)+block_x]);
      smpUMHEX_pred_SAD_uplayer /= 2;
    }
    smpUMHEX_pred_SAD_uplayer = smpUMHEX_flag_intra_SAD ? 0 : smpUMHEX_pred_SAD_uplayer;
  }
}