mcomp.c
上传用户:zhongxx05
上传日期:2007-06-06
资源大小:33641k
文件大小:79k
- /* ***** BEGIN LICENSE BLOCK *****
- * Version: RCSL 1.0/RPSL 1.0
- *
- * Portions Copyright (c) 1995-2002 RealNetworks, Inc. All Rights Reserved.
- *
- * The contents of this file, and the files included with this file, are
- * subject to the current version of the RealNetworks Public Source License
- * Version 1.0 (the "RPSL") available at
- * http://www.helixcommunity.org/content/rpsl unless you have licensed
- * the file under the RealNetworks Community Source License Version 1.0
- * (the "RCSL") available at http://www.helixcommunity.org/content/rcsl,
- * in which case the RCSL will apply. You may also obtain the license terms
- * directly from RealNetworks. You may not use this file except in
- * compliance with the RPSL or, if you have a valid RCSL with RealNetworks
- * applicable to this file, the RCSL. Please see the applicable RPSL or
- * RCSL for the rights, obligations and limitations governing use of the
- * contents of the file.
- *
- * This file is part of the Helix DNA Technology. RealNetworks is the
- * developer of the Original Code and owns the copyrights in the portions
- * it created.
- *
- * This file, and the files included with this file, is distributed and made
- * available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS
- * FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- *
- * Technology Compatibility Kit Test Suite(s) Location:
- * http://www.helixcommunity.org/content/tck
- *
- * Contributor(s):
- *
- * ***** END LICENSE BLOCK ***** */
- /*-----------------------------------------------------------------------------
- * MCOMP.C
- *
- * DESCRIPTION
- * mcomp.c - Motion compensation for H.263 codec
- * Performs motion compensation for a macroblock with half-pixel accuracy
- * Supports Unrestricted Motion Vector mode, i.e., motion vectors are allowed
- * to point outside picture boundaries; uses edge pixels when needed.
- * Supports Advanced Prediction mode, i.e., four motion vectors per macroblock
- * and overlapped motion compensation for luminance.
- *
- * In Advanced Prediction mode, the prediction is formed in two steps.
- * First, MotionComp263 is called to generate non-overlapped motion compensation.
- * Then, OverlapMC is called to perform the overlapping on the luminance.
- * OverlapMC requires mv's from left, top, and right neighbors. Therefore,
- * the encoder needs to decide on the coding mode for the next mb (i.e., to the
- * right) before overlap can be done.
- *
- * In PB-frame mode, the B-frame prediction is generated by PredBframe. It uses
- * previous P-frame and the corresponding macroblock from the next P-frame.
- * Hence, the next P-frame macroblock needs to be reconstructed before calling
- * PredBframe.
- *
- * This module also contains the motion compensation routine for
- * H.261 (MotionComp), which performs integer motion compensation.
- * MotionComp was extracted from file PREDSEL.C and modified.
- *
- * CALLING SEQUENCE
- * MotionComp263( MACROBLOCK_DESCR * mb, // Describes block to be motion-compensated
- * PICTURE * prev_pic, // Describes previous picture used to form MC
- * PICTURE * pic) // Output picture where MC block is placed
- * OverlapMC( MACROBLOCK_DESCR * mb, // Describes block to be motion-compensated
- * int PBframe, // Non-zero if PB frame
- * PICTURE * prevPic, // Describes previous picture used to form MC
- * PICTURE * pic, // Output picture where MC block is placed
- * int mbWidth, // Macroblocks per row
- * int mbOffset, // Row offset; (mb-mbOffset) is neighbor on top
- * int overlap[4] // Returns YES or NO to indicate whether overlap
- * ) // was done in each 8x8 subblock
- * PredBframe( MACROBLOCK_DESCR * mb, // Macroblock to be predicted
- * PICTURE * prevPic, // Prev. picture (forward pred)
- * PICTURE * nextPic, // Next P-picture (backward pred)
- * PICTURE * Bpic // Output picture where pred is placed
- * )
- *
- * The routines do not return a value.
- *
- * Assumptions:
- * 1. "pic", "prev_pic", "nextPic", and "Bpic" have same line offset and size
- * 2. Chroma components Cb and Cr have same line offset and size
- * 3. All arrays are quad-aligned, i.e., start address is multiple of 4.
- * 4. Motion vectors are represented with one fractional bit
- *
- *
- * MotionComp( MACROBLOCK_DESCR * mb, // Describes block to be motion-compensated
- * PICTURE * prev_pic, // Describes previous picture used to form MC
- * PICTURE * pic ) // Output picture where MC block is placed
- *
- * The routine returns H261_ERROR if motion vectors point outside picture, otherwise OK.
- *
- * Assumptions:
- * 1-3. As above
- * 4. Motion vectors are represented by integers.
- *
- *
- -----------------------------------------------------------------------------*/
- //#include <stdio.h>
- //#include <stdlib.h>
- #ifdef _MACINTOSH
- #include <stdlib.h> // for exit()
- #endif
- #include "dllindex.h"
- #include "h261defs.h"
- #include "h261func.h"
- #ifdef COMPILE_MMX
- #include "mmxcpuid.h"
- #endif
- //#define VVPROFILER
- #ifdef VVPROFILER
- #include "hvdebtim.h"
- extern struct CVvDebugTimer * pVvProf[];
- extern unsigned long pVvProfCount[];
- #endif
- // Declarations of local functions
- static int chromaMVComp( int mvLuma );
- static int chromaMvComp4V( S8 mvLuma[4] );
- void mc( int hSize, int vSize,
- PIXEL in[], PIXEL out[], int hdim,
- int mvX, int mvY // Motion vector
- );
- void mcMMX( int hSize, int vSize,
- PIXEL in[], PIXEL out[], int hdim,
- int mvX, int mvY // Motion vector
- );
- static void limitMC( int hSize, int vSize,
- PIXEL const *in, PIXEL *out, int hdim,
- int mvX, int mvY, // Motion vector
- int minX, int maxX, int minY, int maxY // Limits for hor/vert indices
- );
- static int mvDiff( int mvX, int mvY, // motion vector
- MACROBLOCK_DESCR const *borderMB, // adjacent macroblock
- int subBlk, // adjacent subblock (needed if borderMB has 4 motion vectors)
- int PBframe, // If PBframe: do overlap also with INTRA neighbor
- int border[2] // return motion vector components for adjacent block
- );
- static void doOverlapMC( int subBlk, // Indicates subblock to process (UL, UR, LL, LR)
- MACROBLOCK_DESCR *mb, // Used to determine (x,y) coordinates for block
- PICTURE *prevPic, // Previous picture; used to create overlapping MC
- PICTURE *pic, // Contains non-overlapped MC on entry; returns overlapped MC
- int borderMv[4][2], // Motion vectors for adjacent blocks (L,Top,R,Bottom)
- int left, // If YES, overlap using LEFT mv
- int top, // If YES, overlap using TOP mv
- int right, // If YES, overlap using RIGHT mv
- int bottom // If YES, overlap using BOTTOM mv
- );
- static void weigh8x4( PIXEL dest[], PIXEL const *p, int hdim, int vert, int left, int right );
- static void weighLeft_Vert( PIXEL *dest, PIXEL const *p, int hdim );
- static void weighNoLeft_Vert( PIXEL *dest, PIXEL const *p, int hdim );
- static void weighLeft( PIXEL *dest, PIXEL const *p, int hdim );
- static void weighRight_Vert( PIXEL *dest, PIXEL const *p, int hdim );
- static void weighNoRight_Vert( PIXEL *dest, PIXEL const *p, int hdim );
- static void weighRight( PIXEL *dest, PIXEL const *p, int hdim );
- static void mc16pelsNoInterpol( PIXEL *in, PIXEL out[], int hdim, int vSize );
- static void mc8pelsNoInterpol( PIXEL *in, PIXEL out[], int hdim, int vSize );
- static void mc4pelsNoInterpol( PIXEL *in, PIXEL out[], int hdim, int vSize );
- static void mc16pelsHorInterpol( PIXEL const *in, PIXEL out[], int hdim, int vSize );
- static void mc8pelsHorInterpol( PIXEL const *in, PIXEL out[], int hdim, int vSize );
- static void mc4pelsHorInterpol( PIXEL const *in, PIXEL out[], int hdim, int vSize );
- static void mc16pelsVertInterpol( PIXEL const *in, PIXEL out[], int hdim, int vSize );
- static void mc8pelsVertInterpol( PIXEL const *in, PIXEL out[], int hdim, int vSize );
- static void mc4pelsVertInterpol( PIXEL const *in, PIXEL out[], int hdim, int vSize );
- static void mc16pels2DInterpol( PIXEL const *in, PIXEL out[], int hdim, int vSize );
- static void mc8pels2DInterpol( PIXEL const *in, PIXEL out[], int hdim, int vSize );
- static void mc4pels2DInterpol( PIXEL const *in, PIXEL out[], int hdim, int vSize );
- static void saveBackwardPred( MACROBLOCK_DESCR * mb, // Describes block to be saved
- PICTURE * pic // Picture with pixels to be saved
- );
- static void averageForBack( MACROBLOCK_DESCR * mb, // Describes block to be averaged
- PICTURE * pic // Input: contains forward prediction
- // Output: contains B-frame prediction
- );
- static void copyBlock( S32 source[], int hSize, int vSize, int sourceOffset,
- S32 dest[], int destOffset );
- void averageBlock( PIXEL forPred[], int hSize, int vSize, int forOffset,
- PIXEL backPred[], int backOffset );
- void averageBlockMMX( PIXEL forPred[], int hSize, int vSize, int forOffset,
- PIXEL backPred[], int backOffset );
- // MotionComp - perform integer-pixel motion compensation for H.261
- extern int MotionComp( MACROBLOCK_DESCR *mb, PICTURE *prev_pic, PICTURE *pic )
- {
- int row, col, status, i, cx, cy, pic_offset, prev_offset;
- union { // Copy words to speed up routine
- PIXEL * pix;
- S32 * word;
- } pixel, prev;
- PIXEL * source, * dest;
- status = OK;
- if (mb->mv_x == 0 && mb->mv_y == 0) { // Copy macroblock
- col = 16 * mb->x;
- row = 16 * mb->y;
- pic_offset = col + row * pic->y.hoffset;
- pixel.pix = pic->y.ptr + pic_offset;
- prev.pix = prev_pic->y.ptr + pic_offset;
- for (i = 0; i < 16; i++) {
- *(pixel.word) = *(prev.word);
- *(pixel.word + 1) = *(prev.word + 1);
- *(pixel.word + 2) = *(prev.word + 2);
- *(pixel.word + 3) = *(prev.word + 3);
- pixel.pix += pic->y.hoffset;
- prev.pix += pic->y.hoffset;
- }
- if (pic->color) {
- col = 8 * mb->x;
- row = 8 * mb->y;
- // Assuming same offset for Cr and Cb
- pic_offset = col + row * pic->cb.hoffset;
- pixel.pix = pic->cb.ptr + pic_offset;
- prev.pix = prev_pic->cb.ptr + pic_offset;
- for (i = 0; i < 8; i++) {
- *(pixel.word) = *(prev.word);
- *(pixel.word + 1) = *(prev.word + 1);
- pixel.pix += pic->cb.hoffset;
- prev.pix += pic->cb.hoffset;
- }
- pixel.pix = pic->cr.ptr + pic_offset;
- prev.pix = prev_pic->cr.ptr + pic_offset;
- for (i = 0; i < 8; i++) {
- *(pixel.word) = *(prev.word);
- *(pixel.word + 1) = *(prev.word + 1);
- pixel.pix += pic->cr.hoffset;
- prev.pix += pic->cr.hoffset;
- }
- }
- } else { // Non-zero motion vector
- // Wrap motion vectors to allowed range
- while (mb->mv_x < MV_MIN) {
- mb->mv_x += MV_WRAP;
- }
- while (mb->mv_x > MV_MAX) {
- mb->mv_x -= MV_WRAP;
- }
- while (mb->mv_y < MV_MIN) {
- mb->mv_y += MV_WRAP;
- }
- while (mb->mv_y > MV_MAX) {
- mb->mv_y -= MV_WRAP;
- }
- // Compute pointers
- col = 16 * mb->x;
- row = 16 * mb->y;
- if (col == 0 && mb->mv_x < 0) { // Pointing left of first col?
- mb->mv_x = 0, status = H261_ERROR;
- }
- if (col == pic->y.nhor - 16 && mb->mv_x > 0) { // Right of last col?
- mb->mv_x = 0, status = H261_ERROR;
- }
- if (row == 0 && mb->mv_y < 0) { // Pointing above first row?
- mb->mv_y = 0, status = H261_ERROR;
- }
- if (row == pic->y.nvert - 16 && mb->mv_y > 0) { // Below last row?
- mb->mv_y = 0, status = H261_ERROR;
- }
- // Copy displaced macroblock
- pic_offset = col + row * pic->y.hoffset;
- prev_offset = col + mb->mv_x + (row + mb->mv_y) * pic->y.hoffset;
- pixel.pix = pic->y.ptr + pic_offset;
- prev.pix = prev_pic->y.ptr + prev_offset;
- for (i = 0; i < 16; i++) {
- source = prev.pix;
- for (dest = pixel.pix; dest < pixel.pix + 16; dest++) {
- *dest = *(source++);
- }
- pixel.pix += pic->y.hoffset;
- prev.pix += pic->y.hoffset;
- }
- if (pic->color) {
- col = 8 * mb->x;
- row = 8 * mb->y;
- // Truncate motion vectors for chroma towards zero
- if (mb->mv_x < 0) {
- cx = (mb->mv_x + 1) >> 1;
- } else {
- cx = mb->mv_x >> 1;
- }
- if (mb->mv_y < 0) {
- cy = (mb->mv_y + 1) >> 1;
- } else {
- cy = mb->mv_y >> 1;
- }
- // Assuming same offset for Cr and Cb
- pic_offset = col + row * pic->cb.hoffset;
- prev_offset = col + cx + (row + cy) * pic->cb.hoffset;
- pixel.pix = pic->cb.ptr + pic_offset;
- prev.pix = prev_pic->cb.ptr + prev_offset;
- for (i = 0; i < 8; i++) {
- source = prev.pix;
- for (dest = pixel.pix; dest < pixel.pix + 8; dest++) {
- *dest = *(source++);
- }
- pixel.pix += pic->cb.hoffset;
- prev.pix += pic->cb.hoffset;
- }
- pixel.pix = pic->cr.ptr + pic_offset;
- prev.pix = prev_pic->cr.ptr + prev_offset;
- for (i = 0; i < 8; i++) {
- source = prev.pix;
- for (dest = pixel.pix; dest < pixel.pix + 8; dest++) {
- *dest = *(source++);
- }
- pixel.pix += pic->cr.hoffset;
- prev.pix += pic->cr.hoffset;
- }
- }
- }
- return (status);
- }
- // MotionComp263 - perform half-pixel motion compensation for H.263
- extern void MotionComp263( MACROBLOCK_DESCR * mb, // Describes block to be motion-compensated
- PICTURE * prevPic, // Describes previous picture used to form MC
- PICTURE * pic // Output picture where MC block is placed
- )
- {
- int row, col, cX, cY, picOffset, hdim, nhor, nvert;
- PIXEL * source, * dest;
- void (*pMC) ( int hSize, int vSize, PIXEL *in, PIXEL *out, int hdim, int mvX, int mvY);
- #if defined(COMPILE_MMX)
- if(cpuid_is_mmx_motion_on()) {
- //do mmx if compiler switch AND initialized AND detected
- pMC = mcMMX;
- } else
- #endif
- {
- pMC = mc;
- }
- // Compute luma pointers
- col = 16 * mb->x;
- row = 16 * mb->y;
- hdim = pic->y.hoffset;
- nhor = pic->y.nhor, nvert = pic->y.nvert;
- picOffset = col + row * hdim;
- dest = pic->y.ptr + picOffset; // Point to output luma
- source = prevPic->y.ptr + picOffset; // Point to input luma (without motion comp)
- // Do motion compensation for luma
- if (mb->mtype == MTYPE263_INTER4V) { // 4 motion vectors
- // Upper left block
- if (PointingOutside( col, col+7, row, row+7, mb->blkMvX[UPPER_LEFT_BLK],
- mb->blkMvY[UPPER_LEFT_BLK], nhor, nvert ) == NO) {
- pMC( 8, 8, source, dest, hdim,
- mb->blkMvX[UPPER_LEFT_BLK], mb->blkMvY[UPPER_LEFT_BLK] );
- } else {
- limitMC( 8, 8, source, dest, hdim,
- mb->blkMvX[UPPER_LEFT_BLK], mb->blkMvY[UPPER_LEFT_BLK],
- -col, nhor - 1 - col, -row, nvert - 1 - row );
- }
- // Upper right block
- if (PointingOutside( col+8, col+15, row, row+7, mb->blkMvX[UPPER_RIGHT_BLK],
- mb->blkMvY[UPPER_RIGHT_BLK], nhor, nvert ) == NO) {
- pMC( 8, 8, source + 8, dest + 8, hdim,
- mb->blkMvX[UPPER_RIGHT_BLK], mb->blkMvY[UPPER_RIGHT_BLK] );
- } else {
- limitMC( 8, 8, source + 8, dest + 8, hdim,
- mb->blkMvX[UPPER_RIGHT_BLK], mb->blkMvY[UPPER_RIGHT_BLK],
- -col - 8, nhor - 1 - col - 8, -row, nvert - 1 - row );
- }
- // Lower left block
- source += 8 * hdim; // Advance 8 lines
- dest += 8 * hdim;
- if (PointingOutside( col, col+7, row+8, row+15, mb->blkMvX[LOWER_LEFT_BLK],
- mb->blkMvY[LOWER_LEFT_BLK], nhor, nvert ) == NO) {
- pMC( 8, 8, source, dest, hdim,
- mb->blkMvX[LOWER_LEFT_BLK], mb->blkMvY[LOWER_LEFT_BLK] );
- } else {
- limitMC( 8, 8, source, dest, hdim,
- mb->blkMvX[LOWER_LEFT_BLK], mb->blkMvY[LOWER_LEFT_BLK],
- -col, nhor - 1 - col, -row - 8, nvert - 1 - row - 8 );
- }
- // Lower right block
- if (PointingOutside( col+8, col+15, row+8, row+15, mb->blkMvX[LOWER_RIGHT_BLK],
- mb->blkMvY[LOWER_RIGHT_BLK], nhor, nvert ) == NO) {
- pMC( 8, 8, source + 8, dest + 8, hdim,
- mb->blkMvX[LOWER_RIGHT_BLK], mb->blkMvY[LOWER_RIGHT_BLK] );
- } else {
- limitMC( 8, 8, source + 8, dest + 8, hdim,
- mb->blkMvX[LOWER_RIGHT_BLK], mb->blkMvY[LOWER_RIGHT_BLK],
- -col - 8, nhor - 1 - col - 8, -row - 8, nvert - 1 - row - 8 );
- }
- // Compute chroma mv
- cX = chromaMvComp4V( mb->blkMvX );
- cY = chromaMvComp4V( mb->blkMvY );
-
- } else { // One motion vector
- if (PointingOutside( col, col+15, row, row+15, mb->mv_x, mb->mv_y,
- nhor, nvert ) == NO) {
- pMC( 16, 16, source, dest, hdim, mb->mv_x, mb->mv_y );
- } else { // Pointing outside
- limitMC( 16, 16, source, dest, hdim, mb->mv_x, mb->mv_y,
- -col, nhor - 1 - col, -row, nvert - 1 - row );
- }
- // Compute chroma mv
- cX = chromaMVComp( mb->mv_x );
- cY = chromaMVComp( mb->mv_y );
- }
-
- // Do motion compensation for chroma
- if (pic->color) {
- // Compute chroma pointers
- col = 8 * mb->x;
- row = 8 * mb->y;
- hdim = pic->cb.hoffset;
- nhor = pic->cb.nhor, nvert = pic->cb.nvert;
- picOffset = col + row * hdim;
- if (PointingOutside( col, col+7, row, row+7, cX, cY, nhor, nvert ) == NO) {
- pMC( 8, 8, prevPic->cb.ptr + picOffset, pic->cb.ptr + picOffset, hdim, cX, cY );
- pMC( 8, 8, prevPic->cr.ptr + picOffset, pic->cr.ptr + picOffset, hdim, cX, cY );
- } else {
- limitMC( 8, 8, prevPic->cb.ptr + picOffset, pic->cb.ptr + picOffset,
- hdim, cX, cY, -col, nhor - 1 - col, -row, nvert - 1 - row );
- limitMC( 8, 8, prevPic->cr.ptr + picOffset, pic->cr.ptr + picOffset,
- hdim, cX, cY, -col, nhor - 1 - col, -row, nvert - 1 - row );
- }
- }
- return;
- }
- // printBlk - for debugging
- /*static void printBlk( int hSize, int vSize, PIXEL *p, int hdim)
- {
- int i,j;
-
- for (i = 0; i < vSize; ++i) {
- for (j = 0; j < hSize; ++j) {
- printf(" %3d", *(p + j));
- }
- printf("\n");
- p += hdim;
- }
- return;
- }*/
- // Enumerate array of motion vectors
- #define LEFT (0)
- #define TOP (1)
- #define RIGHT (2)
- #define BOTTOM (3)
- // Do overlapped motion comp. for luma
- extern void OverlapMC( MACROBLOCK_DESCR * mb, // Describes block to be motion-compensated
- int PBframe, // Non-zero if PB frame
- PICTURE * prevPic, // Describes previous picture used to form MC
- PICTURE * pic, // Output picture where MC block is placed
- int mbWidth, // Macroblocks per row
- int mbOffset, // Row offset; (mb-mbOffset) is neighbor on top
- int overlap[4] // Returns YES or NO to indicate whether overlap
- // was done in each 8x8 subblock
- )
- {
- int left, top, right, bottom, mvX, mvY;
- int borderMv[4][2]; // motion vectors for neighbors (left, top, right, bottom)
-
- if (mb->mtype == MTYPE263_INTER4V) {
- // Upper left block
- mvX = mb->blkMvX[UPPER_LEFT_BLK];
- mvY = mb->blkMvY[UPPER_LEFT_BLK];
- if (mb->x == 0) left = NO;
- else left = mvDiff( mvX, mvY, mb-1, UPPER_RIGHT_BLK, PBframe, borderMv[LEFT] );
- if (mb->y == 0) top = NO;
- else top = mvDiff( mvX, mvY, mb-mbOffset, LOWER_LEFT_BLK, PBframe, borderMv[TOP] );
- right = mvDiff( mvX, mvY, mb, UPPER_RIGHT_BLK, PBframe, borderMv[RIGHT] );
- bottom = mvDiff( mvX, mvY, mb, LOWER_LEFT_BLK, PBframe, borderMv[BOTTOM] );
- if (left == YES || right == YES || top == YES || bottom == YES) {
- doOverlapMC( UPPER_LEFT_BLK, mb, prevPic, pic, borderMv, left,top,right,bottom);
- overlap[UPPER_LEFT_BLK] = YES;
- } else {
- overlap[UPPER_LEFT_BLK] = NO;
- }
- // Upper right block
- mvX = mb->blkMvX[UPPER_RIGHT_BLK];
- mvY = mb->blkMvY[UPPER_RIGHT_BLK];
- left = mvDiff( mvX, mvY, mb, UPPER_LEFT_BLK, PBframe, borderMv[LEFT] );
- if (mb->y == 0) top = NO;
- else top = mvDiff( mvX, mvY, mb-mbOffset, LOWER_RIGHT_BLK, PBframe, borderMv[TOP] );
- if (mb->x == mbWidth-1) right = NO;
- else right = mvDiff( mvX, mvY, mb+1, UPPER_LEFT_BLK, PBframe, borderMv[RIGHT] );
- bottom = mvDiff( mvX, mvY, mb, LOWER_RIGHT_BLK, PBframe, borderMv[BOTTOM] );
- if (left == YES || right == YES || top == YES || bottom == YES) {
- doOverlapMC( UPPER_RIGHT_BLK, mb, prevPic, pic, borderMv, left,top,right,bottom);
- overlap[UPPER_RIGHT_BLK] = YES;
- } else {
- overlap[UPPER_RIGHT_BLK] = NO;
- }
- // Lower left block
- mvX = mb->blkMvX[LOWER_LEFT_BLK];
- mvY = mb->blkMvY[LOWER_LEFT_BLK];
- if (mb->x == 0) left = NO;
- else left = mvDiff( mvX, mvY, mb-1, LOWER_RIGHT_BLK, PBframe, borderMv[LEFT] );
- top = mvDiff( mvX, mvY, mb, UPPER_LEFT_BLK, PBframe, borderMv[TOP] );
- right = mvDiff( mvX, mvY, mb, LOWER_RIGHT_BLK, PBframe, borderMv[RIGHT] );
- if (left == YES || right == YES || top == YES) {
- doOverlapMC( LOWER_LEFT_BLK, mb, prevPic, pic, borderMv, left,top,right,NO);
- overlap[LOWER_LEFT_BLK] = YES;
- } else {
- overlap[LOWER_LEFT_BLK] = NO;
- }
- // Lower right block
- mvX = mb->blkMvX[LOWER_RIGHT_BLK];
- mvY = mb->blkMvY[LOWER_RIGHT_BLK];
- left = mvDiff( mvX, mvY, mb, LOWER_LEFT_BLK, PBframe, borderMv[LEFT] );
- top = mvDiff( mvX, mvY, mb, UPPER_RIGHT_BLK, PBframe, borderMv[TOP] );
- if (mb->x == mbWidth-1) right = NO;
- else right = mvDiff( mvX, mvY, mb+1, LOWER_LEFT_BLK, PBframe, borderMv[RIGHT] );
- if (left == YES || right == YES || top == YES) {
- doOverlapMC( LOWER_RIGHT_BLK, mb, prevPic, pic, borderMv, left,top,right,NO);
- overlap[LOWER_RIGHT_BLK] = YES;
- } else {
- overlap[LOWER_RIGHT_BLK] = NO;
- }
-
- } else { // One motion vector for current macroblock; neighbors can be INTER4V
- mvX = mb->mv_x;
- mvY = mb->mv_y;
- // Upper left block
- if (mb->x == 0) left = NO;
- else left = mvDiff( mvX, mvY, mb-1, UPPER_RIGHT_BLK, PBframe, borderMv[LEFT] );
- if (mb->y == 0) top = NO;
- else top = mvDiff( mvX, mvY, mb-mbOffset, LOWER_LEFT_BLK, PBframe, borderMv[TOP] );
- if (left == YES || top == YES) {
- doOverlapMC( UPPER_LEFT_BLK, mb, prevPic, pic, borderMv, left,top,NO,NO);
- overlap[UPPER_LEFT_BLK] = YES;
- } else {
- overlap[UPPER_LEFT_BLK] = NO;
- }
- // Upper right block
- if (mb->y == 0) top = NO;
- else top = mvDiff( mvX, mvY, mb-mbOffset, LOWER_RIGHT_BLK, PBframe, borderMv[TOP] );
- if (mb->x == mbWidth-1) right = NO;
- else right = mvDiff( mvX, mvY, mb+1, UPPER_LEFT_BLK, PBframe, borderMv[RIGHT] );
- if (right == YES || top == YES) {
- doOverlapMC( UPPER_RIGHT_BLK, mb, prevPic, pic, borderMv, NO,top,right,NO);
- overlap[UPPER_RIGHT_BLK] = YES;
- } else {
- overlap[UPPER_RIGHT_BLK] = NO;
- }
- // Lower left block
- if (mb->x == 0) left = NO;
- else left = mvDiff( mvX, mvY, mb-1, LOWER_RIGHT_BLK, PBframe, borderMv[LEFT] );
- if (left == YES) {
- doOverlapMC( LOWER_LEFT_BLK, mb, prevPic, pic, borderMv, left,NO,NO,NO);
- overlap[LOWER_LEFT_BLK] = YES;
- } else {
- overlap[LOWER_LEFT_BLK] = NO;
- }
- // Lower right block
- if (mb->x == mbWidth-1) right = NO;
- else right = mvDiff( mvX, mvY, mb+1, LOWER_LEFT_BLK, PBframe, borderMv[RIGHT] );
- if (right == YES) {
- doOverlapMC( LOWER_RIGHT_BLK, mb, prevPic, pic, borderMv, NO,NO,right,NO);
- overlap[LOWER_RIGHT_BLK] = YES;
- } else {
- overlap[LOWER_RIGHT_BLK] = NO;
- }
- }
- return;
- }
- // PointingOutside - determine whether motion-comp routine needs to worry about
- // the borders of the previous picture (use edge pixels instead of non-existent
- // pixels "outside" the border of the previous picture).
- // Returns YES if the motion-compensated block needs pixels outside the previous
- // picture; NO if picture boundary is not crossed.
- extern int PointingOutside( int col1, int col2, // First and last column of block
- int row1, int row2, // First and last row of block
- int mvX, int mvY, // Motion vector; one fractional bit
- int nCols, int nRows // Picture size
- )
- {
- if (col1 + (mvX >> 1) < 0) // Check left border
- return( YES );
- if (col2 + ((mvX + 1) >> 1) >= nCols) // Check right border
- return( YES );
- if (row1 + (mvY >> 1) < 0) // Check top border
- return( YES );
- if (row2 + ((mvY + 1) >> 1) >= nRows) // Check bottom border
- return( YES );
- return( NO );
- }
- // PredBframe - Form prediction for B-frame
- extern void PredBframe( MACROBLOCK_DESCR * mb, // Macroblock to be predicted
- PICTURE * prevPic, // Prev. picture (forward pred)
- PICTURE * nextPic, // Next P-picture (backward pred)
- PICTURE * Bpic // Output picture where pred is placed
- )
- {
- int i;
- S8 saveMvX[4], saveMvY[4];
- // Perform backward prediction
- if (mb->mtype == MTYPE263_INTER4V) {
- for (i = 0; i < 4; ++i) {
- saveMvX[i] = mb->blkMvX[i];
- mb->blkMvX[i] = mb->blkMvBx[i];
- saveMvY[i] = mb->blkMvY[i];
- mb->blkMvY[i] = mb->blkMvBy[i];
- }
- } else {
- saveMvX[0] = mb->mv_x;
- mb->mv_x = mb->blkMvBx[0];
- saveMvY[0] = mb->mv_y;
- mb->mv_y = mb->blkMvBy[0];
- }
- MotionComp263( mb, nextPic, Bpic );
- // Save backward prediction in temporary area
- saveBackwardPred( mb, Bpic );
- // Perform forward prediction
- if (mb->mtype == MTYPE263_INTER4V) {
- for (i = 0; i < 4; ++i) {
- mb->blkMvX[i] = mb->blkMvFx[i];
- mb->blkMvY[i] = mb->blkMvFy[i];
- }
- } else {
- mb->mv_x = mb->blkMvFx[0];
- mb->mv_y = mb->blkMvFy[0];
- }
- MotionComp263( mb, prevPic, Bpic );
- // Restore parameters
- if (mb->mtype == MTYPE263_INTER4V) {
- for (i = 0; i < 4; ++i) {
- mb->blkMvX[i] = saveMvX[i];
- mb->blkMvY[i] = saveMvY[i];
- }
- } else {
- mb->mv_x = saveMvX[0];
- mb->mv_y = saveMvY[0];
- }
- // Average forward and backward prediction
- averageForBack( mb, Bpic );
- }
- // PredBdist - Form prediction for subsampled error computation
- extern void PredBdist( MACROBLOCK_DESCR * mb, // Macroblock to be predicted
- PICTURE * prevPic, // Prev. picture (forward pred)
- PICTURE * nextPic, // Next P-picture (backward pred)
- PICTURE * Bpic // Output picture where pred is placed
- )
- {
- int i;
- S8 saveMvX[4], saveMvY[4];
- // Perform backward prediction
- if (mb->mtype == MTYPE263_INTER4V) {
- for (i = 0; i < 4; ++i) {
- saveMvX[i] = mb->blkMvX[i];
- mb->blkMvX[i] = mb->blkMvBx[i];
- saveMvY[i] = mb->blkMvY[i];
- mb->blkMvY[i] = mb->blkMvBy[i];
- }
- } else {
- saveMvX[0] = mb->mv_x;
- mb->mv_x = mb->blkMvBx[0];
- saveMvY[0] = mb->mv_y;
- mb->mv_y = mb->blkMvBy[0];
- }
- MotionComp263( mb, nextPic, Bpic );
- // Save backward prediction in temporary area
- saveBackwardPred( mb, Bpic );
- // Perform forward prediction
- if (mb->mtype == MTYPE263_INTER4V) {
- for (i = 0; i < 4; ++i) {
- mb->blkMvX[i] = mb->blkMvFx[i];
- mb->blkMvY[i] = mb->blkMvFy[i];
- }
- } else {
- mb->mv_x = mb->blkMvFx[0];
- mb->mv_y = mb->blkMvFy[0];
- }
- MotionComp263( mb, prevPic, Bpic );
- // Restore parameters
- if (mb->mtype == MTYPE263_INTER4V) {
- for (i = 0; i < 4; ++i) {
- mb->blkMvX[i] = saveMvX[i];
- mb->blkMvY[i] = saveMvY[i];
- }
- } else {
- mb->mv_x = saveMvX[0];
- mb->mv_y = saveMvY[0];
- }
- // Average forward and backward prediction
- averageForBack( mb, Bpic );
- }
// chromaMVComp - derive motion component for chroma from luma motion component
// The luma component is treated as a chroma displacement with two fractional
// bits: the part above those bits becomes the integer chroma position, and any
// non-zero fraction is rounded to the half-pixel position.  The result has one
// fractional bit.
static int chromaMVComp( int mvLuma )
{
    int halfPel;

    halfPel = ((mvLuma & 0x3) != 0) ? 1 : 0;    // Any fraction -> half-pixel
    return( (mvLuma >> 2) * 2 + halfPel );
}
- // chromaMvComp4V - derive motion component for chroma from 4 luma motion components
- static int chromaMvComp4V( S8 mvLuma[4] )
- {
- int sum, mvChroma, fraction;
-
- sum = mvLuma[0] + mvLuma[1] + mvLuma[2] + mvLuma[3];
- mvChroma = 2 * (sum >> 4); // Truncate fractional part
- fraction = sum & 0xf; // Four fractional bits
- if (fraction >= 14) {
- mvChroma += 2; // Round up to next integer value
- } else if (fraction >= 3) {
- ++mvChroma; // Round towards half-pixel
- } // else round down to integer pixel
- return( mvChroma );
- }
- // mc - Perform motion compensation for a hSize x vSize block
- void mc( int hSize, int vSize,
- PIXEL *in, PIXEL *out, int hdim,
- int mvX, int mvY // Motion vector
- )
- {
- int intX, intY, fracX, fracY;
-
- #ifdef VVPROFILER
- S32 nVvProfNb = 2;
- if(!pVvProf[nVvProfNb]) pVvProf[nVvProfNb] = newCVvDebugTimer();//memory leak on destruction
- pVvProfCount[nVvProfNb]++;
- StartTime(pVvProf[nVvProfNb]);
- #endif
- intX = mvX >> 1; // Integer part of motion vector
- intY = mvY >> 1;
- fracX = mvX & 0x1; // Fractional part of motion vector
- fracY = mvY & 0x1;
- in += intX + intY * hdim;
- if (hSize != 16 && hSize != 8 && hSize != 4) {
- H261ErrMsg("mc -- hSize not supported");
- exit(0);
- }
- if (fracY == 0) {
- if (fracX == 0) {
- // No interpolation
- if (hSize == 8) {
- mc8pelsNoInterpol( in, out, hdim, vSize );
- } else if (hSize == 16) {
- mc16pelsNoInterpol( in, out, hdim, vSize );
- } else {
- mc4pelsNoInterpol( in, out, hdim, vSize );
- }
- } else {
- // Horizontal interpolation
- if (hSize == 8) {
- mc8pelsHorInterpol( in, out, hdim, vSize );
- } else if (hSize == 16) {
- mc16pelsHorInterpol( in, out, hdim, vSize );
- } else {
- mc4pelsHorInterpol( in, out, hdim, vSize );
- }
- }
- } else if (fracX == 0) {
- // Vertical interpolation
- if (hSize == 8) {
- mc8pelsVertInterpol( in, out, hdim, vSize );
- } else if (hSize == 16) {
- mc16pelsVertInterpol( in, out, hdim, vSize );
- } else {
- mc4pelsVertInterpol( in, out, hdim, vSize );
- }
- } else { // Bilinear interpolation
- if (hSize == 8) {
- mc8pels2DInterpol( in, out, hdim, vSize );
- } else if (hSize == 16) {
- mc16pels2DInterpol( in, out, hdim, vSize );
- } else {
- mc4pels2DInterpol( in, out, hdim, vSize );
- }
- }
- #ifdef VVPROFILER
- StopAndAccuTime(pVvProf[nVvProfNb]);
- #endif
- return;
- }
- // Limit x to interval [low,high]
- #define LIMIT( low, x, high ) max( low, min( x, high ))
- // limitMC - Perform motion compensation; use edge pixels when referring to
- // pixels outside picture
- static void limitMC( int hSize, int vSize,
- PIXEL const *in, PIXEL *out, int hdim,
- int mvX, int mvY, // Motion vector
- int minX, int maxX, int minY, int maxY // Limits for hor/vert indices
- )
- {
- #define MAX_HSIZE (16)
- int intX, intY, fracX, fracY, outsideTop, outsideBot, repeatTop, repeatBot, x, y;
- static int mapX[MAX_HSIZE + 1];
- PIXEL *outSave, *outBegin;
- union { // Copy words to speed up routine
- PIXEL *pix;
- U32 *word;
- } pIn, pOut;
-
- if (hSize & 0x3) {
- H261ErrMsg("limitMC -- hSize must be multiple of 4");
- exit(0);
- }
- if (hSize > MAX_HSIZE) {
- H261ErrMsg("limitMC -- hSize too large");
- exit(0);
- }
- intX = mvX >> 1; // Integer part of motion vector
- intY = mvY >> 1;
- fracX = mvX & 0x1; // Fractional part of motion vector
- fracY = mvY & 0x1;
- // Create horizontal mapping vector
- for (x = 0; x <= hSize; ++x) {
- mapX[x] = LIMIT( minX, x + intX, maxX );
- }
- //repeatTop = max( 0, minY - intY); // Lines on top that are outside
- //repeatBot = max( 0, vSize - 1 + intY + fracY - maxY); // Lines at bottom that are outside
- outsideTop = max( 0, minY - intY); // Lines on top that are outside
- outsideBot = max( 0, vSize - 1 + intY + fracY - maxY); // Lines at bottom that are outside
- // Don't produce more lines than the blocksize (used to be a nasty bug hidden here)
- repeatTop = min( outsideTop, vSize );
- if (outsideBot < vSize) {
- repeatBot = outsideBot;
- in += (intY + outsideTop) * hdim; // Apply vert motion comp. (hor MC thru mapping)
- } else { // Whole block is "outside" bottom of picture
- repeatBot = vSize;
- in += (vSize - 1) * hdim; // Point to last line of picture
- }
- // Output pointers
- outSave = out; // Upper left corner of output block
- out += repeatTop * hdim; // "Repeated" lines will be filled in later
- outBegin = out; // Save address for first valid output line
- if (fracY == 0) {
- // Ensure that at least one output line gets written
- if (repeatTop == vSize) {
- --repeatTop;
- out -= hdim;
- outBegin = out;
- } else if (repeatBot == vSize) {
- --repeatBot;
- }
- if (fracX == 0) {
- // No interpolation
- for (y = repeatTop; y < vSize - repeatBot; ++y) {
- for (x = 0; x < hSize; x += 4) {
- out[x+0] = in[ mapX[x+0] ];
- out[x+1] = in[ mapX[x+1] ];
- out[x+2] = in[ mapX[x+2] ];
- out[x+3] = in[ mapX[x+3] ];
- }
- in += hdim;
- out += hdim;
- }
- } else {
- // Horizontal interpolation
- for (y = repeatTop; y < vSize - repeatBot; ++y) {
- for (x = 0; x < hSize; x += 4) {
- out[x+0] = (in[mapX[x+0]] + in[mapX[x+1]] + 1) >> 1;
- out[x+1] = (in[mapX[x+1]] + in[mapX[x+2]] + 1) >> 1;
- out[x+2] = (in[mapX[x+2]] + in[mapX[x+3]] + 1) >> 1;
- out[x+3] = (in[mapX[x+3]] + in[mapX[x+4]] + 1) >> 1;
- }
- in += hdim;
- out += hdim;
- }
- }
- } else if (fracX == 0) {
- // Vertical interpolation
- if (repeatTop > 0) { // Produce line to repeat
- outBegin = out - hdim;
- for (x = 0; x < hSize; ++x) {
- outBegin[x] = in[ mapX[x] ];
- }
- }
- for (y = repeatTop; y < vSize - repeatBot; ++y) {
- for (x = 0; x < hSize; x += 4) {
- out[x+0] = (in[mapX[x+0]] + in[mapX[x+0] + hdim] + 1) >> 1;
- out[x+1] = (in[mapX[x+1]] + in[mapX[x+1] + hdim] + 1) >> 1;
- out[x+2] = (in[mapX[x+2]] + in[mapX[x+2] + hdim] + 1) >> 1;
- out[x+3] = (in[mapX[x+3]] + in[mapX[x+3] + hdim] + 1) >> 1;
- }
- in += hdim;
- out += hdim;
- }
- if (repeatBot > 0) { // Produce line to repeat
- for (x = 0; x < hSize; ++x) {
- out[x] = in[ mapX[x] ];
- }
- out += hdim;
- }
- } else { // Bilinear interpolation
- if (repeatTop > 0) { // Produce line to repeat
- outBegin = out - hdim;
- for (x = 0; x < hSize; ++x) {
- outBegin[x] = (in[mapX[x]] + in[mapX[x+1]] + 1) >> 1;
- }
- }
- for (y = repeatTop; y < vSize - repeatBot; ++y) {
- for (x = 0; x < hSize; x += 4) {
- out[x+0] = (in[mapX[x+0]] + in[mapX[x+0] + hdim]
- + in[mapX[x+1]] + in[mapX[x+1] + hdim] + 2) >> 2;
- out[x+1] = (in[mapX[x+1]] + in[mapX[x+1] + hdim]
- + in[mapX[x+2]] + in[mapX[x+2] + hdim] + 2) >> 2;
- out[x+2] = (in[mapX[x+2]] + in[mapX[x+2] + hdim]
- + in[mapX[x+3]] + in[mapX[x+3] + hdim] + 2) >> 2;
- out[x+3] = (in[mapX[x+3]] + in[mapX[x+3] + hdim]
- + in[mapX[x+4]] + in[mapX[x+4] + hdim] + 2) >> 2;
- }
- in += hdim;
- out += hdim;
- }
- if (repeatBot > 0) { // Produce line to repeat
- for (x = 0; x < hSize; ++x) {
- out[x] = (in[mapX[x]] + in[mapX[x+1]] + 1) >> 1;
- }
- out += hdim;
- }
- }
- if (fracY == 1) {
- --repeatTop; // Already did one line
- --repeatBot;
- }
- // Repeat first line at top
- pIn.pix = outBegin;
- pOut.pix = outSave;
- for (y = 0; y < repeatTop; ++y) {
- for (x = 0; x < (hSize >> 2); ++x) {
- *(pOut.word + x) = *(pIn.word + x);
- }
- pOut.pix += hdim;
- }
- // Repeat last line at the bottom
- pIn.pix = out - hdim;
- pOut.pix = out;
- for (y = 0; y < repeatBot; ++y) {
- for (x = 0; x < (hSize >> 2); ++x) {
- *(pOut.word + x) = *(pIn.word + x);
- }
- pOut.pix += hdim;
- }
- return;
- }
- // mvDiff - Return YES if motion vector for adjacent block is different, otherwise NO
- // Return NO if adjacent block is INTRA (unless PBframe)
- static int mvDiff( int mvX, int mvY, // motion vector
- MACROBLOCK_DESCR const *borderMB, // adjacent macroblock
- int subBlk, // adjacent subblock (needed if borderMB has 4 motion vectors)
- int PBframe, // If PBframe: do overlap also with INTRA neighbor
- int border[2] // return motion vector components for adjacent block
- )
- {
- if (borderMB->mtype == MTYPE263_INTER4V) {
- border[0] = borderMB->blkMvX[ subBlk ];
- border[1] = borderMB->blkMvY[ subBlk ];
- } else {
- border[0] = borderMB->mv_x;
- border[1] = borderMB->mv_y;
- if (borderMB->mtype >= MTYPE263_INTRA && PBframe == 0) {
- return( NO ); // No overlap with INTRA neighbor (except in PBframe)
- }
- }
- if (border[0] != mvX || border[1] != mvY) {
- return( YES );
- }
- return( NO );
- }
// median3 - return the middle value (median) of x[0], x[1] and x[2]
static int median3( int x[3] )
{
    int lo = x[0];
    int hi = x[1];

    // Put the first two values in ascending order
    if (hi < lo) {
        int swap = lo;
        lo = hi;
        hi = swap;
    }
    // Place the third value relative to the ordered pair
    if (x[2] < lo) {
        return( lo );
    }
    if (x[2] < hi) {
        return( x[2] );
    }
    return( hi );
}
- // MvPred - Compute mv predictor (mvX,mvY) for block "blk" of macroblock mb[0].
- // blk = UPPER_LEFT_BLK, UPPER_RIGHT_BLK, LOWER_LEFT_BLK, LOWER_RIGHT_BLK,
- // or WHOLE_MACROBLOCK.
- // If horPredOnly=YES: don't use previous row of macroblocks in prediction
- extern void MvPred( MACROBLOCK_DESCR mb[],
- int blk, // specify block: UL, UR, LL, LR, or WHOLE
- int mbhor, // offset from previous row of MBs
- int horPredOnly,
- int *mvX, int *mvY )
- {
- int x[3], y[3];
-
- switch (blk) {
- case UPPER_RIGHT_BLK:
- x[0] = mb[0].blkMvX[UPPER_LEFT_BLK];
- y[0] = mb[0].blkMvY[UPPER_LEFT_BLK];
- if (horPredOnly == YES) { // Top border or sending GOB header to resync
- *mvX = x[0], *mvY = y[0];
- } else {
- if (mb[-mbhor].mtype == MTYPE263_INTER4V) {
- x[1] = mb[-mbhor].blkMvX[LOWER_RIGHT_BLK];
- y[1] = mb[-mbhor].blkMvY[LOWER_RIGHT_BLK];
- } else {
- x[1] = mb[-mbhor].mv_x;
- y[1] = mb[-mbhor].mv_y;
- }
- if (mb[0].x == mbhor - 1) { // Right border
- x[2] = 0, y[2] = 0;
- } else if (mb[1-mbhor].mtype == MTYPE263_INTER4V) {
- x[2] = mb[1-mbhor].blkMvX[LOWER_LEFT_BLK];
- y[2] = mb[1-mbhor].blkMvY[LOWER_LEFT_BLK];
- } else {
- x[2] = mb[1-mbhor].mv_x;
- y[2] = mb[1-mbhor].mv_y;
- }
- *mvX = median3( x );
- *mvY = median3( y );
- }
- break;
-
- case LOWER_LEFT_BLK:
- if (mb[0].x == 0) { // Left border
- x[0] = 0, y[0] = 0;
- } else if (mb[-1].mtype == MTYPE263_INTER4V) {
- x[0] = mb[-1].blkMvX[LOWER_RIGHT_BLK];
- y[0] = mb[-1].blkMvY[LOWER_RIGHT_BLK];
- } else {
- x[0] = mb[-1].mv_x;
- y[0] = mb[-1].mv_y;
- }
- x[1] = mb[0].blkMvX[UPPER_LEFT_BLK];
- y[1] = mb[0].blkMvY[UPPER_LEFT_BLK];
- x[2] = mb[0].blkMvX[UPPER_RIGHT_BLK];
- y[2] = mb[0].blkMvY[UPPER_RIGHT_BLK];
- *mvX = median3( x );
- *mvY = median3( y );
- break;
-
- case LOWER_RIGHT_BLK:
- x[0] = mb[0].blkMvX[LOWER_LEFT_BLK];
- y[0] = mb[0].blkMvY[LOWER_LEFT_BLK];
- x[1] = mb[0].blkMvX[UPPER_LEFT_BLK];
- y[1] = mb[0].blkMvY[UPPER_LEFT_BLK];
- x[2] = mb[0].blkMvX[UPPER_RIGHT_BLK];
- y[2] = mb[0].blkMvY[UPPER_RIGHT_BLK];
- *mvX = median3( x );
- *mvY = median3( y );
- break;
-
- case WHOLE_MACROBLOCK:
- case UPPER_LEFT_BLK:
- default:
- if (mb[0].x == 0) { // Left border
- x[0] = 0, y[0] = 0;
- } else if (mb[-1].mtype == MTYPE263_INTER4V) {
- x[0] = mb[-1].blkMvX[UPPER_RIGHT_BLK];
- y[0] = mb[-1].blkMvY[UPPER_RIGHT_BLK];
- } else {
- x[0] = mb[-1].mv_x;
- y[0] = mb[-1].mv_y;
- }
- if (horPredOnly == YES) { // Top border or sending GOB header to resync
- *mvX = x[0], *mvY = y[0];
- } else {
- if (mb[-mbhor].mtype == MTYPE263_INTER4V) {
- x[1] = mb[-mbhor].blkMvX[LOWER_LEFT_BLK];
- y[1] = mb[-mbhor].blkMvY[LOWER_LEFT_BLK];
- } else {
- x[1] = mb[-mbhor].mv_x;
- y[1] = mb[-mbhor].mv_y;
- }
- if (mb[0].x == mbhor - 1) { // Right border
- x[2] = 0, y[2] = 0;
- } else if (mb[1-mbhor].mtype == MTYPE263_INTER4V) {
- x[2] = mb[1-mbhor].blkMvX[LOWER_LEFT_BLK];
- y[2] = mb[1-mbhor].blkMvY[LOWER_LEFT_BLK];
- } else {
- x[2] = mb[1-mbhor].mv_x;
- y[2] = mb[1-mbhor].mv_y;
- }
- *mvX = median3( x );
- *mvY = median3( y );
- }
- break;
- }
- return;
- }
- // doOverlapMC - Perform overlapped motion compensation on 8x8 block
- static void doOverlapMC( int subBlk, // Indicates subblock to process (UL, UR, LL, LR)
- MACROBLOCK_DESCR *mb, // Used to determine (x,y) coordinates for block
- PICTURE *prevPic, // Previous picture; used to create overlapping MC
- PICTURE *pic, // Contains non-overlapped MC on entry; returns overlapped MC
- int borderMv[4][2], // Motion vectors for adjacent blocks (L,Top,R,Bottom)
- int left, // If YES, overlap using LEFT mv
- int top, // If YES, overlap using TOP mv
- int right, // If YES, overlap using RIGHT mv
- int bottom // If YES, overlap using BOTTOM mv
- )
- {
- #define LEFT_ADDR (0)
- #define RIGHT_ADDR (LEFT_ADDR + 4)
- #define TOP_OFFSET (8)
- #define TOP_ADDR (LEFT_ADDR + TOP_OFFSET)
- #define MAX_HDIM (352)
- static PIXEL p[8 * MAX_HDIM]; // MC predictions using neighboring vectors
- // Only using first 16 columns; line offset chosen to be same as for pictures
- int row, col, picOffset, hdim, nhor, nvert, *mv;
- PIXEL * source, * dest;
- void (*pMC) ( int hSize, int vSize, PIXEL *in, PIXEL *out, int hdim, int mvX, int mvY);
- #if defined(COMPILE_MMX)
- if(cpuid_is_mmx_motion_on()) {
- //do mmx if compiler switch AND initialized AND detected
- pMC = mcMMX;
- } else
- #endif
- {
- pMC = mc;
- }
- // Compute luma pointers
- col = 16 * mb->x;
- row = 16 * mb->y;
- if (subBlk == UPPER_RIGHT_BLK || subBlk == LOWER_RIGHT_BLK) col += 8;
- if (subBlk == LOWER_LEFT_BLK || subBlk == LOWER_RIGHT_BLK) row += 8;
- hdim = pic->y.hoffset;
- nhor = pic->y.nhor, nvert = pic->y.nvert;
- picOffset = col + row * hdim;
- dest = pic->y.ptr + picOffset; // Point to output luma (non-overlapped MC)
- source = prevPic->y.ptr + picOffset; // Point to input luma (without motion comp)
- if (hdim > MAX_HDIM) {
- H261ErrMsg("doOverlapMC - Increase size of internal array");
- exit(0);
- }
- // Create motion compensated blocks using neighboring motion vectors
- if (left == YES) { // Produce left 4 columns
- mv = borderMv[LEFT];
- if (PointingOutside( col, col+3, row, row+7, *mv, *(mv+1), nhor, nvert) == NO) {
- pMC( 4,8, source, &p[LEFT_ADDR], hdim, *mv, *(mv+1) );
- } else {
- limitMC( 4,8, source, &p[LEFT_ADDR], hdim, *mv, *(mv+1),
- -col, nhor - 1 - col, -row, nvert - 1 - row );
- }
- }
- if (right == YES) { // Produce right 4 columns
- mv = borderMv[RIGHT];
- if (PointingOutside( col+4, col+7, row, row+7, *mv, *(mv+1), nhor, nvert) == NO) {
- pMC( 4,8, source + 4, &p[RIGHT_ADDR], hdim, *mv, *(mv+1) );
- } else {
- limitMC( 4,8, source + 4, &p[RIGHT_ADDR], hdim, *mv, *(mv+1),
- -col - 4, nhor - 1 - col - 4, -row, nvert - 1 - row );
- }
- }
- if (top == YES) { // Produce top 4 rows
- mv = borderMv[TOP];
- if (PointingOutside( col, col+7, row, row+3, *mv, *(mv+1), nhor, nvert) == NO) {
- pMC( 8,4, source, &p[TOP_ADDR], hdim, *mv, *(mv+1) );
- } else {
- limitMC( 8,4, source, &p[TOP_ADDR], hdim, *mv, *(mv+1),
- -col, nhor - 1 - col, -row, nvert - 1 - row );
- }
- }
- if (bottom == YES) { // Produce bottom 4 rows
- mv = borderMv[BOTTOM];
- if (PointingOutside( col, col+7, row+4, row+7, *mv, *(mv+1), nhor, nvert) == NO) {
- pMC( 8,4, source + 4*hdim, &p[TOP_ADDR + 4*hdim], hdim, *mv, *(mv+1) );
- } else {
- limitMC( 8,4, source + 4*hdim, &p[TOP_ADDR + 4*hdim], hdim, *mv, *(mv+1),
- -col, nhor - 1 - col, -row - 4, nvert - 1 - row - 4 );
- }
- }
- // Produce weighted ("overlapped") MC prediction
- weigh8x4( dest, &p[LEFT_ADDR], hdim, top, left, right ); // Top 4 rows
- weigh8x4( dest + 7*hdim, &p[LEFT_ADDR + 7*hdim], -hdim, bottom, left, right ); // Bottom 4 rows
- return;
- }
- // weigh8x4
- static void weigh8x4( PIXEL *dest, PIXEL const *p, int hdim, int vert, int left, int right )
- {
- if (vert == YES) { // Use top/bottom MV
- if (left == YES) {
- weighLeft_Vert( dest, &p[LEFT_ADDR], hdim );
- } else {
- weighNoLeft_Vert( dest, &p[LEFT_ADDR], hdim );
- }
- if (right == YES) {
- weighRight_Vert( dest + 4, &p[RIGHT_ADDR], hdim );
- } else {
- weighNoRight_Vert( dest + 4, &p[RIGHT_ADDR], hdim );
- }
- } else {
- if (left == YES) {
- weighLeft( dest, &p[LEFT_ADDR], hdim );
- }
- if (right == YES) {
- weighRight( dest + 4, &p[RIGHT_ADDR], hdim );
- }
- }
- return;
- }
- // weighLeft_Vert - Overlap three MC predictions for upper left 4x4 pixels
- static void weighLeft_Vert( PIXEL *dest, // Non-overlapped MC; overlapped by this routine
- PIXEL const *p, // p: MC using left motion vector
- // &p[TOP_OFFSET]: MC using top/bottom MV
- int hdim ) // Line offset
- {
- // First row
- dest[0] = (2 * dest[0] + p[0] + p[TOP_OFFSET + 0] + 2) >> 2;
- dest[1] = (5 * dest[1] + p[1] + 2 * p[TOP_OFFSET + 1] + 4) >> 3;
- dest[2] = (5 * dest[2] + p[2] + 2 * p[TOP_OFFSET + 2] + 4) >> 3;
- dest[3] = (5 * dest[3] + p[3] + 2 * p[TOP_OFFSET + 3] + 4) >> 3;
- // Second row
- dest += hdim, p += hdim;
- dest[0] = (5 * dest[0] + 2 * p[0] + p[TOP_OFFSET + 0] + 4) >> 3;
- dest[1] = (5 * dest[1] + 2 * p[1] + p[TOP_OFFSET + 1] + 4) >> 3;
- dest[2] = (5 * dest[2] + p[2] + 2 * p[TOP_OFFSET + 2] + 4) >> 3;
- dest[3] = (5 * dest[3] + p[3] + 2 * p[TOP_OFFSET + 3] + 4) >> 3;
- // Third row
- dest += hdim, p += hdim;
- dest[0] = (5 * dest[0] + 2 * p[0] + p[TOP_OFFSET + 0] + 4) >> 3;
- dest[1] = (5 * dest[1] + 2 * p[1] + p[TOP_OFFSET + 1] + 4) >> 3;
- dest[2] = (6 * dest[2] + p[2] + p[TOP_OFFSET + 2] + 4) >> 3;
- dest[3] = (6 * dest[3] + p[3] + p[TOP_OFFSET + 3] + 4) >> 3;
- // Fourth row (identical to third row)
- dest += hdim, p += hdim;
- dest[0] = (5 * dest[0] + 2 * p[0] + p[TOP_OFFSET + 0] + 4) >> 3;
- dest[1] = (5 * dest[1] + 2 * p[1] + p[TOP_OFFSET + 1] + 4) >> 3;
- dest[2] = (6 * dest[2] + p[2] + p[TOP_OFFSET + 2] + 4) >> 3;
- dest[3] = (6 * dest[3] + p[3] + p[TOP_OFFSET + 3] + 4) >> 3;
- return;
- }
- // weighNoLeft_Vert - Overlap with MV from vert. neighbor for upper left 4x4 pixels
- static void weighNoLeft_Vert( PIXEL *dest, PIXEL const *p, int hdim )
- {
- // First row
- dest[0] = (3 * dest[0] + p[TOP_OFFSET + 0] + 2) >> 2;
- dest[1] = (3 * dest[1] + p[TOP_OFFSET + 1] + 2) >> 2;
- dest[2] = (3 * dest[2] + p[TOP_OFFSET + 2] + 2) >> 2;
- dest[3] = (3 * dest[3] + p[TOP_OFFSET + 3] + 2) >> 2;
- // Second row
- dest += hdim, p += hdim;
- dest[0] = (7 * dest[0] + p[TOP_OFFSET + 0] + 4) >> 3;
- dest[1] = (7 * dest[1] + p[TOP_OFFSET + 1] + 4) >> 3;
- dest[2] = (3 * dest[2] + p[TOP_OFFSET + 2] + 2) >> 2;
- dest[3] = (3 * dest[3] + p[TOP_OFFSET + 3] + 2) >> 2;
- // Third row
- dest += hdim, p += hdim;
- dest[0] = (7 * dest[0] + p[TOP_OFFSET + 0] + 4) >> 3;
- dest[1] = (7 * dest[1] + p[TOP_OFFSET + 1] + 4) >> 3;
- dest[2] = (7 * dest[2] + p[TOP_OFFSET + 2] + 4) >> 3;
- dest[3] = (7 * dest[3] + p[TOP_OFFSET + 3] + 4) >> 3;
- // Fourth row (identical to third row)
- dest += hdim, p += hdim;
- dest[0] = (7 * dest[0] + p[TOP_OFFSET + 0] + 4) >> 3;
- dest[1] = (7 * dest[1] + p[TOP_OFFSET + 1] + 4) >> 3;
- dest[2] = (7 * dest[2] + p[TOP_OFFSET + 2] + 4) >> 3;
- dest[3] = (7 * dest[3] + p[TOP_OFFSET + 3] + 4) >> 3;
- return;
- }
- // weighLeft - Overlap with MV from left neighbor for upper left 4x4 pixels
- static void weighLeft( PIXEL *dest, PIXEL const *p, int hdim )
- {
- // First row
- dest[0] = (3 * dest[0] + p[0] + 2) >> 2;
- dest[1] = (7 * dest[1] + p[1] + 4) >> 3;
- dest[2] = (7 * dest[2] + p[2] + 4) >> 3;
- dest[3] = (7 * dest[3] + p[3] + 4) >> 3;
- // Second row
- dest += hdim, p += hdim;
- dest[0] = (3 * dest[0] + p[0] + 2) >> 2;
- dest[1] = (3 * dest[1] + p[1] + 2) >> 2;
- dest[2] = (7 * dest[2] + p[2] + 4) >> 3;
- dest[3] = (7 * dest[3] + p[3] + 4) >> 3;
- // Third row (identical to second row)
- dest += hdim, p += hdim;
- dest[0] = (3 * dest[0] + p[0] + 2) >> 2;
- dest[1] = (3 * dest[1] + p[1] + 2) >> 2;
- dest[2] = (7 * dest[2] + p[2] + 4) >> 3;
- dest[3] = (7 * dest[3] + p[3] + 4) >> 3;
- // Fourth row (identical to third row)
- dest += hdim, p += hdim;
- dest[0] = (3 * dest[0] + p[0] + 2) >> 2;
- dest[1] = (3 * dest[1] + p[1] + 2) >> 2;
- dest[2] = (7 * dest[2] + p[2] + 4) >> 3;
- dest[3] = (7 * dest[3] + p[3] + 4) >> 3;
- return;
- }
- // weighRight_Vert - Derived from weighLeft_Vert by reversing indices
- // This means that the assembler routine can be easily derived from the other
- static void weighRight_Vert( PIXEL *dest, // Non-overlapped MC; overlapped by this routine
- PIXEL const *p, // p: MC using right motion vector
- // &p[TOP_OFFSET]: MC using top/bottom MV
- int hdim ) // Line offset
- {
- // First row
- dest[3] = (2 * dest[3] + p[3] + p[TOP_OFFSET + 3] + 2) >> 2;
- dest[2] = (5 * dest[2] + p[2] + 2 * p[TOP_OFFSET + 2] + 4) >> 3;
- dest[1] = (5 * dest[1] + p[1] + 2 * p[TOP_OFFSET + 1] + 4) >> 3;
- dest[0] = (5 * dest[0] + p[0] + 2 * p[TOP_OFFSET + 0] + 4) >> 3;
- // Second row
- dest += hdim, p += hdim;
- dest[3] = (5 * dest[3] + 2 * p[3] + p[TOP_OFFSET + 3] + 4) >> 3;
- dest[2] = (5 * dest[2] + 2 * p[2] + p[TOP_OFFSET + 2] + 4) >> 3;
- dest[1] = (5 * dest[1] + p[1] + 2 * p[TOP_OFFSET + 1] + 4) >> 3;
- dest[0] = (5 * dest[0] + p[0] + 2 * p[TOP_OFFSET + 0] + 4) >> 3;
- // Third row
- dest += hdim, p += hdim;
- dest[3] = (5 * dest[3] + 2 * p[3] + p[TOP_OFFSET + 3] + 4) >> 3;
- dest[2] = (5 * dest[2] + 2 * p[2] + p[TOP_OFFSET + 2] + 4) >> 3;
- dest[1] = (6 * dest[1] + p[1] + p[TOP_OFFSET + 1] + 4) >> 3;
- dest[0] = (6 * dest[0] + p[0] + p[TOP_OFFSET + 0] + 4) >> 3;
- // Fourth row (identical to third row)
- dest += hdim, p += hdim;
- dest[3] = (5 * dest[3] + 2 * p[3] + p[TOP_OFFSET + 3] + 4) >> 3;
- dest[2] = (5 * dest[2] + 2 * p[2] + p[TOP_OFFSET + 2] + 4) >> 3;
- dest[1] = (6 * dest[1] + p[1] + p[TOP_OFFSET + 1] + 4) >> 3;
- dest[0] = (6 * dest[0] + p[0] + p[TOP_OFFSET + 0] + 4) >> 3;
- return;
- }
- // weighNoRight_Vert - Derived from weighNoLeft_Vert by reversing indices
- // This means that the assembler routine can be easily derived from the other
- static void weighNoRight_Vert( PIXEL *dest, PIXEL const *p, int hdim )
- {
- // First row
- dest[3] = (3 * dest[3] + p[TOP_OFFSET + 3] + 2) >> 2;
- dest[2] = (3 * dest[2] + p[TOP_OFFSET + 2] + 2) >> 2;
- dest[1] = (3 * dest[1] + p[TOP_OFFSET + 1] + 2) >> 2;
- dest[0] = (3 * dest[0] + p[TOP_OFFSET + 0] + 2) >> 2;
- // Second row
- dest += hdim, p += hdim;
- dest[3] = (7 * dest[3] + p[TOP_OFFSET + 3] + 4) >> 3;
- dest[2] = (7 * dest[2] + p[TOP_OFFSET + 2] + 4) >> 3;
- dest[1] = (3 * dest[1] + p[TOP_OFFSET + 1] + 2) >> 2;
- dest[0] = (3 * dest[0] + p[TOP_OFFSET + 0] + 2) >> 2;
- // Third row
- dest += hdim, p += hdim;
- dest[3] = (7 * dest[3] + p[TOP_OFFSET + 3] + 4) >> 3;
- dest[2] = (7 * dest[2] + p[TOP_OFFSET + 2] + 4) >> 3;
- dest[1] = (7 * dest[1] + p[TOP_OFFSET + 1] + 4) >> 3;
- dest[0] = (7 * dest[0] + p[TOP_OFFSET + 0] + 4) >> 3;
- // Fourth row (identical to third row)
- dest += hdim, p += hdim;
- dest[3] = (7 * dest[3] + p[TOP_OFFSET + 3] + 4) >> 3;
- dest[2] = (7 * dest[2] + p[TOP_OFFSET + 2] + 4) >> 3;
- dest[1] = (7 * dest[1] + p[TOP_OFFSET + 1] + 4) >> 3;
- dest[0] = (7 * dest[0] + p[TOP_OFFSET + 0] + 4) >> 3;
- return;
- }
- // weighRight - Derived from weighLeft by reversing indices
- // This means that the assembler routine can be easily derived from the other
- static void weighRight( PIXEL *dest, PIXEL const *p, int hdim )
- {
- // First row
- dest[3] = (3 * dest[3] + p[3] + 2) >> 2;
- dest[2] = (7 * dest[2] + p[2] + 4) >> 3;
- dest[1] = (7 * dest[1] + p[1] + 4) >> 3;
- dest[0] = (7 * dest[0] + p[0] + 4) >> 3;
- // Second row
- dest += hdim, p += hdim;
- dest[3] = (3 * dest[3] + p[3] + 2) >> 2;
- dest[2] = (3 * dest[2] + p[2] + 2) >> 2;
- dest[1] = (7 * dest[1] + p[1] + 4) >> 3;
- dest[0] = (7 * dest[0] + p[0] + 4) >> 3;
- // Third row (identical to second row)
- dest += hdim, p += hdim;
- dest[3] = (3 * dest[3] + p[3] + 2) >> 2;
- dest[2] = (3 * dest[2] + p[2] + 2) >> 2;
- dest[1] = (7 * dest[1] + p[1] + 4) >> 3;
- dest[0] = (7 * dest[0] + p[0] + 4) >> 3;
- // Fourth row (identical to third row)
- dest += hdim, p += hdim;
- dest[3] = (3 * dest[3] + p[3] + 2) >> 2;
- dest[2] = (3 * dest[2] + p[2] + 2) >> 2;
- dest[1] = (7 * dest[1] + p[1] + 4) >> 3;
- dest[0] = (7 * dest[0] + p[0] + 4) >> 3;
- return;
- }
- static void mc16pelsNoInterpol( PIXEL *in, PIXEL *out, int hdim, int vSize )
- {
- #ifndef FOR_UNIX
- union { // Copy words to speed up routine
- PIXEL *pix;
- U32 *word;
- } pIn, pOut;
- pIn.pix = in;
- pOut.pix = out;
- while (vSize > 0) {
- *(pOut.word + 0) = *(pIn.word + 0);
- *(pOut.word + 1) = *(pIn.word + 1);
- *(pOut.word + 2) = *(pIn.word + 2);
- *(pOut.word + 3) = *(pIn.word + 3);
- pIn.pix += hdim;
- pOut.pix += hdim;
- --vSize;
- }
- return;
- #else
- union { // PIXELs are not always word-aligned! Gotta copy bytes in UNIX
- PIXEL *pix;
- BYTE *byte;
- } pIn, pOut;
- BYTE *pInbyte;
- BYTE *pOutbyte;
- int i;
- pIn.pix = in;
- pOut.pix = out;
- while (vSize > 0) {
- pInbyte = pIn.byte;
- pOutbyte = pOut.byte;
- for (i=0; i<16; i++)
- *(pOutbyte++) = *(pInbyte++);
- pIn.pix += hdim;
- pOut.pix += hdim;
- --vSize;
- }
- return;
- #endif
- }
- static void mc8pelsNoInterpol( PIXEL *in, PIXEL *out, int hdim, int vSize )
- {
- #ifndef FOR_UNIX
- union { // Copy words to speed up routine
- PIXEL *pix;
- U32 *word;
- } pIn, pOut;
- pIn.pix = in;
- pOut.pix = out;
- while (vSize > 0) {
- *(pOut.word + 0) = *(pIn.word + 0);
- *(pOut.word + 1) = *(pIn.word + 1);
- pIn.pix += hdim;
- pOut.pix += hdim;
- --vSize;
- }
- return;
- #else
- union { // PIXELs are not always word-aligned! Gotta copy bytes in UNIX
- PIXEL *pix;
- BYTE *byte;
- } pIn, pOut;
- pIn.pix = in;
- pOut.pix = out;
- while (vSize > 0) {
- *(pOut.byte + 0) = *(pIn.byte + 0);
- *(pOut.byte + 1) = *(pIn.byte + 1);
- *(pOut.byte + 2) = *(pIn.byte + 2);
- *(pOut.byte + 3) = *(pIn.byte + 3);
- *(pOut.byte + 4) = *(pIn.byte + 4);
- *(pOut.byte + 5) = *(pIn.byte + 5);
- *(pOut.byte + 6) = *(pIn.byte + 6);
- *(pOut.byte + 7) = *(pIn.byte + 7);
- pIn.pix += hdim;
- pOut.pix += hdim;
- --vSize;
- }
- return;
- #endif
- }
- static void mc4pelsNoInterpol( PIXEL *in, PIXEL *out, int hdim, int vSize )
- {
- #ifndef FOR_UNIX
- union { // Copy words to speed up routine
- PIXEL *pix;
- U32 *word;
- } pIn, pOut;
- pIn.pix = in;
- pOut.pix = out;
- while (vSize > 0) {
- *(pOut.word + 0) = *(pIn.word + 0);
- pIn.pix += hdim;
- pOut.pix += hdim;
- --vSize;
- }
- return;
- #else
- union { // PIXELs are not always word-aligned! Gotta copy bytes in UNIX
- PIXEL *pix;
- BYTE *byte;
- } pIn, pOut;
- pIn.pix = in;
- pOut.pix = out;
- while (vSize > 0) {
- *(pOut.byte + 0) = *(pIn.byte + 0);
- *(pOut.byte + 1) = *(pIn.byte + 1);
- *(pOut.byte + 2) = *(pIn.byte + 2);
- *(pOut.byte + 3) = *(pIn.byte + 3);
- pIn.pix += hdim;
- pOut.pix += hdim;
- --vSize;
- }
- return;
- #endif
- }
- static void mc16pelsHorInterpol( PIXEL const *in, PIXEL *out, int hdim, int vSize )
- {
- while (vSize > 0) {
- out[0] = (in[0] + in[1] + 1) >> 1;
- out[1] = (in[1] + in[2] + 1) >> 1;
- out[2] = (in[2] + in[3] + 1) >> 1;
- out[3] = (in[3] + in[4] + 1) >> 1;
- out[4] = (in[4] + in[5] + 1) >> 1;
- out[5] = (in[5] + in[6] + 1) >> 1;
- out[6] = (in[6] + in[7] + 1) >> 1;
- out[7] = (in[7] + in[8] + 1) >> 1;
- out[8] = (in[8] + in[9] + 1) >> 1;
- out[9] = (in[9] + in[10] + 1) >> 1;
- out[10] = (in[10] + in[11] + 1) >> 1;
- out[11] = (in[11] + in[12] + 1) >> 1;
- out[12] = (in[12] + in[13] + 1) >> 1;
- out[13] = (in[13] + in[14] + 1) >> 1;
- out[14] = (in[14] + in[15] + 1) >> 1;
- out[15] = (in[15] + in[16] + 1) >> 1;
- in += hdim;
- out += hdim;
- --vSize;
- }
- return;
- }
- static void mc8pelsHorInterpol( PIXEL const *in, PIXEL *out, int hdim, int vSize )
- {
- while (vSize > 0) {
- out[0] = (in[0] + in[1] + 1) >> 1;
- out[1] = (in[1] + in[2] + 1) >> 1;
- out[2] = (in[2] + in[3] + 1) >> 1;
- out[3] = (in[3] + in[4] + 1) >> 1;
- out[4] = (in[4] + in[5] + 1) >> 1;
- out[5] = (in[5] + in[6] + 1) >> 1;
- out[6] = (in[6] + in[7] + 1) >> 1;
- out[7] = (in[7] + in[8] + 1) >> 1;
- in += hdim;
- out += hdim;
- --vSize;
- }
- return;
- }
- static void mc4pelsHorInterpol( PIXEL const *in, PIXEL *out, int hdim, int vSize )
- {
- while (vSize > 0) {
- out[0] = (in[0] + in[1] + 1) >> 1;
- out[1] = (in[1] + in[2] + 1) >> 1;
- out[2] = (in[2] + in[3] + 1) >> 1;
- out[3] = (in[3] + in[4] + 1) >> 1;
- in += hdim;
- out += hdim;
- --vSize;
- }
- return;
- }
- static void mc16pelsVertInterpol( PIXEL const *in, PIXEL *out, int hdim, int vSize )
- {
- while (vSize > 0) {
- out[0] = (in[0] + in[hdim+0] + 1) >> 1;
- out[1] = (in[1] + in[hdim+1] + 1) >> 1;
- out[2] = (in[2] + in[hdim+2] + 1) >> 1;
- out[3] = (in[3] + in[hdim+3] + 1) >> 1;
- out[4] = (in[4] + in[hdim+4] + 1) >> 1;
- out[5] = (in[5] + in[hdim+5] + 1) >> 1;
- out[6] = (in[6] + in[hdim+6] + 1) >> 1;
- out[7] = (in[7] + in[hdim+7] + 1) >> 1;
- out[8] = (in[8] + in[hdim+8] + 1) >> 1;
- out[9] = (in[9] + in[hdim+9] + 1) >> 1;
- out[10] = (in[10] + in[hdim+10] + 1) >> 1;
- out[11] = (in[11] + in[hdim+11] + 1) >> 1;
- out[12] = (in[12] + in[hdim+12] + 1) >> 1;
- out[13] = (in[13] + in[hdim+13] + 1) >> 1;
- out[14] = (in[14] + in[hdim+14] + 1) >> 1;
- out[15] = (in[15] + in[hdim+15] + 1) >> 1;
- in += hdim;
- out += hdim;
- --vSize;
- }
- return;
- }
- static void mc8pelsVertInterpol( PIXEL const *in, PIXEL *out, int hdim, int vSize )
- {
- while (vSize > 0) {
- out[0] = (in[0] + in[hdim+0] + 1) >> 1;
- out[1] = (in[1] + in[hdim+1] + 1) >> 1;
- out[2] = (in[2] + in[hdim+2] + 1) >> 1;
- out[3] = (in[3] + in[hdim+3] + 1) >> 1;
- out[4] = (in[4] + in[hdim+4] + 1) >> 1;
- out[5] = (in[5] + in[hdim+5] + 1) >> 1;
- out[6] = (in[6] + in[hdim+6] + 1) >> 1;
- out[7] = (in[7] + in[hdim+7] + 1) >> 1;
- in += hdim;
- out += hdim;
- --vSize;
- }
- return;
- }
- static void mc4pelsVertInterpol( PIXEL const *in, PIXEL *out, int hdim, int vSize )
- {
- while (vSize > 0) {
- out[0] = (in[0] + in[hdim+0] + 1) >> 1;
- out[1] = (in[1] + in[hdim+1] + 1) >> 1;
- out[2] = (in[2] + in[hdim+2] + 1) >> 1;
- out[3] = (in[3] + in[hdim+3] + 1) >> 1;
- in += hdim;
- out += hdim;
- --vSize;
- }
- return;
- }
- static void mc16pels2DInterpol( PIXEL const *in, PIXEL *out, int hdim, int vSize )
- {
- while (vSize > 0) {
- out[0] = (in[0] + in[1] + in[hdim+0] + in[hdim+1] + 2) >> 2;
- out[1] = (in[1] + in[2] + in[hdim+1] + in[hdim+2] + 2) >> 2;
- out[2] = (in[2] + in[3] + in[hdim+2] + in[hdim+3] + 2) >> 2;
- out[3] = (in[3] + in[4] + in[hdim+3] + in[hdim+4] + 2) >> 2;
- out[4] = (in[4] + in[5] + in[hdim+4] + in[hdim+5] + 2) >> 2;
- out[5] = (in[5] + in[6] + in[hdim+5] + in[hdim+6] + 2) >> 2;
- out[6] = (in[6] + in[7] + in[hdim+6] + in[hdim+7] + 2) >> 2;
- out[7] = (in[7] + in[8] + in[hdim+7] + in[hdim+8] + 2) >> 2;
- out[8] = (in[8] + in[9] + in[hdim+8] + in[hdim+9] + 2) >> 2;
- out[9] = (in[9] + in[10] + in[hdim+9] + in[hdim+10] + 2) >> 2;
- out[10] = (in[10] + in[11] + in[hdim+10] + in[hdim+11] + 2) >> 2;
- out[11] = (in[11] + in[12] + in[hdim+11] + in[hdim+12] + 2) >> 2;
- out[12] = (in[12] + in[13] + in[hdim+12] + in[hdim+13] + 2) >> 2;
- out[13] = (in[13] + in[14] + in[hdim+13] + in[hdim+14] + 2) >> 2;
- out[14] = (in[14] + in[15] + in[hdim+14] + in[hdim+15] + 2) >> 2;
- out[15] = (in[15] + in[16] + in[hdim+15] + in[hdim+16] + 2) >> 2;
- in += hdim;
- out += hdim;
- --vSize;
- }
- return;
- }
- static void mc8pels2DInterpol( PIXEL const *in, PIXEL *out, int hdim, int vSize )
- {
- while (vSize > 0) {
- out[0] = (in[0] + in[1] + in[hdim+0] + in[hdim+1] + 2) >> 2;
- out[1] = (in[1] + in[2] + in[hdim+1] + in[hdim+2] + 2) >> 2;
- out[2] = (in[2] + in[3] + in[hdim+2] + in[hdim+3] + 2) >> 2;
- out[3] = (in[3] + in[4] + in[hdim+3] + in[hdim+4] + 2) >> 2;
- out[4] = (in[4] + in[5] + in[hdim+4] + in[hdim+5] + 2) >> 2;
- out[5] = (in[5] + in[6] + in[hdim+5] + in[hdim+6] + 2) >> 2;
- out[6] = (in[6] + in[7] + in[hdim+6] + in[hdim+7] + 2) >> 2;
- out[7] = (in[7] + in[8] + in[hdim+7] + in[hdim+8] + 2) >> 2;
- in += hdim;
- out += hdim;
- --vSize;
- }
- return;
- }
- static void mc4pels2DInterpol( PIXEL const *in, PIXEL *out, int hdim, int vSize )
- {
- while (vSize > 0) {
- out[0] = (in[0] + in[1] + in[hdim+0] + in[hdim+1] + 2) >> 2;
- out[1] = (in[1] + in[2] + in[hdim+1] + in[hdim+2] + 2) >> 2;
- out[2] = (in[2] + in[3] + in[hdim+2] + in[hdim+3] + 2) >> 2;
- out[3] = (in[3] + in[4] + in[hdim+3] + in[hdim+4] + 2) >> 2;
- in += hdim;
- out += hdim;
- --vSize;
- }
- return;
- }
- // Temporary arrays for backwards prediction (using next P-picture)
- static PIXEL yBack[16*16], cbBack[8*8], crBack[8*8];
- // saveBackwardPred - save macroblock to temporary area
- static void saveBackwardPred( MACROBLOCK_DESCR * mb, // Describes block to be saved
- PICTURE * pic // Picture with pixels to be saved
- )
- {
- int row, col, picOffset, hdim;
- // Compute luma pointer
- col = 16 * mb->x;
- row = 16 * mb->y;
- hdim = pic->y.hoffset;
- picOffset = col + row * hdim;
- // Copy luma block
- copyBlock( (S32 *)(pic->y.ptr + picOffset), 16>>2, 16, hdim>>2, (S32 *)yBack, 16>>2 );
- // Compute chroma pointer
- col = 8 * mb->x;
- row = 8 * mb->y;
- hdim = pic->cb.hoffset;
- picOffset = col + row * hdim;
- // Copy chroma blocks
- copyBlock( (S32 *)(pic->cb.ptr + picOffset), 8>>2, 8, hdim>>2, (S32 *)cbBack, 8>>2 );
- copyBlock( (S32 *)(pic->cr.ptr + picOffset), 8>>2, 8, hdim>>2, (S32 *)crBack, 8>>2 );
- }
- // averageForBack - Compute B-frame prediction from forward and backward predictions
- static void averageForBack( MACROBLOCK_DESCR * mb, // Describes block to be averaged
- PICTURE * pic // Input: contains forward prediction
- // Output: contains B-frame prediction
- )
- {
- int row, col, hdim, blk, firstCol, lastCol, firstRow, lastRow, cX, cY;
- void (*pAverageBlockFunction)( PIXEL forPred[], int hSize, int vSize, int forOffset,
- PIXEL backPred[], int backOffset );
- #if defined(COMPILE_MMX)
- if(cpuid_is_mmx_motion_on()) {
- //do mmx if compiler switch AND initialized AND detected
- pAverageBlockFunction = averageBlockMMX;
- } else
- #endif
- {
- pAverageBlockFunction = averageBlock;
- }
- // Process luma
- col = 16 * mb->x;
- row = 16 * mb->y;
- hdim = pic->y.hoffset;
- if (mb->mtype == MTYPE263_INTER4V) {
- for (blk = 0; blk < 4; ++blk) { // Treat each 8x8 block separately
- // Only use pixels in current MB for backwards prediction
- if (blk & 0x1) {
- firstCol = 8, lastCol = 15; // Block 1 and 3
- } else {
- firstCol = 0, lastCol = 7; // Block 0 and 2
- }
- if (blk & 0x2) {
- firstRow = 8, lastRow = 15; // Block 2 and 3
- } else {
- firstRow = 0, lastRow = 7; // Block 0 and 1
- }
- firstCol = max( firstCol, (-mb->blkMvBx[blk] + 1) >> 1 );
- lastCol = min( lastCol, 15 - ((mb->blkMvBx[blk] + 1) >> 1));
- firstRow = max( firstRow, (-mb->blkMvBy[blk] + 1) >> 1 );
- lastRow = min( lastRow, 15 - ((mb->blkMvBy[blk] + 1) >> 1));
- pAverageBlockFunction( pic->y.ptr + col + firstCol + (row + firstRow) * hdim,
- lastCol - firstCol + 1,
- lastRow - firstRow + 1, hdim,
- yBack + firstCol + firstRow * 16, 16 );
- }
- // Determine motion vector for chroma block
- cX = chromaMvComp4V( mb->blkMvBx );
- cY = chromaMvComp4V( mb->blkMvBy );
- } else {
- // Only use pixels in current MB for backwards prediction
- firstCol = max( 0, (-mb->blkMvBx[0] + 1) >> 1 );
- lastCol = min( 15, 15 - ((mb->blkMvBx[0] + 1) >> 1));
- firstRow = max( 0, (-mb->blkMvBy[0] + 1) >> 1 );
- lastRow = min( 15, 15 - ((mb->blkMvBy[0] + 1) >> 1));
- //#define MMX_AVERAGE_TEST01
- #ifdef MMX_AVERAGE_TEST01
- {
- int crow, ccol;
- PIXEL *forPred = pic->y.ptr + col + firstCol + (row + firstRow) * hdim;
- int hSize = lastCol - firstCol + 1;
- int vSize = lastRow - firstRow + 1;
- int forOffset = hdim;
- PIXEL *backPred = yBack + firstCol + firstRow * 16;
- int backOffset = 16;
- PIXEL forPred_org[16*352];
- PIXEL backPred_org[16*16];
- PIXEL forPred_temp[16*352];
- PIXEL backPred_temp[16*16];
- int countForOffset;
- int countBackOffset;
- int bError = 0;
- countForOffset = 0;
- countBackOffset = 0;
- for (crow = 0; crow < vSize; ++crow) {
- for (ccol = 0; ccol < hSize; ++ccol) {//from smaller multiple of 4
- forPred_org[ccol + countForOffset] = forPred[ccol + countForOffset];
- backPred_org[ccol + countBackOffset] = backPred[ccol + countBackOffset];
- forPred_temp[ccol + countForOffset] = forPred[ccol + countForOffset];
- backPred_temp[ccol + countBackOffset] = backPred[ccol + countBackOffset];
- }
- countForOffset += forOffset;
- countBackOffset += backOffset;
- }
- averageBlock( forPred_org,
- lastCol - firstCol + 1,
- lastRow - firstRow + 1, hdim,
- backPred_org, 16 );
- #endif
- pAverageBlockFunction( pic->y.ptr + col + firstCol + (row + firstRow) * hdim,
- lastCol - firstCol + 1,
- lastRow - firstRow + 1, hdim,
- yBack + firstCol + firstRow * 16, 16 );
- #ifdef MMX_AVERAGE_TEST01
- countForOffset =0;
- countBackOffset = 0;
- for (crow = 0; crow < vSize; ++crow) {
- for (ccol = 0; ccol < hSize; ++ccol) {//from smaller multiple of 4
- if( (forPred_org[ccol + countForOffset] != forPred[ccol + countForOffset])) {//((hSize==8) || (hSize==16)) &&
- bError = 1;
- }
- }
- countForOffset += forOffset;
- countBackOffset += backOffset;
- }
- if(bError!=0) {
- countForOffset = 0;
- countBackOffset = 0;
- for (crow = 0; crow < vSize; ++crow) {
- for (ccol = 0; ccol < hSize; ++ccol) {//from smaller multiple of 4
- forPred_org[ccol + countForOffset] = forPred[ccol + countForOffset] = forPred_temp[ccol + countForOffset];
- backPred_org[ccol + countBackOffset] = backPred[ccol + countBackOffset] = backPred_temp[ccol + countBackOffset];
- }
- countForOffset += forOffset;
- countBackOffset += backOffset;
- }
- averageBlock( forPred_org,
- lastCol - firstCol + 1,
- lastRow - firstRow + 1, hdim,
- backPred_org, 16 );
- pAverageBlockFunction( pic->y.ptr + col + firstCol + (row + firstRow) * hdim,
- lastCol - firstCol + 1,
- lastRow - firstRow + 1, hdim,
- yBack + firstCol + firstRow * 16, 16 );
- }
- }
- #endif
-
- // Determine motion vector for chroma block
- cX = chromaMVComp( mb->blkMvBx[0] );
- cY = chromaMVComp( mb->blkMvBy[0] );
- }
- // Process chroma
- col = 8 * mb->x;
- row = 8 * mb->y;
- hdim = pic->cb.hoffset;
- // Only use pixels in current MB for backwards prediction
- firstCol = max( 0, (-cX + 1) >> 1 );
- lastCol = min( 7, 7 - ((cX + 1) >> 1));
- firstRow = max( 0, (-cY + 1) >> 1 );
- lastRow = min( 7, 7 - ((cY + 1) >> 1));
- //#define MMX_AVERAGE_TEST02
- #ifdef MMX_AVERAGE_TEST02
- {
- int crow, ccol;
- PIXEL *forPred = pic->cb.ptr + col + firstCol + (row + firstRow) * hdim;
- int hSize = lastCol - firstCol + 1;
- int vSize = lastRow - firstRow + 1;
- int forOffset = hdim;
- PIXEL *backPred = cbBack + firstCol + firstRow * 8;
- int backOffset = 8;
- PIXEL forPred_org[16*352];
- PIXEL backPred_org[16*8];
- int countForOffset = 0;
- int countBackOffset = 0;
- for (crow = 0; crow < vSize; ++crow) {
- for (ccol = 0; ccol < hSize; ++ccol) {//from smaller multiple of 4
- forPred_org[ccol + countForOffset] = forPred[ccol + countForOffset];
- backPred_org[ccol + countBackOffset] = backPred[ccol + countBackOffset];
- }
- countForOffset += forOffset;
- countBackOffset += backOffset;
- }
- averageBlock( forPred_org,
- lastCol - firstCol + 1,
- lastRow - firstRow + 1, hdim,
- backPred_org, 8 );
- #endif
- pAverageBlockFunction( pic->cb.ptr + col + firstCol + (row + firstRow) * hdim,
- lastCol - firstCol + 1,
- lastRow - firstRow + 1, hdim,
- cbBack + firstCol + firstRow * 8, 8 );
- #ifdef MMX_AVERAGE_TEST02
- countForOffset =0;
- countBackOffset = 0;
- for (crow = 0; crow < vSize; ++crow) {
- for (ccol = 0; ccol < hSize; ++ccol) {//from smaller multiple of 4
- if(((hSize==8) || (hSize==16)) && (forPred_org[ccol + countForOffset] != forPred[ccol + countForOffset])) {
- averageBlock( forPred_org,
- lastCol - firstCol + 1,
- lastRow - firstRow + 1, hdim,
- backPred_org, 8 );
- pAverageBlockFunction( pic->cb.ptr + col + firstCol + (row + firstRow) * hdim,
- lastCol - firstCol + 1,
- lastRow - firstRow + 1, hdim,
- cbBack + firstCol + firstRow * 8, 8 );
- }
- }
- countForOffset += forOffset;
- countBackOffset += backOffset;
- }
- }
- #endif
-
-
- pAverageBlockFunction( pic->cr.ptr + col + firstCol + (row + firstRow) * hdim,
- lastCol - firstCol + 1,
- lastRow - firstRow + 1, hdim,
- crBack + firstCol + firstRow * 8, 8 );
- }
- // copyBlock - copy 2-D array of 32-bit integers
- static void copyBlock( S32 source[], int hSize, int vSize, int sourceOffset,
- S32 dest[], int destOffset )
- {
- int row, col;
- for (row = 0; row < vSize; ++row) {
- for (col = 0; col < hSize; ++col) {
- dest[col] = source[col];
- }
- source += sourceOffset;
- dest += destOffset;
- }
- }
- // averageBlock - compute average of two hSize*vSize pixel arrays
- void averageBlock( PIXEL forPred[], int hSize, int vSize, int forOffset,
- PIXEL backPred[], int backOffset )
- {
- int row, col;
- #ifdef VVPROFILER
- S32 nVvProfNb = 3;
- if(!pVvProf[nVvProfNb]) pVvProf[nVvProfNb] = newCVvDebugTimer();//memory leak on destruction
- pVvProfCount[nVvProfNb]++;
- StartTime(pVvProf[nVvProfNb]);
- #endif
- for (row = 0; row < vSize; ++row) {
- for (col = 0; col < hSize; ++col) {
- forPred[col] = (forPred[col] + backPred[col]) >> 1;
- }
- forPred += forOffset;
- backPred += backOffset;
- }
- #ifdef VVPROFILER
- StopAndAccuTime(pVvProf[nVvProfNb]);
- #endif
- }