motion_est.c
上传用户:jxp0626
上传日期:2007-01-08
资源大小:102k
文件大小:14k
- /*
- * Motion estimation
- * Copyright (c) 2000 Gerard Lantau.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
- #include <stdlib.h>
- #include <stdio.h>
- #include "avcodec.h"
- #include "dsputil.h"
- #include "mpegvideo.h"
- static int pix_sum(UINT8 *pix, int line_size)
- {
- int s, i, j;
- s = 0;
- for(i=0;i<16;i++) {
- for(j=0;j<16;j+=8) {
- s += pix[0];
- s += pix[1];
- s += pix[2];
- s += pix[3];
- s += pix[4];
- s += pix[5];
- s += pix[6];
- s += pix[7];
- pix += 8;
- }
- pix += line_size - 16;
- }
- return s;
- }
- static int pix_norm1(UINT8 *pix, int line_size)
- {
- int s, i, j;
- UINT32 *sq = squareTbl + 256;
- s = 0;
- for(i=0;i<16;i++) {
- for(j=0;j<16;j+=8) {
- s += sq[pix[0]];
- s += sq[pix[1]];
- s += sq[pix[2]];
- s += sq[pix[3]];
- s += sq[pix[4]];
- s += sq[pix[5]];
- s += sq[pix[6]];
- s += sq[pix[7]];
- pix += 8;
- }
- pix += line_size - 16;
- }
- return s;
- }
- static int pix_norm(UINT8 *pix1, UINT8 *pix2, int line_size)
- {
- int s, i, j;
- UINT32 *sq = squareTbl + 256;
- s = 0;
- for(i=0;i<16;i++) {
- for(j=0;j<16;j+=8) {
- s += sq[pix1[0] - pix2[0]];
- s += sq[pix1[1] - pix2[1]];
- s += sq[pix1[2] - pix2[2]];
- s += sq[pix1[3] - pix2[3]];
- s += sq[pix1[4] - pix2[4]];
- s += sq[pix1[5] - pix2[5]];
- s += sq[pix1[6] - pix2[6]];
- s += sq[pix1[7] - pix2[7]];
- pix1 += 8;
- pix2 += 8;
- }
- pix1 += line_size - 16;
- pix2 += line_size - 16;
- }
- return s;
- }
- #define USE_MMX
- #ifdef USE_MMX
- /* original mmx code from mpeg2movie by Adam Williams */
- static void inline mmx_start_block(void)
- {
- asm("
- .align 8
- pxor %%mm7, %%mm7;
- pxor %%mm6, %%mm6;
- " : : );
- }
- static void inline mmx_absdiff(unsigned char *a, unsigned char *b)
- {
- asm("
- .align 8
- movq (%%ebx), %%mm0; // Get first half of row1
- movq (%%ecx), %%mm1; // Get first half of row2
- movq %%mm0, %%mm2; // Make a copy of row1 for absdiff operation
- movq 8(%%ebx), %%mm3; // Get second half of row1
- psubusb %%mm1, %%mm0; // Subtract the first halves one way
- psubusb %%mm2, %%mm1; // Subtract the other way
- movq 8(%%ecx), %%mm4; // Get second half of row2
- por %%mm1, %%mm0; // Merge first half results
- movq %%mm3, %%mm5; // Copy for absdiff operation
- movq %%mm0, %%mm1; // Keep a copy
- psubusb %%mm4, %%mm3; // Subtract second halves one way
- punpcklbw %%mm6, %%mm0; // Unpack to higher precision for accumulation
- psubusb %%mm5, %%mm4; // Subtract the other way
- psrlq $32, %%mm1; // Shift registeres for accumulation
- por %%mm4, %%mm3; // merge results of 2nd half
- punpcklbw %%mm6, %%mm1; // unpack to higher precision for accumulation
- movq %%mm3, %%mm4; // keep a copy
- punpcklbw %%mm6, %%mm3; // unpack to higher precision for accumulation
- paddw %%mm0, %%mm7; // accumulate difference
- psrlq $32, %%mm4; // shift results for accumulation
- paddw %%mm1, %%mm7; // accumulate difference
- punpcklbw %%mm6, %%mm4; // unpack to higher precision for accumulation
- paddw %%mm3, %%mm7; // accumulate difference
- paddw %%mm4, %%mm7; // accumulate difference
- "
- :
- : "b" (a), "c" (b)
- : "st" );
- }
- static inline unsigned int mmx_accum_absdiff()
- {
- unsigned long long r = 0;
- asm("
- .align 8
- movq %%mm7, %%mm5;
- movq %%mm7, %%mm4;
- punpcklwd %%mm6, %%mm4;
- punpckhwd %%mm6, %%mm5;
- paddd %%mm5, %%mm4;
- movq %%mm4, %%mm5;
- punpckldq %%mm6, %%mm5;
- punpckhdq %%mm6, %%mm4;
- paddd %%mm5, %%mm4;
- movq %%mm4, (%%ebx);
- emms;
- "
- : : "b" (&r));
- return r;
- }
- static unsigned int pix_abs16x16(UINT8 *pix1, UINT8 *pix2, int line_size)
- {
- int i;
- mmx_start_block();
- for(i=0;i<16;i++) {
- mmx_absdiff(pix1, pix2);
- pix1 += line_size;
- pix2 += line_size;
- }
- return mmx_accum_absdiff();
- }
- #else
- static int pix_abs16x16(UINT8 *pix1, UINT8 *pix2, int line_size)
- {
- int s, i;
- s = 0;
- for(i=0;i<16;i++) {
- s += abs(pix1[0] - pix2[0]);
- s += abs(pix1[1] - pix2[1]);
- s += abs(pix1[2] - pix2[2]);
- s += abs(pix1[3] - pix2[3]);
- s += abs(pix1[4] - pix2[4]);
- s += abs(pix1[5] - pix2[5]);
- s += abs(pix1[6] - pix2[6]);
- s += abs(pix1[7] - pix2[7]);
- s += abs(pix1[8] - pix2[8]);
- s += abs(pix1[9] - pix2[9]);
- s += abs(pix1[10] - pix2[10]);
- s += abs(pix1[11] - pix2[11]);
- s += abs(pix1[12] - pix2[12]);
- s += abs(pix1[13] - pix2[13]);
- s += abs(pix1[14] - pix2[14]);
- s += abs(pix1[15] - pix2[15]);
- pix1 += line_size;
- pix2 += line_size;
- }
- return s;
- }
- #endif
- #define avg2(a,b) ((a+b+1)>>1)
- #define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
- static int pix_abs16x16_x2(UINT8 *pix1, UINT8 *pix2, int line_size)
- {
- int s, i;
- s = 0;
- for(i=0;i<16;i++) {
- s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
- s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
- s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
- s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
- s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
- s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
- s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
- s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
- s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
- s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
- s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
- s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
- s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
- s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
- s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
- s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
- pix1 += line_size;
- pix2 += line_size;
- }
- return s;
- }
- static int pix_abs16x16_y2(UINT8 *pix1, UINT8 *pix2, int line_size)
- {
- int s, i;
- UINT8 *pix3 = pix2 + line_size;
- s = 0;
- for(i=0;i<16;i++) {
- s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
- s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
- s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
- s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
- s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
- s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
- s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
- s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
- s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
- s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
- s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
- s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
- s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
- s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
- s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
- s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
- pix1 += line_size;
- pix2 += line_size;
- }
- return s;
- }
- static int pix_abs16x16_xy2(UINT8 *pix1, UINT8 *pix2, int line_size)
- {
- int s, i;
- UINT8 *pix3 = pix2 + line_size;
- s = 0;
- for(i=0;i<16;i++) {
- s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
- s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
- s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
- s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
- s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
- s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
- s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
- s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
- s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
- s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
- s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
- s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
- s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
- s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
- s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
- s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
- pix1 += line_size;
- pix2 += line_size;
- }
- return s;
- }
- static void no_motion_search(MpegEncContext *s,
- int *mx_ptr, int *my_ptr)
- {
- *mx_ptr = 0;
- *my_ptr = 0;
- }
- static void full_motion_search(MpegEncContext *s,
- int *mx_ptr, int *my_ptr)
- {
- int range, x1, y1, x2, y2, xx, yy, x, y;
- int mx, my, mx1, my1, dmin, d;
- UINT8 *pix;
- range = 8 * (1 << (s->f_code - 1));
- if (s->out_format == FMT_H263)
- range = range * 2;
- /* do not allow vectors outside the image (will change one day for
- h263) */
- xx = 16 * s->mb_x;
- yy = 16 * s->mb_y;
- x1 = xx - range + 1; /* we loose one pixel to avoid boundary pb with half pixel pred */
- if (x1 < 0)
- x1 = 0;
- x2 = xx + range - 1;
- if (x2 > (s->width - 16))
- x2 = s->width - 16;
- y1 = yy - range + 1;
- if (y1 < 0)
- y1 = 0;
- y2 = yy + range - 1;
- if (y2 > (s->height - 16))
- y2 = s->height - 16;
- pix = s->new_picture[0] + (yy * s->width) + xx;
- dmin = 0x7fffffff;
- mx = 0;
- my = 0;
- for(y=y1;y<=y2;y++) {
- for(x=x1;x<=x2;x++) {
- d = pix_abs16x16(pix, s->last_picture[0] + (y * s->width) + x,
- s->width);
- if (d < dmin ||
- (d == dmin &&
- (abs(x - xx) + abs(y - yy)) <
- (abs(mx - xx) + abs(my - yy)))) {
- dmin = d;
- mx = x;
- my = y;
- }
- }
- }
- /* half pixel search */
- mx1 = mx = 2 * mx;
- my1 = my = 2 * my;
- if (mx > 0 && mx < 2 * (s->width - 16) &&
- my > 0 && my < 2 * (s->height - 16)) {
- int dx, dy, px, py;
- UINT8 *ptr;
- for(dy=-1;dy<=1;dy++) {
- for(dx=-1;dx<=1;dx++) {
- if (dx != 0 || dy != 0) {
- px = mx1 + dx;
- py = my1 + dy;
- ptr = s->last_picture[0] + ((py >> 1) * s->width) + (px >> 1);
- switch(((py & 1) << 1) | (px & 1)) {
- default:
- case 0:
- d = pix_abs16x16(pix, ptr, s->width);
- break;
- case 1:
- d = pix_abs16x16_x2(pix, ptr, s->width);
- break;
- case 2:
- d = pix_abs16x16_y2(pix, ptr, s->width);
- break;
- case 3:
- d = pix_abs16x16_xy2(pix, ptr, s->width);
- break;
- }
- if (d < dmin) {
- dmin = d;
- mx = px;
- my = py;
- }
- }
- }
- }
- }
- *mx_ptr = mx - 2 * xx;
- *my_ptr = my - 2 * yy;
- #if 0
- if (*mx_ptr < -(2 * range) || *mx_ptr >= (2 * range) ||
- *my_ptr < -(2 * range) || *my_ptr >= (2 * range)) {
- fprintf(stderr, "error %d %dn", *mx_ptr, *my_ptr);
- }
- #endif
- }
- #if 1
- int estimate_motion(MpegEncContext *s,
- int mb_x, int mb_y,
- int *mx_ptr, int *my_ptr)
- {
- UINT8 *pix, *ppix;
- int sum, varc, vard, mx, my;
-
- if (s->full_search) {
- full_motion_search(s, &mx, &my);
- } else {
- no_motion_search(s, &mx, &my);
- }
- /* intra / predictive decision */
- pix = s->new_picture[0] + (mb_y * 16 * s->width) + mb_x * 16;
- ppix = s->last_picture[0] +
- ((mb_y * 16 + (my >> 1)) * s->width) + (mb_x * 16 + (mx >> 1));
- sum = pix_sum(pix, s->width);
- varc = pix_norm1(pix, s->width);
- vard = pix_norm(pix, ppix, s->width);
-
- vard = vard >> 8;
- sum = sum >> 8;
- varc = (varc >> 8) - (sum * sum);
- #if 0
- printf("varc=%d (sum=%d) vard=%d mx=%d my=%dn",
- varc, sum, vard, mx, my);
- #endif
- if (vard <= 64) {
- *mx_ptr = mx;
- *my_ptr = my;
- return 0;
- } else if (vard < varc) {
- *mx_ptr = mx;
- *my_ptr = my;
- return 0;
- } else {
- *mx_ptr = 0;
- *my_ptr = 0;
- return 1;
- }
- }
- #else
- /* test version which generates valid random vectors */
- int estimate_motion(MpegEncContext *s,
- int mb_x, int mb_y,
- int *mx_ptr, int *my_ptr)
- {
- int xx, yy, x1, y1, x2, y2, range;
- if ((random() % 10) >= 5) {
- range = 8 * (1 << (s->f_code - 1));
- if (s->out_format == FMT_H263)
- range = range * 2;
- xx = 16 * s->mb_x;
- yy = 16 * s->mb_y;
- x1 = xx - range;
- if (x1 < 0)
- x1 = 0;
- x2 = xx + range - 1;
- if (x2 > (s->width - 16))
- x2 = s->width - 16;
- y1 = yy - range;
- if (y1 < 0)
- y1 = 0;
- y2 = yy + range - 1;
- if (y2 > (s->height - 16))
- y2 = s->height - 16;
- *mx_ptr = (random()%(2 * (x2 - x1 + 1))) + 2 * (x1 - xx);
- *my_ptr = (random()%(2 * (y2 - y1 + 1))) + 2 * (y1 - yy);
- return 0;
- } else {
- *mx_ptr = 0;
- *my_ptr = 0;
- return 1;
- }
- }
- #endif