interpolate.c
资源名称:h264src.zip [点击查看]
上传用户:sunbaby
上传日期:2013-05-31
资源大小:242k
文件大小:13k
源码类别:
mpeg/mp3
开发平台:
Visual C++
- /*****************************************************************************
- *
- * T264 AVC CODEC
- *
- * Copyright(C) 2004-2005 llcc <lcgate1@yahoo.com.cn>
- * 2004-2005 visionany <visionany@yahoo.com.cn>
- *
- * This program is free software ; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation ; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY ; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program ; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- ****************************************************************************/
- #include "stdio.h"
- #include "T264.h"
- #include "interpolate.h"
- #include "bitstream.h"
- // 1/4 pixel search
- uint32_t
- T264_quarter_pixel_search(T264_t* t, int32_t list_index, uint8_t* src, T264_frame_t* refframe, int32_t offset, T264_vector_t* vec, T264_vector_t* vec_median, uint32_t sad_org, int32_t w, int32_t h, uint8_t* residual, int32_t mb_part)
- {
- DECLARE_ALIGNED_MATRIX(data1, 16, 16, uint8_t, CACHE_SIZE);
- DECLARE_ALIGNED_MATRIX(data2, 16, 16, uint8_t, CACHE_SIZE);
- uint32_t sad = sad_org;
- uint8_t* ref;
- int16_t x, y;
- int32_t ref_cost = REFCOST(vec[0].refno);
- x = vec[0].x &= ~3;
- y = vec[0].y &= ~3;
- ref = refframe->Y[0] + offset + (y >> 2) * t->edged_stride + (x >> 2);
- if (t->flags & USE_HALFPEL)
- {
- uint8_t* refcur;
- // right half pel
- refcur = refframe->Y[1] + offset + (y >> 2) * t->edged_stride + (x >> 2);
- sad = t->cmp[mb_part](src, t->stride, refcur, t->edged_stride) +
- t->mb.lambda * (eg_size_se(t->bs, (x + 2) - vec_median[0].x) +
- eg_size_se(t->bs, y - vec_median[0].y)) + ref_cost;
- if (sad < sad_org)
- {
- sad_org = sad;
- vec[0].x = x + 2;
- vec[0].y = y;
- ref = refcur;
- }
- // left half pel
- refcur --;
- sad = t->cmp[mb_part](src, t->stride, refcur, t->edged_stride) +
- t->mb.lambda * (eg_size_se(t->bs, (x - 2) - vec_median[0].x) +
- eg_size_se(t->bs, y - vec_median[0].y)) + ref_cost;
- if (sad < sad_org)
- {
- sad_org = sad;
- vec[0].x = x - 2;
- vec[0].y = y;
- ref = refcur;
- }
- // bottom half pel
- refcur = refframe->Y[2] + offset + (y >> 2) * t->edged_stride + (x >> 2);
- sad = t->cmp[mb_part](src, t->stride, refcur, t->edged_stride) +
- t->mb.lambda * (eg_size_se(t->bs, x - vec_median[0].x) +
- eg_size_se(t->bs, y + 2 - vec_median[0].y)) + ref_cost;
- if (sad < sad_org)
- {
- sad_org = sad;
- vec[0].x = x;
- vec[0].y = y + 2;
- ref = refcur;
- }
- // top half pel
- refcur -= t->edged_stride;
- sad = t->cmp[mb_part](src, t->stride, refcur, t->edged_stride) +
- t->mb.lambda * (eg_size_se(t->bs, x - vec_median[0].x) +
- eg_size_se(t->bs, y - 2 - vec_median[0].y)) + ref_cost;
- if (sad < sad_org)
- {
- sad_org = sad;
- vec[0].x = x;
- vec[0].y = y - 2;
- ref = refcur;
- }
- // bottom-right half pel
- refcur = refframe->Y[3] + offset + (y >> 2) * t->edged_stride + (x >> 2);
- sad = t->cmp[mb_part](src, t->stride, refcur, t->edged_stride) +
- t->mb.lambda * (eg_size_se(t->bs, x + 2 - vec_median[0].x) +
- eg_size_se(t->bs, y + 2 - vec_median[0].y)) + ref_cost;
- if (sad < sad_org)
- {
- sad_org = sad;
- vec[0].x = x + 2;
- vec[0].y = y + 2;
- ref = refcur;
- }
- // bottom-left half pel
- refcur --;
- sad = t->cmp[mb_part](src, t->stride, refcur, t->edged_stride) +
- t->mb.lambda * (eg_size_se(t->bs, x - 2 - vec_median[0].x) +
- eg_size_se(t->bs, y + 2 - vec_median[0].y)) + ref_cost;
- if (sad < sad_org)
- {
- sad_org = sad;
- vec[0].x = x - 2;
- vec[0].y = y + 2;
- ref = refcur;
- }
- // top-left half pel
- refcur -= t->edged_stride;
- sad = t->cmp[mb_part](src, t->stride, refcur, t->edged_stride) +
- t->mb.lambda * (eg_size_se(t->bs, x - 2 - vec_median[0].x) +
- eg_size_se(t->bs, y - 2 - vec_median[0].y)) + ref_cost;
- if (sad < sad_org)
- {
- sad_org = sad;
- vec[0].x = x - 2;
- vec[0].y = y - 2;
- ref = refcur;
- }
- // top-right half pel
- refcur ++;
- sad = t->cmp[mb_part](src, t->stride, refcur, t->edged_stride) +
- t->mb.lambda * (eg_size_se(t->bs, x + 2 - vec_median[0].x) +
- eg_size_se(t->bs, y - 2 - vec_median[0].y)) + ref_cost;
- if (sad < sad_org)
- {
- sad_org = sad;
- vec[0].x = x + 2;
- vec[0].y = y - 2;
- ref = refcur;
- }
- // quarter pel search
- if (t->flags & USE_QUARTPEL)
- {
- int16_t n;
- int32_t i;
- uint8_t* p_min = data1;
- uint8_t* p_buffer = data2;
- uint32_t sad_half = sad_org;
- static const int8_t index[2 * 2][8][8] =
- {
- {
- {0, 1, 0, 0, 0, 0, 1, 0}, {0, 1, 0, 0,-1, 0,-1, 0},
- {0, 2, 0, 0, 0, 0, 0, 1}, {0, 2, 0, 0, 0,-1, 0,-1},
- {2, 1, 0,-1,-1, 0,-1,-1}, {2, 1, 0,-1, 0, 0, 1,-1},
- {2, 1, 0, 0,-1, 0,-1, 1}, {2, 1, 0, 0, 0, 0, 1, 1}
- },
- {
- {0, 1, 1, 0, 0, 0, 1, 0}, {0, 1, 0, 0, 0, 0,-1, 0},
- {1, 3, 0, 0, 0, 0, 0, 1}, {1, 3, 0, 0, 0,-1, 0,-1},
- {2, 1, 0,-1, 0, 0,-1,-1}, {2, 1, 1,-1, 0, 0, 1,-1},
- {2, 1, 0, 0, 0, 0,-1, 1}, {2, 1, 1, 0, 0, 0, 1, 1}
- },
- {
- {2, 3, 0, 0, 0, 0, 1, 0}, {2, 3, 0, 0,-1, 0,-1, 0},
- {2, 0, 0, 0, 0, 1, 0, 1}, {2, 0, 0, 0, 0, 0, 0,-1},
- {2, 1, 0, 0,-1, 0,-1,-1}, {2, 1, 0, 0, 0, 0, 1,-1},
- {2, 1, 0, 0,-1, 1,-1, 1}, {2, 1, 0, 0, 0, 1, 1, 1}
- },
- {
- {3, 2, 0, 0, 1, 0, 1, 0}, {3, 2, 0, 0, 0, 0,-1, 0},
- {3, 1, 0, 0, 0, 1, 0, 1}, {3, 1, 0, 0, 0, 0, 0,-1},
- {1, 2, 0, 0, 0, 0,-1,-1}, {1, 2, 0, 0, 1, 0, 1,-1},
- {1, 2, 0, 1, 0, 0,-1, 1}, {1, 2, 0, 1, 1, 0, 1, 1}
- }
- };
- x = ((uint16_t)vec[0].x) & (uint16_t)~1;
- y = ((uint16_t)vec[0].y) & (uint16_t)~1;
- n = ((y & 2)) | ((x & 2) >> 1);
- for(i = 0 ; i < t->subpel_pts ; i ++)
- {
- t->pia[mb_part](
- t->ref[list_index][vec[0].refno]->Y[index[n][i][0]] + offset + ((y >> 2) + index[n][i][3]) * t->edged_stride + (x >> 2) + index[n][i][2],
- t->ref[list_index][vec[0].refno]->Y[index[n][i][1]] + offset + ((y >> 2) + index[n][i][5]) * t->edged_stride + (x >> 2) + index[n][i][4],
- t->edged_stride, t->edged_stride, p_buffer, 16);
- sad = t->cmp[mb_part](src, t->stride, p_buffer, 16) +
- t->mb.lambda * (eg_size_se(t->bs, x + index[n][i][6] - vec_median[0].x) +
- eg_size_se(t->bs, y +index[n][i][7] - vec_median[0].y)) + ref_cost;
- if (sad < sad_org)
- {
- sad_org = sad;
- vec[0].x = x + index[n][i][6];
- vec[0].y = y + index[n][i][7];
- SWAP(uint8_t, p_min, p_buffer);
- //t->memcpy_stride_u(data, w, h, 16, residual, 16);
- }
- }
- if (sad_org < sad_half)
- {
- t->memcpy_stride_u(p_min, w, h, 16, residual, 16);
- }
- else
- {
- t->memcpy_stride_u(ref, w, h, t->edged_stride, residual, 16);
- }
- }
- else
- {
- t->memcpy_stride_u(ref, w, h, t->edged_stride, residual, 16);
- }
- sad = sad_org;
- }
- else
- {
- // x & y always integer pel
- t->memcpy_stride_u(ref, w, h, t->edged_stride, residual, 16);
- }
- return sad;
- }
- void
- T264_pia_u_c(uint8_t* p1, uint8_t* p2, int32_t p1_stride, int32_t p2_stride, uint8_t* dst, int32_t dst_stride,
- int32_t w,int32_t h)
- {
- int32_t i, j;
- for(i = 0 ; i < h ; i ++)
- {
- for(j = 0 ; j < w ; j ++)
- {
- dst[j] = (p1[j] + p2[j] + 1) >> 1;
- }
- p1 += p1_stride;
- p2 += p2_stride;
- dst+= dst_stride;
- }
- }
- #define PIAFUNC(w, h, base)
- void
- T264_##base##_u_##w##x##h##_c(uint8_t* p1, uint8_t* p2, int32_t p1_stride, int32_t p2_stride, uint8_t* dst, int32_t dst_stride)
- {
- T264_##base##_u_c(p1,p2,p1_stride, p2_stride,dst,dst_stride,w,h);
- }
- PIAFUNC(16, 16, pia)
- PIAFUNC(16, 8, pia)
- PIAFUNC(8, 16, pia)
- PIAFUNC(8, 8, pia)
- PIAFUNC(8, 4, pia)
- PIAFUNC(4, 8, pia)
- PIAFUNC(4, 4, pia)
- PIAFUNC(2, 2, pia)
- void
- T264_eighth_pixel_mc_u_c(uint8_t* src, int32_t src_stride, uint8_t* dst, int16_t mvx, int16_t mvy, int32_t width, int32_t height)
- {
- int32_t x, y;
- int32_t i, j;
- x = mvx & 0x7;
- y = mvy & 0x7;
- for (i = 0 ; i < height ; i ++)
- {
- for(j = 0 ; j < width ; j ++)
- {
- dst[j] = ((8 - x) * (8 - y) * src[j] + x * (8 - y) * src[j + 1] +
- (8 - x) * y * src[j + src_stride] + x * y * src[j + src_stride+ 1] + 32) >> 6;
- }
- src += src_stride;
- dst += 8;
- }
- }
- static __inline int32_t
- tapfilter_h(uint8_t* p)
- {
- return p[-2] - 5 * p[-1] + 20 * p[0] + 20 * p[1] - 5 * p[2] + p[3];
- }
- void
- interpolate_halfpel_h_c(uint8_t* src, int32_t src_stride, uint8_t* dst, int32_t dst_stride, int32_t width, int32_t height)
- {
- int32_t i, j;
- int32_t tmp;
- for (i = 0 ; i < height ; i ++)
- {
- for (j = 0 ; j < width ; j ++)
- {
- tmp = (tapfilter_h(src + j) + 16) >> 5;
- dst[j] = CLIP1(tmp);
- }
- src += src_stride;
- dst += dst_stride;
- }
- }
- static __inline int32_t
- tapfilter_v(uint8_t* p, int32_t stride)
- {
- return p[-2 * stride] - 5 * p[-stride] + 20 * p[0] + 20 * p[stride] - 5 * p[2 * stride] + p[3 * stride];
- }
- void
- interpolate_halfpel_v_c(uint8_t* src, int32_t src_stride, uint8_t* dst, int32_t dst_stride, int32_t width, int32_t height)
- {
- int32_t i, j;
- int32_t tmp;
- for (i = 0 ; i < height ; i ++)
- {
- for (j = 0 ; j < width ; j ++)
- {
- tmp = (tapfilter_v(src + j, src_stride) + 16) >> 5;
- dst[j] = CLIP1(tmp);
- }
- src += src_stride;
- dst += dst_stride;
- }
- }
- // use vertical to generate this pic
- void
- interpolate_halfpel_hv_c(uint8_t* src, int32_t src_stride, uint8_t* dst, int32_t dst_stride, int32_t width, int32_t height)
- {
- int32_t i, j;
- int32_t tmp;
- for (i = 0 ; i < height + 0 ; i ++)
- {
- for (j = 0 ; j < width + 0 ; j ++)
- {
- tmp = (
- (src[j - 2 - 2 * src_stride] - 5 * src[j - 1 - 2 * src_stride] + 20 * src[j - 2 * src_stride] + 20 * src[j + 1 - 2 * src_stride] - 5 * src[j + 2 - 2 * src_stride] + src[j + 3 - 2 * src_stride]) +
- (-5) * (src[j - 2 - 1 * src_stride] - 5 * src[j - 1 - 1 * src_stride] + 20 * src[j - 1 * src_stride] + 20 * src[j + 1 - 1 * src_stride] - 5 * src[j + 2 - 1 * src_stride] + src[j + 3 - 1 * src_stride]) +
- (20) * (src[j - 2 - 0 * src_stride] - 5 * src[j - 1 - 0 * src_stride] + 20 * src[j - 0 * src_stride] + 20 * src[j + 1 - 0 * src_stride] - 5 * src[j + 2 - 0 * src_stride] + src[j + 3 - 0 * src_stride]) +
- (20) * (src[j - 2 + 1 * src_stride] - 5 * src[j - 1 + 1 * src_stride] + 20 * src[j + 1 * src_stride] + 20 * src[j + 1 + 1 * src_stride] - 5 * src[j + 2 + 1 * src_stride] + src[j + 3 + 1 * src_stride]) +
- (-5) * (src[j - 2 + 2 * src_stride] - 5 * src[j - 1 + 2 * src_stride] + 20 * src[j + 2 * src_stride] + 20 * src[j + 1 + 2 * src_stride] - 5 * src[j + 2 + 2 * src_stride] + src[j + 3 + 2 * src_stride]) +
- (src[j - 2 + 3 * src_stride] - 5 * src[j - 1 + 3 * src_stride] + 20 * src[j + 3 * src_stride] + 20 * src[j + 1 + 3 * src_stride] - 5 * src[j + 2 + 3 * src_stride] + src[j + 3 + 3 * src_stride]) +
- 512) >> 10;
- dst[j] = CLIP1(tmp);
- }
- src += src_stride;
- dst += dst_stride;
- }
- }