yadif.h
资源名称:vlc-1.0.5.zip [点击查看]
上传用户:kjfoods
上传日期:2020-07-06
资源大小:29949k
文件大小:11k
源码类别:
midi
开发平台:
Unix_Linux
/*
 * Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of MPlayer.
 *
 * MPlayer is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * MPlayer is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

/* */
/*
 * MMX/MMXEXT inline-assembly version of the yadif line filter.
 *
 * Only built when CAN_COMPILE_SSE2 is defined and the compiler is GCC > 3.0
 * (the asm uses named operands).  When it is built, HAVE_YADIF_SSE2 is
 * defined so that the including file can detect its presence.
 *
 * The including file must provide struct vf_priv_s (with an int `mode`
 * member) and the integer type x86_reg before this point.
 *
 * Register roles shared by the macros below:
 *   mm7 - all zeroes, used to widen bytes to words
 *   mm0 - best spatial_score so far (4 words)
 *   mm1 - spatial_pred matching mm0 (4 words)
 *   mm2 - score of the direction just evaluated by CHECK
 *   mm5 - prediction of the direction just evaluated by CHECK
 *   mm6 - comparison mask left by CHECK1 for the following CHECK2
 */
#if defined(CAN_COMPILE_SSE2) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ > 0))

#define HAVE_YADIF_SSE2

/* Load 4 bytes from `mem` and zero-extend them into the 4 words of `dst`
 * (relies on mm7 == 0). */
#define LOAD4(mem,dst) \
            "movd      "mem", "#dst" \n\t"\
            "punpcklbw %%mm7, "#dst" \n\t"

/* dst = abs(dst) per signed word, computed as max(dst, 0 - dst);
 * `tmp` is clobbered. */
#define PABS(tmp,dst) \
            "pxor     "#tmp", "#tmp" \n\t"\
            "psubw    "#dst", "#tmp" \n\t"\
            "pmaxsw   "#tmp", "#dst" \n\t"

/* Evaluate one diagonal direction.  `pj` / `mj` are the byte displacements
 * applied to the line above (cur+mrefs) and the line below (cur+prefs).
 * On exit mm5 holds the candidate prediction and mm2 the candidate score.
 * pavgb rounds up, so (a^b)&1 is subtracted to obtain the truncating
 * average that the C version computes with >>1. */
#define CHECK(pj,mj) \
            "movq "#pj"(%[cur],%[mrefs]), %%mm2 \n\t" /* cur[x-refs-1+j] */\
            "movq "#mj"(%[cur],%[prefs]), %%mm3 \n\t" /* cur[x+refs-1-j] */\
            "movq      %%mm2, %%mm4 \n\t"\
            "movq      %%mm2, %%mm5 \n\t"\
            "pxor      %%mm3, %%mm4 \n\t"\
            "pavgb     %%mm3, %%mm5 \n\t"\
            "pand     %[pb1], %%mm4 \n\t"\
            "psubusb   %%mm4, %%mm5 \n\t"\
            "psrlq     $8,    %%mm5 \n\t"\
            "punpcklbw %%mm7, %%mm5 \n\t" /* (cur[x-refs+j] + cur[x+refs-j])>>1 */\
            "movq      %%mm2, %%mm4 \n\t"\
            "psubusb   %%mm3, %%mm2 \n\t"\
            "psubusb   %%mm4, %%mm3 \n\t"\
            "pmaxub    %%mm3, %%mm2 \n\t"\
            "movq      %%mm2, %%mm3 \n\t"\
            "movq      %%mm2, %%mm4 \n\t" /* ABS(cur[x-refs-1+j] - cur[x+refs-1-j]) */\
            "psrlq      $8,   %%mm3 \n\t" /* ABS(cur[x-refs  +j] - cur[x+refs  -j]) */\
            "psrlq     $16,   %%mm4 \n\t" /* ABS(cur[x-refs+1+j] - cur[x+refs+1-j]) */\
            "punpcklbw %%mm7, %%mm2 \n\t"\
            "punpcklbw %%mm7, %%mm3 \n\t"\
            "punpcklbw %%mm7, %%mm4 \n\t"\
            "paddw     %%mm3, %%mm2 \n\t"\
            "paddw     %%mm4, %%mm2 \n\t" /* score */

/* Accept the candidate from CHECK where score < spatial_score, and keep the
 * comparison mask in mm6 for a following CHECK2. */
#define CHECK1 \
            "movq      %%mm0, %%mm3 \n\t"\
            "pcmpgtw   %%mm2, %%mm3 \n\t" /* if(score < spatial_score) */\
            "pminsw    %%mm2, %%mm0 \n\t" /* spatial_score= score; */\
            "movq      %%mm3, %%mm6 \n\t"\
            "pand      %%mm3, %%mm5 \n\t"\
            "pandn     %%mm1, %%mm3 \n\t"\
            "por       %%mm5, %%mm3 \n\t"\
            "movq      %%mm3, %%mm1 \n\t" /* spatial_pred= (cur[x-refs+j] + cur[x+refs-j])>>1; */

/* pretend not to have checked dir=2 if dir=1 was bad.
 * hurts both quality and speed, but matches the C version.
 * (mask + 1) << 14 is 0 where CHECK1 accepted and 0x4000 elsewhere; that
 * penalty is added to the score with saturation before the comparison. */
#define CHECK2 \
            "paddw    %[pw1], %%mm6 \n\t"\
            "psllw     $14,   %%mm6 \n\t"\
            "paddsw    %%mm6, %%mm2 \n\t"\
            "movq      %%mm0, %%mm3 \n\t"\
            "pcmpgtw   %%mm2, %%mm3 \n\t"\
            "pminsw    %%mm2, %%mm0 \n\t"\
            "pand      %%mm3, %%mm5 \n\t"\
            "pandn     %%mm1, %%mm3 \n\t"\
            "por       %%mm5, %%mm3 \n\t"\
            "movq      %%mm3, %%mm1 \n\t"

/**
 * Filter one line, 4 output pixels per loop iteration.
 *
 * @param p      filter state; only p->mode is read (mode >= 2 skips the
 *               extra check against the lines two rows away)
 * @param dst    destination line; each iteration stores 4 bytes, so up to
 *               3 bytes past dst[w-1] are written when w is not a multiple
 *               of 4
 * @param prev   co-located pixel in the previous picture
 * @param cur    co-located pixel in the current picture
 * @param next   co-located pixel in the next picture
 * @param w      number of pixels to produce
 * @param refs   byte offset between vertically adjacent lines
 * @param parity non-zero: prev2/next2 are prev/cur; zero: they are cur/next
 *
 * The 8-byte loads around cur read a few bytes before and after the 4
 * pixels being produced on the lines above and below.
 * NOTE(review): the MMX state is not cleared here (no emms); presumably the
 * caller takes care of that - confirm.
 * NOTE(review): the final movd stores 4 bytes while the "=m" operand names a
 * single uint8_t - confirm that no caller depends on the compiler tracking
 * dst[1..3].
 */
static void yadif_filter_line_mmx2(struct vf_priv_s *p, uint8_t *dst, uint8_t *prev, uint8_t *cur, uint8_t *next, int w, int refs, int parity){
    static const uint64_t pw_1 = 0x0001000100010001ULL; /* four words of 1 */
    static const uint64_t pb_1 = 0x0101010101010101ULL; /* eight bytes of 1 */
    const int mode = p->mode;
    uint64_t tmp0, tmp1, tmp2, tmp3; /* spill slots for c, d, e and diff */
    int x;

/* The loop body is a macro because the names of the prev2/next2 operands are
 * pasted into the asm text; it is expanded twice below, once per parity. */
#define FILTER\
    for(x=0; x<w; x+=4){\
        __asm__ volatile(\
            "pxor      %%mm7, %%mm7 \n\t"\
            LOAD4("(%[cur],%[mrefs])", %%mm0) /* c = cur[x-refs] */\
            LOAD4("(%[cur],%[prefs])", %%mm1) /* e = cur[x+refs] */\
            LOAD4("(%["prev2"])", %%mm2) /* prev2[x] */\
            LOAD4("(%["next2"])", %%mm3) /* next2[x] */\
            "movq      %%mm3, %%mm4 \n\t"\
            "paddw     %%mm2, %%mm3 \n\t"\
            "psraw     $1,    %%mm3 \n\t" /* d = (prev2[x] + next2[x])>>1 */\
            "movq      %%mm0, %[tmp0] \n\t" /* c */\
            "movq      %%mm3, %[tmp1] \n\t" /* d */\
            "movq      %%mm1, %[tmp2] \n\t" /* e */\
            "psubw     %%mm4, %%mm2 \n\t"\
            PABS(      %%mm4, %%mm2) /* temporal_diff0 */\
            LOAD4("(%[prev],%[mrefs])", %%mm3) /* prev[x-refs] */\
            LOAD4("(%[prev],%[prefs])", %%mm4) /* prev[x+refs] */\
            "psubw     %%mm0, %%mm3 \n\t"\
            "psubw     %%mm1, %%mm4 \n\t"\
            PABS(      %%mm5, %%mm3)\
            PABS(      %%mm5, %%mm4)\
            "paddw     %%mm4, %%mm3 \n\t" /* temporal_diff1 */\
            "psrlw     $1,    %%mm2 \n\t"\
            "psrlw     $1,    %%mm3 \n\t"\
            "pmaxsw    %%mm3, %%mm2 \n\t"\
            LOAD4("(%[next],%[mrefs])", %%mm3) /* next[x-refs] */\
            LOAD4("(%[next],%[prefs])", %%mm4) /* next[x+refs] */\
            "psubw     %%mm0, %%mm3 \n\t"\
            "psubw     %%mm1, %%mm4 \n\t"\
            PABS(      %%mm5, %%mm3)\
            PABS(      %%mm5, %%mm4)\
            "paddw     %%mm4, %%mm3 \n\t" /* temporal_diff2 */\
            "psrlw     $1,    %%mm3 \n\t"\
            "pmaxsw    %%mm3, %%mm2 \n\t"\
            "movq      %%mm2, %[tmp3] \n\t" /* diff */\
\
            "paddw     %%mm0, %%mm1 \n\t"\
            "paddw     %%mm0, %%mm0 \n\t"\
            "psubw     %%mm1, %%mm0 \n\t"\
            "psrlw     $1,    %%mm1 \n\t" /* spatial_pred */\
            PABS(      %%mm2, %%mm0)      /* ABS(c-e) */\
\
            "movq -1(%[cur],%[mrefs]), %%mm2 \n\t" /* cur[x-refs-1] */\
            "movq -1(%[cur],%[prefs]), %%mm3 \n\t" /* cur[x+refs-1] */\
            "movq      %%mm2, %%mm4 \n\t"\
            "psubusb   %%mm3, %%mm2 \n\t"\
            "psubusb   %%mm4, %%mm3 \n\t"\
            "pmaxub    %%mm3, %%mm2 \n\t"\
            "pshufw $9,%%mm2, %%mm3 \n\t"\
            "punpcklbw %%mm7, %%mm2 \n\t" /* ABS(cur[x-refs-1] - cur[x+refs-1]) */\
            "punpcklbw %%mm7, %%mm3 \n\t" /* ABS(cur[x-refs+1] - cur[x+refs+1]) */\
            "paddw     %%mm2, %%mm0 \n\t"\
            "paddw     %%mm3, %%mm0 \n\t"\
            "psubw    %[pw1], %%mm0 \n\t" /* spatial_score */\
\
            CHECK(-2,0)\
            CHECK1\
            CHECK(-3,1)\
            CHECK2\
            CHECK(0,-2)\
            CHECK1\
            CHECK(1,-3)\
            CHECK2\
\
            /* if(p->mode<2) ... */\
            "movq    %[tmp3], %%mm6 \n\t" /* diff */\
            "cmp       $2, %[mode] \n\t"\
            "jge       1f \n\t"\
            LOAD4("(%["prev2"],%[mrefs],2)", %%mm2) /* prev2[x-2*refs] */\
            LOAD4("(%["next2"],%[mrefs],2)", %%mm4) /* next2[x-2*refs] */\
            LOAD4("(%["prev2"],%[prefs],2)", %%mm3) /* prev2[x+2*refs] */\
            LOAD4("(%["next2"],%[prefs],2)", %%mm5) /* next2[x+2*refs] */\
            "paddw     %%mm4, %%mm2 \n\t"\
            "paddw     %%mm5, %%mm3 \n\t"\
            "psrlw     $1,    %%mm2 \n\t" /* b */\
            "psrlw     $1,    %%mm3 \n\t" /* f */\
            "movq    %[tmp0], %%mm4 \n\t" /* c */\
            "movq    %[tmp1], %%mm5 \n\t" /* d */\
            "movq    %[tmp2], %%mm7 \n\t" /* e */\
            "psubw     %%mm4, %%mm2 \n\t" /* b-c */\
            "psubw     %%mm7, %%mm3 \n\t" /* f-e */\
            "movq      %%mm5, %%mm0 \n\t"\
            "psubw     %%mm4, %%mm5 \n\t" /* d-c */\
            "psubw     %%mm7, %%mm0 \n\t" /* d-e */\
            "movq      %%mm2, %%mm4 \n\t"\
            "pminsw    %%mm3, %%mm2 \n\t"\
            "pmaxsw    %%mm4, %%mm3 \n\t"\
            "pmaxsw    %%mm5, %%mm2 \n\t"\
            "pminsw    %%mm5, %%mm3 \n\t"\
            "pmaxsw    %%mm0, %%mm2 \n\t" /* max */\
            "pminsw    %%mm0, %%mm3 \n\t" /* min */\
            "pxor      %%mm4, %%mm4 \n\t"\
            "pmaxsw    %%mm3, %%mm6 \n\t"\
            "psubw     %%mm2, %%mm4 \n\t" /* -max */\
            "pmaxsw    %%mm4, %%mm6 \n\t" /* diff= MAX3(diff, min, -max); */\
            "1: \n\t"\
\
            "movq    %[tmp1], %%mm2 \n\t" /* d */\
            "movq      %%mm2, %%mm3 \n\t"\
            "psubw     %%mm6, %%mm2 \n\t" /* d-diff */\
            "paddw     %%mm6, %%mm3 \n\t" /* d+diff */\
            "pmaxsw    %%mm2, %%mm1 \n\t"\
            "pminsw    %%mm3, %%mm1 \n\t" /* d = clip(spatial_pred, d-diff, d+diff); */\
            "packuswb  %%mm1, %%mm1 \n\t"\
\
            :[tmp0]"=m"(tmp0),\
             [tmp1]"=m"(tmp1),\
             [tmp2]"=m"(tmp2),\
             [tmp3]"=m"(tmp3)\
            :[prev] "r"(prev),\
             [cur]  "r"(cur),\
             [next] "r"(next),\
             [prefs]"r"((x86_reg)refs),\
             [mrefs]"r"((x86_reg)-refs),\
             [pw1]  "m"(pw_1),\
             [pb1]  "m"(pb_1),\
             [mode] "g"(mode)\
        );\
        __asm__ volatile("movd %%mm1, %0" :"=m"(*dst));\
        dst += 4;\
        prev+= 4;\
        cur += 4;\
        next+= 4;\
    }

    if(parity){
#define prev2 "prev"
#define next2 "cur"
        FILTER
#undef prev2
#undef next2
    }else{
#define prev2 "cur"
#define next2 "next"
        FILTER
#undef prev2
#undef next2
    }
}
#undef LOAD4
#undef PABS
#undef CHECK
#undef CHECK1
#undef CHECK2
#undef FILTER

#endif
- static void yadif_filter_line_c(struct vf_priv_s *p, uint8_t *dst, uint8_t *prev, uint8_t *cur, uint8_t *next, int w, int refs, int parity){
- int x;
- uint8_t *prev2= parity ? prev : cur ;
- uint8_t *next2= parity ? cur : next;
- for(x=0; x<w; x++){
- int c= cur[-refs];
- int d= (prev2[0] + next2[0])>>1;
- int e= cur[+refs];
- int temporal_diff0= FFABS(prev2[0] - next2[0]);
- int temporal_diff1=( FFABS(prev[-refs] - c) + FFABS(prev[+refs] - e) )>>1;
- int temporal_diff2=( FFABS(next[-refs] - c) + FFABS(next[+refs] - e) )>>1;
- int diff= FFMAX3(temporal_diff0>>1, temporal_diff1, temporal_diff2);
- int spatial_pred= (c+e)>>1;
- int spatial_score= FFABS(cur[-refs-1] - cur[+refs-1]) + FFABS(c-e)
- + FFABS(cur[-refs+1] - cur[+refs+1]) - 1;
- #define CHECK(j)
- { int score= FFABS(cur[-refs-1+j] - cur[+refs-1-j])
- + FFABS(cur[-refs +j] - cur[+refs -j])
- + FFABS(cur[-refs+1+j] - cur[+refs+1-j]);
- if(score < spatial_score){
- spatial_score= score;
- spatial_pred= (cur[-refs +j] + cur[+refs -j])>>1;
- CHECK(-1) CHECK(-2) }} }}
- CHECK( 1) CHECK( 2) }} }}
- if(p->mode<2){
- int b= (prev2[-2*refs] + next2[-2*refs])>>1;
- int f= (prev2[+2*refs] + next2[+2*refs])>>1;
- #if 0
- int a= cur[-3*refs];
- int g= cur[+3*refs];
- int max= FFMAX3(d-e, d-c, FFMIN3(FFMAX(b-c,f-e),FFMAX(b-c,b-a),FFMAX(f-g,f-e)) );
- int min= FFMIN3(d-e, d-c, FFMAX3(FFMIN(b-c,f-e),FFMIN(b-c,b-a),FFMIN(f-g,f-e)) );
- #else
- int max= FFMAX3(d-e, d-c, FFMIN(b-c, f-e));
- int min= FFMIN3(d-e, d-c, FFMAX(b-c, f-e));
- #endif
- diff= FFMAX3(diff, min, -max);
- }
- if(spatial_pred > d + diff)
- spatial_pred = d + diff;
- else if(spatial_pred < d - diff)
- spatial_pred = d - diff;
- dst[0] = spatial_pred;
- dst++;
- cur++;
- prev++;
- next++;
- prev2++;
- next2++;
- }
- }