/*
 * yuv420argb.c
 * Uploaded by: jylinhe
 * Upload date: 2022-07-11
 * Resource size: 334k
 * File size: 6k
 */
// yuv420argb.c : YUV 4:2:0 to 32-bit ARGB conversion routines (table-driven C version and MMX version).
#include "yuv420argb.h"
/*
 * Lookup tables for the table-driven converter YUV420ToARGB32().
 * All of them are filled in by init_dither_tab(); until that has run they
 * hold zeros.
 *
 * The chroma tables are indexed by a raw 8-bit chroma sample and hold
 * (sample - 128) multiplied by a 16.16 fixed-point coefficient.
 */
static long int crv_tab[256];   /* V term added to luma for the third output byte   */
static long int cbu_tab[256];   /* U term added to luma for the first output byte   */
static long int cgu_tab[256];   /* U term subtracted from luma for the second byte  */
static long int cgv_tab[256];   /* V term subtracted from luma for the second byte  */
static long int tab_76309[256]; /* luma term: 76309 * (sample - 16)                 */

/*
 * Clamp table: entries 0..383 are 0, entries 384..639 are 0..255 and
 * entries 640..1023 are 255.  Indexing with (384 + value) therefore clamps
 * any value in the range -384..639 to 0..255.
 */
static unsigned char clp[1024];
- static unsigned __int64 mmw_mult_Y = 0x2568256825682568;
- static unsigned __int64 mmw_mult_U_G = 0xf36ef36ef36ef36e;
- static unsigned __int64 mmw_mult_U_B = 0x40cf40cf40cf40cf;
- static unsigned __int64 mmw_mult_V_R = 0x3343334333433343;
- static unsigned __int64 mmw_mult_V_G = 0xe5e2e5e2e5e2e5e2;
- static unsigned __int64 mmb_0x10 = 0x1010101010101010;
- static unsigned __int64 mmw_0x0080 = 0x0080008000800080;
- static unsigned __int64 mmw_0x00ff = 0x00ff00ff00ff00ff;
- void init_dither_tab()
- {
- int i,ind;
- long crv,cbu,cgu,cgv;
- static int inited = 0;
-
- if (inited != 0)
- return;
-
- inited = 1;
- crv = 104597; cbu = 132201;
- cgu = 25675; cgv = 53279;
- for (i = 0; i < 256; i++)
- {
- crv_tab[i] = (i-128) * crv;
- cbu_tab[i] = (i-128) * cbu;
- cgu_tab[i] = (i-128) * cgu;
- cgv_tab[i] = (i-128) * cgv;
- tab_76309[i] = 76309*(i-16);
- }
- for (i=0; i<384; i++)
- clp[i] =0;
-
- ind=384;
-
- for (i=0;i<256; i++)
- clp[ind++]=i;
-
- ind=640;
-
- for (i=0;i<384;i++)
- clp[ind++]=255;
- }
- int YUV420ToARGB32( unsigned char *src0,
- unsigned char *src1,
- unsigned char *src2,
- int stride_y,
- int stride_u,
- int stride_v,
- unsigned char *dst_ori,
- int width,
- int height,
- int out_stride )
- {
- int i,j,c1,c2,c3,c4;
- int y1,y2,u,v;
- unsigned char *py0,*py1,*pu,*pv;
- unsigned char *d1, *d2;
- py0 = src0;
- py1 = src0+stride_y;
- pu = src1;
- pv = src2;
- d1 = dst_ori;
- d2 = dst_ori+out_stride;
-
- for (j = 0; j < height; j += 2)
- {
- for (i = 0; i < width; i += 2)
- {
- u = *pu++;
- v = *pv++;
-
- c1 = crv_tab[v];
- c2 = cgu_tab[u];
- c3 = cgv_tab[v];
- c4 = cbu_tab[u];
- //up-left
- y1 = tab_76309[*py0++];
- *d1++ = clp[384+((y1 + c4)>>16)];
- *d1++ = clp[384+((y1 - c2 - c3)>>16)];
- *d1++ = clp[384+((y1 + c1)>>16)];
- *d1++ = 0;
- //down-left
- y2 = tab_76309[*py1++];
- *d2++ = clp[384+((y2 + c4)>>16)];
- *d2++ = clp[384+((y2 - c2 - c3)>>16)];
- *d2++ = clp[384+((y2 + c1)>>16)];
- *d2++ = 0;
- //up-right
- y1 = tab_76309[*py0++];
- *d1++ = clp[384+((y1 + c4)>>16)];
- *d1++ = clp[384+((y1 - c2 - c3)>>16)];
- *d1++ = clp[384+((y1 + c1)>>16)];
- *d1++ = 0;
- //down-right
- y2 = tab_76309[*py1++];
- *d2++ = clp[384+((y2 + c4)>>16)];
- *d2++ = clp[384+((y2 - c2 - c3)>>16)];
- *d2++ = clp[384+((y2 + c1)>>16)];
- *d2++ = 0;
- }
-
- d1 += out_stride;
- d2 += out_stride;
-
- src0 += stride_y*2;
- py0 = src0;
- py1 = src0+stride_y;
- src1 += stride_u;
- src2 += stride_v;
- pu = src1;
- pv = src2;
- }
- return 1;
- }
- int YUV420_TO_ARGB32_MMX( unsigned char *puc_y,
- int stride_y,
- unsigned char *puc_u,
- unsigned char *puc_v,
- int stride_u,
- int stride_v,
- unsigned char *puc_out,
- int out_width,
- int out_height,
- int out_stride )
- {
- //unsigned char temp;
- int y, horiz_count;
-
- horiz_count = -(out_width >> 3);
- for (y=0; y<out_height; y++)
- {
- __asm
- {
- push eax
- push ebx
- push ecx
- push edx
- push edi
- mov eax, puc_out
- mov ebx, puc_y
- mov ecx, puc_u
- mov edx, puc_v
- mov edi, horiz_count
- horiz_loop:
- movd mm2, [ecx]
- pxor mm7, mm7
- movd mm3, [edx]
- punpcklbw mm2, mm7
- movq mm0, [ebx]
- punpcklbw mm3, mm7
- movq mm1, mmw_0x00ff
- psubusb mm0, mmb_0x10
- psubw mm2, mmw_0x0080
- pand mm1, mm0
- psubw mm3, mmw_0x0080
- psllw mm1, 3
- psrlw mm0, 8
- psllw mm2, 3
- pmulhw mm1, mmw_mult_Y
- psllw mm0, 3
- psllw mm3, 3
- movq mm5, mm3
- pmulhw mm5, mmw_mult_V_R
- movq mm4, mm2
- pmulhw mm0, mmw_mult_Y
- movq mm7, mm1
- pmulhw mm2, mmw_mult_U_G
- paddsw mm7, mm5
- pmulhw mm3, mmw_mult_V_G
- packuswb mm7, mm7
- pmulhw mm4, mmw_mult_U_B
- paddsw mm5, mm0
- packuswb mm5, mm5
- paddsw mm2, mm3
- movq mm3, mm1
- movq mm6, mm1
- paddsw mm3, mm4
- paddsw mm6, mm2
- punpcklbw mm7, mm5
- paddsw mm2, mm0
- packuswb mm6, mm6
- packuswb mm2, mm2
- packuswb mm3, mm3
- paddsw mm4, mm0
- packuswb mm4, mm4
- punpcklbw mm6, mm2
- punpcklbw mm3, mm4
- // 32-bit shuffle.
- pxor mm0, mm0
- movq mm1, mm6
- punpcklbw mm1, mm0
- movq mm0, mm3
- punpcklbw mm0, mm7
- movq mm2, mm0
- punpcklbw mm0, mm1
- punpckhbw mm2, mm1
- // 24-bit shuffle and sav
- movd [eax], mm0
-
- psrlq mm0, 32
- movd 4[eax], mm0
- movd 8[eax], mm2
- psrlq mm2, 32
- movd 12[eax], mm2
- // 32-bit shuffle.
- pxor mm0, mm0
- movq mm1, mm6
- punpckhbw mm1, mm0
- movq mm0, mm3
- punpckhbw mm0, mm7
- movq mm2, mm0
- punpcklbw mm0, mm1
- punpckhbw mm2, mm1
- // 24-bit shuffle and sav
- movd 16[eax], mm0
- psrlq mm0, 32
- movd 20[eax], mm0
- add ebx, 8
- movd 24[eax], mm2
- psrlq mm2, 32
- add ecx, 4
- add edx, 4
- movd 28[eax], mm2
- add eax, 32
- inc edi
- jne horiz_loop
- pop edi
- pop edx
- pop ecx
- pop ebx
- pop eax
- emms
- }
-
- puc_out += out_stride;
- puc_y += stride_y;
-
- if (y%2)
- {
- puc_u += stride_u;
- puc_v += stride_v;
- }
- }
- return 1;
- }