ditherer_mmx16.cc
上传用户:aoeyumen
上传日期:2007-01-06
资源大小:3329k
文件大小:12k
源码类别:

DVD

开发平台:

Unix_Linux

  1. #ifdef HAVE_MMX
  2. #include "ditherer_mmx16.hh"
  /*
   * Constant operands for the MMX colour-conversion loop in
   * Dither_MMX16::ditherBlock().
   *
   * Every table is exactly 64 bits wide (one MMX register) and holds the same
   * 16-bit value in all four word lanes.  The asm code addresses the tables by
   * symbol name only, so from the compiler's point of view they look unused;
   * __attribute__((used)) forces them to be emitted anyway.
   *
   * The 32-bit-pair tables use `unsigned int` so that they stay 8 bytes wide
   * even where `unsigned long` is 64 bits.
   *
   * The colour coefficients are fixed-point values with 6 fractional bits
   * (the conversion shifts results right by 6).
   * NOTE(review): they look like the usual video-range YCbCr->RGB factors
   * scaled by 64 - confirm against the intended colour matrix.
   */

  // Not referenced by any active code visible in this file.
  static long long MMX_0 = 0L;
  // 16 in every word; only used by a luma-offset step that is currently disabled.
  static unsigned int   MMX_10w[]         = {0x00100010, 0x00100010};
  // 128 in every word: subtracted from Cb/Cr to centre chroma around zero.
  static unsigned int   MMX_80w[]         __attribute__((used)) = {0x00800080, 0x00800080};
  // Keeps the low byte of every word (the even-numbered luma samples).
  static unsigned int   MMX_00FFw[]       __attribute__((used)) = {0x00ff00ff, 0x00ff00ff};
  // Cb -> blue contribution: 0x81 = 129.
  static unsigned short MMX_Ublucoeff[]   __attribute__((used)) = {0x81, 0x81, 0x81, 0x81};
  // Cr -> red contribution: 0x66 = 102.
  static unsigned short MMX_Vredcoeff[]   __attribute__((used)) = {0x66, 0x66, 0x66, 0x66};
  // Cb -> green contribution: 0xffe8 = -24 as a signed word.
  static unsigned short MMX_Ugrncoeff[]   __attribute__((used)) = {0xffe8, 0xffe8, 0xffe8, 0xffe8};
  // Cr -> green contribution: 0xffcd = -51 as a signed word.
  static unsigned short MMX_Vgrncoeff[]   __attribute__((used)) = {0xffcd, 0xffcd, 0xffcd, 0xffcd};
  // Luma scale: 0x4a = 74.
  static unsigned short MMX_Ycoeff[]      __attribute__((used)) = {0x4a, 0x4a, 0x4a, 0x4a};
  // Bits 15..11 of a 16-bit pixel (5 bits); also used to pick the top 5 bits of blue.
  static unsigned short MMX_redmask[]     __attribute__((used)) = {0xf800, 0xf800, 0xf800, 0xf800};
  // Bits 10..5 of a 16-bit pixel (6 bits of green).
  static unsigned short MMX_grnmask[]     __attribute__((used)) = {0x7e0, 0x7e0, 0x7e0, 0x7e0};
  // A separate blue mask (0x001f) is not needed: blue is masked with
  // MMX_redmask and then shifted right by 11.
  15. void  Dither_MMX16::ditherBlock(unsigned char *lum, unsigned char *cb, unsigned char *cr,
  16.                      unsigned char *out,
  17.                      int cols, int rows, int screen_width) {
  18.       unsigned short *row1;
  19.       int x;
  20.       unsigned char *y;
  21.       int col1;
  22.       int mod = screen_width-cols;
  23.       row1 = (unsigned short *)out;
  24.       col1 = cols +mod;
  25.       mod += cols +mod;
  26.       mod *=2;
  27.       y = lum +cols*rows;
  28.       x = 0;
  29.       __asm__ __volatile__(
  30.          ".align 8n"
  31.          "1:n"
  32.          "movd           (%1),                   %%mm0n"        // 4 Cb         0  0  0  0 u3 u2 u1 u0
  33.          "pxor           %%mm7,                  %%mm7n"
  34.          "movd           (%0),                   %%mm1n" // 4 Cr                0  0  0  0 v3 v2 v1 v0
  35.          "punpcklbw      %%mm7,                  %%mm0n" // 4 W cb   0 u3  0 u2  0 u1  0 u0
  36.          "punpcklbw      %%mm7,                  %%mm1n" // 4 W cr   0 v3  0 v2  0 v1  0 v0
  37.          "psubw          MMX_80w,                %%mm0n"
  38.          "psubw          MMX_80w,                %%mm1n"
  39.          "movq           %%mm0,                  %%mm2n"        // Cb                   0 u3  0 u2  0 u1  0 u0
  40.          "movq           %%mm1,                  %%mm3n" // Cr
  41.          "pmullw         MMX_Ugrncoeff,          %%mm2n" // Cb2green 0 R3  0 R2  0 R1  0 R0
  42.          "movq           (%2),                   %%mm6n"        // L1      l7 L6 L5 L4 L3 L2 L1 L0
  43.          "pmullw         MMX_Ublucoeff,          %%mm0n" // Cb2blue
  44.          "pand           MMX_00FFw,              %%mm6n" // L1      00 L6 00 L4 00 L2 00 L0
  45.          "pmullw         MMX_Vgrncoeff,          %%mm3n" // Cr2green
  46.          "movq           (%2),                   %%mm7n" // L2
  47.          "pmullw         MMX_Vredcoeff,          %%mm1n" // Cr2red
  48.          //                      "psubw          MMX_10w,                %%mm6n"
  49.          "psrlw          $8,                     %%mm7n"        // L2           00 L7 00 L5 00 L3 00 L1
  50.          "pmullw         MMX_Ycoeff,             %%mm6n" // lum1
  51.          //                      "psubw          MMX_10w,                %%mm7n" // L2
  52.          "paddw          %%mm3,                  %%mm2n" // Cb2green + Cr2green == green
  53.          "pmullw         MMX_Ycoeff,             %%mm7n"  // lum2
  54.          "movq           %%mm6,                  %%mm4n"  // lum1
  55.          "paddw          %%mm0,                  %%mm6n"  // lum1 +blue 00 B6 00 B4 00 B2 00 B0
  56.          "movq           %%mm4,                  %%mm5n"  // lum1
  57.          "paddw          %%mm1,                  %%mm4n"  // lum1 +red  00 R6 00 R4 00 R2 00 R0
  58.          "paddw          %%mm2,                  %%mm5n"  // lum1 +green 00 G6 00 G4 00 G2 00 G0
  59.          "psraw          $6,                     %%mm4n"  // R1 0 .. 64
  60.          "movq           %%mm7,                  %%mm3n"  // lum2                       00 L7 00 L5 00 L3 00 L1
  61.          "psraw          $6,                     %%mm5n"  // G1  - .. +
  62.          "paddw          %%mm0,                  %%mm7n"  // Lum2 +blue 00 B7 00 B5 00 B3 00 B1
  63.          "psraw          $6,                     %%mm6n"  // B1         0 .. 64
  64.          "packuswb       %%mm4,                  %%mm4n"  // R1 R1
  65.          "packuswb       %%mm5,                  %%mm5n"  // G1 G1
  66.          "packuswb       %%mm6,                  %%mm6n"  // B1 B1
  67.          "punpcklbw      %%mm4,                  %%mm4n"
  68.          "punpcklbw      %%mm5,                  %%mm5n"
  69.          "pand           MMX_redmask,            %%mm4n"
  70.          "psllw          $3,                     %%mm5n"  // GREEN       1
  71.          "punpcklbw      %%mm6,                  %%mm6n"
  72.          "pand           MMX_grnmask,            %%mm5n"
  73.          "pand           MMX_redmask,            %%mm6n"
  74.          "por            %%mm5,                  %%mm4n" //
  75.          "psrlw          $11,                    %%mm6n"                // BLUE        1
  76.          "movq           %%mm3,                  %%mm5n" // lum2
  77.          "paddw          %%mm1,                  %%mm3n"        // lum2 +red      00 R7 00 R5 00 R3 00 R1
  78.          "paddw          %%mm2,                  %%mm5n" // lum2 +green 00 G7 00 G5 00 G3 00 G1
  79.          "psraw          $6,                     %%mm3n" // R2
  80.          "por            %%mm6,                  %%mm4n" // MM4
  81.          "psraw          $6,                     %%mm5n" // G2
  82.          "movq           (%2, %3),               %%mm6n"  // L3
  83.          "psraw          $6,                     %%mm7n"
  84.          "packuswb       %%mm3,                  %%mm3n"
  85.          "packuswb       %%mm5,                  %%mm5n"
  86.          "packuswb       %%mm7,                  %%mm7n"
  87.          "pand                   MMX_00FFw,              %%mm6n"  // L3
  88.          "punpcklbw      %%mm3,                  %%mm3n"
  89.          //                              "psubw          MMX_10w,                        %%mm6n"  // L3
  90.          "punpcklbw      %%mm5,                  %%mm5n"
  91.          "pmullw         MMX_Ycoeff,             %%mm6n"  // lum3
  92.          "punpcklbw      %%mm7,                  %%mm7n"
  93.          "psllw          $3,                             %%mm5n"  // GREEN 2
  94.          "pand                   MMX_redmask,    %%mm7n"
  95.          "pand                   MMX_redmask,    %%mm3n"
  96.          "psrlw          $11,                            %%mm7n"  // BLUE  2
  97.          "pand                   MMX_grnmask,    %%mm5n"
  98.          "por                    %%mm7,                  %%mm3n"
  99.          "movq                   (%2,%3),                        %%mm7n"  // L4
  100.          "por                    %%mm5,                  %%mm3n"     //
  101.          "psrlw          $8,                             %%mm7n"    // L4
  102.          "movq                   %%mm4,                  %%mm5n"
  103.          //                              "psubw          MMX_10w,                        %%mm7n"                // L4
  104.          "punpcklwd      %%mm3,                  %%mm4n"
  105.          "pmullw         MMX_Ycoeff,             %%mm7n"    // lum4
  106.          "punpckhwd      %%mm3,                  %%mm5n"
  107.          "movq                   %%mm4,                  (%4)n"
  108.          "movq                   %%mm5,                  8(%4)n"
  109.          "movq                   %%mm6,                  %%mm4n"        // Lum3
  110.          "paddw          %%mm0,                  %%mm6n"                // Lum3 +blue
  111.          "movq                   %%mm4,                  %%mm5n"                        // Lum3
  112.          "paddw          %%mm1,                  %%mm4n"       // Lum3 +red
  113.          "paddw          %%mm2,                  %%mm5n"                        // Lum3 +green
  114.          "psraw          $6,                             %%mm4n"
  115.          "movq                   %%mm7,                  %%mm3n"                        // Lum4
  116.          "psraw          $6,                             %%mm5n"
  117.          "paddw          %%mm0,                  %%mm7n"                   // Lum4 +blue
  118.          "psraw          $6,                             %%mm6n"                        // Lum3 +blue
  119.          "movq                   %%mm3,                  %%mm0n"  // Lum4
  120.          "packuswb       %%mm4,                  %%mm4n"
  121.          "paddw          %%mm1,                  %%mm3n"  // Lum4 +red
  122.          "packuswb       %%mm5,                  %%mm5n"
  123.          "paddw          %%mm2,                  %%mm0n"         // Lum4 +green
  124.          "packuswb       %%mm6,                  %%mm6n"
  125.          "punpcklbw      %%mm4,                  %%mm4n"
  126.          "punpcklbw      %%mm5,                  %%mm5n"
  127.          "punpcklbw      %%mm6,                  %%mm6n"
  128.          "psllw          $3,                             %%mm5n" // GREEN 3
  129.          "pand                   MMX_redmask,    %%mm4n"
  130.          "psraw          $6,                             %%mm3n" // psr 6
  131.          "psraw          $6,                             %%mm0n"
  132.          "pand                   MMX_redmask,    %%mm6n" // BLUE
  133.          "pand                   MMX_grnmask,    %%mm5n"
  134.          "psrlw          $11,                            %%mm6n"  // BLUE  3
  135.          "por                    %%mm5,                  %%mm4n"
  136.          "psraw          $6,                             %%mm7n"
  137.          "por                    %%mm6,                  %%mm4n"
  138.          "packuswb       %%mm3,                  %%mm3n"
  139.          "packuswb       %%mm0,                  %%mm0n"
  140.          "packuswb       %%mm7,                  %%mm7n"
  141.          "punpcklbw      %%mm3,                  %%mm3n"
  142.          "punpcklbw      %%mm0,                  %%mm0n"
  143.          "punpcklbw      %%mm7,                  %%mm7n"
  144.          "pand                   MMX_redmask,    %%mm3n"
  145.          "pand                   MMX_redmask,    %%mm7n" // BLUE
  146.          "psllw          $3,                             %%mm0n" // GREEN 4
  147.          "psrlw          $11,                            %%mm7n"
  148.          "pand                   MMX_grnmask,    %%mm0n"
  149.          "por                    %%mm7,                  %%mm3n"
  150.          "addl                   $8,                             %6n"
  151.          "por                    %%mm0,                  %%mm3n"
  152.          "movq                   %%mm4,                  %%mm5n"
  153.          "punpcklwd      %%mm3,                  %%mm4n"
  154.          "punpckhwd      %%mm3,                  %%mm5n"
  155.          "movq                   %%mm4,                  (%4,%5,2)n"
  156.          "movq                   %%mm5,                  8(%4,%5,2)n"
  157.          "addl                   $8,                             %2n"
  158.          "addl                   $4,                             %0n"
  159.          "addl                   $4,                             %1n"
  160.          "cmpl                   %3,                             %6n"
  161.          "leal                   16(%4),                 %4n"
  162.          "jl             1bn"
  163.          "addl           %3,     %2n"                   /* lum += cols */
  164.          "addl           %7,     %4n"                   /* row1 += mod */
  165.          "movl           $0,     %6n"
  166.          "cmpl           %8,     %2n"
  167.          "jl             1bn"
  168.          :
  169.          : "r" (cr), "r" (cb), "r" (lum), "r" (cols), "r" (row1) ,"r" (col1), "m" (x), "m" (mod)
  170.          , "m" (y)
  171.          );
  172.       __asm__ __volatile__(
  173.          "emmsn"
  174.          );
  175.    }
  176. #endif