dsputil_iwmmxt.c
上传用户:wstnjxml
上传日期:2014-04-03
资源大小:7248k
文件大小:8k
源码类别:

Windows CE

开发平台:

C/C++

/*
 * iWMMXt optimized DSP utils
 * Copyright (c) 2004 AGAWA Koji
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
  19. #include "../dsputil.h"
  20. #define DEF(x, y) x ## _no_rnd_ ## y ##_iwmmxt
  21. #define SET_RND(regd)  __asm__ __volatile__ ("mov r12, #1 nt tbcsth " #regd ", r12":::"r12");
  22. #define WAVG2B "wavg2b"
  23. #include "dsputil_iwmmxt_rnd.h"
  24. #undef DEF
  25. #undef SET_RND
  26. #undef WAVG2B
  27. #define DEF(x, y) x ## _ ## y ##_iwmmxt
  28. #define SET_RND(regd)  __asm__ __volatile__ ("mov r12, #2 nt tbcsth " #regd ", r12":::"r12");
  29. #define WAVG2B "wavg2br"
  30. #include "dsputil_iwmmxt_rnd.h"
  31. #undef DEF
  32. #undef SET_RND
  33. #undef WAVG2BR
  34. // need scheduling
  35. #define OP(AVG)                                         
  36.     asm volatile (                                      
  37.         /* alignment */                                 
  38.         "and r12, %[pixels], #7 nt"                   
  39.         "bic %[pixels], %[pixels], #7 nt"             
  40.         "tmcr wcgr1, r12 nt"                          
  41.                                                         
  42.         "wldrd wr0, [%[pixels]] nt"                   
  43.         "wldrd wr1, [%[pixels], #8] nt"               
  44.         "add %[pixels], %[pixels], %[line_size] nt"   
  45.         "walignr1 wr4, wr0, wr1 nt"                   
  46.                                                         
  47.         "1: nt"                                       
  48.                                                         
  49.         "wldrd wr2, [%[pixels]] nt"                   
  50.         "wldrd wr3, [%[pixels], #8] nt"               
  51.         "add %[pixels], %[pixels], %[line_size] nt"   
  52.         "pld [%[pixels]] nt"                          
  53.         "walignr1 wr5, wr2, wr3 nt"                   
  54.         AVG " wr6, wr4, wr5 nt"                       
  55.         "wstrd wr6, [%[block]] nt"                    
  56.         "add %[block], %[block], %[line_size] nt"     
  57.                                                         
  58.         "wldrd wr0, [%[pixels]] nt"                   
  59.         "wldrd wr1, [%[pixels], #8] nt"               
  60.         "add %[pixels], %[pixels], %[line_size] nt"   
  61.         "walignr1 wr4, wr0, wr1 nt"                   
  62.         "pld [%[pixels]] nt"                          
  63.         AVG " wr6, wr4, wr5 nt"                       
  64.         "wstrd wr6, [%[block]] nt"                    
  65.         "add %[block], %[block], %[line_size] nt"     
  66.                                                         
  67.         "subs %[h], %[h], #2 nt"                      
  68.         "bne 1b nt"                                   
  69.         : [block]"+r"(block), [pixels]"+r"(pixels), [h]"+r"(h)  
  70.         : [line_size]"r"(line_size) 
  71.         : "memory", "r12");
  72. void put_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
  73. {
  74.     OP("wavg2br");
  75. }
  76. void put_no_rnd_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
  77. {
  78.     OP("wavg2b");
  79. }
  80. #undef OP
  81. void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_size)
  82. {
  83.     uint8_t *pixels2 = pixels + line_size;
  84.     __asm__ __volatile__ (
  85.         "mov            r12, #4                 nt"
  86.         "1:                                     nt"
  87.         "pld            [%[pixels], %[line_size2]]              nt"
  88.         "pld            [%[pixels2], %[line_size2]]             nt"
  89.         "wldrd          wr4, [%[pixels]]        nt"
  90.         "wldrd          wr5, [%[pixels2]]       nt"
  91.         "pld            [%[block], #32]         nt"
  92.         "wunpckelub     wr6, wr4                nt"
  93.         "wldrd          wr0, [%[block]]         nt"
  94.         "wunpckehub     wr7, wr4                nt"
  95.         "wldrd          wr1, [%[block], #8]     nt"
  96.         "wunpckelub     wr8, wr5                nt"
  97.         "wldrd          wr2, [%[block], #16]    nt"
  98.         "wunpckehub     wr9, wr5                nt"
  99.         "wldrd          wr3, [%[block], #24]    nt"
  100.         "add            %[block], %[block], #32 nt"
  101.         "waddhss        wr10, wr0, wr6          nt"
  102.         "waddhss        wr11, wr1, wr7          nt"
  103.         "waddhss        wr12, wr2, wr8          nt"
  104.         "waddhss        wr13, wr3, wr9          nt"
  105.         "wpackhus       wr14, wr10, wr11        nt"
  106.         "wpackhus       wr15, wr12, wr13        nt"
  107.         "wstrd          wr14, [%[pixels]]       nt"
  108.         "add            %[pixels], %[pixels], %[line_size2]     nt"
  109.         "subs           r12, r12, #1            nt"
  110.         "wstrd          wr15, [%[pixels2]]      nt"
  111.         "add            %[pixels2], %[pixels2], %[line_size2]   nt"
  112.         "bne            1b                      nt"
  113.         : [block]"+r"(block), [pixels]"+r"(pixels), [pixels2]"+r"(pixels2)
  114.         : [line_size2]"r"(line_size << 1)
  115.         : "cc", "memory", "r12");
  116. }
  117. static void nop(uint8_t *block, const uint8_t *pixels, int line_size, int h)
  118. {
  119.     return;
  120. }
  121. int mm_flags; /* multimedia extension flags */
  122. int mm_support(void)
  123. {
  124.     return 0; /* TODO, implement proper detection */
  125. }
  126. void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
  127. {
  128.     mm_flags = mm_support();
  129.     if (avctx->dsp_mask) {
  130. if (avctx->dsp_mask & FF_MM_FORCE)
  131.     mm_flags |= (avctx->dsp_mask & 0xffff);
  132. else
  133.     mm_flags &= ~(avctx->dsp_mask & 0xffff);
  134.     }
  135.     if (!(mm_flags & MM_IWMMXT)) return;
  136.     c->add_pixels_clamped = add_pixels_clamped_iwmmxt;
  137.     c->put_pixels_tab[0][0] = put_pixels16_iwmmxt;
  138.     c->put_pixels_tab[0][1] = put_pixels16_x2_iwmmxt;
  139.     c->put_pixels_tab[0][2] = put_pixels16_y2_iwmmxt;
  140.     c->put_pixels_tab[0][3] = put_pixels16_xy2_iwmmxt;
  141.     c->put_no_rnd_pixels_tab[0][0] = put_pixels16_iwmmxt;
  142.     c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_iwmmxt;
  143.     c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_iwmmxt;
  144.     c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_iwmmxt;
  145.     c->put_pixels_tab[1][0] = put_pixels8_iwmmxt;
  146.     c->put_pixels_tab[1][1] = put_pixels8_x2_iwmmxt;
  147.     c->put_pixels_tab[1][2] = put_pixels8_y2_iwmmxt;
  148.     c->put_pixels_tab[1][3] = put_pixels8_xy2_iwmmxt;
  149.     c->put_no_rnd_pixels_tab[1][0] = put_pixels8_iwmmxt;
  150.     c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_iwmmxt;
  151.     c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_iwmmxt;
  152.     c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_iwmmxt;
  153.     c->avg_pixels_tab[0][0] = avg_pixels16_iwmmxt;
  154.     c->avg_pixels_tab[0][1] = avg_pixels16_x2_iwmmxt;
  155.     c->avg_pixels_tab[0][2] = avg_pixels16_y2_iwmmxt;
  156.     c->avg_pixels_tab[0][3] = avg_pixels16_xy2_iwmmxt;
  157.     c->avg_no_rnd_pixels_tab[0][0] = avg_pixels16_iwmmxt;
  158.     c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_iwmmxt;
  159.     c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_iwmmxt;
  160.     c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_iwmmxt;
  161.     c->avg_pixels_tab[1][0] = avg_pixels8_iwmmxt;
  162.     c->avg_pixels_tab[1][1] = avg_pixels8_x2_iwmmxt;
  163.     c->avg_pixels_tab[1][2] = avg_pixels8_y2_iwmmxt;
  164.     c->avg_pixels_tab[1][3] = avg_pixels8_xy2_iwmmxt;
  165.     c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_iwmmxt;
  166.     c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_iwmmxt;
  167.     c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_iwmmxt;
  168.     c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_iwmmxt;
  169. }