getpic.cc
上传用户:aoeyumen
上传日期:2007-01-06
资源大小:3329k
文件大小:23k
- /*
- File: getpic.cc
- */
- #include <string.h>
- #include "all.hh"
- #ifdef HAVE_MMX
- extern "C" void IDCT_mmx(short *);
- extern "C" void add_block_mmx(unsigned char *, short *, int);
- extern "C" void set_block_mmx(unsigned char *, short *, int);
- static unsigned long long MMX_128 = 0x80008000800080LL;
- #endif
/* ---------------------------------------------------
 * j_rev_dct_sparse
 *
 * Scales the single coefficient at data[0] in place: the value is divided
 * by the quantiser (8, or 8*16 when built with HAVE_MMX) and rounded to the
 * nearest integer, with halves rounded away from zero.  No other element of
 * the block is read or written.
 *
 * NOTE(review): judging by the commented-out call site in getMBs() this is
 * meant as a shortcut IDCT for blocks flagged in sparse[] -- confirm before
 * re-enabling it.
 */
static void j_rev_dct_sparse (short *data)
{
#ifdef HAVE_MMX
  const int quant = 8 * 16;
#else
  const int quant = 8;
#endif
  const int half = quant >> 1;
  const int v = *data;
  int scaled;

  /* All arithmetic is done in int: negating -32768 in a short would wrap
     around and flip the sign of the result. */
  if (v < 0)
    scaled = -((-v + half) / quant);
  else
    scaled = (v + half) / quant;

  *data = static_cast<short>(scaled);
}
- // decode one frame or field picture
- void LayerData::getpicture(int framenum){
- if (pict_struct==FRAME_PICTURE && secondfield){
- /* recover from illegal number of field pictures */
- printf("odd number of field picturesn");
- secondfield = 0;
- }
- for (int i=0; i<3; i++){
- if (pict_type==B_TYPE) newframe[i] = auxframe[i];
- else {
- if (!secondfield){
- unsigned char* tmp = oldrefframe[i];
- oldrefframe[i] = refframe[i];
- refframe[i] = tmp;
- }
- newframe[i] = refframe[i];
- }
- if (pict_struct==BOTTOM_FIELD)
- newframe[i]+= (i==0) ? coded_picture_width : chrom_width;
- }
- //if (pict_scal && !secondfield) getspatref();
- getMBs(framenum);
- if (framenum!=0){
- if (pict_struct==FRAME_PICTURE || secondfield){
- if (pict_type==B_TYPE) display->dither(auxframe);
- else display->dither(oldrefframe);
- }
- else display->display_second_field();
- }
- if (pict_struct!=FRAME_PICTURE) secondfield = !secondfield;
- }
- // store last frame
- void LayerData::putlast(){
- if (secondfield) printf("last frame incomplete, not storedn");
- else display->dither(refframe);
- }
- /* decode all macroblocks of the current picture */
- void LayerData::getMBs(int framenum){
- int comp;
- int MBA, MBAmax, MBAinc, mb_type, cbp, motion_type(0), dct_type;
- int slice_vert_pos_ext;
- int bx, by;
- unsigned int code;
- int dc_dct_pred[3];
- int mv_count, mv_format, mvscale;
- int PMV[2][2][2], mv_field_sel[2][2];
- int dmv, dmvector[2];
- int qs;
- int stwtype, stwclass;
- int SNRcbp;
- // int SNRMBA(0), SNRmb_type,, SNRMBAinc(0), SNRdct_type, dummy; // SNR scal.
- /* number of macroblocks per picture */
- MBAmax = mb_width*mb_height;
- if (pict_struct!=FRAME_PICTURE)
- MBAmax>>=1; /* field picture has half as mnay macroblocks as frame */
- MBA = 0; /* macroblock address */
- MBAinc = 0;
- /*
- if (twostreams && enhan.scalable_mode==SC_SNR){
- SNRMBA=0;
- SNRMBAinc=0;
- }
- */
- fault=0;
- for (;;){
- #ifdef TRACE
- if (trace) printf("frame %d, MB %dn",framenum,MBA);
- #endif
- if (!prog_seq && pict_struct==FRAME_PICTURE && MBA==(MBAmax>>1) &&
- framenum!=0)
- // && display->getType()==T_X11)
- display->display_second_field();
- // ld = &base;
- if (MBAinc==0){
- // if (scalable_mode==SC_DP && pri_brk==1) ld = &enhan;
- if (!input->showbits(23) || fault){ /* startcode or fault */
- resync: /* if fault: resynchronize to next startcode */
- fault = 0;
- if (MBA>=MBAmax) return; /* all macroblocks decoded */
- code=input->startcode();
- if (code<Slice_min_start || code>Slice_max_start){
- /* only slice headers are allowed in picture_data */
- if (!quiet) printf("Premature end of picturen");
- return;
- }
- input->flushbits(32);
- /* decode slice header (may change quant_scale) */
- slice_vert_pos_ext = getslicehdr();
- /*
- if (scalable_mode==SC_DP){
- ld = &enhan;
- input->startcode();
- code = input->showbits(32);
- if (code<SLICE_MIN_START || code>SLICE_MAX_START){
- // only slice headers are allowed in picture_data
- if (!quiet) printf("Premature end of picturen");
- return;
- }
- input->flushbits(32);
- // decode slice header (may change quant_scale)
- slice_vert_pos_ext = getslicehdr();
- // if (pri_brk!=1) ld = &base;
- }
- */
- /* decode macroblock address increment */
- MBAinc = getMBA();
- if (fault) goto resync;
- /* set current location */
- MBA = ((slice_vert_pos_ext<<7) + (code&255) - 1)*mb_width + MBAinc - 1;
- MBAinc = 1; /* first macroblock in slice: not skipped */
- /* reset all DC coefficient and motion vector predictors */
- dc_dct_pred[0]=dc_dct_pred[1]=dc_dct_pred[2]=0;
- PMV[0][0][0]=PMV[0][0][1]=PMV[1][0][0]=PMV[1][0][1]=0;
- PMV[0][1][0]=PMV[0][1][1]=PMV[1][1][0]=PMV[1][1][1]=0;
- }
- else { /* neither startcode nor fault */
- if (MBA>=MBAmax){
- if (!quiet) printf("Too many macroblocks in picturen");
- return;
- }
- /*
- if (scalable_mode==SC_DP && pri_brk==1) ld = &enhan;
- */
- /* decode macroblock address increment */
- MBAinc = getMBA();
- if (fault) goto resync;
- }
- }
- if (MBA>=MBAmax){
- /* MBAinc points beyond picture dimensions */
- if (!quiet) printf("Too many macroblocks in picturen");
- return;
- }
- if (MBAinc==1) /* not skipped */{
- /*
- if (scalable_mode==SC_DP){
- if (pri_brk<=2) ld = &enhan;
- else ld = &base;
- }
- */
- macroblock_modes(&mb_type, &stwtype, &stwclass,
- &motion_type, &mv_count, &mv_format, &dmv, &mvscale, &dct_type);
- if (fault) goto resync;
- if (mb_type & MB_QUANT){
- qs = input->getbits(5);
- #ifdef TRACE
- if (trace){
- printf("quantiser_scale_code (");
- printbits(qs,5,5);
- printf("): %dn",qs);
- }
- #endif
- if (mpeg2)
- quant_scale = qscale_type ? non_linear_mquant_table[qs] : (qs << 1);
- else quant_scale = qs;
- if (scalable_mode==SC_DP)
- /* make sure quant_scale is valid */
- quant_scale = quant_scale;
- }
- /* motion vectors */
- /* decode forward motion vectors */
- if ((mb_type & MB_FORWARD) || ((mb_type & MB_INTRA) && conceal_mv)){
- if (mpeg2)
- motion_vectors(PMV,dmvector,mv_field_sel,
- 0,mv_count,mv_format,h_forw_r_size,v_forw_r_size,dmv,mvscale);
- else
- motion_vector(PMV[0][0],dmvector,
- forw_r_size,forw_r_size,0,0,full_forw);
- }
- if (fault) goto resync;
- /* decode backward motion vectors */
- if (mb_type & MB_BACKWARD){
- if (mpeg2)
- motion_vectors(PMV,dmvector,mv_field_sel,
- 1,mv_count,mv_format,h_back_r_size,v_back_r_size,0,mvscale);
- else
- motion_vector(PMV[0][1],dmvector,
- back_r_size,back_r_size,0,0,full_back);
- }
- if (fault) goto resync;
- if ((mb_type & MB_INTRA) && conceal_mv)
- input->flushbits(1); /* remove marker_bit */
- /*
- if (scalable_mode==SC_DP && pri_brk==3) ld = &enhan;
- */
- /* macroblock_pattern */
- if (mb_type & MB_PATTERN){
- cbp = getCBP();
- if (chroma_format==CHROMA422){
- cbp = (cbp<<2) | input->getbits(2); /* coded_block_pattern_1 */
- #ifdef TRACE
- if (trace){
- printf("coded_block_pattern_1: ");
- printbits(cbp,2,2);
- printf(" (%d)n",cbp&3);
- }
- #endif
- }
- else if (chroma_format==CHROMA444){
- cbp = (cbp<<6) | input->getbits(6); /* coded_block_pattern_2 */
- #ifdef TRACE
- if (trace){
- printf("coded_block_pattern_2: ");
- printbits(cbp,6,6);
- printf(" (%d)n",cbp&63);
- }
- #endif
- }
- }
- else
- cbp = (mb_type & MB_INTRA) ? (1<<blk_cnt)-1 : 0;
- if (fault) goto resync;
- /* decode blocks */
- clearblock(0,blk_cnt);
- for (comp=0; comp<blk_cnt; comp++){
- // if (scalable_mode==SC_DP) ld = &base;
- if (cbp & (1<<(blk_cnt-1-comp))){
- if (mb_type & MB_INTRA){
- if (mpeg2) getmpg2intrablock(comp,dc_dct_pred);
- else getintrablock(comp,dc_dct_pred);
- }
- else {
- if (mpeg2) getmpg2interblock(comp);
- else getinterblock(comp);
- }
- if (fault) goto resync;
- }
- }
- /* reset intra_dc predictors */
- if (!(mb_type & MB_INTRA))
- dc_dct_pred[0]=dc_dct_pred[1]=dc_dct_pred[2]=0;
- /* reset motion vector predictors */
- if ((mb_type & MB_INTRA) && !conceal_mv){
- /* intra mb without concealment motion vectors */
- PMV[0][0][0]=PMV[0][0][1]=PMV[1][0][0]=PMV[1][0][1]=0;
- PMV[0][1][0]=PMV[0][1][1]=PMV[1][1][0]=PMV[1][1][1]=0;
- }
- if ((pict_type==P_TYPE) && !(mb_type & (MB_FORWARD|MB_INTRA))){
- /* non-intra mb without forward mv in a P picture */
- PMV[0][0][0]=PMV[0][0][1]=PMV[1][0][0]=PMV[1][0][1]=0;
- /* derive motion_type */
- if (pict_struct==FRAME_PICTURE) motion_type = MC_FRAME;
- else
- {
- motion_type = MC_FIELD;
- /* predict from field of same parity */
- mv_field_sel[0][0] = (pict_struct==BOTTOM_FIELD);
- }
- }
- if (stwclass==4)
- {
- /* purely spatially predicted macroblock */
- PMV[0][0][0]=PMV[0][0][1]=PMV[1][0][0]=PMV[1][0][1]=0;
- PMV[0][1][0]=PMV[0][1][1]=PMV[1][1][0]=PMV[1][1][1]=0;
- }
- }
- else { /* MBAinc!=1: skipped macroblock */
- // if (scalable_mode==SC_DP) ld = &base;
- clearblock(0,blk_cnt);
- /* reset intra_dc predictors */
- dc_dct_pred[0]=dc_dct_pred[1]=dc_dct_pred[2]=0;
- /* reset motion vector predictors */
- if (pict_type==P_TYPE)
- PMV[0][0][0]=PMV[0][0][1]=PMV[1][0][0]=PMV[1][0][1]=0;
- /* derive motion_type */
- if (pict_struct==FRAME_PICTURE)
- motion_type = MC_FRAME;
- else
- {
- motion_type = MC_FIELD;
- /* predict from field of same parity */
- mv_field_sel[0][0]=mv_field_sel[0][1] = (pict_struct==BOTTOM_FIELD);
- }
- /* skipped I are spatial-only predicted, */
- /* skipped P and B are temporal-only predicted */
- stwtype = (pict_type==I_TYPE) ? 8 : 0;
- /* clear MB_INTRA */
- mb_type&= ~MB_INTRA;
- cbp = 0; /* no block data */
- }
- SNRcbp = 0;
- #ifdef ENHANCEMENT
- if (twostreams && enhan.scalable_mode==SC_SNR){
- ld = &enhan;
- if (SNRMBAinc==0){
- if (!input->showbits(23)){ /* startcode */
- code=input->startcode();
- // code = input->showbits(32);
- if (code<SLICE_MIN_START || code>SLICE_MAX_START){
- /* only slice headers are allowed in picture_data */
- if (!quiet)
- printf("Premature end of picturen");
- return;
- }
- input->flushbits(32);
- /* decode slice header (may change quant_scale) */
- slice_vert_pos_ext = getslicehdr();
- /* decode macroblock address increment */
- SNRMBAinc = getMBA();
- /* set current location */
- SNRMBA =
- ((slice_vert_pos_ext<<7) + (code&255) - 1)*mb_width + SNRMBAinc - 1;
- SNRMBAinc = 1; /* first macroblock in slice: not skipped */
- }
- else { /* not startcode */
- if (SNRMBA>=MBAmax){
- if (!quiet) printf("Too many macroblocks in picturen");
- return;
- }
- /* decode macroblock address increment */
- SNRMBAinc = getMBA();
- }
- }
- if (SNRMBA!=MBA){
- /* streams out of sync */
- if (!quiet) printf("Cant't synchronize streamsn");
- return;
- }
- if (SNRMBAinc==1){ /* not skipped */
- macroblock_modes(&SNRmb_type, &dummy, &dummy,
- &dummy, &dummy, &dummy, &dummy, &dummy,
- &SNRdct_type);
- if (SNRmb_type & MB_PATTERN) dct_type = SNRdct_type;
- if (SNRmb_type & MB_QUANT){
- qs = input->getbits(5);
- quant_scale = qscale_type ? non_linear_mquant_table[qs] : qs<<1;
- }
- /* macroblock_pattern */
- if (SNRmb_type & MB_PATTERN)
- {
- SNRcbp = getCBP();
- if (chroma_format==CHROMA422)
- SNRcbp = (SNRcbp<<2) | input->getbits(2); /* coded_block_pattern_1 */
- else if (chroma_format==CHROMA444)
- SNRcbp = (SNRcbp<<6) | input->getbits(6); /* coded_block_pattern_2 */
- }
- else
- SNRcbp = 0;
- /* decode blocks */
- clearblock(0,blk_cnt);
- for (comp=0; comp<blk_cnt; comp++)
- {
- if (SNRcbp & (1<<(blk_cnt-1-comp)))
- getmpg2interblock(comp);
- }
- }
- else /* SNRMBAinc!=1: skipped macroblock */
- {
- clearblock(0,blk_cnt);
- }
- // ld = &base;
- }
- #endif
- /* pixel coordinates of top left corner of current macroblock */
- bx = 16*(MBA%mb_width);
- by = 16*(MBA/mb_width);
- /* motion compensation */
- if (!(mb_type & MB_INTRA))
- reconstruct(bx,by,mb_type,motion_type,PMV,mv_field_sel,dmvector,
- stwtype);
- #ifdef ENHANCEMENT
- if (scalable_mode==SC_DP) ld = &base;
- #endif
- /* copy or add block data into picture */
- for (comp=0; comp<blk_cnt; comp++){
- if ((cbp|SNRcbp) & (1<<(blk_cnt-1-comp))){
- #ifdef ENHANCEMENT
- if (twostreams && enhan.scalable_mode==SC_SNR &&
- SNRcbp & (1<<(blk_cnt-1-comp)))
- sumblock(comp); /* add SNR enhancement layer data to base layer */
- #endif
- /* inverse DCT */
- // if (sparse[comp])
- // j_rev_dct_sparse(block[comp]);
- // else {
- #ifdef HAVE_MMX
- IDCT_mmx(block[comp]);
- #else
- idct->conversion(block[comp]);
- #endif
- // }
- addblock(comp,bx,by,dct_type,(mb_type & MB_INTRA)==0);
- }
- }
- /* advance to next macroblock */
- MBA++;
- MBAinc--;
- #ifdef ENHANCEMENT
- if (twostreams && enhan.scalable_mode==SC_SNR){
- SNRMBA++;
- SNRMBAinc--;
- }
- #endif
- }
- }
- void LayerData::macroblock_modes(int *pmb_type, int *pstwtype, int *pstwclass,
- int *pmotion_type, int *pmv_count, int *pmv_format, int *pdmv, int *pmvscale,
- int *pdct_type)
- {
- int mb_type;
- int stwtype, stwcode, stwclass;
- int motion_type(0), mv_count, mv_format, dmv, mvscale;
- int dct_type;
- static unsigned char stwc_table[3][4]
- = { {6,3,7,4}, {2,1,5,4}, {2,5,7,4} };
- static unsigned char stwclass_table[9]
- = {0, 1, 2, 1, 1, 2, 3, 3, 4};
- /* get macroblock_type */
- mb_type = getMBtype();
- if (fault) return;
- /* get spatial_temporal_weight_code */
- if (mb_type & MB_WEIGHT)
- {
- if (stwc_table_index==0)
- stwtype = 4;
- else
- {
- stwcode = input->getbits(2);
- stwtype = stwc_table[stwc_table_index-1][stwcode];
- }
- }
- else
- stwtype = (mb_type & MB_CLASS4) ? 8 : 0;
- /* derive spatial_temporal_weight_class (Table 7-18) */
- stwclass = stwclass_table[stwtype];
- /* get frame/field motion type */
- if (mb_type & (MB_FORWARD|MB_BACKWARD)){
- if (pict_struct==FRAME_PICTURE){ /* frame_motion_type */
- motion_type = frame_pred_dct ? MC_FRAME : input->getbits(2);
- #ifdef TRACE
- if (!frame_pred_dct && trace){
- printf("frame_motion_type (");
- printbits(motion_type,2,2);
- printf("): %sn",motion_type==MC_FIELD?"Field":
- motion_type==MC_FRAME?"Frame":
- motion_type==MC_DMV?"Dual_Prime":"Invalid");
- }
- #endif
- }
- else { /* field_motion_type */
- motion_type = input->getbits(2);
- #ifdef TRACE
- if (trace){
- printf("field_motion_type (");
- printbits(motion_type,2,2);
- printf("): %sn",motion_type==MC_FIELD?"Field":
- motion_type==MC_16X8?"16x8 MC":
- motion_type==MC_DMV?"Dual_Prime":"Invalid");
- }
- #endif
- }
- }
- else if ((mb_type & MB_INTRA) && conceal_mv)
- {
- /* concealment motion vectors */
- motion_type = (pict_struct==FRAME_PICTURE) ? MC_FRAME : MC_FIELD;
- }
- /* derive mv_count, mv_format and dmv, (table 6-17, 6-18) */
- if (pict_struct==FRAME_PICTURE)
- {
- mv_count = (motion_type==MC_FIELD && stwclass<2) ? 2 : 1;
- mv_format = (motion_type==MC_FRAME) ? MV_FRAME : MV_FIELD;
- }
- else
- {
- mv_count = (motion_type==MC_16X8) ? 2 : 1;
- mv_format = MV_FIELD;
- }
- dmv = (motion_type==MC_DMV); /* dual prime */
- /* field mv predictions in frame pictures have to be scaled */
- mvscale = ((mv_format==MV_FIELD) && (pict_struct==FRAME_PICTURE));
- /* get dct_type (frame DCT / field DCT) */
- dct_type = (pict_struct==FRAME_PICTURE)
- && (!frame_pred_dct)
- && (mb_type & (MB_PATTERN|MB_INTRA))
- ? input->getbits(1)
- : 0;
- #ifdef TRACE
- if (trace && (pict_struct==FRAME_PICTURE)
- && (!frame_pred_dct)
- && (mb_type & (MB_PATTERN|MB_INTRA)))
- printf("dct_type (%d): %sn",dct_type,dct_type?"Field":"Frame");
- #endif
- /* return values */
- *pmb_type = mb_type;
- *pstwtype = stwtype;
- *pstwclass = stwclass;
- *pmotion_type = motion_type;
- *pmv_count = mv_count;
- *pmv_format = mv_format;
- *pdmv = dmv;
- *pmvscale = mvscale;
- *pdct_type = dct_type;
- }
- /* set block to zero */
- void LayerData::clearblock(int comp,int size){
- sparse[comp] = 1;
- memset(block[comp],0,sizeof(short)*64*size);
- }
#ifdef ENHANCEMENT
/* add SNR enhancement layer block data to base layer
 *
 * Adds the 64 coefficients of enhan.block[comp] element-wise onto
 * block[comp].
 *
 * The source pointer used to be assigned under a misspelled guard
 * (ENHANCEMANT), which left it uninitialised; the whole function is
 * already conditional on ENHANCEMENT, so no inner guard is needed.
 */
void LayerData::sumblock(int comp){
  short *bp1 = block[comp];
  const short *bp2 = enhan.block[comp];
  for (int i=0; i<64; i++) *bp1++ += *bp2++;
}
#endif
- /* limit coefficients to -2048..2047 */
- /* move/add 8x8-Block from block[comp] to refframe */
- void LayerData::addblock(int comp, int bx, int by, int dct_type, int addflag){
- int cc,i, iincr;
- unsigned char *rfp;
- short *bp;
- int spar = sparse[comp];
- #ifndef HAVE_MMX
- unsigned char *clp2=display->getClpTable();
- if (!addflag) clp2 += 128;
- #endif
- cc = (comp<4) ? 0 : (comp&1)+1; /* color component index */
- if (cc==0){ /* luminance */
- if (pict_struct==FRAME_PICTURE)
- if (dct_type){
- /* field DCT coding */
- rfp = newframe[0]
- + coded_picture_width*(by+((comp&2)>>1)) + bx + ((comp&1)<<3);
- iincr = (coded_picture_width<<1);
- }
- else{
- /* frame DCT coding */
- rfp = newframe[0]
- + coded_picture_width*(by+((comp&2)<<2)) + bx + ((comp&1)<<3);
- iincr = coded_picture_width;
- }
- else {
- /* field picture */
- rfp = newframe[0]
- + (coded_picture_width<<1)*(by+((comp&2)<<2)) + bx + ((comp&1)<<3);
- iincr = (coded_picture_width<<1);
- }
- }
- else {
- /* chrominance */
- /* scale coordinates */
- if (chroma_format!=CHROMA444) bx >>= 1;
- if (chroma_format==CHROMA420) by >>= 1;
- if (pict_struct==FRAME_PICTURE){
- if (dct_type && (chroma_format!=CHROMA420)){
- /* field DCT coding */
- rfp = newframe[cc]
- + chrom_width*(by+((comp&2)>>1)) + bx + (comp&8);
- iincr = (chrom_width<<1);
- }
- else {
- /* frame DCT coding */
- rfp = newframe[cc]
- + chrom_width*(by+((comp&2)<<2)) + bx + (comp&8);
- iincr = chrom_width;
- }
- }
- else {
- /* field picture */
- rfp = newframe[cc]
- + (chrom_width<<1)*(by+((comp&2)<<2)) + bx + (comp&8);
- iincr = (chrom_width<<1);
- }
- }
- bp = block[comp];
- if (addflag) {
- #ifdef HAVE_MMX
- if (spar) {
- __asm__ __volatile__(
- "movq (%2),%%mm6n" /* 4 blockvals */
- "pxor %%mm4,%%mm4n"
- "punpcklwd %%mm6,%%mm6n"
- "punpcklwd %%mm6,%%mm6n"
- ".align 8n"
- "1:"
- "movq (%1), %%mm0n" /* 8 rindex1 */
- "movq %%mm0, %%mm2n"
- "punpcklbw %%mm4, %%mm0n"
- "punpckhbw %%mm4, %%mm2n"
- "paddw %%mm6, %%mm0n"
- "paddw %%mm6, %%mm2n"
- "packuswb %%mm2, %%mm0n"
- "movq %%mm0, (%1)n"
- "leal (%1,%3), %1n"
- "loop 1bn"
- : /* scr dest */
- : "c" (8),"r" (rfp), "r" (bp), "r" (iincr)
- );
- }
- else {
- __asm__ __volatile__(
- "pxor %%mm4,%%mm4n"
- ".align 8n"
- "1:"
- "movq (%2), %%mm0n" /* 8 rfp 0 1 2 3 4 5 6 7*/
- "movq (%1), %%mm6n" /* 4 blockvals 0 1 2 3 */
- "movq %%mm0, %%mm2n"
- "movq 8(%1), %%mm5n" /* 4 blockvals 0 1 2 3 */
- "punpcklbw %%mm4, %%mm0n" /* 0 2 4 6 */
- "punpckhbw %%mm4, %%mm2n" /* 1 3 5 7 */
- "paddw %%mm6, %%mm0n"
- "paddw %%mm5, %%mm2n"
- "packuswb %%mm2, %%mm0n"
- "addl $16, %1n"
- "movq %%mm0, (%2)n"
- "leal (%2,%3), %2n"
- "loop 1bn"
- : /* scr dest */
- : "c" (8),"r" (bp), "r" (rfp), "r" (iincr)
- );
- // add_block_mmx(rfp,bp,iincr);
- }
- #else
- for (i=0; i<8; i++){
- rfp[0] = clp2[bp[0] + rfp[0]];
- rfp[1] = clp2[bp[1] + rfp[1]];
- rfp[2] = clp2[bp[2] + rfp[2]];
- rfp[3] = clp2[bp[3] + rfp[3]];
- rfp[4] = clp2[bp[4] + rfp[4]];
- rfp[5] = clp2[bp[5] + rfp[5]];
- rfp[6] = clp2[bp[6] + rfp[6]];
- rfp[7] = clp2[bp[7] + rfp[7]];
- rfp+= iincr;
- bp += 8;
- }
- #endif
- }
- else {
- #ifdef HAVE_MMX
- if (spar) {
- __asm__ __volatile__(
- "movd (%2), %%mm0n" // " 0 0 0 v1"
- "punpcklwd %%mm0, %%mm0n" // " 0 0 v1 v1"
- "punpcklwd %%mm0, %%mm0n"
- "paddw MMX_128, %%mm0n"
- "packuswb %%mm0, %%mm0n"
- "leal (%0,%1,2), %%eaxn"
- "movq %%mm0, (%0, %1)n"
- "movq %%mm0, (%%eax)n"
- "leal (%%eax,%1,2), %0n"
- "movq %%mm0, (%%eax, %1)n"
- "movq %%mm0, (%0)n"
- "leal (%0,%1,2), %%eaxn"
- "movq %%mm0, (%0, %1)n"
- "movq %%mm0, (%%eax)n"
- "movq %%mm0, (%%eax, %1)n"
- :
- : "D" (rfp), "c" (iincr), "b" (bp)
- : "eax");
- }
- else {
- __asm__ __volatile__(
- "movq MMX_128,%%mm4n"
- ".align 8n"
- "1:"
- "movq (%1), %%mm0n"
- "movq 8(%1), %%mm1n"
- "paddw %%mm4, %%mm0n"
- "movq 16(%1), %%mm2n"
- "paddw %%mm4, %%mm1n"
- "movq 24(%1), %%mm3n"
- "paddw %%mm4, %%mm2n"
- "packuswb %%mm1, %%mm0n"
- "paddw %%mm4, %%mm3n"
- "addl $32, %1n"
- "packuswb %%mm3, %%mm2n"
- "movq %%mm0, (%2)n"
- "movq %%mm2, (%2,%3)n"
- "leal (%2,%3,2), %2n"
- "loop 1bn"
- :
- : "c" (4), "r" (bp), "r" (rfp), "r" (iincr)
- );
- // set_block_mmx(rfp,bp,iincr);
- }
- #else
- for (i=0; i<8; i++){
- rfp[0] = clp2[bp[0]];
- rfp[1] = clp2[bp[1]];
- rfp[2] = clp2[bp[2]];
- rfp[3] = clp2[bp[3]];
- rfp[4] = clp2[bp[4]];
- rfp[5] = clp2[bp[5]];
- rfp[6] = clp2[bp[6]];
- rfp[7] = clp2[bp[7]];
- rfp+= iincr;
- bp += 8;
- }
- #endif
- }
- }