compress.c
资源名称:bzip.tar.gz [点击查看]
上传用户:zswatin
上传日期:2007-01-06
资源大小:440k
文件大小:16k
源码类别:
压缩解压
开发平台:
C/C++
- /*-------------------------------------------------------------*/
- /*--- Compression machinery (not incl block sorting) ---*/
- /*--- compress.c ---*/
- /*-------------------------------------------------------------*/
- /*--
- This file is a part of bzip2 and/or libbzip2, a program and
- library for lossless, block-sorting data compression.
- Copyright (C) 1996-1998 Julian R Seward. All rights reserved.
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
- 1. Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- 2. The origin of this software must not be misrepresented; you must
- not claim that you wrote the original software. If you use this
- software in a product, an acknowledgment in the product
- documentation would be appreciated but is not required.
- 3. Altered source versions must be plainly marked as such, and must
- not be misrepresented as being the original software.
- 4. The name of the author may not be used to endorse or promote
- products derived from this software without specific prior written
- permission.
- THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
- OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
- GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- Julian Seward, Guildford, Surrey, UK.
- jseward@acm.org
- bzip2/libbzip2 version 0.9.0 of 28 June 1998
- This program is based on (at least) the work of:
- Mike Burrows
- David Wheeler
- Peter Fenwick
- Alistair Moffat
- Radford Neal
- Ian H. Witten
- Robert Sedgewick
- Jon L. Bentley
- For more information on these sources, see the manual.
- --*/
- /*--
- CHANGES
- ~~~~~~~
- 0.9.0 -- original version.
- 0.9.0a/b -- no changes in this file.
- 0.9.0c
- * changed setting of nGroups in sendMTFValues() so as to
- do a bit better on small files
- --*/
- #include "bzlib_private.h"
- /*---------------------------------------------------*/
- /*--- Bit stream I/O ---*/
- /*---------------------------------------------------*/
- /*---------------------------------------------------*/
- void bsInitWrite ( EState* s )
- {
- s->bsLive = 0;
- s->bsBuff = 0;
- }
- /*---------------------------------------------------*/
- static
- void bsFinishWrite ( EState* s )
- {
- while (s->bsLive > 0) {
- ((UChar*)(s->quadrant))[s->numZ] = (UChar)(s->bsBuff >> 24);
- s->numZ++;
- s->bsBuff <<= 8;
- s->bsLive -= 8;
- }
- }
- /*---------------------------------------------------*/
/*--
   bsNEEDW(nz): make room in the bit accumulator before new bits
   are appended.  While at least one whole byte is pending
   (bsLive >= 8), the top byte of s->bsBuff is stored at index
   s->numZ of the byte buffer addressed by s->quadrant, and the
   accumulator is shifted up by eight bits.

   The macro expects an `EState* s` to be in scope at the point
   of use.  The `nz` argument is not used by the expansion.

   Every line of the definition must end in a backslash so the
   whole body belongs to the macro; the do/while(0) wrapper makes
   the expansion a single statement, safe inside if/else.
--*/
#define bsNEEDW(nz)                              \
   do {                                          \
      while (s->bsLive >= 8) {                   \
         ((UChar*)(s->quadrant))[s->numZ]        \
            = (UChar)(s->bsBuff >> 24);          \
         s->numZ++;                              \
         s->bsBuff <<= 8;                        \
         s->bsLive -= 8;                         \
      }                                          \
   } while (0)
- /*---------------------------------------------------*/
- static
- void bsW ( EState* s, Int32 n, UInt32 v )
- {
- bsNEEDW ( n );
- s->bsBuff |= (v << (32 - s->bsLive - n));
- s->bsLive += n;
- }
- /*---------------------------------------------------*/
- static
- void bsPutUInt32 ( EState* s, UInt32 u )
- {
- bsW ( s, 8, (u >> 24) & 0xffL );
- bsW ( s, 8, (u >> 16) & 0xffL );
- bsW ( s, 8, (u >> 8) & 0xffL );
- bsW ( s, 8, u & 0xffL );
- }
- /*---------------------------------------------------*/
- static
- void bsPutUChar ( EState* s, UChar c )
- {
- bsW( s, 8, (UInt32)c );
- }
- /*---------------------------------------------------*/
- /*--- The back end proper ---*/
- /*---------------------------------------------------*/
- /*---------------------------------------------------*/
- static
- void makeMaps_e ( EState* s )
- {
- Int32 i;
- s->nInUse = 0;
- for (i = 0; i < 256; i++)
- if (s->inUse[i]) {
- s->unseqToSeq[i] = s->nInUse;
- s->nInUse++;
- }
- }
- /*---------------------------------------------------*/
- static
- void generateMTFValues ( EState* s )
- {
- UChar yy[256];
- Int32 i, j;
- UChar tmp;
- UChar tmp2;
- Int32 zPend;
- Int32 wr;
- Int32 EOB;
- makeMaps_e ( s );
- EOB = s->nInUse+1;
- for (i = 0; i <= EOB; i++) s->mtfFreq[i] = 0;
- wr = 0;
- zPend = 0;
- for (i = 0; i < s->nInUse; i++) yy[i] = (UChar) i;
- for (i = 0; i < s->nblock; i++) {
- UChar ll_i;
- AssertD ( wr <= i, "generateMTFValues(1)" );
- j = s->zptr[i]-1; if (j < 0) j += s->nblock;
- ll_i = s->unseqToSeq[s->block[j]];
- AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" );
- j = 0;
- tmp = yy[j];
- while ( ll_i != tmp ) {
- j++;
- tmp2 = tmp;
- tmp = yy[j];
- yy[j] = tmp2;
- };
- yy[0] = tmp;
- if (j == 0) {
- zPend++;
- } else {
- if (zPend > 0) {
- zPend--;
- while (True) {
- switch (zPend % 2) {
- case 0: s->szptr[wr] = BZ_RUNA; wr++; s->mtfFreq[BZ_RUNA]++; break;
- case 1: s->szptr[wr] = BZ_RUNB; wr++; s->mtfFreq[BZ_RUNB]++; break;
- };
- if (zPend < 2) break;
- zPend = (zPend - 2) / 2;
- };
- zPend = 0;
- }
- s->szptr[wr] = j+1; wr++; s->mtfFreq[j+1]++;
- }
- }
- if (zPend > 0) {
- zPend--;
- while (True) {
- switch (zPend % 2) {
- case 0: s->szptr[wr] = BZ_RUNA; wr++; s->mtfFreq[BZ_RUNA]++; break;
- case 1: s->szptr[wr] = BZ_RUNB; wr++; s->mtfFreq[BZ_RUNB]++; break;
- };
- if (zPend < 2) break;
- zPend = (zPend - 2) / 2;
- };
- }
- s->szptr[wr] = EOB; wr++; s->mtfFreq[EOB]++;
- s->nMTF = wr;
- }
- /*---------------------------------------------------*/
- #define BZ_LESSER_ICOST 0
- #define BZ_GREATER_ICOST 15
- static
- void sendMTFValues ( EState* s )
- {
- Int32 v, t, i, j, gs, ge, totc, bt, bc, iter;
- Int32 nSelectors, alphaSize, minLen, maxLen, selCtr;
- Int32 nGroups, nBytes;
- /*--
- UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
- is a global since the decoder also needs it.
- Int32 code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
- Int32 rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
- are also globals only used in this proc.
- Made global to keep stack frame size small.
- --*/
- UInt16 cost[BZ_N_GROUPS];
- Int32 fave[BZ_N_GROUPS];
- if (s->verbosity >= 3)
- VPrintf3( " %d in block, %d after MTF & 1-2 coding, "
- "%d+2 syms in usen",
- s->nblock, s->nMTF, s->nInUse );
- alphaSize = s->nInUse+2;
- for (t = 0; t < BZ_N_GROUPS; t++)
- for (v = 0; v < alphaSize; v++)
- s->len[t][v] = BZ_GREATER_ICOST;
- /*--- Decide how many coding tables to use ---*/
- AssertH ( s->nMTF > 0, 3001 );
- if (s->nMTF < 200) nGroups = 2; else
- if (s->nMTF < 600) nGroups = 3; else
- if (s->nMTF < 1200) nGroups = 4; else
- if (s->nMTF < 2400) nGroups = 5; else
- nGroups = 6;
- /*--- Generate an initial set of coding tables ---*/
- {
- Int32 nPart, remF, tFreq, aFreq;
- nPart = nGroups;
- remF = s->nMTF;
- gs = 0;
- while (nPart > 0) {
- tFreq = remF / nPart;
- ge = gs-1;
- aFreq = 0;
- while (aFreq < tFreq && ge < alphaSize-1) {
- ge++;
- aFreq += s->mtfFreq[ge];
- }
- if (ge > gs
- && nPart != nGroups && nPart != 1
- && ((nGroups-nPart) % 2 == 1)) {
- aFreq -= s->mtfFreq[ge];
- ge--;
- }
- if (s->verbosity >= 3)
- VPrintf5( " initial group %d, [%d .. %d], "
- "has %d syms (%4.1f%%)n",
- nPart, gs, ge, aFreq,
- (100.0 * (float)aFreq) / (float)(s->nMTF) );
- for (v = 0; v < alphaSize; v++)
- if (v >= gs && v <= ge)
- s->len[nPart-1][v] = BZ_LESSER_ICOST; else
- s->len[nPart-1][v] = BZ_GREATER_ICOST;
- nPart--;
- gs = ge+1;
- remF -= aFreq;
- }
- }
- /*---
- Iterate up to BZ_N_ITERS times to improve the tables.
- ---*/
- for (iter = 0; iter < BZ_N_ITERS; iter++) {
- for (t = 0; t < nGroups; t++) fave[t] = 0;
- for (t = 0; t < nGroups; t++)
- for (v = 0; v < alphaSize; v++)
- s->rfreq[t][v] = 0;
- nSelectors = 0;
- totc = 0;
- gs = 0;
- while (True) {
- /*--- Set group start & end marks. --*/
- if (gs >= s->nMTF) break;
- ge = gs + BZ_G_SIZE - 1;
- if (ge >= s->nMTF) ge = s->nMTF-1;
- /*--
- Calculate the cost of this group as coded
- by each of the coding tables.
- --*/
- for (t = 0; t < nGroups; t++) cost[t] = 0;
- if (nGroups == 6) {
- register UInt16 cost0, cost1, cost2, cost3, cost4, cost5;
- cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0;
- for (i = gs; i <= ge; i++) {
- UInt16 icv = s->szptr[i];
- cost0 += s->len[0][icv];
- cost1 += s->len[1][icv];
- cost2 += s->len[2][icv];
- cost3 += s->len[3][icv];
- cost4 += s->len[4][icv];
- cost5 += s->len[5][icv];
- }
- cost[0] = cost0; cost[1] = cost1; cost[2] = cost2;
- cost[3] = cost3; cost[4] = cost4; cost[5] = cost5;
- } else {
- for (i = gs; i <= ge; i++) {
- UInt16 icv = s->szptr[i];
- for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv];
- }
- }
- /*--
- Find the coding table which is best for this group,
- and record its identity in the selector table.
- --*/
- bc = 999999999; bt = -1;
- for (t = 0; t < nGroups; t++)
- if (cost[t] < bc) { bc = cost[t]; bt = t; };
- totc += bc;
- fave[bt]++;
- s->selector[nSelectors] = bt;
- nSelectors++;
- /*--
- Increment the symbol frequencies for the selected table.
- --*/
- for (i = gs; i <= ge; i++)
- s->rfreq[bt][ s->szptr[i] ]++;
- gs = ge+1;
- }
- if (s->verbosity >= 3) {
- VPrintf2 ( " pass %d: size is %d, grp uses are ",
- iter+1, totc/8 );
- for (t = 0; t < nGroups; t++)
- VPrintf1 ( "%d ", fave[t] );
- VPrintf0 ( "n" );
- }
- /*--
- Recompute the tables based on the accumulated frequencies.
- --*/
- for (t = 0; t < nGroups; t++)
- hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]),
- alphaSize, 20 );
- }
- AssertH( nGroups < 8, 3002 );
- AssertH( nSelectors < 32768 &&
- nSelectors <= (2 + (900000 / BZ_G_SIZE)),
- 3003 );
- /*--- Compute MTF values for the selectors. ---*/
- {
- UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp;
- for (i = 0; i < nGroups; i++) pos[i] = i;
- for (i = 0; i < nSelectors; i++) {
- ll_i = s->selector[i];
- j = 0;
- tmp = pos[j];
- while ( ll_i != tmp ) {
- j++;
- tmp2 = tmp;
- tmp = pos[j];
- pos[j] = tmp2;
- };
- pos[0] = tmp;
- s->selectorMtf[i] = j;
- }
- };
- /*--- Assign actual codes for the tables. --*/
- for (t = 0; t < nGroups; t++) {
- minLen = 32;
- maxLen = 0;
- for (i = 0; i < alphaSize; i++) {
- if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
- if (s->len[t][i] < minLen) minLen = s->len[t][i];
- }
- AssertH ( !(maxLen > 20), 3004 );
- AssertH ( !(minLen < 1), 3005 );
- hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]),
- minLen, maxLen, alphaSize );
- }
- /*--- Transmit the mapping table. ---*/
- {
- Bool inUse16[16];
- for (i = 0; i < 16; i++) {
- inUse16[i] = False;
- for (j = 0; j < 16; j++)
- if (s->inUse[i * 16 + j]) inUse16[i] = True;
- }
- nBytes = s->numZ;
- for (i = 0; i < 16; i++)
- if (inUse16[i]) bsW(s,1,1); else bsW(s,1,0);
- for (i = 0; i < 16; i++)
- if (inUse16[i])
- for (j = 0; j < 16; j++) {
- if (s->inUse[i * 16 + j]) bsW(s,1,1); else bsW(s,1,0);
- }
- if (s->verbosity >= 3)
- VPrintf1( " bytes: mapping %d, ", s->numZ-nBytes );
- }
- /*--- Now the selectors. ---*/
- nBytes = s->numZ;
- bsW ( s, 3, nGroups );
- bsW ( s, 15, nSelectors );
- for (i = 0; i < nSelectors; i++) {
- for (j = 0; j < s->selectorMtf[i]; j++) bsW(s,1,1);
- bsW(s,1,0);
- }
- if (s->verbosity >= 3)
- VPrintf1( "selectors %d, ", s->numZ-nBytes );
- /*--- Now the coding tables. ---*/
- nBytes = s->numZ;
- for (t = 0; t < nGroups; t++) {
- Int32 curr = s->len[t][0];
- bsW ( s, 5, curr );
- for (i = 0; i < alphaSize; i++) {
- while (curr < s->len[t][i]) { bsW(s,2,2); curr++; /* 10 */ };
- while (curr > s->len[t][i]) { bsW(s,2,3); curr--; /* 11 */ };
- bsW ( s, 1, 0 );
- }
- }
- if (s->verbosity >= 3)
- VPrintf1 ( "code lengths %d, ", s->numZ-nBytes );
- /*--- And finally, the block data proper ---*/
- nBytes = s->numZ;
- selCtr = 0;
- gs = 0;
- while (True) {
- if (gs >= s->nMTF) break;
- ge = gs + BZ_G_SIZE - 1;
- if (ge >= s->nMTF) ge = s->nMTF-1;
- for (i = gs; i <= ge; i++) {
- AssertH ( s->selector[selCtr] < nGroups, 3006 );
- bsW ( s,
- s->len [s->selector[selCtr]] [s->szptr[i]],
- s->code [s->selector[selCtr]] [s->szptr[i]] );
- }
- gs = ge+1;
- selCtr++;
- }
- AssertH( selCtr == nSelectors, 3007 );
- if (s->verbosity >= 3)
- VPrintf1( "codes %dn", s->numZ-nBytes );
- }
- /*---------------------------------------------------*/
- void compressBlock ( EState* s, Bool is_last_block )
- {
- if (s->nblock > 0) {
- BZ_FINALISE_CRC ( s->blockCRC );
- s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31);
- s->combinedCRC ^= s->blockCRC;
- if (s->blockNo > 1) s->numZ = 0;
- if (s->verbosity >= 2)
- VPrintf4( " block %d: crc = 0x%8x, "
- "combined CRC = 0x%8x, size = %dn",
- s->blockNo, s->blockCRC, s->combinedCRC, s->nblock );
- blockSort ( s );
- }
- /*-- If this is the first block, create the stream header. --*/
- if (s->blockNo == 1) {
- bsInitWrite ( s );
- bsPutUChar ( s, 'B' );
- bsPutUChar ( s, 'Z' );
- bsPutUChar ( s, 'h' );
- bsPutUChar ( s, '0' + s->blockSize100k );
- }
- if (s->nblock > 0) {
- bsPutUChar ( s, 0x31 ); bsPutUChar ( s, 0x41 );
- bsPutUChar ( s, 0x59 ); bsPutUChar ( s, 0x26 );
- bsPutUChar ( s, 0x53 ); bsPutUChar ( s, 0x59 );
- /*-- Now the block's CRC, so it is in a known place. --*/
- bsPutUInt32 ( s, s->blockCRC );
- /*-- Now a single bit indicating randomisation. --*/
- if (s->blockRandomised) {
- bsW(s,1,1); s->nBlocksRandomised++;
- } else
- bsW(s,1,0);
- bsW ( s, 24, s->origPtr );
- generateMTFValues ( s );
- sendMTFValues ( s );
- }
- /*-- If this is the last block, add the stream trailer. --*/
- if (is_last_block) {
- if (s->verbosity >= 2 && s->nBlocksRandomised > 0)
- VPrintf2 ( " %d block%s needed randomisationn",
- s->nBlocksRandomised,
- s->nBlocksRandomised == 1 ? "" : "s" );
- bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 );
- bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 );
- bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 );
- bsPutUInt32 ( s, s->combinedCRC );
- if (s->verbosity >= 2)
- VPrintf1( " final combined CRC = 0x%xn ", s->combinedCRC );
- bsFinishWrite ( s );
- }
- }
- /*-------------------------------------------------------------*/
- /*--- end compress.c ---*/
- /*-------------------------------------------------------------*/