LCOV - code coverage report
Current view: top level - lib - decompress_bunzip2.c (source / functions) Hit Total Coverage
Test: Real Lines: 0 249 0.0 %
Date: 2020-10-17 15:46:43 Functions: 0 6 0.0 %
Legend: Neither, QEMU, Real, Both Branches: 0 0 -

           Branch data     Line data    Source code
       1                 :            : /*      Small bzip2 deflate implementation, by Rob Landley (rob@landley.net).
       2                 :            : 
       3                 :            :         Based on bzip2 decompression code by Julian R Seward (jseward@acm.org),
       4                 :            :         which also acknowledges contributions by Mike Burrows, David Wheeler,
       5                 :            :         Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten,
       6                 :            :         Robert Sedgewick, and Jon L. Bentley.
       7                 :            : 
       8                 :            :         This code is licensed under the LGPLv2:
       9                 :            :                 LGPL (http://www.gnu.org/copyleft/lgpl.html
      10                 :            : */
      11                 :            : 
      12                 :            : /*
      13                 :            :         Size and speed optimizations by Manuel Novoa III  (mjn3@codepoet.org).
      14                 :            : 
      15                 :            :         More efficient reading of Huffman codes, a streamlined read_bunzip()
      16                 :            :         function, and various other tweaks.  In (limited) tests, approximately
      17                 :            :         20% faster than bzcat on x86 and about 10% faster on arm.
      18                 :            : 
      19                 :            :         Note that about 2/3 of the time is spent in read_unzip() reversing
      20                 :            :         the Burrows-Wheeler transformation.  Much of that time is delay
      21                 :            :         resulting from cache misses.
      22                 :            : 
      23                 :            :         I would ask that anyone benefiting from this work, especially those
      24                 :            :         using it in commercial products, consider making a donation to my local
      25                 :            :         non-profit hospice organization in the name of the woman I loved, who
      26                 :            :         passed away Feb. 12, 2003.
      27                 :            : 
      28                 :            :                 In memory of Toni W. Hagan
      29                 :            : 
      30                 :            :                 Hospice of Acadiana, Inc.
      31                 :            :                 2600 Johnston St., Suite 200
      32                 :            :                 Lafayette, LA 70503-3240
      33                 :            : 
      34                 :            :                 Phone (337) 232-1234 or 1-800-738-2226
      35                 :            :                 Fax   (337) 232-1297
      36                 :            : 
      37                 :            :                 http://www.hospiceacadiana.com/
      38                 :            : 
      39                 :            :         Manuel
      40                 :            :  */
      41                 :            : 
      42                 :            : /*
      43                 :            :         Made it fit for running in Linux Kernel by Alain Knaff (alain@knaff.lu)
      44                 :            : */
      45                 :            : 
      46                 :            : 
      47                 :            : #ifdef STATIC
      48                 :            : #define PREBOOT
      49                 :            : #else
      50                 :            : #include <linux/decompress/bunzip2.h>
      51                 :            : #endif /* STATIC */
      52                 :            : 
      53                 :            : #include <linux/decompress/mm.h>
      54                 :            : #include <linux/crc32poly.h>
      55                 :            : 
      56                 :            : #ifndef INT_MAX
      57                 :            : #define INT_MAX 0x7fffffff
      58                 :            : #endif
      59                 :            : 
      60                 :            : /* Constants for Huffman coding */
      61                 :            : #define MAX_GROUPS              6
      62                 :            : #define GROUP_SIZE              50      /* 64 would have been more efficient */
      63                 :            : #define MAX_HUFCODE_BITS        20      /* Longest Huffman code allowed */
      64                 :            : #define MAX_SYMBOLS             258     /* 256 literals + RUNA + RUNB */
      65                 :            : #define SYMBOL_RUNA             0
      66                 :            : #define SYMBOL_RUNB             1
      67                 :            : 
      68                 :            : /* Status return values */
      69                 :            : #define RETVAL_OK                       0
      70                 :            : #define RETVAL_LAST_BLOCK               (-1)
      71                 :            : #define RETVAL_NOT_BZIP_DATA            (-2)
      72                 :            : #define RETVAL_UNEXPECTED_INPUT_EOF     (-3)
      73                 :            : #define RETVAL_UNEXPECTED_OUTPUT_EOF    (-4)
      74                 :            : #define RETVAL_DATA_ERROR               (-5)
      75                 :            : #define RETVAL_OUT_OF_MEMORY            (-6)
      76                 :            : #define RETVAL_OBSOLETE_INPUT           (-7)
      77                 :            : 
      78                 :            : /* Other housekeeping constants */
      79                 :            : #define BZIP2_IOBUF_SIZE                4096
      80                 :            : 
      81                 :            : /* This is what we know about each Huffman coding group */
      82                 :            : struct group_data {
      83                 :            :         /* We have an extra slot at the end of limit[] for a sentinal value. */
      84                 :            :         int limit[MAX_HUFCODE_BITS+1];
      85                 :            :         int base[MAX_HUFCODE_BITS];
      86                 :            :         int permute[MAX_SYMBOLS];
      87                 :            :         int minLen, maxLen;
      88                 :            : };
      89                 :            : 
      90                 :            : /* Structure holding all the housekeeping data, including IO buffers and
      91                 :            :    memory that persists between calls to bunzip */
      92                 :            : struct bunzip_data {
      93                 :            :         /* State for interrupting output loop */
      94                 :            :         int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent;
      95                 :            :         /* I/O tracking data (file handles, buffers, positions, etc.) */
      96                 :            :         long (*fill)(void*, unsigned long);
      97                 :            :         long inbufCount, inbufPos /*, outbufPos*/;
      98                 :            :         unsigned char *inbuf /*,*outbuf*/;
      99                 :            :         unsigned int inbufBitCount, inbufBits;
     100                 :            :         /* The CRC values stored in the block header and calculated from the
     101                 :            :         data */
     102                 :            :         unsigned int crc32Table[256], headerCRC, totalCRC, writeCRC;
     103                 :            :         /* Intermediate buffer and its size (in bytes) */
     104                 :            :         unsigned int *dbuf, dbufSize;
     105                 :            :         /* These things are a bit too big to go on the stack */
     106                 :            :         unsigned char selectors[32768];         /* nSelectors = 15 bits */
     107                 :            :         struct group_data groups[MAX_GROUPS];   /* Huffman coding tables */
     108                 :            :         int io_error;                   /* non-zero if we have IO error */
     109                 :            :         int byteCount[256];
     110                 :            :         unsigned char symToByte[256], mtfSymbol[256];
     111                 :            : };
     112                 :            : 
     113                 :            : 
     114                 :            : /* Return the next nnn bits of input.  All reads from the compressed input
     115                 :            :    are done through this function.  All reads are big endian */
     116                 :          0 : static unsigned int INIT get_bits(struct bunzip_data *bd, char bits_wanted)
     117                 :            : {
     118                 :            :         unsigned int bits = 0;
     119                 :            : 
     120                 :            :         /* If we need to get more data from the byte buffer, do so.
     121                 :            :            (Loop getting one byte at a time to enforce endianness and avoid
     122                 :            :            unaligned access.) */
     123                 :          0 :         while (bd->inbufBitCount < bits_wanted) {
     124                 :            :                 /* If we need to read more data from file into byte buffer, do
     125                 :            :                    so */
     126                 :          0 :                 if (bd->inbufPos == bd->inbufCount) {
     127                 :          0 :                         if (bd->io_error)
     128                 :            :                                 return 0;
     129                 :          0 :                         bd->inbufCount = bd->fill(bd->inbuf, BZIP2_IOBUF_SIZE);
     130                 :          0 :                         if (bd->inbufCount <= 0) {
     131                 :          0 :                                 bd->io_error = RETVAL_UNEXPECTED_INPUT_EOF;
     132                 :          0 :                                 return 0;
     133                 :            :                         }
     134                 :          0 :                         bd->inbufPos = 0;
     135                 :            :                 }
     136                 :            :                 /* Avoid 32-bit overflow (dump bit buffer to top of output) */
     137                 :          0 :                 if (bd->inbufBitCount >= 24) {
     138                 :          0 :                         bits = bd->inbufBits&((1 << bd->inbufBitCount)-1);
     139                 :          0 :                         bits_wanted -= bd->inbufBitCount;
     140                 :          0 :                         bits <<= bits_wanted;
     141                 :          0 :                         bd->inbufBitCount = 0;
     142                 :            :                 }
     143                 :            :                 /* Grab next 8 bits of input from buffer. */
     144                 :          0 :                 bd->inbufBits = (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++];
     145                 :          0 :                 bd->inbufBitCount += 8;
     146                 :            :         }
     147                 :            :         /* Calculate result */
     148                 :          0 :         bd->inbufBitCount -= bits_wanted;
     149                 :          0 :         bits |= (bd->inbufBits >> bd->inbufBitCount)&((1 << bits_wanted)-1);
     150                 :            : 
     151                 :          0 :         return bits;
     152                 :            : }
     153                 :            : 
     154                 :            : /* Unpacks the next block and sets up for the inverse burrows-wheeler step. */
     155                 :            : 
     156                 :          0 : static int INIT get_next_block(struct bunzip_data *bd)
     157                 :            : {
     158                 :            :         struct group_data *hufGroup = NULL;
     159                 :            :         int *base = NULL;
     160                 :            :         int *limit = NULL;
     161                 :            :         int dbufCount, nextSym, dbufSize, groupCount, selector,
     162                 :            :                 i, j, k, t, runPos, symCount, symTotal, nSelectors, *byteCount;
     163                 :            :         unsigned char uc, *symToByte, *mtfSymbol, *selectors;
     164                 :            :         unsigned int *dbuf, origPtr;
     165                 :            : 
     166                 :          0 :         dbuf = bd->dbuf;
     167                 :          0 :         dbufSize = bd->dbufSize;
     168                 :          0 :         selectors = bd->selectors;
     169                 :          0 :         byteCount = bd->byteCount;
     170                 :          0 :         symToByte = bd->symToByte;
     171                 :          0 :         mtfSymbol = bd->mtfSymbol;
     172                 :            : 
     173                 :            :         /* Read in header signature and CRC, then validate signature.
     174                 :            :            (last block signature means CRC is for whole file, return now) */
     175                 :          0 :         i = get_bits(bd, 24);
     176                 :          0 :         j = get_bits(bd, 24);
     177                 :          0 :         bd->headerCRC = get_bits(bd, 32);
     178                 :          0 :         if ((i == 0x177245) && (j == 0x385090))
     179                 :            :                 return RETVAL_LAST_BLOCK;
     180                 :          0 :         if ((i != 0x314159) || (j != 0x265359))
     181                 :            :                 return RETVAL_NOT_BZIP_DATA;
     182                 :            :         /* We can add support for blockRandomised if anybody complains.
     183                 :            :            There was some code for this in busybox 1.0.0-pre3, but nobody ever
     184                 :            :            noticed that it didn't actually work. */
     185                 :          0 :         if (get_bits(bd, 1))
     186                 :            :                 return RETVAL_OBSOLETE_INPUT;
     187                 :          0 :         origPtr = get_bits(bd, 24);
     188                 :          0 :         if (origPtr >= dbufSize)
     189                 :            :                 return RETVAL_DATA_ERROR;
     190                 :            :         /* mapping table: if some byte values are never used (encoding things
     191                 :            :            like ascii text), the compression code removes the gaps to have fewer
     192                 :            :            symbols to deal with, and writes a sparse bitfield indicating which
     193                 :            :            values were present.  We make a translation table to convert the
     194                 :            :            symbols back to the corresponding bytes. */
     195                 :          0 :         t = get_bits(bd, 16);
     196                 :            :         symTotal = 0;
     197                 :          0 :         for (i = 0; i < 16; i++) {
     198                 :          0 :                 if (t&(1 << (15-i))) {
     199                 :          0 :                         k = get_bits(bd, 16);
     200                 :          0 :                         for (j = 0; j < 16; j++)
     201                 :          0 :                                 if (k&(1 << (15-j)))
     202                 :          0 :                                         symToByte[symTotal++] = (16*i)+j;
     203                 :            :                 }
     204                 :            :         }
     205                 :            :         /* How many different Huffman coding groups does this block use? */
     206                 :          0 :         groupCount = get_bits(bd, 3);
     207                 :          0 :         if (groupCount < 2 || groupCount > MAX_GROUPS)
     208                 :            :                 return RETVAL_DATA_ERROR;
     209                 :            :         /* nSelectors: Every GROUP_SIZE many symbols we select a new
     210                 :            :            Huffman coding group.  Read in the group selector list,
     211                 :            :            which is stored as MTF encoded bit runs.  (MTF = Move To
     212                 :            :            Front, as each value is used it's moved to the start of the
     213                 :            :            list.) */
     214                 :          0 :         nSelectors = get_bits(bd, 15);
     215                 :          0 :         if (!nSelectors)
     216                 :            :                 return RETVAL_DATA_ERROR;
     217                 :          0 :         for (i = 0; i < groupCount; i++)
     218                 :          0 :                 mtfSymbol[i] = i;
     219                 :          0 :         for (i = 0; i < nSelectors; i++) {
     220                 :            :                 /* Get next value */
     221                 :          0 :                 for (j = 0; get_bits(bd, 1); j++)
     222                 :          0 :                         if (j >= groupCount)
     223                 :            :                                 return RETVAL_DATA_ERROR;
     224                 :            :                 /* Decode MTF to get the next selector */
     225                 :          0 :                 uc = mtfSymbol[j];
     226                 :          0 :                 for (; j; j--)
     227                 :          0 :                         mtfSymbol[j] = mtfSymbol[j-1];
     228                 :          0 :                 mtfSymbol[0] = selectors[i] = uc;
     229                 :            :         }
     230                 :            :         /* Read the Huffman coding tables for each group, which code
     231                 :            :            for symTotal literal symbols, plus two run symbols (RUNA,
     232                 :            :            RUNB) */
     233                 :          0 :         symCount = symTotal+2;
     234                 :          0 :         for (j = 0; j < groupCount; j++) {
     235                 :            :                 unsigned char length[MAX_SYMBOLS], temp[MAX_HUFCODE_BITS+1];
     236                 :            :                 int     minLen, maxLen, pp;
     237                 :            :                 /* Read Huffman code lengths for each symbol.  They're
     238                 :            :                    stored in a way similar to mtf; record a starting
     239                 :            :                    value for the first symbol, and an offset from the
     240                 :            :                    previous value for everys symbol after that.
     241                 :            :                    (Subtracting 1 before the loop and then adding it
     242                 :            :                    back at the end is an optimization that makes the
     243                 :            :                    test inside the loop simpler: symbol length 0
     244                 :            :                    becomes negative, so an unsigned inequality catches
     245                 :            :                    it.) */
     246                 :          0 :                 t = get_bits(bd, 5)-1;
     247                 :          0 :                 for (i = 0; i < symCount; i++) {
     248                 :            :                         for (;;) {
     249                 :          0 :                                 if (((unsigned)t) > (MAX_HUFCODE_BITS-1))
     250                 :          0 :                                         return RETVAL_DATA_ERROR;
     251                 :            : 
     252                 :            :                                 /* If first bit is 0, stop.  Else
     253                 :            :                                    second bit indicates whether to
     254                 :            :                                    increment or decrement the value.
     255                 :            :                                    Optimization: grab 2 bits and unget
     256                 :            :                                    the second if the first was 0. */
     257                 :            : 
     258                 :          0 :                                 k = get_bits(bd, 2);
     259                 :          0 :                                 if (k < 2) {
     260                 :          0 :                                         bd->inbufBitCount++;
     261                 :            :                                         break;
     262                 :            :                                 }
     263                 :            :                                 /* Add one if second bit 1, else
     264                 :            :                                  * subtract 1.  Avoids if/else */
     265                 :          0 :                                 t += (((k+1)&2)-1);
     266                 :          0 :                         }
     267                 :            :                         /* Correct for the initial -1, to get the
     268                 :            :                          * final symbol length */
     269                 :          0 :                         length[i] = t+1;
     270                 :            :                 }
     271                 :            :                 /* Find largest and smallest lengths in this group */
     272                 :          0 :                 minLen = maxLen = length[0];
     273                 :            : 
     274                 :          0 :                 for (i = 1; i < symCount; i++) {
     275                 :          0 :                         if (length[i] > maxLen)
     276                 :            :                                 maxLen = length[i];
     277                 :          0 :                         else if (length[i] < minLen)
     278                 :            :                                 minLen = length[i];
     279                 :            :                 }
     280                 :            : 
     281                 :            :                 /* Calculate permute[], base[], and limit[] tables from
     282                 :            :                  * length[].
     283                 :            :                  *
     284                 :            :                  * permute[] is the lookup table for converting
     285                 :            :                  * Huffman coded symbols into decoded symbols.  base[]
     286                 :            :                  * is the amount to subtract from the value of a
     287                 :            :                  * Huffman symbol of a given length when using
     288                 :            :                  * permute[].
     289                 :            :                  *
     290                 :            :                  * limit[] indicates the largest numerical value a
     291                 :            :                  * symbol with a given number of bits can have.  This
     292                 :            :                  * is how the Huffman codes can vary in length: each
     293                 :            :                  * code with a value > limit[length] needs another
     294                 :            :                  * bit.
     295                 :            :                  */
     296                 :          0 :                 hufGroup = bd->groups+j;
     297                 :          0 :                 hufGroup->minLen = minLen;
     298                 :          0 :                 hufGroup->maxLen = maxLen;
     299                 :            :                 /* Note that minLen can't be smaller than 1, so we
     300                 :            :                    adjust the base and limit array pointers so we're
     301                 :            :                    not always wasting the first entry.  We do this
     302                 :            :                    again when using them (during symbol decoding).*/
     303                 :          0 :                 base = hufGroup->base-1;
     304                 :          0 :                 limit = hufGroup->limit-1;
     305                 :            :                 /* Calculate permute[].  Concurrently, initialize
     306                 :            :                  * temp[] and limit[]. */
     307                 :            :                 pp = 0;
     308                 :          0 :                 for (i = minLen; i <= maxLen; i++) {
     309                 :          0 :                         temp[i] = limit[i] = 0;
     310                 :          0 :                         for (t = 0; t < symCount; t++)
     311                 :          0 :                                 if (length[t] == i)
     312                 :          0 :                                         hufGroup->permute[pp++] = t;
     313                 :            :                 }
     314                 :            :                 /* Count symbols coded for at each bit length */
     315                 :          0 :                 for (i = 0; i < symCount; i++)
     316                 :          0 :                         temp[length[i]]++;
     317                 :            :                 /* Calculate limit[] (the largest symbol-coding value
     318                 :            :                  *at each bit length, which is (previous limit <<
     319                 :            :                  *1)+symbols at this level), and base[] (number of
     320                 :            :                  *symbols to ignore at each bit length, which is limit
     321                 :            :                  *minus the cumulative count of symbols coded for
     322                 :            :                  *already). */
     323                 :            :                 pp = t = 0;
     324                 :          0 :                 for (i = minLen; i < maxLen; i++) {
     325                 :          0 :                         pp += temp[i];
     326                 :            :                         /* We read the largest possible symbol size
     327                 :            :                            and then unget bits after determining how
     328                 :            :                            many we need, and those extra bits could be
     329                 :            :                            set to anything.  (They're noise from
     330                 :            :                            future symbols.)  At each level we're
     331                 :            :                            really only interested in the first few
     332                 :            :                            bits, so here we set all the trailing
     333                 :            :                            to-be-ignored bits to 1 so they don't
     334                 :            :                            affect the value > limit[length]
     335                 :            :                            comparison. */
     336                 :          0 :                         limit[i] = (pp << (maxLen - i)) - 1;
     337                 :          0 :                         pp <<= 1;
     338                 :          0 :                         base[i+1] = pp-(t += temp[i]);
     339                 :            :                 }
     340                 :          0 :                 limit[maxLen+1] = INT_MAX; /* Sentinal value for
     341                 :            :                                             * reading next sym. */
     342                 :          0 :                 limit[maxLen] = pp+temp[maxLen]-1;
     343                 :          0 :                 base[minLen] = 0;
     344                 :            :         }
     345                 :            :         /* We've finished reading and digesting the block header.  Now
     346                 :            :            read this block's Huffman coded symbols from the file and
     347                 :            :            undo the Huffman coding and run length encoding, saving the
     348                 :            :            result into dbuf[dbufCount++] = uc */
     349                 :            : 
     350                 :            :         /* Initialize symbol occurrence counters and symbol Move To
     351                 :            :          * Front table */
     352                 :          0 :         for (i = 0; i < 256; i++) {
     353                 :          0 :                 byteCount[i] = 0;
     354                 :          0 :                 mtfSymbol[i] = (unsigned char)i;
     355                 :            :         }
     356                 :            :         /* Loop through compressed symbols. */
     357                 :            :         runPos = dbufCount = symCount = selector = 0;
     358                 :            :         for (;;) {
     359                 :            :                 /* Determine which Huffman coding group to use. */
     360                 :          0 :                 if (!(symCount--)) {
     361                 :            :                         symCount = GROUP_SIZE-1;
     362                 :          0 :                         if (selector >= nSelectors)
     363                 :            :                                 return RETVAL_DATA_ERROR;
     364                 :          0 :                         hufGroup = bd->groups+selectors[selector++];
     365                 :          0 :                         base = hufGroup->base-1;
     366                 :          0 :                         limit = hufGroup->limit-1;
     367                 :            :                 }
     368                 :            :                 /* Read next Huffman-coded symbol. */
     369                 :            :                 /* Note: It is far cheaper to read maxLen bits and
     370                 :            :                    back up than it is to read minLen bits and then an
     371                 :            :                    additional bit at a time, testing as we go.
     372                 :            :                    Because there is a trailing last block (with file
     373                 :            :                    CRC), there is no danger of the overread causing an
     374                 :            :                    unexpected EOF for a valid compressed file.  As a
     375                 :            :                    further optimization, we do the read inline
     376                 :            :                    (falling back to a call to get_bits if the buffer
     377                 :            :                    runs dry).  The following (up to got_huff_bits:) is
     378                 :            :                    equivalent to j = get_bits(bd, hufGroup->maxLen);
     379                 :            :                  */
     380                 :          0 :                 while (bd->inbufBitCount < hufGroup->maxLen) {
     381                 :          0 :                         if (bd->inbufPos == bd->inbufCount) {
     382                 :          0 :                                 j = get_bits(bd, hufGroup->maxLen);
     383                 :          0 :                                 goto got_huff_bits;
     384                 :            :                         }
     385                 :          0 :                         bd->inbufBits =
     386                 :          0 :                                 (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++];
     387                 :          0 :                         bd->inbufBitCount += 8;
     388                 :            :                 };
     389                 :          0 :                 bd->inbufBitCount -= hufGroup->maxLen;
     390                 :          0 :                 j = (bd->inbufBits >> bd->inbufBitCount)&
     391                 :          0 :                         ((1 << hufGroup->maxLen)-1);
     392                 :            : got_huff_bits:
     393                 :            :                 /* Figure how how many bits are in next symbol and
     394                 :            :                  * unget extras */
     395                 :          0 :                 i = hufGroup->minLen;
     396                 :          0 :                 while (j > limit[i])
     397                 :          0 :                         ++i;
     398                 :          0 :                 bd->inbufBitCount += (hufGroup->maxLen - i);
     399                 :            :                 /* Huffman decode value to get nextSym (with bounds checking) */
     400                 :          0 :                 if ((i > hufGroup->maxLen)
     401                 :          0 :                         || (((unsigned)(j = (j>>(hufGroup->maxLen-i))-base[i]))
     402                 :            :                                 >= MAX_SYMBOLS))
     403                 :            :                         return RETVAL_DATA_ERROR;
     404                 :          0 :                 nextSym = hufGroup->permute[j];
     405                 :            :                 /* We have now decoded the symbol, which indicates
     406                 :            :                    either a new literal byte, or a repeated run of the
     407                 :            :                    most recent literal byte.  First, check if nextSym
     408                 :            :                    indicates a repeated run, and if so loop collecting
     409                 :            :                    how many times to repeat the last literal. */
     410                 :          0 :                 if (((unsigned)nextSym) <= SYMBOL_RUNB) { /* RUNA or RUNB */
     411                 :            :                         /* If this is the start of a new run, zero out
     412                 :            :                          * counter */
     413                 :          0 :                         if (!runPos) {
     414                 :            :                                 runPos = 1;
     415                 :            :                                 t = 0;
     416                 :            :                         }
     417                 :            :                         /* Neat trick that saves 1 symbol: instead of
     418                 :            :                            or-ing 0 or 1 at each bit position, add 1
     419                 :            :                            or 2 instead.  For example, 1011 is 1 << 0
     420                 :            :                            + 1 << 1 + 2 << 2.  1010 is 2 << 0 + 2 << 1
     421                 :            :                            + 1 << 2.  You can make any bit pattern
     422                 :            :                            that way using 1 less symbol than the basic
     423                 :            :                            or 0/1 method (except all bits 0, which
     424                 :            :                            would use no symbols, but a run of length 0
     425                 :            :                            doesn't mean anything in this context).
     426                 :            :                            Thus space is saved. */
     427                 :          0 :                         t += (runPos << nextSym);
     428                 :            :                         /* +runPos if RUNA; +2*runPos if RUNB */
     429                 :            : 
     430                 :          0 :                         runPos <<= 1;
     431                 :          0 :                         continue;
     432                 :            :                 }
     433                 :            :                 /* When we hit the first non-run symbol after a run,
     434                 :            :                    we now know how many times to repeat the last
     435                 :            :                    literal, so append that many copies to our buffer
     436                 :            :                    of decoded symbols (dbuf) now.  (The last literal
     437                 :            :                    used is the one at the head of the mtfSymbol
     438                 :            :                    array.) */
     439                 :          0 :                 if (runPos) {
     440                 :            :                         runPos = 0;
     441                 :          0 :                         if (dbufCount+t >= dbufSize)
     442                 :            :                                 return RETVAL_DATA_ERROR;
     443                 :            : 
     444                 :          0 :                         uc = symToByte[mtfSymbol[0]];
     445                 :          0 :                         byteCount[uc] += t;
     446                 :          0 :                         while (t--)
     447                 :          0 :                                 dbuf[dbufCount++] = uc;
     448                 :            :                 }
     449                 :            :                 /* Is this the terminating symbol? */
     450                 :          0 :                 if (nextSym > symTotal)
     451                 :            :                         break;
     452                 :            :                 /* At this point, nextSym indicates a new literal
     453                 :            :                    character.  Subtract one to get the position in the
     454                 :            :                    MTF array at which this literal is currently to be
     455                 :            :                    found.  (Note that the result can't be -1 or 0,
     456                 :            :                    because 0 and 1 are RUNA and RUNB.  But another
     457                 :            :                    instance of the first symbol in the mtf array,
     458                 :            :                    position 0, would have been handled as part of a
     459                 :            :                    run above.  Therefore 1 unused mtf position minus 2
     460                 :            :                    non-literal nextSym values equals -1.) */
     461                 :          0 :                 if (dbufCount >= dbufSize)
     462                 :            :                         return RETVAL_DATA_ERROR;
     463                 :          0 :                 i = nextSym - 1;
     464                 :          0 :                 uc = mtfSymbol[i];
     465                 :            :                 /* Adjust the MTF array.  Since we typically expect to
     466                 :            :                  *move only a small number of symbols, and are bound
     467                 :            :                  *by 256 in any case, using memmove here would
     468                 :            :                  *typically be bigger and slower due to function call
     469                 :            :                  *overhead and other assorted setup costs. */
     470                 :            :                 do {
     471                 :          0 :                         mtfSymbol[i] = mtfSymbol[i-1];
     472                 :          0 :                 } while (--i);
     473                 :          0 :                 mtfSymbol[0] = uc;
     474                 :          0 :                 uc = symToByte[uc];
     475                 :            :                 /* We have our literal byte.  Save it into dbuf. */
     476                 :          0 :                 byteCount[uc]++;
     477                 :          0 :                 dbuf[dbufCount++] = (unsigned int)uc;
     478                 :            :         }
     479                 :            :         /* At this point, we've read all the Huffman-coded symbols
     480                 :            :            (and repeated runs) for this block from the input stream,
     481                 :            :            and decoded them into the intermediate buffer.  There are
     482                 :            :            dbufCount many decoded bytes in dbuf[].  Now undo the
     483                 :            :            Burrows-Wheeler transform on dbuf.  See
     484                 :            :            http://dogma.net/markn/articles/bwt/bwt.htm
     485                 :            :          */
     486                 :            :         /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */
     487                 :            :         j = 0;
     488                 :          0 :         for (i = 0; i < 256; i++) {
     489                 :          0 :                 k = j+byteCount[i];
     490                 :          0 :                 byteCount[i] = j;
     491                 :            :                 j = k;
     492                 :            :         }
     493                 :            :         /* Figure out what order dbuf would be in if we sorted it. */
     494                 :          0 :         for (i = 0; i < dbufCount; i++) {
     495                 :          0 :                 uc = (unsigned char)(dbuf[i] & 0xff);
     496                 :          0 :                 dbuf[byteCount[uc]] |= (i << 8);
     497                 :          0 :                 byteCount[uc]++;
     498                 :            :         }
     499                 :            :         /* Decode first byte by hand to initialize "previous" byte.
     500                 :            :            Note that it doesn't get output, and if the first three
     501                 :            :            characters are identical it doesn't qualify as a run (hence
     502                 :            :            writeRunCountdown = 5). */
     503                 :          0 :         if (dbufCount) {
     504                 :          0 :                 if (origPtr >= dbufCount)
     505                 :            :                         return RETVAL_DATA_ERROR;
     506                 :          0 :                 bd->writePos = dbuf[origPtr];
     507                 :          0 :                 bd->writeCurrent = (unsigned char)(bd->writePos&0xff);
     508                 :          0 :                 bd->writePos >>= 8;
     509                 :          0 :                 bd->writeRunCountdown = 5;
     510                 :            :         }
     511                 :          0 :         bd->writeCount = dbufCount;
     512                 :            : 
     513                 :          0 :         return RETVAL_OK;
     514                 :            : }
     515                 :            : 
     516                 :            : /* Undo burrows-wheeler transform on intermediate buffer to produce output.
     517                 :            :    If start_bunzip was initialized with out_fd =-1, then up to len bytes of
     518                 :            :    data are written to outbuf.  Return value is number of bytes written or
     519                 :            :    error (all errors are negative numbers).  If out_fd!=-1, outbuf and len
     520                 :            :    are ignored, data is written to out_fd and return is RETVAL_OK or error.
     521                 :            : */
     522                 :            : 
     523                 :          0 : static int INIT read_bunzip(struct bunzip_data *bd, char *outbuf, int len)
     524                 :            : {
     525                 :            :         const unsigned int *dbuf;
     526                 :            :         int pos, xcurrent, previous, gotcount;
     527                 :            : 
     528                 :            :         /* If last read was short due to end of file, return last block now */
     529                 :          0 :         if (bd->writeCount < 0)
     530                 :            :                 return bd->writeCount;
     531                 :            : 
     532                 :            :         gotcount = 0;
     533                 :          0 :         dbuf = bd->dbuf;
     534                 :          0 :         pos = bd->writePos;
     535                 :          0 :         xcurrent = bd->writeCurrent;
     536                 :            : 
     537                 :            :         /* We will always have pending decoded data to write into the output
     538                 :            :            buffer unless this is the very first call (in which case we haven't
     539                 :            :            Huffman-decoded a block into the intermediate buffer yet). */
     540                 :            : 
     541                 :          0 :         if (bd->writeCopies) {
     542                 :            :                 /* Inside the loop, writeCopies means extra copies (beyond 1) */
     543                 :          0 :                 --bd->writeCopies;
     544                 :            :                 /* Loop outputting bytes */
     545                 :            :                 for (;;) {
     546                 :            :                         /* If the output buffer is full, snapshot
     547                 :            :                          * state and return */
     548                 :          0 :                         if (gotcount >= len) {
     549                 :          0 :                                 bd->writePos = pos;
     550                 :          0 :                                 bd->writeCurrent = xcurrent;
     551                 :          0 :                                 bd->writeCopies++;
     552                 :          0 :                                 return len;
     553                 :            :                         }
     554                 :            :                         /* Write next byte into output buffer, updating CRC */
     555                 :          0 :                         outbuf[gotcount++] = xcurrent;
     556                 :          0 :                         bd->writeCRC = (((bd->writeCRC) << 8)
     557                 :          0 :                                 ^bd->crc32Table[((bd->writeCRC) >> 24)
     558                 :          0 :                                 ^xcurrent]);
     559                 :            :                         /* Loop now if we're outputting multiple
     560                 :            :                          * copies of this byte */
     561                 :          0 :                         if (bd->writeCopies) {
     562                 :          0 :                                 --bd->writeCopies;
     563                 :          0 :                                 continue;
     564                 :            :                         }
     565                 :            : decode_next_byte:
     566                 :          0 :                         if (!bd->writeCount--)
     567                 :            :                                 break;
     568                 :            :                         /* Follow sequence vector to undo
     569                 :            :                          * Burrows-Wheeler transform */
     570                 :            :                         previous = xcurrent;
     571                 :          0 :                         pos = dbuf[pos];
     572                 :          0 :                         xcurrent = pos&0xff;
     573                 :          0 :                         pos >>= 8;
     574                 :            :                         /* After 3 consecutive copies of the same
     575                 :            :                            byte, the 4th is a repeat count.  We count
     576                 :            :                            down from 4 instead *of counting up because
     577                 :            :                            testing for non-zero is faster */
     578                 :          0 :                         if (--bd->writeRunCountdown) {
     579                 :          0 :                                 if (xcurrent != previous)
     580                 :          0 :                                         bd->writeRunCountdown = 4;
     581                 :            :                         } else {
     582                 :            :                                 /* We have a repeated run, this byte
     583                 :            :                                  * indicates the count */
     584                 :          0 :                                 bd->writeCopies = xcurrent;
     585                 :            :                                 xcurrent = previous;
     586                 :          0 :                                 bd->writeRunCountdown = 5;
     587                 :            :                                 /* Sometimes there are just 3 bytes
     588                 :            :                                  * (run length 0) */
     589                 :          0 :                                 if (!bd->writeCopies)
     590                 :            :                                         goto decode_next_byte;
     591                 :            :                                 /* Subtract the 1 copy we'd output
     592                 :            :                                  * anyway to get extras */
     593                 :          0 :                                 --bd->writeCopies;
     594                 :            :                         }
     595                 :            :                 }
     596                 :            :                 /* Decompression of this block completed successfully */
     597                 :          0 :                 bd->writeCRC = ~bd->writeCRC;
     598                 :          0 :                 bd->totalCRC = ((bd->totalCRC << 1) |
     599                 :          0 :                                 (bd->totalCRC >> 31)) ^ bd->writeCRC;
     600                 :            :                 /* If this block had a CRC error, force file level CRC error. */
     601                 :          0 :                 if (bd->writeCRC != bd->headerCRC) {
     602                 :          0 :                         bd->totalCRC = bd->headerCRC+1;
     603                 :          0 :                         return RETVAL_LAST_BLOCK;
     604                 :            :                 }
     605                 :            :         }
     606                 :            : 
     607                 :            :         /* Refill the intermediate buffer by Huffman-decoding next
     608                 :            :          * block of input */
     609                 :            :         /* (previous is just a convenient unused temp variable here) */
     610                 :          0 :         previous = get_next_block(bd);
     611                 :          0 :         if (previous) {
     612                 :          0 :                 bd->writeCount = previous;
     613                 :          0 :                 return (previous != RETVAL_LAST_BLOCK) ? previous : gotcount;
     614                 :            :         }
     615                 :          0 :         bd->writeCRC = 0xffffffffUL;
     616                 :          0 :         pos = bd->writePos;
     617                 :          0 :         xcurrent = bd->writeCurrent;
     618                 :          0 :         goto decode_next_byte;
     619                 :            : }
     620                 :            : 
     621                 :          0 : static long INIT nofill(void *buf, unsigned long len)
     622                 :            : {
     623                 :          0 :         return -1;
     624                 :            : }
     625                 :            : 
     626                 :            : /* Allocate the structure, read file header.  If in_fd ==-1, inbuf must contain
     627                 :            :    a complete bunzip file (len bytes long).  If in_fd!=-1, inbuf and len are
     628                 :            :    ignored, and data is read from file handle into temporary buffer. */
     629                 :          0 : static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, long len,
     630                 :            :                              long (*fill)(void*, unsigned long))
     631                 :            : {
     632                 :            :         struct bunzip_data *bd;
     633                 :            :         unsigned int i, j, c;
     634                 :            :         const unsigned int BZh0 =
     635                 :            :                 (((unsigned int)'B') << 24)+(((unsigned int)'Z') << 16)
     636                 :            :                 +(((unsigned int)'h') << 8)+(unsigned int)'0';
     637                 :            : 
     638                 :            :         /* Figure out how much data to allocate */
     639                 :            :         i = sizeof(struct bunzip_data);
     640                 :            : 
     641                 :            :         /* Allocate bunzip_data.  Most fields initialize to zero. */
     642                 :          0 :         bd = *bdp = malloc(i);
     643                 :          0 :         if (!bd)
     644                 :            :                 return RETVAL_OUT_OF_MEMORY;
     645                 :          0 :         memset(bd, 0, sizeof(struct bunzip_data));
     646                 :            :         /* Setup input buffer */
     647                 :          0 :         bd->inbuf = inbuf;
     648                 :          0 :         bd->inbufCount = len;
     649                 :          0 :         if (fill != NULL)
     650                 :          0 :                 bd->fill = fill;
     651                 :            :         else
     652                 :          0 :                 bd->fill = nofill;
     653                 :            : 
     654                 :            :         /* Init the CRC32 table (big endian) */
     655                 :          0 :         for (i = 0; i < 256; i++) {
     656                 :          0 :                 c = i << 24;
     657                 :          0 :                 for (j = 8; j; j--)
     658                 :          0 :                         c = c&0x80000000 ? (c << 1)^(CRC32_POLY_BE) : (c << 1);
     659                 :          0 :                 bd->crc32Table[i] = c;
     660                 :            :         }
     661                 :            : 
     662                 :            :         /* Ensure that file starts with "BZh['1'-'9']." */
     663                 :          0 :         i = get_bits(bd, 32);
     664                 :          0 :         if (((unsigned int)(i-BZh0-1)) >= 9)
     665                 :            :                 return RETVAL_NOT_BZIP_DATA;
     666                 :            : 
     667                 :            :         /* Fourth byte (ascii '1'-'9'), indicates block size in units of 100k of
     668                 :            :            uncompressed data.  Allocate intermediate buffer for block. */
     669                 :          0 :         bd->dbufSize = 100000*(i-BZh0);
     670                 :            : 
     671                 :          0 :         bd->dbuf = large_malloc(bd->dbufSize * sizeof(int));
     672                 :          0 :         if (!bd->dbuf)
     673                 :            :                 return RETVAL_OUT_OF_MEMORY;
     674                 :          0 :         return RETVAL_OK;
     675                 :            : }
     676                 :            : 
     677                 :            : /* Example usage: decompress src_fd to dst_fd.  (Stops at end of bzip2 data,
     678                 :            :    not end of file.) */
     679                 :          0 : STATIC int INIT bunzip2(unsigned char *buf, long len,
     680                 :            :                         long (*fill)(void*, unsigned long),
     681                 :            :                         long (*flush)(void*, unsigned long),
     682                 :            :                         unsigned char *outbuf,
     683                 :            :                         long *pos,
     684                 :            :                         void(*error)(char *x))
     685                 :            : {
     686                 :            :         struct bunzip_data *bd;
     687                 :            :         int i = -1;
     688                 :            :         unsigned char *inbuf;
     689                 :            : 
     690                 :          0 :         if (flush)
     691                 :            :                 outbuf = malloc(BZIP2_IOBUF_SIZE);
     692                 :            : 
     693                 :          0 :         if (!outbuf) {
     694                 :          0 :                 error("Could not allocate output buffer");
     695                 :          0 :                 return RETVAL_OUT_OF_MEMORY;
     696                 :            :         }
     697                 :          0 :         if (buf)
     698                 :            :                 inbuf = buf;
     699                 :            :         else
     700                 :            :                 inbuf = malloc(BZIP2_IOBUF_SIZE);
     701                 :          0 :         if (!inbuf) {
     702                 :          0 :                 error("Could not allocate input buffer");
     703                 :            :                 i = RETVAL_OUT_OF_MEMORY;
     704                 :          0 :                 goto exit_0;
     705                 :            :         }
     706                 :          0 :         i = start_bunzip(&bd, inbuf, len, fill);
     707                 :          0 :         if (!i) {
     708                 :            :                 for (;;) {
     709                 :          0 :                         i = read_bunzip(bd, outbuf, BZIP2_IOBUF_SIZE);
     710                 :          0 :                         if (i <= 0)
     711                 :            :                                 break;
     712                 :          0 :                         if (!flush)
     713                 :          0 :                                 outbuf += i;
     714                 :            :                         else
     715                 :          0 :                                 if (i != flush(outbuf, i)) {
     716                 :            :                                         i = RETVAL_UNEXPECTED_OUTPUT_EOF;
     717                 :            :                                         break;
     718                 :            :                                 }
     719                 :            :                 }
     720                 :            :         }
     721                 :            :         /* Check CRC and release memory */
     722                 :          0 :         if (i == RETVAL_LAST_BLOCK) {
     723                 :          0 :                 if (bd->headerCRC != bd->totalCRC)
     724                 :          0 :                         error("Data integrity error when decompressing.");
     725                 :            :                 else
     726                 :            :                         i = RETVAL_OK;
     727                 :          0 :         } else if (i == RETVAL_UNEXPECTED_OUTPUT_EOF) {
     728                 :          0 :                 error("Compressed file ends unexpectedly");
     729                 :            :         }
     730                 :          0 :         if (!bd)
     731                 :            :                 goto exit_1;
     732                 :          0 :         if (bd->dbuf)
     733                 :          0 :                 large_free(bd->dbuf);
     734                 :          0 :         if (pos)
     735                 :          0 :                 *pos = bd->inbufPos;
     736                 :          0 :         free(bd);
     737                 :            : exit_1:
     738                 :          0 :         if (!buf)
     739                 :          0 :                 free(inbuf);
     740                 :            : exit_0:
     741                 :          0 :         if (flush)
     742                 :          0 :                 free(outbuf);
     743                 :          0 :         return i;
     744                 :            : }
     745                 :            : 
     746                 :            : #ifdef PREBOOT
     747                 :            : STATIC int INIT __decompress(unsigned char *buf, long len,
     748                 :            :                         long (*fill)(void*, unsigned long),
     749                 :            :                         long (*flush)(void*, unsigned long),
     750                 :            :                         unsigned char *outbuf, long olen,
     751                 :            :                         long *pos,
     752                 :            :                         void (*error)(char *x))
     753                 :            : {
     754                 :            :         return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error);
     755                 :            : }
     756                 :            : #endif
    

Generated by: LCOV version 1.14