Branch data Line data Source code
1 : : /* inffast.c -- fast decoding 2 : : * Copyright (C) 1995-2004 Mark Adler 3 : : * For conditions of distribution and use, see copyright notice in zlib.h 4 : : */ 5 : : 6 : : #include <linux/zutil.h> 7 : : #include "inftrees.h" 8 : : #include "inflate.h" 9 : : #include "inffast.h" 10 : : 11 : : #ifndef ASMINF 12 : : 13 : : /* Allow machine dependent optimization for post-increment or pre-increment. 14 : : Based on testing to date, 15 : : Pre-increment preferred for: 16 : : - PowerPC G3 (Adler) 17 : : - MIPS R5000 (Randers-Pehrson) 18 : : Post-increment preferred for: 19 : : - none 20 : : No measurable difference: 21 : : - Pentium III (Anderson) 22 : : - M68060 (Nikl) 23 : : */ 24 : : union uu { 25 : : unsigned short us; 26 : : unsigned char b[2]; 27 : : }; 28 : : 29 : : /* Endian independed version */ 30 : : static inline unsigned short 31 : : get_unaligned16(const unsigned short *p) 32 : : { 33 : : union uu mm; 34 : : unsigned char *b = (unsigned char *)p; 35 : : 36 : : mm.b[0] = b[0]; 37 : : mm.b[1] = b[1]; 38 : : return mm.us; 39 : : } 40 : : 41 : : #ifdef POSTINC 42 : : # define OFF 0 43 : : # define PUP(a) *(a)++ 44 : : # define UP_UNALIGNED(a) get_unaligned16((a)++) 45 : : #else 46 : : # define OFF 1 47 : : # define PUP(a) *++(a) 48 : : # define UP_UNALIGNED(a) get_unaligned16(++(a)) 49 : : #endif 50 : : 51 : : /* 52 : : Decode literal, length, and distance codes and write out the resulting 53 : : literal and match bytes until either not enough input or output is 54 : : available, an end-of-block is encountered, or a data error is encountered. 55 : : When large enough input and output buffers are supplied to inflate(), for 56 : : example, a 16K input buffer and a 64K output buffer, more than 95% of the 57 : : inflate execution time is spent in this routine. 58 : : 59 : : Entry assumptions: 60 : : 61 : : state->mode == LEN 62 : : strm->avail_in >= 6 63 : : strm->avail_out >= 258 64 : : start >= strm->avail_out 65 : : state->bits < 8 66 : : 67 : : On return, state->mode is one of: 68 : : 69 : : LEN -- ran out of enough output space or enough available input 70 : : TYPE -- reached end of block code, inflate() to interpret next block 71 : : BAD -- error in block data 72 : : 73 : : Notes: 74 : : 75 : : - The maximum input bits used by a length/distance pair is 15 bits for the 76 : : length code, 5 bits for the length extra, 15 bits for the distance code, 77 : : and 13 bits for the distance extra. This totals 48 bits, or six bytes. 78 : : Therefore if strm->avail_in >= 6, then there is enough input to avoid 79 : : checking for available input while decoding. 80 : : 81 : : - The maximum bytes that a single length/distance pair can output is 258 82 : : bytes, which is the maximum length that can be coded. inflate_fast() 83 : : requires strm->avail_out >= 258 for each loop to avoid checking for 84 : : output space. 85 : : 86 : : - @start: inflate()'s starting value for strm->avail_out 87 : : */ 88 : 0 : void inflate_fast(z_streamp strm, unsigned start) 89 : : { 90 : 0 : struct inflate_state *state; 91 : 0 : const unsigned char *in; /* local strm->next_in */ 92 : 0 : const unsigned char *last; /* while in < last, enough input available */ 93 : 0 : unsigned char *out; /* local strm->next_out */ 94 : 0 : unsigned char *beg; /* inflate()'s initial strm->next_out */ 95 : 0 : unsigned char *end; /* while out < end, enough space available */ 96 : : #ifdef INFLATE_STRICT 97 : : unsigned dmax; /* maximum distance from zlib header */ 98 : : #endif 99 : 0 : unsigned wsize; /* window size or zero if not using window */ 100 : 0 : unsigned whave; /* valid bytes in the window */ 101 : 0 : unsigned write; /* window write index */ 102 : 0 : unsigned char *window; /* allocated sliding window, if wsize != 0 */ 103 : 0 : unsigned long hold; /* local strm->hold */ 104 : 0 : unsigned bits; /* local strm->bits */ 105 : 0 : code const *lcode; /* local strm->lencode */ 106 : 0 : code const *dcode; /* local strm->distcode */ 107 : 0 : unsigned lmask; /* mask for first level of length codes */ 108 : 0 : unsigned dmask; /* mask for first level of distance codes */ 109 : 0 : code this; /* retrieved table entry */ 110 : 0 : unsigned op; /* code bits, operation, extra bits, or */ 111 : : /* window position, window bytes to copy */ 112 : 0 : unsigned len; /* match length, unused bytes */ 113 : 0 : unsigned dist; /* match distance */ 114 : 0 : unsigned char *from; /* where to copy match from */ 115 : : 116 : : /* copy state to local variables */ 117 : 0 : state = (struct inflate_state *)strm->state; 118 : 0 : in = strm->next_in - OFF; 119 : 0 : last = in + (strm->avail_in - 5); 120 : 0 : out = strm->next_out - OFF; 121 : 0 : beg = out - (start - strm->avail_out); 122 : 0 : end = out + (strm->avail_out - 257); 123 : : #ifdef INFLATE_STRICT 124 : : dmax = state->dmax; 125 : : #endif 126 : 0 : wsize = state->wsize; 127 : 0 : whave = state->whave; 128 : 0 : write = state->write; 129 : 0 : window = state->window; 130 : 0 : hold = state->hold; 131 : 0 : bits = state->bits; 132 : 0 : lcode = state->lencode; 133 : 0 : dcode = state->distcode; 134 : 0 : lmask = (1U << state->lenbits) - 1; 135 : 0 : dmask = (1U << state->distbits) - 1; 136 : : 137 : : /* decode literals and length/distances until end-of-block or not enough 138 : : input data or output space */ 139 : 0 : do { 140 [ # # ]: 0 : if (bits < 15) { 141 : 0 : hold += (unsigned long)(PUP(in)) << bits; 142 : 0 : bits += 8; 143 : 0 : hold += (unsigned long)(PUP(in)) << bits; 144 : 0 : bits += 8; 145 : : } 146 : 0 : this = lcode[hold & lmask]; 147 : 0 : dolen: 148 : 0 : op = (unsigned)(this.bits); 149 : 0 : hold >>= op; 150 : 0 : bits -= op; 151 : 0 : op = (unsigned)(this.op); 152 [ # # ]: 0 : if (op == 0) { /* literal */ 153 : 0 : PUP(out) = (unsigned char)(this.val); 154 : : } 155 [ # # ]: 0 : else if (op & 16) { /* length base */ 156 : 0 : len = (unsigned)(this.val); 157 : 0 : op &= 15; /* number of extra bits */ 158 [ # # ]: 0 : if (op) { 159 [ # # ]: 0 : if (bits < op) { 160 : 0 : hold += (unsigned long)(PUP(in)) << bits; 161 : 0 : bits += 8; 162 : : } 163 : 0 : len += (unsigned)hold & ((1U << op) - 1); 164 : 0 : hold >>= op; 165 : 0 : bits -= op; 166 : : } 167 [ # # ]: 0 : if (bits < 15) { 168 : 0 : hold += (unsigned long)(PUP(in)) << bits; 169 : 0 : bits += 8; 170 : 0 : hold += (unsigned long)(PUP(in)) << bits; 171 : 0 : bits += 8; 172 : : } 173 : 0 : this = dcode[hold & dmask]; 174 : 0 : dodist: 175 : 0 : op = (unsigned)(this.bits); 176 : 0 : hold >>= op; 177 : 0 : bits -= op; 178 : 0 : op = (unsigned)(this.op); 179 [ # # ]: 0 : if (op & 16) { /* distance base */ 180 : 0 : dist = (unsigned)(this.val); 181 : 0 : op &= 15; /* number of extra bits */ 182 [ # # ]: 0 : if (bits < op) { 183 : 0 : hold += (unsigned long)(PUP(in)) << bits; 184 : 0 : bits += 8; 185 [ # # ]: 0 : if (bits < op) { 186 : 0 : hold += (unsigned long)(PUP(in)) << bits; 187 : 0 : bits += 8; 188 : : } 189 : : } 190 : 0 : dist += (unsigned)hold & ((1U << op) - 1); 191 : : #ifdef INFLATE_STRICT 192 : : if (dist > dmax) { 193 : : strm->msg = (char *)"invalid distance too far back"; 194 : : state->mode = BAD; 195 : : break; 196 : : } 197 : : #endif 198 : 0 : hold >>= op; 199 : 0 : bits -= op; 200 : 0 : op = (unsigned)(out - beg); /* max distance in output */ 201 [ # # ]: 0 : if (dist > op) { /* see if copy from window */ 202 : 0 : op = dist - op; /* distance back in window */ 203 [ # # ]: 0 : if (op > whave) { 204 : 0 : strm->msg = (char *)"invalid distance too far back"; 205 : 0 : state->mode = BAD; 206 : 0 : break; 207 : : } 208 : 0 : from = window - OFF; 209 [ # # ]: 0 : if (write == 0) { /* very common case */ 210 : 0 : from += wsize - op; 211 [ # # ]: 0 : if (op < len) { /* some from window */ 212 : 0 : len -= op; 213 : 0 : do { 214 : 0 : PUP(out) = PUP(from); 215 [ # # ]: 0 : } while (--op); 216 : 0 : from = out - dist; /* rest from output */ 217 : : } 218 : : } 219 [ # # ]: 0 : else if (write < op) { /* wrap around window */ 220 : 0 : from += wsize + write - op; 221 : 0 : op -= write; 222 [ # # ]: 0 : if (op < len) { /* some from end of window */ 223 : 0 : len -= op; 224 : 0 : do { 225 : 0 : PUP(out) = PUP(from); 226 [ # # ]: 0 : } while (--op); 227 : 0 : from = window - OFF; 228 [ # # ]: 0 : if (write < len) { /* some from start of window */ 229 : 0 : op = write; 230 : 0 : len -= op; 231 : 0 : do { 232 : 0 : PUP(out) = PUP(from); 233 [ # # ]: 0 : } while (--op); 234 : 0 : from = out - dist; /* rest from output */ 235 : : } 236 : : } 237 : : } 238 : : else { /* contiguous in window */ 239 : 0 : from += write - op; 240 [ # # ]: 0 : if (op < len) { /* some from window */ 241 : 0 : len -= op; 242 : 0 : do { 243 : 0 : PUP(out) = PUP(from); 244 [ # # ]: 0 : } while (--op); 245 : 0 : from = out - dist; /* rest from output */ 246 : : } 247 : : } 248 [ # # ]: 0 : while (len > 2) { 249 : 0 : PUP(out) = PUP(from); 250 : 0 : PUP(out) = PUP(from); 251 : 0 : PUP(out) = PUP(from); 252 : 0 : len -= 3; 253 : : } 254 [ # # ]: 0 : if (len) { 255 : 0 : PUP(out) = PUP(from); 256 [ # # ]: 0 : if (len > 1) 257 : 0 : PUP(out) = PUP(from); 258 : : } 259 : : } 260 : : else { 261 : 0 : unsigned short *sout; 262 : 0 : unsigned long loops; 263 : : 264 : 0 : from = out - dist; /* copy direct from output */ 265 : : /* minimum length is three */ 266 : : /* Align out addr */ 267 [ # # ]: 0 : if (!((long)(out - 1 + OFF) & 1)) { 268 : 0 : PUP(out) = PUP(from); 269 : 0 : len--; 270 : : } 271 : 0 : sout = (unsigned short *)(out - OFF); 272 [ # # ]: 0 : if (dist > 2) { 273 : 0 : unsigned short *sfrom; 274 : : 275 : 0 : sfrom = (unsigned short *)(from - OFF); 276 : 0 : loops = len >> 1; 277 : 0 : do 278 : : #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 279 : 0 : PUP(sout) = PUP(sfrom); 280 : : #else 281 : : PUP(sout) = UP_UNALIGNED(sfrom); 282 : : #endif 283 [ # # ]: 0 : while (--loops); 284 : 0 : out = (unsigned char *)sout + OFF; 285 : 0 : from = (unsigned char *)sfrom + OFF; 286 : : } else { /* dist == 1 or dist == 2 */ 287 : 0 : unsigned short pat16; 288 : : 289 : 0 : pat16 = *(sout-1+OFF); 290 [ # # ]: 0 : if (dist == 1) { 291 : 0 : union uu mm; 292 : : /* copy one char pattern to both bytes */ 293 : 0 : mm.us = pat16; 294 : 0 : mm.b[0] = mm.b[1]; 295 : 0 : pat16 = mm.us; 296 : : } 297 : 0 : loops = len >> 1; 298 : 0 : do 299 : 0 : PUP(sout) = pat16; 300 [ # # ]: 0 : while (--loops); 301 : 0 : out = (unsigned char *)sout + OFF; 302 : : } 303 [ # # ]: 0 : if (len & 1) 304 : 0 : PUP(out) = PUP(from); 305 : : } 306 : : } 307 [ # # ]: 0 : else if ((op & 64) == 0) { /* 2nd level distance code */ 308 : 0 : this = dcode[this.val + (hold & ((1U << op) - 1))]; 309 : 0 : goto dodist; 310 : : } 311 : : else { 312 : 0 : strm->msg = (char *)"invalid distance code"; 313 : 0 : state->mode = BAD; 314 : 0 : break; 315 : : } 316 : : } 317 [ # # ]: 0 : else if ((op & 64) == 0) { /* 2nd level length code */ 318 : 0 : this = lcode[this.val + (hold & ((1U << op) - 1))]; 319 : 0 : goto dolen; 320 : : } 321 [ # # ]: 0 : else if (op & 32) { /* end-of-block */ 322 : 0 : state->mode = TYPE; 323 : 0 : break; 324 : : } 325 : : else { 326 : 0 : strm->msg = (char *)"invalid literal/length code"; 327 : 0 : state->mode = BAD; 328 : 0 : break; 329 : : } 330 [ # # ]: 0 : } while (in < last && out < end); 331 : : 332 : : /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ 333 : 0 : len = bits >> 3; 334 : 0 : in -= len; 335 : 0 : bits -= len << 3; 336 : 0 : hold &= (1U << bits) - 1; 337 : : 338 : : /* update state and return */ 339 : 0 : strm->next_in = in + OFF; 340 : 0 : strm->next_out = out + OFF; 341 [ # # ]: 0 : strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); 342 [ # # ]: 0 : strm->avail_out = (unsigned)(out < end ? 343 : 0 : 257 + (end - out) : 257 - (out - end)); 344 : 0 : state->hold = hold; 345 : 0 : state->bits = bits; 346 : 0 : return; 347 : : } 348 : : 349 : : /* 350 : : inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): 351 : : - Using bit fields for code structure 352 : : - Different op definition to avoid & for extra bits (do & for table bits) 353 : : - Three separate decoding do-loops for direct, window, and write == 0 354 : : - Special case for distance > 1 copies to do overlapped load and store copy 355 : : - Explicit branch predictions (based on measured branch probabilities) 356 : : - Deferring match copy and interspersed it with decoding subsequent codes 357 : : - Swapping literal/length else 358 : : - Swapping window/direct else 359 : : - Larger unrolled copy loops (three is about right) 360 : : - Moving len -= 3 statement into middle of loop 361 : : */ 362 : : 363 : : #endif /* !ASMINF */