inflate.go 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846
  1. // Copyright 2009 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. //go:generate go run gen.go -output fixedhuff.go
  5. // Package flate implements the DEFLATE compressed data format, described in
  6. // RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file
  7. // formats.
  8. package flate
  9. import (
  10. "bufio"
  11. "io"
  12. "strconv"
  13. )
  14. const (
  15. maxCodeLen = 16 // max length of Huffman code
  16. maxHist = 32768 // max history required
  17. // The next three numbers come from the RFC section 3.2.7, with the
  18. // additional proviso in section 3.2.5 which implies that distance codes
  19. // 30 and 31 should never occur in compressed data.
  20. maxNumLit = 286
  21. maxNumDist = 30
  22. numCodes = 19 // number of codes in Huffman meta-code
  23. )
  24. // A CorruptInputError reports the presence of corrupt input at a given offset.
  25. type CorruptInputError int64
  26. func (e CorruptInputError) Error() string {
  27. return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10)
  28. }
  29. // An InternalError reports an error in the flate code itself.
  30. type InternalError string
  31. func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
  32. // A ReadError reports an error encountered while reading input.
  33. type ReadError struct {
  34. Offset int64 // byte offset where error occurred
  35. Err error // error returned by underlying Read
  36. }
  37. func (e *ReadError) Error() string {
  38. return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
  39. }
  40. // A WriteError reports an error encountered while writing output.
  41. type WriteError struct {
  42. Offset int64 // byte offset where error occurred
  43. Err error // error returned by underlying Write
  44. }
  45. func (e *WriteError) Error() string {
  46. return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
  47. }
  48. // Resetter resets a ReadCloser returned by NewReader or NewReaderDict to
  49. // to switch to a new underlying Reader. This permits reusing a ReadCloser
  50. // instead of allocating a new one.
  51. type Resetter interface {
  52. // Reset discards any buffered data and resets the Resetter as if it was
  53. // newly initialized with the given reader.
  54. Reset(r io.Reader, dict []byte) error
  55. }
  56. // Note that much of the implementation of huffmanDecoder is also copied
  57. // into gen.go (in package main) for the purpose of precomputing the
  58. // fixed huffman tables so they can be included statically.
  59. // The data structure for decoding Huffman tables is based on that of
  60. // zlib. There is a lookup table of a fixed bit width (huffmanChunkBits),
  61. // For codes smaller than the table width, there are multiple entries
  62. // (each combination of trailing bits has the same value). For codes
  63. // larger than the table width, the table contains a link to an overflow
  64. // table. The width of each entry in the link table is the maximum code
  65. // size minus the chunk width.
  66. // Note that you can do a lookup in the table even without all bits
  67. // filled. Since the extra bits are zero, and the DEFLATE Huffman codes
  68. // have the property that shorter codes come before longer ones, the
  69. // bit length estimate in the result is a lower bound on the actual
  70. // number of bits.
  71. // chunk & 15 is number of bits
  72. // chunk >> 4 is value, including table link
  73. const (
  74. huffmanChunkBits = 9
  75. huffmanNumChunks = 1 << huffmanChunkBits
  76. huffmanCountMask = 15
  77. huffmanValueShift = 4
  78. )
  79. type huffmanDecoder struct {
  80. min int // the minimum code length
  81. chunks [huffmanNumChunks]uint32 // chunks as described above
  82. links [][]uint32 // overflow links
  83. linkMask uint32 // mask the width of the link table
  84. }
  85. // Initialize Huffman decoding tables from array of code lengths.
  86. // Following this function, h is guaranteed to be initialized into a complete
  87. // tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
  88. // degenerate case where the tree has only a single symbol with length 1. Empty
  89. // trees are permitted.
  90. func (h *huffmanDecoder) init(bits []int) bool {
  91. // Sanity enables additional runtime tests during Huffman
  92. // table construction. It's intended to be used during
  93. // development to supplement the currently ad-hoc unit tests.
  94. const sanity = false
  95. if h.min != 0 {
  96. *h = huffmanDecoder{}
  97. }
  98. // Count number of codes of each length,
  99. // compute min and max length.
  100. var count [maxCodeLen]int
  101. var min, max int
  102. for _, n := range bits {
  103. if n == 0 {
  104. continue
  105. }
  106. if min == 0 || n < min {
  107. min = n
  108. }
  109. if n > max {
  110. max = n
  111. }
  112. count[n]++
  113. }
  114. // Empty tree. The decompressor.huffSym function will fail later if the tree
  115. // is used. Technically, an empty tree is only valid for the HDIST tree and
  116. // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
  117. // is guaranteed to fail since it will attempt to use the tree to decode the
  118. // codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
  119. // guaranteed to fail later since the compressed data section must be
  120. // composed of at least one symbol (the end-of-block marker).
  121. if max == 0 {
  122. return true
  123. }
  124. code := 0
  125. var nextcode [maxCodeLen]int
  126. for i := min; i <= max; i++ {
  127. code <<= 1
  128. nextcode[i] = code
  129. code += count[i]
  130. }
  131. // Check that the coding is complete (i.e., that we've
  132. // assigned all 2-to-the-max possible bit sequences).
  133. // Exception: To be compatible with zlib, we also need to
  134. // accept degenerate single-code codings. See also
  135. // TestDegenerateHuffmanCoding.
  136. if code != 1<<uint(max) && !(code == 1 && max == 1) {
  137. return false
  138. }
  139. h.min = min
  140. if max > huffmanChunkBits {
  141. numLinks := 1 << (uint(max) - huffmanChunkBits)
  142. h.linkMask = uint32(numLinks - 1)
  143. // create link tables
  144. link := nextcode[huffmanChunkBits+1] >> 1
  145. h.links = make([][]uint32, huffmanNumChunks-link)
  146. for j := uint(link); j < huffmanNumChunks; j++ {
  147. reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8
  148. reverse >>= uint(16 - huffmanChunkBits)
  149. off := j - uint(link)
  150. if sanity && h.chunks[reverse] != 0 {
  151. panic("impossible: overwriting existing chunk")
  152. }
  153. h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1))
  154. h.links[off] = make([]uint32, numLinks)
  155. }
  156. }
  157. for i, n := range bits {
  158. if n == 0 {
  159. continue
  160. }
  161. code := nextcode[n]
  162. nextcode[n]++
  163. chunk := uint32(i<<huffmanValueShift | n)
  164. reverse := int(reverseByte[code>>8]) | int(reverseByte[code&0xff])<<8
  165. reverse >>= uint(16 - n)
  166. if n <= huffmanChunkBits {
  167. for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
  168. // We should never need to overwrite
  169. // an existing chunk. Also, 0 is
  170. // never a valid chunk, because the
  171. // lower 4 "count" bits should be
  172. // between 1 and 15.
  173. if sanity && h.chunks[off] != 0 {
  174. panic("impossible: overwriting existing chunk")
  175. }
  176. h.chunks[off] = chunk
  177. }
  178. } else {
  179. j := reverse & (huffmanNumChunks - 1)
  180. if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
  181. // Longer codes should have been
  182. // associated with a link table above.
  183. panic("impossible: not an indirect chunk")
  184. }
  185. value := h.chunks[j] >> huffmanValueShift
  186. linktab := h.links[value]
  187. reverse >>= huffmanChunkBits
  188. for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
  189. if sanity && linktab[off] != 0 {
  190. panic("impossible: overwriting existing chunk")
  191. }
  192. linktab[off] = chunk
  193. }
  194. }
  195. }
  196. if sanity {
  197. // Above we've sanity checked that we never overwrote
  198. // an existing entry. Here we additionally check that
  199. // we filled the tables completely.
  200. for i, chunk := range h.chunks {
  201. if chunk == 0 {
  202. // As an exception, in the degenerate
  203. // single-code case, we allow odd
  204. // chunks to be missing.
  205. if code == 1 && i%2 == 1 {
  206. continue
  207. }
  208. panic("impossible: missing chunk")
  209. }
  210. }
  211. for _, linktab := range h.links {
  212. for _, chunk := range linktab {
  213. if chunk == 0 {
  214. panic("impossible: missing chunk")
  215. }
  216. }
  217. }
  218. }
  219. return true
  220. }
  221. // The actual read interface needed by NewReader.
  222. // If the passed in io.Reader does not also have ReadByte,
  223. // the NewReader will introduce its own buffering.
  224. type Reader interface {
  225. io.Reader
  226. io.ByteReader
  227. }
  228. // Decompress state.
  229. type decompressor struct {
  230. // Input source.
  231. r Reader
  232. roffset int64
  233. woffset int64
  234. // Input bits, in top of b.
  235. b uint32
  236. nb uint
  237. // Huffman decoders for literal/length, distance.
  238. h1, h2 huffmanDecoder
  239. // Length arrays used to define Huffman codes.
  240. bits *[maxNumLit + maxNumDist]int
  241. codebits *[numCodes]int
  242. // Output history, buffer.
  243. hist *[maxHist]byte
  244. hp int // current output position in buffer
  245. hw int // have written hist[0:hw] already
  246. hfull bool // buffer has filled at least once
  247. // Temporary buffer (avoids repeated allocation).
  248. buf [4]byte
  249. // Next step in the decompression,
  250. // and decompression state.
  251. step func(*decompressor)
  252. final bool
  253. err error
  254. toRead []byte
  255. hl, hd *huffmanDecoder
  256. copyLen int
  257. copyDist int
  258. }
  259. func (f *decompressor) nextBlock() {
  260. if f.final {
  261. if f.hw != f.hp {
  262. f.flush((*decompressor).nextBlock)
  263. return
  264. }
  265. f.err = io.EOF
  266. return
  267. }
  268. for f.nb < 1+2 {
  269. if f.err = f.moreBits(); f.err != nil {
  270. return
  271. }
  272. }
  273. f.final = f.b&1 == 1
  274. f.b >>= 1
  275. typ := f.b & 3
  276. f.b >>= 2
  277. f.nb -= 1 + 2
  278. switch typ {
  279. case 0:
  280. f.dataBlock()
  281. case 1:
  282. // compressed, fixed Huffman tables
  283. f.hl = &fixedHuffmanDecoder
  284. f.hd = nil
  285. f.huffmanBlock()
  286. case 2:
  287. // compressed, dynamic Huffman tables
  288. if f.err = f.readHuffman(); f.err != nil {
  289. break
  290. }
  291. f.hl = &f.h1
  292. f.hd = &f.h2
  293. f.huffmanBlock()
  294. default:
  295. // 3 is reserved.
  296. f.err = CorruptInputError(f.roffset)
  297. }
  298. }
  299. func (f *decompressor) Read(b []byte) (int, error) {
  300. for {
  301. if len(f.toRead) > 0 {
  302. n := copy(b, f.toRead)
  303. f.toRead = f.toRead[n:]
  304. return n, nil
  305. }
  306. if f.err != nil {
  307. return 0, f.err
  308. }
  309. f.step(f)
  310. }
  311. }
  312. // Support the io.WriteTo interface for io.Copy and friends.
  313. func (f *decompressor) WriteTo(w io.Writer) (int64, error) {
  314. total := int64(0)
  315. for {
  316. if f.err != nil {
  317. if f.err == io.EOF {
  318. return total, nil
  319. }
  320. return total, f.err
  321. }
  322. if len(f.toRead) > 0 {
  323. var n int
  324. n, f.err = w.Write(f.toRead)
  325. if f.err != nil {
  326. return total, f.err
  327. }
  328. if n != len(f.toRead) {
  329. return total, io.ErrShortWrite
  330. }
  331. f.toRead = f.toRead[:0]
  332. total += int64(n)
  333. }
  334. f.step(f)
  335. }
  336. }
  337. func (f *decompressor) Close() error {
  338. if f.err == io.EOF {
  339. return nil
  340. }
  341. return f.err
  342. }
  343. // RFC 1951 section 3.2.7.
  344. // Compression with dynamic Huffman codes
  345. var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
  346. func (f *decompressor) readHuffman() error {
  347. // HLIT[5], HDIST[5], HCLEN[4].
  348. for f.nb < 5+5+4 {
  349. if err := f.moreBits(); err != nil {
  350. return err
  351. }
  352. }
  353. nlit := int(f.b&0x1F) + 257
  354. if nlit > maxNumLit {
  355. return CorruptInputError(f.roffset)
  356. }
  357. f.b >>= 5
  358. ndist := int(f.b&0x1F) + 1
  359. if ndist > maxNumDist {
  360. return CorruptInputError(f.roffset)
  361. }
  362. f.b >>= 5
  363. nclen := int(f.b&0xF) + 4
  364. // numCodes is 19, so nclen is always valid.
  365. f.b >>= 4
  366. f.nb -= 5 + 5 + 4
  367. // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
  368. for i := 0; i < nclen; i++ {
  369. for f.nb < 3 {
  370. if err := f.moreBits(); err != nil {
  371. return err
  372. }
  373. }
  374. f.codebits[codeOrder[i]] = int(f.b & 0x7)
  375. f.b >>= 3
  376. f.nb -= 3
  377. }
  378. for i := nclen; i < len(codeOrder); i++ {
  379. f.codebits[codeOrder[i]] = 0
  380. }
  381. if !f.h1.init(f.codebits[0:]) {
  382. return CorruptInputError(f.roffset)
  383. }
  384. // HLIT + 257 code lengths, HDIST + 1 code lengths,
  385. // using the code length Huffman code.
  386. for i, n := 0, nlit+ndist; i < n; {
  387. x, err := f.huffSym(&f.h1)
  388. if err != nil {
  389. return err
  390. }
  391. if x < 16 {
  392. // Actual length.
  393. f.bits[i] = x
  394. i++
  395. continue
  396. }
  397. // Repeat previous length or zero.
  398. var rep int
  399. var nb uint
  400. var b int
  401. switch x {
  402. default:
  403. return InternalError("unexpected length code")
  404. case 16:
  405. rep = 3
  406. nb = 2
  407. if i == 0 {
  408. return CorruptInputError(f.roffset)
  409. }
  410. b = f.bits[i-1]
  411. case 17:
  412. rep = 3
  413. nb = 3
  414. b = 0
  415. case 18:
  416. rep = 11
  417. nb = 7
  418. b = 0
  419. }
  420. for f.nb < nb {
  421. if err := f.moreBits(); err != nil {
  422. return err
  423. }
  424. }
  425. rep += int(f.b & uint32(1<<nb-1))
  426. f.b >>= nb
  427. f.nb -= nb
  428. if i+rep > n {
  429. return CorruptInputError(f.roffset)
  430. }
  431. for j := 0; j < rep; j++ {
  432. f.bits[i] = b
  433. i++
  434. }
  435. }
  436. if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
  437. return CorruptInputError(f.roffset)
  438. }
  439. // In order to preserve the property that we never read any extra bytes
  440. // after the end of the DEFLATE stream, huffSym conservatively reads min
  441. // bits at a time until it decodes the symbol. However, since every block
  442. // must end with an EOB marker, we can use that as the minimum number of
  443. // bits to read and guarantee we never read past the end of the stream.
  444. if f.bits[endBlockMarker] > 0 {
  445. f.h1.min = f.bits[endBlockMarker] // Length of EOB marker
  446. }
  447. return nil
  448. }
  449. // Decode a single Huffman block from f.
  450. // hl and hd are the Huffman states for the lit/length values
  451. // and the distance values, respectively. If hd == nil, using the
  452. // fixed distance encoding associated with fixed Huffman blocks.
  453. func (f *decompressor) huffmanBlock() {
  454. for {
  455. v, err := f.huffSym(f.hl)
  456. if err != nil {
  457. f.err = err
  458. return
  459. }
  460. var n uint // number of bits extra
  461. var length int
  462. switch {
  463. case v < 256:
  464. f.hist[f.hp] = byte(v)
  465. f.hp++
  466. if f.hp == len(f.hist) {
  467. // After the flush, continue this loop.
  468. f.flush((*decompressor).huffmanBlock)
  469. return
  470. }
  471. continue
  472. case v == 256:
  473. // Done with huffman block; read next block.
  474. f.step = (*decompressor).nextBlock
  475. return
  476. // otherwise, reference to older data
  477. case v < 265:
  478. length = v - (257 - 3)
  479. n = 0
  480. case v < 269:
  481. length = v*2 - (265*2 - 11)
  482. n = 1
  483. case v < 273:
  484. length = v*4 - (269*4 - 19)
  485. n = 2
  486. case v < 277:
  487. length = v*8 - (273*8 - 35)
  488. n = 3
  489. case v < 281:
  490. length = v*16 - (277*16 - 67)
  491. n = 4
  492. case v < 285:
  493. length = v*32 - (281*32 - 131)
  494. n = 5
  495. case v < maxNumLit:
  496. length = 258
  497. n = 0
  498. default:
  499. f.err = CorruptInputError(f.roffset)
  500. return
  501. }
  502. if n > 0 {
  503. for f.nb < n {
  504. if err = f.moreBits(); err != nil {
  505. f.err = err
  506. return
  507. }
  508. }
  509. length += int(f.b & uint32(1<<n-1))
  510. f.b >>= n
  511. f.nb -= n
  512. }
  513. var dist int
  514. if f.hd == nil {
  515. for f.nb < 5 {
  516. if err = f.moreBits(); err != nil {
  517. f.err = err
  518. return
  519. }
  520. }
  521. dist = int(reverseByte[(f.b&0x1F)<<3])
  522. f.b >>= 5
  523. f.nb -= 5
  524. } else {
  525. if dist, err = f.huffSym(f.hd); err != nil {
  526. f.err = err
  527. return
  528. }
  529. }
  530. switch {
  531. case dist < 4:
  532. dist++
  533. case dist < maxNumDist:
  534. nb := uint(dist-2) >> 1
  535. // have 1 bit in bottom of dist, need nb more.
  536. extra := (dist & 1) << nb
  537. for f.nb < nb {
  538. if err = f.moreBits(); err != nil {
  539. f.err = err
  540. return
  541. }
  542. }
  543. extra |= int(f.b & uint32(1<<nb-1))
  544. f.b >>= nb
  545. f.nb -= nb
  546. dist = 1<<(nb+1) + 1 + extra
  547. default:
  548. f.err = CorruptInputError(f.roffset)
  549. return
  550. }
  551. // Copy history[-dist:-dist+length] into output.
  552. if dist > len(f.hist) {
  553. f.err = InternalError("bad history distance")
  554. return
  555. }
  556. // No check on length; encoding can be prescient.
  557. if !f.hfull && dist > f.hp {
  558. f.err = CorruptInputError(f.roffset)
  559. return
  560. }
  561. f.copyLen, f.copyDist = length, dist
  562. if f.copyHist() {
  563. return
  564. }
  565. }
  566. }
  567. // copyHist copies f.copyLen bytes from f.hist (f.copyDist bytes ago) to itself.
  568. // It reports whether the f.hist buffer is full.
  569. func (f *decompressor) copyHist() bool {
  570. p := f.hp - f.copyDist
  571. if p < 0 {
  572. p += len(f.hist)
  573. }
  574. for f.copyLen > 0 {
  575. n := f.copyLen
  576. if x := len(f.hist) - f.hp; n > x {
  577. n = x
  578. }
  579. if x := len(f.hist) - p; n > x {
  580. n = x
  581. }
  582. forwardCopy(f.hist[:], f.hp, p, n)
  583. p += n
  584. f.hp += n
  585. f.copyLen -= n
  586. if f.hp == len(f.hist) {
  587. // After flush continue copying out of history.
  588. f.flush((*decompressor).copyHuff)
  589. return true
  590. }
  591. if p == len(f.hist) {
  592. p = 0
  593. }
  594. }
  595. return false
  596. }
  597. func (f *decompressor) copyHuff() {
  598. if f.copyHist() {
  599. return
  600. }
  601. f.huffmanBlock()
  602. }
  603. // Copy a single uncompressed data block from input to output.
  604. func (f *decompressor) dataBlock() {
  605. // Uncompressed.
  606. // Discard current half-byte.
  607. f.nb = 0
  608. f.b = 0
  609. // Length then ones-complement of length.
  610. nr, err := io.ReadFull(f.r, f.buf[0:4])
  611. f.roffset += int64(nr)
  612. if err != nil {
  613. f.err = &ReadError{f.roffset, err}
  614. return
  615. }
  616. n := int(f.buf[0]) | int(f.buf[1])<<8
  617. nn := int(f.buf[2]) | int(f.buf[3])<<8
  618. if uint16(nn) != uint16(^n) {
  619. f.err = CorruptInputError(f.roffset)
  620. return
  621. }
  622. if n == 0 {
  623. // 0-length block means sync
  624. f.flush((*decompressor).nextBlock)
  625. return
  626. }
  627. f.copyLen = n
  628. f.copyData()
  629. }
  630. // copyData copies f.copyLen bytes from the underlying reader into f.hist.
  631. // It pauses for reads when f.hist is full.
  632. func (f *decompressor) copyData() {
  633. n := f.copyLen
  634. for n > 0 {
  635. m := len(f.hist) - f.hp
  636. if m > n {
  637. m = n
  638. }
  639. m, err := io.ReadFull(f.r, f.hist[f.hp:f.hp+m])
  640. f.roffset += int64(m)
  641. if err != nil {
  642. f.err = &ReadError{f.roffset, err}
  643. return
  644. }
  645. n -= m
  646. f.hp += m
  647. if f.hp == len(f.hist) {
  648. f.copyLen = n
  649. f.flush((*decompressor).copyData)
  650. return
  651. }
  652. }
  653. f.step = (*decompressor).nextBlock
  654. }
  655. func (f *decompressor) setDict(dict []byte) {
  656. if len(dict) > len(f.hist) {
  657. // Will only remember the tail.
  658. dict = dict[len(dict)-len(f.hist):]
  659. }
  660. f.hp = copy(f.hist[:], dict)
  661. if f.hp == len(f.hist) {
  662. f.hp = 0
  663. f.hfull = true
  664. }
  665. f.hw = f.hp
  666. }
  667. func (f *decompressor) moreBits() error {
  668. c, err := f.r.ReadByte()
  669. if err != nil {
  670. if err == io.EOF {
  671. err = io.ErrUnexpectedEOF
  672. }
  673. return err
  674. }
  675. f.roffset++
  676. f.b |= uint32(c) << f.nb
  677. f.nb += 8
  678. return nil
  679. }
  680. // Read the next Huffman-encoded symbol from f according to h.
  681. func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
  682. // Since a huffmanDecoder can be empty or be composed of a degenerate tree
  683. // with single element, huffSym must error on these two edge cases. In both
  684. // cases, the chunks slice will be 0 for the invalid sequence, leading it
  685. // satisfy the n == 0 check below.
  686. n := uint(h.min)
  687. for {
  688. for f.nb < n {
  689. if err := f.moreBits(); err != nil {
  690. return 0, err
  691. }
  692. }
  693. chunk := h.chunks[f.b&(huffmanNumChunks-1)]
  694. n = uint(chunk & huffmanCountMask)
  695. if n > huffmanChunkBits {
  696. chunk = h.links[chunk>>huffmanValueShift][(f.b>>huffmanChunkBits)&h.linkMask]
  697. n = uint(chunk & huffmanCountMask)
  698. }
  699. if n <= f.nb {
  700. if n == 0 {
  701. f.err = CorruptInputError(f.roffset)
  702. return 0, f.err
  703. }
  704. f.b >>= n
  705. f.nb -= n
  706. return int(chunk >> huffmanValueShift), nil
  707. }
  708. }
  709. }
  710. // Flush any buffered output to the underlying writer.
  711. func (f *decompressor) flush(step func(*decompressor)) {
  712. f.toRead = f.hist[f.hw:f.hp]
  713. f.woffset += int64(f.hp - f.hw)
  714. f.hw = f.hp
  715. if f.hp == len(f.hist) {
  716. f.hp = 0
  717. f.hw = 0
  718. f.hfull = true
  719. }
  720. f.step = step
  721. }
  722. func makeReader(r io.Reader) Reader {
  723. if rr, ok := r.(Reader); ok {
  724. return rr
  725. }
  726. return bufio.NewReader(r)
  727. }
  728. func (f *decompressor) Reset(r io.Reader, dict []byte) error {
  729. *f = decompressor{
  730. r: makeReader(r),
  731. bits: f.bits,
  732. codebits: f.codebits,
  733. hist: f.hist,
  734. step: (*decompressor).nextBlock,
  735. }
  736. if dict != nil {
  737. f.setDict(dict)
  738. }
  739. return nil
  740. }
  741. // NewReader returns a new ReadCloser that can be used
  742. // to read the uncompressed version of r.
  743. // If r does not also implement io.ByteReader,
  744. // the decompressor may read more data than necessary from r.
  745. // It is the caller's responsibility to call Close on the ReadCloser
  746. // when finished reading.
  747. //
  748. // The ReadCloser returned by NewReader also implements Resetter.
  749. func NewReader(r io.Reader) io.ReadCloser {
  750. var f decompressor
  751. f.bits = new([maxNumLit + maxNumDist]int)
  752. f.codebits = new([numCodes]int)
  753. f.r = makeReader(r)
  754. f.hist = new([maxHist]byte)
  755. f.step = (*decompressor).nextBlock
  756. return &f
  757. }
  758. // NewReaderDict is like NewReader but initializes the reader
  759. // with a preset dictionary. The returned Reader behaves as if
  760. // the uncompressed data stream started with the given dictionary,
  761. // which has already been read. NewReaderDict is typically used
  762. // to read data compressed by NewWriterDict.
  763. //
  764. // The ReadCloser returned by NewReader also implements Resetter.
  765. func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
  766. var f decompressor
  767. f.r = makeReader(r)
  768. f.hist = new([maxHist]byte)
  769. f.bits = new([maxNumLit + maxNumDist]int)
  770. f.codebits = new([numCodes]int)
  771. f.step = (*decompressor).nextBlock
  772. f.setDict(dict)
  773. return &f
  774. }