huffman_code.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363
  1. // Copyright 2009 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package flate
  5. import (
  6. "math"
  7. "sort"
  8. )
  9. type hcode uint32
  10. type huffmanEncoder struct {
  11. codes []hcode
  12. freqcache []literalNode
  13. bitCount [17]int32
  14. lns literalNodeSorter
  15. lfs literalFreqSorter
  16. }
  17. type literalNode struct {
  18. literal uint16
  19. freq int32
  20. }
  21. // A levelInfo describes the state of the constructed tree for a given depth.
  22. type levelInfo struct {
  23. // Our level. for better printing
  24. level int32
  25. // The frequency of the last node at this level
  26. lastFreq int32
  27. // The frequency of the next character to add to this level
  28. nextCharFreq int32
  29. // The frequency of the next pair (from level below) to add to this level.
  30. // Only valid if the "needed" value of the next lower level is 0.
  31. nextPairFreq int32
  32. // The number of chains remaining to generate for this level before moving
  33. // up to the next level
  34. needed int32
  35. }
  36. func (h hcode) codeBits() (code uint16, bits uint8) {
  37. return uint16(h), uint8(h >> 16)
  38. }
  39. func (h *hcode) set(code uint16, bits uint8) {
  40. *h = hcode(code) | hcode(uint32(bits)<<16)
  41. }
  42. func (h *hcode) setBits(bits uint8) {
  43. *h = hcode(*h&0xffff) | hcode(uint32(bits)<<16)
  44. }
  45. func toCode(code uint16, bits uint8) hcode {
  46. return hcode(code) | hcode(uint32(bits)<<16)
  47. }
  48. func (h hcode) code() (code uint16) {
  49. return uint16(h)
  50. }
  51. func (h hcode) bits() (bits uint) {
  52. return uint(h >> 16)
  53. }
  54. func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxInt32} }
  55. func newHuffmanEncoder(size int) *huffmanEncoder {
  56. return &huffmanEncoder{codes: make([]hcode, size), freqcache: nil}
  57. }
  58. // Generates a HuffmanCode corresponding to the fixed literal table
  59. func generateFixedLiteralEncoding() *huffmanEncoder {
  60. h := newHuffmanEncoder(maxNumLit)
  61. codes := h.codes
  62. var ch uint16
  63. for ch = 0; ch < maxNumLit; ch++ {
  64. var bits uint16
  65. var size uint8
  66. switch {
  67. case ch < 144:
  68. // size 8, 000110000 .. 10111111
  69. bits = ch + 48
  70. size = 8
  71. break
  72. case ch < 256:
  73. // size 9, 110010000 .. 111111111
  74. bits = ch + 400 - 144
  75. size = 9
  76. break
  77. case ch < 280:
  78. // size 7, 0000000 .. 0010111
  79. bits = ch - 256
  80. size = 7
  81. break
  82. default:
  83. // size 8, 11000000 .. 11000111
  84. bits = ch + 192 - 280
  85. size = 8
  86. }
  87. codes[ch] = toCode(reverseBits(bits, size), size)
  88. }
  89. return h
  90. }
  91. func generateFixedOffsetEncoding() *huffmanEncoder {
  92. h := newHuffmanEncoder(30)
  93. codes := h.codes
  94. for ch := uint16(0); ch < 30; ch++ {
  95. codes[ch] = toCode(reverseBits(ch, 5), 5)
  96. }
  97. return h
  98. }
  99. var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding()
  100. var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding()
  101. func (h *huffmanEncoder) bitLength(freq []int32) int64 {
  102. var total int64
  103. for i, f := range freq {
  104. if f != 0 {
  105. total += int64(f) * int64(h.codes[i].bits())
  106. }
  107. }
  108. return total
  109. }
  110. const maxBitsLimit = 16
  111. // Return the number of literals assigned to each bit size in the Huffman encoding
  112. //
  113. // This method is only called when list.length >= 3
  114. // The cases of 0, 1, and 2 literals are handled by special case code.
  115. //
  116. // list An array of the literals with non-zero frequencies
  117. // and their associated frequencies. The array is in order of increasing
  118. // frequency, and has as its last element a special element with frequency
  119. // MaxInt32
  120. // maxBits The maximum number of bits that should be used to encode any literal.
  121. // Must be less than 16.
  122. // return An integer array in which array[i] indicates the number of literals
  123. // that should be encoded in i bits.
  124. func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
  125. if maxBits >= maxBitsLimit {
  126. panic("flate: maxBits too large")
  127. }
  128. n := int32(len(list))
  129. list = list[0 : n+1]
  130. list[n] = maxNode()
  131. // The tree can't have greater depth than n - 1, no matter what. This
  132. // saves a little bit of work in some small cases
  133. if maxBits > n-1 {
  134. maxBits = n - 1
  135. }
  136. // Create information about each of the levels.
  137. // A bogus "Level 0" whose sole purpose is so that
  138. // level1.prev.needed==0. This makes level1.nextPairFreq
  139. // be a legitimate value that never gets chosen.
  140. var levels [maxBitsLimit]levelInfo
  141. // leafCounts[i] counts the number of literals at the left
  142. // of ancestors of the rightmost node at level i.
  143. // leafCounts[i][j] is the number of literals at the left
  144. // of the level j ancestor.
  145. var leafCounts [maxBitsLimit][maxBitsLimit]int32
  146. for level := int32(1); level <= maxBits; level++ {
  147. // For every level, the first two items are the first two characters.
  148. // We initialize the levels as if we had already figured this out.
  149. levels[level] = levelInfo{
  150. level: level,
  151. lastFreq: list[1].freq,
  152. nextCharFreq: list[2].freq,
  153. nextPairFreq: list[0].freq + list[1].freq,
  154. }
  155. leafCounts[level][level] = 2
  156. if level == 1 {
  157. levels[level].nextPairFreq = math.MaxInt32
  158. }
  159. }
  160. // We need a total of 2*n - 2 items at top level and have already generated 2.
  161. levels[maxBits].needed = 2*n - 4
  162. level := maxBits
  163. for {
  164. l := &levels[level]
  165. if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 {
  166. // We've run out of both leafs and pairs.
  167. // End all calculations for this level.
  168. // To make sure we never come back to this level or any lower level,
  169. // set nextPairFreq impossibly large.
  170. l.needed = 0
  171. levels[level+1].nextPairFreq = math.MaxInt32
  172. level++
  173. continue
  174. }
  175. prevFreq := l.lastFreq
  176. if l.nextCharFreq < l.nextPairFreq {
  177. // The next item on this row is a leaf node.
  178. n := leafCounts[level][level] + 1
  179. l.lastFreq = l.nextCharFreq
  180. // Lower leafCounts are the same of the previous node.
  181. leafCounts[level][level] = n
  182. l.nextCharFreq = list[n].freq
  183. } else {
  184. // The next item on this row is a pair from the previous row.
  185. // nextPairFreq isn't valid until we generate two
  186. // more values in the level below
  187. l.lastFreq = l.nextPairFreq
  188. // Take leaf counts from the lower level, except counts[level] remains the same.
  189. copy(leafCounts[level][:level], leafCounts[level-1][:level])
  190. levels[l.level-1].needed = 2
  191. }
  192. if l.needed--; l.needed == 0 {
  193. // We've done everything we need to do for this level.
  194. // Continue calculating one level up. Fill in nextPairFreq
  195. // of that level with the sum of the two nodes we've just calculated on
  196. // this level.
  197. if l.level == maxBits {
  198. // All done!
  199. break
  200. }
  201. levels[l.level+1].nextPairFreq = prevFreq + l.lastFreq
  202. level++
  203. } else {
  204. // If we stole from below, move down temporarily to replenish it.
  205. for levels[level-1].needed > 0 {
  206. level--
  207. }
  208. }
  209. }
  210. // Somethings is wrong if at the end, the top level is null or hasn't used
  211. // all of the leaves.
  212. if leafCounts[maxBits][maxBits] != n {
  213. panic("leafCounts[maxBits][maxBits] != n")
  214. }
  215. bitCount := h.bitCount[:maxBits+1]
  216. //make([]int32, maxBits+1)
  217. bits := 1
  218. counts := &leafCounts[maxBits]
  219. for level := maxBits; level > 0; level-- {
  220. // chain.leafCount gives the number of literals requiring at least "bits"
  221. // bits to encode.
  222. bitCount[bits] = counts[level] - counts[level-1]
  223. bits++
  224. }
  225. return bitCount
  226. }
  227. // Look at the leaves and assign them a bit count and an encoding as specified
  228. // in RFC 1951 3.2.2
  229. func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) {
  230. code := uint16(0)
  231. for n, bits := range bitCount {
  232. code <<= 1
  233. if n == 0 || bits == 0 {
  234. continue
  235. }
  236. // The literals list[len(list)-bits] .. list[len(list)-bits]
  237. // are encoded using "bits" bits, and get the values
  238. // code, code + 1, .... The code values are
  239. // assigned in literal order (not frequency order).
  240. chunk := list[len(list)-int(bits):]
  241. h.lns.Sort(chunk)
  242. for _, node := range chunk {
  243. h.codes[node.literal] = toCode(reverseBits(code, uint8(n)), uint8(n))
  244. code++
  245. }
  246. list = list[0 : len(list)-int(bits)]
  247. }
  248. }
  249. // Update this Huffman Code object to be the minimum code for the specified frequency count.
  250. //
  251. // freq An array of frequencies, in which frequency[i] gives the frequency of literal i.
  252. // maxBits The maximum number of bits to use for any literal.
  253. func (h *huffmanEncoder) generate(freq []int32, maxBits int32) {
  254. if h.freqcache == nil {
  255. h.freqcache = make([]literalNode, 300)
  256. }
  257. list := h.freqcache[:len(freq)+1]
  258. // Number of non-zero literals
  259. count := 0
  260. // Set list to be the set of all non-zero literals and their frequencies
  261. for i, f := range freq {
  262. if f != 0 {
  263. list[count] = literalNode{uint16(i), f}
  264. count++
  265. } else {
  266. list[count] = literalNode{}
  267. //h.codeBits[i] = 0
  268. h.codes[i].setBits(0)
  269. }
  270. }
  271. list[len(freq)] = literalNode{}
  272. // If freq[] is shorter than codeBits[], fill rest of codeBits[] with zeros
  273. // FIXME: Doesn't do what it says on the tin (klauspost)
  274. //h.codeBits = h.codeBits[0:len(freq)]
  275. list = list[0:count]
  276. if count <= 2 {
  277. // Handle the small cases here, because they are awkward for the general case code. With
  278. // two or fewer literals, everything has bit length 1.
  279. for i, node := range list {
  280. // "list" is in order of increasing literal value.
  281. h.codes[node.literal].set(uint16(i), 1)
  282. //h.codeBits[node.literal] = 1
  283. //h.code[node.literal] = uint16(i)
  284. }
  285. return
  286. }
  287. h.lfs.Sort(list)
  288. // Get the number of literals for each bit count
  289. bitCount := h.bitCounts(list, maxBits)
  290. // And do the assignment
  291. h.assignEncodingAndSize(bitCount, list)
  292. }
  293. type literalNodeSorter []literalNode
  294. func (s *literalNodeSorter) Sort(a []literalNode) {
  295. *s = literalNodeSorter(a)
  296. sort.Sort(s)
  297. }
  298. func (s literalNodeSorter) Len() int { return len(s) }
  299. func (s literalNodeSorter) Less(i, j int) bool {
  300. return s[i].literal < s[j].literal
  301. }
  302. func (s literalNodeSorter) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
  303. type literalFreqSorter []literalNode
  304. func (s *literalFreqSorter) Sort(a []literalNode) {
  305. *s = literalFreqSorter(a)
  306. sort.Sort(s)
  307. }
  308. func (s literalFreqSorter) Len() int { return len(s) }
  309. func (s literalFreqSorter) Less(i, j int) bool {
  310. if s[i].freq == s[j].freq {
  311. return s[i].literal < s[j].literal
  312. }
  313. return s[i].freq < s[j].freq
  314. }
  315. func (s literalFreqSorter) Swap(i, j int) { s[i], s[j] = s[j], s[i] }