huffman.go

  1// Copyright 2014 The Go Authors. All rights reserved.
  2// Use of this source code is governed by a BSD-style
  3// license that can be found in the LICENSE file.
  4
  5package vp8l
  6
  7import (
  8	"io"
  9)
 10
 11// reverseBits reverses the bits in a byte.
 12var reverseBits = [256]uint8{
 13	0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
 14	0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8, 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
 15	0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4, 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
 16	0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec, 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
 17	0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2, 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
 18	0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea, 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
 19	0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6, 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
 20	0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee, 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
 21	0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1, 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
 22	0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9, 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
 23	0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5, 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
 24	0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed, 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
 25	0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3, 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
 26	0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb, 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
 27	0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7, 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
 28	0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef, 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff,
 29}
 30
// hNode is a node in a Huffman tree. Nodes are stored in an hTree's nodes
// slice and refer to each other by index into that slice.
type hNode struct {
	// symbol is the symbol held by this node. It is assigned by
	// hTree.insert when the node at the end of a code becomes a leaf.
	symbol uint32
	// children, if positive, is the hTree.nodes index of the first of
	// this node's two children; the second child is at the following
	// index, and the next code bit selects between the two. Zero means
	// an uninitialized node, and -1 means a leaf node.
	children int32
}
 40
// leafNode is the hNode.children value that marks a node as a leaf.
const leafNode = -1
 42
 43// lutSize is the log-2 size of an hTree's look-up table.
 44const lutSize, lutMask = 7, 1<<7 - 1
 45
// hTree is a Huffman tree. It is populated by build or buildSimple, which
// allocate nodes and call insert once per symbol, and is read by next.
type hTree struct {
	// nodes are the nodes of the Huffman tree; nodes[0] is the root.
	// During construction, len(nodes) grows from 1 up to cap(nodes) by
	// steps of two. After construction with a complete prefix code,
	// len(nodes) == cap(nodes), and both equal 2*theNumberOfSymbols - 1.
	nodes []hNode
	// lut is a look-up table for walking the nodes. The x in lut[x] is
	// the next lutSize bits in the bit-stream. The low 8 bits of lut[x]
	// equal 1 plus the number of bits in the next code, or 0 if the
	// next code requires more than lutSize bits. The high 24 bits are:
	//   - the symbol, if the code requires lutSize or fewer bits, or
	//   - the hTree.nodes index to start the tree traversal from, if
	//     the next code requires more than lutSize bits.
	lut [1 << lutSize]uint32
}
 62
 63// insert inserts into the hTree a symbol whose encoding is the least
 64// significant codeLength bits of code.
 65func (h *hTree) insert(symbol uint32, code uint32, codeLength uint32) error {
 66	if symbol > 0xffff || codeLength > 0xfe {
 67		return errInvalidHuffmanTree
 68	}
 69	baseCode := uint32(0)
 70	if codeLength > lutSize {
 71		baseCode = uint32(reverseBits[(code>>(codeLength-lutSize))&0xff]) >> (8 - lutSize)
 72	} else {
 73		baseCode = uint32(reverseBits[code&0xff]) >> (8 - codeLength)
 74		for i := 0; i < 1<<(lutSize-codeLength); i++ {
 75			h.lut[baseCode|uint32(i)<<codeLength] = symbol<<8 | (codeLength + 1)
 76		}
 77	}
 78
 79	n := uint32(0)
 80	for jump := lutSize; codeLength > 0; {
 81		codeLength--
 82		if int(n) > len(h.nodes) {
 83			return errInvalidHuffmanTree
 84		}
 85		switch h.nodes[n].children {
 86		case leafNode:
 87			return errInvalidHuffmanTree
 88		case 0:
 89			if len(h.nodes) == cap(h.nodes) {
 90				return errInvalidHuffmanTree
 91			}
 92			// Create two empty child nodes.
 93			h.nodes[n].children = int32(len(h.nodes))
 94			h.nodes = h.nodes[:len(h.nodes)+2]
 95		}
 96		n = uint32(h.nodes[n].children) + 1&(code>>codeLength)
 97		jump--
 98		if jump == 0 && h.lut[baseCode] == 0 {
 99			h.lut[baseCode] = n << 8
100		}
101	}
102
103	switch h.nodes[n].children {
104	case leafNode:
105		// No-op.
106	case 0:
107		// Turn the uninitialized node into a leaf.
108		h.nodes[n].children = leafNode
109	default:
110		return errInvalidHuffmanTree
111	}
112	h.nodes[n].symbol = symbol
113	return nil
114}
115
116// codeLengthsToCodes returns the canonical Huffman codes implied by the
117// sequence of code lengths.
118func codeLengthsToCodes(codeLengths []uint32) ([]uint32, error) {
119	maxCodeLength := uint32(0)
120	for _, cl := range codeLengths {
121		if maxCodeLength < cl {
122			maxCodeLength = cl
123		}
124	}
125	const maxAllowedCodeLength = 15
126	if len(codeLengths) == 0 || maxCodeLength > maxAllowedCodeLength {
127		return nil, errInvalidHuffmanTree
128	}
129	histogram := [maxAllowedCodeLength + 1]uint32{}
130	for _, cl := range codeLengths {
131		histogram[cl]++
132	}
133	currCode, nextCodes := uint32(0), [maxAllowedCodeLength + 1]uint32{}
134	for cl := 1; cl < len(nextCodes); cl++ {
135		currCode = (currCode + histogram[cl-1]) << 1
136		nextCodes[cl] = currCode
137	}
138	codes := make([]uint32, len(codeLengths))
139	for symbol, cl := range codeLengths {
140		if cl > 0 {
141			codes[symbol] = nextCodes[cl]
142			nextCodes[cl]++
143		}
144	}
145	return codes, nil
146}
147
148// build builds a canonical Huffman tree from the given code lengths.
149func (h *hTree) build(codeLengths []uint32) error {
150	// Calculate the number of symbols.
151	var nSymbols, lastSymbol uint32
152	for symbol, cl := range codeLengths {
153		if cl != 0 {
154			nSymbols++
155			lastSymbol = uint32(symbol)
156		}
157	}
158	if nSymbols == 0 {
159		return errInvalidHuffmanTree
160	}
161	h.nodes = make([]hNode, 1, 2*nSymbols-1)
162	// Handle the trivial case.
163	if nSymbols == 1 {
164		if len(codeLengths) <= int(lastSymbol) {
165			return errInvalidHuffmanTree
166		}
167		return h.insert(lastSymbol, 0, 0)
168	}
169	// Handle the non-trivial case.
170	codes, err := codeLengthsToCodes(codeLengths)
171	if err != nil {
172		return err
173	}
174	for symbol, cl := range codeLengths {
175		if cl > 0 {
176			if err := h.insert(uint32(symbol), codes[symbol], cl); err != nil {
177				return err
178			}
179		}
180	}
181	return nil
182}
183
184// buildSimple builds a Huffman tree with 1 or 2 symbols.
185func (h *hTree) buildSimple(nSymbols uint32, symbols [2]uint32, alphabetSize uint32) error {
186	h.nodes = make([]hNode, 1, 2*nSymbols-1)
187	for i := uint32(0); i < nSymbols; i++ {
188		if symbols[i] >= alphabetSize {
189			return errInvalidHuffmanTree
190		}
191		if err := h.insert(symbols[i], i, nSymbols-1); err != nil {
192			return err
193		}
194	}
195	return nil
196}
197
198// next returns the next Huffman-encoded symbol from the bit-stream d.
199func (h *hTree) next(d *decoder) (uint32, error) {
200	var n uint32
201	// Read enough bits so that we can use the look-up table.
202	if d.nBits < lutSize {
203		c, err := d.r.ReadByte()
204		if err != nil {
205			if err == io.EOF {
206				// There are no more bytes of data, but we may still be able
207				// to read the next symbol out of the previously read bits.
208				goto slowPath
209			}
210			return 0, err
211		}
212		d.bits |= uint32(c) << d.nBits
213		d.nBits += 8
214	}
215	// Use the look-up table.
216	n = h.lut[d.bits&lutMask]
217	if b := n & 0xff; b != 0 {
218		b--
219		d.bits >>= b
220		d.nBits -= b
221		return n >> 8, nil
222	}
223	n >>= 8
224	d.bits >>= lutSize
225	d.nBits -= lutSize
226
227slowPath:
228	for h.nodes[n].children != leafNode {
229		if d.nBits == 0 {
230			c, err := d.r.ReadByte()
231			if err != nil {
232				if err == io.EOF {
233					err = io.ErrUnexpectedEOF
234				}
235				return 0, err
236			}
237			d.bits = uint32(c)
238			d.nBits = 8
239		}
240		n = uint32(h.nodes[n].children) + 1&d.bits
241		d.bits >>= 1
242		d.nBits--
243	}
244	return h.nodes[n].symbol, nil
245}