decode.go

  1// Copyright 2011 The Go Authors. All rights reserved.
  2// Use of this source code is governed by a BSD-style
  3// license that can be found in the LICENSE file.
  4
  5// Package vp8 implements a decoder for the VP8 lossy image format.
  6//
  7// The VP8 specification is RFC 6386.
  8package vp8 // import "golang.org/x/image/vp8"
  9
 10// This file implements the top-level decoding algorithm.
 11
 12import (
 13	"errors"
 14	"image"
 15	"io"
 16)
 17
 // limitReader reads from an underlying io.Reader while enforcing a budget of
// at most n further bytes.
type limitReader struct {
	// r is the underlying source of bytes.
	r io.Reader
	// n is the number of bytes that may still be read from r.
	n int
}

// ReadFull fills p completely or returns an error.
//
// A request for more bytes than the remaining budget fails with
// io.ErrUnexpectedEOF before anything is read from the underlying reader.
// Otherwise the budget is reduced by however many bytes were actually read,
// even when the underlying read reports an error.
func (r *limitReader) ReadFull(p []byte) error {
	if r.n < len(p) {
		return io.ErrUnexpectedEOF
	}
	got, err := io.ReadFull(r.r, p)
	r.n -= got
	return err
}
 33
 // FrameHeader is a frame header, as specified in section 9.1.
//
// Width, Height, XScale and YScale are only transmitted for key frames.
type FrameHeader struct {
	// KeyFrame reports whether the frame is a key frame.
	KeyFrame bool
	// VersionNumber is the 3-bit version field of the frame tag.
	VersionNumber uint8
	// ShowFrame is the show-frame flag of the frame tag.
	ShowFrame bool
	// FirstPartitionLen is the length in bytes of the first partition. It
	// is stored in the frame tag as a 19-bit value.
	FirstPartitionLen uint32
	// Width and Height are the frame dimensions, each a 14-bit value.
	Width, Height int
	// XScale and YScale are the 2-bit scaling fields stored above the
	// width and height respectively.
	XScale, YScale uint8
}
 45
 // Sizes of the per-segment arrays used by the segment and filter headers.
const (
	// nSegment is the number of segments.
	nSegment = 4
	// nSegmentProb is the number of segment probabilities.
	nSegmentProb = 3
)
 50
 51// segmentHeader holds segment-related header information.
 52type segmentHeader struct {
 53	useSegment     bool
 54	updateMap      bool
 55	relativeDelta  bool
 56	quantizer      [nSegment]int8
 57	filterStrength [nSegment]int8
 58	prob           [nSegmentProb]uint8
 59}
 60
 // Sizes of the loop filter delta arrays in the filter header.
const (
	// nRefLFDelta is the number of reference-frame loop filter deltas.
	nRefLFDelta = 4
	// nModeLFDelta is the number of mode loop filter deltas.
	nModeLFDelta = 4
)
 65
 66// filterHeader holds filter-related header information.
 67type filterHeader struct {
 68	simple          bool
 69	level           int8
 70	sharpness       uint8
 71	useLFDelta      bool
 72	refLFDelta      [nRefLFDelta]int8
 73	modeLFDelta     [nModeLFDelta]int8
 74	perSegmentLevel [nSegment]int8
 75}
 76
 // mb is the per-macroblock decode state.
//
// Macroblocks are decoded left-to-right and top-to-bottom, and a decoder
// keeps mbw+1 of these while doing so: mbw for the macroblocks in the row
// above, and one for the macroblock to the left.
type mb struct {
	// pred is the predictor mode for the 4 bottom or right 4x4 luma regions.
	pred [4]uint8
	// nzMask is an 8-bit mask in which a 1 bit means that the region has
	// non-zero coefficients: 4 bits for the bottom or right 4x4 luma
	// regions, and 2 + 2 bits for the bottom or right 4x4 chroma regions.
	nzMask uint8
	// nzY16 is 1 if the macroblock used Y16 prediction and had non-zero
	// coefficients, and 0 otherwise.
	nzY16 uint8
}
 91
 92// Decoder decodes VP8 bitstreams into frames. Decoding one frame consists of
 93// calling Init, DecodeFrameHeader and then DecodeFrame in that order.
 94// A Decoder can be re-used to decode multiple frames.
 95type Decoder struct {
 96	// r is the input bitsream.
 97	r limitReader
 98	// scratch is a scratch buffer.
 99	scratch [8]byte
100	// img is the YCbCr image to decode into.
101	img *image.YCbCr
102	// mbw and mbh are the number of 16x16 macroblocks wide and high the image is.
103	mbw, mbh int
104	// frameHeader is the frame header. When decoding multiple frames,
105	// frames that aren't key frames will inherit the Width, Height,
106	// XScale and YScale of the most recent key frame.
107	frameHeader FrameHeader
108	// Other headers.
109	segmentHeader segmentHeader
110	filterHeader  filterHeader
111	// The image data is divided into a number of independent partitions.
112	// There is 1 "first partition" and between 1 and 8 "other partitions"
113	// for coefficient data.
114	fp  partition
115	op  [8]partition
116	nOP int
117	// Quantization factors.
118	quant [nSegment]quant
119	// DCT/WHT coefficient decoding probabilities.
120	tokenProb   [nPlane][nBand][nContext][nProb]uint8
121	useSkipProb bool
122	skipProb    uint8
123	// Loop filter parameters.
124	filterParams      [nSegment][2]filterParam
125	perMBFilterParams []filterParam
126
127	// The eight fields below relate to the current macroblock being decoded.
128	//
129	// Segment-based adjustments.
130	segment int
131	// Per-macroblock state for the macroblock immediately left of and those
132	// macroblocks immediately above the current macroblock.
133	leftMB mb
134	upMB   []mb
135	// Bitmasks for which 4x4 regions of coeff contain non-zero coefficients.
136	nzDCMask, nzACMask uint32
137	// Predictor modes.
138	usePredY16 bool // The libwebp C code calls this !is_i4x4_.
139	predY16    uint8
140	predC8     uint8
141	predY4     [4][4]uint8
142
143	// The two fields below form a workspace for reconstructing a macroblock.
144	// Their specific sizes are documented in reconstruct.go.
145	coeff [1*16*16 + 2*8*8 + 1*4*4]int16
146	ybr   [1 + 16 + 1 + 8][32]uint8
147}
148
149// NewDecoder returns a new Decoder.
150func NewDecoder() *Decoder {
151	return &Decoder{}
152}
153
154// Init initializes the decoder to read at most n bytes from r.
155func (d *Decoder) Init(r io.Reader, n int) {
156	d.r = limitReader{r, n}
157}
158
159// DecodeFrameHeader decodes the frame header.
160func (d *Decoder) DecodeFrameHeader() (fh FrameHeader, err error) {
161	// All frame headers are at least 3 bytes long.
162	b := d.scratch[:3]
163	if err = d.r.ReadFull(b); err != nil {
164		return
165	}
166	d.frameHeader.KeyFrame = (b[0] & 1) == 0
167	d.frameHeader.VersionNumber = (b[0] >> 1) & 7
168	d.frameHeader.ShowFrame = (b[0]>>4)&1 == 1
169	d.frameHeader.FirstPartitionLen = uint32(b[0])>>5 | uint32(b[1])<<3 | uint32(b[2])<<11
170	if !d.frameHeader.KeyFrame {
171		return d.frameHeader, nil
172	}
173	// Frame headers for key frames are an additional 7 bytes long.
174	b = d.scratch[:7]
175	if err = d.r.ReadFull(b); err != nil {
176		return
177	}
178	// Check the magic sync code.
179	if b[0] != 0x9d || b[1] != 0x01 || b[2] != 0x2a {
180		err = errors.New("vp8: invalid format")
181		return
182	}
183	d.frameHeader.Width = int(b[4]&0x3f)<<8 | int(b[3])
184	d.frameHeader.Height = int(b[6]&0x3f)<<8 | int(b[5])
185	d.frameHeader.XScale = b[4] >> 6
186	d.frameHeader.YScale = b[6] >> 6
187	d.mbw = (d.frameHeader.Width + 0x0f) >> 4
188	d.mbh = (d.frameHeader.Height + 0x0f) >> 4
189	d.segmentHeader = segmentHeader{
190		prob: [3]uint8{0xff, 0xff, 0xff},
191	}
192	d.tokenProb = defaultTokenProb
193	d.segment = 0
194	return d.frameHeader, nil
195}
196
197// ensureImg ensures that d.img is large enough to hold the decoded frame.
198func (d *Decoder) ensureImg() {
199	if d.img != nil {
200		p0, p1 := d.img.Rect.Min, d.img.Rect.Max
201		if p0.X == 0 && p0.Y == 0 && p1.X >= 16*d.mbw && p1.Y >= 16*d.mbh {
202			return
203		}
204	}
205	m := image.NewYCbCr(image.Rect(0, 0, 16*d.mbw, 16*d.mbh), image.YCbCrSubsampleRatio420)
206	d.img = m.SubImage(image.Rect(0, 0, d.frameHeader.Width, d.frameHeader.Height)).(*image.YCbCr)
207	d.perMBFilterParams = make([]filterParam, d.mbw*d.mbh)
208	d.upMB = make([]mb, d.mbw)
209}
210
211// parseSegmentHeader parses the segment header, as specified in section 9.3.
212func (d *Decoder) parseSegmentHeader() {
213	d.segmentHeader.useSegment = d.fp.readBit(uniformProb)
214	if !d.segmentHeader.useSegment {
215		d.segmentHeader.updateMap = false
216		return
217	}
218	d.segmentHeader.updateMap = d.fp.readBit(uniformProb)
219	if d.fp.readBit(uniformProb) {
220		d.segmentHeader.relativeDelta = !d.fp.readBit(uniformProb)
221		for i := range d.segmentHeader.quantizer {
222			d.segmentHeader.quantizer[i] = int8(d.fp.readOptionalInt(uniformProb, 7))
223		}
224		for i := range d.segmentHeader.filterStrength {
225			d.segmentHeader.filterStrength[i] = int8(d.fp.readOptionalInt(uniformProb, 6))
226		}
227	}
228	if !d.segmentHeader.updateMap {
229		return
230	}
231	for i := range d.segmentHeader.prob {
232		if d.fp.readBit(uniformProb) {
233			d.segmentHeader.prob[i] = uint8(d.fp.readUint(uniformProb, 8))
234		} else {
235			d.segmentHeader.prob[i] = 0xff
236		}
237	}
238}
239
240// parseFilterHeader parses the filter header, as specified in section 9.4.
241func (d *Decoder) parseFilterHeader() {
242	d.filterHeader.simple = d.fp.readBit(uniformProb)
243	d.filterHeader.level = int8(d.fp.readUint(uniformProb, 6))
244	d.filterHeader.sharpness = uint8(d.fp.readUint(uniformProb, 3))
245	d.filterHeader.useLFDelta = d.fp.readBit(uniformProb)
246	if d.filterHeader.useLFDelta && d.fp.readBit(uniformProb) {
247		for i := range d.filterHeader.refLFDelta {
248			d.filterHeader.refLFDelta[i] = int8(d.fp.readOptionalInt(uniformProb, 6))
249		}
250		for i := range d.filterHeader.modeLFDelta {
251			d.filterHeader.modeLFDelta[i] = int8(d.fp.readOptionalInt(uniformProb, 6))
252		}
253	}
254	if d.filterHeader.level == 0 {
255		return
256	}
257	if d.segmentHeader.useSegment {
258		for i := range d.filterHeader.perSegmentLevel {
259			strength := d.segmentHeader.filterStrength[i]
260			if d.segmentHeader.relativeDelta {
261				strength += d.filterHeader.level
262			}
263			d.filterHeader.perSegmentLevel[i] = strength
264		}
265	} else {
266		d.filterHeader.perSegmentLevel[0] = d.filterHeader.level
267	}
268	d.computeFilterParams()
269}
270
271// parseOtherPartitions parses the other partitions, as specified in section 9.5.
272func (d *Decoder) parseOtherPartitions() error {
273	const maxNOP = 1 << 3
274	var partLens [maxNOP]int
275	d.nOP = 1 << d.fp.readUint(uniformProb, 2)
276
277	// The final partition length is implied by the remaining chunk data
278	// (d.r.n) and the other d.nOP-1 partition lengths. Those d.nOP-1 partition
279	// lengths are stored as 24-bit uints, i.e. up to 16 MiB per partition.
280	n := 3 * (d.nOP - 1)
281	partLens[d.nOP-1] = d.r.n - n
282	if partLens[d.nOP-1] < 0 {
283		return io.ErrUnexpectedEOF
284	}
285	if n > 0 {
286		buf := make([]byte, n)
287		if err := d.r.ReadFull(buf); err != nil {
288			return err
289		}
290		for i := 0; i < d.nOP-1; i++ {
291			pl := int(buf[3*i+0]) | int(buf[3*i+1])<<8 | int(buf[3*i+2])<<16
292			if pl > partLens[d.nOP-1] {
293				return io.ErrUnexpectedEOF
294			}
295			partLens[i] = pl
296			partLens[d.nOP-1] -= pl
297		}
298	}
299
300	// We check if the final partition length can also fit into a 24-bit uint.
301	// Strictly speaking, this isn't part of the spec, but it guards against a
302	// malicious WEBP image that is too large to ReadFull the encoded DCT
303	// coefficients into memory, whether that's because the actual WEBP file is
304	// too large, or whether its RIFF metadata lists too large a chunk.
305	if 1<<24 <= partLens[d.nOP-1] {
306		return errors.New("vp8: too much data to decode")
307	}
308
309	buf := make([]byte, d.r.n)
310	if err := d.r.ReadFull(buf); err != nil {
311		return err
312	}
313	for i, pl := range partLens {
314		if i == d.nOP {
315			break
316		}
317		d.op[i].init(buf[:pl])
318		buf = buf[pl:]
319	}
320	return nil
321}
322
323// parseOtherHeaders parses header information other than the frame header.
324func (d *Decoder) parseOtherHeaders() error {
325	// Initialize and parse the first partition.
326	firstPartition := make([]byte, d.frameHeader.FirstPartitionLen)
327	if err := d.r.ReadFull(firstPartition); err != nil {
328		return err
329	}
330	d.fp.init(firstPartition)
331	if d.frameHeader.KeyFrame {
332		// Read and ignore the color space and pixel clamp values. They are
333		// specified in section 9.2, but are unimplemented.
334		d.fp.readBit(uniformProb)
335		d.fp.readBit(uniformProb)
336	}
337	d.parseSegmentHeader()
338	d.parseFilterHeader()
339	if err := d.parseOtherPartitions(); err != nil {
340		return err
341	}
342	d.parseQuant()
343	if !d.frameHeader.KeyFrame {
344		// Golden and AltRef frames are specified in section 9.7.
345		// TODO(nigeltao): implement. Note that they are only used for video, not still images.
346		return errors.New("vp8: Golden / AltRef frames are not implemented")
347	}
348	// Read and ignore the refreshLastFrameBuffer bit, specified in section 9.8.
349	// It applies only to video, and not still images.
350	d.fp.readBit(uniformProb)
351	d.parseTokenProb()
352	d.useSkipProb = d.fp.readBit(uniformProb)
353	if d.useSkipProb {
354		d.skipProb = uint8(d.fp.readUint(uniformProb, 8))
355	}
356	if d.fp.unexpectedEOF {
357		return io.ErrUnexpectedEOF
358	}
359	return nil
360}
361
362// DecodeFrame decodes the frame and returns it as an YCbCr image.
363// The image's contents are valid up until the next call to Decoder.Init.
364func (d *Decoder) DecodeFrame() (*image.YCbCr, error) {
365	d.ensureImg()
366	if err := d.parseOtherHeaders(); err != nil {
367		return nil, err
368	}
369	// Reconstruct the rows.
370	for mbx := 0; mbx < d.mbw; mbx++ {
371		d.upMB[mbx] = mb{}
372	}
373	for mby := 0; mby < d.mbh; mby++ {
374		d.leftMB = mb{}
375		for mbx := 0; mbx < d.mbw; mbx++ {
376			skip := d.reconstruct(mbx, mby)
377			fs := d.filterParams[d.segment][btou(!d.usePredY16)]
378			fs.inner = fs.inner || !skip
379			d.perMBFilterParams[d.mbw*mby+mbx] = fs
380		}
381	}
382	if d.fp.unexpectedEOF {
383		return nil, io.ErrUnexpectedEOF
384	}
385	for i := 0; i < d.nOP; i++ {
386		if d.op[i].unexpectedEOF {
387			return nil, io.ErrUnexpectedEOF
388		}
389	}
390	// Apply the loop filter.
391	//
392	// Even if we are using per-segment levels, section 15 says that "loop
393	// filtering must be skipped entirely if loop_filter_level at either the
394	// frame header level or macroblock override level is 0".
395	if d.filterHeader.level != 0 {
396		if d.filterHeader.simple {
397			d.simpleFilter()
398		} else {
399			d.normalFilter()
400		}
401	}
402	return d.img, nil
403}