1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package vp8 implements a decoder for the VP8 lossy image format.
6//
7// The VP8 specification is RFC 6386.
8package vp8 // import "golang.org/x/image/vp8"
9
10// This file implements the top-level decoding algorithm.
11
12import (
13 "errors"
14 "image"
15 "io"
16)
17
// limitReader reads from an underlying io.Reader while enforcing a budget:
// n is the number of bytes that may still be consumed from r.
type limitReader struct {
	r io.Reader
	n int
}

// ReadFull fills p completely from the underlying reader.
//
// If p is longer than the remaining budget, it returns io.ErrUnexpectedEOF
// without reading anything. Otherwise it returns whatever error io.ReadFull
// reports, and the budget is reduced by the number of bytes actually read,
// even when that read fails part-way through.
func (lr *limitReader) ReadFull(p []byte) error {
	if lr.n < len(p) {
		return io.ErrUnexpectedEOF
	}
	got, err := io.ReadFull(lr.r, p)
	lr.n -= got
	return err
}
33
// FrameHeader is a frame header, as specified in section 9.1.
//
// DecodeFrameHeader parses KeyFrame, VersionNumber, ShowFrame and
// FirstPartitionLen from the 3-byte tag that starts every frame. The
// remaining fields are only parsed for key frames.
type FrameHeader struct {
	// KeyFrame is true when the lowest bit of the frame tag is 0.
	KeyFrame bool
	// VersionNumber is the 3-bit value that follows the key frame bit.
	VersionNumber uint8
	// ShowFrame is the single bit that follows the version number.
	ShowFrame bool
	// FirstPartitionLen is the 19-bit length, in bytes, of the first
	// partition.
	FirstPartitionLen uint32
	// Width and Height are the 14-bit frame dimensions.
	Width, Height int
	// XScale and YScale are the 2-bit values stored above the width and
	// the height respectively.
	XScale, YScale uint8
}
45
const (
	// nSegment is the number of segments. It sizes the per-segment arrays
	// of quantizers, filter strengths, filter levels and filter parameters.
	nSegment = 4
	// nSegmentProb is the number of probabilities held in
	// segmentHeader.prob.
	nSegmentProb = 3
)
50
51// segmentHeader holds segment-related header information.
52type segmentHeader struct {
53 useSegment bool
54 updateMap bool
55 relativeDelta bool
56 quantizer [nSegment]int8
57 filterStrength [nSegment]int8
58 prob [nSegmentProb]uint8
59}
60
const (
	// nRefLFDelta is the number of entries in filterHeader.refLFDelta.
	nRefLFDelta = 4
	// nModeLFDelta is the number of entries in filterHeader.modeLFDelta.
	nModeLFDelta = 4
)
65
// filterHeader holds filter-related header information, as parsed by
// parseFilterHeader.
type filterHeader struct {
	// simple selects the simple loop filter instead of the normal one.
	simple bool
	// level is the 6-bit frame-level filter level. A level of 0 means that
	// loop filtering is skipped for the frame.
	level int8
	// sharpness is a 3-bit value read from the bitstream.
	sharpness uint8
	// useLFDelta is the bit that gates reading of the two delta arrays
	// below.
	useLFDelta bool
	// refLFDelta and modeLFDelta are read as optional signed integers with
	// 6 magnitude bits each.
	refLFDelta [nRefLFDelta]int8
	modeLFDelta [nModeLFDelta]int8
	// perSegmentLevel is the filter level for each segment, derived from
	// level and the segment header. When segments are not in use, only
	// element 0 is set.
	perSegmentLevel [nSegment]int8
}
76
// mb is the per-macroblock decode state. A decoder maintains mbw+1 of these
// as it is decoding macroblocks left-to-right and top-to-bottom: mbw for the
// macroblocks in the row above, and one for the macroblock to the left.
//
// DecodeFrame resets the "above" entries to the zero value before the first
// row, and the "left" entry to the zero value at the start of every row.
type mb struct {
	// pred is the predictor mode for the 4 bottom or right 4x4 luma regions.
	pred [4]uint8
	// nzMask is a mask of 8 bits: 4 for the bottom or right 4x4 luma regions,
	// and 2 + 2 for the bottom or right 4x4 chroma regions. A 1 bit indicates
	// that region has non-zero coefficients.
	nzMask uint8
	// nzY16 is a 0/1 value that is 1 if the macroblock used Y16 prediction and
	// had non-zero coefficients.
	nzY16 uint8
}
91
// Decoder decodes VP8 bitstreams into frames. Decoding one frame consists of
// calling Init, DecodeFrameHeader and then DecodeFrame in that order.
// A Decoder can be re-used to decode multiple frames.
type Decoder struct {
	// r is the input bitstream, limited to the byte count passed to Init.
	r limitReader
	// scratch is a scratch buffer. DecodeFrameHeader reads the raw frame
	// header bytes into it.
	scratch [8]byte
	// img is the YCbCr image to decode into.
	img *image.YCbCr
	// mbw and mbh are the number of 16x16 macroblocks wide and high the image is.
	mbw, mbh int
	// frameHeader is the frame header. When decoding multiple frames,
	// frames that aren't key frames will inherit the Width, Height,
	// XScale and YScale of the most recent key frame.
	frameHeader FrameHeader
	// Other headers.
	segmentHeader segmentHeader
	filterHeader filterHeader
	// The image data is divided into a number of independent partitions.
	// There is 1 "first partition" and between 1 and 8 "other partitions"
	// for coefficient data. nOP is the number of other partitions in use.
	fp partition
	op [8]partition
	nOP int
	// Quantization factors.
	quant [nSegment]quant
	// DCT/WHT coefficient decoding probabilities.
	tokenProb [nPlane][nBand][nContext][nProb]uint8
	useSkipProb bool
	skipProb uint8
	// Loop filter parameters. perMBFilterParams holds one entry per
	// macroblock, indexed by mbw*mby + mbx.
	filterParams [nSegment][2]filterParam
	perMBFilterParams []filterParam

	// The nine fields below relate to the current macroblock being decoded.
	//
	// Segment-based adjustments.
	segment int
	// Per-macroblock state for the macroblock immediately left of and those
	// macroblocks immediately above the current macroblock.
	leftMB mb
	upMB []mb
	// Bitmasks for which 4x4 regions of coeff contain non-zero coefficients.
	nzDCMask, nzACMask uint32
	// Predictor modes.
	usePredY16 bool // The libwebp C code calls this !is_i4x4_.
	predY16 uint8
	predC8 uint8
	predY4 [4][4]uint8

	// The two fields below form a workspace for reconstructing a macroblock.
	// Their specific sizes are documented in reconstruct.go.
	coeff [1*16*16 + 2*8*8 + 1*4*4]int16
	ybr [1 + 16 + 1 + 8][32]uint8
}
148
149// NewDecoder returns a new Decoder.
150func NewDecoder() *Decoder {
151 return &Decoder{}
152}
153
154// Init initializes the decoder to read at most n bytes from r.
155func (d *Decoder) Init(r io.Reader, n int) {
156 d.r = limitReader{r, n}
157}
158
159// DecodeFrameHeader decodes the frame header.
160func (d *Decoder) DecodeFrameHeader() (fh FrameHeader, err error) {
161 // All frame headers are at least 3 bytes long.
162 b := d.scratch[:3]
163 if err = d.r.ReadFull(b); err != nil {
164 return
165 }
166 d.frameHeader.KeyFrame = (b[0] & 1) == 0
167 d.frameHeader.VersionNumber = (b[0] >> 1) & 7
168 d.frameHeader.ShowFrame = (b[0]>>4)&1 == 1
169 d.frameHeader.FirstPartitionLen = uint32(b[0])>>5 | uint32(b[1])<<3 | uint32(b[2])<<11
170 if !d.frameHeader.KeyFrame {
171 return d.frameHeader, nil
172 }
173 // Frame headers for key frames are an additional 7 bytes long.
174 b = d.scratch[:7]
175 if err = d.r.ReadFull(b); err != nil {
176 return
177 }
178 // Check the magic sync code.
179 if b[0] != 0x9d || b[1] != 0x01 || b[2] != 0x2a {
180 err = errors.New("vp8: invalid format")
181 return
182 }
183 d.frameHeader.Width = int(b[4]&0x3f)<<8 | int(b[3])
184 d.frameHeader.Height = int(b[6]&0x3f)<<8 | int(b[5])
185 d.frameHeader.XScale = b[4] >> 6
186 d.frameHeader.YScale = b[6] >> 6
187 d.mbw = (d.frameHeader.Width + 0x0f) >> 4
188 d.mbh = (d.frameHeader.Height + 0x0f) >> 4
189 d.segmentHeader = segmentHeader{
190 prob: [3]uint8{0xff, 0xff, 0xff},
191 }
192 d.tokenProb = defaultTokenProb
193 d.segment = 0
194 return d.frameHeader, nil
195}
196
197// ensureImg ensures that d.img is large enough to hold the decoded frame.
198func (d *Decoder) ensureImg() {
199 if d.img != nil {
200 p0, p1 := d.img.Rect.Min, d.img.Rect.Max
201 if p0.X == 0 && p0.Y == 0 && p1.X >= 16*d.mbw && p1.Y >= 16*d.mbh {
202 return
203 }
204 }
205 m := image.NewYCbCr(image.Rect(0, 0, 16*d.mbw, 16*d.mbh), image.YCbCrSubsampleRatio420)
206 d.img = m.SubImage(image.Rect(0, 0, d.frameHeader.Width, d.frameHeader.Height)).(*image.YCbCr)
207 d.perMBFilterParams = make([]filterParam, d.mbw*d.mbh)
208 d.upMB = make([]mb, d.mbw)
209}
210
211// parseSegmentHeader parses the segment header, as specified in section 9.3.
212func (d *Decoder) parseSegmentHeader() {
213 d.segmentHeader.useSegment = d.fp.readBit(uniformProb)
214 if !d.segmentHeader.useSegment {
215 d.segmentHeader.updateMap = false
216 return
217 }
218 d.segmentHeader.updateMap = d.fp.readBit(uniformProb)
219 if d.fp.readBit(uniformProb) {
220 d.segmentHeader.relativeDelta = !d.fp.readBit(uniformProb)
221 for i := range d.segmentHeader.quantizer {
222 d.segmentHeader.quantizer[i] = int8(d.fp.readOptionalInt(uniformProb, 7))
223 }
224 for i := range d.segmentHeader.filterStrength {
225 d.segmentHeader.filterStrength[i] = int8(d.fp.readOptionalInt(uniformProb, 6))
226 }
227 }
228 if !d.segmentHeader.updateMap {
229 return
230 }
231 for i := range d.segmentHeader.prob {
232 if d.fp.readBit(uniformProb) {
233 d.segmentHeader.prob[i] = uint8(d.fp.readUint(uniformProb, 8))
234 } else {
235 d.segmentHeader.prob[i] = 0xff
236 }
237 }
238}
239
240// parseFilterHeader parses the filter header, as specified in section 9.4.
241func (d *Decoder) parseFilterHeader() {
242 d.filterHeader.simple = d.fp.readBit(uniformProb)
243 d.filterHeader.level = int8(d.fp.readUint(uniformProb, 6))
244 d.filterHeader.sharpness = uint8(d.fp.readUint(uniformProb, 3))
245 d.filterHeader.useLFDelta = d.fp.readBit(uniformProb)
246 if d.filterHeader.useLFDelta && d.fp.readBit(uniformProb) {
247 for i := range d.filterHeader.refLFDelta {
248 d.filterHeader.refLFDelta[i] = int8(d.fp.readOptionalInt(uniformProb, 6))
249 }
250 for i := range d.filterHeader.modeLFDelta {
251 d.filterHeader.modeLFDelta[i] = int8(d.fp.readOptionalInt(uniformProb, 6))
252 }
253 }
254 if d.filterHeader.level == 0 {
255 return
256 }
257 if d.segmentHeader.useSegment {
258 for i := range d.filterHeader.perSegmentLevel {
259 strength := d.segmentHeader.filterStrength[i]
260 if d.segmentHeader.relativeDelta {
261 strength += d.filterHeader.level
262 }
263 d.filterHeader.perSegmentLevel[i] = strength
264 }
265 } else {
266 d.filterHeader.perSegmentLevel[0] = d.filterHeader.level
267 }
268 d.computeFilterParams()
269}
270
271// parseOtherPartitions parses the other partitions, as specified in section 9.5.
272func (d *Decoder) parseOtherPartitions() error {
273 const maxNOP = 1 << 3
274 var partLens [maxNOP]int
275 d.nOP = 1 << d.fp.readUint(uniformProb, 2)
276
277 // The final partition length is implied by the remaining chunk data
278 // (d.r.n) and the other d.nOP-1 partition lengths. Those d.nOP-1 partition
279 // lengths are stored as 24-bit uints, i.e. up to 16 MiB per partition.
280 n := 3 * (d.nOP - 1)
281 partLens[d.nOP-1] = d.r.n - n
282 if partLens[d.nOP-1] < 0 {
283 return io.ErrUnexpectedEOF
284 }
285 if n > 0 {
286 buf := make([]byte, n)
287 if err := d.r.ReadFull(buf); err != nil {
288 return err
289 }
290 for i := 0; i < d.nOP-1; i++ {
291 pl := int(buf[3*i+0]) | int(buf[3*i+1])<<8 | int(buf[3*i+2])<<16
292 if pl > partLens[d.nOP-1] {
293 return io.ErrUnexpectedEOF
294 }
295 partLens[i] = pl
296 partLens[d.nOP-1] -= pl
297 }
298 }
299
300 // We check if the final partition length can also fit into a 24-bit uint.
301 // Strictly speaking, this isn't part of the spec, but it guards against a
302 // malicious WEBP image that is too large to ReadFull the encoded DCT
303 // coefficients into memory, whether that's because the actual WEBP file is
304 // too large, or whether its RIFF metadata lists too large a chunk.
305 if 1<<24 <= partLens[d.nOP-1] {
306 return errors.New("vp8: too much data to decode")
307 }
308
309 buf := make([]byte, d.r.n)
310 if err := d.r.ReadFull(buf); err != nil {
311 return err
312 }
313 for i, pl := range partLens {
314 if i == d.nOP {
315 break
316 }
317 d.op[i].init(buf[:pl])
318 buf = buf[pl:]
319 }
320 return nil
321}
322
323// parseOtherHeaders parses header information other than the frame header.
324func (d *Decoder) parseOtherHeaders() error {
325 // Initialize and parse the first partition.
326 firstPartition := make([]byte, d.frameHeader.FirstPartitionLen)
327 if err := d.r.ReadFull(firstPartition); err != nil {
328 return err
329 }
330 d.fp.init(firstPartition)
331 if d.frameHeader.KeyFrame {
332 // Read and ignore the color space and pixel clamp values. They are
333 // specified in section 9.2, but are unimplemented.
334 d.fp.readBit(uniformProb)
335 d.fp.readBit(uniformProb)
336 }
337 d.parseSegmentHeader()
338 d.parseFilterHeader()
339 if err := d.parseOtherPartitions(); err != nil {
340 return err
341 }
342 d.parseQuant()
343 if !d.frameHeader.KeyFrame {
344 // Golden and AltRef frames are specified in section 9.7.
345 // TODO(nigeltao): implement. Note that they are only used for video, not still images.
346 return errors.New("vp8: Golden / AltRef frames are not implemented")
347 }
348 // Read and ignore the refreshLastFrameBuffer bit, specified in section 9.8.
349 // It applies only to video, and not still images.
350 d.fp.readBit(uniformProb)
351 d.parseTokenProb()
352 d.useSkipProb = d.fp.readBit(uniformProb)
353 if d.useSkipProb {
354 d.skipProb = uint8(d.fp.readUint(uniformProb, 8))
355 }
356 if d.fp.unexpectedEOF {
357 return io.ErrUnexpectedEOF
358 }
359 return nil
360}
361
362// DecodeFrame decodes the frame and returns it as an YCbCr image.
363// The image's contents are valid up until the next call to Decoder.Init.
364func (d *Decoder) DecodeFrame() (*image.YCbCr, error) {
365 d.ensureImg()
366 if err := d.parseOtherHeaders(); err != nil {
367 return nil, err
368 }
369 // Reconstruct the rows.
370 for mbx := 0; mbx < d.mbw; mbx++ {
371 d.upMB[mbx] = mb{}
372 }
373 for mby := 0; mby < d.mbh; mby++ {
374 d.leftMB = mb{}
375 for mbx := 0; mbx < d.mbw; mbx++ {
376 skip := d.reconstruct(mbx, mby)
377 fs := d.filterParams[d.segment][btou(!d.usePredY16)]
378 fs.inner = fs.inner || !skip
379 d.perMBFilterParams[d.mbw*mby+mbx] = fs
380 }
381 }
382 if d.fp.unexpectedEOF {
383 return nil, io.ErrUnexpectedEOF
384 }
385 for i := 0; i < d.nOP; i++ {
386 if d.op[i].unexpectedEOF {
387 return nil, io.ErrUnexpectedEOF
388 }
389 }
390 // Apply the loop filter.
391 //
392 // Even if we are using per-segment levels, section 15 says that "loop
393 // filtering must be skipped entirely if loop_filter_level at either the
394 // frame header level or macroblock override level is 0".
395 if d.filterHeader.level != 0 {
396 if d.filterHeader.simple {
397 d.simpleFilter()
398 } else {
399 d.normalFilter()
400 }
401 }
402 return d.img, nil
403}