leb128.go

  1package leb128
  2
  3import (
  4	"errors"
  5	"fmt"
  6	"io"
  7)
  8
  9const (
 10	maxVarintLen32 = 5
 11	maxVarintLen33 = maxVarintLen32
 12	maxVarintLen64 = 10
 13
 14	int33Mask  int64 = 1 << 7
 15	int33Mask2       = ^int33Mask
 16	int33Mask3       = 1 << 6
 17	int33Mask4       = 8589934591 // 2^33-1
 18	int33Mask5       = 1 << 32
 19	int33Mask6       = int33Mask4 + 1 // 2^33
 20
 21	int64Mask3 = 1 << 6
 22	int64Mask4 = ^0
 23)
 24
 25var (
 26	errOverflow32 = errors.New("overflows a 32-bit integer")
 27	errOverflow33 = errors.New("overflows a 33-bit integer")
 28	errOverflow64 = errors.New("overflows a 64-bit integer")
 29)
 30
 31// EncodeInt32 encodes the signed value into a buffer in LEB128 format
 32//
 33// See https://en.wikipedia.org/wiki/LEB128#Encode_signed_integer
 34func EncodeInt32(value int32) []byte {
 35	return EncodeInt64(int64(value))
 36}
 37
 38// EncodeInt64 encodes the signed value into a buffer in LEB128 format
 39//
 40// See https://en.wikipedia.org/wiki/LEB128#Encode_signed_integer
 41func EncodeInt64(value int64) (buf []byte) {
 42	for {
 43		// Take 7 remaining low-order bits from the value into b.
 44		b := uint8(value & 0x7f)
 45		// Extract the sign bit.
 46		s := uint8(value & 0x40)
 47		value >>= 7
 48
 49		// The encoding unsigned numbers is simpler as it only needs to check if the value is non-zero to tell if there
 50		// are more bits to encode. Signed is a little more complicated as you have to double-check the sign bit.
 51		// If either case, set the high-order bit to tell the reader there are more bytes in this int.
 52		if (value != -1 || s == 0) && (value != 0 || s != 0) {
 53			b |= 0x80
 54		}
 55
 56		// Append b into the buffer
 57		buf = append(buf, b)
 58		if b&0x80 == 0 {
 59			break
 60		}
 61	}
 62	return buf
 63}
 64
 65// EncodeUint32 encodes the value into a buffer in LEB128 format
 66//
 67// See https://en.wikipedia.org/wiki/LEB128#Encode_unsigned_integer
 68func EncodeUint32(value uint32) []byte {
 69	return EncodeUint64(uint64(value))
 70}
 71
 72// EncodeUint64 encodes the value into a buffer in LEB128 format
 73//
 74// See https://en.wikipedia.org/wiki/LEB128#Encode_unsigned_integer
 75func EncodeUint64(value uint64) (buf []byte) {
 76	// This is effectively a do/while loop where we take 7 bits of the value and encode them until it is zero.
 77	for {
 78		// Take 7 remaining low-order bits from the value into b.
 79		b := uint8(value & 0x7f)
 80		value = value >> 7
 81
 82		// If there are remaining bits, the value won't be zero: Set the high-
 83		// order bit to tell the reader there are more bytes in this uint.
 84		if value != 0 {
 85			b |= 0x80
 86		}
 87
 88		// Append b into the buffer
 89		buf = append(buf, b)
 90		if b&0x80 == 0 {
 91			return buf
 92		}
 93	}
 94}
 95
 96type nextByte func(i int) (byte, error)
 97
 98func DecodeUint32(r io.ByteReader) (ret uint32, bytesRead uint64, err error) {
 99	return decodeUint32(func(_ int) (byte, error) { return r.ReadByte() })
100}
101
102func LoadUint32(buf []byte) (ret uint32, bytesRead uint64, err error) {
103	return decodeUint32(func(i int) (byte, error) {
104		if i >= len(buf) {
105			return 0, io.EOF
106		}
107		return buf[i], nil
108	})
109}
110
111func decodeUint32(next nextByte) (ret uint32, bytesRead uint64, err error) {
112	// Derived from https://github.com/golang/go/blob/go1.20/src/encoding/binary/varint.go
113	// with the modification on the overflow handling tailored for 32-bits.
114	var s uint32
115	for i := 0; i < maxVarintLen32; i++ {
116		b, err := next(i)
117		if err != nil {
118			return 0, 0, err
119		}
120		if b < 0x80 {
121			// Unused bits must be all zero.
122			if i == maxVarintLen32-1 && (b&0xf0) > 0 {
123				return 0, 0, errOverflow32
124			}
125			return ret | uint32(b)<<s, uint64(i) + 1, nil
126		}
127		ret |= (uint32(b) & 0x7f) << s
128		s += 7
129	}
130	return 0, 0, errOverflow32
131}
132
133func LoadUint64(buf []byte) (ret uint64, bytesRead uint64, err error) {
134	bufLen := len(buf)
135	if bufLen == 0 {
136		return 0, 0, io.EOF
137	}
138
139	// Derived from https://github.com/golang/go/blob/go1.20/src/encoding/binary/varint.go
140	var s uint64
141	for i := 0; i < maxVarintLen64; i++ {
142		if i >= bufLen {
143			return 0, 0, io.EOF
144		}
145		b := buf[i]
146		if b < 0x80 {
147			// Unused bits (non first bit) must all be zero.
148			if i == maxVarintLen64-1 && b > 1 {
149				return 0, 0, errOverflow64
150			}
151			return ret | uint64(b)<<s, uint64(i) + 1, nil
152		}
153		ret |= (uint64(b) & 0x7f) << s
154		s += 7
155	}
156	return 0, 0, errOverflow64
157}
158
159func DecodeInt32(r io.ByteReader) (ret int32, bytesRead uint64, err error) {
160	return decodeInt32(func(_ int) (byte, error) { return r.ReadByte() })
161}
162
163func LoadInt32(buf []byte) (ret int32, bytesRead uint64, err error) {
164	return decodeInt32(func(i int) (byte, error) {
165		if i >= len(buf) {
166			return 0, io.EOF
167		}
168		return buf[i], nil
169	})
170}
171
172func decodeInt32(next nextByte) (ret int32, bytesRead uint64, err error) {
173	var shift int
174	var b byte
175	for {
176		b, err = next(int(bytesRead))
177		if err != nil {
178			return 0, 0, fmt.Errorf("readByte failed: %w", err)
179		}
180		ret |= (int32(b) & 0x7f) << shift
181		shift += 7
182		bytesRead++
183		if b&0x80 == 0 {
184			if shift < 32 && (b&0x40) != 0 {
185				ret |= ^0 << shift
186			}
187			// Over flow checks.
188			// fixme: can be optimized.
189			if bytesRead > maxVarintLen32 {
190				return 0, 0, errOverflow32
191			} else if unused := b & 0b00110000; bytesRead == maxVarintLen32 && ret < 0 && unused != 0b00110000 {
192				return 0, 0, errOverflow32
193			} else if bytesRead == maxVarintLen32 && ret >= 0 && unused != 0x00 {
194				return 0, 0, errOverflow32
195			}
196			return
197		}
198	}
199}
200
201// DecodeInt33AsInt64 is a special cased decoder for wasm.BlockType which is encoded as a positive signed integer, yet
202// still needs to fit the 32-bit range of allowed indices. Hence, this is 33, not 32-bit!
203//
204// See https://webassembly.github.io/spec/core/binary/instructions.html#control-instructions
205func DecodeInt33AsInt64(r io.ByteReader) (ret int64, bytesRead uint64, err error) {
206	var shift int
207	var b int64
208	var rb byte
209	for shift < 35 {
210		rb, err = r.ReadByte()
211		if err != nil {
212			return 0, 0, fmt.Errorf("readByte failed: %w", err)
213		}
214		b = int64(rb)
215		ret |= (b & int33Mask2) << shift
216		shift += 7
217		bytesRead++
218		if b&int33Mask == 0 {
219			break
220		}
221	}
222
223	// fixme: can be optimized
224	if shift < 33 && (b&int33Mask3) == int33Mask3 {
225		ret |= int33Mask4 << shift
226	}
227	ret = ret & int33Mask4
228
229	// if 33rd bit == 1, we translate it as a corresponding signed-33bit minus value
230	if ret&int33Mask5 > 0 {
231		ret = ret - int33Mask6
232	}
233	// Over flow checks.
234	// fixme: can be optimized.
235	if bytesRead > maxVarintLen33 {
236		return 0, 0, errOverflow33
237	} else if unused := b & 0b00100000; bytesRead == maxVarintLen33 && ret < 0 && unused != 0b00100000 {
238		return 0, 0, errOverflow33
239	} else if bytesRead == maxVarintLen33 && ret >= 0 && unused != 0x00 {
240		return 0, 0, errOverflow33
241	}
242	return ret, bytesRead, nil
243}
244
245func DecodeInt64(r io.ByteReader) (ret int64, bytesRead uint64, err error) {
246	return decodeInt64(func(_ int) (byte, error) { return r.ReadByte() })
247}
248
249func LoadInt64(buf []byte) (ret int64, bytesRead uint64, err error) {
250	return decodeInt64(func(i int) (byte, error) {
251		if i >= len(buf) {
252			return 0, io.EOF
253		}
254		return buf[i], nil
255	})
256}
257
258func decodeInt64(next nextByte) (ret int64, bytesRead uint64, err error) {
259	var shift int
260	var b byte
261	for {
262		b, err = next(int(bytesRead))
263		if err != nil {
264			return 0, 0, fmt.Errorf("readByte failed: %w", err)
265		}
266		ret |= (int64(b) & 0x7f) << shift
267		shift += 7
268		bytesRead++
269		if b&0x80 == 0 {
270			if shift < 64 && (b&int64Mask3) == int64Mask3 {
271				ret |= int64Mask4 << shift
272			}
273			// Over flow checks.
274			// fixme: can be optimized.
275			if bytesRead > maxVarintLen64 {
276				return 0, 0, errOverflow64
277			} else if unused := b & 0b00111110; bytesRead == maxVarintLen64 && ret < 0 && unused != 0b00111110 {
278				return 0, 0, errOverflow64
279			} else if bytesRead == maxVarintLen64 && ret >= 0 && unused != 0x00 {
280				return 0, 0, errOverflow64
281			}
282			return
283		}
284	}
285}