1// Copyright 2016 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5//go:generate go run gen.go
6//go:generate asmfmt -w acc_amd64.s
7
8// asmfmt is https://github.com/klauspost/asmfmt
9
10// Package vector provides a rasterizer for 2-D vector graphics.
11package vector // import "golang.org/x/image/vector"
12
13// The rasterizer's design follows
14// https://medium.com/@raphlinus/inside-the-fastest-font-renderer-in-the-world-75ae5270c445
15//
16// Proof of concept code is in
17// https://github.com/google/font-go
18//
19// See also:
20// http://nothings.org/gamedev/rasterize/
21// http://projects.tuxee.net/cl-vectors/section-the-cl-aa-algorithm
22// https://people.gnome.org/~mathieu/libart/internals.html#INTERNALS-SCANLINE
23
24import (
25 "image"
26 "image/color"
27 "image/draw"
28 "math"
29)
30
31// floatingPointMathThreshold is the width or height above which the rasterizer
32// chooses to used floating point math instead of fixed point math.
33//
34// Both implementations of line segmentation rasterization (see raster_fixed.go
35// and raster_floating.go) implement the same algorithm (in ideal, infinite
36// precision math) but they perform differently in practice. The fixed point
37// math version is roughtly 1.25x faster (on GOARCH=amd64) on the benchmarks,
38// but at sufficiently large scales, the computations will overflow and hence
39// show rendering artifacts. The floating point math version has more
40// consistent quality over larger scales, but it is significantly slower.
41//
42// This constant determines when to use the faster implementation and when to
43// use the better quality implementation.
44//
45// The rationale for this particular value is that TestRasterizePolygon in
46// vector_test.go checks the rendering quality of polygon edges at various
47// angles, inscribed in a circle of diameter 512. It may be that a higher value
48// would still produce acceptable quality, but 512 seems to work.
49const floatingPointMathThreshold = 512
50
51func lerp(t, px, py, qx, qy float32) (x, y float32) {
52 return px + t*(qx-px), py + t*(qy-py)
53}
54
55func clamp(i, width int32) uint {
56 if i < 0 {
57 return 0
58 }
59 if i < width {
60 return uint(i)
61 }
62 return uint(width)
63}
64
65// NewRasterizer returns a new Rasterizer whose rendered mask image is bounded
66// by the given width and height.
67func NewRasterizer(w, h int) *Rasterizer {
68 z := &Rasterizer{}
69 z.Reset(w, h)
70 return z
71}
72
73// Raster is a 2-D vector graphics rasterizer.
74//
75// The zero value is usable, in that it is a Rasterizer whose rendered mask
76// image has zero width and zero height. Call Reset to change its bounds.
77type Rasterizer struct {
78 // bufXxx are buffers of float32 or uint32 values, holding either the
79 // individual or cumulative area values.
80 //
81 // We don't actually need both values at any given time, and to conserve
82 // memory, the integration of the individual to the cumulative could modify
83 // the buffer in place. In other words, we could use a single buffer, say
84 // of type []uint32, and add some math.Float32bits and math.Float32frombits
85 // calls to satisfy the compiler's type checking. As of Go 1.7, though,
86 // there is a performance penalty between:
87 // bufF32[i] += x
88 // and
89 // bufU32[i] = math.Float32bits(x + math.Float32frombits(bufU32[i]))
90 //
91 // See golang.org/issue/17220 for some discussion.
92 bufF32 []float32
93 bufU32 []uint32
94
95 useFloatingPointMath bool
96
97 size image.Point
98 firstX float32
99 firstY float32
100 penX float32
101 penY float32
102
103 // DrawOp is the operator used for the Draw method.
104 //
105 // The zero value is draw.Over.
106 DrawOp draw.Op
107
108 // TODO: an exported field equivalent to the mask point in the
109 // draw.DrawMask function in the stdlib image/draw package?
110}
111
112// Reset resets a Rasterizer as if it was just returned by NewRasterizer.
113//
114// This includes setting z.DrawOp to draw.Over.
115func (z *Rasterizer) Reset(w, h int) {
116 z.size = image.Point{w, h}
117 z.firstX = 0
118 z.firstY = 0
119 z.penX = 0
120 z.penY = 0
121 z.DrawOp = draw.Over
122
123 z.setUseFloatingPointMath(w > floatingPointMathThreshold || h > floatingPointMathThreshold)
124}
125
126func (z *Rasterizer) setUseFloatingPointMath(b bool) {
127 z.useFloatingPointMath = b
128
129 // Make z.bufF32 or z.bufU32 large enough to hold width * height samples.
130 if z.useFloatingPointMath {
131 if n := z.size.X * z.size.Y; n > cap(z.bufF32) {
132 z.bufF32 = make([]float32, n)
133 } else {
134 z.bufF32 = z.bufF32[:n]
135 for i := range z.bufF32 {
136 z.bufF32[i] = 0
137 }
138 }
139 } else {
140 if n := z.size.X * z.size.Y; n > cap(z.bufU32) {
141 z.bufU32 = make([]uint32, n)
142 } else {
143 z.bufU32 = z.bufU32[:n]
144 for i := range z.bufU32 {
145 z.bufU32[i] = 0
146 }
147 }
148 }
149}
150
151// Size returns the width and height passed to NewRasterizer or Reset.
152func (z *Rasterizer) Size() image.Point {
153 return z.size
154}
155
156// Bounds returns the rectangle from (0, 0) to the width and height passed to
157// NewRasterizer or Reset.
158func (z *Rasterizer) Bounds() image.Rectangle {
159 return image.Rectangle{Max: z.size}
160}
161
162// Pen returns the location of the path-drawing pen: the last argument to the
163// most recent XxxTo call.
164func (z *Rasterizer) Pen() (x, y float32) {
165 return z.penX, z.penY
166}
167
168// ClosePath closes the current path.
169func (z *Rasterizer) ClosePath() {
170 z.LineTo(z.firstX, z.firstY)
171}
172
173// MoveTo starts a new path and moves the pen to (ax, ay).
174//
175// The coordinates are allowed to be out of the Rasterizer's bounds.
176func (z *Rasterizer) MoveTo(ax, ay float32) {
177 z.firstX = ax
178 z.firstY = ay
179 z.penX = ax
180 z.penY = ay
181}
182
183// LineTo adds a line segment, from the pen to (bx, by), and moves the pen to
184// (bx, by).
185//
186// The coordinates are allowed to be out of the Rasterizer's bounds.
187func (z *Rasterizer) LineTo(bx, by float32) {
188 if z.useFloatingPointMath {
189 z.floatingLineTo(bx, by)
190 } else {
191 z.fixedLineTo(bx, by)
192 }
193}
194
195// QuadTo adds a quadratic BΓ©zier segment, from the pen via (bx, by) to (cx,
196// cy), and moves the pen to (cx, cy).
197//
198// The coordinates are allowed to be out of the Rasterizer's bounds.
199func (z *Rasterizer) QuadTo(bx, by, cx, cy float32) {
200 ax, ay := z.penX, z.penY
201 devsq := devSquared(ax, ay, bx, by, cx, cy)
202 if devsq >= 0.333 {
203 const tol = 3
204 n := 1 + int(math.Sqrt(math.Sqrt(tol*float64(devsq))))
205 t, nInv := float32(0), 1/float32(n)
206 for i := 0; i < n-1; i++ {
207 t += nInv
208 abx, aby := lerp(t, ax, ay, bx, by)
209 bcx, bcy := lerp(t, bx, by, cx, cy)
210 z.LineTo(lerp(t, abx, aby, bcx, bcy))
211 }
212 }
213 z.LineTo(cx, cy)
214}
215
216// CubeTo adds a cubic BΓ©zier segment, from the pen via (bx, by) and (cx, cy)
217// to (dx, dy), and moves the pen to (dx, dy).
218//
219// The coordinates are allowed to be out of the Rasterizer's bounds.
220func (z *Rasterizer) CubeTo(bx, by, cx, cy, dx, dy float32) {
221 ax, ay := z.penX, z.penY
222 devsq := devSquared(ax, ay, bx, by, dx, dy)
223 if devsqAlt := devSquared(ax, ay, cx, cy, dx, dy); devsq < devsqAlt {
224 devsq = devsqAlt
225 }
226 if devsq >= 0.333 {
227 const tol = 3
228 n := 1 + int(math.Sqrt(math.Sqrt(tol*float64(devsq))))
229 t, nInv := float32(0), 1/float32(n)
230 for i := 0; i < n-1; i++ {
231 t += nInv
232 abx, aby := lerp(t, ax, ay, bx, by)
233 bcx, bcy := lerp(t, bx, by, cx, cy)
234 cdx, cdy := lerp(t, cx, cy, dx, dy)
235 abcx, abcy := lerp(t, abx, aby, bcx, bcy)
236 bcdx, bcdy := lerp(t, bcx, bcy, cdx, cdy)
237 z.LineTo(lerp(t, abcx, abcy, bcdx, bcdy))
238 }
239 }
240 z.LineTo(dx, dy)
241}
242
243// devSquared returns a measure of how curvy the sequence (ax, ay) to (bx, by)
244// to (cx, cy) is. It determines how many line segments will approximate a
245// BΓ©zier curve segment.
246//
247// http://lists.nongnu.org/archive/html/freetype-devel/2016-08/msg00080.html
248// gives the rationale for this evenly spaced heuristic instead of a recursive
249// de Casteljau approach:
250//
251// The reason for the subdivision by n is that I expect the "flatness"
252// computation to be semi-expensive (it's done once rather than on each
253// potential subdivision) and also because you'll often get fewer subdivisions.
254// Taking a circular arc as a simplifying assumption (ie a spherical cow),
255// where I get n, a recursive approach would get 2^βlg nβ, which, if I haven't
256// made any horrible mistakes, is expected to be 33% more in the limit.
257func devSquared(ax, ay, bx, by, cx, cy float32) float32 {
258 devx := ax - 2*bx + cx
259 devy := ay - 2*by + cy
260 return devx*devx + devy*devy
261}
262
263// Draw implements the Drawer interface from the standard library's image/draw
264// package.
265//
266// The vector paths previously added via the XxxTo calls become the mask for
267// drawing src onto dst.
268func (z *Rasterizer) Draw(dst draw.Image, r image.Rectangle, src image.Image, sp image.Point) {
269 // TODO: adjust r and sp (and mp?) if src.Bounds() doesn't contain
270 // r.Add(sp.Sub(r.Min)).
271
272 if src, ok := src.(*image.Uniform); ok {
273 srcR, srcG, srcB, srcA := src.RGBA()
274 switch dst := dst.(type) {
275 case *image.Alpha:
276 // Fast path for glyph rendering.
277 if srcA == 0xffff {
278 if z.DrawOp == draw.Over {
279 z.rasterizeDstAlphaSrcOpaqueOpOver(dst, r)
280 } else {
281 z.rasterizeDstAlphaSrcOpaqueOpSrc(dst, r)
282 }
283 return
284 }
285 case *image.RGBA:
286 if z.DrawOp == draw.Over {
287 z.rasterizeDstRGBASrcUniformOpOver(dst, r, srcR, srcG, srcB, srcA)
288 } else {
289 z.rasterizeDstRGBASrcUniformOpSrc(dst, r, srcR, srcG, srcB, srcA)
290 }
291 return
292 }
293 }
294
295 if z.DrawOp == draw.Over {
296 z.rasterizeOpOver(dst, r, src, sp)
297 } else {
298 z.rasterizeOpSrc(dst, r, src, sp)
299 }
300}
301
302func (z *Rasterizer) accumulateMask() {
303 if z.useFloatingPointMath {
304 if n := z.size.X * z.size.Y; n > cap(z.bufU32) {
305 z.bufU32 = make([]uint32, n)
306 } else {
307 z.bufU32 = z.bufU32[:n]
308 }
309 if haveAccumulateSIMD {
310 floatingAccumulateMaskSIMD(z.bufU32, z.bufF32)
311 } else {
312 floatingAccumulateMask(z.bufU32, z.bufF32)
313 }
314 } else {
315 if haveAccumulateSIMD {
316 fixedAccumulateMaskSIMD(z.bufU32)
317 } else {
318 fixedAccumulateMask(z.bufU32)
319 }
320 }
321}
322
323func (z *Rasterizer) rasterizeDstAlphaSrcOpaqueOpOver(dst *image.Alpha, r image.Rectangle) {
324 // TODO: non-zero vs even-odd winding?
325 if r == dst.Bounds() && r == z.Bounds() {
326 // We bypass the z.accumulateMask step and convert straight from
327 // z.bufF32 or z.bufU32 to dst.Pix.
328 if z.useFloatingPointMath {
329 if haveAccumulateSIMD {
330 floatingAccumulateOpOverSIMD(dst.Pix, z.bufF32)
331 } else {
332 floatingAccumulateOpOver(dst.Pix, z.bufF32)
333 }
334 } else {
335 if haveAccumulateSIMD {
336 fixedAccumulateOpOverSIMD(dst.Pix, z.bufU32)
337 } else {
338 fixedAccumulateOpOver(dst.Pix, z.bufU32)
339 }
340 }
341 return
342 }
343
344 z.accumulateMask()
345 pix := dst.Pix[dst.PixOffset(r.Min.X, r.Min.Y):]
346 for y, y1 := 0, r.Max.Y-r.Min.Y; y < y1; y++ {
347 for x, x1 := 0, r.Max.X-r.Min.X; x < x1; x++ {
348 ma := z.bufU32[y*z.size.X+x]
349 i := y*dst.Stride + x
350
351 // This formula is like rasterizeOpOver's, simplified for the
352 // concrete dst type and opaque src assumption.
353 a := 0xffff - ma
354 pix[i] = uint8((uint32(pix[i])*0x101*a/0xffff + ma) >> 8)
355 }
356 }
357}
358
359func (z *Rasterizer) rasterizeDstAlphaSrcOpaqueOpSrc(dst *image.Alpha, r image.Rectangle) {
360 // TODO: non-zero vs even-odd winding?
361 if r == dst.Bounds() && r == z.Bounds() {
362 // We bypass the z.accumulateMask step and convert straight from
363 // z.bufF32 or z.bufU32 to dst.Pix.
364 if z.useFloatingPointMath {
365 if haveAccumulateSIMD {
366 floatingAccumulateOpSrcSIMD(dst.Pix, z.bufF32)
367 } else {
368 floatingAccumulateOpSrc(dst.Pix, z.bufF32)
369 }
370 } else {
371 if haveAccumulateSIMD {
372 fixedAccumulateOpSrcSIMD(dst.Pix, z.bufU32)
373 } else {
374 fixedAccumulateOpSrc(dst.Pix, z.bufU32)
375 }
376 }
377 return
378 }
379
380 z.accumulateMask()
381 pix := dst.Pix[dst.PixOffset(r.Min.X, r.Min.Y):]
382 for y, y1 := 0, r.Max.Y-r.Min.Y; y < y1; y++ {
383 for x, x1 := 0, r.Max.X-r.Min.X; x < x1; x++ {
384 ma := z.bufU32[y*z.size.X+x]
385
386 // This formula is like rasterizeOpSrc's, simplified for the
387 // concrete dst type and opaque src assumption.
388 pix[y*dst.Stride+x] = uint8(ma >> 8)
389 }
390 }
391}
392
393func (z *Rasterizer) rasterizeDstRGBASrcUniformOpOver(dst *image.RGBA, r image.Rectangle, sr, sg, sb, sa uint32) {
394 z.accumulateMask()
395 pix := dst.Pix[dst.PixOffset(r.Min.X, r.Min.Y):]
396 for y, y1 := 0, r.Max.Y-r.Min.Y; y < y1; y++ {
397 for x, x1 := 0, r.Max.X-r.Min.X; x < x1; x++ {
398 ma := z.bufU32[y*z.size.X+x]
399
400 // This formula is like rasterizeOpOver's, simplified for the
401 // concrete dst type and uniform src assumption.
402 a := 0xffff - (sa * ma / 0xffff)
403 i := y*dst.Stride + 4*x
404 pix[i+0] = uint8(((uint32(pix[i+0])*0x101*a + sr*ma) / 0xffff) >> 8)
405 pix[i+1] = uint8(((uint32(pix[i+1])*0x101*a + sg*ma) / 0xffff) >> 8)
406 pix[i+2] = uint8(((uint32(pix[i+2])*0x101*a + sb*ma) / 0xffff) >> 8)
407 pix[i+3] = uint8(((uint32(pix[i+3])*0x101*a + sa*ma) / 0xffff) >> 8)
408 }
409 }
410}
411
412func (z *Rasterizer) rasterizeDstRGBASrcUniformOpSrc(dst *image.RGBA, r image.Rectangle, sr, sg, sb, sa uint32) {
413 z.accumulateMask()
414 pix := dst.Pix[dst.PixOffset(r.Min.X, r.Min.Y):]
415 for y, y1 := 0, r.Max.Y-r.Min.Y; y < y1; y++ {
416 for x, x1 := 0, r.Max.X-r.Min.X; x < x1; x++ {
417 ma := z.bufU32[y*z.size.X+x]
418
419 // This formula is like rasterizeOpSrc's, simplified for the
420 // concrete dst type and uniform src assumption.
421 i := y*dst.Stride + 4*x
422 pix[i+0] = uint8((sr * ma / 0xffff) >> 8)
423 pix[i+1] = uint8((sg * ma / 0xffff) >> 8)
424 pix[i+2] = uint8((sb * ma / 0xffff) >> 8)
425 pix[i+3] = uint8((sa * ma / 0xffff) >> 8)
426 }
427 }
428}
429
430func (z *Rasterizer) rasterizeOpOver(dst draw.Image, r image.Rectangle, src image.Image, sp image.Point) {
431 z.accumulateMask()
432 out := color.RGBA64{}
433 outc := color.Color(&out)
434 for y, y1 := 0, r.Max.Y-r.Min.Y; y < y1; y++ {
435 for x, x1 := 0, r.Max.X-r.Min.X; x < x1; x++ {
436 sr, sg, sb, sa := src.At(sp.X+x, sp.Y+y).RGBA()
437 ma := z.bufU32[y*z.size.X+x]
438
439 // This algorithm comes from the standard library's image/draw
440 // package.
441 dr, dg, db, da := dst.At(r.Min.X+x, r.Min.Y+y).RGBA()
442 a := 0xffff - (sa * ma / 0xffff)
443 out.R = uint16((dr*a + sr*ma) / 0xffff)
444 out.G = uint16((dg*a + sg*ma) / 0xffff)
445 out.B = uint16((db*a + sb*ma) / 0xffff)
446 out.A = uint16((da*a + sa*ma) / 0xffff)
447
448 dst.Set(r.Min.X+x, r.Min.Y+y, outc)
449 }
450 }
451}
452
453func (z *Rasterizer) rasterizeOpSrc(dst draw.Image, r image.Rectangle, src image.Image, sp image.Point) {
454 z.accumulateMask()
455 out := color.RGBA64{}
456 outc := color.Color(&out)
457 for y, y1 := 0, r.Max.Y-r.Min.Y; y < y1; y++ {
458 for x, x1 := 0, r.Max.X-r.Min.X; x < x1; x++ {
459 sr, sg, sb, sa := src.At(sp.X+x, sp.Y+y).RGBA()
460 ma := z.bufU32[y*z.size.X+x]
461
462 // This algorithm comes from the standard library's image/draw
463 // package.
464 out.R = uint16(sr * ma / 0xffff)
465 out.G = uint16(sg * ma / 0xffff)
466 out.B = uint16(sb * ma / 0xffff)
467 out.A = uint16(sa * ma / 0xffff)
468
469 dst.Set(r.Min.X+x, r.Min.Y+y, outc)
470 }
471 }
472}