Add unicode control characters test case

Amine Hilaly created

Move `cleanupText` to utils/text/transform.go
`text.Cleanup`: removing unicode control characters except for those allowed by `text.Safe`

Add golang.org/x/text dependencies

fix text.Cleanup

Fix import panic

Change summary

Gopkg.lock                                      |  13 
bridge/github/import.go                         |  46 
bridge/github/import_test.go                    |  44 
util/text/transform.go                          |  31 
vendor/golang.org/x/text/AUTHORS                |   3 
vendor/golang.org/x/text/CONTRIBUTORS           |   3 
vendor/golang.org/x/text/LICENSE                |  27 
vendor/golang.org/x/text/PATENTS                |  22 
vendor/golang.org/x/text/runes/cond.go          | 187 +++++
vendor/golang.org/x/text/runes/runes.go         | 355 +++++++++
vendor/golang.org/x/text/transform/transform.go | 705 +++++++++++++++++++
11 files changed, 1,404 insertions(+), 32 deletions(-)

Detailed changes

Gopkg.lock 🔗

@@ -386,6 +386,17 @@
   pruneopts = "UT"
   revision = "ac767d655b305d4e9612f5f6e33120b9176c4ad4"
 
+[[projects]]
+  digest = "1:86cb348528a842f96e651ca3f8197070e9ebc315f8c73e71d0df7a60e92a6db1"
+  name = "golang.org/x/text"
+  packages = [
+    "runes",
+    "transform",
+  ]
+  pruneopts = "UT"
+  revision = "342b2e1fbaa52c93f31447ad2c6abc048c63e475"
+  version = "v0.3.2"
+
 [[projects]]
   digest = "1:71850ac10bbeb4d8dd06ce0743fe57654daf28510b0f6cbd9692aaf0d269360e"
   name = "golang.org/x/tools"
@@ -460,6 +471,8 @@
     "github.com/vektah/gqlparser/ast",
     "golang.org/x/crypto/ssh/terminal",
     "golang.org/x/oauth2",
+    "golang.org/x/text/runes",
+    "golang.org/x/text/transform",
   ]
   solver-name = "gps-cdcl"
   solver-version = 1

bridge/github/import.go 🔗

@@ -3,7 +3,6 @@ package github
 import (
 	"context"
 	"fmt"
-	"strings"
 	"time"
 
 	"github.com/MichaelMure/git-bug/bridge/core"
@@ -11,6 +10,7 @@ import (
 	"github.com/MichaelMure/git-bug/cache"
 	"github.com/MichaelMure/git-bug/identity"
 	"github.com/MichaelMure/git-bug/util/git"
+	"github.com/MichaelMure/git-bug/util/text"
 	"github.com/shurcooL/githubv4"
 )
 
@@ -112,12 +112,17 @@ func (gi *githubImporter) ensureIssue(repo *cache.RepoCache, issue issueTimeline
 	// if issueEdits is empty
 	if len(issueEdits) == 0 {
 		if err == bug.ErrBugNotExist {
+			cleanText, err := text.Cleanup(string(issue.Body))
+			if err != nil {
+				return nil, err
+			}
+
 			// create bug
 			b, err = repo.NewBugRaw(
 				author,
 				issue.CreatedAt.Unix(),
 				issue.Title,
-				cleanupText(string(issue.Body)),
+				cleanText,
 				nil,
 				map[string]string{
 					keyGithubId:  parseId(issue.Id),
@@ -136,6 +141,11 @@ func (gi *githubImporter) ensureIssue(repo *cache.RepoCache, issue issueTimeline
 				continue
 			}
 
+			cleanText, err := text.Cleanup(string(*edit.Diff))
+			if err != nil {
+				return nil, err
+			}
+
 			// if the bug doesn't exist
 			if b == nil {
 				// we create the bug as soon as we have a legit first edition
@@ -143,7 +153,7 @@ func (gi *githubImporter) ensureIssue(repo *cache.RepoCache, issue issueTimeline
 					author,
 					issue.CreatedAt.Unix(),
 					issue.Title,
-					cleanupText(string(*edit.Diff)),
+					cleanText,
 					nil,
 					map[string]string{
 						keyGithubId:  parseId(issue.Id),
@@ -301,12 +311,16 @@ func (gi *githubImporter) ensureTimelineComment(repo *cache.RepoCache, b *cache.
 
 		// if comment doesn't exist
 		if err == cache.ErrNoMatchingOp {
+			cleanText, err := text.Cleanup(string(item.Body))
+			if err != nil {
+				return err
+			}
 
 			// add comment operation
 			op, err := b.AddCommentRaw(
 				author,
 				item.CreatedAt.Unix(),
-				cleanupText(string(item.Body)),
+				cleanText,
 				nil,
 				map[string]string{
 					keyGithubId:  parseId(item.Id),
@@ -338,10 +352,15 @@ func (gi *githubImporter) ensureTimelineComment(repo *cache.RepoCache, b *cache.
 
 			// create comment when target is empty
 			if target == "" {
+				cleanText, err := text.Cleanup(string(*edit.Diff))
+				if err != nil {
+					return err
+				}
+
 				op, err := b.AddCommentRaw(
 					editor,
 					edit.CreatedAt.Unix(),
-					cleanupText(string(*edit.Diff)),
+					cleanText,
 					nil,
 					map[string]string{
 						keyGithubId:  parseId(item.Id),
@@ -395,12 +414,17 @@ func (gi *githubImporter) ensureCommentEdit(repo *cache.RepoCache, b *cache.BugC
 
 	case edit.DeletedAt == nil:
 
+		cleanText, err := text.Cleanup(string(*edit.Diff))
+		if err != nil {
+			return err
+		}
+
 		// comment edition
-		_, err := b.EditCommentRaw(
+		_, err = b.EditCommentRaw(
 			editor,
 			edit.CreatedAt.Unix(),
 			target,
-			cleanupText(string(*edit.Diff)),
+			cleanText,
 			map[string]string{
 				keyGithubId: parseId(edit.Id),
 			},
@@ -505,14 +529,6 @@ func parseId(id githubv4.ID) string {
 	return fmt.Sprintf("%v", id)
 }
 
-func cleanupText(text string) string {
-	// windows new line, Github, really ?
-	text = strings.Replace(text, "\r\n", "\n", -1)
-
-	// trim extra new line not displayed in the github UI but still present in the data
-	return strings.TrimSpace(text)
-}
-
 func reverseEdits(edits []userContentEdit) []userContentEdit {
 	for i, j := 0, len(edits)-1; i < j; i, j = i+1, j-1 {
 		edits[i], edits[j] = edits[j], edits[i]

bridge/github/import_test.go 🔗

@@ -31,7 +31,8 @@ func Test_Importer(t *testing.T) {
 				Operations: []bug.Operation{
 					bug.NewCreateOp(author, 0, "simple issue", "initial comment", nil),
 					bug.NewAddCommentOp(author, 0, "first comment", nil),
-					bug.NewAddCommentOp(author, 0, "second comment", nil)},
+					bug.NewAddCommentOp(author, 0, "second comment", nil),
+				},
 			},
 		},
 		{
@@ -112,6 +113,15 @@ func Test_Importer(t *testing.T) {
 				},
 			},
 		},
+		{
+			name: "unicode control characters",
+			url:  "https://github.com/MichaelMure/git-bug-test-github-bridge/issues/10",
+			bug: &bug.Snapshot{
+				Operations: []bug.Operation{
+					bug.NewCreateOp(author, 0, "unicode control characters", "u0000: \nu0001: \nu0002: \nu0003: \nu0004: \nu0005: \nu0006: \nu0007: \nu0008: \nu0009: \t\nu0010: \nu0011: \nu0012: \nu0013: \nu0014: \nu0015: \nu0016: \nu0017: \nu0018: \nu0019:", nil),
+				},
+			},
+		},
 	}
 
 	repo := test.CreateRepo(false)
@@ -142,7 +152,7 @@ func Test_Importer(t *testing.T) {
 
 	fmt.Printf("test repository imported in %f seconds\n", time.Since(start).Seconds())
 
-	require.Len(t, backend.AllBugsIds(), 8)
+	require.Len(t, backend.AllBugsIds(), 9)
 
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
@@ -157,26 +167,26 @@ func Test_Importer(t *testing.T) {
 
 				switch op.(type) {
 				case *bug.CreateOperation:
-					assert.Equal(t, ops[i].(*bug.CreateOperation).Title, op.(*bug.CreateOperation).Title)
-					assert.Equal(t, ops[i].(*bug.CreateOperation).Message, op.(*bug.CreateOperation).Message)
-					assert.Equal(t, ops[i].(*bug.CreateOperation).Author.Name(), op.(*bug.CreateOperation).Author.Name())
+					assert.Equal(t, op.(*bug.CreateOperation).Title, ops[i].(*bug.CreateOperation).Title)
+					assert.Equal(t, op.(*bug.CreateOperation).Message, ops[i].(*bug.CreateOperation).Message)
+					assert.Equal(t, op.(*bug.CreateOperation).Author.Name(), ops[i].(*bug.CreateOperation).Author.Name())
 				case *bug.SetStatusOperation:
-					assert.Equal(t, ops[i].(*bug.SetStatusOperation).Status, op.(*bug.SetStatusOperation).Status)
-					assert.Equal(t, ops[i].(*bug.SetStatusOperation).Author.Name(), op.(*bug.SetStatusOperation).Author.Name())
+					assert.Equal(t, op.(*bug.SetStatusOperation).Status, ops[i].(*bug.SetStatusOperation).Status)
+					assert.Equal(t, op.(*bug.SetStatusOperation).Author.Name(), ops[i].(*bug.SetStatusOperation).Author.Name())
 				case *bug.SetTitleOperation:
-					assert.Equal(t, ops[i].(*bug.SetTitleOperation).Was, op.(*bug.SetTitleOperation).Was)
-					assert.Equal(t, ops[i].(*bug.SetTitleOperation).Title, op.(*bug.SetTitleOperation).Title)
-					assert.Equal(t, ops[i].(*bug.SetTitleOperation).Author.Name(), op.(*bug.SetTitleOperation).Author.Name())
+					assert.Equal(t, op.(*bug.SetTitleOperation).Was, ops[i].(*bug.SetTitleOperation).Was)
+					assert.Equal(t, op.(*bug.SetTitleOperation).Title, ops[i].(*bug.SetTitleOperation).Title)
+					assert.Equal(t, op.(*bug.SetTitleOperation).Author.Name(), ops[i].(*bug.SetTitleOperation).Author.Name())
 				case *bug.LabelChangeOperation:
-					assert.ElementsMatch(t, ops[i].(*bug.LabelChangeOperation).Added, op.(*bug.LabelChangeOperation).Added)
-					assert.ElementsMatch(t, ops[i].(*bug.LabelChangeOperation).Removed, op.(*bug.LabelChangeOperation).Removed)
-					assert.Equal(t, ops[i].(*bug.LabelChangeOperation).Author.Name(), op.(*bug.LabelChangeOperation).Author.Name())
+					assert.ElementsMatch(t, op.(*bug.LabelChangeOperation).Added, ops[i].(*bug.LabelChangeOperation).Added)
+					assert.ElementsMatch(t, op.(*bug.LabelChangeOperation).Removed, ops[i].(*bug.LabelChangeOperation).Removed)
+					assert.Equal(t, op.(*bug.LabelChangeOperation).Author.Name(), ops[i].(*bug.LabelChangeOperation).Author.Name())
 				case *bug.AddCommentOperation:
-					assert.Equal(t, ops[i].(*bug.AddCommentOperation).Message, op.(*bug.AddCommentOperation).Message)
-					assert.Equal(t, ops[i].(*bug.AddCommentOperation).Author.Name(), op.(*bug.AddCommentOperation).Author.Name())
+					assert.Equal(t, op.(*bug.AddCommentOperation).Message, ops[i].(*bug.AddCommentOperation).Message)
+					assert.Equal(t, op.(*bug.AddCommentOperation).Author.Name(), ops[i].(*bug.AddCommentOperation).Author.Name())
 				case *bug.EditCommentOperation:
-					assert.Equal(t, ops[i].(*bug.EditCommentOperation).Message, op.(*bug.EditCommentOperation).Message)
-					assert.Equal(t, ops[i].(*bug.EditCommentOperation).Author.Name(), op.(*bug.EditCommentOperation).Author.Name())
+					assert.Equal(t, op.(*bug.EditCommentOperation).Message, ops[i].(*bug.EditCommentOperation).Message)
+					assert.Equal(t, op.(*bug.EditCommentOperation).Author.Name(), ops[i].(*bug.EditCommentOperation).Author.Name())
 
 				default:
 					panic("Unknown operation type")

util/text/transform.go 🔗

@@ -0,0 +1,31 @@
+package text
+
+import (
+	"strings"
+	"unicode"
+
+	"golang.org/x/text/runes"
+	"golang.org/x/text/transform"
+)
+
+func Cleanup(text string) (string, error) {
+	// windows new line, Github, really ?
+	text = strings.Replace(text, "\r\n", "\n", -1)
+
+	// remove all unicode control characters except
+	// '\n', '\r' and '\t'
+	t := runes.Remove(runes.Predicate(func(r rune) bool {
+		switch r {
+		case '\r', '\n', '\t':
+			return false
+		}
+		return unicode.IsControl(r)
+	}))
+	sanitized, _, err := transform.String(t, text)
+	if err != nil {
+		return "", err
+	}
+
+	// trim extra new line not displayed in the github UI but still present in the data
+	return strings.TrimSpace(sanitized), nil
+}

vendor/golang.org/x/text/AUTHORS 🔗

@@ -0,0 +1,3 @@
+# This source code refers to The Go Authors for copyright purposes.
+# The master list of authors is in the main Go distribution,
+# visible at http://tip.golang.org/AUTHORS.

vendor/golang.org/x/text/CONTRIBUTORS 🔗

@@ -0,0 +1,3 @@
+# This source code was written by the Go contributors.
+# The master list of contributors is in the main Go distribution,
+# visible at http://tip.golang.org/CONTRIBUTORS.

vendor/golang.org/x/text/LICENSE 🔗

@@ -0,0 +1,27 @@
+Copyright (c) 2009 The Go Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+   * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+   * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

vendor/golang.org/x/text/PATENTS 🔗

@@ -0,0 +1,22 @@
+Additional IP Rights Grant (Patents)
+
+"This implementation" means the copyrightable works distributed by
+Google as part of the Go project.
+
+Google hereby grants to You a perpetual, worldwide, non-exclusive,
+no-charge, royalty-free, irrevocable (except as stated in this section)
+patent license to make, have made, use, offer to sell, sell, import,
+transfer and otherwise run, modify and propagate the contents of this
+implementation of Go, where such license applies only to those patent
+claims, both currently owned or controlled by Google and acquired in
+the future, licensable by Google that are necessarily infringed by this
+implementation of Go.  This grant does not include claims that would be
+infringed only as a consequence of further modification of this
+implementation.  If you or your agent or exclusive licensee institute or
+order or agree to the institution of patent litigation against any
+entity (including a cross-claim or counterclaim in a lawsuit) alleging
+that this implementation of Go or any code incorporated within this
+implementation of Go constitutes direct or contributory patent
+infringement, or inducement of patent infringement, then any patent
+rights granted to you under this License for this implementation of Go
+shall terminate as of the date such litigation is filed.

vendor/golang.org/x/text/runes/cond.go 🔗

@@ -0,0 +1,187 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runes
+
+import (
+	"unicode/utf8"
+
+	"golang.org/x/text/transform"
+)
+
+// Note: below we pass invalid UTF-8 to the tIn and tNotIn transformers as is.
+// This is done for various reasons:
+// - To retain the semantics of the Nop transformer: if input is passed to a Nop
+//   one would expect it to be unchanged.
+// - It would be very expensive to pass a converted RuneError to a transformer:
+//   a transformer might need more source bytes after RuneError, meaning that
+//   the only way to pass it safely is to create a new buffer and manage the
+//   intermingling of RuneErrors and normal input.
+// - Many transformers leave ill-formed UTF-8 as is, so this is not
+//   inconsistent. Generally ill-formed UTF-8 is only replaced if it is a
+//   logical consequence of the operation (as for Map) or if it otherwise would
+//   pose security concerns (as for Remove).
+// - An alternative would be to return an error on ill-formed UTF-8, but this
+//   would be inconsistent with other operations.
+
+// If returns a transformer that applies tIn to consecutive runes for which
+// s.Contains(r) and tNotIn to consecutive runes for which !s.Contains(r). Reset
+// is called on tIn and tNotIn at the start of each run. A Nop transformer will
+// substitute a nil value passed to tIn or tNotIn. Invalid UTF-8 is translated
+// to RuneError to determine which transformer to apply, but is passed as is to
+// the respective transformer.
+func If(s Set, tIn, tNotIn transform.Transformer) Transformer {
+	if tIn == nil && tNotIn == nil {
+		return Transformer{transform.Nop}
+	}
+	if tIn == nil {
+		tIn = transform.Nop
+	}
+	if tNotIn == nil {
+		tNotIn = transform.Nop
+	}
+	sIn, ok := tIn.(transform.SpanningTransformer)
+	if !ok {
+		sIn = dummySpan{tIn}
+	}
+	sNotIn, ok := tNotIn.(transform.SpanningTransformer)
+	if !ok {
+		sNotIn = dummySpan{tNotIn}
+	}
+
+	a := &cond{
+		tIn:    sIn,
+		tNotIn: sNotIn,
+		f:      s.Contains,
+	}
+	a.Reset()
+	return Transformer{a}
+}
+
+type dummySpan struct{ transform.Transformer }
+
+func (d dummySpan) Span(src []byte, atEOF bool) (n int, err error) {
+	return 0, transform.ErrEndOfSpan
+}
+
+type cond struct {
+	tIn, tNotIn transform.SpanningTransformer
+	f           func(rune) bool
+	check       func(rune) bool               // current check to perform
+	t           transform.SpanningTransformer // current transformer to use
+}
+
+// Reset implements transform.Transformer.
+func (t *cond) Reset() {
+	t.check = t.is
+	t.t = t.tIn
+	t.t.Reset() // notIn will be reset on first usage.
+}
+
+func (t *cond) is(r rune) bool {
+	if t.f(r) {
+		return true
+	}
+	t.check = t.isNot
+	t.t = t.tNotIn
+	t.tNotIn.Reset()
+	return false
+}
+
+func (t *cond) isNot(r rune) bool {
+	if !t.f(r) {
+		return true
+	}
+	t.check = t.is
+	t.t = t.tIn
+	t.tIn.Reset()
+	return false
+}
+
+// This implementation of Span doesn't help all too much, but it needs to be
+// there to satisfy this package's Transformer interface.
+// TODO: there are certainly room for improvements, though. For example, if
+// t.t == transform.Nop (which will a common occurrence) it will save a bundle
+// to special-case that loop.
+func (t *cond) Span(src []byte, atEOF bool) (n int, err error) {
+	p := 0
+	for n < len(src) && err == nil {
+		// Don't process too much at a time as the Spanner that will be
+		// called on this block may terminate early.
+		const maxChunk = 4096
+		max := len(src)
+		if v := n + maxChunk; v < max {
+			max = v
+		}
+		atEnd := false
+		size := 0
+		current := t.t
+		for ; p < max; p += size {
+			r := rune(src[p])
+			if r < utf8.RuneSelf {
+				size = 1
+			} else if r, size = utf8.DecodeRune(src[p:]); size == 1 {
+				if !atEOF && !utf8.FullRune(src[p:]) {
+					err = transform.ErrShortSrc
+					break
+				}
+			}
+			if !t.check(r) {
+				// The next rune will be the start of a new run.
+				atEnd = true
+				break
+			}
+		}
+		n2, err2 := current.Span(src[n:p], atEnd || (atEOF && p == len(src)))
+		n += n2
+		if err2 != nil {
+			return n, err2
+		}
+		// At this point either err != nil or t.check will pass for the rune at p.
+		p = n + size
+	}
+	return n, err
+}
+
+func (t *cond) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
+	p := 0
+	for nSrc < len(src) && err == nil {
+		// Don't process too much at a time, as the work might be wasted if the
+		// destination buffer isn't large enough to hold the result or a
+		// transform returns an error early.
+		const maxChunk = 4096
+		max := len(src)
+		if n := nSrc + maxChunk; n < len(src) {
+			max = n
+		}
+		atEnd := false
+		size := 0
+		current := t.t
+		for ; p < max; p += size {
+			r := rune(src[p])
+			if r < utf8.RuneSelf {
+				size = 1
+			} else if r, size = utf8.DecodeRune(src[p:]); size == 1 {
+				if !atEOF && !utf8.FullRune(src[p:]) {
+					err = transform.ErrShortSrc
+					break
+				}
+			}
+			if !t.check(r) {
+				// The next rune will be the start of a new run.
+				atEnd = true
+				break
+			}
+		}
+		nDst2, nSrc2, err2 := current.Transform(dst[nDst:], src[nSrc:p], atEnd || (atEOF && p == len(src)))
+		nDst += nDst2
+		nSrc += nSrc2
+		if err2 != nil {
+			return nDst, nSrc, err2
+		}
+		// At this point either err != nil or t.check will pass for the rune at p.
+		p = nSrc + size
+	}
+	return nDst, nSrc, err
+}

vendor/golang.org/x/text/runes/runes.go 🔗

@@ -0,0 +1,355 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package runes provide transforms for UTF-8 encoded text.
+package runes // import "golang.org/x/text/runes"
+
+import (
+	"unicode"
+	"unicode/utf8"
+
+	"golang.org/x/text/transform"
+)
+
+// A Set is a collection of runes.
+type Set interface {
+	// Contains returns true if r is contained in the set.
+	Contains(r rune) bool
+}
+
+type setFunc func(rune) bool
+
+func (s setFunc) Contains(r rune) bool {
+	return s(r)
+}
+
+// Note: using funcs here instead of wrapping types result in cleaner
+// documentation and a smaller API.
+
+// In creates a Set with a Contains method that returns true for all runes in
+// the given RangeTable.
+func In(rt *unicode.RangeTable) Set {
+	return setFunc(func(r rune) bool { return unicode.Is(rt, r) })
+}
+
+// In creates a Set with a Contains method that returns true for all runes not
+// in the given RangeTable.
+func NotIn(rt *unicode.RangeTable) Set {
+	return setFunc(func(r rune) bool { return !unicode.Is(rt, r) })
+}
+
+// Predicate creates a Set with a Contains method that returns f(r).
+func Predicate(f func(rune) bool) Set {
+	return setFunc(f)
+}
+
+// Transformer implements the transform.Transformer interface.
+type Transformer struct {
+	t transform.SpanningTransformer
+}
+
+func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
+	return t.t.Transform(dst, src, atEOF)
+}
+
+func (t Transformer) Span(b []byte, atEOF bool) (n int, err error) {
+	return t.t.Span(b, atEOF)
+}
+
+func (t Transformer) Reset() { t.t.Reset() }
+
+// Bytes returns a new byte slice with the result of converting b using t.  It
+// calls Reset on t. It returns nil if any error was found. This can only happen
+// if an error-producing Transformer is passed to If.
+func (t Transformer) Bytes(b []byte) []byte {
+	b, _, err := transform.Bytes(t, b)
+	if err != nil {
+		return nil
+	}
+	return b
+}
+
+// String returns a string with the result of converting s using t. It calls
+// Reset on t. It returns the empty string if any error was found. This can only
+// happen if an error-producing Transformer is passed to If.
+func (t Transformer) String(s string) string {
+	s, _, err := transform.String(t, s)
+	if err != nil {
+		return ""
+	}
+	return s
+}
+
+// TODO:
+// - Copy: copying strings and bytes in whole-rune units.
+// - Validation (maybe)
+// - Well-formed-ness (maybe)
+
+const runeErrorString = string(utf8.RuneError)
+
+// Remove returns a Transformer that removes runes r for which s.Contains(r).
+// Illegal input bytes are replaced by RuneError before being passed to f.
+func Remove(s Set) Transformer {
+	if f, ok := s.(setFunc); ok {
+		// This little trick cuts the running time of BenchmarkRemove for sets
+		// created by Predicate roughly in half.
+		// TODO: special-case RangeTables as well.
+		return Transformer{remove(f)}
+	}
+	return Transformer{remove(s.Contains)}
+}
+
+// TODO: remove transform.RemoveFunc.
+
+type remove func(r rune) bool
+
+func (remove) Reset() {}
+
+// Span implements transform.Spanner.
+func (t remove) Span(src []byte, atEOF bool) (n int, err error) {
+	for r, size := rune(0), 0; n < len(src); {
+		if r = rune(src[n]); r < utf8.RuneSelf {
+			size = 1
+		} else if r, size = utf8.DecodeRune(src[n:]); size == 1 {
+			// Invalid rune.
+			if !atEOF && !utf8.FullRune(src[n:]) {
+				err = transform.ErrShortSrc
+			} else {
+				err = transform.ErrEndOfSpan
+			}
+			break
+		}
+		if t(r) {
+			err = transform.ErrEndOfSpan
+			break
+		}
+		n += size
+	}
+	return
+}
+
+// Transform implements transform.Transformer.
+func (t remove) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
+	for r, size := rune(0), 0; nSrc < len(src); {
+		if r = rune(src[nSrc]); r < utf8.RuneSelf {
+			size = 1
+		} else if r, size = utf8.DecodeRune(src[nSrc:]); size == 1 {
+			// Invalid rune.
+			if !atEOF && !utf8.FullRune(src[nSrc:]) {
+				err = transform.ErrShortSrc
+				break
+			}
+			// We replace illegal bytes with RuneError. Not doing so might
+			// otherwise turn a sequence of invalid UTF-8 into valid UTF-8.
+			// The resulting byte sequence may subsequently contain runes
+			// for which t(r) is true that were passed unnoticed.
+			if !t(utf8.RuneError) {
+				if nDst+3 > len(dst) {
+					err = transform.ErrShortDst
+					break
+				}
+				dst[nDst+0] = runeErrorString[0]
+				dst[nDst+1] = runeErrorString[1]
+				dst[nDst+2] = runeErrorString[2]
+				nDst += 3
+			}
+			nSrc++
+			continue
+		}
+		if t(r) {
+			nSrc += size
+			continue
+		}
+		if nDst+size > len(dst) {
+			err = transform.ErrShortDst
+			break
+		}
+		for i := 0; i < size; i++ {
+			dst[nDst] = src[nSrc]
+			nDst++
+			nSrc++
+		}
+	}
+	return
+}
+
+// Map returns a Transformer that maps the runes in the input using the given
+// mapping. Illegal bytes in the input are converted to utf8.RuneError before
+// being passed to the mapping func.
+func Map(mapping func(rune) rune) Transformer {
+	return Transformer{mapper(mapping)}
+}
+
+type mapper func(rune) rune
+
+func (mapper) Reset() {}
+
+// Span implements transform.Spanner.
+func (t mapper) Span(src []byte, atEOF bool) (n int, err error) {
+	for r, size := rune(0), 0; n < len(src); n += size {
+		if r = rune(src[n]); r < utf8.RuneSelf {
+			size = 1
+		} else if r, size = utf8.DecodeRune(src[n:]); size == 1 {
+			// Invalid rune.
+			if !atEOF && !utf8.FullRune(src[n:]) {
+				err = transform.ErrShortSrc
+			} else {
+				err = transform.ErrEndOfSpan
+			}
+			break
+		}
+		if t(r) != r {
+			err = transform.ErrEndOfSpan
+			break
+		}
+	}
+	return n, err
+}
+
+// Transform implements transform.Transformer.
+func (t mapper) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
+	var replacement rune
+	var b [utf8.UTFMax]byte
+
+	for r, size := rune(0), 0; nSrc < len(src); {
+		if r = rune(src[nSrc]); r < utf8.RuneSelf {
+			if replacement = t(r); replacement < utf8.RuneSelf {
+				if nDst == len(dst) {
+					err = transform.ErrShortDst
+					break
+				}
+				dst[nDst] = byte(replacement)
+				nDst++
+				nSrc++
+				continue
+			}
+			size = 1
+		} else if r, size = utf8.DecodeRune(src[nSrc:]); size == 1 {
+			// Invalid rune.
+			if !atEOF && !utf8.FullRune(src[nSrc:]) {
+				err = transform.ErrShortSrc
+				break
+			}
+
+			if replacement = t(utf8.RuneError); replacement == utf8.RuneError {
+				if nDst+3 > len(dst) {
+					err = transform.ErrShortDst
+					break
+				}
+				dst[nDst+0] = runeErrorString[0]
+				dst[nDst+1] = runeErrorString[1]
+				dst[nDst+2] = runeErrorString[2]
+				nDst += 3
+				nSrc++
+				continue
+			}
+		} else if replacement = t(r); replacement == r {
+			if nDst+size > len(dst) {
+				err = transform.ErrShortDst
+				break
+			}
+			for i := 0; i < size; i++ {
+				dst[nDst] = src[nSrc]
+				nDst++
+				nSrc++
+			}
+			continue
+		}
+
+		n := utf8.EncodeRune(b[:], replacement)
+
+		if nDst+n > len(dst) {
+			err = transform.ErrShortDst
+			break
+		}
+		for i := 0; i < n; i++ {
+			dst[nDst] = b[i]
+			nDst++
+		}
+		nSrc += size
+	}
+	return
+}
+
+// ReplaceIllFormed returns a transformer that replaces all input bytes that are
+// not part of a well-formed UTF-8 code sequence with utf8.RuneError.
+func ReplaceIllFormed() Transformer {
+	return Transformer{&replaceIllFormed{}}
+}
+
+type replaceIllFormed struct{ transform.NopResetter }
+
+func (t replaceIllFormed) Span(src []byte, atEOF bool) (n int, err error) {
+	for n < len(src) {
+		// ASCII fast path.
+		if src[n] < utf8.RuneSelf {
+			n++
+			continue
+		}
+
+		r, size := utf8.DecodeRune(src[n:])
+
+		// Look for a valid non-ASCII rune.
+		if r != utf8.RuneError || size != 1 {
+			n += size
+			continue
+		}
+
+		// Look for short source data.
+		if !atEOF && !utf8.FullRune(src[n:]) {
+			err = transform.ErrShortSrc
+			break
+		}
+
+		// We have an invalid rune.
+		err = transform.ErrEndOfSpan
+		break
+	}
+	return n, err
+}
+
+func (t replaceIllFormed) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
+	for nSrc < len(src) {
+		// ASCII fast path.
+		if r := src[nSrc]; r < utf8.RuneSelf {
+			if nDst == len(dst) {
+				err = transform.ErrShortDst
+				break
+			}
+			dst[nDst] = r
+			nDst++
+			nSrc++
+			continue
+		}
+
+		// Look for a valid non-ASCII rune.
+		if _, size := utf8.DecodeRune(src[nSrc:]); size != 1 {
+			if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
+				err = transform.ErrShortDst
+				break
+			}
+			nDst += size
+			nSrc += size
+			continue
+		}
+
+		// Look for short source data.
+		if !atEOF && !utf8.FullRune(src[nSrc:]) {
+			err = transform.ErrShortSrc
+			break
+		}
+
+		// We have an invalid rune.
+		if nDst+3 > len(dst) {
+			err = transform.ErrShortDst
+			break
+		}
+		dst[nDst+0] = runeErrorString[0]
+		dst[nDst+1] = runeErrorString[1]
+		dst[nDst+2] = runeErrorString[2]
+		nDst += 3
+		nSrc++
+	}
+	return nDst, nSrc, err
+}

vendor/golang.org/x/text/transform/transform.go 🔗

@@ -0,0 +1,705 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package transform provides reader and writer wrappers that transform the
+// bytes passing through as well as various transformations. Example
+// transformations provided by other packages include normalization and
+// conversion between character sets.
+package transform // import "golang.org/x/text/transform"
+
+import (
+	"bytes"
+	"errors"
+	"io"
+	"unicode/utf8"
+)
+
+var (
+	// ErrShortDst means that the destination buffer was too short to
+	// receive all of the transformed bytes.
+	ErrShortDst = errors.New("transform: short destination buffer")
+
+	// ErrShortSrc means that the source buffer has insufficient data to
+	// complete the transformation.
+	ErrShortSrc = errors.New("transform: short source buffer")
+
+	// ErrEndOfSpan means that the input and output (the transformed input)
+	// are not identical.
+	ErrEndOfSpan = errors.New("transform: input and output are not identical")
+
+	// errInconsistentByteCount means that Transform returned success (nil
+	// error) but also returned nSrc inconsistent with the src argument.
+	errInconsistentByteCount = errors.New("transform: inconsistent byte count returned")
+
+	// errShortInternal means that an internal buffer is not large enough
+	// to make progress and the Transform operation must be aborted.
+	errShortInternal = errors.New("transform: short internal buffer")
+)
+
+// Transformer transforms bytes.
+type Transformer interface {
+	// Transform writes to dst the transformed bytes read from src, and
+	// returns the number of dst bytes written and src bytes read. The
+	// atEOF argument tells whether src represents the last bytes of the
+	// input.
+	//
+	// Callers should always process the nDst bytes produced and account
+	// for the nSrc bytes consumed before considering the error err.
+	//
+	// A nil error means that all of the transformed bytes (whether freshly
+	// transformed from src or left over from previous Transform calls)
+	// were written to dst. A nil error can be returned regardless of
+	// whether atEOF is true. If err is nil then nSrc must equal len(src);
+	// the converse is not necessarily true.
+	//
+	// ErrShortDst means that dst was too short to receive all of the
+	// transformed bytes. ErrShortSrc means that src had insufficient data
+	// to complete the transformation. If both conditions apply, then
+	// either error may be returned. Other than the error conditions listed
+	// here, implementations are free to report other errors that arise.
+	Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
+
+	// Reset resets the state and allows a Transformer to be reused.
+	Reset()
+}
+
+// SpanningTransformer extends the Transformer interface with a Span method
+// that determines how much of the input already conforms to the Transformer.
+type SpanningTransformer interface {
+	Transformer
+
+	// Span returns a position in src such that transforming src[:n] results in
+	// identical output src[:n] for these bytes. It does not necessarily return
+	// the largest such n. The atEOF argument tells whether src represents the
+	// last bytes of the input.
+	//
+	// Callers should always account for the n bytes consumed before
+	// considering the error err.
+	//
+	// A nil error means that all input bytes are known to be identical to the
+	// output produced by the Transformer. A nil error can be returned
+	// regardless of whether atEOF is true. If err is nil, then n must
+	// equal len(src); the converse is not necessarily true.
+	//
+	// ErrEndOfSpan means that the Transformer output may differ from the
+	// input after n bytes. Note that n may be len(src), meaning that the output
+	// would contain additional bytes after otherwise identical output.
+	// ErrShortSrc means that src had insufficient data to determine whether the
+	// remaining bytes would change. Other than the error conditions listed
+	// here, implementations are free to report other errors that arise.
+	//
+	// Calling Span can modify the Transformer state as a side effect. In
+	// effect, it does the transformation just as calling Transform would, only
+	// without copying to a destination buffer and only up to a point it can
+	// determine the input and output bytes are the same. This is obviously more
+	// limited than calling Transform, but can be more efficient in terms of
+	// copying and allocating buffers. Calls to Span and Transform may be
+	// interleaved.
+	Span(src []byte, atEOF bool) (n int, err error)
+}
+
+// NopResetter can be embedded by implementations of Transformer to add a nop
+// Reset method.
+type NopResetter struct{}
+
+// Reset implements the Reset method of the Transformer interface.
+func (NopResetter) Reset() {}
+
+// Reader wraps another io.Reader by transforming the bytes read.
+type Reader struct {
+	r   io.Reader
+	t   Transformer
+	err error
+
+	// dst[dst0:dst1] contains bytes that have been transformed by t but
+	// not yet copied out via Read.
+	dst        []byte
+	dst0, dst1 int
+
+	// src[src0:src1] contains bytes that have been read from r but not
+	// yet transformed through t.
+	src        []byte
+	src0, src1 int
+
+	// transformComplete is whether the transformation is complete,
+	// regardless of whether or not it was successful.
+	transformComplete bool
+}
+
+const defaultBufSize = 4096
+
+// NewReader returns a new Reader that wraps r by transforming the bytes read
+// via t. It calls Reset on t.
+func NewReader(r io.Reader, t Transformer) *Reader {
+	t.Reset()
+	return &Reader{
+		r:   r,
+		t:   t,
+		dst: make([]byte, defaultBufSize),
+		src: make([]byte, defaultBufSize),
+	}
+}
+
+// Read implements the io.Reader interface.
+func (r *Reader) Read(p []byte) (int, error) {
+	n, err := 0, error(nil)
+	for {
+		// Copy out any transformed bytes and return the final error if we are done.
+		if r.dst0 != r.dst1 {
+			n = copy(p, r.dst[r.dst0:r.dst1])
+			r.dst0 += n
+			if r.dst0 == r.dst1 && r.transformComplete {
+				return n, r.err
+			}
+			return n, nil
+		} else if r.transformComplete {
+			return 0, r.err
+		}
+
+		// Try to transform some source bytes, or to flush the transformer if we
+		// are out of source bytes. We do this even if r.r.Read returned an error.
+		// As the io.Reader documentation says, "process the n > 0 bytes returned
+		// before considering the error".
+		if r.src0 != r.src1 || r.err != nil {
+			r.dst0 = 0
+			r.dst1, n, err = r.t.Transform(r.dst, r.src[r.src0:r.src1], r.err == io.EOF)
+			r.src0 += n
+
+			switch {
+			case err == nil:
+				if r.src0 != r.src1 {
+					r.err = errInconsistentByteCount
+				}
+				// The Transform call was successful; we are complete if we
+				// cannot read more bytes into src.
+				r.transformComplete = r.err != nil
+				continue
+			case err == ErrShortDst && (r.dst1 != 0 || n != 0):
+				// Make room in dst by copying out, and try again.
+				continue
+			case err == ErrShortSrc && r.src1-r.src0 != len(r.src) && r.err == nil:
+				// Read more bytes into src via the code below, and try again.
+			default:
+				r.transformComplete = true
+				// The reader error (r.err) takes precedence over the
+				// transformer error (err) unless r.err is nil or io.EOF.
+				if r.err == nil || r.err == io.EOF {
+					r.err = err
+				}
+				continue
+			}
+		}
+
+		// Move any untransformed source bytes to the start of the buffer
+		// and read more bytes.
+		if r.src0 != 0 {
+			r.src0, r.src1 = 0, copy(r.src, r.src[r.src0:r.src1])
+		}
+		n, r.err = r.r.Read(r.src[r.src1:])
+		r.src1 += n
+	}
+}
+
+// TODO: implement ReadByte (and ReadRune??).
+
+// Writer wraps another io.Writer by transforming the bytes read.
+// The user needs to call Close to flush unwritten bytes that may
+// be buffered.
+type Writer struct {
+	w   io.Writer
+	t   Transformer
+	dst []byte
+
+	// src[:n] contains bytes that have not yet passed through t.
+	src []byte
+	n   int
+}
+
+// NewWriter returns a new Writer that wraps w by transforming the bytes written
+// via t. It calls Reset on t.
+func NewWriter(w io.Writer, t Transformer) *Writer {
+	t.Reset()
+	return &Writer{
+		w:   w,
+		t:   t,
+		dst: make([]byte, defaultBufSize),
+		src: make([]byte, defaultBufSize),
+	}
+}
+
+// Write implements the io.Writer interface. If there are not enough
+// bytes available to complete a Transform, the bytes will be buffered
+// for the next write. Call Close to convert the remaining bytes.
+func (w *Writer) Write(data []byte) (n int, err error) {
+	src := data
+	if w.n > 0 {
+		// Append bytes from data to the last remainder.
+		// TODO: limit the amount copied on first try.
+		n = copy(w.src[w.n:], data)
+		w.n += n
+		src = w.src[:w.n]
+	}
+	for {
+		nDst, nSrc, err := w.t.Transform(w.dst, src, false)
+		if _, werr := w.w.Write(w.dst[:nDst]); werr != nil {
+			return n, werr
+		}
+		src = src[nSrc:]
+		if w.n == 0 {
+			n += nSrc
+		} else if len(src) <= n {
+			// Enough bytes from w.src have been consumed. We make src point
+			// to data instead to reduce the copying.
+			w.n = 0
+			n -= len(src)
+			src = data[n:]
+			if n < len(data) && (err == nil || err == ErrShortSrc) {
+				continue
+			}
+		}
+		switch err {
+		case ErrShortDst:
+			// This error is okay as long as we are making progress.
+			if nDst > 0 || nSrc > 0 {
+				continue
+			}
+		case ErrShortSrc:
+			if len(src) < len(w.src) {
+				m := copy(w.src, src)
+				// If w.n > 0, bytes from data were already copied to w.src and n
+				// was already set to the number of bytes consumed.
+				if w.n == 0 {
+					n += m
+				}
+				w.n = m
+				err = nil
+			} else if nDst > 0 || nSrc > 0 {
+				// Not enough buffer to store the remainder. Keep processing as
+				// long as there is progress. Without this case, transforms that
+				// require a lookahead larger than the buffer may result in an
+				// error. This is not something one may expect to be common in
+				// practice, but it may occur when buffers are set to small
+				// sizes during testing.
+				continue
+			}
+		case nil:
+			if w.n > 0 {
+				err = errInconsistentByteCount
+			}
+		}
+		return n, err
+	}
+}
+
+// Close implements the io.Closer interface.
+func (w *Writer) Close() error {
+	src := w.src[:w.n]
+	for {
+		nDst, nSrc, err := w.t.Transform(w.dst, src, true)
+		if _, werr := w.w.Write(w.dst[:nDst]); werr != nil {
+			return werr
+		}
+		if err != ErrShortDst {
+			return err
+		}
+		src = src[nSrc:]
+	}
+}
+
+type nop struct{ NopResetter }
+
+func (nop) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
+	n := copy(dst, src)
+	if n < len(src) {
+		err = ErrShortDst
+	}
+	return n, n, err
+}
+
+func (nop) Span(src []byte, atEOF bool) (n int, err error) {
+	return len(src), nil
+}
+
+type discard struct{ NopResetter }
+
+func (discard) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
+	return 0, len(src), nil
+}
+
+var (
+	// Discard is a Transformer for which all Transform calls succeed
+	// by consuming all bytes and writing nothing.
+	Discard Transformer = discard{}
+
+	// Nop is a SpanningTransformer that copies src to dst.
+	Nop SpanningTransformer = nop{}
+)
+
+// chain is a sequence of links. A chain with N Transformers has N+1 links and
+// N+1 buffers. Of those N+1 buffers, the first and last are the src and dst
+// buffers given to chain.Transform and the middle N-1 buffers are intermediate
+// buffers owned by the chain. The i'th link transforms bytes from the i'th
+// buffer chain.link[i].b at read offset chain.link[i].p to the i+1'th buffer
+// chain.link[i+1].b at write offset chain.link[i+1].n, for i in [0, N).
+type chain struct {
+	link []link
+	err  error
+	// errStart is the index at which the error occurred plus 1. Processing
+	// errStart at this level at the next call to Transform. As long as
+	// errStart > 0, chain will not consume any more source bytes.
+	errStart int
+}
+
+func (c *chain) fatalError(errIndex int, err error) {
+	if i := errIndex + 1; i > c.errStart {
+		c.errStart = i
+		c.err = err
+	}
+}
+
+type link struct {
+	t Transformer
+	// b[p:n] holds the bytes to be transformed by t.
+	b []byte
+	p int
+	n int
+}
+
+func (l *link) src() []byte {
+	return l.b[l.p:l.n]
+}
+
+func (l *link) dst() []byte {
+	return l.b[l.n:]
+}
+
+// Chain returns a Transformer that applies t in sequence.
+func Chain(t ...Transformer) Transformer {
+	if len(t) == 0 {
+		return nop{}
+	}
+	c := &chain{link: make([]link, len(t)+1)}
+	for i, tt := range t {
+		c.link[i].t = tt
+	}
+	// Allocate intermediate buffers.
+	b := make([][defaultBufSize]byte, len(t)-1)
+	for i := range b {
+		c.link[i+1].b = b[i][:]
+	}
+	return c
+}
+
+// Reset resets the state of Chain. It calls Reset on all the Transformers.
+func (c *chain) Reset() {
+	for i, l := range c.link {
+		if l.t != nil {
+			l.t.Reset()
+		}
+		c.link[i].p, c.link[i].n = 0, 0
+	}
+}
+
+// TODO: make chain use Span (is going to be fun to implement!)
+
+// Transform applies the transformers of c in sequence.
+func (c *chain) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
+	// Set up src and dst in the chain.
+	srcL := &c.link[0]
+	dstL := &c.link[len(c.link)-1]
+	srcL.b, srcL.p, srcL.n = src, 0, len(src)
+	dstL.b, dstL.n = dst, 0
+	var lastFull, needProgress bool // for detecting progress
+
+	// i is the index of the next Transformer to apply, for i in [low, high].
+	// low is the lowest index for which c.link[low] may still produce bytes.
+	// high is the highest index for which c.link[high] has a Transformer.
+	// The error returned by Transform determines whether to increase or
+	// decrease i. We try to completely fill a buffer before converting it.
+	for low, i, high := c.errStart, c.errStart, len(c.link)-2; low <= i && i <= high; {
+		in, out := &c.link[i], &c.link[i+1]
+		nDst, nSrc, err0 := in.t.Transform(out.dst(), in.src(), atEOF && low == i)
+		out.n += nDst
+		in.p += nSrc
+		if i > 0 && in.p == in.n {
+			in.p, in.n = 0, 0
+		}
+		needProgress, lastFull = lastFull, false
+		switch err0 {
+		case ErrShortDst:
+			// Process the destination buffer next. Return if we are already
+			// at the high index.
+			if i == high {
+				return dstL.n, srcL.p, ErrShortDst
+			}
+			if out.n != 0 {
+				i++
+				// If the Transformer at the next index is not able to process any
+				// source bytes there is nothing that can be done to make progress
+				// and the bytes will remain unprocessed. lastFull is used to
+				// detect this and break out of the loop with a fatal error.
+				lastFull = true
+				continue
+			}
+			// The destination buffer was too small, but is completely empty.
+			// Return a fatal error as this transformation can never complete.
+			c.fatalError(i, errShortInternal)
+		case ErrShortSrc:
+			if i == 0 {
+				// Save ErrShortSrc in err. All other errors take precedence.
+				err = ErrShortSrc
+				break
+			}
+			// Source bytes were depleted before filling up the destination buffer.
+			// Verify we made some progress, move the remaining bytes to the errStart
+			// and try to get more source bytes.
+			if needProgress && nSrc == 0 || in.n-in.p == len(in.b) {
+				// There were not enough source bytes to proceed while the source
+				// buffer cannot hold any more bytes. Return a fatal error as this
+				// transformation can never complete.
+				c.fatalError(i, errShortInternal)
+				break
+			}
+			// in.b is an internal buffer and we can make progress.
+			in.p, in.n = 0, copy(in.b, in.src())
+			fallthrough
+		case nil:
+			// if i == low, we have depleted the bytes at index i or any lower levels.
+			// In that case we increase low and i. In all other cases we decrease i to
+			// fetch more bytes before proceeding to the next index.
+			if i > low {
+				i--
+				continue
+			}
+		default:
+			c.fatalError(i, err0)
+		}
+		// Exhausted level low or fatal error: increase low and continue
+		// to process the bytes accepted so far.
+		i++
+		low = i
+	}
+
+	// If c.errStart > 0, this means we found a fatal error.  We will clear
+	// all upstream buffers. At this point, no more progress can be made
+	// downstream, as Transform would have bailed while handling ErrShortDst.
+	if c.errStart > 0 {
+		for i := 1; i < c.errStart; i++ {
+			c.link[i].p, c.link[i].n = 0, 0
+		}
+		err, c.errStart, c.err = c.err, 0, nil
+	}
+	return dstL.n, srcL.p, err
+}
+
+// Deprecated: Use runes.Remove instead.
+func RemoveFunc(f func(r rune) bool) Transformer {
+	return removeF(f)
+}
+
+type removeF func(r rune) bool
+
+func (removeF) Reset() {}
+
+// Transform implements the Transformer interface.
+func (t removeF) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
+	for r, sz := rune(0), 0; len(src) > 0; src = src[sz:] {
+
+		if r = rune(src[0]); r < utf8.RuneSelf {
+			sz = 1
+		} else {
+			r, sz = utf8.DecodeRune(src)
+
+			if sz == 1 {
+				// Invalid rune.
+				if !atEOF && !utf8.FullRune(src) {
+					err = ErrShortSrc
+					break
+				}
+				// We replace illegal bytes with RuneError. Not doing so might
+				// otherwise turn a sequence of invalid UTF-8 into valid UTF-8.
+				// The resulting byte sequence may subsequently contain runes
+				// for which t(r) is true that were passed unnoticed.
+				if !t(r) {
+					if nDst+3 > len(dst) {
+						err = ErrShortDst
+						break
+					}
+					nDst += copy(dst[nDst:], "\uFFFD")
+				}
+				nSrc++
+				continue
+			}
+		}
+
+		if !t(r) {
+			if nDst+sz > len(dst) {
+				err = ErrShortDst
+				break
+			}
+			nDst += copy(dst[nDst:], src[:sz])
+		}
+		nSrc += sz
+	}
+	return
+}
+
+// grow returns a new []byte that is longer than b, and copies the first n bytes
+// of b to the start of the new slice.
+func grow(b []byte, n int) []byte {
+	m := len(b)
+	if m <= 32 {
+		m = 64
+	} else if m <= 256 {
+		m *= 2
+	} else {
+		m += m >> 1
+	}
+	buf := make([]byte, m)
+	copy(buf, b[:n])
+	return buf
+}
+
+const initialBufSize = 128
+
+// String returns a string with the result of converting s[:n] using t, where
+// n <= len(s). If err == nil, n will be len(s). It calls Reset on t.
+func String(t Transformer, s string) (result string, n int, err error) {
+	t.Reset()
+	if s == "" {
+		// Fast path for the common case for empty input. Results in about a
+		// 86% reduction of running time for BenchmarkStringLowerEmpty.
+		if _, _, err := t.Transform(nil, nil, true); err == nil {
+			return "", 0, nil
+		}
+	}
+
+	// Allocate only once. Note that both dst and src escape when passed to
+	// Transform.
+	buf := [2 * initialBufSize]byte{}
+	dst := buf[:initialBufSize:initialBufSize]
+	src := buf[initialBufSize : 2*initialBufSize]
+
+	// The input string s is transformed in multiple chunks (starting with a
+	// chunk size of initialBufSize). nDst and nSrc are per-chunk (or
+	// per-Transform-call) indexes, pDst and pSrc are overall indexes.
+	nDst, nSrc := 0, 0
+	pDst, pSrc := 0, 0
+
+	// pPrefix is the length of a common prefix: the first pPrefix bytes of the
+	// result will equal the first pPrefix bytes of s. It is not guaranteed to
+	// be the largest such value, but if pPrefix, len(result) and len(s) are
+	// all equal after the final transform (i.e. calling Transform with atEOF
+	// being true returned nil error) then we don't need to allocate a new
+	// result string.
+	pPrefix := 0
+	for {
+		// Invariant: pDst == pPrefix && pSrc == pPrefix.
+
+		n := copy(src, s[pSrc:])
+		nDst, nSrc, err = t.Transform(dst, src[:n], pSrc+n == len(s))
+		pDst += nDst
+		pSrc += nSrc
+
+		// TODO:  let transformers implement an optional Spanner interface, akin
+		// to norm's QuickSpan. This would even allow us to avoid any allocation.
+		if !bytes.Equal(dst[:nDst], src[:nSrc]) {
+			break
+		}
+		pPrefix = pSrc
+		if err == ErrShortDst {
+			// A buffer can only be short if a transformer modifies its input.
+			break
+		} else if err == ErrShortSrc {
+			if nSrc == 0 {
+				// No progress was made.
+				break
+			}
+			// Equal so far and !atEOF, so continue checking.
+		} else if err != nil || pPrefix == len(s) {
+			return string(s[:pPrefix]), pPrefix, err
+		}
+	}
+	// Post-condition: pDst == pPrefix + nDst && pSrc == pPrefix + nSrc.
+
+	// We have transformed the first pSrc bytes of the input s to become pDst
+	// transformed bytes. Those transformed bytes are discontiguous: the first
+	// pPrefix of them equal s[:pPrefix] and the last nDst of them equal
+	// dst[:nDst]. We copy them around, into a new dst buffer if necessary, so
+	// that they become one contiguous slice: dst[:pDst].
+	if pPrefix != 0 {
+		newDst := dst
+		if pDst > len(newDst) {
+			newDst = make([]byte, len(s)+nDst-nSrc)
+		}
+		copy(newDst[pPrefix:pDst], dst[:nDst])
+		copy(newDst[:pPrefix], s[:pPrefix])
+		dst = newDst
+	}
+
+	// Prevent duplicate Transform calls with atEOF being true at the end of
+	// the input. Also return if we have an unrecoverable error.
+	if (err == nil && pSrc == len(s)) ||
+		(err != nil && err != ErrShortDst && err != ErrShortSrc) {
+		return string(dst[:pDst]), pSrc, err
+	}
+
+	// Transform the remaining input, growing dst and src buffers as necessary.
+	for {
+		n := copy(src, s[pSrc:])
+		nDst, nSrc, err := t.Transform(dst[pDst:], src[:n], pSrc+n == len(s))
+		pDst += nDst
+		pSrc += nSrc
+
+		// If we got ErrShortDst or ErrShortSrc, do not grow as long as we can
+		// make progress. This may avoid excessive allocations.
+		if err == ErrShortDst {
+			if nDst == 0 {
+				dst = grow(dst, pDst)
+			}
+		} else if err == ErrShortSrc {
+			if nSrc == 0 {
+				src = grow(src, 0)
+			}
+		} else if err != nil || pSrc == len(s) {
+			return string(dst[:pDst]), pSrc, err
+		}
+	}
+}
+
+// Bytes returns a new byte slice with the result of converting b[:n] using t,
+// where n <= len(b). If err == nil, n will be len(b). It calls Reset on t.
+func Bytes(t Transformer, b []byte) (result []byte, n int, err error) {
+	return doAppend(t, 0, make([]byte, len(b)), b)
+}
+
+// Append appends the result of converting src[:n] using t to dst, where
+// n <= len(src), If err == nil, n will be len(src). It calls Reset on t.
+func Append(t Transformer, dst, src []byte) (result []byte, n int, err error) {
+	if len(dst) == cap(dst) {
+		n := len(src) + len(dst) // It is okay for this to be 0.
+		b := make([]byte, n)
+		dst = b[:copy(b, dst)]
+	}
+	return doAppend(t, len(dst), dst[:cap(dst)], src)
+}
+
+func doAppend(t Transformer, pDst int, dst, src []byte) (result []byte, n int, err error) {
+	t.Reset()
+	pSrc := 0
+	for {
+		nDst, nSrc, err := t.Transform(dst[pDst:], src[pSrc:], true)
+		pDst += nDst
+		pSrc += nSrc
+		if err != ErrShortDst {
+			return dst[:pDst], pSrc, err
+		}
+
+		// Grow the destination buffer, but do not grow as long as we can make
+		// progress. This may avoid excessive allocations.
+		if nDst == 0 {
+			dst = grow(dst, pDst)
+		}
+	}
+}