transform.go

 1package text
 2
 3import (
 4	"strings"
 5	"unicode"
 6
 7	"golang.org/x/text/runes"
 8	"golang.org/x/text/transform"
 9)
10
11func Cleanup(text string) string {
12	// windows new line, Github, really ?
13	text = strings.Replace(text, "\r\n", "\n", -1)
14
15	// remove all unicode control characters except
16	// '\n', '\r' and '\t'
17	t := runes.Remove(runes.Predicate(func(r rune) bool {
18		switch r {
19		case '\r', '\n', '\t':
20			return false
21		}
22		return unicode.IsControl(r)
23	}))
24	sanitized, _, err := transform.String(t, text)
25	if err != nil {
26		// transform.String should never return an error as our transformer doesn't returns one.
27		// Confirmed with fuzzing.
28		panic(err)
29	}
30
31	// trim extra new line not displayed in the github UI but still present in the data
32	return strings.TrimSpace(sanitized)
33}
34
35func CleanupOneLine(text string) string {
36	// remove all unicode control characters *including*
37	// '\n', '\r' and '\t'
38	t := runes.Remove(runes.Predicate(unicode.IsControl))
39	sanitized, _, err := transform.String(t, text)
40	if err != nil {
41		// transform.String should never return an error as our transformer doesn't returns one.
42		// Confirmed with fuzzing.
43		panic(err)
44	}
45
46	// trim extra new line not displayed in the github UI but still present in the data
47	return strings.TrimSpace(sanitized)
48}
49
50func CleanupOneLineArray(texts []string) []string {
51	for i := range texts {
52		texts[i] = CleanupOneLine(texts[i])
53	}
54	return texts
55}