escape.go

  1// Copyright 2009 The Go Authors. All rights reserved.
  2// Use of this source code is governed by a BSD-style
  3// license that can be found in the LICENSE file.
  4
  5// Copied and modified from Go 1.14 stdlib's encoding/xml
  6
  7package xml
  8
  9import (
 10	"unicode/utf8"
 11)
 12
 13// Copied from Go 1.14 stdlib's encoding/xml
 14var (
 15	escQuot = []byte(""") // shorter than """
 16	escApos = []byte("'") // shorter than "'"
 17	escAmp  = []byte("&")
 18	escLT   = []byte("<")
 19	escGT   = []byte(">")
 20	escTab  = []byte("	")
 21	escNL   = []byte("
")
 22	escCR   = []byte("
")
 23	escFFFD = []byte("\uFFFD") // Unicode replacement character
 24
 25	// Additional Escapes
 26	escNextLine = []byte("…")
 27	escLS       = []byte("
")
 28)
 29
 30// Decide whether the given rune is in the XML Character Range, per
 31// the Char production of https://www.xml.com/axml/testaxml.htm,
 32// Section 2.2 Characters.
 33func isInCharacterRange(r rune) (inrange bool) {
 34	return r == 0x09 ||
 35		r == 0x0A ||
 36		r == 0x0D ||
 37		r >= 0x20 && r <= 0xD7FF ||
 38		r >= 0xE000 && r <= 0xFFFD ||
 39		r >= 0x10000 && r <= 0x10FFFF
 40}
 41
 42// TODO: When do we need to escape the string?
 43// Based on encoding/xml escapeString from the Go Standard Library.
 44// https://golang.org/src/encoding/xml/xml.go
 45func escapeString(e writer, s string) {
 46	var esc []byte
 47	last := 0
 48	for i := 0; i < len(s); {
 49		r, width := utf8.DecodeRuneInString(s[i:])
 50		i += width
 51		switch r {
 52		case '"':
 53			esc = escQuot
 54		case '\'':
 55			esc = escApos
 56		case '&':
 57			esc = escAmp
 58		case '<':
 59			esc = escLT
 60		case '>':
 61			esc = escGT
 62		case '\t':
 63			esc = escTab
 64		case '\n':
 65			esc = escNL
 66		case '\r':
 67			esc = escCR
 68		case '\u0085':
 69			// Not escaped by stdlib
 70			esc = escNextLine
 71		case '\u2028':
 72			// Not escaped by stdlib
 73			esc = escLS
 74		default:
 75			if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
 76				esc = escFFFD
 77				break
 78			}
 79			continue
 80		}
 81		e.WriteString(s[last : i-width])
 82		e.Write(esc)
 83		last = i
 84	}
 85	e.WriteString(s[last:])
 86}
 87
 88// escapeText writes to w the properly escaped XML equivalent
 89// of the plain text data s. If escapeNewline is true, newline
 90// characters will be escaped.
 91//
 92// Based on encoding/xml escapeText from the Go Standard Library.
 93// https://golang.org/src/encoding/xml/xml.go
 94func escapeText(e writer, s []byte) {
 95	var esc []byte
 96	last := 0
 97	for i := 0; i < len(s); {
 98		r, width := utf8.DecodeRune(s[i:])
 99		i += width
100		switch r {
101		case '"':
102			esc = escQuot
103		case '\'':
104			esc = escApos
105		case '&':
106			esc = escAmp
107		case '<':
108			esc = escLT
109		case '>':
110			esc = escGT
111		case '\t':
112			esc = escTab
113		case '\n':
114			// This always escapes newline, which is different than stdlib's optional
115			// escape of new line.
116			esc = escNL
117		case '\r':
118			esc = escCR
119		case '\u0085':
120			// Not escaped by stdlib
121			esc = escNextLine
122		case '\u2028':
123			// Not escaped by stdlib
124			esc = escLS
125		default:
126			if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
127				esc = escFFFD
128				break
129			}
130			continue
131		}
132		e.Write(s[last : i-width])
133		e.Write(esc)
134		last = i
135	}
136	e.Write(s[last:])
137}