1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Copied and modified from Go 1.14 stdlib's encoding/xml
6
7package xml
8
9import (
10 "unicode/utf8"
11)
12
// Replacement byte sequences written in place of characters that must not
// appear literally in XML output.
//
// Copied from Go 1.14 stdlib's encoding/xml
var (
	escQuot = []byte("&#34;") // shorter than "&quot;"
	escApos = []byte("&#39;") // shorter than "&apos;"
	escAmp  = []byte("&amp;")
	escLT   = []byte("&lt;")
	escGT   = []byte("&gt;")
	escTab  = []byte("&#x9;")
	escNL   = []byte("&#xA;")
	escCR   = []byte("&#xD;")
	escFFFD = []byte("\uFFFD") // Unicode replacement character

	// Additional Escapes (not part of the stdlib table)
	escNextLine = []byte("&#x85;")   // U+0085 NEXT LINE
	escLS       = []byte("&#x2028;") // U+2028 LINE SEPARATOR
)
29
// isInCharacterRange reports whether r is a legal XML character, following
// the Char production of https://www.xml.com/axml/testaxml.htm,
// Section 2.2 Characters.
//
// Accepted values are tab, line feed, carriage return, and the code point
// ranges 0x20-0xD7FF, 0xE000-0xFFFD and 0x10000-0x10FFFF. Everything else
// (including 0xD800-0xDFFF, 0xFFFE and 0xFFFF) is rejected.
func isInCharacterRange(r rune) (inrange bool) {
	switch {
	case r == 0x09, r == 0x0A, r == 0x0D:
		// The only characters below 0x20 that are permitted.
		return true
	case 0x20 <= r && r <= 0xD7FF:
		return true
	case 0xE000 <= r && r <= 0xFFFD:
		return true
	case 0x10000 <= r && r <= 0x10FFFF:
		return true
	default:
		return false
	}
}
41
42// TODO: When do we need to escape the string?
43// Based on encoding/xml escapeString from the Go Standard Library.
44// https://golang.org/src/encoding/xml/xml.go
45func escapeString(e writer, s string) {
46 var esc []byte
47 last := 0
48 for i := 0; i < len(s); {
49 r, width := utf8.DecodeRuneInString(s[i:])
50 i += width
51 switch r {
52 case '"':
53 esc = escQuot
54 case '\'':
55 esc = escApos
56 case '&':
57 esc = escAmp
58 case '<':
59 esc = escLT
60 case '>':
61 esc = escGT
62 case '\t':
63 esc = escTab
64 case '\n':
65 esc = escNL
66 case '\r':
67 esc = escCR
68 case '\u0085':
69 // Not escaped by stdlib
70 esc = escNextLine
71 case '\u2028':
72 // Not escaped by stdlib
73 esc = escLS
74 default:
75 if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
76 esc = escFFFD
77 break
78 }
79 continue
80 }
81 e.WriteString(s[last : i-width])
82 e.Write(esc)
83 last = i
84 }
85 e.WriteString(s[last:])
86}
87
88// escapeText writes to w the properly escaped XML equivalent
89// of the plain text data s. If escapeNewline is true, newline
90// characters will be escaped.
91//
92// Based on encoding/xml escapeText from the Go Standard Library.
93// https://golang.org/src/encoding/xml/xml.go
94func escapeText(e writer, s []byte) {
95 var esc []byte
96 last := 0
97 for i := 0; i < len(s); {
98 r, width := utf8.DecodeRune(s[i:])
99 i += width
100 switch r {
101 case '"':
102 esc = escQuot
103 case '\'':
104 esc = escApos
105 case '&':
106 esc = escAmp
107 case '<':
108 esc = escLT
109 case '>':
110 esc = escGT
111 case '\t':
112 esc = escTab
113 case '\n':
114 // This always escapes newline, which is different than stdlib's optional
115 // escape of new line.
116 esc = escNL
117 case '\r':
118 esc = escCR
119 case '\u0085':
120 // Not escaped by stdlib
121 esc = escNextLine
122 case '\u2028':
123 // Not escaped by stdlib
124 esc = escLS
125 default:
126 if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
127 esc = escFFFD
128 break
129 }
130 continue
131 }
132 e.Write(s[last : i-width])
133 e.Write(esc)
134 last = i
135 }
136 e.Write(s[last:])
137}