smartypants.go

  1package html
  2
  3import (
  4	"bytes"
  5	"io"
  6)
  7
  8// SmartyPants rendering
  9
 10// SPRenderer is a struct containing state of a Smartypants renderer.
 11type SPRenderer struct {
 12	inSingleQuote bool
 13	inDoubleQuote bool
 14	callbacks     [256]smartCallback
 15}
 16
 17func wordBoundary(c byte) bool {
 18	return c == 0 || isSpace(c) || isPunctuation(c)
 19}
 20
 21func tolower(c byte) byte {
 22	if c >= 'A' && c <= 'Z' {
 23		return c - 'A' + 'a'
 24	}
 25	return c
 26}
 27
 28func isdigit(c byte) bool {
 29	return c >= '0' && c <= '9'
 30}
 31
 32func smartQuoteHelper(out *bytes.Buffer, previousChar byte, nextChar byte, quote byte, isOpen *bool, addNBSP bool) bool {
 33	// edge of the buffer is likely to be a tag that we don't get to see,
 34	// so we treat it like text sometimes
 35
 36	// enumerate all sixteen possibilities for (previousChar, nextChar)
 37	// each can be one of {0, space, punct, other}
 38	switch {
 39	case previousChar == 0 && nextChar == 0:
 40		// context is not any help here, so toggle
 41		*isOpen = !*isOpen
 42	case isSpace(previousChar) && nextChar == 0:
 43		// [ "] might be [ "<code>foo...]
 44		*isOpen = true
 45	case isPunctuation(previousChar) && nextChar == 0:
 46		// [!"] hmm... could be [Run!"] or [("<code>...]
 47		*isOpen = false
 48	case /* isnormal(previousChar) && */ nextChar == 0:
 49		// [a"] is probably a close
 50		*isOpen = false
 51	case previousChar == 0 && isSpace(nextChar):
 52		// [" ] might be [...foo</code>" ]
 53		*isOpen = false
 54	case isSpace(previousChar) && isSpace(nextChar):
 55		// [ " ] context is not any help here, so toggle
 56		*isOpen = !*isOpen
 57	case isPunctuation(previousChar) && isSpace(nextChar):
 58		// [!" ] is probably a close
 59		*isOpen = false
 60	case /* isnormal(previousChar) && */ isSpace(nextChar):
 61		// [a" ] this is one of the easy cases
 62		*isOpen = false
 63	case previousChar == 0 && isPunctuation(nextChar):
 64		// ["!] hmm... could be ["$1.95] or [</code>"!...]
 65		*isOpen = false
 66	case isSpace(previousChar) && isPunctuation(nextChar):
 67		// [ "!] looks more like [ "$1.95]
 68		*isOpen = true
 69	case isPunctuation(previousChar) && isPunctuation(nextChar):
 70		// [!"!] context is not any help here, so toggle
 71		*isOpen = !*isOpen
 72	case /* isnormal(previousChar) && */ isPunctuation(nextChar):
 73		// [a"!] is probably a close
 74		*isOpen = false
 75	case previousChar == 0 /* && isnormal(nextChar) */ :
 76		// ["a] is probably an open
 77		*isOpen = true
 78	case isSpace(previousChar) /* && isnormal(nextChar) */ :
 79		// [ "a] this is one of the easy cases
 80		*isOpen = true
 81	case isPunctuation(previousChar) /* && isnormal(nextChar) */ :
 82		// [!"a] is probably an open
 83		*isOpen = true
 84	default:
 85		// [a'b] maybe a contraction?
 86		*isOpen = false
 87	}
 88
 89	// Note that with the limited lookahead, this non-breaking
 90	// space will also be appended to single double quotes.
 91	if addNBSP && !*isOpen {
 92		out.WriteString("&nbsp;")
 93	}
 94
 95	out.WriteByte('&')
 96	if *isOpen {
 97		out.WriteByte('l')
 98	} else {
 99		out.WriteByte('r')
100	}
101	out.WriteByte(quote)
102	out.WriteString("quo;")
103
104	if addNBSP && *isOpen {
105		out.WriteString("&nbsp;")
106	}
107
108	return true
109}
110
111func (r *SPRenderer) smartSingleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
112	if len(text) >= 2 {
113		t1 := tolower(text[1])
114
115		if t1 == '\'' {
116			nextChar := byte(0)
117			if len(text) >= 3 {
118				nextChar = text[2]
119			}
120			if smartQuoteHelper(out, previousChar, nextChar, 'd', &r.inDoubleQuote, false) {
121				return 1
122			}
123		}
124
125		if (t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') && (len(text) < 3 || wordBoundary(text[2])) {
126			out.WriteString("&rsquo;")
127			return 0
128		}
129
130		if len(text) >= 3 {
131			t2 := tolower(text[2])
132
133			if ((t1 == 'r' && t2 == 'e') || (t1 == 'l' && t2 == 'l') || (t1 == 'v' && t2 == 'e')) &&
134				(len(text) < 4 || wordBoundary(text[3])) {
135				out.WriteString("&rsquo;")
136				return 0
137			}
138		}
139	}
140
141	nextChar := byte(0)
142	if len(text) > 1 {
143		nextChar = text[1]
144	}
145	if smartQuoteHelper(out, previousChar, nextChar, 's', &r.inSingleQuote, false) {
146		return 0
147	}
148
149	out.WriteByte(text[0])
150	return 0
151}
152
153func (r *SPRenderer) smartParens(out *bytes.Buffer, previousChar byte, text []byte) int {
154	if len(text) >= 3 {
155		t1 := tolower(text[1])
156		t2 := tolower(text[2])
157
158		if t1 == 'c' && t2 == ')' {
159			out.WriteString("&copy;")
160			return 2
161		}
162
163		if t1 == 'r' && t2 == ')' {
164			out.WriteString("&reg;")
165			return 2
166		}
167
168		if len(text) >= 4 && t1 == 't' && t2 == 'm' && text[3] == ')' {
169			out.WriteString("&trade;")
170			return 3
171		}
172	}
173
174	out.WriteByte(text[0])
175	return 0
176}
177
178func (r *SPRenderer) smartDash(out *bytes.Buffer, previousChar byte, text []byte) int {
179	if len(text) >= 2 {
180		if text[1] == '-' {
181			out.WriteString("&mdash;")
182			return 1
183		}
184
185		if wordBoundary(previousChar) && wordBoundary(text[1]) {
186			out.WriteString("&ndash;")
187			return 0
188		}
189	}
190
191	out.WriteByte(text[0])
192	return 0
193}
194
195func (r *SPRenderer) smartDashLatex(out *bytes.Buffer, previousChar byte, text []byte) int {
196	if len(text) >= 3 && text[1] == '-' && text[2] == '-' {
197		out.WriteString("&mdash;")
198		return 2
199	}
200	if len(text) >= 2 && text[1] == '-' {
201		out.WriteString("&ndash;")
202		return 1
203	}
204
205	out.WriteByte(text[0])
206	return 0
207}
208
209func (r *SPRenderer) smartAmpVariant(out *bytes.Buffer, previousChar byte, text []byte, quote byte, addNBSP bool) int {
210	if bytes.HasPrefix(text, []byte("&quot;")) {
211		nextChar := byte(0)
212		if len(text) >= 7 {
213			nextChar = text[6]
214		}
215		if smartQuoteHelper(out, previousChar, nextChar, quote, &r.inDoubleQuote, addNBSP) {
216			return 5
217		}
218	}
219
220	if bytes.HasPrefix(text, []byte("&#0;")) {
221		return 3
222	}
223
224	out.WriteByte('&')
225	return 0
226}
227
228func (r *SPRenderer) smartAmp(angledQuotes, addNBSP bool) func(*bytes.Buffer, byte, []byte) int {
229	var quote byte = 'd'
230	if angledQuotes {
231		quote = 'a'
232	}
233
234	return func(out *bytes.Buffer, previousChar byte, text []byte) int {
235		return r.smartAmpVariant(out, previousChar, text, quote, addNBSP)
236	}
237}
238
239func (r *SPRenderer) smartPeriod(out *bytes.Buffer, previousChar byte, text []byte) int {
240	if len(text) >= 3 && text[1] == '.' && text[2] == '.' {
241		out.WriteString("&hellip;")
242		return 2
243	}
244
245	if len(text) >= 5 && text[1] == ' ' && text[2] == '.' && text[3] == ' ' && text[4] == '.' {
246		out.WriteString("&hellip;")
247		return 4
248	}
249
250	out.WriteByte(text[0])
251	return 0
252}
253
254func (r *SPRenderer) smartBacktick(out *bytes.Buffer, previousChar byte, text []byte) int {
255	if len(text) >= 2 && text[1] == '`' {
256		nextChar := byte(0)
257		if len(text) >= 3 {
258			nextChar = text[2]
259		}
260		if smartQuoteHelper(out, previousChar, nextChar, 'd', &r.inDoubleQuote, false) {
261			return 1
262		}
263	}
264
265	out.WriteByte(text[0])
266	return 0
267}
268
269func (r *SPRenderer) smartNumberGeneric(out *bytes.Buffer, previousChar byte, text []byte) int {
270	if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 {
271		// is it of the form digits/digits(word boundary)?, i.e., \d+/\d+\b
272		// note: check for regular slash (/) or fraction slash (⁄, 0x2044, or 0xe2 81 84 in utf-8)
273		//       and avoid changing dates like 1/23/2005 into fractions.
274		numEnd := 0
275		for len(text) > numEnd && isdigit(text[numEnd]) {
276			numEnd++
277		}
278		if numEnd == 0 {
279			out.WriteByte(text[0])
280			return 0
281		}
282		denStart := numEnd + 1
283		if len(text) > numEnd+3 && text[numEnd] == 0xe2 && text[numEnd+1] == 0x81 && text[numEnd+2] == 0x84 {
284			denStart = numEnd + 3
285		} else if len(text) < numEnd+2 || text[numEnd] != '/' {
286			out.WriteByte(text[0])
287			return 0
288		}
289		denEnd := denStart
290		for len(text) > denEnd && isdigit(text[denEnd]) {
291			denEnd++
292		}
293		if denEnd == denStart {
294			out.WriteByte(text[0])
295			return 0
296		}
297		if len(text) == denEnd || wordBoundary(text[denEnd]) && text[denEnd] != '/' {
298			out.WriteString("<sup>")
299			out.Write(text[:numEnd])
300			out.WriteString("</sup>&frasl;<sub>")
301			out.Write(text[denStart:denEnd])
302			out.WriteString("</sub>")
303			return denEnd - 1
304		}
305	}
306
307	out.WriteByte(text[0])
308	return 0
309}
310
311func (r *SPRenderer) smartNumber(out *bytes.Buffer, previousChar byte, text []byte) int {
312	if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 {
313		if text[0] == '1' && text[1] == '/' && text[2] == '2' {
314			if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' {
315				out.WriteString("&frac12;")
316				return 2
317			}
318		}
319
320		if text[0] == '1' && text[1] == '/' && text[2] == '4' {
321			if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h') {
322				out.WriteString("&frac14;")
323				return 2
324			}
325		}
326
327		if text[0] == '3' && text[1] == '/' && text[2] == '4' {
328			if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's') {
329				out.WriteString("&frac34;")
330				return 2
331			}
332		}
333	}
334
335	out.WriteByte(text[0])
336	return 0
337}
338
339func (r *SPRenderer) smartDoubleQuoteVariant(out *bytes.Buffer, previousChar byte, text []byte, quote byte) int {
340	nextChar := byte(0)
341	if len(text) > 1 {
342		nextChar = text[1]
343	}
344	if !smartQuoteHelper(out, previousChar, nextChar, quote, &r.inDoubleQuote, false) {
345		out.WriteString("&quot;")
346	}
347
348	return 0
349}
350
351func (r *SPRenderer) smartDoubleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
352	return r.smartDoubleQuoteVariant(out, previousChar, text, 'd')
353}
354
355func (r *SPRenderer) smartAngledDoubleQuote(out *bytes.Buffer, previousChar byte, text []byte) int {
356	return r.smartDoubleQuoteVariant(out, previousChar, text, 'a')
357}
358
359func (r *SPRenderer) smartLeftAngle(out *bytes.Buffer, previousChar byte, text []byte) int {
360	i := 0
361
362	for i < len(text) && text[i] != '>' {
363		i++
364	}
365
366	out.Write(text[:i+1])
367	return i
368}
369
// smartCallback is the signature shared by all per-byte handlers. A
// handler receives the input starting at the byte that triggered it
// (text[0]) together with the byte preceding it (0 when there is none),
// writes its replacement to out, and returns how many input bytes it
// consumed in addition to text[0].
type smartCallback func(out *bytes.Buffer, previousChar byte, text []byte) int
371
372// NewSmartypantsRenderer constructs a Smartypants renderer object.
373func NewSmartypantsRenderer(flags Flags) *SPRenderer {
374	var (
375		r SPRenderer
376
377		smartAmpAngled      = r.smartAmp(true, false)
378		smartAmpAngledNBSP  = r.smartAmp(true, true)
379		smartAmpRegular     = r.smartAmp(false, false)
380		smartAmpRegularNBSP = r.smartAmp(false, true)
381
382		addNBSP = flags&SmartypantsQuotesNBSP != 0
383	)
384
385	if flags&SmartypantsAngledQuotes == 0 {
386		r.callbacks['"'] = r.smartDoubleQuote
387		if !addNBSP {
388			r.callbacks['&'] = smartAmpRegular
389		} else {
390			r.callbacks['&'] = smartAmpRegularNBSP
391		}
392	} else {
393		r.callbacks['"'] = r.smartAngledDoubleQuote
394		if !addNBSP {
395			r.callbacks['&'] = smartAmpAngled
396		} else {
397			r.callbacks['&'] = smartAmpAngledNBSP
398		}
399	}
400	r.callbacks['\''] = r.smartSingleQuote
401	r.callbacks['('] = r.smartParens
402	if flags&SmartypantsDashes != 0 {
403		if flags&SmartypantsLatexDashes == 0 {
404			r.callbacks['-'] = r.smartDash
405		} else {
406			r.callbacks['-'] = r.smartDashLatex
407		}
408	}
409	r.callbacks['.'] = r.smartPeriod
410	if flags&SmartypantsFractions == 0 {
411		r.callbacks['1'] = r.smartNumber
412		r.callbacks['3'] = r.smartNumber
413	} else {
414		for ch := '1'; ch <= '9'; ch++ {
415			r.callbacks[ch] = r.smartNumberGeneric
416		}
417	}
418	r.callbacks['<'] = r.smartLeftAngle
419	r.callbacks['`'] = r.smartBacktick
420	return &r
421}
422
423// Process is the entry point of the Smartypants renderer.
424func (r *SPRenderer) Process(w io.Writer, text []byte) {
425	mark := 0
426	for i := 0; i < len(text); i++ {
427		if action := r.callbacks[text[i]]; action != nil {
428			if i > mark {
429				w.Write(text[mark:i])
430			}
431			previousChar := byte(0)
432			if i > 0 {
433				previousChar = text[i-1]
434			}
435			var tmp bytes.Buffer
436			i += action(&tmp, previousChar, text[i:])
437			w.Write(tmp.Bytes())
438			mark = i + 1
439		}
440	}
441	if mark < len(text) {
442		w.Write(text[mark:])
443	}
444}