smartypants.go

//
// Blackfriday Markdown Processor
// Available at http://github.com/russross/blackfriday
//
// Copyright © 2011 Russ Ross <russ@russross.com>.
// Distributed under the Simplified BSD License.
// See README.md for details.
//
  9
//
//
// SmartyPants rendering
//
//
 15
 16package blackfriday
 17
 18import (
 19	"bytes"
 20)
 21
 22type smartypantsData struct {
 23	inSingleQuote bool
 24	inDoubleQuote bool
 25}
 26
 27func wordBoundary(c byte) bool {
 28	return c == 0 || isspace(c) || ispunct(c)
 29}
 30
 31func tolower(c byte) byte {
 32	if c >= 'A' && c <= 'Z' {
 33		return c - 'A' + 'a'
 34	}
 35	return c
 36}
 37
 38func isdigit(c byte) bool {
 39	return c >= '0' && c <= '9'
 40}
 41
 42func smartQuoteHelper(out *bytes.Buffer, previousChar byte, nextChar byte, quote byte, isOpen *bool, addNBSP bool) bool {
 43	// edge of the buffer is likely to be a tag that we don't get to see,
 44	// so we treat it like text sometimes
 45
 46	// enumerate all sixteen possibilities for (previousChar, nextChar)
 47	// each can be one of {0, space, punct, other}
 48	switch {
 49	case previousChar == 0 && nextChar == 0:
 50		// context is not any help here, so toggle
 51		*isOpen = !*isOpen
 52	case isspace(previousChar) && nextChar == 0:
 53		// [ "] might be [ "<code>foo...]
 54		*isOpen = true
 55	case ispunct(previousChar) && nextChar == 0:
 56		// [!"] hmm... could be [Run!"] or [("<code>...]
 57		*isOpen = false
 58	case /* isnormal(previousChar) && */ nextChar == 0:
 59		// [a"] is probably a close
 60		*isOpen = false
 61	case previousChar == 0 && isspace(nextChar):
 62		// [" ] might be [...foo</code>" ]
 63		*isOpen = false
 64	case isspace(previousChar) && isspace(nextChar):
 65		// [ " ] context is not any help here, so toggle
 66		*isOpen = !*isOpen
 67	case ispunct(previousChar) && isspace(nextChar):
 68		// [!" ] is probably a close
 69		*isOpen = false
 70	case /* isnormal(previousChar) && */ isspace(nextChar):
 71		// [a" ] this is one of the easy cases
 72		*isOpen = false
 73	case previousChar == 0 && ispunct(nextChar):
 74		// ["!] hmm... could be ["$1.95] or [</code>"!...]
 75		*isOpen = false
 76	case isspace(previousChar) && ispunct(nextChar):
 77		// [ "!] looks more like [ "$1.95]
 78		*isOpen = true
 79	case ispunct(previousChar) && ispunct(nextChar):
 80		// [!"!] context is not any help here, so toggle
 81		*isOpen = !*isOpen
 82	case /* isnormal(previousChar) && */ ispunct(nextChar):
 83		// [a"!] is probably a close
 84		*isOpen = false
 85	case previousChar == 0 /* && isnormal(nextChar) */ :
 86		// ["a] is probably an open
 87		*isOpen = true
 88	case isspace(previousChar) /* && isnormal(nextChar) */ :
 89		// [ "a] this is one of the easy cases
 90		*isOpen = true
 91	case ispunct(previousChar) /* && isnormal(nextChar) */ :
 92		// [!"a] is probably an open
 93		*isOpen = true
 94	default:
 95		// [a'b] maybe a contraction?
 96		*isOpen = false
 97	}
 98
 99	// Note that with the limited lookahead, this non-breaking
100	// space will also be appended to single double quotes.
101	if addNBSP && !*isOpen {
102		out.WriteString("&nbsp;")
103	}
104
105	out.WriteByte('&')
106	if *isOpen {
107		out.WriteByte('l')
108	} else {
109		out.WriteByte('r')
110	}
111	out.WriteByte(quote)
112	out.WriteString("quo;")
113
114	if addNBSP && *isOpen {
115		out.WriteString("&nbsp;")
116	}
117
118	return true
119}
120
121func smartSingleQuote(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
122	if len(text) >= 2 {
123		t1 := tolower(text[1])
124
125		if t1 == '\'' {
126			nextChar := byte(0)
127			if len(text) >= 3 {
128				nextChar = text[2]
129			}
130			if smartQuoteHelper(out, previousChar, nextChar, 'd', &smrt.inDoubleQuote, false) {
131				return 1
132			}
133		}
134
135		if (t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') && (len(text) < 3 || wordBoundary(text[2])) {
136			out.WriteString("&rsquo;")
137			return 0
138		}
139
140		if len(text) >= 3 {
141			t2 := tolower(text[2])
142
143			if ((t1 == 'r' && t2 == 'e') || (t1 == 'l' && t2 == 'l') || (t1 == 'v' && t2 == 'e')) &&
144				(len(text) < 4 || wordBoundary(text[3])) {
145				out.WriteString("&rsquo;")
146				return 0
147			}
148		}
149	}
150
151	nextChar := byte(0)
152	if len(text) > 1 {
153		nextChar = text[1]
154	}
155	if smartQuoteHelper(out, previousChar, nextChar, 's', &smrt.inSingleQuote, false) {
156		return 0
157	}
158
159	out.WriteByte(text[0])
160	return 0
161}
162
163func smartParens(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
164	if len(text) >= 3 {
165		t1 := tolower(text[1])
166		t2 := tolower(text[2])
167
168		if t1 == 'c' && t2 == ')' {
169			out.WriteString("&copy;")
170			return 2
171		}
172
173		if t1 == 'r' && t2 == ')' {
174			out.WriteString("&reg;")
175			return 2
176		}
177
178		if len(text) >= 4 && t1 == 't' && t2 == 'm' && text[3] == ')' {
179			out.WriteString("&trade;")
180			return 3
181		}
182	}
183
184	out.WriteByte(text[0])
185	return 0
186}
187
188func smartDash(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
189	if len(text) >= 2 {
190		if text[1] == '-' {
191			out.WriteString("&mdash;")
192			return 1
193		}
194
195		if wordBoundary(previousChar) && wordBoundary(text[1]) {
196			out.WriteString("&ndash;")
197			return 0
198		}
199	}
200
201	out.WriteByte(text[0])
202	return 0
203}
204
205func smartDashLatex(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
206	if len(text) >= 3 && text[1] == '-' && text[2] == '-' {
207		out.WriteString("&mdash;")
208		return 2
209	}
210	if len(text) >= 2 && text[1] == '-' {
211		out.WriteString("&ndash;")
212		return 1
213	}
214
215	out.WriteByte(text[0])
216	return 0
217}
218
219func smartAmpVariant(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte, quote byte, addNBSP bool) int {
220	if bytes.HasPrefix(text, []byte("&quot;")) {
221		nextChar := byte(0)
222		if len(text) >= 7 {
223			nextChar = text[6]
224		}
225		if smartQuoteHelper(out, previousChar, nextChar, quote, &smrt.inDoubleQuote, addNBSP) {
226			return 5
227		}
228	}
229
230	if bytes.HasPrefix(text, []byte("&#0;")) {
231		return 3
232	}
233
234	out.WriteByte('&')
235	return 0
236}
237
238func smartAmp(angledQuotes, addNBSP bool) func(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
239	var quote byte = 'd'
240	if angledQuotes {
241		quote = 'a'
242	}
243
244	return func(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
245		return smartAmpVariant(out, smrt, previousChar, text, quote, addNBSP)
246	}
247}
248
249func smartPeriod(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
250	if len(text) >= 3 && text[1] == '.' && text[2] == '.' {
251		out.WriteString("&hellip;")
252		return 2
253	}
254
255	if len(text) >= 5 && text[1] == ' ' && text[2] == '.' && text[3] == ' ' && text[4] == '.' {
256		out.WriteString("&hellip;")
257		return 4
258	}
259
260	out.WriteByte(text[0])
261	return 0
262}
263
264func smartBacktick(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
265	if len(text) >= 2 && text[1] == '`' {
266		nextChar := byte(0)
267		if len(text) >= 3 {
268			nextChar = text[2]
269		}
270		if smartQuoteHelper(out, previousChar, nextChar, 'd', &smrt.inDoubleQuote, false) {
271			return 1
272		}
273	}
274
275	out.WriteByte(text[0])
276	return 0
277}
278
279func smartNumberGeneric(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
280	if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 {
281		// is it of the form digits/digits(word boundary)?, i.e., \d+/\d+\b
282		// note: check for regular slash (/) or fraction slash (⁄, 0x2044, or 0xe2 81 84 in utf-8)
283		//       and avoid changing dates like 1/23/2005 into fractions.
284		numEnd := 0
285		for len(text) > numEnd && isdigit(text[numEnd]) {
286			numEnd++
287		}
288		if numEnd == 0 {
289			out.WriteByte(text[0])
290			return 0
291		}
292		denStart := numEnd + 1
293		if len(text) > numEnd+3 && text[numEnd] == 0xe2 && text[numEnd+1] == 0x81 && text[numEnd+2] == 0x84 {
294			denStart = numEnd + 3
295		} else if len(text) < numEnd+2 || text[numEnd] != '/' {
296			out.WriteByte(text[0])
297			return 0
298		}
299		denEnd := denStart
300		for len(text) > denEnd && isdigit(text[denEnd]) {
301			denEnd++
302		}
303		if denEnd == denStart {
304			out.WriteByte(text[0])
305			return 0
306		}
307		if len(text) == denEnd || wordBoundary(text[denEnd]) && text[denEnd] != '/' {
308			out.WriteString("<sup>")
309			out.Write(text[:numEnd])
310			out.WriteString("</sup>&frasl;<sub>")
311			out.Write(text[denStart:denEnd])
312			out.WriteString("</sub>")
313			return denEnd - 1
314		}
315	}
316
317	out.WriteByte(text[0])
318	return 0
319}
320
321func smartNumber(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
322	if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 {
323		if text[0] == '1' && text[1] == '/' && text[2] == '2' {
324			if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' {
325				out.WriteString("&frac12;")
326				return 2
327			}
328		}
329
330		if text[0] == '1' && text[1] == '/' && text[2] == '4' {
331			if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h') {
332				out.WriteString("&frac14;")
333				return 2
334			}
335		}
336
337		if text[0] == '3' && text[1] == '/' && text[2] == '4' {
338			if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's') {
339				out.WriteString("&frac34;")
340				return 2
341			}
342		}
343	}
344
345	out.WriteByte(text[0])
346	return 0
347}
348
349func smartDoubleQuoteVariant(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte, quote byte) int {
350	nextChar := byte(0)
351	if len(text) > 1 {
352		nextChar = text[1]
353	}
354	if !smartQuoteHelper(out, previousChar, nextChar, quote, &smrt.inDoubleQuote, false) {
355		out.WriteString("&quot;")
356	}
357
358	return 0
359}
360
361func smartDoubleQuote(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
362	return smartDoubleQuoteVariant(out, smrt, previousChar, text, 'd')
363}
364
365func smartAngledDoubleQuote(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
366	return smartDoubleQuoteVariant(out, smrt, previousChar, text, 'a')
367}
368
369func smartLeftAngle(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int {
370	i := 0
371
372	for i < len(text) && text[i] != '>' {
373		i++
374	}
375
376	out.Write(text[:i+1])
377	return i
378}
379
380type smartCallback func(out *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int
381
382type smartypantsRenderer [256]smartCallback
383
384var (
385	smartAmpAngled      = smartAmp(true, false)
386	smartAmpAngledNBSP  = smartAmp(true, true)
387	smartAmpRegular     = smartAmp(false, false)
388	smartAmpRegularNBSP = smartAmp(false, true)
389)
390
391func smartypants(flags int) *smartypantsRenderer {
392	r := new(smartypantsRenderer)
393	addNBSP := flags&HTML_SMARTYPANTS_QUOTES_NBSP != 0
394	if flags&HTML_SMARTYPANTS_ANGLED_QUOTES == 0 {
395		r['"'] = smartDoubleQuote
396		if !addNBSP {
397			r['&'] = smartAmpRegular
398		} else {
399			r['&'] = smartAmpRegularNBSP
400		}
401	} else {
402		r['"'] = smartAngledDoubleQuote
403		if !addNBSP {
404			r['&'] = smartAmpAngled
405		} else {
406			r['&'] = smartAmpAngledNBSP
407		}
408	}
409	r['\''] = smartSingleQuote
410	r['('] = smartParens
411	if flags&HTML_SMARTYPANTS_DASHES != 0 {
412		if flags&HTML_SMARTYPANTS_LATEX_DASHES == 0 {
413			r['-'] = smartDash
414		} else {
415			r['-'] = smartDashLatex
416		}
417	}
418	r['.'] = smartPeriod
419	if flags&HTML_SMARTYPANTS_FRACTIONS == 0 {
420		r['1'] = smartNumber
421		r['3'] = smartNumber
422	} else {
423		for ch := '1'; ch <= '9'; ch++ {
424			r[ch] = smartNumberGeneric
425		}
426	}
427	r['<'] = smartLeftAngle
428	r['`'] = smartBacktick
429	return r
430}