typographer.go

  1package extension
  2
  3import (
  4	"unicode"
  5
  6	"github.com/yuin/goldmark"
  7	gast "github.com/yuin/goldmark/ast"
  8	"github.com/yuin/goldmark/parser"
  9	"github.com/yuin/goldmark/text"
 10	"github.com/yuin/goldmark/util"
 11)
 12
// uncloseCounterKey is the parser.Context key under which getUnclosedCounter
// stores and looks up the *unclosedCounter for the current parse.
var uncloseCounterKey = parser.NewContextKey()
 14
 15type unclosedCounter struct {
 16	Single int
 17	Double int
 18}
 19
 20func (u *unclosedCounter) Reset() {
 21	u.Single = 0
 22	u.Double = 0
 23}
 24
 25func getUnclosedCounter(pc parser.Context) *unclosedCounter {
 26	v := pc.Get(uncloseCounterKey)
 27	if v == nil {
 28		v = &unclosedCounter{}
 29		pc.Set(uncloseCounterKey, v)
 30	}
 31	return v.(*unclosedCounter)
 32}
 33
// TypographicPunctuation is a key of the punctuations that can be replaced with
// typographic entities. Its values are used as indexes into
// TypographerConfig.Substitutions.
type TypographicPunctuation int

const (
	// LeftSingleQuote is a ' that opens a quotation (default: &lsquo;).
	LeftSingleQuote TypographicPunctuation = iota + 1
	// RightSingleQuote is a ' that closes a quotation or trails a word
	// (default: &rsquo;).
	RightSingleQuote
	// LeftDoubleQuote is a " that opens a quotation (default: &ldquo;).
	LeftDoubleQuote
	// RightDoubleQuote is a " that closes a quotation (default: &rdquo;).
	RightDoubleQuote
	// EnDash is -- (default: &ndash;).
	EnDash
	// EmDash is --- (default: &mdash;).
	EmDash
	// Ellipsis is ... (default: &hellip;).
	Ellipsis
	// LeftAngleQuote is << (default: &laquo;).
	LeftAngleQuote
	// RightAngleQuote is >> (default: &raquo;).
	RightAngleQuote
	// Apostrophe is a ' used inside or in front of a word, as in don't,
	// 'twas or '90s (default: &rsquo;).
	Apostrophe

	// typographicPunctuationMax is one past the last punctuation key; it is
	// used as the length of the substitution table.
	typographicPunctuationMax
)
 62
// A TypographerConfig struct is a data structure that holds configuration of the
// Typographer extension.
type TypographerConfig struct {
	// Substitutions holds the replacement text for each punctuation, indexed
	// by TypographicPunctuation. A nil entry disables that substitution.
	// The parser indexes this slice without bounds checks, so it is expected
	// to have typographicPunctuationMax entries.
	Substitutions [][]byte
}
 68
 69func newDefaultSubstitutions() [][]byte {
 70	replacements := make([][]byte, typographicPunctuationMax)
 71	replacements[LeftSingleQuote] = []byte("&lsquo;")
 72	replacements[RightSingleQuote] = []byte("&rsquo;")
 73	replacements[LeftDoubleQuote] = []byte("&ldquo;")
 74	replacements[RightDoubleQuote] = []byte("&rdquo;")
 75	replacements[EnDash] = []byte("&ndash;")
 76	replacements[EmDash] = []byte("&mdash;")
 77	replacements[Ellipsis] = []byte("&hellip;")
 78	replacements[LeftAngleQuote] = []byte("&laquo;")
 79	replacements[RightAngleQuote] = []byte("&raquo;")
 80	replacements[Apostrophe] = []byte("&rsquo;")
 81
 82	return replacements
 83}
 84
 85// SetOption implements SetOptioner.
 86func (b *TypographerConfig) SetOption(name parser.OptionName, value interface{}) {
 87	switch name {
 88	case optTypographicSubstitutions:
 89		b.Substitutions = value.([][]byte)
 90	}
 91}
 92
// A TypographerOption interface sets options for the TypographerParser.
type TypographerOption interface {
	parser.Option
	// SetTypographerOption applies this option to the given configuration.
	SetTypographerOption(*TypographerConfig)
}
 98
// optTypographicSubstitutions is the option name under which the substitution
// table is stored in parser.Config.Options and recognized by
// TypographerConfig.SetOption.
const optTypographicSubstitutions parser.OptionName = "TypographicSubstitutions"
100
// TypographicSubstitutions maps punctuations to their replacement text for the
// Typographer extension.
type TypographicSubstitutions map[TypographicPunctuation][]byte
103
// withTypographicSubstitutions is the option value produced by
// WithTypographicSubstitutions.
type withTypographicSubstitutions struct {
	// value is the substitution table, indexed by TypographicPunctuation.
	value [][]byte
}
107
// SetParserOption stores the substitution table in the parser configuration
// under optTypographicSubstitutions.
func (o *withTypographicSubstitutions) SetParserOption(c *parser.Config) {
	c.Options[optTypographicSubstitutions] = o.value
}
111
// SetTypographerOption replaces the configuration's substitution table with
// the one held by this option.
func (o *withTypographicSubstitutions) SetTypographerOption(p *TypographerConfig) {
	p.Substitutions = o.value
}
115
116// WithTypographicSubstitutions is a functional otpion that specify replacement text
117// for punctuations.
118func WithTypographicSubstitutions[T []byte | string](values map[TypographicPunctuation]T) TypographerOption {
119	replacements := newDefaultSubstitutions()
120	for k, v := range values {
121		replacements[k] = []byte(v)
122	}
123
124	return &withTypographicSubstitutions{replacements}
125}
126
// typographerDelimiterProcessor is the stateless delimiter processor used to
// classify quote characters.
type typographerDelimiterProcessor struct {
}

// IsDelimiter reports whether b is a single or double quote.
func (p *typographerDelimiterProcessor) IsDelimiter(b byte) bool {
	switch b {
	case '\'', '"':
		return true
	default:
		return false
	}
}
133
// CanOpenCloser reports whether opener and closer use the same delimiter
// character.
func (p *typographerDelimiterProcessor) CanOpenCloser(opener, closer *parser.Delimiter) bool {
	return opener.Char == closer.Char
}
137
// OnMatch always returns nil: this processor produces no node of its own.
// Within this file it is only handed to parser.ScanDelimiter to classify a
// quote as opener and/or closer.
func (p *typographerDelimiterProcessor) OnMatch(consumes int) gast.Node {
	return nil
}
141
// defaultTypographerDelimiterProcessor is the shared processor instance passed
// to parser.ScanDelimiter by typographerParser.Parse.
var defaultTypographerDelimiterProcessor = &typographerDelimiterProcessor{}
143
// typographerParser is the inline parser that performs the typographic
// substitutions configured in the embedded TypographerConfig.
type typographerParser struct {
	TypographerConfig
}
147
148// NewTypographerParser return a new InlineParser that parses
149// typographer expressions.
150func NewTypographerParser(opts ...TypographerOption) parser.InlineParser {
151	p := &typographerParser{
152		TypographerConfig: TypographerConfig{
153			Substitutions: newDefaultSubstitutions(),
154		},
155	}
156	for _, o := range opts {
157		o.SetTypographerOption(&p.TypographerConfig)
158	}
159	return p
160}
161
162func (s *typographerParser) Trigger() []byte {
163	return []byte{'\'', '"', '-', '.', ',', '<', '>', '*', '['}
164}
165
166func (s *typographerParser) Parse(parent gast.Node, block text.Reader, pc parser.Context) gast.Node {
167	line, _ := block.PeekLine()
168	c := line[0]
169	if len(line) > 2 {
170		if c == '-' {
171			if s.Substitutions[EmDash] != nil && line[1] == '-' && line[2] == '-' { // ---
172				node := gast.NewString(s.Substitutions[EmDash])
173				node.SetCode(true)
174				block.Advance(3)
175				return node
176			}
177		} else if c == '.' {
178			if s.Substitutions[Ellipsis] != nil && line[1] == '.' && line[2] == '.' { // ...
179				node := gast.NewString(s.Substitutions[Ellipsis])
180				node.SetCode(true)
181				block.Advance(3)
182				return node
183			}
184			return nil
185		}
186	}
187	if len(line) > 1 {
188		if c == '<' {
189			if s.Substitutions[LeftAngleQuote] != nil && line[1] == '<' { // <<
190				node := gast.NewString(s.Substitutions[LeftAngleQuote])
191				node.SetCode(true)
192				block.Advance(2)
193				return node
194			}
195			return nil
196		} else if c == '>' {
197			if s.Substitutions[RightAngleQuote] != nil && line[1] == '>' { // >>
198				node := gast.NewString(s.Substitutions[RightAngleQuote])
199				node.SetCode(true)
200				block.Advance(2)
201				return node
202			}
203			return nil
204		} else if s.Substitutions[EnDash] != nil && c == '-' && line[1] == '-' { // --
205			node := gast.NewString(s.Substitutions[EnDash])
206			node.SetCode(true)
207			block.Advance(2)
208			return node
209		}
210	}
211	if c == '\'' || c == '"' {
212		before := block.PrecendingCharacter()
213		d := parser.ScanDelimiter(line, before, 1, defaultTypographerDelimiterProcessor)
214		if d == nil {
215			return nil
216		}
217		counter := getUnclosedCounter(pc)
218		if c == '\'' {
219			if s.Substitutions[Apostrophe] != nil {
220				// Handle decade abbrevations such as '90s
221				if d.CanOpen && !d.CanClose && len(line) > 3 &&
222					util.IsNumeric(line[1]) && util.IsNumeric(line[2]) && line[3] == 's' {
223					after := rune(' ')
224					if len(line) > 4 {
225						after = util.ToRune(line, 4)
226					}
227					if len(line) == 3 || util.IsSpaceRune(after) || util.IsPunctRune(after) {
228						node := gast.NewString(s.Substitutions[Apostrophe])
229						node.SetCode(true)
230						block.Advance(1)
231						return node
232					}
233				}
234				// special cases: 'twas, 'em, 'net
235				if len(line) > 1 && (unicode.IsPunct(before) || unicode.IsSpace(before)) &&
236					(line[1] == 't' || line[1] == 'e' || line[1] == 'n' || line[1] == 'l') {
237					node := gast.NewString(s.Substitutions[Apostrophe])
238					node.SetCode(true)
239					block.Advance(1)
240					return node
241				}
242				// Convert normal apostrophes. This is probably more flexible than necessary but
243				// converts any apostrophe in between two alphanumerics.
244				if len(line) > 1 && (unicode.IsDigit(before) || unicode.IsLetter(before)) &&
245					(unicode.IsLetter(util.ToRune(line, 1))) {
246					node := gast.NewString(s.Substitutions[Apostrophe])
247					node.SetCode(true)
248					block.Advance(1)
249					return node
250				}
251			}
252			if s.Substitutions[LeftSingleQuote] != nil && d.CanOpen && !d.CanClose {
253				nt := LeftSingleQuote
254				// special cases: Alice's, I'm, Don't, You'd
255				if len(line) > 1 && (line[1] == 's' || line[1] == 'm' || line[1] == 't' || line[1] == 'd') &&
256					(len(line) < 3 || util.IsPunct(line[2]) || util.IsSpace(line[2])) {
257					nt = RightSingleQuote
258				}
259				// special cases: I've, I'll, You're
260				if len(line) > 2 && ((line[1] == 'v' && line[2] == 'e') ||
261					(line[1] == 'l' && line[2] == 'l') || (line[1] == 'r' && line[2] == 'e')) &&
262					(len(line) < 4 || util.IsPunct(line[3]) || util.IsSpace(line[3])) {
263					nt = RightSingleQuote
264				}
265				if nt == LeftSingleQuote {
266					counter.Single++
267				}
268
269				node := gast.NewString(s.Substitutions[nt])
270				node.SetCode(true)
271				block.Advance(1)
272				return node
273			}
274			if s.Substitutions[RightSingleQuote] != nil {
275				// plural possesive and abbreviations: Smiths', doin'
276				if len(line) > 1 && unicode.IsSpace(util.ToRune(line, 0)) || unicode.IsPunct(util.ToRune(line, 0)) &&
277					(len(line) > 2 && !unicode.IsDigit(util.ToRune(line, 1))) {
278					node := gast.NewString(s.Substitutions[RightSingleQuote])
279					node.SetCode(true)
280					block.Advance(1)
281					return node
282				}
283			}
284			if s.Substitutions[RightSingleQuote] != nil && counter.Single > 0 {
285				isClose := d.CanClose && !d.CanOpen
286				maybeClose := d.CanClose && d.CanOpen && len(line) > 1 && unicode.IsPunct(util.ToRune(line, 1)) &&
287					(len(line) == 2 || (len(line) > 2 && util.IsPunct(line[2]) || util.IsSpace(line[2])))
288				if isClose || maybeClose {
289					node := gast.NewString(s.Substitutions[RightSingleQuote])
290					node.SetCode(true)
291					block.Advance(1)
292					counter.Single--
293					return node
294				}
295			}
296		}
297		if c == '"' {
298			if s.Substitutions[LeftDoubleQuote] != nil && d.CanOpen && !d.CanClose {
299				node := gast.NewString(s.Substitutions[LeftDoubleQuote])
300				node.SetCode(true)
301				block.Advance(1)
302				counter.Double++
303				return node
304			}
305			if s.Substitutions[RightDoubleQuote] != nil && counter.Double > 0 {
306				isClose := d.CanClose && !d.CanOpen
307				maybeClose := d.CanClose && d.CanOpen && len(line) > 1 && (unicode.IsPunct(util.ToRune(line, 1))) &&
308					(len(line) == 2 || (len(line) > 2 && util.IsPunct(line[2]) || util.IsSpace(line[2])))
309				if isClose || maybeClose {
310					// special case: "Monitor 21""
311					if len(line) > 1 && line[1] == '"' && unicode.IsDigit(before) {
312						return nil
313					}
314					node := gast.NewString(s.Substitutions[RightDoubleQuote])
315					node.SetCode(true)
316					block.Advance(1)
317					counter.Double--
318					return node
319				}
320			}
321		}
322	}
323	return nil
324}
325
326func (s *typographerParser) CloseBlock(parent gast.Node, pc parser.Context) {
327	getUnclosedCounter(pc).Reset()
328}
329
// typographer is the extension value that installs the typographer inline
// parser.
type typographer struct {
	// options are forwarded to NewTypographerParser by Extend.
	options []TypographerOption
}
333
// Typographer is an extension that replaces punctuations with typographic entities.
// It carries no options, so the default substitutions are used.
var Typographer = &typographer{}
336
337// NewTypographer returns a new Extender that replaces punctuations with typographic entities.
338func NewTypographer(opts ...TypographerOption) goldmark.Extender {
339	return &typographer{
340		options: opts,
341	}
342}
343
344func (e *typographer) Extend(m goldmark.Markdown) {
345	m.Parser().AddOptions(parser.WithInlineParsers(
346		util.Prioritized(NewTypographerParser(e.options...), 9999),
347	))
348}