1package extension
2
3import (
4 "unicode"
5
6 "github.com/yuin/goldmark"
7 gast "github.com/yuin/goldmark/ast"
8 "github.com/yuin/goldmark/parser"
9 "github.com/yuin/goldmark/text"
10 "github.com/yuin/goldmark/util"
11)
12
13var uncloseCounterKey = parser.NewContextKey()
14
// unclosedCounter tracks how many opening single and double quotes have been
// emitted without a matching closing quote.
type unclosedCounter struct {
	Single, Double int
}

// Reset sets both counters back to zero.
func (u *unclosedCounter) Reset() {
	*u = unclosedCounter{}
}
24
25func getUnclosedCounter(pc parser.Context) *unclosedCounter {
26 v := pc.Get(uncloseCounterKey)
27 if v == nil {
28 v = &unclosedCounter{}
29 pc.Set(uncloseCounterKey, v)
30 }
31 return v.(*unclosedCounter)
32}
33
// TypographicPunctuation identifies a punctuation that can be replaced with a
// typographic entity. Its values are used as indices into the substitution
// table.
type TypographicPunctuation int

const (
	// The zero value is intentionally left unassigned; the first real
	// punctuation is 1.
	_ TypographicPunctuation = iota

	// LeftSingleQuote is an opening single quotation mark.
	LeftSingleQuote
	// RightSingleQuote is a closing single quotation mark.
	RightSingleQuote
	// LeftDoubleQuote is an opening double quotation mark.
	LeftDoubleQuote
	// RightDoubleQuote is a closing double quotation mark.
	RightDoubleQuote
	// EnDash is written as -- in the source text.
	EnDash
	// EmDash is written as --- in the source text.
	EmDash
	// Ellipsis is written as ... in the source text.
	Ellipsis
	// LeftAngleQuote is written as << in the source text.
	LeftAngleQuote
	// RightAngleQuote is written as >> in the source text.
	RightAngleQuote
	// Apostrophe is a single quote character used as an apostrophe.
	Apostrophe

	// typographicPunctuationMax is one past the last punctuation; it is the
	// length of the substitution table.
	typographicPunctuationMax
)
62
// A TypographerConfig struct is a data structure that holds configuration of
// the Typographer extension.
type TypographerConfig struct {
	// Substitutions is the replacement table, indexed by
	// TypographicPunctuation.
	Substitutions [][]byte
}
68
69func newDefaultSubstitutions() [][]byte {
70 replacements := make([][]byte, typographicPunctuationMax)
71 replacements[LeftSingleQuote] = []byte("‘")
72 replacements[RightSingleQuote] = []byte("’")
73 replacements[LeftDoubleQuote] = []byte("“")
74 replacements[RightDoubleQuote] = []byte("”")
75 replacements[EnDash] = []byte("–")
76 replacements[EmDash] = []byte("—")
77 replacements[Ellipsis] = []byte("…")
78 replacements[LeftAngleQuote] = []byte("«")
79 replacements[RightAngleQuote] = []byte("»")
80 replacements[Apostrophe] = []byte("’")
81
82 return replacements
83}
84
85// SetOption implements SetOptioner.
86func (b *TypographerConfig) SetOption(name parser.OptionName, value interface{}) {
87 switch name {
88 case optTypographicSubstitutions:
89 b.Substitutions = value.([][]byte)
90 }
91}
92
93// A TypographerOption interface sets options for the TypographerParser.
94type TypographerOption interface {
95 parser.Option
96 SetTypographerOption(*TypographerConfig)
97}
98
99const optTypographicSubstitutions parser.OptionName = "TypographicSubstitutions"
100
101// TypographicSubstitutions is a list of the substitutions for the Typographer extension.
102type TypographicSubstitutions map[TypographicPunctuation][]byte
103
104type withTypographicSubstitutions struct {
105 value [][]byte
106}
107
108func (o *withTypographicSubstitutions) SetParserOption(c *parser.Config) {
109 c.Options[optTypographicSubstitutions] = o.value
110}
111
112func (o *withTypographicSubstitutions) SetTypographerOption(p *TypographerConfig) {
113 p.Substitutions = o.value
114}
115
116// WithTypographicSubstitutions is a functional otpion that specify replacement text
117// for punctuations.
118func WithTypographicSubstitutions[T []byte | string](values map[TypographicPunctuation]T) TypographerOption {
119 replacements := newDefaultSubstitutions()
120 for k, v := range values {
121 replacements[k] = []byte(v)
122 }
123
124 return &withTypographicSubstitutions{replacements}
125}
126
// typographerDelimiterProcessor is the stateless delimiter processor used when
// scanning quote characters.
type typographerDelimiterProcessor struct{}

// IsDelimiter reports whether b is a single quote or a double quote.
func (p *typographerDelimiterProcessor) IsDelimiter(b byte) bool {
	switch b {
	case '\'', '"':
		return true
	default:
		return false
	}
}
133
134func (p *typographerDelimiterProcessor) CanOpenCloser(opener, closer *parser.Delimiter) bool {
135 return opener.Char == closer.Char
136}
137
138func (p *typographerDelimiterProcessor) OnMatch(consumes int) gast.Node {
139 return nil
140}
141
142var defaultTypographerDelimiterProcessor = &typographerDelimiterProcessor{}
143
144type typographerParser struct {
145 TypographerConfig
146}
147
148// NewTypographerParser return a new InlineParser that parses
149// typographer expressions.
150func NewTypographerParser(opts ...TypographerOption) parser.InlineParser {
151 p := &typographerParser{
152 TypographerConfig: TypographerConfig{
153 Substitutions: newDefaultSubstitutions(),
154 },
155 }
156 for _, o := range opts {
157 o.SetTypographerOption(&p.TypographerConfig)
158 }
159 return p
160}
161
162func (s *typographerParser) Trigger() []byte {
163 return []byte{'\'', '"', '-', '.', ',', '<', '>', '*', '['}
164}
165
166func (s *typographerParser) Parse(parent gast.Node, block text.Reader, pc parser.Context) gast.Node {
167 line, _ := block.PeekLine()
168 c := line[0]
169 if len(line) > 2 {
170 if c == '-' {
171 if s.Substitutions[EmDash] != nil && line[1] == '-' && line[2] == '-' { // ---
172 node := gast.NewString(s.Substitutions[EmDash])
173 node.SetCode(true)
174 block.Advance(3)
175 return node
176 }
177 } else if c == '.' {
178 if s.Substitutions[Ellipsis] != nil && line[1] == '.' && line[2] == '.' { // ...
179 node := gast.NewString(s.Substitutions[Ellipsis])
180 node.SetCode(true)
181 block.Advance(3)
182 return node
183 }
184 return nil
185 }
186 }
187 if len(line) > 1 {
188 if c == '<' {
189 if s.Substitutions[LeftAngleQuote] != nil && line[1] == '<' { // <<
190 node := gast.NewString(s.Substitutions[LeftAngleQuote])
191 node.SetCode(true)
192 block.Advance(2)
193 return node
194 }
195 return nil
196 } else if c == '>' {
197 if s.Substitutions[RightAngleQuote] != nil && line[1] == '>' { // >>
198 node := gast.NewString(s.Substitutions[RightAngleQuote])
199 node.SetCode(true)
200 block.Advance(2)
201 return node
202 }
203 return nil
204 } else if s.Substitutions[EnDash] != nil && c == '-' && line[1] == '-' { // --
205 node := gast.NewString(s.Substitutions[EnDash])
206 node.SetCode(true)
207 block.Advance(2)
208 return node
209 }
210 }
211 if c == '\'' || c == '"' {
212 before := block.PrecendingCharacter()
213 d := parser.ScanDelimiter(line, before, 1, defaultTypographerDelimiterProcessor)
214 if d == nil {
215 return nil
216 }
217 counter := getUnclosedCounter(pc)
218 if c == '\'' {
219 if s.Substitutions[Apostrophe] != nil {
220 // Handle decade abbrevations such as '90s
221 if d.CanOpen && !d.CanClose && len(line) > 3 &&
222 util.IsNumeric(line[1]) && util.IsNumeric(line[2]) && line[3] == 's' {
223 after := rune(' ')
224 if len(line) > 4 {
225 after = util.ToRune(line, 4)
226 }
227 if len(line) == 3 || util.IsSpaceRune(after) || util.IsPunctRune(after) {
228 node := gast.NewString(s.Substitutions[Apostrophe])
229 node.SetCode(true)
230 block.Advance(1)
231 return node
232 }
233 }
234 // special cases: 'twas, 'em, 'net
235 if len(line) > 1 && (unicode.IsPunct(before) || unicode.IsSpace(before)) &&
236 (line[1] == 't' || line[1] == 'e' || line[1] == 'n' || line[1] == 'l') {
237 node := gast.NewString(s.Substitutions[Apostrophe])
238 node.SetCode(true)
239 block.Advance(1)
240 return node
241 }
242 // Convert normal apostrophes. This is probably more flexible than necessary but
243 // converts any apostrophe in between two alphanumerics.
244 if len(line) > 1 && (unicode.IsDigit(before) || unicode.IsLetter(before)) &&
245 (unicode.IsLetter(util.ToRune(line, 1))) {
246 node := gast.NewString(s.Substitutions[Apostrophe])
247 node.SetCode(true)
248 block.Advance(1)
249 return node
250 }
251 }
252 if s.Substitutions[LeftSingleQuote] != nil && d.CanOpen && !d.CanClose {
253 nt := LeftSingleQuote
254 // special cases: Alice's, I'm, Don't, You'd
255 if len(line) > 1 && (line[1] == 's' || line[1] == 'm' || line[1] == 't' || line[1] == 'd') &&
256 (len(line) < 3 || util.IsPunct(line[2]) || util.IsSpace(line[2])) {
257 nt = RightSingleQuote
258 }
259 // special cases: I've, I'll, You're
260 if len(line) > 2 && ((line[1] == 'v' && line[2] == 'e') ||
261 (line[1] == 'l' && line[2] == 'l') || (line[1] == 'r' && line[2] == 'e')) &&
262 (len(line) < 4 || util.IsPunct(line[3]) || util.IsSpace(line[3])) {
263 nt = RightSingleQuote
264 }
265 if nt == LeftSingleQuote {
266 counter.Single++
267 }
268
269 node := gast.NewString(s.Substitutions[nt])
270 node.SetCode(true)
271 block.Advance(1)
272 return node
273 }
274 if s.Substitutions[RightSingleQuote] != nil {
275 // plural possesive and abbreviations: Smiths', doin'
276 if len(line) > 1 && unicode.IsSpace(util.ToRune(line, 0)) || unicode.IsPunct(util.ToRune(line, 0)) &&
277 (len(line) > 2 && !unicode.IsDigit(util.ToRune(line, 1))) {
278 node := gast.NewString(s.Substitutions[RightSingleQuote])
279 node.SetCode(true)
280 block.Advance(1)
281 return node
282 }
283 }
284 if s.Substitutions[RightSingleQuote] != nil && counter.Single > 0 {
285 isClose := d.CanClose && !d.CanOpen
286 maybeClose := d.CanClose && d.CanOpen && len(line) > 1 && unicode.IsPunct(util.ToRune(line, 1)) &&
287 (len(line) == 2 || (len(line) > 2 && util.IsPunct(line[2]) || util.IsSpace(line[2])))
288 if isClose || maybeClose {
289 node := gast.NewString(s.Substitutions[RightSingleQuote])
290 node.SetCode(true)
291 block.Advance(1)
292 counter.Single--
293 return node
294 }
295 }
296 }
297 if c == '"' {
298 if s.Substitutions[LeftDoubleQuote] != nil && d.CanOpen && !d.CanClose {
299 node := gast.NewString(s.Substitutions[LeftDoubleQuote])
300 node.SetCode(true)
301 block.Advance(1)
302 counter.Double++
303 return node
304 }
305 if s.Substitutions[RightDoubleQuote] != nil && counter.Double > 0 {
306 isClose := d.CanClose && !d.CanOpen
307 maybeClose := d.CanClose && d.CanOpen && len(line) > 1 && (unicode.IsPunct(util.ToRune(line, 1))) &&
308 (len(line) == 2 || (len(line) > 2 && util.IsPunct(line[2]) || util.IsSpace(line[2])))
309 if isClose || maybeClose {
310 // special case: "Monitor 21""
311 if len(line) > 1 && line[1] == '"' && unicode.IsDigit(before) {
312 return nil
313 }
314 node := gast.NewString(s.Substitutions[RightDoubleQuote])
315 node.SetCode(true)
316 block.Advance(1)
317 counter.Double--
318 return node
319 }
320 }
321 }
322 }
323 return nil
324}
325
326func (s *typographerParser) CloseBlock(parent gast.Node, pc parser.Context) {
327 getUnclosedCounter(pc).Reset()
328}
329
330type typographer struct {
331 options []TypographerOption
332}
333
334// Typographer is an extension that replaces punctuations with typographic entities.
335var Typographer = &typographer{}
336
337// NewTypographer returns a new Extender that replaces punctuations with typographic entities.
338func NewTypographer(opts ...TypographerOption) goldmark.Extender {
339 return &typographer{
340 options: opts,
341 }
342}
343
344func (e *typographer) Extend(m goldmark.Markdown) {
345 m.Parser().AddOptions(parser.WithInlineParsers(
346 util.Prioritized(NewTypographerParser(e.options...), 9999),
347 ))
348}