// Package parser contains stuff that is related to parsing a Markdown text.
2package parser
3
4import (
5 "fmt"
6 "strings"
7 "sync"
8
9 "github.com/yuin/goldmark/ast"
10 "github.com/yuin/goldmark/text"
11 "github.com/yuin/goldmark/util"
12)
13
// Reference describes a link reference found in Markdown text.
type Reference interface {
	// String implements Stringer.
	String() string

	// Label returns the label of the reference.
	Label() []byte

	// Destination returns the destination (URL) of the reference.
	Destination() []byte

	// Title returns the title of the reference.
	Title() []byte
}

// reference is the Reference implementation returned by NewReference.
// It keeps the three byte slices exactly as they were handed in.
type reference struct {
	label       []byte
	destination []byte
	title       []byte
}

// NewReference returns a new Reference made of the given label,
// destination and title. The slices are stored as-is, not copied.
func NewReference(label, destination, title []byte) Reference {
	return &reference{
		label:       label,
		destination: destination,
		title:       title,
	}
}

// Label returns the label this reference was created with.
func (r *reference) Label() []byte { return r.label }

// Destination returns the destination this reference was created with.
func (r *reference) Destination() []byte { return r.destination }

// Title returns the title this reference was created with.
func (r *reference) Title() []byte { return r.title }

// String renders the label, destination and title in a single line.
func (r *reference) String() string {
	const layout = "Reference{Label:%s, Destination:%s, Title:%s}"
	return fmt.Sprintf(layout, r.label, r.destination, r.title)
}
55
// An IDs interface is a collection of the element ids.
type IDs interface {
	// Generate generates a new element id from the given value for a node
	// of the given kind.
	Generate(value []byte, kind ast.NodeKind) []byte

	// Put puts a given element id to the used ids table.
	Put(value []byte)
}
64
65type ids struct {
66 values map[string]bool
67}
68
69func newIDs() IDs {
70 return &ids{
71 values: map[string]bool{},
72 }
73}
74
75func (s *ids) Generate(value []byte, kind ast.NodeKind) []byte {
76 value = util.TrimLeftSpace(value)
77 value = util.TrimRightSpace(value)
78 result := []byte{}
79 for i := 0; i < len(value); {
80 v := value[i]
81 l := util.UTF8Len(v)
82 i += int(l)
83 if l != 1 {
84 continue
85 }
86 if util.IsAlphaNumeric(v) {
87 if 'A' <= v && v <= 'Z' {
88 v += 'a' - 'A'
89 }
90 result = append(result, v)
91 } else if util.IsSpace(v) || v == '-' || v == '_' {
92 result = append(result, '-')
93 }
94 }
95 if len(result) == 0 {
96 if kind == ast.KindHeading {
97 result = []byte("heading")
98 } else {
99 result = []byte("id")
100 }
101 }
102 if _, ok := s.values[util.BytesToReadOnlyString(result)]; !ok {
103 s.values[util.BytesToReadOnlyString(result)] = true
104 return result
105 }
106 for i := 1; ; i++ {
107 newResult := fmt.Sprintf("%s-%d", result, i)
108 if _, ok := s.values[newResult]; !ok {
109 s.values[newResult] = true
110 return []byte(newResult)
111 }
112
113 }
114}
115
116func (s *ids) Put(value []byte) {
117 s.values[util.BytesToReadOnlyString(value)] = true
118}
119
// ContextKey is a key that is used to set arbitrary values to the context.
type ContextKey int

// ContextKeyMax is the largest ContextKey that NewContextKey has returned
// so far (zero when no key has been allocated yet).
var ContextKeyMax ContextKey

// NewContextKey returns a new ContextKey value.
// Every call bumps ContextKeyMax by one and returns the new maximum. The
// counter is a plain package variable and is updated without synchronization.
func NewContextKey() ContextKey {
	next := ContextKeyMax + 1
	ContextKeyMax = next
	return next
}
131
// A Context interface holds information that is necessary to parse
// Markdown text.
type Context interface {
	// String implements Stringer.
	String() string

	// Get returns a value associated with the given key.
	Get(ContextKey) interface{}

	// ComputeIfAbsent computes a value if a value associated with the given key is absent and returns the value.
	ComputeIfAbsent(ContextKey, func() interface{}) interface{}

	// Set sets the given value to the context.
	Set(ContextKey, interface{})

	// AddReference adds the given reference to this context.
	AddReference(Reference)

	// Reference returns (a reference, true) if a reference associated with
	// the given label exists, otherwise (nil, false).
	Reference(label string) (Reference, bool)

	// References returns a list of references.
	References() []Reference

	// IDs returns a collection of the element ids.
	IDs() IDs

	// BlockOffset returns a first non-space character position on current line.
	// This value is valid only for BlockParser.Open.
	// BlockOffset returns -1 if current line is blank.
	BlockOffset() int

	// SetBlockOffset sets a first non-space character position on current line.
	// This value is valid only for BlockParser.Open.
	SetBlockOffset(int)

	// BlockIndent returns an indent width on current line.
	// This value is valid only for BlockParser.Open.
	// BlockIndent returns -1 if current line is blank.
	BlockIndent() int

	// SetBlockIndent sets an indent width on current line.
	// This value is valid only for BlockParser.Open.
	SetBlockIndent(int)

	// FirstDelimiter returns a first delimiter of the current delimiter list.
	FirstDelimiter() *Delimiter

	// LastDelimiter returns a last delimiter of the current delimiter list.
	LastDelimiter() *Delimiter

	// PushDelimiter appends the given delimiter to the tail of the current
	// delimiter list.
	PushDelimiter(delimiter *Delimiter)

	// RemoveDelimiter removes the given delimiter from the current delimiter list.
	RemoveDelimiter(d *Delimiter)

	// ClearDelimiters clears the current delimiter list, stopping when the
	// given bottom node is reached.
	ClearDelimiters(bottom ast.Node)

	// OpenedBlocks returns a list of nodes that are currently being parsed.
	OpenedBlocks() []Block

	// SetOpenedBlocks sets a list of nodes that are currently being parsed.
	SetOpenedBlocks([]Block)

	// LastOpenedBlock returns the last node that is currently being parsed.
	LastOpenedBlock() Block

	// IsInLinkLabel returns true if current position seems to be in link label.
	IsInLinkLabel() bool
}
206
// A ContextConfig struct is a data structure that holds configuration of the Context.
type ContextConfig struct {
	// IDs is the id collection the created Context will return from IDs().
	IDs IDs
}

// A ContextOption is a functional option type for the Context.
type ContextOption func(*ContextConfig)
214
215// WithIDs is a functional option for the Context.
216func WithIDs(ids IDs) ContextOption {
217 return func(c *ContextConfig) {
218 c.IDs = ids
219 }
220}
221
// parseContext is the Context implementation returned by NewContext.
type parseContext struct {
	// store holds the values set through Set/ComputeIfAbsent, indexed by ContextKey.
	store []interface{}
	// ids is the id collection returned by IDs().
	ids IDs
	// refs maps a normalized label (see AddReference) to its reference.
	refs map[string]Reference
	// blockOffset and blockIndent start at -1 and are changed via their setters.
	blockOffset int
	blockIndent int
	// delimiters is the head and lastDelimiter the tail of the doubly linked
	// delimiter list.
	delimiters    *Delimiter
	lastDelimiter *Delimiter
	// openedBlocks is the list of blocks that are currently being parsed.
	openedBlocks []Block
}
232
233// NewContext returns a new Context.
234func NewContext(options ...ContextOption) Context {
235 cfg := &ContextConfig{
236 IDs: newIDs(),
237 }
238 for _, option := range options {
239 option(cfg)
240 }
241
242 return &parseContext{
243 store: make([]interface{}, ContextKeyMax+1),
244 refs: map[string]Reference{},
245 ids: cfg.IDs,
246 blockOffset: -1,
247 blockIndent: -1,
248 delimiters: nil,
249 lastDelimiter: nil,
250 openedBlocks: []Block{},
251 }
252}
253
254func (p *parseContext) Get(key ContextKey) interface{} {
255 return p.store[key]
256}
257
258func (p *parseContext) ComputeIfAbsent(key ContextKey, f func() interface{}) interface{} {
259 v := p.store[key]
260 if v == nil {
261 v = f()
262 p.store[key] = v
263 }
264 return v
265}
266
267func (p *parseContext) Set(key ContextKey, value interface{}) {
268 p.store[key] = value
269}
270
// IDs returns the id collection this context was created with.
func (p *parseContext) IDs() IDs {
	return p.ids
}

// BlockOffset returns the value last passed to SetBlockOffset
// (-1 for a freshly created context).
func (p *parseContext) BlockOffset() int {
	return p.blockOffset
}

// SetBlockOffset stores v as the current block offset.
func (p *parseContext) SetBlockOffset(v int) {
	p.blockOffset = v
}

// BlockIndent returns the value last passed to SetBlockIndent
// (-1 for a freshly created context).
func (p *parseContext) BlockIndent() int {
	return p.blockIndent
}

// SetBlockIndent stores v as the current block indent.
func (p *parseContext) SetBlockIndent(v int) {
	p.blockIndent = v
}

// LastDelimiter returns the tail of the delimiter list, or nil if it is empty.
func (p *parseContext) LastDelimiter() *Delimiter {
	return p.lastDelimiter
}

// FirstDelimiter returns the head of the delimiter list, or nil if it is empty.
func (p *parseContext) FirstDelimiter() *Delimiter {
	return p.delimiters
}
298
299func (p *parseContext) PushDelimiter(d *Delimiter) {
300 if p.delimiters == nil {
301 p.delimiters = d
302 p.lastDelimiter = d
303 } else {
304 l := p.lastDelimiter
305 p.lastDelimiter = d
306 l.NextDelimiter = d
307 d.PreviousDelimiter = l
308 }
309}
310
311func (p *parseContext) RemoveDelimiter(d *Delimiter) {
312 if d.PreviousDelimiter == nil {
313 p.delimiters = d.NextDelimiter
314 } else {
315 d.PreviousDelimiter.NextDelimiter = d.NextDelimiter
316 if d.NextDelimiter != nil {
317 d.NextDelimiter.PreviousDelimiter = d.PreviousDelimiter
318 }
319 }
320 if d.NextDelimiter == nil {
321 p.lastDelimiter = d.PreviousDelimiter
322 }
323 if p.delimiters != nil {
324 p.delimiters.PreviousDelimiter = nil
325 }
326 if p.lastDelimiter != nil {
327 p.lastDelimiter.NextDelimiter = nil
328 }
329 d.NextDelimiter = nil
330 d.PreviousDelimiter = nil
331 if d.Length != 0 {
332 ast.MergeOrReplaceTextSegment(d.Parent(), d, d.Segment)
333 } else {
334 d.Parent().RemoveChild(d.Parent(), d)
335 }
336}
337
338func (p *parseContext) ClearDelimiters(bottom ast.Node) {
339 if p.lastDelimiter == nil {
340 return
341 }
342 var c ast.Node
343 for c = p.lastDelimiter; c != nil && c != bottom; {
344 prev := c.PreviousSibling()
345 if d, ok := c.(*Delimiter); ok {
346 p.RemoveDelimiter(d)
347 }
348 c = prev
349 }
350}
351
352func (p *parseContext) AddReference(ref Reference) {
353 key := util.ToLinkReference(ref.Label())
354 if _, ok := p.refs[key]; !ok {
355 p.refs[key] = ref
356 }
357}
358
359func (p *parseContext) Reference(label string) (Reference, bool) {
360 v, ok := p.refs[label]
361 return v, ok
362}
363
364func (p *parseContext) References() []Reference {
365 ret := make([]Reference, 0, len(p.refs))
366 for _, v := range p.refs {
367 ret = append(ret, v)
368 }
369 return ret
370}
371
372func (p *parseContext) String() string {
373 refs := []string{}
374 for _, r := range p.refs {
375 refs = append(refs, r.String())
376 }
377
378 return fmt.Sprintf("Context{Store:%#v, Refs:%s}", p.store, strings.Join(refs, ","))
379}
380
// OpenedBlocks returns the list of blocks that are currently being parsed.
// The internal slice is returned directly, not a copy.
func (p *parseContext) OpenedBlocks() []Block {
	return p.openedBlocks
}

// SetOpenedBlocks replaces the list of blocks that are currently being parsed.
// The given slice is stored directly, not copied.
func (p *parseContext) SetOpenedBlocks(v []Block) {
	p.openedBlocks = v
}
388
389func (p *parseContext) LastOpenedBlock() Block {
390 if l := len(p.openedBlocks); l != 0 {
391 return p.openedBlocks[l-1]
392 }
393 return Block{}
394}
395
396func (p *parseContext) IsInLinkLabel() bool {
397 tlist := p.Get(linkLabelStateKey)
398 return tlist != nil
399}
400
// State represents parser's state.
// State is designed to be used as a bit flag: every constant below occupies
// its own bit, so values can be combined with | and tested with &.
type State int

const (
	// None is a default value of the [State].
	// Note that it is the lowest bit (1), not the zero value of State.
	None State = 1 << 0

	// Continue indicates parser can continue parsing.
	Continue State = 1 << 1

	// Close indicates parser cannot parse anymore.
	Close State = 1 << 2

	// HasChildren indicates parser may have child blocks.
	HasChildren State = 1 << 3

	// NoChildren indicates parser does not have child blocks.
	NoChildren State = 1 << 4

	// RequireParagraph indicates parser requires that the last node
	// must be a paragraph and is not converted to other nodes by
	// ParagraphTransformers.
	RequireParagraph State = 1 << 5
)
426
// A Config struct is a data structure that holds configuration of the Parser.
type Config struct {
	// Options holds arbitrary named options; they are passed to every
	// registered component that implements SetOptioner.
	Options map[OptionName]interface{}
	// BlockParsers, InlineParsers, ParagraphTransformers and ASTTransformers
	// are the prioritized components the parser is assembled from.
	BlockParsers          util.PrioritizedSlice /*<BlockParser>*/
	InlineParsers         util.PrioritizedSlice /*<InlineParser>*/
	ParagraphTransformers util.PrioritizedSlice /*<ParagraphTransformer>*/
	ASTTransformers       util.PrioritizedSlice /*<ASTTransformer>*/
	// EscapedSpace is enabled by WithEscapedSpace.
	EscapedSpace bool
}
436
437// NewConfig returns a new Config.
438func NewConfig() *Config {
439 return &Config{
440 Options: map[OptionName]interface{}{},
441 BlockParsers: util.PrioritizedSlice{},
442 InlineParsers: util.PrioritizedSlice{},
443 ParagraphTransformers: util.PrioritizedSlice{},
444 ASTTransformers: util.PrioritizedSlice{},
445 }
446}
447
// An Option interface is a functional option type for the Parser.
type Option interface {
	// SetParserOption applies this option to the given parser configuration.
	SetParserOption(*Config)
}

// OptionName is a name of parser options.
type OptionName string
455
456// Attribute is an option name that spacify attributes of elements.
457const optAttribute OptionName = "Attribute"
458
459type withAttribute struct {
460}
461
462func (o *withAttribute) SetParserOption(c *Config) {
463 c.Options[optAttribute] = true
464}
465
466// WithAttribute is a functional option that enables custom attributes.
467func WithAttribute() Option {
468 return &withAttribute{}
469}
470
// A Parser interface parses Markdown text into AST nodes.
type Parser interface {
	// Parse parses the given Markdown text into AST nodes.
	Parse(reader text.Reader, opts ...ParseOption) ast.Node

	// AddOptions adds the given options to this parser.
	AddOptions(...Option)
}
479
// A SetOptioner interface sets the given option to the object.
type SetOptioner interface {
	// SetOption sets the given option to the object.
	// Options the object does not understand may be passed as well,
	// so implementations must ignore options they cannot accept.
	SetOption(name OptionName, value interface{})
}
487
// A BlockParser interface parses a block level element like Paragraph, List,
// Blockquote etc.
type BlockParser interface {
	// Trigger returns a list of characters that trigger the Open method of
	// this parser.
	// If Trigger returns nil, Open will be called for any line.
	Trigger() []byte

	// Open parses the current line and returns a result of parsing.
	//
	// Open must not parse beyond the current line.
	// If Open has been able to parse the current line, Open must advance a reader
	// position by consumed byte length.
	//
	// If Open has not been able to parse the current line, Open should return
	// (nil, NoChildren). If Open has been able to parse the current line, Open
	// should return a new Block node together with HasChildren or NoChildren.
	Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State)

	// Continue parses the current line and returns a result of parsing.
	//
	// Continue must not parse beyond the current line.
	// If Continue has been able to parse the current line, Continue must advance
	// a reader position by consumed byte length.
	//
	// If Continue has not been able to parse the current line, Continue should
	// return Close. If Continue has been able to parse the current line,
	// Continue should return (Continue | NoChildren) or
	// (Continue | HasChildren).
	Continue(node ast.Node, reader text.Reader, pc Context) State

	// Close will be called when the parser returns Close.
	Close(node ast.Node, reader text.Reader, pc Context)

	// CanInterruptParagraph returns true if the parser can interrupt paragraphs,
	// otherwise false.
	CanInterruptParagraph() bool

	// CanAcceptIndentedLine returns true if the parser can open a new node when
	// the given line is indented by more than 3 spaces.
	CanAcceptIndentedLine() bool
}
530
// An InlineParser interface parses an inline level element like CodeSpan, Link etc.
type InlineParser interface {
	// Trigger returns a list of characters that trigger the Parse method of
	// this parser.
	// Trigger characters must be a punctuation or a halfspace.
	// A halfspace triggers this parser when the character is any space
	// character or at the head of a line.
	Trigger() []byte

	// Parse parses the given block into an inline node.
	//
	// Parse can parse beyond the current line.
	// If Parse has been able to parse the current line, it must advance a reader
	// position by consumed byte length.
	Parse(parent ast.Node, block text.Reader, pc Context) ast.Node
}
547
// A CloseBlocker interface is a callback that will be
// called when a block is closed during the inline parsing.
// Inline parsers that also implement CloseBlocker are registered as
// close blockers automatically.
type CloseBlocker interface {
	// CloseBlock will be called when a block is closed.
	CloseBlock(parent ast.Node, block text.Reader, pc Context)
}
554
// A ParagraphTransformer transforms parsed Paragraph nodes.
// For example, link references are searched in parsed Paragraphs.
// A transformer that detaches the paragraph from its parent stops the
// transformer chain for that paragraph.
type ParagraphTransformer interface {
	// Transform transforms the given paragraph.
	Transform(node *ast.Paragraph, reader text.Reader, pc Context)
}
561
// ASTTransformer transforms the entire Markdown document AST tree.
// Transformers run at the end of Parse, after block and inline parsing.
type ASTTransformer interface {
	// Transform transforms the given AST tree.
	Transform(node *ast.Document, reader text.Reader, pc Context)
}
567
568// DefaultBlockParsers returns a new list of default BlockParsers.
569// Priorities of default BlockParsers are:
570//
571// SetextHeadingParser, 100
572// ThematicBreakParser, 200
573// ListParser, 300
574// ListItemParser, 400
575// CodeBlockParser, 500
576// ATXHeadingParser, 600
577// FencedCodeBlockParser, 700
578// BlockquoteParser, 800
579// HTMLBlockParser, 900
580// ParagraphParser, 1000
581func DefaultBlockParsers() []util.PrioritizedValue {
582 return []util.PrioritizedValue{
583 util.Prioritized(NewSetextHeadingParser(), 100),
584 util.Prioritized(NewThematicBreakParser(), 200),
585 util.Prioritized(NewListParser(), 300),
586 util.Prioritized(NewListItemParser(), 400),
587 util.Prioritized(NewCodeBlockParser(), 500),
588 util.Prioritized(NewATXHeadingParser(), 600),
589 util.Prioritized(NewFencedCodeBlockParser(), 700),
590 util.Prioritized(NewBlockquoteParser(), 800),
591 util.Prioritized(NewHTMLBlockParser(), 900),
592 util.Prioritized(NewParagraphParser(), 1000),
593 }
594}
595
596// DefaultInlineParsers returns a new list of default InlineParsers.
597// Priorities of default InlineParsers are:
598//
599// CodeSpanParser, 100
600// LinkParser, 200
601// AutoLinkParser, 300
602// RawHTMLParser, 400
603// EmphasisParser, 500
604func DefaultInlineParsers() []util.PrioritizedValue {
605 return []util.PrioritizedValue{
606 util.Prioritized(NewCodeSpanParser(), 100),
607 util.Prioritized(NewLinkParser(), 200),
608 util.Prioritized(NewAutoLinkParser(), 300),
609 util.Prioritized(NewRawHTMLParser(), 400),
610 util.Prioritized(NewEmphasisParser(), 500),
611 }
612}
613
614// DefaultParagraphTransformers returns a new list of default ParagraphTransformers.
615// Priorities of default ParagraphTransformers are:
616//
617// LinkReferenceParagraphTransformer, 100
618func DefaultParagraphTransformers() []util.PrioritizedValue {
619 return []util.PrioritizedValue{
620 util.Prioritized(LinkReferenceParagraphTransformer, 100),
621 }
622}
623
// A Block struct holds a node and the parser that opened it.
// The zero Block (nil Node and Parser) is returned by
// Context.LastOpenedBlock when no block is open.
type Block struct {
	// Node is a BlockNode.
	Node ast.Node
	// Parser is a BlockParser.
	Parser BlockParser
}
631
// parser is the Parser implementation returned by NewParser.
// The lookup tables below are filled from config on the first call to Parse.
type parser struct {
	// options is initialized to an empty map by NewParser.
	options map[OptionName]interface{}
	// blockParsers is indexed by trigger byte; see addBlockParser.
	blockParsers [256][]BlockParser
	// freeBlockParsers holds block parsers whose Trigger returned nil.
	freeBlockParsers []BlockParser
	// inlineParsers is indexed by trigger byte; see addInlineParser.
	inlineParsers [256][]InlineParser
	// closeBlockers holds the inline parsers that implement CloseBlocker.
	closeBlockers         []CloseBlocker
	paragraphTransformers []ParagraphTransformer
	astTransformers       []ASTTransformer
	// escapedSpace is copied from Config.EscapedSpace during initialization.
	escapedSpace bool
	// config is the pending configuration; Parse sets it to nil once it has
	// been consumed.
	config *Config
	// initSync guards the one-time initialization performed by Parse.
	initSync sync.Once
}
644
645type withBlockParsers struct {
646 value []util.PrioritizedValue
647}
648
649func (o *withBlockParsers) SetParserOption(c *Config) {
650 c.BlockParsers = append(c.BlockParsers, o.value...)
651}
652
653// WithBlockParsers is a functional option that allow you to add
654// BlockParsers to the parser.
655func WithBlockParsers(bs ...util.PrioritizedValue) Option {
656 return &withBlockParsers{bs}
657}
658
659type withInlineParsers struct {
660 value []util.PrioritizedValue
661}
662
663func (o *withInlineParsers) SetParserOption(c *Config) {
664 c.InlineParsers = append(c.InlineParsers, o.value...)
665}
666
667// WithInlineParsers is a functional option that allow you to add
668// InlineParsers to the parser.
669func WithInlineParsers(bs ...util.PrioritizedValue) Option {
670 return &withInlineParsers{bs}
671}
672
673type withParagraphTransformers struct {
674 value []util.PrioritizedValue
675}
676
677func (o *withParagraphTransformers) SetParserOption(c *Config) {
678 c.ParagraphTransformers = append(c.ParagraphTransformers, o.value...)
679}
680
681// WithParagraphTransformers is a functional option that allow you to add
682// ParagraphTransformers to the parser.
683func WithParagraphTransformers(ps ...util.PrioritizedValue) Option {
684 return &withParagraphTransformers{ps}
685}
686
687type withASTTransformers struct {
688 value []util.PrioritizedValue
689}
690
691func (o *withASTTransformers) SetParserOption(c *Config) {
692 c.ASTTransformers = append(c.ASTTransformers, o.value...)
693}
694
695// WithASTTransformers is a functional option that allow you to add
696// ASTTransformers to the parser.
697func WithASTTransformers(ps ...util.PrioritizedValue) Option {
698 return &withASTTransformers{ps}
699}
700
701type withEscapedSpace struct {
702}
703
704func (o *withEscapedSpace) SetParserOption(c *Config) {
705 c.EscapedSpace = true
706}
707
708// WithEscapedSpace is a functional option indicates that a '\' escaped half-space(0x20) should not trigger parsers.
709func WithEscapedSpace() Option {
710 return &withEscapedSpace{}
711}
712
713type withOption struct {
714 name OptionName
715 value interface{}
716}
717
718func (o *withOption) SetParserOption(c *Config) {
719 c.Options[o.name] = o.value
720}
721
722// WithOption is a functional option that allow you to set
723// an arbitrary option to the parser.
724func WithOption(name OptionName, value interface{}) Option {
725 return &withOption{name, value}
726}
727
728// NewParser returns a new Parser with given options.
729func NewParser(options ...Option) Parser {
730 config := NewConfig()
731 for _, opt := range options {
732 opt.SetParserOption(config)
733 }
734
735 p := &parser{
736 options: map[OptionName]interface{}{},
737 config: config,
738 }
739
740 return p
741}
742
743func (p *parser) AddOptions(opts ...Option) {
744 for _, opt := range opts {
745 opt.SetParserOption(p.config)
746 }
747}
748
749func (p *parser) addBlockParser(v util.PrioritizedValue, options map[OptionName]interface{}) {
750 bp, ok := v.Value.(BlockParser)
751 if !ok {
752 panic(fmt.Sprintf("%v is not a BlockParser", v.Value))
753 }
754 tcs := bp.Trigger()
755 so, ok := v.Value.(SetOptioner)
756 if ok {
757 for oname, ovalue := range options {
758 so.SetOption(oname, ovalue)
759 }
760 }
761 if tcs == nil {
762 p.freeBlockParsers = append(p.freeBlockParsers, bp)
763 } else {
764 for _, tc := range tcs {
765 if p.blockParsers[tc] == nil {
766 p.blockParsers[tc] = []BlockParser{}
767 }
768 p.blockParsers[tc] = append(p.blockParsers[tc], bp)
769 }
770 }
771}
772
773func (p *parser) addInlineParser(v util.PrioritizedValue, options map[OptionName]interface{}) {
774 ip, ok := v.Value.(InlineParser)
775 if !ok {
776 panic(fmt.Sprintf("%v is not a InlineParser", v.Value))
777 }
778 tcs := ip.Trigger()
779 so, ok := v.Value.(SetOptioner)
780 if ok {
781 for oname, ovalue := range options {
782 so.SetOption(oname, ovalue)
783 }
784 }
785 if cb, ok := ip.(CloseBlocker); ok {
786 p.closeBlockers = append(p.closeBlockers, cb)
787 }
788 for _, tc := range tcs {
789 if p.inlineParsers[tc] == nil {
790 p.inlineParsers[tc] = []InlineParser{}
791 }
792 p.inlineParsers[tc] = append(p.inlineParsers[tc], ip)
793 }
794}
795
796func (p *parser) addParagraphTransformer(v util.PrioritizedValue, options map[OptionName]interface{}) {
797 pt, ok := v.Value.(ParagraphTransformer)
798 if !ok {
799 panic(fmt.Sprintf("%v is not a ParagraphTransformer", v.Value))
800 }
801 so, ok := v.Value.(SetOptioner)
802 if ok {
803 for oname, ovalue := range options {
804 so.SetOption(oname, ovalue)
805 }
806 }
807 p.paragraphTransformers = append(p.paragraphTransformers, pt)
808}
809
810func (p *parser) addASTTransformer(v util.PrioritizedValue, options map[OptionName]interface{}) {
811 at, ok := v.Value.(ASTTransformer)
812 if !ok {
813 panic(fmt.Sprintf("%v is not a ASTTransformer", v.Value))
814 }
815 so, ok := v.Value.(SetOptioner)
816 if ok {
817 for oname, ovalue := range options {
818 so.SetOption(oname, ovalue)
819 }
820 }
821 p.astTransformers = append(p.astTransformers, at)
822}
823
// A ParseConfig struct is a data structure that holds configuration of the Parser.Parse.
type ParseConfig struct {
	// Context is the context used for the parse. When it is left nil,
	// Parse creates a new one with NewContext.
	Context Context
}

// A ParseOption is a functional option type for the Parser.Parse.
type ParseOption func(c *ParseConfig)
831
832// WithContext is a functional option that allow you to override
833// a default context.
834func WithContext(context Context) ParseOption {
835 return func(c *ParseConfig) {
836 c.Context = context
837 }
838}
839
840func (p *parser) Parse(reader text.Reader, opts ...ParseOption) ast.Node {
841 p.initSync.Do(func() {
842 p.config.BlockParsers.Sort()
843 for _, v := range p.config.BlockParsers {
844 p.addBlockParser(v, p.config.Options)
845 }
846 for i := range p.blockParsers {
847 if p.blockParsers[i] != nil {
848 p.blockParsers[i] = append(p.blockParsers[i], p.freeBlockParsers...)
849 }
850 }
851
852 p.config.InlineParsers.Sort()
853 for _, v := range p.config.InlineParsers {
854 p.addInlineParser(v, p.config.Options)
855 }
856 p.config.ParagraphTransformers.Sort()
857 for _, v := range p.config.ParagraphTransformers {
858 p.addParagraphTransformer(v, p.config.Options)
859 }
860 p.config.ASTTransformers.Sort()
861 for _, v := range p.config.ASTTransformers {
862 p.addASTTransformer(v, p.config.Options)
863 }
864 p.escapedSpace = p.config.EscapedSpace
865 p.config = nil
866 })
867 c := &ParseConfig{}
868 for _, opt := range opts {
869 opt(c)
870 }
871 if c.Context == nil {
872 c.Context = NewContext()
873 }
874 pc := c.Context
875 root := ast.NewDocument()
876 p.parseBlocks(root, reader, pc)
877
878 blockReader := text.NewBlockReader(reader.Source(), nil)
879 p.walkBlock(root, func(node ast.Node) {
880 p.parseBlock(blockReader, node, pc)
881 })
882 for _, at := range p.astTransformers {
883 at.Transform(root, reader, pc)
884 }
885
886 // root.Dump(reader.Source(), 0)
887 return root
888}
889
890func (p *parser) transformParagraph(node *ast.Paragraph, reader text.Reader, pc Context) bool {
891 for _, pt := range p.paragraphTransformers {
892 pt.Transform(node, reader, pc)
893 if node.Parent() == nil {
894 return true
895 }
896 }
897 return false
898}
899
900func (p *parser) closeBlocks(from, to int, reader text.Reader, pc Context) {
901 blocks := pc.OpenedBlocks()
902 for i := from; i >= to; i-- {
903 node := blocks[i].Node
904 paragraph, ok := node.(*ast.Paragraph)
905 if ok && node.Parent() != nil {
906 p.transformParagraph(paragraph, reader, pc)
907 }
908 if node.Parent() != nil { // closes only if node has not been transformed
909 blocks[i].Parser.Close(blocks[i].Node, reader, pc)
910 }
911 }
912 if from == len(blocks)-1 {
913 blocks = blocks[0:to]
914 } else {
915 blocks = append(blocks[0:to], blocks[from+1:]...)
916 }
917 pc.SetOpenedBlocks(blocks)
918}
919
// blockOpenResult is the outcome reported by openBlocks.
type blockOpenResult int

const (
	// paragraphContinuation means no block was opened and the last opened
	// paragraph accepted the line as a continuation.
	paragraphContinuation blockOpenResult = 1
	// newBlocksOpened means at least one new block was opened.
	newBlocksOpened blockOpenResult = 2
	// noBlocksOpened means nothing was opened and nothing was continued.
	noBlocksOpened blockOpenResult = 3
)
927
// openBlocks tries to open new blocks under parent for the current line.
//
// It returns newBlocksOpened if at least one block was opened,
// paragraphContinuation if nothing was opened but the last opened block is a
// paragraph whose parser accepted the line, and noBlocksOpened otherwise.
// blankLine is recorded on every opened node via SetBlankPreviousLines.
func (p *parser) openBlocks(parent ast.Node, blankLine bool, reader text.Reader, pc Context) blockOpenResult {
	result := blockOpenResult(noBlocksOpened)
	// continuable is true while the last opened block is a paragraph that
	// the current line might continue.
	continuable := false
	lastBlock := pc.LastOpenedBlock()
	if lastBlock.Node != nil {
		continuable = ast.IsParagraph(lastBlock.Node)
	}
retry:
	// bps is declared before any goto so that the jumps to the continuable
	// label do not skip a variable declaration.
	var bps []BlockParser
	line, _ := reader.PeekLine()
	w, pos := util.IndentWidth(line, reader.LineOffset())
	if w >= len(line) {
		pc.SetBlockOffset(-1)
		pc.SetBlockIndent(-1)
	} else {
		pc.SetBlockOffset(pos)
		pc.SetBlockIndent(w)
	}
	if line == nil || line[0] == '\n' {
		goto continuable
	}
	// Pick the candidates by the byte at pos, falling back to the parsers
	// without triggers.
	bps = p.freeBlockParsers
	if pos < len(line) {
		bps = p.blockParsers[line[pos]]
		if bps == nil {
			bps = p.freeBlockParsers
		}
	}
	if bps == nil {
		goto continuable
	}

	for _, bp := range bps {
		// While a paragraph may still be continued, only parsers that can
		// interrupt a paragraph are allowed to open a block.
		if continuable && result == noBlocksOpened && !bp.CanInterruptParagraph() {
			continue
		}
		if w > 3 && !bp.CanAcceptIndentedLine() {
			continue
		}
		lastBlock = pc.LastOpenedBlock()
		last := lastBlock.Node
		node, state := bp.Open(parent, reader, pc)
		if node != nil {
			// Parser requires last node to be a paragraph.
			// With table extension:
			//
			//     0
			//     -:
			//     -
			//
			// '-' on 3rd line seems a Setext heading because 1st and 2nd lines
			// are being paragraph when the Setext heading parser tries to parse the 3rd
			// line.
			// But 1st line and 2nd line are a table. Thus this paragraph will be transformed
			// by a paragraph transformer. So this text should be converted to a table and
			// an empty list.
			if state&RequireParagraph != 0 {
				if last == parent.LastChild() {
					// Opened paragraph may be transformed by ParagraphTransformers in
					// closeBlocks().
					lastBlock.Parser.Close(last, reader, pc)
					blocks := pc.OpenedBlocks()
					pc.SetOpenedBlocks(blocks[0 : len(blocks)-1])
					if p.transformParagraph(last.(*ast.Paragraph), reader, pc) {
						// Paragraph has been transformed.
						// So this parser is considered as failing.
						continuable = false
						goto retry
					}
				}
			}
			node.SetBlankPreviousLines(blankLine)
			// The previous last node got detached while this parser was
			// opening; close its (now stale) entry in the opened blocks.
			if last != nil && last.Parent() == nil {
				lastPos := len(pc.OpenedBlocks()) - 1
				p.closeBlocks(lastPos, lastPos, reader, pc)
			}
			parent.AppendChild(parent, node)
			result = newBlocksOpened
			be := Block{node, bp}
			pc.SetOpenedBlocks(append(pc.OpenedBlocks(), be))
			if state&HasChildren != 0 {
				parent = node
				goto retry // try child block
			}
			break // no children, can not open more blocks on this line
		}
	}

continuable:
	// Nothing was opened: give the last paragraph the chance to take the line.
	if result == noBlocksOpened && continuable {
		state := lastBlock.Parser.Continue(lastBlock.Node, reader, pc)
		if state&Continue != 0 {
			result = paragraphContinuation
		}
	}
	return result
}
1025
// lineStat records, for one line and one nesting level of opened blocks,
// whether the line was blank.
type lineStat struct {
	lineNum int
	level   int
	isBlank bool
}

// isBlankLine reports whether line lineNum counts as blank at the given
// nesting level according to stats.
//
// stats is scanned backwards, starting level entries before the end. If no
// entry is left to scan, the result is true. For an entry of the requested
// line, an entry of the same level decides the result and a blank entry of
// a lower level yields true. The scan gives up with false once an entry of
// an earlier line is reached.
func isBlankLine(lineNum, level int, stats []lineStat) bool {
	start := len(stats) - 1 - level
	if start < 0 {
		return true
	}
	for i := start; i >= 0; i-- {
		stat := stats[i]
		switch {
		case stat.lineNum < lineNum:
			return false
		case stat.lineNum > lineNum:
			continue
		case stat.level == level:
			return stat.isBlank
		case stat.level < level && stat.isBlank:
			return true
		}
	}
	return false
}
1050
// parseBlocks reads the input line by line and builds the block structure
// under parent.
//
// The outer loop handles runs of blocks separated by blank lines; the inner
// loop feeds each following line to the currently opened blocks, from the
// outermost to the innermost, and opens or closes blocks as their parsers
// dictate.
func (p *parser) parseBlocks(parent ast.Node, reader text.Reader, pc Context) {
	pc.SetOpenedBlocks([]Block{})
	// blankLines collects per-line, per-level blank information that is
	// queried through isBlankLine.
	blankLines := make([]lineStat, 0, 128)
	var isBlank bool
	for { // process blocks separated by blank lines
		_, lines, ok := reader.SkipBlankLines()
		if !ok {
			return
		}
		lineNum, _ := reader.Position()
		if lines != 0 {
			// Blank lines were skipped: restart the statistics and mark the
			// previous line as blank for every currently opened level.
			blankLines = blankLines[0:0]
			l := len(pc.OpenedBlocks())
			for i := 0; i < l; i++ {
				blankLines = append(blankLines, lineStat{lineNum - 1, i, lines != 0})
			}
		}
		isBlank = isBlankLine(lineNum-1, 0, blankLines)
		// first, we try to open blocks
		if p.openBlocks(parent, isBlank, reader, pc) != newBlocksOpened {
			return
		}
		reader.AdvanceLine()
		for { // process opened blocks line by line
			openedBlocks := pc.OpenedBlocks()
			l := len(openedBlocks)
			if l == 0 {
				break
			}
			lastIndex := l - 1
			for i := 0; i < l; i++ {
				be := openedBlocks[i]
				line, _ := reader.PeekLine()
				if line == nil {
					// End of input: close everything that is still open.
					p.closeBlocks(lastIndex, 0, reader, pc)
					reader.AdvanceLine()
					return
				}
				lineNum, _ := reader.Position()
				blankLines = append(blankLines, lineStat{lineNum, i, util.IsBlank(line)})
				// If node is a paragraph, p.openBlocks determines whether it is continuable.
				// So we do not process paragraphs here.
				if !ast.IsParagraph(be.Node) {
					state := be.Parser.Continue(be.Node, reader, pc)
					if state&Continue != 0 {
						// When current node is a container block and has no children,
						// we try to open new child nodes
						if state&HasChildren != 0 && i == lastIndex {
							isBlank = isBlankLine(lineNum-1, i, blankLines)
							p.openBlocks(be.Node, isBlank, reader, pc)
							break
						}
						continue
					}
				}
				// current node may be closed or lazy continuation
				isBlank = isBlankLine(lineNum-1, i, blankLines)
				thisParent := parent
				if i != 0 {
					thisParent = openedBlocks[i-1].Node
				}
				lastNode := openedBlocks[lastIndex].Node
				result := p.openBlocks(thisParent, isBlank, reader, pc)
				if result != paragraphContinuation {
					// lastNode is a paragraph and was transformed by the paragraph
					// transformers.
					if openedBlocks[lastIndex].Node != lastNode {
						lastIndex--
					}
					p.closeBlocks(lastIndex, i, reader, pc)
				}
				break
			}

			reader.AdvanceLine()
		}
	}
}
1129
1130func (p *parser) walkBlock(block ast.Node, cb func(node ast.Node)) {
1131 for c := block.FirstChild(); c != nil; c = c.NextSibling() {
1132 p.walkBlock(c, cb)
1133 }
1134 cb(block)
1135}
1136
// Bit flags describing how a line of inline content ends.
const (
	// lineBreakHard marks a line that ends with a hard line break.
	lineBreakHard uint8 = 1 << 0
	// lineBreakSoft marks a line that ends with a plain newline.
	lineBreakSoft uint8 = 1 << 1
	// lineBreakVisible is set together with lineBreakHard when the hard
	// break is written as a backslash before the newline.
	lineBreakVisible uint8 = 1 << 2
)
1142
1143func (p *parser) parseBlock(block text.BlockReader, parent ast.Node, pc Context) {
1144 if parent.IsRaw() {
1145 return
1146 }
1147 escaped := false
1148 source := block.Source()
1149 block.Reset(parent.Lines())
1150 for {
1151 retry:
1152 line, _ := block.PeekLine()
1153 if line == nil {
1154 break
1155 }
1156 lineLength := len(line)
1157 var lineBreakFlags uint8
1158 hasNewLine := line[lineLength-1] == '\n'
1159 if ((lineLength >= 3 && line[lineLength-2] == '\\' &&
1160 line[lineLength-3] != '\\') || (lineLength == 2 && line[lineLength-2] == '\\')) && hasNewLine { // ends with \\n
1161 lineLength -= 2
1162 lineBreakFlags |= lineBreakHard | lineBreakVisible
1163 } else if ((lineLength >= 4 && line[lineLength-3] == '\\' && line[lineLength-2] == '\r' &&
1164 line[lineLength-4] != '\\') || (lineLength == 3 && line[lineLength-3] == '\\' && line[lineLength-2] == '\r')) &&
1165 hasNewLine { // ends with \\r\n
1166 lineLength -= 3
1167 lineBreakFlags |= lineBreakHard | lineBreakVisible
1168 } else if lineLength >= 3 && line[lineLength-3] == ' ' && line[lineLength-2] == ' ' &&
1169 hasNewLine { // ends with [space][space]\n
1170 lineLength -= 3
1171 lineBreakFlags |= lineBreakHard
1172 } else if lineLength >= 4 && line[lineLength-4] == ' ' && line[lineLength-3] == ' ' &&
1173 line[lineLength-2] == '\r' && hasNewLine { // ends with [space][space]\r\n
1174 lineLength -= 4
1175 lineBreakFlags |= lineBreakHard
1176 } else if hasNewLine {
1177 // If the line ends with a newline character, but it is not a hardlineBreak, then it is a softLinebreak
1178 // If the line ends with a hardlineBreak, then it cannot end with a softLinebreak
1179 // See https://spec.commonmark.org/0.30/#soft-line-breaks
1180 lineBreakFlags |= lineBreakSoft
1181 }
1182
1183 l, startPosition := block.Position()
1184 n := 0
1185 for i := 0; i < lineLength; i++ {
1186 c := line[i]
1187 if c == '\n' {
1188 break
1189 }
1190 isSpace := util.IsSpace(c) && c != '\r' && c != '\n'
1191 isPunct := util.IsPunct(c)
1192 if (isPunct && !escaped) || isSpace && !(escaped && p.escapedSpace) || i == 0 {
1193 parserChar := c
1194 if isSpace || (i == 0 && !isPunct) {
1195 parserChar = ' '
1196 }
1197 ips := p.inlineParsers[parserChar]
1198 if ips != nil {
1199 block.Advance(n)
1200 n = 0
1201 savedLine, savedPosition := block.Position()
1202 if i != 0 {
1203 _, currentPosition := block.Position()
1204 ast.MergeOrAppendTextSegment(parent, startPosition.Between(currentPosition))
1205 _, startPosition = block.Position()
1206 }
1207 var inlineNode ast.Node
1208 for _, ip := range ips {
1209 inlineNode = ip.Parse(parent, block, pc)
1210 if inlineNode != nil {
1211 break
1212 }
1213 block.SetPosition(savedLine, savedPosition)
1214 }
1215 if inlineNode != nil {
1216 parent.AppendChild(parent, inlineNode)
1217 goto retry
1218 }
1219 }
1220 }
1221 if escaped {
1222 escaped = false
1223 n++
1224 continue
1225 }
1226
1227 if c == '\\' {
1228 escaped = true
1229 n++
1230 continue
1231 }
1232
1233 escaped = false
1234 n++
1235 }
1236 if n != 0 {
1237 block.Advance(n)
1238 }
1239 currentL, currentPosition := block.Position()
1240 if l != currentL {
1241 continue
1242 }
1243 diff := startPosition.Between(currentPosition)
1244 var text *ast.Text
1245 if lineBreakFlags&(lineBreakHard|lineBreakVisible) == lineBreakHard|lineBreakVisible {
1246 text = ast.NewTextSegment(diff)
1247 } else {
1248 text = ast.NewTextSegment(diff.TrimRightSpace(source))
1249 }
1250 text.SetSoftLineBreak(lineBreakFlags&lineBreakSoft != 0)
1251 text.SetHardLineBreak(lineBreakFlags&lineBreakHard != 0)
1252 parent.AppendChild(parent, text)
1253 block.AdvanceLine()
1254 }
1255
1256 ProcessDelimiters(nil, pc)
1257 for _, ip := range p.closeBlockers {
1258 ip.CloseBlock(parent, block, pc)
1259 }
1260
1261}