parser.go

// Package parser contains the types and functions used to parse Markdown text.
   2package parser
   3
   4import (
   5	"fmt"
   6	"strings"
   7	"sync"
   8
   9	"github.com/yuin/goldmark/ast"
  10	"github.com/yuin/goldmark/text"
  11	"github.com/yuin/goldmark/util"
  12)
  13
  14// A Reference interface represents a link reference in Markdown text.
  15type Reference interface {
  16	// String implements Stringer.
  17	String() string
  18
  19	// Label returns a label of the reference.
  20	Label() []byte
  21
  22	// Destination returns a destination(URL) of the reference.
  23	Destination() []byte
  24
  25	// Title returns a title of the reference.
  26	Title() []byte
  27}
  28
  29type reference struct {
  30	label       []byte
  31	destination []byte
  32	title       []byte
  33}
  34
  35// NewReference returns a new Reference.
  36func NewReference(label, destination, title []byte) Reference {
  37	return &reference{label, destination, title}
  38}
  39
  40func (r *reference) Label() []byte {
  41	return r.label
  42}
  43
  44func (r *reference) Destination() []byte {
  45	return r.destination
  46}
  47
  48func (r *reference) Title() []byte {
  49	return r.title
  50}
  51
  52func (r *reference) String() string {
  53	return fmt.Sprintf("Reference{Label:%s, Destination:%s, Title:%s}", r.label, r.destination, r.title)
  54}
  55
// An IDs interface is a collection of the element ids that are already in
// use, together with a way to obtain new ones.
type IDs interface {
	// Generate generates a new element id from the given value for a node
	// of the given kind.
	Generate(value []byte, kind ast.NodeKind) []byte

	// Put puts a given element id to the used ids table.
	Put(value []byte)
}
  64
  65type ids struct {
  66	values map[string]bool
  67}
  68
  69func newIDs() IDs {
  70	return &ids{
  71		values: map[string]bool{},
  72	}
  73}
  74
  75func (s *ids) Generate(value []byte, kind ast.NodeKind) []byte {
  76	value = util.TrimLeftSpace(value)
  77	value = util.TrimRightSpace(value)
  78	result := []byte{}
  79	for i := 0; i < len(value); {
  80		v := value[i]
  81		l := util.UTF8Len(v)
  82		i += int(l)
  83		if l != 1 {
  84			continue
  85		}
  86		if util.IsAlphaNumeric(v) {
  87			if 'A' <= v && v <= 'Z' {
  88				v += 'a' - 'A'
  89			}
  90			result = append(result, v)
  91		} else if util.IsSpace(v) || v == '-' || v == '_' {
  92			result = append(result, '-')
  93		}
  94	}
  95	if len(result) == 0 {
  96		if kind == ast.KindHeading {
  97			result = []byte("heading")
  98		} else {
  99			result = []byte("id")
 100		}
 101	}
 102	if _, ok := s.values[util.BytesToReadOnlyString(result)]; !ok {
 103		s.values[util.BytesToReadOnlyString(result)] = true
 104		return result
 105	}
 106	for i := 1; ; i++ {
 107		newResult := fmt.Sprintf("%s-%d", result, i)
 108		if _, ok := s.values[newResult]; !ok {
 109			s.values[newResult] = true
 110			return []byte(newResult)
 111		}
 112
 113	}
 114}
 115
 116func (s *ids) Put(value []byte) {
 117	s.values[util.BytesToReadOnlyString(value)] = true
 118}
 119
 120// ContextKey is a key that is used to set arbitrary values to the context.
 121type ContextKey int
 122
 123// ContextKeyMax is a maximum value of the ContextKey.
 124var ContextKeyMax ContextKey
 125
 126// NewContextKey return a new ContextKey value.
 127func NewContextKey() ContextKey {
 128	ContextKeyMax++
 129	return ContextKeyMax
 130}
 131
// A Context interface holds the information that is necessary to parse
// Markdown text.
type Context interface {
	// String implements Stringer.
	String() string

	// Get returns a value associated with the given key.
	Get(ContextKey) interface{}

	// ComputeIfAbsent computes a value if a value associated with the given key is absent and returns the value.
	ComputeIfAbsent(ContextKey, func() interface{}) interface{}

	// Set sets the given value to the context.
	Set(ContextKey, interface{})

	// AddReference adds the given reference to this context.
	AddReference(Reference)

	// Reference returns (a reference, true) if a reference associated with
	// the given label exists, otherwise (nil, false).
	Reference(label string) (Reference, bool)

	// References returns a list of references.
	References() []Reference

	// IDs returns a collection of the element ids.
	IDs() IDs

	// BlockOffset returns a first non-space character position on current line.
	// This value is valid only for BlockParser.Open.
	// BlockOffset returns -1 if current line is blank.
	BlockOffset() int

	// SetBlockOffset sets a first non-space character position on current line.
	// This value is valid only for BlockParser.Open.
	SetBlockOffset(int)

	// BlockIndent returns an indent width on current line.
	// This value is valid only for BlockParser.Open.
	// BlockIndent returns -1 if current line is blank.
	BlockIndent() int

	// SetBlockIndent sets an indent width on current line.
	// This value is valid only for BlockParser.Open.
	SetBlockIndent(int)

	// FirstDelimiter returns a first delimiter of the current delimiter list.
	FirstDelimiter() *Delimiter

	// LastDelimiter returns a last delimiter of the current delimiter list.
	LastDelimiter() *Delimiter

	// PushDelimiter appends the given delimiter to the tail of the current
	// delimiter list.
	PushDelimiter(delimiter *Delimiter)

	// RemoveDelimiter removes the given delimiter from the current delimiter list.
	RemoveDelimiter(d *Delimiter)

	// ClearDelimiters clears the current delimiter list.
	ClearDelimiters(bottom ast.Node)

	// OpenedBlocks returns a list of nodes that are currently in parsing.
	OpenedBlocks() []Block

	// SetOpenedBlocks sets a list of nodes that are currently in parsing.
	SetOpenedBlocks([]Block)

	// LastOpenedBlock returns a last node that is currently in parsing.
	LastOpenedBlock() Block

	// IsInLinkLabel returns true if current position seems to be in link label.
	IsInLinkLabel() bool
}
 206
 207// A ContextConfig struct is a data structure that holds configuration of the Context.
 208type ContextConfig struct {
 209	IDs IDs
 210}
 211
 212// An ContextOption is a functional option type for the Context.
 213type ContextOption func(*ContextConfig)
 214
 215// WithIDs is a functional option for the Context.
 216func WithIDs(ids IDs) ContextOption {
 217	return func(c *ContextConfig) {
 218		c.IDs = ids
 219	}
 220}
 221
 222type parseContext struct {
 223	store         []interface{}
 224	ids           IDs
 225	refs          map[string]Reference
 226	blockOffset   int
 227	blockIndent   int
 228	delimiters    *Delimiter
 229	lastDelimiter *Delimiter
 230	openedBlocks  []Block
 231}
 232
 233// NewContext returns a new Context.
 234func NewContext(options ...ContextOption) Context {
 235	cfg := &ContextConfig{
 236		IDs: newIDs(),
 237	}
 238	for _, option := range options {
 239		option(cfg)
 240	}
 241
 242	return &parseContext{
 243		store:         make([]interface{}, ContextKeyMax+1),
 244		refs:          map[string]Reference{},
 245		ids:           cfg.IDs,
 246		blockOffset:   -1,
 247		blockIndent:   -1,
 248		delimiters:    nil,
 249		lastDelimiter: nil,
 250		openedBlocks:  []Block{},
 251	}
 252}
 253
 254func (p *parseContext) Get(key ContextKey) interface{} {
 255	return p.store[key]
 256}
 257
 258func (p *parseContext) ComputeIfAbsent(key ContextKey, f func() interface{}) interface{} {
 259	v := p.store[key]
 260	if v == nil {
 261		v = f()
 262		p.store[key] = v
 263	}
 264	return v
 265}
 266
 267func (p *parseContext) Set(key ContextKey, value interface{}) {
 268	p.store[key] = value
 269}
 270
// IDs returns the element id collection this context was created with.
func (p *parseContext) IDs() IDs {
	return p.ids
}

// BlockOffset returns the value last passed to SetBlockOffset, or -1 if it
// has never been set.
func (p *parseContext) BlockOffset() int {
	return p.blockOffset
}

// SetBlockOffset records v as the current block offset.
func (p *parseContext) SetBlockOffset(v int) {
	p.blockOffset = v
}

// BlockIndent returns the value last passed to SetBlockIndent, or -1 if it
// has never been set.
func (p *parseContext) BlockIndent() int {
	return p.blockIndent
}

// SetBlockIndent records v as the current block indent.
func (p *parseContext) SetBlockIndent(v int) {
	p.blockIndent = v
}

// LastDelimiter returns the tail of the delimiter list, or nil if the list
// is empty.
func (p *parseContext) LastDelimiter() *Delimiter {
	return p.lastDelimiter
}

// FirstDelimiter returns the head of the delimiter list, or nil if the list
// is empty.
func (p *parseContext) FirstDelimiter() *Delimiter {
	return p.delimiters
}
 298
 299func (p *parseContext) PushDelimiter(d *Delimiter) {
 300	if p.delimiters == nil {
 301		p.delimiters = d
 302		p.lastDelimiter = d
 303	} else {
 304		l := p.lastDelimiter
 305		p.lastDelimiter = d
 306		l.NextDelimiter = d
 307		d.PreviousDelimiter = l
 308	}
 309}
 310
// RemoveDelimiter unlinks d from the delimiter list and then disposes of the
// delimiter node in the AST: a delimiter whose Length is not zero is handed
// to ast.MergeOrReplaceTextSegment together with its Segment, a delimiter
// whose Length is zero is removed from its parent.
func (p *parseContext) RemoveDelimiter(d *Delimiter) {
	if d.PreviousDelimiter == nil {
		// d is the head: the list now starts at its successor.
		p.delimiters = d.NextDelimiter
	} else {
		// Bridge the neighbours over d.
		d.PreviousDelimiter.NextDelimiter = d.NextDelimiter
		if d.NextDelimiter != nil {
			d.NextDelimiter.PreviousDelimiter = d.PreviousDelimiter
		}
	}
	if d.NextDelimiter == nil {
		// d is the tail: the list now ends at its predecessor.
		p.lastDelimiter = d.PreviousDelimiter
	}
	// Make sure the new head and tail do not point outside the list.
	if p.delimiters != nil {
		p.delimiters.PreviousDelimiter = nil
	}
	if p.lastDelimiter != nil {
		p.lastDelimiter.NextDelimiter = nil
	}
	// Detach d itself from the list.
	d.NextDelimiter = nil
	d.PreviousDelimiter = nil
	if d.Length != 0 {
		ast.MergeOrReplaceTextSegment(d.Parent(), d, d.Segment)
	} else {
		d.Parent().RemoveChild(d.Parent(), d)
	}
}
 337
 338func (p *parseContext) ClearDelimiters(bottom ast.Node) {
 339	if p.lastDelimiter == nil {
 340		return
 341	}
 342	var c ast.Node
 343	for c = p.lastDelimiter; c != nil && c != bottom; {
 344		prev := c.PreviousSibling()
 345		if d, ok := c.(*Delimiter); ok {
 346			p.RemoveDelimiter(d)
 347		}
 348		c = prev
 349	}
 350}
 351
 352func (p *parseContext) AddReference(ref Reference) {
 353	key := util.ToLinkReference(ref.Label())
 354	if _, ok := p.refs[key]; !ok {
 355		p.refs[key] = ref
 356	}
 357}
 358
 359func (p *parseContext) Reference(label string) (Reference, bool) {
 360	v, ok := p.refs[label]
 361	return v, ok
 362}
 363
 364func (p *parseContext) References() []Reference {
 365	ret := make([]Reference, 0, len(p.refs))
 366	for _, v := range p.refs {
 367		ret = append(ret, v)
 368	}
 369	return ret
 370}
 371
 372func (p *parseContext) String() string {
 373	refs := []string{}
 374	for _, r := range p.refs {
 375		refs = append(refs, r.String())
 376	}
 377
 378	return fmt.Sprintf("Context{Store:%#v, Refs:%s}", p.store, strings.Join(refs, ","))
 379}
 380
 381func (p *parseContext) OpenedBlocks() []Block {
 382	return p.openedBlocks
 383}
 384
 385func (p *parseContext) SetOpenedBlocks(v []Block) {
 386	p.openedBlocks = v
 387}
 388
 389func (p *parseContext) LastOpenedBlock() Block {
 390	if l := len(p.openedBlocks); l != 0 {
 391		return p.openedBlocks[l-1]
 392	}
 393	return Block{}
 394}
 395
 396func (p *parseContext) IsInLinkLabel() bool {
 397	tlist := p.Get(linkLabelStateKey)
 398	return tlist != nil
 399}
 400
// State represents parser's state.
// State is designed to use as a bit flag: every constant below occupies its
// own bit, so values can be combined with | and tested with &.
type State int

const (
	// None is a default value of the [State].
	// Because the constants start at 1 << iota, None has the value 1; it is
	// not the zero value of State.
	None State = 1 << iota

	// Continue indicates parser can continue parsing.
	Continue

	// Close indicates parser cannot parse anymore.
	Close

	// HasChildren indicates parser may have child blocks.
	HasChildren

	// NoChildren indicates parser does not have child blocks.
	NoChildren

	// RequireParagraph indicates parser requires that the last node
	// must be a paragraph and is not converted to other nodes by
	// ParagraphTransformers.
	RequireParagraph
)
 426
 427// A Config struct is a data structure that holds configuration of the Parser.
 428type Config struct {
 429	Options               map[OptionName]interface{}
 430	BlockParsers          util.PrioritizedSlice /*<BlockParser>*/
 431	InlineParsers         util.PrioritizedSlice /*<InlineParser>*/
 432	ParagraphTransformers util.PrioritizedSlice /*<ParagraphTransformer>*/
 433	ASTTransformers       util.PrioritizedSlice /*<ASTTransformer>*/
 434	EscapedSpace          bool
 435}
 436
 437// NewConfig returns a new Config.
 438func NewConfig() *Config {
 439	return &Config{
 440		Options:               map[OptionName]interface{}{},
 441		BlockParsers:          util.PrioritizedSlice{},
 442		InlineParsers:         util.PrioritizedSlice{},
 443		ParagraphTransformers: util.PrioritizedSlice{},
 444		ASTTransformers:       util.PrioritizedSlice{},
 445	}
 446}
 447
// An Option interface is a functional option type for the Parser.
type Option interface {
	// SetParserOption applies this option to the given parser configuration.
	SetParserOption(*Config)
}

// OptionName is a name of parser options.
type OptionName string

// optAttribute is the option name that specifies attributes of elements.
const optAttribute OptionName = "Attribute"

// withAttribute is the Option returned by WithAttribute.
type withAttribute struct {
}

// SetParserOption stores true under optAttribute in the config's options.
func (o *withAttribute) SetParserOption(c *Config) {
	c.Options[optAttribute] = true
}

// WithAttribute is a functional option that enables custom attributes.
func WithAttribute() Option {
	return &withAttribute{}
}
 470
// A Parser interface parses Markdown text into AST nodes.
type Parser interface {
	// Parse parses the given Markdown text into AST nodes.
	Parse(reader text.Reader, opts ...ParseOption) ast.Node

	// AddOptions adds the given options to this parser.
	AddOptions(...Option)
}

// A SetOptioner interface sets the given option to the object.
type SetOptioner interface {
	// SetOption sets the given option to the object.
	// Options that the object does not understand may be passed,
	// so implementations must ignore them.
	SetOption(name OptionName, value interface{})
}
 487
// A BlockParser interface parses a block level element like Paragraph, List,
// Blockquote etc.
type BlockParser interface {
	// Trigger returns a list of characters that trigger the Open method of
	// this parser.
	// If Trigger returns nil, Open will be called for any line.
	Trigger() []byte

	// Open parses the current line and returns a result of parsing.
	//
	// Open must not parse beyond the current line.
	// If Open has been able to parse the current line, Open must advance a reader
	// position by consumed byte length.
	//
	// If Open has not been able to parse the current line, Open should return
	// (nil, NoChildren). If Open has been able to parse the current line, Open
	// should return a new Block node together with HasChildren or NoChildren.
	Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State)

	// Continue parses the current line and returns a result of parsing.
	//
	// Continue must not parse beyond the current line.
	// If Continue has been able to parse the current line, Continue must advance
	// a reader position by consumed byte length.
	//
	// If Continue has not been able to parse the current line, Continue should
	// return Close. If Continue has been able to parse the current line,
	// Continue should return (Continue | NoChildren) or
	// (Continue | HasChildren).
	Continue(node ast.Node, reader text.Reader, pc Context) State

	// Close will be called when the parser returns Close.
	Close(node ast.Node, reader text.Reader, pc Context)

	// CanInterruptParagraph returns true if the parser can interrupt paragraphs,
	// otherwise false.
	CanInterruptParagraph() bool

	// CanAcceptIndentedLine returns true if the parser can open a new node when
	// the given line is indented by more than 3 spaces.
	CanAcceptIndentedLine() bool
}
 530
// An InlineParser interface parses an inline level element like CodeSpan, Link etc.
type InlineParser interface {
	// Trigger returns a list of characters that trigger the Parse method of
	// this parser.
	// Trigger characters must be a punctuation or a halfspace.
	// A halfspace triggers this parser when the character is any space
	// character or the head of a line.
	Trigger() []byte

	// Parse parses the given block into an inline node.
	//
	// Parse can parse beyond the current line.
	// If Parse has been able to parse the current line, it must advance a reader
	// position by consumed byte length.
	Parse(parent ast.Node, block text.Reader, pc Context) ast.Node
}

// A CloseBlocker interface is a callback that will be
// called when a block is closed during inline parsing.
type CloseBlocker interface {
	// CloseBlock will be called when a block is closed.
	CloseBlock(parent ast.Node, block text.Reader, pc Context)
}

// A ParagraphTransformer transforms parsed Paragraph nodes.
// For example, link references are searched in parsed Paragraphs.
type ParagraphTransformer interface {
	// Transform transforms the given paragraph.
	Transform(node *ast.Paragraph, reader text.Reader, pc Context)
}

// ASTTransformer transforms the AST of an entire Markdown document.
type ASTTransformer interface {
	// Transform transforms the given AST tree.
	Transform(node *ast.Document, reader text.Reader, pc Context)
}
 567
 568// DefaultBlockParsers returns a new list of default BlockParsers.
 569// Priorities of default BlockParsers are:
 570//
 571//	SetextHeadingParser, 100
 572//	ThematicBreakParser, 200
 573//	ListParser, 300
 574//	ListItemParser, 400
 575//	CodeBlockParser, 500
 576//	ATXHeadingParser, 600
 577//	FencedCodeBlockParser, 700
 578//	BlockquoteParser, 800
 579//	HTMLBlockParser, 900
 580//	ParagraphParser, 1000
 581func DefaultBlockParsers() []util.PrioritizedValue {
 582	return []util.PrioritizedValue{
 583		util.Prioritized(NewSetextHeadingParser(), 100),
 584		util.Prioritized(NewThematicBreakParser(), 200),
 585		util.Prioritized(NewListParser(), 300),
 586		util.Prioritized(NewListItemParser(), 400),
 587		util.Prioritized(NewCodeBlockParser(), 500),
 588		util.Prioritized(NewATXHeadingParser(), 600),
 589		util.Prioritized(NewFencedCodeBlockParser(), 700),
 590		util.Prioritized(NewBlockquoteParser(), 800),
 591		util.Prioritized(NewHTMLBlockParser(), 900),
 592		util.Prioritized(NewParagraphParser(), 1000),
 593	}
 594}
 595
 596// DefaultInlineParsers returns a new list of default InlineParsers.
 597// Priorities of default InlineParsers are:
 598//
 599//	CodeSpanParser, 100
 600//	LinkParser, 200
 601//	AutoLinkParser, 300
 602//	RawHTMLParser, 400
 603//	EmphasisParser, 500
 604func DefaultInlineParsers() []util.PrioritizedValue {
 605	return []util.PrioritizedValue{
 606		util.Prioritized(NewCodeSpanParser(), 100),
 607		util.Prioritized(NewLinkParser(), 200),
 608		util.Prioritized(NewAutoLinkParser(), 300),
 609		util.Prioritized(NewRawHTMLParser(), 400),
 610		util.Prioritized(NewEmphasisParser(), 500),
 611	}
 612}
 613
 614// DefaultParagraphTransformers returns a new list of default ParagraphTransformers.
 615// Priorities of default ParagraphTransformers are:
 616//
 617//	LinkReferenceParagraphTransformer, 100
 618func DefaultParagraphTransformers() []util.PrioritizedValue {
 619	return []util.PrioritizedValue{
 620		util.Prioritized(LinkReferenceParagraphTransformer, 100),
 621	}
 622}
 623
// A Block struct holds a node and its corresponding parser as a pair.
type Block struct {
	// Node is a BlockNode.
	Node ast.Node
	// Parser is a BlockParser.
	Parser BlockParser
}

// parser is the Parser implementation returned by NewParser.
// The lookup tables below are filled from config on the first call to Parse.
type parser struct {
	options map[OptionName]interface{}
	// blockParsers is indexed by trigger byte.
	blockParsers [256][]BlockParser
	// freeBlockParsers holds block parsers whose Trigger returned nil.
	freeBlockParsers []BlockParser
	// inlineParsers is indexed by trigger byte.
	inlineParsers [256][]InlineParser
	// closeBlockers holds the inline parsers that also implement CloseBlocker.
	closeBlockers         []CloseBlocker
	paragraphTransformers []ParagraphTransformer
	astTransformers       []ASTTransformer
	escapedSpace          bool
	// config collects options until the first Parse, which sets it to nil.
	config *Config
	// initSync guards the one-time initialization performed in Parse.
	initSync sync.Once
}
 644
 645type withBlockParsers struct {
 646	value []util.PrioritizedValue
 647}
 648
 649func (o *withBlockParsers) SetParserOption(c *Config) {
 650	c.BlockParsers = append(c.BlockParsers, o.value...)
 651}
 652
 653// WithBlockParsers is a functional option that allow you to add
 654// BlockParsers to the parser.
 655func WithBlockParsers(bs ...util.PrioritizedValue) Option {
 656	return &withBlockParsers{bs}
 657}
 658
 659type withInlineParsers struct {
 660	value []util.PrioritizedValue
 661}
 662
 663func (o *withInlineParsers) SetParserOption(c *Config) {
 664	c.InlineParsers = append(c.InlineParsers, o.value...)
 665}
 666
 667// WithInlineParsers is a functional option that allow you to add
 668// InlineParsers to the parser.
 669func WithInlineParsers(bs ...util.PrioritizedValue) Option {
 670	return &withInlineParsers{bs}
 671}
 672
 673type withParagraphTransformers struct {
 674	value []util.PrioritizedValue
 675}
 676
 677func (o *withParagraphTransformers) SetParserOption(c *Config) {
 678	c.ParagraphTransformers = append(c.ParagraphTransformers, o.value...)
 679}
 680
 681// WithParagraphTransformers is a functional option that allow you to add
 682// ParagraphTransformers to the parser.
 683func WithParagraphTransformers(ps ...util.PrioritizedValue) Option {
 684	return &withParagraphTransformers{ps}
 685}
 686
 687type withASTTransformers struct {
 688	value []util.PrioritizedValue
 689}
 690
 691func (o *withASTTransformers) SetParserOption(c *Config) {
 692	c.ASTTransformers = append(c.ASTTransformers, o.value...)
 693}
 694
 695// WithASTTransformers is a functional option that allow you to add
 696// ASTTransformers to the parser.
 697func WithASTTransformers(ps ...util.PrioritizedValue) Option {
 698	return &withASTTransformers{ps}
 699}
 700
 701type withEscapedSpace struct {
 702}
 703
 704func (o *withEscapedSpace) SetParserOption(c *Config) {
 705	c.EscapedSpace = true
 706}
 707
 708// WithEscapedSpace is a functional option indicates that a '\' escaped half-space(0x20) should not trigger parsers.
 709func WithEscapedSpace() Option {
 710	return &withEscapedSpace{}
 711}
 712
 713type withOption struct {
 714	name  OptionName
 715	value interface{}
 716}
 717
 718func (o *withOption) SetParserOption(c *Config) {
 719	c.Options[o.name] = o.value
 720}
 721
 722// WithOption is a functional option that allow you to set
 723// an arbitrary option to the parser.
 724func WithOption(name OptionName, value interface{}) Option {
 725	return &withOption{name, value}
 726}
 727
 728// NewParser returns a new Parser with given options.
 729func NewParser(options ...Option) Parser {
 730	config := NewConfig()
 731	for _, opt := range options {
 732		opt.SetParserOption(config)
 733	}
 734
 735	p := &parser{
 736		options: map[OptionName]interface{}{},
 737		config:  config,
 738	}
 739
 740	return p
 741}
 742
 743func (p *parser) AddOptions(opts ...Option) {
 744	for _, opt := range opts {
 745		opt.SetParserOption(p.config)
 746	}
 747}
 748
 749func (p *parser) addBlockParser(v util.PrioritizedValue, options map[OptionName]interface{}) {
 750	bp, ok := v.Value.(BlockParser)
 751	if !ok {
 752		panic(fmt.Sprintf("%v is not a BlockParser", v.Value))
 753	}
 754	tcs := bp.Trigger()
 755	so, ok := v.Value.(SetOptioner)
 756	if ok {
 757		for oname, ovalue := range options {
 758			so.SetOption(oname, ovalue)
 759		}
 760	}
 761	if tcs == nil {
 762		p.freeBlockParsers = append(p.freeBlockParsers, bp)
 763	} else {
 764		for _, tc := range tcs {
 765			if p.blockParsers[tc] == nil {
 766				p.blockParsers[tc] = []BlockParser{}
 767			}
 768			p.blockParsers[tc] = append(p.blockParsers[tc], bp)
 769		}
 770	}
 771}
 772
 773func (p *parser) addInlineParser(v util.PrioritizedValue, options map[OptionName]interface{}) {
 774	ip, ok := v.Value.(InlineParser)
 775	if !ok {
 776		panic(fmt.Sprintf("%v is not a InlineParser", v.Value))
 777	}
 778	tcs := ip.Trigger()
 779	so, ok := v.Value.(SetOptioner)
 780	if ok {
 781		for oname, ovalue := range options {
 782			so.SetOption(oname, ovalue)
 783		}
 784	}
 785	if cb, ok := ip.(CloseBlocker); ok {
 786		p.closeBlockers = append(p.closeBlockers, cb)
 787	}
 788	for _, tc := range tcs {
 789		if p.inlineParsers[tc] == nil {
 790			p.inlineParsers[tc] = []InlineParser{}
 791		}
 792		p.inlineParsers[tc] = append(p.inlineParsers[tc], ip)
 793	}
 794}
 795
 796func (p *parser) addParagraphTransformer(v util.PrioritizedValue, options map[OptionName]interface{}) {
 797	pt, ok := v.Value.(ParagraphTransformer)
 798	if !ok {
 799		panic(fmt.Sprintf("%v is not a ParagraphTransformer", v.Value))
 800	}
 801	so, ok := v.Value.(SetOptioner)
 802	if ok {
 803		for oname, ovalue := range options {
 804			so.SetOption(oname, ovalue)
 805		}
 806	}
 807	p.paragraphTransformers = append(p.paragraphTransformers, pt)
 808}
 809
 810func (p *parser) addASTTransformer(v util.PrioritizedValue, options map[OptionName]interface{}) {
 811	at, ok := v.Value.(ASTTransformer)
 812	if !ok {
 813		panic(fmt.Sprintf("%v is not a ASTTransformer", v.Value))
 814	}
 815	so, ok := v.Value.(SetOptioner)
 816	if ok {
 817		for oname, ovalue := range options {
 818			so.SetOption(oname, ovalue)
 819		}
 820	}
 821	p.astTransformers = append(p.astTransformers, at)
 822}
 823
 824// A ParseConfig struct is a data structure that holds configuration of the Parser.Parse.
 825type ParseConfig struct {
 826	Context Context
 827}
 828
 829// A ParseOption is a functional option type for the Parser.Parse.
 830type ParseOption func(c *ParseConfig)
 831
 832// WithContext is a functional option that allow you to override
 833// a default context.
 834func WithContext(context Context) ParseOption {
 835	return func(c *ParseConfig) {
 836		c.Context = context
 837	}
 838}
 839
// Parse parses the Markdown text supplied by reader and returns the root
// document node.
//
// The first call builds the parser's lookup tables from the pending
// configuration; this happens exactly once, guarded by initSync. Every call
// then parses the blocks into a new document, runs parseBlock on the nodes
// visited by walkBlock, and finally applies the AST transformers.
// A context passed through WithContext is used as-is; otherwise a new one is
// created.
func (p *parser) Parse(reader text.Reader, opts ...ParseOption) ast.Node {
	p.initSync.Do(func() {
		p.config.BlockParsers.Sort()
		for _, v := range p.config.BlockParsers {
			p.addBlockParser(v, p.config.Options)
		}
		// Parsers without a trigger are appended to every trigger list that
		// has at least one parser, after the triggered parsers.
		for i := range p.blockParsers {
			if p.blockParsers[i] != nil {
				p.blockParsers[i] = append(p.blockParsers[i], p.freeBlockParsers...)
			}
		}

		p.config.InlineParsers.Sort()
		for _, v := range p.config.InlineParsers {
			p.addInlineParser(v, p.config.Options)
		}
		p.config.ParagraphTransformers.Sort()
		for _, v := range p.config.ParagraphTransformers {
			p.addParagraphTransformer(v, p.config.Options)
		}
		p.config.ASTTransformers.Sort()
		for _, v := range p.config.ASTTransformers {
			p.addASTTransformer(v, p.config.Options)
		}
		p.escapedSpace = p.config.EscapedSpace
		// The configuration has been consumed; it is not kept around.
		p.config = nil
	})
	c := &ParseConfig{}
	for _, opt := range opts {
		opt(c)
	}
	if c.Context == nil {
		c.Context = NewContext()
	}
	pc := c.Context
	root := ast.NewDocument()
	p.parseBlocks(root, reader, pc)

	// A separate block reader over the same source is used for the second pass.
	blockReader := text.NewBlockReader(reader.Source(), nil)
	p.walkBlock(root, func(node ast.Node) {
		p.parseBlock(blockReader, node, pc)
	})
	for _, at := range p.astTransformers {
		at.Transform(root, reader, pc)
	}

	// root.Dump(reader.Source(), 0)
	return root
}
 889
 890func (p *parser) transformParagraph(node *ast.Paragraph, reader text.Reader, pc Context) bool {
 891	for _, pt := range p.paragraphTransformers {
 892		pt.Transform(node, reader, pc)
 893		if node.Parent() == nil {
 894			return true
 895		}
 896	}
 897	return false
 898}
 899
 900func (p *parser) closeBlocks(from, to int, reader text.Reader, pc Context) {
 901	blocks := pc.OpenedBlocks()
 902	for i := from; i >= to; i-- {
 903		node := blocks[i].Node
 904		paragraph, ok := node.(*ast.Paragraph)
 905		if ok && node.Parent() != nil {
 906			p.transformParagraph(paragraph, reader, pc)
 907		}
 908		if node.Parent() != nil { // closes only if node has not been transformed
 909			blocks[i].Parser.Close(blocks[i].Node, reader, pc)
 910		}
 911	}
 912	if from == len(blocks)-1 {
 913		blocks = blocks[0:to]
 914	} else {
 915		blocks = append(blocks[0:to], blocks[from+1:]...)
 916	}
 917	pc.SetOpenedBlocks(blocks)
 918}
 919
// blockOpenResult is the outcome reported by parser.openBlocks.
type blockOpenResult int

const (
	// paragraphContinuation means no block was opened and the last opened
	// paragraph continued on the current line.
	paragraphContinuation blockOpenResult = iota + 1
	// newBlocksOpened means at least one new block was opened.
	newBlocksOpened
	// noBlocksOpened means no block was opened and no paragraph continued.
	noBlocksOpened
)
 927
// openBlocks tries to open new blocks under parent for the current line.
//
// Each time a parser opens a block that may have children, the search is
// repeated with that block as the new parent. The result is newBlocksOpened
// when at least one block was opened. It is paragraphContinuation when none
// was opened and the last opened block is a paragraph whose parser continued
// on this line. Otherwise it is noBlocksOpened.
//
// blankLine is recorded on every opened node through SetBlankPreviousLines.
func (p *parser) openBlocks(parent ast.Node, blankLine bool, reader text.Reader, pc Context) blockOpenResult {
	result := blockOpenResult(noBlocksOpened)
	continuable := false
	lastBlock := pc.LastOpenedBlock()
	if lastBlock.Node != nil {
		continuable = ast.IsParagraph(lastBlock.Node)
	}
retry:
	var bps []BlockParser
	line, _ := reader.PeekLine()
	w, pos := util.IndentWidth(line, reader.LineOffset())
	// Publish the indent information of this line for BlockParser.Open;
	// -1 marks a line for which the width covers the whole line.
	if w >= len(line) {
		pc.SetBlockOffset(-1)
		pc.SetBlockIndent(-1)
	} else {
		pc.SetBlockOffset(pos)
		pc.SetBlockIndent(w)
	}
	if line == nil || line[0] == '\n' {
		goto continuable
	}
	// Pick the parsers registered for the byte at pos, falling back to the
	// parsers that have no trigger.
	bps = p.freeBlockParsers
	if pos < len(line) {
		bps = p.blockParsers[line[pos]]
		if bps == nil {
			bps = p.freeBlockParsers
		}
	}
	if bps == nil {
		goto continuable
	}

	for _, bp := range bps {
		// While a paragraph is open and nothing has been opened yet, only
		// parsers that can interrupt a paragraph are tried.
		if continuable && result == noBlocksOpened && !bp.CanInterruptParagraph() {
			continue
		}
		if w > 3 && !bp.CanAcceptIndentedLine() {
			continue
		}
		lastBlock = pc.LastOpenedBlock()
		last := lastBlock.Node
		node, state := bp.Open(parent, reader, pc)
		if node != nil {
			// Parser requires last node to be a paragraph.
			// With table extension:
			//
			//     0
			//     -:
			//     -
			//
			// '-' on 3rd line seems a Setext heading because 1st and 2nd lines
			// are being paragraph when the Settext heading parser tries to parse the 3rd
			// line.
			// But 1st line and 2nd line are a table. Thus this paragraph will be transformed
			// by a paragraph transformer. So this text should be converted to a table and
			// an empty list.
			if state&RequireParagraph != 0 {
				if last == parent.LastChild() {
					// Opened paragraph may be transformed by ParagraphTransformers in
					// closeBlocks().
					lastBlock.Parser.Close(last, reader, pc)
					blocks := pc.OpenedBlocks()
					pc.SetOpenedBlocks(blocks[0 : len(blocks)-1])
					if p.transformParagraph(last.(*ast.Paragraph), reader, pc) {
						// Paragraph has been transformed.
						// So this parser is considered as failing.
						continuable = false
						goto retry
					}
				}
			}
			node.SetBlankPreviousLines(blankLine)
			// The previously opened node has been detached from the tree:
			// close its block entry before the new node is appended.
			if last != nil && last.Parent() == nil {
				lastPos := len(pc.OpenedBlocks()) - 1
				p.closeBlocks(lastPos, lastPos, reader, pc)
			}
			parent.AppendChild(parent, node)
			result = newBlocksOpened
			be := Block{node, bp}
			pc.SetOpenedBlocks(append(pc.OpenedBlocks(), be))
			if state&HasChildren != 0 {
				parent = node
				goto retry // try child block
			}
			break // no children, can not open more blocks on this line
		}
	}

continuable:
	// Nothing was opened: let the open paragraph, if any, take the line.
	if result == noBlocksOpened && continuable {
		state := lastBlock.Parser.Continue(lastBlock.Node, reader, pc)
		if state&Continue != 0 {
			result = paragraphContinuation
		}
	}
	return result
}
1025
// lineStat records, for one line and one nesting level, whether the line was
// blank at that level.
type lineStat struct {
	lineNum int
	level   int
	isBlank bool
}

// isBlankLine reports whether line lineNum counts as blank at the given level.
//
// The stats are scanned backwards, starting level entries before the last
// one. When that start lies before the beginning of stats, the result is
// true. Otherwise:
//   - an entry for an earlier line ends the scan with false,
//   - an entry for a later line is skipped,
//   - an entry for lineNum at the same level decides the result,
//   - an entry for lineNum at a lower level decides the result only when it
//     is blank.
//
// When the scan runs out of entries, the result is false.
func isBlankLine(lineNum, level int, stats []lineStat) bool {
	start := len(stats) - 1 - level
	if start < 0 {
		return true
	}
	for i := start; i >= 0; i-- {
		s := stats[i]
		switch {
		case s.lineNum < lineNum:
			return false
		case s.lineNum > lineNum:
			continue
		case s.level == level:
			return s.isBlank
		case s.level < level && s.isBlank:
			return true
		}
	}
	return false
}
1050
// parseBlocks parses the block structure of the input read from reader and
// attaches the resulting nodes under parent.
//
// The outer loop skips blank lines and opens new blocks at the current line
// with p.openBlocks. The inner loop then offers each following line to the
// currently opened blocks, outermost first, until no block remains open. The
// function returns when reader.SkipBlankLines reports !ok, when p.openBlocks
// does not open a new block, or when reader.PeekLine returns nil.
func (p *parser) parseBlocks(parent ast.Node, reader text.Reader, pc Context) {
	pc.SetOpenedBlocks([]Block{})
	// blankLines records, per line number and nesting level, whether that line
	// was blank; it is queried through isBlankLine.
	blankLines := make([]lineStat, 0, 128)
	var isBlank bool
	for { // process blocks separated by blank lines
		_, lines, ok := reader.SkipBlankLines()
		if !ok {
			return
		}
		lineNum, _ := reader.Position()
		if lines != 0 {
			// Blank lines were skipped: drop the old records and mark the line
			// before the current one as blank for every opened nesting level
			// (lines != 0 is always true inside this branch).
			blankLines = blankLines[0:0]
			l := len(pc.OpenedBlocks())
			for i := 0; i < l; i++ {
				blankLines = append(blankLines, lineStat{lineNum - 1, i, lines != 0})
			}
		}
		isBlank = isBlankLine(lineNum-1, 0, blankLines)
		// first, we try to open blocks
		if p.openBlocks(parent, isBlank, reader, pc) != newBlocksOpened {
			return
		}
		reader.AdvanceLine()
		for { // process opened blocks line by line
			openedBlocks := pc.OpenedBlocks()
			l := len(openedBlocks)
			if l == 0 {
				// Nothing is open any more; go back to the outer loop.
				break
			}
			lastIndex := l - 1
			for i := 0; i < l; i++ {
				be := openedBlocks[i]
				line, _ := reader.PeekLine()
				if line == nil {
					// No more lines: close every opened block and stop parsing.
					p.closeBlocks(lastIndex, 0, reader, pc)
					reader.AdvanceLine()
					return
				}
				lineNum, _ := reader.Position()
				// Remember whether the current line is blank at nesting level i.
				blankLines = append(blankLines, lineStat{lineNum, i, util.IsBlank(line)})
				// If node is a paragraph, p.openBlocks determines whether it is continuable.
				// So we do not process paragraphs here.
				if !ast.IsParagraph(be.Node) {
					state := be.Parser.Continue(be.Node, reader, pc)
					if state&Continue != 0 {
						// When current node is a container block and has no children,
						// we try to open new child nodes
						if state&HasChildren != 0 && i == lastIndex {
							isBlank = isBlankLine(lineNum-1, i, blankLines)
							p.openBlocks(be.Node, isBlank, reader, pc)
							break
						}
						// The block continues on this line; check the next opened block.
						continue
					}
				}
				// current node may be closed or lazy continuation
				isBlank = isBlankLine(lineNum-1, i, blankLines)
				// The parent of opened block i is the opened block just before it,
				// or the root parent for the first one.
				thisParent := parent
				if i != 0 {
					thisParent = openedBlocks[i-1].Node
				}
				// Remember the innermost node so that a replacement made during
				// openBlocks can be detected below.
				lastNode := openedBlocks[lastIndex].Node
				result := p.openBlocks(thisParent, isBlank, reader, pc)
				if result != paragraphContinuation {
					// lastNode is a paragraph and was transformed by the paragraph
					// transformers.
					if openedBlocks[lastIndex].Node != lastNode {
						lastIndex--
					}
					// Close the opened blocks from lastIndex down to i.
					// NOTE(review): the exact index semantics of closeBlocks are
					// defined elsewhere in the file; confirm there.
					p.closeBlocks(lastIndex, i, reader, pc)
				}
				break
			}

			reader.AdvanceLine()
		}
	}
}
1129
1130func (p *parser) walkBlock(block ast.Node, cb func(node ast.Node)) {
1131	for c := block.FirstChild(); c != nil; c = c.NextSibling() {
1132		p.walkBlock(c, cb)
1133	}
1134	cb(block)
1135}
1136
// Bit flags describing how a line of inline content ends. They are combined
// in a single uint8 while a line is being scanned.
const (
	// lineBreakHard marks a line that ends with a hard line break.
	lineBreakHard uint8 = 1 << 0
	// lineBreakSoft marks a line that ends with a soft line break.
	lineBreakSoft uint8 = 1 << 1
	// lineBreakVisible is set together with lineBreakHard when the hard break
	// was written with a trailing backslash.
	lineBreakVisible uint8 = 1 << 2
)
1142
// parseBlock parses the inline content of parent and appends the resulting
// inline nodes to it.
//
// Nothing is done when parent.IsRaw() is true. Otherwise block is reset to
// parent's lines and every line is scanned byte by byte: at trigger characters
// the inline parsers registered in p.inlineParsers are tried, and the bytes
// between inline nodes are appended as text segments. After the last line,
// ProcessDelimiters is called and every entry of p.closeBlockers is invoked.
func (p *parser) parseBlock(block text.BlockReader, parent ast.Node, pc Context) {
	if parent.IsRaw() {
		return
	}
	// escaped is declared outside the line loop, so a pending backslash escape
	// carries over to the next iteration.
	escaped := false
	source := block.Source()
	block.Reset(parent.Lines())
	for {
	retry:
		line, _ := block.PeekLine()
		if line == nil {
			break
		}
		// Classify the line ending. lineLength is shortened so that the bytes
		// forming a hard line break are excluded from the scan below.
		lineLength := len(line)
		var lineBreakFlags uint8
		hasNewLine := line[lineLength-1] == '\n'
		if ((lineLength >= 3 && line[lineLength-2] == '\\' &&
			line[lineLength-3] != '\\') || (lineLength == 2 && line[lineLength-2] == '\\')) && hasNewLine { // ends with \\n
			lineLength -= 2
			lineBreakFlags |= lineBreakHard | lineBreakVisible
		} else if ((lineLength >= 4 && line[lineLength-3] == '\\' && line[lineLength-2] == '\r' &&
			line[lineLength-4] != '\\') || (lineLength == 3 && line[lineLength-3] == '\\' && line[lineLength-2] == '\r')) &&
			hasNewLine { // ends with \\r\n
			lineLength -= 3
			lineBreakFlags |= lineBreakHard | lineBreakVisible
		} else if lineLength >= 3 && line[lineLength-3] == ' ' && line[lineLength-2] == ' ' &&
			hasNewLine { // ends with [space][space]\n
			lineLength -= 3
			lineBreakFlags |= lineBreakHard
		} else if lineLength >= 4 && line[lineLength-4] == ' ' && line[lineLength-3] == ' ' &&
			line[lineLength-2] == '\r' && hasNewLine { // ends with [space][space]\r\n
			lineLength -= 4
			lineBreakFlags |= lineBreakHard
		} else if hasNewLine {
			// If the line ends with a newline character, but it is not a hardlineBreak, then it is a softLinebreak
			// If the line ends with a hardlineBreak, then it cannot end with a softLinebreak
			// See https://spec.commonmark.org/0.30/#soft-line-breaks
			lineBreakFlags |= lineBreakSoft
		}

		// l is the reader's line number before scanning; startPosition marks
		// where the pending plain text begins.
		l, startPosition := block.Position()
		// n counts bytes that have been scanned but not yet consumed from block.
		n := 0
		for i := 0; i < lineLength; i++ {
			c := line[i]
			if c == '\n' {
				break
			}
			isSpace := util.IsSpace(c) && c != '\r' && c != '\n'
			isPunct := util.IsPunct(c)
			// Inline parsers are tried at unescaped punctuation, at whitespace
			// (unless it is escaped and p.escapedSpace is set), and at the
			// first byte of the line.
			if (isPunct && !escaped) || isSpace && !(escaped && p.escapedSpace) || i == 0 {
				// Whitespace and a non-punctuation first byte are looked up
				// under the ' ' key.
				parserChar := c
				if isSpace || (i == 0 && !isPunct) {
					parserChar = ' '
				}
				ips := p.inlineParsers[parserChar]
				if ips != nil {
					// Consume the bytes scanned so far so that the reader is
					// positioned at the trigger byte.
					block.Advance(n)
					n = 0
					savedLine, savedPosition := block.Position()
					if i != 0 {
						// Emit the text that precedes the trigger byte and
						// restart the pending text at the current position.
						_, currentPosition := block.Position()
						ast.MergeOrAppendTextSegment(parent, startPosition.Between(currentPosition))
						_, startPosition = block.Position()
					}
					var inlineNode ast.Node
					for _, ip := range ips {
						inlineNode = ip.Parse(parent, block, pc)
						if inlineNode != nil {
							break
						}
						// This parser produced nothing: restore the reader
						// before trying the next one.
						block.SetPosition(savedLine, savedPosition)
					}
					if inlineNode != nil {
						parent.AppendChild(parent, inlineNode)
						// Re-read the line from the reader's new position.
						goto retry
					}
				}
			}
			if escaped {
				// The byte after a backslash ends the escape.
				escaped = false
				n++
				continue
			}

			if c == '\\' {
				escaped = true
				n++
				continue
			}

			escaped = false
			n++
		}
		if n != 0 {
			block.Advance(n)
		}
		currentL, currentPosition := block.Position()
		if l != currentL {
			// The reader is no longer on the line this iteration started on,
			// so no trailing text is emitted for it.
			continue
		}
		diff := startPosition.Between(currentPosition)
		var text *ast.Text
		// A hard break flagged as visible (the backslash forms) keeps the
		// segment as is; every other line has trailing spaces trimmed.
		if lineBreakFlags&(lineBreakHard|lineBreakVisible) == lineBreakHard|lineBreakVisible {
			text = ast.NewTextSegment(diff)
		} else {
			text = ast.NewTextSegment(diff.TrimRightSpace(source))
		}
		text.SetSoftLineBreak(lineBreakFlags&lineBreakSoft != 0)
		text.SetHardLineBreak(lineBreakFlags&lineBreakHard != 0)
		parent.AppendChild(parent, text)
		block.AdvanceLine()
	}

	ProcessDelimiters(nil, pc)
	for _, ip := range p.closeBlockers {
		ip.CloseBlock(parent, block, pc)
	}

}