raw_html.go

  1package parser
  2
  3import (
  4	"bytes"
  5	"regexp"
  6
  7	"github.com/yuin/goldmark/ast"
  8	"github.com/yuin/goldmark/text"
  9	"github.com/yuin/goldmark/util"
 10)
 11
 12type rawHTMLParser struct {
 13}
 14
 15var defaultRawHTMLParser = &rawHTMLParser{}
 16
 17// NewRawHTMLParser return a new InlineParser that can parse
 18// inline htmls.
 19func NewRawHTMLParser() InlineParser {
 20	return defaultRawHTMLParser
 21}
 22
 23func (s *rawHTMLParser) Trigger() []byte {
 24	return []byte{'<'}
 25}
 26
 27func (s *rawHTMLParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.Node {
 28	line, _ := block.PeekLine()
 29	if len(line) > 1 && util.IsAlphaNumeric(line[1]) {
 30		return s.parseMultiLineRegexp(openTagRegexp, block, pc)
 31	}
 32	if len(line) > 2 && line[1] == '/' && util.IsAlphaNumeric(line[2]) {
 33		return s.parseMultiLineRegexp(closeTagRegexp, block, pc)
 34	}
 35	if bytes.HasPrefix(line, openComment) {
 36		return s.parseComment(block, pc)
 37	}
 38	if bytes.HasPrefix(line, openProcessingInstruction) {
 39		return s.parseUntil(block, closeProcessingInstruction, pc)
 40	}
 41	if len(line) > 2 && line[1] == '!' && line[2] >= 'A' && line[2] <= 'Z' {
 42		return s.parseUntil(block, closeDecl, pc)
 43	}
 44	if bytes.HasPrefix(line, openCDATA) {
 45		return s.parseUntil(block, closeCDATA, pc)
 46	}
 47	return nil
 48}
 49
 50var tagnamePattern = `([A-Za-z][A-Za-z0-9-]*)`
 51var spaceOrOneNewline = `(?:[ \t]|(?:\r\n|\n){0,1})`
 52var attributePattern = `(?:[\r\n \t]+[a-zA-Z_:][a-zA-Z0-9:._-]*(?:[\r\n \t]*=[\r\n \t]*(?:[^\"'=<>` + "`" + `\x00-\x20]+|'[^']*'|"[^"]*"))?)` //nolint:golint,lll
 53var openTagRegexp = regexp.MustCompile("^<" + tagnamePattern + attributePattern + `*` + spaceOrOneNewline + `*/?>`)
 54var closeTagRegexp = regexp.MustCompile("^</" + tagnamePattern + spaceOrOneNewline + `*>`)
 55
 56var openProcessingInstruction = []byte("<?")
 57var closeProcessingInstruction = []byte("?>")
 58var openCDATA = []byte("<![CDATA[")
 59var closeCDATA = []byte("]]>")
 60var closeDecl = []byte(">")
 61var emptyComment1 = []byte("<!-->")
 62var emptyComment2 = []byte("<!--->")
 63var openComment = []byte("<!--")
 64var closeComment = []byte("-->")
 65
 66func (s *rawHTMLParser) parseComment(block text.Reader, pc Context) ast.Node {
 67	savedLine, savedSegment := block.Position()
 68	node := ast.NewRawHTML()
 69	line, segment := block.PeekLine()
 70	if bytes.HasPrefix(line, emptyComment1) {
 71		node.Segments.Append(segment.WithStop(segment.Start + len(emptyComment1)))
 72		block.Advance(len(emptyComment1))
 73		return node
 74	}
 75	if bytes.HasPrefix(line, emptyComment2) {
 76		node.Segments.Append(segment.WithStop(segment.Start + len(emptyComment2)))
 77		block.Advance(len(emptyComment2))
 78		return node
 79	}
 80	offset := len(openComment)
 81	line = line[offset:]
 82	for {
 83		index := bytes.Index(line, closeComment)
 84		if index > -1 {
 85			node.Segments.Append(segment.WithStop(segment.Start + offset + index + len(closeComment)))
 86			block.Advance(offset + index + len(closeComment))
 87			return node
 88		}
 89		offset = 0
 90		node.Segments.Append(segment)
 91		block.AdvanceLine()
 92		line, segment = block.PeekLine()
 93		if line == nil {
 94			break
 95		}
 96	}
 97	block.SetPosition(savedLine, savedSegment)
 98	return nil
 99}
100
101func (s *rawHTMLParser) parseUntil(block text.Reader, closer []byte, pc Context) ast.Node {
102	savedLine, savedSegment := block.Position()
103	node := ast.NewRawHTML()
104	for {
105		line, segment := block.PeekLine()
106		if line == nil {
107			break
108		}
109		index := bytes.Index(line, closer)
110		if index > -1 {
111			node.Segments.Append(segment.WithStop(segment.Start + index + len(closer)))
112			block.Advance(index + len(closer))
113			return node
114		}
115		node.Segments.Append(segment)
116		block.AdvanceLine()
117	}
118	block.SetPosition(savedLine, savedSegment)
119	return nil
120}
121
122func (s *rawHTMLParser) parseMultiLineRegexp(reg *regexp.Regexp, block text.Reader, pc Context) ast.Node {
123	sline, ssegment := block.Position()
124	if block.Match(reg) {
125		node := ast.NewRawHTML()
126		eline, esegment := block.Position()
127		block.SetPosition(sline, ssegment)
128		for {
129			line, segment := block.PeekLine()
130			if line == nil {
131				break
132			}
133			l, _ := block.Position()
134			start := segment.Start
135			if l == sline {
136				start = ssegment.Start
137			}
138			end := segment.Stop
139			if l == eline {
140				end = esegment.Start
141			}
142
143			node.Segments.Append(text.NewSegment(start, end))
144			if l == eline {
145				block.Advance(end - start)
146				break
147			}
148			block.AdvanceLine()
149		}
150		return node
151	}
152	return nil
153}