1package parser
2
3import (
4 "bytes"
5 "regexp"
6
7 "github.com/yuin/goldmark/ast"
8 "github.com/yuin/goldmark/text"
9 "github.com/yuin/goldmark/util"
10)
11
// rawHTMLParser is the inline parser for raw HTML constructs (tags,
// comments, processing instructions, declarations and CDATA sections).
// It has no fields and therefore holds no per-instance state.
type rawHTMLParser struct {
}
14
// defaultRawHTMLParser is the single package-level rawHTMLParser instance
// returned by NewRawHTMLParser.
var defaultRawHTMLParser = &rawHTMLParser{}
16
// NewRawHTMLParser returns an InlineParser that parses inline raw HTML.
// Every call returns the same package-level instance; no new parser is
// allocated.
func NewRawHTMLParser() InlineParser {
	return defaultRawHTMLParser
}
22
23func (s *rawHTMLParser) Trigger() []byte {
24 return []byte{'<'}
25}
26
27func (s *rawHTMLParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.Node {
28 line, _ := block.PeekLine()
29 if len(line) > 1 && util.IsAlphaNumeric(line[1]) {
30 return s.parseMultiLineRegexp(openTagRegexp, block, pc)
31 }
32 if len(line) > 2 && line[1] == '/' && util.IsAlphaNumeric(line[2]) {
33 return s.parseMultiLineRegexp(closeTagRegexp, block, pc)
34 }
35 if bytes.HasPrefix(line, openComment) {
36 return s.parseComment(block, pc)
37 }
38 if bytes.HasPrefix(line, openProcessingInstruction) {
39 return s.parseUntil(block, closeProcessingInstruction, pc)
40 }
41 if len(line) > 2 && line[1] == '!' && line[2] >= 'A' && line[2] <= 'Z' {
42 return s.parseUntil(block, closeDecl, pc)
43 }
44 if bytes.HasPrefix(line, openCDATA) {
45 return s.parseUntil(block, closeCDATA, pc)
46 }
47 return nil
48}
49
// Building blocks and compiled expressions for matching inline HTML open
// and closing tags. Both compiled expressions are anchored at the start of
// the input.
var (
	// tagnamePattern captures a tag name: a letter followed by any number
	// of letters, digits or hyphens.
	tagnamePattern = `([A-Za-z][A-Za-z0-9-]*)`

	// spaceOrOneNewline matches a single space or tab, or at most one line
	// ending (LF or CRLF).
	spaceOrOneNewline = `(?:[ \t]|(?:\r\n|\n){0,1})`

	// attributePattern matches leading whitespace, an attribute name and an
	// optional "=value" part whose value is unquoted, single-quoted or
	// double-quoted.
	attributePattern = `(?:[\r\n \t]+[a-zA-Z_:][a-zA-Z0-9:._-]*(?:[\r\n \t]*=[\r\n \t]*(?:[^\"'=<>` + "`" + `\x00-\x20]+|'[^']*'|"[^"]*"))?)` //nolint:golint,lll

	// openTagRegexp matches "<name", any number of attributes, optional
	// whitespace, an optional "/" and the final ">".
	openTagRegexp = regexp.MustCompile("^<" + tagnamePattern + attributePattern + `*` + spaceOrOneNewline + `*/?>`)

	// closeTagRegexp matches "</name", optional whitespace and ">".
	closeTagRegexp = regexp.MustCompile("^</" + tagnamePattern + spaceOrOneNewline + `*>`)
)
55
// Opening and closing delimiters of the non-tag raw HTML constructs.
var (
	// Processing instruction: <? ... ?>
	openProcessingInstruction  = []byte("<?")
	closeProcessingInstruction = []byte("?>")

	// CDATA section: <![CDATA[ ... ]]>
	openCDATA  = []byte("<![CDATA[")
	closeCDATA = []byte("]]>")

	// Declaration terminator.
	closeDecl = []byte(">")

	// Comment delimiters, plus the two degenerate comment forms that are
	// complete on their own.
	emptyComment1 = []byte("<!-->")
	emptyComment2 = []byte("<!--->")
	openComment   = []byte("<!--")
	closeComment  = []byte("-->")
)
65
66func (s *rawHTMLParser) parseComment(block text.Reader, pc Context) ast.Node {
67 savedLine, savedSegment := block.Position()
68 node := ast.NewRawHTML()
69 line, segment := block.PeekLine()
70 if bytes.HasPrefix(line, emptyComment1) {
71 node.Segments.Append(segment.WithStop(segment.Start + len(emptyComment1)))
72 block.Advance(len(emptyComment1))
73 return node
74 }
75 if bytes.HasPrefix(line, emptyComment2) {
76 node.Segments.Append(segment.WithStop(segment.Start + len(emptyComment2)))
77 block.Advance(len(emptyComment2))
78 return node
79 }
80 offset := len(openComment)
81 line = line[offset:]
82 for {
83 index := bytes.Index(line, closeComment)
84 if index > -1 {
85 node.Segments.Append(segment.WithStop(segment.Start + offset + index + len(closeComment)))
86 block.Advance(offset + index + len(closeComment))
87 return node
88 }
89 offset = 0
90 node.Segments.Append(segment)
91 block.AdvanceLine()
92 line, segment = block.PeekLine()
93 if line == nil {
94 break
95 }
96 }
97 block.SetPosition(savedLine, savedSegment)
98 return nil
99}
100
101func (s *rawHTMLParser) parseUntil(block text.Reader, closer []byte, pc Context) ast.Node {
102 savedLine, savedSegment := block.Position()
103 node := ast.NewRawHTML()
104 for {
105 line, segment := block.PeekLine()
106 if line == nil {
107 break
108 }
109 index := bytes.Index(line, closer)
110 if index > -1 {
111 node.Segments.Append(segment.WithStop(segment.Start + index + len(closer)))
112 block.Advance(index + len(closer))
113 return node
114 }
115 node.Segments.Append(segment)
116 block.AdvanceLine()
117 }
118 block.SetPosition(savedLine, savedSegment)
119 return nil
120}
121
122func (s *rawHTMLParser) parseMultiLineRegexp(reg *regexp.Regexp, block text.Reader, pc Context) ast.Node {
123 sline, ssegment := block.Position()
124 if block.Match(reg) {
125 node := ast.NewRawHTML()
126 eline, esegment := block.Position()
127 block.SetPosition(sline, ssegment)
128 for {
129 line, segment := block.PeekLine()
130 if line == nil {
131 break
132 }
133 l, _ := block.Position()
134 start := segment.Start
135 if l == sline {
136 start = ssegment.Start
137 }
138 end := segment.Stop
139 if l == eline {
140 end = esegment.Start
141 }
142
143 node.Segments.Append(text.NewSegment(start, end))
144 if l == eline {
145 block.Advance(end - start)
146 break
147 }
148 block.AdvanceLine()
149 }
150 return node
151 }
152 return nil
153}