// Package html implements a renderer that outputs HTML.
2package html
3
4import (
5 "bytes"
6 "fmt"
7 "strconv"
8 "unicode"
9 "unicode/utf8"
10
11 "github.com/yuin/goldmark/ast"
12 "github.com/yuin/goldmark/renderer"
13 "github.com/yuin/goldmark/util"
14)
15
// A Config struct has configurations for the HTML based renderers.
type Config struct {
	// Writer is the Writer used to emit textual content.
	Writer Writer
	// HardWraps, when true, makes soft line breaks render as <br> elements.
	HardWraps bool
	// EastAsianLineBreaks selects how soft line breaks are treated when
	// deciding whether to emit a newline between adjacent text nodes.
	EastAsianLineBreaks EastAsianLineBreaks
	// XHTML, when true, makes void elements render in the " />" form.
	XHTML bool
	// Unsafe, when true, renders raw HTML and potentially dangerous URLs
	// instead of omitting them.
	Unsafe bool
}
24
25// NewConfig returns a new Config with defaults.
26func NewConfig() Config {
27 return Config{
28 Writer: DefaultWriter,
29 HardWraps: false,
30 EastAsianLineBreaks: EastAsianLineBreaksNone,
31 XHTML: false,
32 Unsafe: false,
33 }
34}
35
36// SetOption implements renderer.NodeRenderer.SetOption.
37func (c *Config) SetOption(name renderer.OptionName, value interface{}) {
38 switch name {
39 case optHardWraps:
40 c.HardWraps = value.(bool)
41 case optEastAsianLineBreaks:
42 c.EastAsianLineBreaks = value.(EastAsianLineBreaks)
43 case optXHTML:
44 c.XHTML = value.(bool)
45 case optUnsafe:
46 c.Unsafe = value.(bool)
47 case optTextWriter:
48 c.Writer = value.(Writer)
49 }
50}
51
// An Option interface sets options for HTML based renderers.
type Option interface {
	// SetHTMLOption applies this option to the given Config.
	SetHTMLOption(*Config)
}
56
57// TextWriter is an option name used in WithWriter.
58const optTextWriter renderer.OptionName = "Writer"
59
60type withWriter struct {
61 value Writer
62}
63
64func (o *withWriter) SetConfig(c *renderer.Config) {
65 c.Options[optTextWriter] = o.value
66}
67
68func (o *withWriter) SetHTMLOption(c *Config) {
69 c.Writer = o.value
70}
71
72// WithWriter is a functional option that allow you to set the given writer to
73// the renderer.
74func WithWriter(writer Writer) interface {
75 renderer.Option
76 Option
77} {
78 return &withWriter{writer}
79}
80
81// HardWraps is an option name used in WithHardWraps.
82const optHardWraps renderer.OptionName = "HardWraps"
83
84type withHardWraps struct {
85}
86
87func (o *withHardWraps) SetConfig(c *renderer.Config) {
88 c.Options[optHardWraps] = true
89}
90
91func (o *withHardWraps) SetHTMLOption(c *Config) {
92 c.HardWraps = true
93}
94
95// WithHardWraps is a functional option that indicates whether softline breaks
96// should be rendered as '<br>'.
97func WithHardWraps() interface {
98 renderer.Option
99 Option
100} {
101 return &withHardWraps{}
102}
103
// optEastAsianLineBreaks is an option name used in WithEastAsianLineBreaks.
const optEastAsianLineBreaks renderer.OptionName = "EastAsianLineBreaks"

// An EastAsianLineBreaks is a style of east asian line breaks.
type EastAsianLineBreaks int

const (
	// EastAsianLineBreaksNone renders line breaks as they are.
	EastAsianLineBreaksNone EastAsianLineBreaks = iota
	// EastAsianLineBreaksSimple follows east_asian_line_breaks in Pandoc.
	EastAsianLineBreaksSimple
	// EastAsianLineBreaksCSS3Draft follows CSS text level3 "Segment Break Transformation Rules" with some enhancements.
	EastAsianLineBreaksCSS3Draft
)
118
119func (b EastAsianLineBreaks) softLineBreak(thisLastRune rune, siblingFirstRune rune) bool {
120 switch b {
121 case EastAsianLineBreaksNone:
122 return false
123 case EastAsianLineBreaksSimple:
124 return !(util.IsEastAsianWideRune(thisLastRune) && util.IsEastAsianWideRune(siblingFirstRune))
125 case EastAsianLineBreaksCSS3Draft:
126 return eastAsianLineBreaksCSS3DraftSoftLineBreak(thisLastRune, siblingFirstRune)
127 }
128 return false
129}
130
131func eastAsianLineBreaksCSS3DraftSoftLineBreak(thisLastRune rune, siblingFirstRune rune) bool {
132 // Implements CSS text level3 Segment Break Transformation Rules with some enhancements.
133 // References:
134 // - https://www.w3.org/TR/2020/WD-css-text-3-20200429/#line-break-transform
135 // - https://github.com/w3c/csswg-drafts/issues/5086
136
137 // Rule1:
138 // If the character immediately before or immediately after the segment break is
139 // the zero-width space character (U+200B), then the break is removed, leaving behind the zero-width space.
140 if thisLastRune == '\u200B' || siblingFirstRune == '\u200B' {
141 return false
142 }
143
144 // Rule2:
145 // Otherwise, if the East Asian Width property of both the character before and after the segment break is
146 // F, W, or H (not A), and neither side is Hangul, then the segment break is removed.
147 thisLastRuneEastAsianWidth := util.EastAsianWidth(thisLastRune)
148 siblingFirstRuneEastAsianWidth := util.EastAsianWidth(siblingFirstRune)
149 if (thisLastRuneEastAsianWidth == "F" ||
150 thisLastRuneEastAsianWidth == "W" ||
151 thisLastRuneEastAsianWidth == "H") &&
152 (siblingFirstRuneEastAsianWidth == "F" ||
153 siblingFirstRuneEastAsianWidth == "W" ||
154 siblingFirstRuneEastAsianWidth == "H") {
155 return unicode.Is(unicode.Hangul, thisLastRune) || unicode.Is(unicode.Hangul, siblingFirstRune)
156 }
157
158 // Rule3:
159 // Otherwise, if either the character before or after the segment break belongs to
160 // the space-discarding character set and it is a Unicode Punctuation (P*) or U+3000,
161 // then the segment break is removed.
162 if util.IsSpaceDiscardingUnicodeRune(thisLastRune) ||
163 unicode.IsPunct(thisLastRune) ||
164 thisLastRune == '\u3000' ||
165 util.IsSpaceDiscardingUnicodeRune(siblingFirstRune) ||
166 unicode.IsPunct(siblingFirstRune) ||
167 siblingFirstRune == '\u3000' {
168 return false
169 }
170
171 // Rule4:
172 // Otherwise, the segment break is converted to a space (U+0020).
173 return true
174}
175
176type withEastAsianLineBreaks struct {
177 eastAsianLineBreaksStyle EastAsianLineBreaks
178}
179
180func (o *withEastAsianLineBreaks) SetConfig(c *renderer.Config) {
181 c.Options[optEastAsianLineBreaks] = o.eastAsianLineBreaksStyle
182}
183
184func (o *withEastAsianLineBreaks) SetHTMLOption(c *Config) {
185 c.EastAsianLineBreaks = o.eastAsianLineBreaksStyle
186}
187
188// WithEastAsianLineBreaks is a functional option that indicates whether softline breaks
189// between east asian wide characters should be ignored.
190func WithEastAsianLineBreaks(e EastAsianLineBreaks) interface {
191 renderer.Option
192 Option
193} {
194 return &withEastAsianLineBreaks{e}
195}
196
197// XHTML is an option name used in WithXHTML.
198const optXHTML renderer.OptionName = "XHTML"
199
200type withXHTML struct {
201}
202
203func (o *withXHTML) SetConfig(c *renderer.Config) {
204 c.Options[optXHTML] = true
205}
206
207func (o *withXHTML) SetHTMLOption(c *Config) {
208 c.XHTML = true
209}
210
211// WithXHTML is a functional option indicates that nodes should be rendered in
212// xhtml instead of HTML5.
213func WithXHTML() interface {
214 Option
215 renderer.Option
216} {
217 return &withXHTML{}
218}
219
220// Unsafe is an option name used in WithUnsafe.
221const optUnsafe renderer.OptionName = "Unsafe"
222
223type withUnsafe struct {
224}
225
226func (o *withUnsafe) SetConfig(c *renderer.Config) {
227 c.Options[optUnsafe] = true
228}
229
230func (o *withUnsafe) SetHTMLOption(c *Config) {
231 c.Unsafe = true
232}
233
234// WithUnsafe is a functional option that renders dangerous contents
235// (raw htmls and potentially dangerous links) as it is.
236func WithUnsafe() interface {
237 renderer.Option
238 Option
239} {
240 return &withUnsafe{}
241}
242
// A Renderer struct is an implementation of renderer.NodeRenderer that renders
// nodes as (X)HTML.
type Renderer struct {
	// Config is embedded, so its fields (Writer, HardWraps, XHTML, ...) are
	// accessed directly on the Renderer.
	Config
}
248
249// NewRenderer returns a new Renderer with given options.
250func NewRenderer(opts ...Option) renderer.NodeRenderer {
251 r := &Renderer{
252 Config: NewConfig(),
253 }
254
255 for _, opt := range opts {
256 opt.SetHTMLOption(&r.Config)
257 }
258 return r
259}
260
// RegisterFuncs implements NodeRenderer.RegisterFuncs.
//
// It registers one render method of r for every block and inline node kind
// listed below.
func (r *Renderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {
	// blocks

	reg.Register(ast.KindDocument, r.renderDocument)
	reg.Register(ast.KindHeading, r.renderHeading)
	reg.Register(ast.KindBlockquote, r.renderBlockquote)
	reg.Register(ast.KindCodeBlock, r.renderCodeBlock)
	reg.Register(ast.KindFencedCodeBlock, r.renderFencedCodeBlock)
	reg.Register(ast.KindHTMLBlock, r.renderHTMLBlock)
	reg.Register(ast.KindList, r.renderList)
	reg.Register(ast.KindListItem, r.renderListItem)
	reg.Register(ast.KindParagraph, r.renderParagraph)
	reg.Register(ast.KindTextBlock, r.renderTextBlock)
	reg.Register(ast.KindThematicBreak, r.renderThematicBreak)

	// inlines

	reg.Register(ast.KindAutoLink, r.renderAutoLink)
	reg.Register(ast.KindCodeSpan, r.renderCodeSpan)
	reg.Register(ast.KindEmphasis, r.renderEmphasis)
	reg.Register(ast.KindImage, r.renderImage)
	reg.Register(ast.KindLink, r.renderLink)
	reg.Register(ast.KindRawHTML, r.renderRawHTML)
	reg.Register(ast.KindText, r.renderText)
	reg.Register(ast.KindString, r.renderString)
}
288
289func (r *Renderer) writeLines(w util.BufWriter, source []byte, n ast.Node) {
290 l := n.Lines().Len()
291 for i := 0; i < l; i++ {
292 line := n.Lines().At(i)
293 r.Writer.RawWrite(w, line.Value(source))
294 }
295}
296
// GlobalAttributeFilter defines attribute names which any elements can have.
// The element-specific filters in this file are either this filter itself or
// an extension of it.
var GlobalAttributeFilter = util.NewBytesFilter(
	[]byte("accesskey"),
	[]byte("autocapitalize"),
	[]byte("autofocus"),
	[]byte("class"),
	[]byte("contenteditable"),
	[]byte("dir"),
	[]byte("draggable"),
	[]byte("enterkeyhint"),
	[]byte("hidden"),
	[]byte("id"),
	[]byte("inert"),
	[]byte("inputmode"),
	[]byte("is"),
	[]byte("itemid"),
	[]byte("itemprop"),
	[]byte("itemref"),
	[]byte("itemscope"),
	[]byte("itemtype"),
	[]byte("lang"),
	[]byte("part"),
	[]byte("role"),
	[]byte("slot"),
	[]byte("spellcheck"),
	[]byte("style"),
	[]byte("tabindex"),
	[]byte("title"),
	[]byte("translate"),
)
327
// renderDocument handles the document node. It writes nothing on entry or
// exit and always lets the walk continue.
func (r *Renderer) renderDocument(
	w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
	// nothing to do
	return ast.WalkContinue, nil
}
333
334// HeadingAttributeFilter defines attribute names which heading elements can have.
335var HeadingAttributeFilter = GlobalAttributeFilter
336
337func (r *Renderer) renderHeading(
338 w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
339 n := node.(*ast.Heading)
340 if entering {
341 _, _ = w.WriteString("<h")
342 _ = w.WriteByte("0123456"[n.Level])
343 if n.Attributes() != nil {
344 RenderAttributes(w, node, HeadingAttributeFilter)
345 }
346 _ = w.WriteByte('>')
347 } else {
348 _, _ = w.WriteString("</h")
349 _ = w.WriteByte("0123456"[n.Level])
350 _, _ = w.WriteString(">\n")
351 }
352 return ast.WalkContinue, nil
353}
354
355// BlockquoteAttributeFilter defines attribute names which blockquote elements can have.
356var BlockquoteAttributeFilter = GlobalAttributeFilter.Extend(
357 []byte("cite"),
358)
359
360func (r *Renderer) renderBlockquote(
361 w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
362 if entering {
363 if n.Attributes() != nil {
364 _, _ = w.WriteString("<blockquote")
365 RenderAttributes(w, n, BlockquoteAttributeFilter)
366 _ = w.WriteByte('>')
367 } else {
368 _, _ = w.WriteString("<blockquote>\n")
369 }
370 } else {
371 _, _ = w.WriteString("</blockquote>\n")
372 }
373 return ast.WalkContinue, nil
374}
375
376func (r *Renderer) renderCodeBlock(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
377 if entering {
378 _, _ = w.WriteString("<pre><code>")
379 r.writeLines(w, source, n)
380 } else {
381 _, _ = w.WriteString("</code></pre>\n")
382 }
383 return ast.WalkContinue, nil
384}
385
386func (r *Renderer) renderFencedCodeBlock(
387 w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
388 n := node.(*ast.FencedCodeBlock)
389 if entering {
390 _, _ = w.WriteString("<pre><code")
391 language := n.Language(source)
392 if language != nil {
393 _, _ = w.WriteString(" class=\"language-")
394 r.Writer.Write(w, language)
395 _, _ = w.WriteString("\"")
396 }
397 _ = w.WriteByte('>')
398 r.writeLines(w, source, n)
399 } else {
400 _, _ = w.WriteString("</code></pre>\n")
401 }
402 return ast.WalkContinue, nil
403}
404
405func (r *Renderer) renderHTMLBlock(
406 w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
407 n := node.(*ast.HTMLBlock)
408 if entering {
409 if r.Unsafe {
410 l := n.Lines().Len()
411 for i := 0; i < l; i++ {
412 line := n.Lines().At(i)
413 r.Writer.SecureWrite(w, line.Value(source))
414 }
415 } else {
416 _, _ = w.WriteString("<!-- raw HTML omitted -->\n")
417 }
418 } else {
419 if n.HasClosure() {
420 if r.Unsafe {
421 closure := n.ClosureLine
422 r.Writer.SecureWrite(w, closure.Value(source))
423 } else {
424 _, _ = w.WriteString("<!-- raw HTML omitted -->\n")
425 }
426 }
427 }
428 return ast.WalkContinue, nil
429}
430
431// ListAttributeFilter defines attribute names which list elements can have.
432var ListAttributeFilter = GlobalAttributeFilter.Extend(
433 []byte("start"),
434 []byte("reversed"),
435 []byte("type"),
436)
437
438func (r *Renderer) renderList(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
439 n := node.(*ast.List)
440 tag := "ul"
441 if n.IsOrdered() {
442 tag = "ol"
443 }
444 if entering {
445 _ = w.WriteByte('<')
446 _, _ = w.WriteString(tag)
447 if n.IsOrdered() && n.Start != 1 {
448 _, _ = fmt.Fprintf(w, " start=\"%d\"", n.Start)
449 }
450 if n.Attributes() != nil {
451 RenderAttributes(w, n, ListAttributeFilter)
452 }
453 _, _ = w.WriteString(">\n")
454 } else {
455 _, _ = w.WriteString("</")
456 _, _ = w.WriteString(tag)
457 _, _ = w.WriteString(">\n")
458 }
459 return ast.WalkContinue, nil
460}
461
462// ListItemAttributeFilter defines attribute names which list item elements can have.
463var ListItemAttributeFilter = GlobalAttributeFilter.Extend(
464 []byte("value"),
465)
466
467func (r *Renderer) renderListItem(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
468 if entering {
469 if n.Attributes() != nil {
470 _, _ = w.WriteString("<li")
471 RenderAttributes(w, n, ListItemAttributeFilter)
472 _ = w.WriteByte('>')
473 } else {
474 _, _ = w.WriteString("<li>")
475 }
476 fc := n.FirstChild()
477 if fc != nil {
478 if _, ok := fc.(*ast.TextBlock); !ok {
479 _ = w.WriteByte('\n')
480 }
481 }
482 } else {
483 _, _ = w.WriteString("</li>\n")
484 }
485 return ast.WalkContinue, nil
486}
487
488// ParagraphAttributeFilter defines attribute names which paragraph elements can have.
489var ParagraphAttributeFilter = GlobalAttributeFilter
490
491func (r *Renderer) renderParagraph(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
492 if entering {
493 if n.Attributes() != nil {
494 _, _ = w.WriteString("<p")
495 RenderAttributes(w, n, ParagraphAttributeFilter)
496 _ = w.WriteByte('>')
497 } else {
498 _, _ = w.WriteString("<p>")
499 }
500 } else {
501 _, _ = w.WriteString("</p>\n")
502 }
503 return ast.WalkContinue, nil
504}
505
506func (r *Renderer) renderTextBlock(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
507 if !entering {
508 if n.NextSibling() != nil && n.FirstChild() != nil {
509 _ = w.WriteByte('\n')
510 }
511 }
512 return ast.WalkContinue, nil
513}
514
515// ThematicAttributeFilter defines attribute names which hr elements can have.
516var ThematicAttributeFilter = GlobalAttributeFilter.Extend(
517 []byte("align"), // [Deprecated]
518 []byte("color"), // [Not Standardized]
519 []byte("noshade"), // [Deprecated]
520 []byte("size"), // [Deprecated]
521 []byte("width"), // [Deprecated]
522)
523
524func (r *Renderer) renderThematicBreak(
525 w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
526 if !entering {
527 return ast.WalkContinue, nil
528 }
529 _, _ = w.WriteString("<hr")
530 if n.Attributes() != nil {
531 RenderAttributes(w, n, ThematicAttributeFilter)
532 }
533 if r.XHTML {
534 _, _ = w.WriteString(" />\n")
535 } else {
536 _, _ = w.WriteString(">\n")
537 }
538 return ast.WalkContinue, nil
539}
540
541// LinkAttributeFilter defines attribute names which link elements can have.
542var LinkAttributeFilter = GlobalAttributeFilter.Extend(
543 []byte("download"),
544 // []byte("href"),
545 []byte("hreflang"),
546 []byte("media"),
547 []byte("ping"),
548 []byte("referrerpolicy"),
549 []byte("rel"),
550 []byte("shape"),
551 []byte("target"),
552)
553
554func (r *Renderer) renderAutoLink(
555 w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
556 n := node.(*ast.AutoLink)
557 if !entering {
558 return ast.WalkContinue, nil
559 }
560 _, _ = w.WriteString(`<a href="`)
561 url := n.URL(source)
562 label := n.Label(source)
563 if n.AutoLinkType == ast.AutoLinkEmail && !bytes.HasPrefix(bytes.ToLower(url), []byte("mailto:")) {
564 _, _ = w.WriteString("mailto:")
565 }
566 _, _ = w.Write(util.EscapeHTML(util.URLEscape(url, false)))
567 if n.Attributes() != nil {
568 _ = w.WriteByte('"')
569 RenderAttributes(w, n, LinkAttributeFilter)
570 _ = w.WriteByte('>')
571 } else {
572 _, _ = w.WriteString(`">`)
573 }
574 _, _ = w.Write(util.EscapeHTML(label))
575 _, _ = w.WriteString(`</a>`)
576 return ast.WalkContinue, nil
577}
578
579// CodeAttributeFilter defines attribute names which code elements can have.
580var CodeAttributeFilter = GlobalAttributeFilter
581
582func (r *Renderer) renderCodeSpan(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
583 if entering {
584 if n.Attributes() != nil {
585 _, _ = w.WriteString("<code")
586 RenderAttributes(w, n, CodeAttributeFilter)
587 _ = w.WriteByte('>')
588 } else {
589 _, _ = w.WriteString("<code>")
590 }
591 for c := n.FirstChild(); c != nil; c = c.NextSibling() {
592 segment := c.(*ast.Text).Segment
593 value := segment.Value(source)
594 if bytes.HasSuffix(value, []byte("\n")) {
595 r.Writer.RawWrite(w, value[:len(value)-1])
596 r.Writer.RawWrite(w, []byte(" "))
597 } else {
598 r.Writer.RawWrite(w, value)
599 }
600 }
601 return ast.WalkSkipChildren, nil
602 }
603 _, _ = w.WriteString("</code>")
604 return ast.WalkContinue, nil
605}
606
607// EmphasisAttributeFilter defines attribute names which emphasis elements can have.
608var EmphasisAttributeFilter = GlobalAttributeFilter
609
610func (r *Renderer) renderEmphasis(
611 w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
612 n := node.(*ast.Emphasis)
613 tag := "em"
614 if n.Level == 2 {
615 tag = "strong"
616 }
617 if entering {
618 _ = w.WriteByte('<')
619 _, _ = w.WriteString(tag)
620 if n.Attributes() != nil {
621 RenderAttributes(w, n, EmphasisAttributeFilter)
622 }
623 _ = w.WriteByte('>')
624 } else {
625 _, _ = w.WriteString("</")
626 _, _ = w.WriteString(tag)
627 _ = w.WriteByte('>')
628 }
629 return ast.WalkContinue, nil
630}
631
632func (r *Renderer) renderLink(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
633 n := node.(*ast.Link)
634 if entering {
635 _, _ = w.WriteString("<a href=\"")
636 if r.Unsafe || !IsDangerousURL(n.Destination) {
637 _, _ = w.Write(util.EscapeHTML(util.URLEscape(n.Destination, true)))
638 }
639 _ = w.WriteByte('"')
640 if n.Title != nil {
641 _, _ = w.WriteString(` title="`)
642 r.Writer.Write(w, n.Title)
643 _ = w.WriteByte('"')
644 }
645 if n.Attributes() != nil {
646 RenderAttributes(w, n, LinkAttributeFilter)
647 }
648 _ = w.WriteByte('>')
649 } else {
650 _, _ = w.WriteString("</a>")
651 }
652 return ast.WalkContinue, nil
653}
654
655// ImageAttributeFilter defines attribute names which image elements can have.
656var ImageAttributeFilter = GlobalAttributeFilter.Extend(
657 []byte("align"),
658 []byte("border"),
659 []byte("crossorigin"),
660 []byte("decoding"),
661 []byte("height"),
662 []byte("importance"),
663 []byte("intrinsicsize"),
664 []byte("ismap"),
665 []byte("loading"),
666 []byte("referrerpolicy"),
667 []byte("sizes"),
668 []byte("srcset"),
669 []byte("usemap"),
670 []byte("width"),
671)
672
673func (r *Renderer) renderImage(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
674 if !entering {
675 return ast.WalkContinue, nil
676 }
677 n := node.(*ast.Image)
678 _, _ = w.WriteString("<img src=\"")
679 if r.Unsafe || !IsDangerousURL(n.Destination) {
680 _, _ = w.Write(util.EscapeHTML(util.URLEscape(n.Destination, true)))
681 }
682 _, _ = w.WriteString(`" alt="`)
683 r.renderTexts(w, source, n)
684 _ = w.WriteByte('"')
685 if n.Title != nil {
686 _, _ = w.WriteString(` title="`)
687 r.Writer.Write(w, n.Title)
688 _ = w.WriteByte('"')
689 }
690 if n.Attributes() != nil {
691 RenderAttributes(w, n, ImageAttributeFilter)
692 }
693 if r.XHTML {
694 _, _ = w.WriteString(" />")
695 } else {
696 _, _ = w.WriteString(">")
697 }
698 return ast.WalkSkipChildren, nil
699}
700
701func (r *Renderer) renderRawHTML(
702 w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
703 if !entering {
704 return ast.WalkSkipChildren, nil
705 }
706 if r.Unsafe {
707 n := node.(*ast.RawHTML)
708 l := n.Segments.Len()
709 for i := 0; i < l; i++ {
710 segment := n.Segments.At(i)
711 _, _ = w.Write(segment.Value(source))
712 }
713 return ast.WalkSkipChildren, nil
714 }
715 _, _ = w.WriteString("<!-- raw HTML omitted -->")
716 return ast.WalkSkipChildren, nil
717}
718
719func (r *Renderer) renderText(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
720 if !entering {
721 return ast.WalkContinue, nil
722 }
723 n := node.(*ast.Text)
724 segment := n.Segment
725 if n.IsRaw() {
726 r.Writer.RawWrite(w, segment.Value(source))
727 } else {
728 value := segment.Value(source)
729 r.Writer.Write(w, value)
730 if n.HardLineBreak() || (n.SoftLineBreak() && r.HardWraps) {
731 if r.XHTML {
732 _, _ = w.WriteString("<br />\n")
733 } else {
734 _, _ = w.WriteString("<br>\n")
735 }
736 } else if n.SoftLineBreak() {
737 if r.EastAsianLineBreaks != EastAsianLineBreaksNone && len(value) != 0 {
738 sibling := node.NextSibling()
739 if sibling != nil && sibling.Kind() == ast.KindText {
740 if siblingText := sibling.(*ast.Text).Value(source); len(siblingText) != 0 {
741 thisLastRune := util.ToRune(value, len(value)-1)
742 siblingFirstRune, _ := utf8.DecodeRune(siblingText)
743 if r.EastAsianLineBreaks.softLineBreak(thisLastRune, siblingFirstRune) {
744 _ = w.WriteByte('\n')
745 }
746 }
747 }
748 } else {
749 _ = w.WriteByte('\n')
750 }
751 }
752 }
753 return ast.WalkContinue, nil
754}
755
756func (r *Renderer) renderString(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
757 if !entering {
758 return ast.WalkContinue, nil
759 }
760 n := node.(*ast.String)
761 if n.IsCode() {
762 _, _ = w.Write(n.Value)
763 } else {
764 if n.IsRaw() {
765 r.Writer.RawWrite(w, n.Value)
766 } else {
767 r.Writer.Write(w, n.Value)
768 }
769 }
770 return ast.WalkContinue, nil
771}
772
773func (r *Renderer) renderTexts(w util.BufWriter, source []byte, n ast.Node) {
774 for c := n.FirstChild(); c != nil; c = c.NextSibling() {
775 if s, ok := c.(*ast.String); ok {
776 _, _ = r.renderString(w, source, s, true)
777 } else if t, ok := c.(*ast.Text); ok {
778 _, _ = r.renderText(w, source, t, true)
779 } else {
780 r.renderTexts(w, source, c)
781 }
782 }
783}
784
785var dataPrefix = []byte("data-")
786
787// RenderAttributes renders given node's attributes.
788// You can specify attribute names to render by the filter.
789// If filter is nil, RenderAttributes renders all attributes.
790func RenderAttributes(w util.BufWriter, node ast.Node, filter util.BytesFilter) {
791 for _, attr := range node.Attributes() {
792 if filter != nil && !filter.Contains(attr.Name) {
793 if !bytes.HasPrefix(attr.Name, dataPrefix) {
794 continue
795 }
796 }
797 _, _ = w.WriteString(" ")
798 _, _ = w.Write(attr.Name)
799 _, _ = w.WriteString(`="`)
800 // TODO: convert numeric values to strings
801 var value []byte
802 switch typed := attr.Value.(type) {
803 case []byte:
804 value = typed
805 case string:
806 value = util.StringToReadOnlyBytes(typed)
807 }
808 _, _ = w.Write(util.EscapeHTML(value))
809 _ = w.WriteByte('"')
810 }
811}
812
// A Writer interface writes textual contents to a writer.
type Writer interface {
	// Write writes the given source to writer, resolving character references
	// and unescaping backslash escaped characters.
	Write(writer util.BufWriter, source []byte)

	// RawWrite writes the given source to writer without resolving references
	// or unescaping backslash escaped characters.
	RawWrite(writer util.BufWriter, source []byte)

	// SecureWrite writes the given source to writer, replacing insecure characters.
	SecureWrite(writer util.BufWriter, source []byte)
}
826
// replacementCharacter is U+FFFD, which the default writer emits in place of
// NUL bytes.
var replacementCharacter = []byte("\ufffd")
828
// A WriterConfig struct has configurations for the HTML based writers.
type WriterConfig struct {
	// EscapedSpace is an option that indicates that a '\' escaped half-space(0x20) should not be rendered.
	EscapedSpace bool
}

// A WriterOption is a function that sets options for HTML based writers.
type WriterOption func(*WriterConfig)

// WithEscapedSpace is a WriterOption that indicates that a '\' escaped
// half-space(0x20) should not be rendered.
func WithEscapedSpace() WriterOption {
	enable := func(cfg *WriterConfig) {
		cfg.EscapedSpace = true
	}
	return enable
}
844
845type defaultWriter struct {
846 WriterConfig
847}
848
849// NewWriter returns a new Writer.
850func NewWriter(opts ...WriterOption) Writer {
851 w := &defaultWriter{}
852 for _, opt := range opts {
853 opt(&w.WriterConfig)
854 }
855 return w
856}
857
858func escapeRune(writer util.BufWriter, r rune) {
859 if r < 256 {
860 v := util.EscapeHTMLByte(byte(r))
861 if v != nil {
862 _, _ = writer.Write(v)
863 return
864 }
865 }
866 _, _ = writer.WriteRune(util.ToValidRune(r))
867}
868
869func (d *defaultWriter) SecureWrite(writer util.BufWriter, source []byte) {
870 n := 0
871 l := len(source)
872 for i := 0; i < l; i++ {
873 if source[i] == '\u0000' {
874 _, _ = writer.Write(source[i-n : i])
875 n = 0
876 _, _ = writer.Write(replacementCharacter)
877 continue
878 }
879 n++
880 }
881 if n != 0 {
882 _, _ = writer.Write(source[l-n:])
883 }
884}
885
886func (d *defaultWriter) RawWrite(writer util.BufWriter, source []byte) {
887 n := 0
888 l := len(source)
889 for i := 0; i < l; i++ {
890 v := util.EscapeHTMLByte(source[i])
891 if v != nil {
892 _, _ = writer.Write(source[i-n : i])
893 n = 0
894 _, _ = writer.Write(v)
895 continue
896 }
897 n++
898 }
899 if n != 0 {
900 _, _ = writer.Write(source[l-n:])
901 }
902}
903
// Write copies source to writer while
//   - dropping the backslash in front of a backslash-escaped punctuation
//     character (and, when EscapedSpace is set, dropping a backslash-escaped
//     space together with its backslash),
//   - replacing NUL bytes with the U+FFFD replacement character, and
//   - replacing numeric character references (&#...; and &#x...;) and named
//     HTML5 entity references with the characters they denote.
//
// All output goes through RawWrite or escapeRune.
func (d *defaultWriter) Write(writer util.BufWriter, source []byte) {
	// escaped is true when the previous byte was a backslash that has not
	// been consumed yet.
	escaped := false
	var ok bool
	limit := len(source)
	// n is the index of the first byte of source that has not been written.
	n := 0
	for i := 0; i < limit; i++ {
		c := source[i]
		if escaped {
			if util.IsPunct(c) {
				// Flush everything before the backslash (at i-1) and restart
				// the pending run at the punctuation character, so only the
				// backslash is dropped.
				d.RawWrite(writer, source[n:i-1])
				n = i
				escaped = false
				continue
			}
			if d.EscapedSpace && c == ' ' {
				// Drop both the backslash and the space.
				d.RawWrite(writer, source[n:i-1])
				n = i + 1
				escaped = false
				continue
			}
		}
		if c == '\x00' {
			// Replace the NUL byte with the replacement character.
			d.RawWrite(writer, source[n:i])
			d.RawWrite(writer, replacementCharacter)
			n = i + 1
			escaped = false
			continue
		}
		if c == '&' {
			// pos remembers where the '&' is; i is reused below as the scan
			// position returned by util.ReadWhile.
			pos := i
			next := i + 1
			if next < limit && source[next] == '#' {
				nnext := next + 1
				if nnext < limit {
					nc := source[nnext]
					// code point like #x22;
					// NOTE: this parses as (nnext < limit && nc == 'x') || nc == 'X';
					// the enclosing if has already established nnext < limit.
					if nnext < limit && nc == 'x' || nc == 'X' {
						start := nnext + 1
						i, ok = util.ReadWhile(source, [2]int{start, limit}, util.IsHexDecimal)
						// Accept at most 6 hex digits followed by ';'.
						if ok && i < limit && source[i] == ';' && i-start < 7 {
							// The parse error is ignored; the bytes were scanned with
							// util.IsHexDecimal just above.
							v, _ := strconv.ParseUint(util.BytesToReadOnlyString(source[start:i]), 16, 32)
							d.RawWrite(writer, source[n:pos])
							n = i + 1
							escapeRune(writer, rune(v))
							continue
						}
						// code point like #1234;
					} else if nc >= '0' && nc <= '9' {
						start := nnext
						i, ok = util.ReadWhile(source, [2]int{start, limit}, util.IsNumeric)
						// Accept at most 7 decimal digits followed by ';'.
						if ok && i < limit && i-start < 8 && source[i] == ';' {
							// The parse error is ignored; the bytes were scanned with
							// util.IsNumeric just above.
							v, _ := strconv.ParseUint(util.BytesToReadOnlyString(source[start:i]), 10, 32)
							d.RawWrite(writer, source[n:pos])
							n = i + 1
							escapeRune(writer, rune(v))
							continue
						}
					}
				}
			} else {
				start := next
				i, ok = util.ReadWhile(source, [2]int{start, limit}, util.IsAlphaNumeric)
				// entity reference
				if ok && i < limit && source[i] == ';' {
					name := util.BytesToReadOnlyString(source[start:i])
					entity, ok := util.LookUpHTML5EntityByName(name)
					if ok {
						d.RawWrite(writer, source[n:pos])
						n = i + 1
						d.RawWrite(writer, entity.Characters)
						continue
					}
				}
			}
			// No reference was recognized: rewind i to the '&' so the loop
			// resumes at the byte after it. The '&' stays in the pending run
			// and is written later by RawWrite.
			i = next - 1
		}
		if c == '\\' {
			escaped = true
			continue
		}
		escaped = false
	}
	// Flush whatever is left in the pending run.
	d.RawWrite(writer, source[n:])
}
988
// DefaultWriter is a default instance of the Writer, created by NewWriter
// with no options.
var DefaultWriter = NewWriter()
991
// Lower-case URL prefixes consulted by IsDangerousURL.
var (
	// Prefix of image data URLs, followed by the image subtypes treated as safe.
	bDataImage = []byte("data:image/")
	bPng       = []byte("png;")
	bGif       = []byte("gif;")
	bJpeg      = []byte("jpeg;")
	bWebp      = []byte("webp;")
	bSvg       = []byte("svg+xml;")

	// Scheme prefixes treated as dangerous.
	bJs   = []byte("javascript:")
	bVb   = []byte("vbscript:")
	bFile = []byte("file:")
	bData = []byte("data:")
)
1002
// hasPrefix reports whether s begins with prefix, ignoring case.
//
// The comparison uses bytes.EqualFold on the leading len(prefix) bytes of s,
// which avoids allocating lowered copies of both arguments on every call.
// An empty prefix matches every s.
func hasPrefix(s, prefix []byte) bool {
	return len(s) >= len(prefix) && bytes.EqualFold(s[:len(prefix)], prefix)
}
1006
1007// IsDangerousURL returns true if the given url seems a potentially dangerous url,
1008// otherwise false.
1009func IsDangerousURL(url []byte) bool {
1010 if hasPrefix(url, bDataImage) && len(url) >= 11 {
1011 v := url[11:]
1012 if hasPrefix(v, bPng) || hasPrefix(v, bGif) ||
1013 hasPrefix(v, bJpeg) || hasPrefix(v, bWebp) ||
1014 hasPrefix(v, bSvg) {
1015 return false
1016 }
1017 return true
1018 }
1019 return hasPrefix(url, bJs) || hasPrefix(url, bVb) ||
1020 hasPrefix(url, bFile) || hasPrefix(url, bData)
1021}