linkify.go

  1package extension
  2
  3import (
  4	"bytes"
  5	"regexp"
  6
  7	"github.com/yuin/goldmark"
  8	"github.com/yuin/goldmark/ast"
  9	"github.com/yuin/goldmark/parser"
 10	"github.com/yuin/goldmark/text"
 11	"github.com/yuin/goldmark/util"
 12)
 13
// wwwURLRegxp is the default pattern for links written without a protocol.
// It is anchored at the start of the input and requires a literal "www."
// prefix, a host part, a dot followed by lowercase letters, and then an
// optional path, query or fragment.
var wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-z]+(?:[/#?][-a-zA-Z0-9@:%_\+.~#!?&/=\(\);,'">\^{}\[\]` + "`" + `]*)?`) //nolint:golint,lll

// urlRegexp is the default pattern for links that include a protocol.
// It is anchored at the start of the input and accepts the http, https and
// ftp schemes, a host part, a dot followed by lowercase letters, an optional
// numeric port, and then an optional path, query or fragment.
var urlRegexp = regexp.MustCompile(`^(?:http|https|ftp)://[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-z]+(?::\d+)?(?:[/#?][-a-zA-Z0-9@:%_+.~#$!?&/=\(\);,'">\^{}\[\]` + "`" + `]*)?`) //nolint:golint,lll
 17
// A LinkifyConfig struct is a data structure that holds configuration of the
// Linkify extension.
type LinkifyConfig struct {
	// AllowedProtocols lists the prefixes (for example "http:") a line must
	// start with before URLRegexp is tried. When nil, the parser falls back
	// to "http:", "https:" and "ftp:".
	AllowedProtocols [][]byte
	// URLRegexp matches URLs that include a protocol.
	URLRegexp        *regexp.Regexp
	// WWWRegexp matches URLs without a protocol; it is tried only on input
	// that starts with "www.".
	WWWRegexp        *regexp.Regexp
	// EmailRegexp matches email addresses. When nil, the parser uses
	// util.FindEmailIndex instead.
	EmailRegexp      *regexp.Regexp
}
 26
// Parser option names under which the Linkify settings are stored in
// parser.Config.Options and recognized by LinkifyConfig.SetOption.
const (
	optLinkifyAllowedProtocols parser.OptionName = "LinkifyAllowedProtocols"
	optLinkifyURLRegexp        parser.OptionName = "LinkifyURLRegexp"
	optLinkifyWWWRegexp        parser.OptionName = "LinkifyWWWRegexp"
	optLinkifyEmailRegexp      parser.OptionName = "LinkifyEmailRegexp"
)
 33
 34// SetOption implements SetOptioner.
 35func (c *LinkifyConfig) SetOption(name parser.OptionName, value interface{}) {
 36	switch name {
 37	case optLinkifyAllowedProtocols:
 38		c.AllowedProtocols = value.([][]byte)
 39	case optLinkifyURLRegexp:
 40		c.URLRegexp = value.(*regexp.Regexp)
 41	case optLinkifyWWWRegexp:
 42		c.WWWRegexp = value.(*regexp.Regexp)
 43	case optLinkifyEmailRegexp:
 44		c.EmailRegexp = value.(*regexp.Regexp)
 45	}
 46}
 47
// A LinkifyOption interface sets options for the Linkify extension.
// Besides acting as a parser.Option, an implementation can apply itself
// directly to a LinkifyConfig.
type LinkifyOption interface {
	parser.Option
	SetLinkifyOption(*LinkifyConfig)
}
 53
 54type withLinkifyAllowedProtocols struct {
 55	value [][]byte
 56}
 57
 58func (o *withLinkifyAllowedProtocols) SetParserOption(c *parser.Config) {
 59	c.Options[optLinkifyAllowedProtocols] = o.value
 60}
 61
 62func (o *withLinkifyAllowedProtocols) SetLinkifyOption(p *LinkifyConfig) {
 63	p.AllowedProtocols = o.value
 64}
 65
 66// WithLinkifyAllowedProtocols is a functional option that specify allowed
 67// protocols in autolinks. Each protocol must end with ':' like
 68// 'http:' .
 69func WithLinkifyAllowedProtocols[T []byte | string](value []T) LinkifyOption {
 70	opt := &withLinkifyAllowedProtocols{}
 71	for _, v := range value {
 72		opt.value = append(opt.value, []byte(v))
 73	}
 74	return opt
 75}
 76
 77type withLinkifyURLRegexp struct {
 78	value *regexp.Regexp
 79}
 80
 81func (o *withLinkifyURLRegexp) SetParserOption(c *parser.Config) {
 82	c.Options[optLinkifyURLRegexp] = o.value
 83}
 84
 85func (o *withLinkifyURLRegexp) SetLinkifyOption(p *LinkifyConfig) {
 86	p.URLRegexp = o.value
 87}
 88
 89// WithLinkifyURLRegexp is a functional option that specify
 90// a pattern of the URL including a protocol.
 91func WithLinkifyURLRegexp(value *regexp.Regexp) LinkifyOption {
 92	return &withLinkifyURLRegexp{
 93		value: value,
 94	}
 95}
 96
 97type withLinkifyWWWRegexp struct {
 98	value *regexp.Regexp
 99}
100
101func (o *withLinkifyWWWRegexp) SetParserOption(c *parser.Config) {
102	c.Options[optLinkifyWWWRegexp] = o.value
103}
104
105func (o *withLinkifyWWWRegexp) SetLinkifyOption(p *LinkifyConfig) {
106	p.WWWRegexp = o.value
107}
108
109// WithLinkifyWWWRegexp is a functional option that specify
110// a pattern of the URL without a protocol.
111// This pattern must start with 'www.' .
112func WithLinkifyWWWRegexp(value *regexp.Regexp) LinkifyOption {
113	return &withLinkifyWWWRegexp{
114		value: value,
115	}
116}
117
118type withLinkifyEmailRegexp struct {
119	value *regexp.Regexp
120}
121
122func (o *withLinkifyEmailRegexp) SetParserOption(c *parser.Config) {
123	c.Options[optLinkifyEmailRegexp] = o.value
124}
125
126func (o *withLinkifyEmailRegexp) SetLinkifyOption(p *LinkifyConfig) {
127	p.EmailRegexp = o.value
128}
129
130// WithLinkifyEmailRegexp is a functional otpion that specify
131// a pattern of the email address.
132func WithLinkifyEmailRegexp(value *regexp.Regexp) LinkifyOption {
133	return &withLinkifyEmailRegexp{
134		value: value,
135	}
136}
137
// linkifyParser is the inline parser behind the Linkify extension. It embeds
// LinkifyConfig, so the configured patterns and protocols are available as
// its own fields.
type linkifyParser struct {
	LinkifyConfig
}
141
142// NewLinkifyParser return a new InlineParser can parse
143// text that seems like a URL.
144func NewLinkifyParser(opts ...LinkifyOption) parser.InlineParser {
145	p := &linkifyParser{
146		LinkifyConfig: LinkifyConfig{
147			AllowedProtocols: nil,
148			URLRegexp:        urlRegexp,
149			WWWRegexp:        wwwURLRegxp,
150		},
151	}
152	for _, o := range opts {
153		o.SetLinkifyOption(&p.LinkifyConfig)
154	}
155	return p
156}
157
158func (s *linkifyParser) Trigger() []byte {
159	// ' ' indicates any white spaces and a line head
160	return []byte{' ', '*', '_', '~', '('}
161}
162
// Prefixes checked by linkifyParser.Parse: the three protocol prefixes are
// the defaults used when no allowed protocols are configured, and domainWWW
// is the prefix that makes the parser try the WWW pattern.
var (
	protoHTTP  = []byte("http:")
	protoHTTPS = []byte("https:")
	protoFTP   = []byte("ftp:")
	domainWWW  = []byte("www.")
)
169
170func (s *linkifyParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
171	if pc.IsInLinkLabel() {
172		return nil
173	}
174	line, segment := block.PeekLine()
175	consumes := 0
176	start := segment.Start
177	c := line[0]
178	// advance if current position is not a line head.
179	if c == ' ' || c == '*' || c == '_' || c == '~' || c == '(' {
180		consumes++
181		start++
182		line = line[1:]
183	}
184
185	var m []int
186	var protocol []byte
187	var typ ast.AutoLinkType = ast.AutoLinkURL
188	if s.LinkifyConfig.AllowedProtocols == nil {
189		if bytes.HasPrefix(line, protoHTTP) || bytes.HasPrefix(line, protoHTTPS) || bytes.HasPrefix(line, protoFTP) {
190			m = s.LinkifyConfig.URLRegexp.FindSubmatchIndex(line)
191		}
192	} else {
193		for _, prefix := range s.LinkifyConfig.AllowedProtocols {
194			if bytes.HasPrefix(line, prefix) {
195				m = s.LinkifyConfig.URLRegexp.FindSubmatchIndex(line)
196				break
197			}
198		}
199	}
200	if m == nil && bytes.HasPrefix(line, domainWWW) {
201		m = s.LinkifyConfig.WWWRegexp.FindSubmatchIndex(line)
202		protocol = []byte("http")
203	}
204	if m != nil && m[0] != 0 {
205		m = nil
206	}
207	if m != nil && m[0] == 0 {
208		lastChar := line[m[1]-1]
209		if lastChar == '.' {
210			m[1]--
211		} else if lastChar == ')' {
212			closing := 0
213			for i := m[1] - 1; i >= m[0]; i-- {
214				if line[i] == ')' {
215					closing++
216				} else if line[i] == '(' {
217					closing--
218				}
219			}
220			if closing > 0 {
221				m[1] -= closing
222			}
223		} else if lastChar == ';' {
224			i := m[1] - 2
225			for ; i >= m[0]; i-- {
226				if util.IsAlphaNumeric(line[i]) {
227					continue
228				}
229				break
230			}
231			if i != m[1]-2 {
232				if line[i] == '&' {
233					m[1] -= m[1] - i
234				}
235			}
236		}
237	}
238	if m == nil {
239		if len(line) > 0 && util.IsPunct(line[0]) {
240			return nil
241		}
242		typ = ast.AutoLinkEmail
243		stop := -1
244		if s.LinkifyConfig.EmailRegexp == nil {
245			stop = util.FindEmailIndex(line)
246		} else {
247			m := s.LinkifyConfig.EmailRegexp.FindSubmatchIndex(line)
248			if m != nil && m[0] == 0 {
249				stop = m[1]
250			}
251		}
252		if stop < 0 {
253			return nil
254		}
255		at := bytes.IndexByte(line, '@')
256		m = []int{0, stop, at, stop - 1}
257		if m == nil || bytes.IndexByte(line[m[2]:m[3]], '.') < 0 {
258			return nil
259		}
260		lastChar := line[m[1]-1]
261		if lastChar == '.' {
262			m[1]--
263		}
264		if m[1] < len(line) {
265			nextChar := line[m[1]]
266			if nextChar == '-' || nextChar == '_' {
267				return nil
268			}
269		}
270	}
271	if m == nil {
272		return nil
273	}
274	if consumes != 0 {
275		s := segment.WithStop(segment.Start + 1)
276		ast.MergeOrAppendTextSegment(parent, s)
277	}
278	i := m[1] - 1
279	for ; i > 0; i-- {
280		c := line[i]
281		switch c {
282		case '?', '!', '.', ',', ':', '*', '_', '~':
283		default:
284			goto endfor
285		}
286	}
287endfor:
288	i++
289	consumes += i
290	block.Advance(consumes)
291	n := ast.NewTextSegment(text.NewSegment(start, start+i))
292	link := ast.NewAutoLink(typ, n)
293	link.Protocol = protocol
294	return link
295}
296
// CloseBlock does nothing; its body is intentionally empty.
func (s *linkifyParser) CloseBlock(parent ast.Node, pc parser.Context) {
	// nothing to do
}
300
// linkify is the extension type; it keeps the options that are handed to
// NewLinkifyParser when the extension is installed.
type linkify struct {
	options []LinkifyOption
}

// Linkify is an extension that allows you to parse text that seems like a URL.
// It carries no options, so the parser is created with its defaults.
var Linkify = &linkify{}
307
308// NewLinkify creates a new [goldmark.Extender] that
309// allow you to parse text that seems like a URL.
310func NewLinkify(opts ...LinkifyOption) goldmark.Extender {
311	return &linkify{
312		options: opts,
313	}
314}
315
316func (e *linkify) Extend(m goldmark.Markdown) {
317	m.Parser().AddOptions(
318		parser.WithInlineParsers(
319			util.Prioritized(NewLinkifyParser(e.options...), 999),
320		),
321	)
322}