1package parser
2
3import (
4 "bytes"
5 "fmt"
6 "golang.org/x/net/html"
7 "regexp"
8 "strings"
9)
10
// parser is the interface returned by NewWithInput.
type parser interface {
	// Parse returns the extracted string, or an error if nothing could be
	// extracted.
	Parse() (string, error)
}
14
// parserType selects which parser implementation NewWithInput returns.
type parserType int

const (
	// TitleParser selects the parser that extracts the new title from a
	// "changed title" event.
	TitleParser parserType = iota
)
20
21// NewWithInput returns a new parser instance
22func NewWithInput(t parserType, input string) parser {
23 var p parser
24
25 switch t {
26 case TitleParser:
27 p = titleParser{input: input}
28 }
29
30 return p
31}
32
// titleParser is the parser implementation selected by TitleParser.
type titleParser struct {
	// input is the raw string handed to NewWithInput
	input string
}
36
37// Parse is used to fetch the new title from a "changed title" event
38//
39// this func is a great example of something that is _extremely_ fragile; the
40// input string is pulled from the body of a gitlab message containing html
41// fragments, and has changed on at least [one occasion][0], breaking our test
42// pipelines and preventing feature development. i think querying for an issue's
43// _iterations_ [1] would likely be a better approach.
44//
45// example p.input values:
46// - changed title from **some title** to **some{+ new +}title**
47// - changed title from **some{- new-} title** to **some title**
48// - <p>changed title from <code class="idiff">some title</code> to <code class="idiff">some<span class="idiff left addition"> new</span> title</code></p>
49//
50// [0]: https://github.com/git-bug/git-bug/issues/1367
51// [1]: https://docs.gitlab.com/api/resource_iteration_events/#list-project-issue-iteration-events
52func (p titleParser) Parse() (string, error) {
53 var reHTML = regexp.MustCompile(`.* to <code\s+class="idiff"\s*>(.*?)</code>`)
54 var reMD = regexp.MustCompile(`.* to \*\*(.*)\*\*`)
55
56 matchHTML := reHTML.FindAllStringSubmatch(p.input, -1)
57 matchMD := reMD.FindAllStringSubmatch(p.input, -1)
58
59 if len(matchHTML) == 1 {
60 t, err := p.stripHTML(matchHTML[0][1])
61 if err != nil {
62 return "", fmt.Errorf("unable to strip HTML from new title: %q", t)
63 }
64 return strings.TrimSpace(t), nil
65 }
66
67 if len(matchMD) == 1 {
68 reDiff := regexp.MustCompile(`{\+(.*?)\+}`)
69
70 t := matchMD[0][1]
71 t = reDiff.ReplaceAllString(t, "$1")
72
73 return strings.TrimSpace(t), nil
74 }
75
76 return "", fmt.Errorf(
77 "failed to extract title: html=%d md=%d input=%q",
78 len(matchHTML),
79 len(matchMD),
80 p.input,
81 )
82}
83
84// stripHTML removes all html tags from a provided string
85func (p titleParser) stripHTML(s string) (string, error) {
86 nodes, err := html.Parse(strings.NewReader(s))
87 if err != nil {
88 // return the original unmodified string in the event html.Parse()
89 // fails; let the downstream callsites decide if they want to proceed
90 // with the value or not.
91 return s, err
92 }
93
94 var buf bytes.Buffer
95 var walk func(*html.Node)
96 walk = func(n *html.Node) {
97 if n.Type == html.TextNode {
98 buf.WriteString(n.Data)
99 }
100 for c := n.FirstChild; c != nil; c = c.NextSibling {
101 walk(c)
102 }
103 }
104 walk(nodes)
105
106 return buf.String(), nil
107}