fix: refactor how gitlab title changes are detected (#1370)

sudoforge created

This change refactors how issue title changes from gitlab events are
detected, fixing an issue (due to upstream changing the format of the
event body from markdown-esque to html), and improving on error
handling.

The error boiled down to a change in the issue title format. Gitlab
changed this on April 17 2025 with the release of version 17.11 [0],
although the only place a reference to this change exists is in the
changelog [1], which is not linked to from the releases page.

To account for the potential future in which other fields need to be
parsed in this way, an internal parser library was introduced at
`//bridge/gitlab/parser:parser.go` with initial support for parsing
title change messages.

An issue was opened with the Gitlab team discussing the fact that this
was a breaking change [2]. This may lead to moving title changes (or
maybe all changes) to `resource_*_events`, which would likely provide a
smoother experience for our use case.

Debugging this issue surfaced a few pain points with this bridge:

- Errors are few and far between, and when they do exist and are
  managed, they are often not propagated, often existing as simple
  `fmt.Printf` calls
- Inconsistent and uninformative logging structure when there _are_
  errors, leading to challenges in debugging unexpected behavior
- Fragility: we are parsing random text from event fields (for title
  changes and more). This will likely lead to future breakage should
  Gitlab change the format of other fields. Ideally, the gitlab SDK
  would start classifying notes and have fields like `type`, `old`,
  `new`... but this is unlikely to happen in the short term

[0]: https://about.gitlab.com/releases/2025/04/17/gitlab-17-11-released/
[1]:
https://gitlab.com/gitlab-org/gitlab/-/commit/b3e1fdcf45f8b18110a2f5217b9964a11616d316#ab09011fa121d0a2bb9fa4ca76094f2482b902b7_5_232
[2]: https://gitlab.com/gitlab-org/gitlab/-/issues/536827

Closes: #1367
Change-Id: I3bd7fa1c39a9e4dd2176d6e482e30ab68965f6e7

Change summary

bridge/gitlab/event.go              |  29 +++----
bridge/gitlab/event_test.go         |  50 ------------
bridge/gitlab/export_test.go        |   6 
bridge/gitlab/import.go             |  11 ++
bridge/gitlab/parser/parser.go      | 107 +++++++++++++++++++++++++++
bridge/gitlab/parser/parser_test.go | 122 +++++++++++++++++++++++++++++++
6 files changed, 252 insertions(+), 73 deletions(-)

Detailed changes

bridge/gitlab/event.go 🔗

@@ -7,6 +7,7 @@ import (
 
 	"gitlab.com/gitlab-org/api/client-go"
 
+	"github.com/git-bug/git-bug/bridge/gitlab/parser"
 	"github.com/git-bug/git-bug/util/text"
 )
 
@@ -50,7 +51,11 @@ type NoteEvent struct{ gitlab.Note }
 func (n NoteEvent) ID() string           { return fmt.Sprintf("%d", n.Note.ID) }
 func (n NoteEvent) UserID() int          { return n.Author.ID }
 func (n NoteEvent) CreatedAt() time.Time { return *n.Note.CreatedAt }
+
 func (n NoteEvent) Kind() EventKind {
+	if _, err := parser.NewWithInput(parser.TitleParser, n.Body).Parse(); err == nil {
+		return EventTitleChanged
+	}
 
 	switch {
 	case !n.System:
@@ -71,9 +76,6 @@ func (n NoteEvent) Kind() EventKind {
 	case n.Body == "unlocked this issue":
 		return EventUnlocked
 
-	case strings.HasPrefix(n.Body, "changed title from"):
-		return EventTitleChanged
-
 	case strings.HasPrefix(n.Body, "changed due date to"):
 		return EventChangedDuedate
 
@@ -107,11 +109,15 @@ func (n NoteEvent) Kind() EventKind {
 
 }
 
-func (n NoteEvent) Title() string {
+func (n NoteEvent) Title() (string, error) {
 	if n.Kind() == EventTitleChanged {
-		return getNewTitle(n.Body)
+		t, err := parser.NewWithInput(parser.TitleParser, n.Body).Parse()
+		if err != nil {
+			return "", err
+		}
+		return t, nil
 	}
-	return text.CleanupOneLine(n.Body)
+	return text.CleanupOneLine(n.Body), nil
 }
 
 var _ Event = &LabelEvent{}
@@ -207,14 +213,3 @@ func SortedEvents(inputs ...<-chan Event) chan Event {
 
 	return out
 }
-
-// getNewTitle parses body diff given by gitlab api and return it final form
-// examples:
-// - "changed title from **fourth issue** to **fourth issue{+ changed+}**"
-// - "changed title from **fourth issue{- changed-}** to **fourth issue**"
-func getNewTitle(diff string) string {
-	newTitle := strings.Split(diff, "** to **")[1]
-	newTitle = strings.Replace(newTitle, "{+", "", -1)
-	newTitle = strings.Replace(newTitle, "+}", "", -1)
-	return strings.TrimSuffix(newTitle, "**")
-}

bridge/gitlab/event_test.go 🔗

@@ -4,59 +4,9 @@ import (
 	"testing"
 	"time"
 
-	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
 
-func TestGetNewTitle(t *testing.T) {
-	type args struct {
-		diff string
-	}
-	type want struct {
-		title string
-	}
-	tests := []struct {
-		name string
-		args args
-		want want
-	}{
-		{
-			name: "addition diff",
-			args: args{
-				diff: "**first issue** to **first issue{+ edited+}**",
-			},
-			want: want{
-				title: "first issue edited",
-			},
-		},
-		{
-			name: "deletion diff",
-			args: args{
-				diff: "**first issue{- edited-}** to **first issue**",
-			},
-			want: want{
-				title: "first issue",
-			},
-		},
-		{
-			name: "mixed diff",
-			args: args{
-				diff: "**first {-issue-}** to **first {+bug+}**",
-			},
-			want: want{
-				title: "first bug",
-			},
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			title := getNewTitle(tt.args.diff)
-			assert.Equal(t, tt.want.title, title)
-		})
-	}
-}
-
 var _ Event = mockEvent(0)
 
 type mockEvent int64

bridge/gitlab/export_test.go 🔗

@@ -179,14 +179,14 @@ func TestGitlabPushPull(t *testing.T) {
 	projectID, err := createRepository(context.TODO(), projectName, token)
 	require.NoError(t, err)
 
-	fmt.Println("created repository", projectName)
+	fmt.Printf("created project: %s (%d)\n", projectName, projectID)
 
 	// Make sure to remove the Gitlab repository when the test end
 	defer func(t *testing.T) {
 		if err := deleteRepository(context.TODO(), projectID, token); err != nil {
 			t.Fatal(err)
 		}
-		fmt.Println("deleted repository:", projectName)
+		fmt.Printf("deleted repository: %s (%d)\n", projectName, projectID)
 	}(t)
 
 	interrupt.RegisterCleaner(func() error {
@@ -215,7 +215,7 @@ func TestGitlabPushPull(t *testing.T) {
 	}
 	require.NoError(t, err)
 
-	fmt.Printf("test repository exported in %f seconds\n", time.Since(start).Seconds())
+	fmt.Printf("repository exported in %f seconds\n", time.Since(start).Seconds())
 
 	repoTwo := repository.CreateGoGitTestRepo(t, false)
 

bridge/gitlab/import.go 🔗

@@ -81,7 +81,7 @@ func (gi *gitlabImporter) ImportAll(ctx context.Context, repo *cache.RepoCache,
 					continue
 				}
 				if err := gi.ensureIssueEvent(repo, b, issue, e); err != nil {
-					err := fmt.Errorf("issue event creation: %v", err)
+					err := fmt.Errorf("unable to create issue event: %v", err)
 					out <- core.NewImportError(err, entity.Id(e.ID()))
 				}
 			}
@@ -277,10 +277,15 @@ func (gi *gitlabImporter) ensureIssueEvent(repo *cache.RepoCache, b *cache.BugCa
 			return nil
 		}
 
+		title, err := event.(NoteEvent).Title()
+		if err != nil {
+			return err
+		}
+
 		op, err := b.SetTitleRaw(
 			author,
 			event.CreatedAt().Unix(),
-			event.(NoteEvent).Title(),
+			title,
 			map[string]string{
 				metaKeyGitlabId: event.ID(),
 			},
@@ -330,7 +335,7 @@ func (gi *gitlabImporter) ensureIssueEvent(repo *cache.RepoCache, b *cache.BugCa
 		return nil
 
 	default:
-		return fmt.Errorf("unexpected event")
+		return fmt.Errorf("unexpected event: %v", event)
 	}
 
 	return nil

bridge/gitlab/parser/parser.go 🔗

@@ -0,0 +1,107 @@
+package parser
+
+import (
+	"bytes"
+	"fmt"
+	"golang.org/x/net/html"
+	"regexp"
+	"strings"
+)
+
+type parser interface {
+	Parse() (string, error)
+}
+
+type parserType int
+
+const (
+	TitleParser parserType = iota
+)
+
+// NewWithInput returns a new parser instance
+func NewWithInput(t parserType, input string) parser {
+	var p parser
+
+	switch t {
+	case TitleParser:
+		p = titleParser{input: input}
+	}
+
+	return p
+}
+
+type titleParser struct {
+	input string
+}
+
+// Parse is used to fetch the new title from a "changed title" event
+//
+// this func is a great example of something that is _extremely_ fragile; the
+// input string is pulled from the body of a gitlab message containing html
+// fragments, and has changed on at least [one occasion][0], breaking our test
+// pipelines and preventing feature development. i think querying for an issue's
+// _iterations_ [1] would likely be a better approach.
+//
+// example p.input values:
+// - changed title from **some title** to **some{+ new +}title**
+// - changed title from **some{- new-} title** to **some title**
+// - <p>changed title from <code class="idiff">some title</code> to <code class="idiff">some<span class="idiff left addition"> new</span> title</code></p>
+//
+// [0]: https://github.com/git-bug/git-bug/issues/1367
+// [1]: https://docs.gitlab.com/api/resource_iteration_events/#list-project-issue-iteration-events
+func (p titleParser) Parse() (string, error) {
+	var reHTML = regexp.MustCompile(`.* to <code\s+class="idiff"\s*>(.*?)</code>`)
+	var reMD = regexp.MustCompile(`.* to \*\*(.*)\*\*`)
+
+	matchHTML := reHTML.FindAllStringSubmatch(p.input, -1)
+	matchMD := reMD.FindAllStringSubmatch(p.input, -1)
+
+	if len(matchHTML) == 1 {
+		t, err := p.stripHTML(matchHTML[0][1])
+		if err != nil {
+			return "", fmt.Errorf("unable to strip HTML from new title: %q", t)
+		}
+		return strings.TrimSpace(t), nil
+	}
+
+	if len(matchMD) == 1 {
+		reDiff := regexp.MustCompile(`{\+(.*?)\+}`)
+
+		t := matchMD[0][1]
+		t = reDiff.ReplaceAllString(t, "$1")
+
+		return strings.TrimSpace(t), nil
+	}
+
+	return "", fmt.Errorf(
+		"failed to extract title: html=%d md=%d input=%q",
+		len(matchHTML),
+		len(matchMD),
+		p.input,
+	)
+}
+
+// stripHTML removes all html tags from a provided string
+func (p titleParser) stripHTML(s string) (string, error) {
+	nodes, err := html.Parse(strings.NewReader(s))
+	if err != nil {
+		// return the original unmodified string in the event html.Parse()
+		// fails; let the downstream callsites decide if they want to proceed
+		// with the value or not.
+		return s, err
+	}
+
+	var buf bytes.Buffer
+	var walk func(*html.Node)
+	walk = func(n *html.Node) {
+		if n.Type == html.TextNode {
+			buf.WriteString(n.Data)
+		}
+		for c := n.FirstChild; c != nil; c = c.NextSibling {
+			walk(c)
+		}
+	}
+	walk(nodes)
+
+	return buf.String(), nil
+}

bridge/gitlab/parser/parser_test.go 🔗

@@ -0,0 +1,122 @@
+package parser
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestTitleParser(t *testing.T) {
+	type args struct {
+		diff string
+	}
+	type want struct {
+		title string
+	}
+	tests := []struct {
+		name string
+		args args
+		want want
+	}{
+		{
+			name: "simple addition (html)",
+			args: args{
+				diff: `<p>changed title from <code class="idiff">simple title</code> to <code class="idiff">simple title<span class="idiff left addition"> addition</span></code></p>`,
+			},
+			want: want{
+				title: "simple title addition",
+			},
+		},
+		{
+			name: "simple addition (markdown)",
+			args: args{
+				diff: `changed title from **simple title** to **simple title{+ addition+}**`,
+			},
+			want: want{
+				title: "simple title addition",
+			},
+		},
+		{
+			name: "simple deletion (html)",
+			args: args{
+				diff: `<p>changed title from <code class="idiff">simple<span class="idiff left right deletion"> deleted</span> title</code> to <code class="idiff">simple title</code></p>`,
+			},
+			want: want{
+				title: "simple title",
+			},
+		},
+		{
+			name: "simple deletion (markdown)",
+			args: args{
+				diff: `changed title from **simple{- deleted-} title** to **simple title**`,
+			},
+			want: want{
+				title: "simple title",
+			},
+		},
+		{
+			name: "tail replacement (html)",
+			args: args{
+				diff: `<p>changed title from <code class="idiff">tail <span class="idiff left right deletion">title</span></code> to <code class="idiff">tail <span class="idiff left addition">replacement</span></code></p>`,
+			},
+			want: want{
+				title: "tail replacement",
+			},
+		},
+		{
+			name: "tail replacement (markdown)",
+			args: args{
+				diff: `changed title from **tail {-title-}** to **tail {+replacement+}**`,
+			},
+			want: want{
+				title: "tail replacement",
+			},
+		},
+		{
+			name: "head replacement (html)",
+			args: args{
+				diff: `<p>changed title from <code class="idiff"><span class="idiff left right deletion">title</span> replacement</code> to <code class="idiff"><span class="idiff left addition">head</span> replacement</code></p>`,
+			},
+			want: want{
+				title: "head replacement",
+			},
+		},
+		{
+			name: "head replacement (markdown)",
+			args: args{
+				diff: `changed title from **{-title-} replacement** to **{+head+} replacement**`,
+			},
+			want: want{
+				title: "head replacement",
+			},
+		},
+		{
+			name: "complex multi-section diff (html)",
+			args: args{
+				diff: `<p>changed title from <code class="idiff">this <span class="idiff left right deletion">is</span> an <span class="idiff left right deletion">issue</span></code> to <code class="idiff">this <span class="idiff left addition">may be</span> an <span class="idiff left right addition">amazing bug</span></code></p>`,
+			},
+			want: want{
+				title: "this may be an amazing bug",
+			},
+		},
+		{
+			name: "complex multi-section diff (markdown)",
+			args: args{
+				diff: `changed title from **this {-is-} an {-issue-}** to **this {+may be+} an {+amazing bug+}**`,
+			},
+			want: want{
+				title: "this may be an amazing bug",
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			title, err := NewWithInput(TitleParser, tt.args.diff).Parse()
+			if err != nil {
+				t.Error(err)
+			}
+			assert.Equal(t, tt.want.title, title)
+		})
+	}
+}