From 197eb59912577641e5379daf137e144415abcaa5 Mon Sep 17 00:00:00 2001 From: sudoforge Date: Fri, 25 Apr 2025 08:49:09 -0700 Subject: [PATCH] fix: refactor how gitlab title changes are detected (#1370) This change refactors how issue title changes from gitlab events are detected, fixing an issue (due to upstream changing the format of the event body from markdown-esque to html), and improving on error handling. The error boiled down to a change in the issue title format. Gitlab changed this on April 17 2025 with the release of version 17.11 [0], although the only place a reference to this change exists is in the changelog [1], which is not linked to from the releases page. To account for the potential future in which other fields need to be parsed in this way, an internal parser library was introduced at `//bridge/gitlab/parser:parser.go` with initial support for parsing title change messages. An issue was opened with the Gitlab team discussing the fact that this was a breaking change [2]. This may lead to moving title changes (or maybe all changes) to `resource_*_events`, which would likely provide a smoother experience for our use case. Debugging this issue surfaced a few pain points with this bridge: - Errors are few and far between, and when they do exist and are managed, they are often not propagated, often existing as simple `fmt.Printf` calls - Inconsistent and uninformative logging structure when there _are_ errors, leading to challenges in debugging unexpected behavior - Fragility: we are parsing random text from event fields (for title changes and more). This will likely lead to future breakage should Gitlab change the format of other fields. Ideally, the gitlab SDK would start classifying notes and have fields like `type`, `old`, `new`... but this is unlikely to happen in the short term [0]: https://about.gitlab.com/releases/2025/04/17/gitlab-17-11-released/ [1]: https://gitlab.com/gitlab-org/gitlab/-/commit/b3e1fdcf45f8b18110a2f5217b9964a11616d316#ab09011fa121d0a2bb9fa4ca76094f2482b902b7_5_232 [2]: https://gitlab.com/gitlab-org/gitlab/-/issues/536827 Closes: #1367 Change-Id: I3bd7fa1c39a9e4dd2176d6e482e30ab68965f6e7 --- bridge/gitlab/event.go | 29 +++---- bridge/gitlab/event_test.go | 50 ------------ bridge/gitlab/export_test.go | 6 +- bridge/gitlab/import.go | 11 ++- bridge/gitlab/parser/parser.go | 107 ++++++++++++++++++++++++ bridge/gitlab/parser/parser_test.go | 122 ++++++++++++++++++++++++++++ 6 files changed, 252 insertions(+), 73 deletions(-) create mode 100644 bridge/gitlab/parser/parser.go create mode 100644 bridge/gitlab/parser/parser_test.go diff --git a/bridge/gitlab/event.go b/bridge/gitlab/event.go index 6070c8e8afe5c0add2aa4b2ab43c7dd074fb159f..69e7e5439369009c1d2072e28a133a7f23f88dc0 100644 --- a/bridge/gitlab/event.go +++ b/bridge/gitlab/event.go @@ -7,6 +7,7 @@ import ( "gitlab.com/gitlab-org/api/client-go" + "github.com/git-bug/git-bug/bridge/gitlab/parser" "github.com/git-bug/git-bug/util/text" ) @@ -50,7 +51,11 @@ type NoteEvent struct{ gitlab.Note } func (n NoteEvent) ID() string { return fmt.Sprintf("%d", n.Note.ID) } func (n NoteEvent) UserID() int { return n.Author.ID } func (n NoteEvent) CreatedAt() time.Time { return *n.Note.CreatedAt } + func (n NoteEvent) Kind() EventKind { + if _, err := parser.NewWithInput(parser.TitleParser, n.Body).Parse(); err == nil { + return EventTitleChanged + } switch { case !n.System: @@ -71,9 +76,6 @@ func (n NoteEvent) Kind() EventKind { case n.Body == "unlocked this issue": return EventUnlocked - case strings.HasPrefix(n.Body, "changed title from"): - return EventTitleChanged - case strings.HasPrefix(n.Body, "changed due date to"): return EventChangedDuedate @@ -107,11 +109,15 @@ func (n NoteEvent) Kind() EventKind { } -func (n NoteEvent) Title() string { +func (n NoteEvent) Title() (string, error) { if n.Kind() == EventTitleChanged { - return getNewTitle(n.Body) + t, err := parser.NewWithInput(parser.TitleParser, n.Body).Parse() + if err != nil { + return "", err + } + return t, nil } - return text.CleanupOneLine(n.Body) + return text.CleanupOneLine(n.Body), nil } var _ Event = &LabelEvent{} @@ -207,14 +213,3 @@ func SortedEvents(inputs ...<-chan Event) chan Event { return out } - -// getNewTitle parses body diff given by gitlab api and return it final form -// examples: -// - "changed title from **fourth issue** to **fourth issue{+ changed+}**" -// - "changed title from **fourth issue{- changed-}** to **fourth issue**" -func getNewTitle(diff string) string { - newTitle := strings.Split(diff, "** to **")[1] - newTitle = strings.Replace(newTitle, "{+", "", -1) - newTitle = strings.Replace(newTitle, "+}", "", -1) - return strings.TrimSuffix(newTitle, "**") -} diff --git a/bridge/gitlab/event_test.go b/bridge/gitlab/event_test.go index 860570d1f09b464129bf3cd23d95ead23c80cc71..b8ef5aaf24d70603acd1860b13a95f5562d54e1e 100644 --- a/bridge/gitlab/event_test.go +++ b/bridge/gitlab/event_test.go @@ -4,59 +4,9 @@ import ( "testing" "time" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) -func TestGetNewTitle(t *testing.T) { - type args struct { - diff string - } - type want struct { - title string - } - tests := []struct { - name string - args args - want want - }{ - { - name: "addition diff", - args: args{ - diff: "**first issue** to **first issue{+ edited+}**", - }, - want: want{ - title: "first issue edited", - }, - }, - { - name: "deletion diff", - args: args{ - diff: "**first issue{- edited-}** to **first issue**", - }, - want: want{ - title: "first issue", - }, - }, - { - name: "mixed diff", - args: args{ - diff: "**first {-issue-}** to **first {+bug+}**", - }, - want: want{ - title: "first bug", - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - title := getNewTitle(tt.args.diff) - assert.Equal(t, tt.want.title, title) - }) - } -} - var _ Event = mockEvent(0) type mockEvent int64 diff --git a/bridge/gitlab/export_test.go b/bridge/gitlab/export_test.go index 6846cb215b6f09de0e0b9b50a7472a606ee38371..2f59f5ad3739ec16cebdd5cf86ea6607abc23b1f 100644 --- a/bridge/gitlab/export_test.go +++ b/bridge/gitlab/export_test.go @@ -179,14 +179,14 @@ func TestGitlabPushPull(t *testing.T) { projectID, err := createRepository(context.TODO(), projectName, token) require.NoError(t, err) - fmt.Println("created repository", projectName) + fmt.Printf("created project: %s (%d)\n", projectName, projectID) // Make sure to remove the Gitlab repository when the test end defer func(t *testing.T) { if err := deleteRepository(context.TODO(), projectID, token); err != nil { t.Fatal(err) } - fmt.Println("deleted repository:", projectName) + fmt.Printf("deleted repository: %s (%d)\n", projectName, projectID) }(t) interrupt.RegisterCleaner(func() error { @@ -215,7 +215,7 @@ func TestGitlabPushPull(t *testing.T) { } require.NoError(t, err) - fmt.Printf("test repository exported in %f seconds\n", time.Since(start).Seconds()) + fmt.Printf("repository exported in %f seconds\n", time.Since(start).Seconds()) repoTwo := repository.CreateGoGitTestRepo(t, false) diff --git a/bridge/gitlab/import.go b/bridge/gitlab/import.go index b33a21796022d2ed6a008af633c9f9624f8cc4b7..68efbf5a5e4d2e99d603d952aeb93fca0446247a 100644 --- a/bridge/gitlab/import.go +++ b/bridge/gitlab/import.go @@ -81,7 +81,7 @@ func (gi *gitlabImporter) ImportAll(ctx context.Context, repo *cache.RepoCache, continue } if err := gi.ensureIssueEvent(repo, b, issue, e); err != nil { - err := fmt.Errorf("issue event creation: %v", err) + err := fmt.Errorf("unable to create issue event: %v", err) out <- core.NewImportError(err, entity.Id(e.ID())) } } @@ -277,10 +277,15 @@ func (gi *gitlabImporter) ensureIssueEvent(repo *cache.RepoCache, b *cache.BugCa return nil } + title, err := event.(NoteEvent).Title() + if err != nil { + return err + } + op, err := b.SetTitleRaw( author, event.CreatedAt().Unix(), - event.(NoteEvent).Title(), + title, map[string]string{ metaKeyGitlabId: event.ID(), }, @@ -330,7 +335,7 @@ func (gi *gitlabImporter) ensureIssueEvent(repo *cache.RepoCache, b *cache.BugCa return nil default: - return fmt.Errorf("unexpected event") + return fmt.Errorf("unexpected event: %v", event) } return nil diff --git a/bridge/gitlab/parser/parser.go b/bridge/gitlab/parser/parser.go new file mode 100644 index 0000000000000000000000000000000000000000..7ab0d58fd573b6e063f7207a55200ad03618f948 --- /dev/null +++ b/bridge/gitlab/parser/parser.go @@ -0,0 +1,107 @@ +package parser + +import ( + "bytes" + "fmt" + "golang.org/x/net/html" + "regexp" + "strings" +) + +type parser interface { + Parse() (string, error) +} + +type parserType int + +const ( + TitleParser parserType = iota +) + +// NewWithInput returns a new parser instance +func NewWithInput(t parserType, input string) parser { + var p parser + + switch t { + case TitleParser: + p = titleParser{input: input} + } + + return p +} + +type titleParser struct { + input string +} + +// Parse is used to fetch the new title from a "changed title" event +// +// this func is a great example of something that is _extremely_ fragile; the +// input string is pulled from the body of a gitlab message containing html +// fragments, and has changed on at least [one occasion][0], breaking our test +// pipelines and preventing feature development. i think querying for an issue's +// _iterations_ [1] would likely be a better approach. +// +// example p.input values: +// - changed title from **some title** to **some{+ new +}title** +// - changed title from **some{- new-} title** to **some title** +// -

changed title from some title to some new title

+// +// [0]: https://github.com/git-bug/git-bug/issues/1367 +// [1]: https://docs.gitlab.com/api/resource_iteration_events/#list-project-issue-iteration-events +func (p titleParser) Parse() (string, error) { + var reHTML = regexp.MustCompile(`.* to (.*?)`) + var reMD = regexp.MustCompile(`.* to \*\*(.*)\*\*`) + + matchHTML := reHTML.FindAllStringSubmatch(p.input, -1) + matchMD := reMD.FindAllStringSubmatch(p.input, -1) + + if len(matchHTML) == 1 { + t, err := p.stripHTML(matchHTML[0][1]) + if err != nil { + return "", fmt.Errorf("unable to strip HTML from new title: %q", t) + } + return strings.TrimSpace(t), nil + } + + if len(matchMD) == 1 { + reDiff := regexp.MustCompile(`{\+(.*?)\+}`) + + t := matchMD[0][1] + t = reDiff.ReplaceAllString(t, "$1") + + return strings.TrimSpace(t), nil + } + + return "", fmt.Errorf( + "failed to extract title: html=%d md=%d input=%q", + len(matchHTML), + len(matchMD), + p.input, + ) +} + +// stripHTML removes all html tags from a provided string +func (p titleParser) stripHTML(s string) (string, error) { + nodes, err := html.Parse(strings.NewReader(s)) + if err != nil { + // return the original unmodified string in the event html.Parse() + // fails; let the downstream callsites decide if they want to proceed + // with the value or not. + return s, err + } + + var buf bytes.Buffer + var walk func(*html.Node) + walk = func(n *html.Node) { + if n.Type == html.TextNode { + buf.WriteString(n.Data) + } + for c := n.FirstChild; c != nil; c = c.NextSibling { + walk(c) + } + } + walk(nodes) + + return buf.String(), nil +} diff --git a/bridge/gitlab/parser/parser_test.go b/bridge/gitlab/parser/parser_test.go new file mode 100644 index 0000000000000000000000000000000000000000..953cf271209290085b64aaf6dcb6f9bd7cf79dac --- /dev/null +++ b/bridge/gitlab/parser/parser_test.go @@ -0,0 +1,122 @@ +package parser + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestTitleParser(t *testing.T) { + type args struct { + diff string + } + type want struct { + title string + } + tests := []struct { + name string + args args + want want + }{ + { + name: "simple addition (html)", + args: args{ + diff: `

changed title from simple title to simple title addition

`, + }, + want: want{ + title: "simple title addition", + }, + }, + { + name: "simple addition (markdown)", + args: args{ + diff: `changed title from **simple title** to **simple title{+ addition+}**`, + }, + want: want{ + title: "simple title addition", + }, + }, + { + name: "simple deletion (html)", + args: args{ + diff: `

changed title from simple deleted title to simple title

`, + }, + want: want{ + title: "simple title", + }, + }, + { + name: "simple deletion (markdown)", + args: args{ + diff: `changed title from **simple{- deleted-} title** to **simple title**`, + }, + want: want{ + title: "simple title", + }, + }, + { + name: "tail replacement (html)", + args: args{ + diff: `

changed title from tail title to tail replacement

`, + }, + want: want{ + title: "tail replacement", + }, + }, + { + name: "tail replacement (markdown)", + args: args{ + diff: `changed title from **tail {-title-}** to **tail {+replacement+}**`, + }, + want: want{ + title: "tail replacement", + }, + }, + { + name: "head replacement (html)", + args: args{ + diff: `

changed title from title replacement to head replacement

`, + }, + want: want{ + title: "head replacement", + }, + }, + { + name: "head replacement (markdown)", + args: args{ + diff: `changed title from **{-title-} replacement** to **{+head+} replacement**`, + }, + want: want{ + title: "head replacement", + }, + }, + { + name: "complex multi-section diff (html)", + args: args{ + diff: `

changed title from this is an issue to this may be an amazing bug

`, + }, + want: want{ + title: "this may be an amazing bug", + }, + }, + { + name: "complex multi-section diff (markdown)", + args: args{ + diff: `changed title from **this {-is-} an {-issue-}** to **this {+may be+} an {+amazing bug+}**`, + }, + want: want{ + title: "this may be an amazing bug", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + title, err := NewWithInput(TitleParser, tt.args.diff).Parse() + if err != nil { + t.Error(err) + } + assert.Equal(t, tt.want.title, title) + }) + } +}