Merge pull request #585 from MichaelMure/dev-gh-bridge

Michael Muré created

Deal with github bridge import rate limit

Change summary

bridge/core/import.go            |  12 
bridge/github/config.go          |   2 
bridge/github/export.go          |   4 
bridge/github/export_test.go     |   2 
bridge/github/import.go          | 517 ++++++++++++++++-----------------
bridge/github/import_mediator.go | 436 ++++++++++++++++++++++++++++
bridge/github/import_query.go    | 252 +++++++++------
bridge/github/iterator.go        | 423 ---------------------------
8 files changed, 855 insertions(+), 793 deletions(-)

Detailed changes

bridge/core/import.go 🔗

@@ -34,6 +34,9 @@ const (
 	// but not severe enough to consider the import a failure.
 	ImportEventWarning
 
+	// The import system (web API) has reached the rate limit
+	ImportEventRateLimiting
+
 	// Error happened during import
 	ImportEventError
 )
@@ -87,6 +90,8 @@ func (er ImportResult) String() string {
 			parts = append(parts, fmt.Sprintf("err: %s", er.Err))
 		}
 		return strings.Join(parts, " ")
+	case ImportEventRateLimiting:
+		return fmt.Sprintf("rate limiting: %s", er.Reason)
 
 	default:
 		panic("unknown import result")
@@ -165,3 +170,10 @@ func NewImportIdentity(id entity.Id) ImportResult {
 		Event: ImportEventIdentity,
 	}
 }
+
+func NewImportRateLimiting(msg string) ImportResult {
+	return ImportResult{
+		Reason: msg,
+		Event:  ImportEventRateLimiting,
+	}
+}

bridge/github/config.go 🔗

@@ -251,7 +251,7 @@ func promptUserToGoToBrowser(url, userCode string) {
 	fmt.Println("Please visit the following Github URL in a browser and enter your user authentication code.")
 	fmt.Println()
 	fmt.Println("  URL:", url)
-	fmt.Println("  user authentiation code:", userCode)
+	fmt.Println("  user authentication code:", userCode)
 	fmt.Println()
 }
 

bridge/github/export.go 🔗

@@ -504,7 +504,7 @@ func (ge *githubExporter) cacheGithubLabels(ctx context.Context, gc *githubv4.Cl
 	return nil
 }
 
-func (ge *githubExporter) getLabelID(gc *githubv4.Client, label string) (string, error) {
+func (ge *githubExporter) getLabelID(label string) (string, error) {
 	label = strings.ToLower(label)
 	for cachedLabel, ID := range ge.cachedLabels {
 		if label == strings.ToLower(cachedLabel) {
@@ -598,7 +598,7 @@ func (ge *githubExporter) createGithubLabelV4(gc *githubv4.Client, label, labelC
 
 func (ge *githubExporter) getOrCreateGithubLabelID(ctx context.Context, gc *githubv4.Client, repositoryID string, label bug.Label) (string, error) {
 	// try to get label id from cache
-	labelID, err := ge.getLabelID(gc, string(label))
+	labelID, err := ge.getLabelID(string(label))
 	if err == nil {
 		return labelID, nil
 	}

bridge/github/export_test.go 🔗

@@ -268,7 +268,7 @@ func TestGithubPushPull(t *testing.T) {
 			require.True(t, ok)
 			require.Equal(t, issueOrigin, target)
 
-			//TODO: maybe more tests to ensure bug final state
+			// TODO: maybe more tests to ensure bug final state
 		})
 	}
 }

bridge/github/import.go 🔗

@@ -15,6 +15,8 @@ import (
 	"github.com/MichaelMure/git-bug/util/text"
 )
 
+const EmptyTitlePlaceholder = "<empty string>"
+
 // githubImporter implement the Importer interface
 type githubImporter struct {
 	conf core.Configuration
@@ -22,8 +24,8 @@ type githubImporter struct {
 	// default client
 	client *githubv4.Client
 
-	// iterator
-	iterator *iterator
+	// mediator to access the Github API
+	mediator *importMediator
 
 	// send only channel
 	out chan<- core.ImportResult
@@ -31,7 +33,6 @@ type githubImporter struct {
 
 func (gi *githubImporter) Init(_ context.Context, repo *cache.RepoCache, conf core.Configuration) error {
 	gi.conf = conf
-
 	creds, err := auth.List(repo,
 		auth.WithTarget(target),
 		auth.WithKind(auth.KindToken),
@@ -40,11 +41,9 @@ func (gi *githubImporter) Init(_ context.Context, repo *cache.RepoCache, conf co
 	if err != nil {
 		return err
 	}
-
-	if len(creds) == 0 {
+	if len(creds) <= 0 {
 		return ErrMissingIdentityToken
 	}
-
 	gi.client = buildClient(creds[0].(*auth.Token))
 
 	return nil
@@ -53,46 +52,95 @@ func (gi *githubImporter) Init(_ context.Context, repo *cache.RepoCache, conf co
 // ImportAll iterate over all the configured repository issues and ensure the creation of the
 // missing issues / timeline items / edits / label events ...
 func (gi *githubImporter) ImportAll(ctx context.Context, repo *cache.RepoCache, since time.Time) (<-chan core.ImportResult, error) {
-	gi.iterator = NewIterator(ctx, gi.client, 10, gi.conf[confKeyOwner], gi.conf[confKeyProject], since)
+	gi.mediator = NewImportMediator(ctx, gi.client, gi.conf[confKeyOwner], gi.conf[confKeyProject], since)
 	out := make(chan core.ImportResult)
 	gi.out = out
 
 	go func() {
 		defer close(gi.out)
-
-		// Loop over all matching issues
-		for gi.iterator.NextIssue() {
-			issue := gi.iterator.IssueValue()
-			// create issue
-			b, err := gi.ensureIssue(repo, issue)
-			if err != nil {
-				err := fmt.Errorf("issue creation: %v", err)
-				out <- core.NewImportError(err, "")
-				return
+		var currBug *cache.BugCache
+		var currEvent ImportEvent
+		var nextEvent ImportEvent
+		var err error
+		for {
+			// An IssueEvent contains the issue in its most recent state. If an issue
+			// has at least one issue edit, then the history of the issue edits is
+			// represented by IssueEditEvents. That is, the unedited (original) issue
+			// might be saved only in the IssueEditEvent following the IssueEvent.
+			// Since we replicate the edit history we need to either use the IssueEvent
+			// (if there are no edits) or the IssueEvent together with its first
+			// IssueEditEvent (if there are edits).
+			// Exactly the same is true for comments and comment edits.
+			// As a consequence we need to look at the current event and one look ahead
+			// event.
+			currEvent = nextEvent
+			if currEvent == nil {
+				currEvent = gi.getEventHandleMsgs()
 			}
-
-			// loop over timeline items
-			for gi.iterator.NextTimelineItem() {
-				item := gi.iterator.TimelineItemValue()
-				err := gi.ensureTimelineItem(repo, b, item)
+			if currEvent == nil {
+				break
+			}
+			nextEvent = gi.getEventHandleMsgs()
+
+			switch event := currEvent.(type) {
+			case RateLimitingEvent:
+				out <- core.NewImportRateLimiting(event.msg)
+			case IssueEvent:
+				// first: commit what is being held in currBug
+				if err = gi.commit(currBug, out); err != nil {
+					out <- core.NewImportError(err, "")
+					return
+				}
+				// second: create new issue
+				switch next := nextEvent.(type) {
+				case IssueEditEvent:
+					// consuming and using next event
+					nextEvent = nil
+					currBug, err = gi.ensureIssue(ctx, repo, &event.issue, &next.userContentEdit)
+				default:
+					currBug, err = gi.ensureIssue(ctx, repo, &event.issue, nil)
+				}
+				if err != nil {
+					err := fmt.Errorf("issue creation: %v", err)
+					out <- core.NewImportError(err, "")
+					return
+				}
+			case IssueEditEvent:
+				err = gi.ensureIssueEdit(ctx, repo, currBug, event.issueId, &event.userContentEdit)
+				if err != nil {
+					err = fmt.Errorf("issue edit: %v", err)
+					out <- core.NewImportError(err, "")
+					return
+				}
+			case TimelineEvent:
+				if next, ok := nextEvent.(CommentEditEvent); ok && event.Typename == "IssueComment" {
+					// consuming and using next event
+					nextEvent = nil
+					err = gi.ensureComment(ctx, repo, currBug, &event.timelineItem.IssueComment, &next.userContentEdit)
+				} else {
+					err = gi.ensureTimelineItem(ctx, repo, currBug, &event.timelineItem)
+				}
 				if err != nil {
 					err = fmt.Errorf("timeline item creation: %v", err)
 					out <- core.NewImportError(err, "")
 					return
 				}
-			}
-
-			if !b.NeedCommit() {
-				out <- core.NewImportNothing(b.Id(), "no imported operation")
-			} else if err := b.Commit(); err != nil {
-				// commit bug state
-				err = fmt.Errorf("bug commit: %v", err)
-				out <- core.NewImportError(err, "")
-				return
+			case CommentEditEvent:
+				err = gi.ensureCommentEdit(ctx, repo, currBug, event.commentId, &event.userContentEdit)
+				if err != nil {
+					err = fmt.Errorf("comment edit: %v", err)
+					out <- core.NewImportError(err, "")
+					return
+				}
+			default:
+				panic("Unknown event type")
 			}
 		}
-
-		if err := gi.iterator.Error(); err != nil {
+		// commit what is being held in currBug before returning
+		if err = gi.commit(currBug, out); err != nil {
+			out <- core.NewImportError(err, "")
+		}
+		if err = gi.mediator.Error(); err != nil {
 			gi.out <- core.NewImportError(err, "")
 		}
 	}()
@@ -100,9 +148,35 @@ func (gi *githubImporter) ImportAll(ctx context.Context, repo *cache.RepoCache,
 	return out, nil
 }
 
-func (gi *githubImporter) ensureIssue(repo *cache.RepoCache, issue issue) (*cache.BugCache, error) {
-	// ensure issue author
-	author, err := gi.ensurePerson(repo, issue.Author)
+func (gi *githubImporter) getEventHandleMsgs() ImportEvent {
+	for {
+		// read event from import mediator
+		event := gi.mediator.NextImportEvent()
+		// consume (and use) all rate limiting events
+		if e, ok := event.(RateLimitingEvent); ok {
+			gi.out <- core.NewImportRateLimiting(e.msg)
+			continue
+		}
+		return event
+	}
+}
+
+func (gi *githubImporter) commit(b *cache.BugCache, out chan<- core.ImportResult) error {
+	if b == nil {
+		return nil
+	}
+	if !b.NeedCommit() {
+		out <- core.NewImportNothing(b.Id(), "no imported operation")
+		return nil
+	} else if err := b.Commit(); err != nil {
+		// commit bug state
+		return fmt.Errorf("bug commit: %v", err)
+	}
+	return nil
+}
+
+func (gi *githubImporter) ensureIssue(ctx context.Context, repo *cache.RepoCache, issue *issue, issueEdit *userContentEdit) (*cache.BugCache, error) {
+	author, err := gi.ensurePerson(ctx, repo, issue.Author)
 	if err != nil {
 		return nil, err
 	}
@@ -112,113 +186,66 @@ func (gi *githubImporter) ensureIssue(repo *cache.RepoCache, issue issue) (*cach
 		return excerpt.CreateMetadata[core.MetaKeyOrigin] == target &&
 			excerpt.CreateMetadata[metaKeyGithubId] == parseId(issue.Id)
 	})
-	if err != nil && err != bug.ErrBugNotExist {
+	if err == nil {
+		return b, nil
+	}
+	if err != bug.ErrBugNotExist {
 		return nil, err
 	}
 
-	// get issue edits
-	var issueEdits []userContentEdit
-	for gi.iterator.NextIssueEdit() {
-		issueEdits = append(issueEdits, gi.iterator.IssueEditValue())
+	// At Github there exist issues with seemingly empty titles. An example is
+	// https://github.com/NixOS/nixpkgs/issues/72730 .
+	// The title provided by the GraphQL API actually consists of a space followed by a
+	// zero width space (U+200B). This title would cause the NewBugRaw() function to
+	// return an error: empty title.
+	title := string(issue.Title)
+	if title == " \u200b" { // U+200B == zero width space
+		title = EmptyTitlePlaceholder
 	}
 
-	// if issueEdits is empty
-	if len(issueEdits) == 0 {
-		if err == bug.ErrBugNotExist {
-			cleanText, err := text.Cleanup(string(issue.Body))
-			if err != nil {
-				return nil, err
-			}
-
-			// create bug
-			b, _, err = repo.NewBugRaw(
-				author,
-				issue.CreatedAt.Unix(),
-				issue.Title,
-				cleanText,
-				nil,
-				map[string]string{
-					core.MetaKeyOrigin: target,
-					metaKeyGithubId:    parseId(issue.Id),
-					metaKeyGithubUrl:   issue.Url.String(),
-				})
-			if err != nil {
-				return nil, err
-			}
-
-			// importing a new bug
-			gi.out <- core.NewImportBug(b.Id())
-		}
+	var textInput string
+	if issueEdit != nil {
+		// use the first issue edit: it represents the bug creation itself
+		textInput = string(*issueEdit.Diff)
 	} else {
-		// create bug from given issueEdits
-		for i, edit := range issueEdits {
-			if i == 0 && b != nil {
-				// The first edit in the github result is the issue creation itself, we already have that
-				continue
-			}
-
-			cleanText, err := text.Cleanup(string(*edit.Diff))
-			if err != nil {
-				return nil, err
-			}
-
-			// if the bug doesn't exist
-			if b == nil {
-				// we create the bug as soon as we have a legit first edition
-				b, _, err = repo.NewBugRaw(
-					author,
-					issue.CreatedAt.Unix(),
-					issue.Title, // TODO: this is the *current* title, not the original one
-					cleanText,
-					nil,
-					map[string]string{
-						core.MetaKeyOrigin: target,
-						metaKeyGithubId:    parseId(issue.Id),
-						metaKeyGithubUrl:   issue.Url.String(),
-					},
-				)
-
-				if err != nil {
-					return nil, err
-				}
-				// importing a new bug
-				gi.out <- core.NewImportBug(b.Id())
-				continue
-			}
-
-			// other edits will be added as CommentEdit operations
-			target, err := b.ResolveOperationWithMetadata(metaKeyGithubId, parseId(issue.Id))
-			if err == cache.ErrNoMatchingOp {
-				// original comment is missing somehow, issuing a warning
-				gi.out <- core.NewImportWarning(fmt.Errorf("comment ID %s to edit is missing", parseId(issue.Id)), b.Id())
-				continue
-			}
-			if err != nil {
-				return nil, err
-			}
+		// if there are no issue edits then the issue struct holds the bug creation
+		textInput = string(issue.Body)
+	}
+	cleanText, err := text.Cleanup(textInput)
+	if err != nil {
+		return nil, err
+	}
 
-			err = gi.ensureCommentEdit(repo, b, target, edit)
-			if err != nil {
-				return nil, err
-			}
-		}
+	// create bug
+	b, _, err = repo.NewBugRaw(
+		author,
+		issue.CreatedAt.Unix(),
+		title, // TODO: this is the *current* title, not the original one
+		cleanText,
+		nil,
+		map[string]string{
+			core.MetaKeyOrigin: target,
+			metaKeyGithubId:    parseId(issue.Id),
+			metaKeyGithubUrl:   issue.Url.String(),
+		})
+	if err != nil {
+		return nil, err
 	}
+	// importing a new bug
+	gi.out <- core.NewImportBug(b.Id())
 
 	return b, nil
 }
 
-func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.BugCache, item timelineItem) error {
+func (gi *githubImporter) ensureIssueEdit(ctx context.Context, repo *cache.RepoCache, bug *cache.BugCache, ghIssueId githubv4.ID, edit *userContentEdit) error {
+	return gi.ensureCommentEdit(ctx, repo, bug, ghIssueId, edit)
+}
+
+func (gi *githubImporter) ensureTimelineItem(ctx context.Context, repo *cache.RepoCache, b *cache.BugCache, item *timelineItem) error {
 
 	switch item.Typename {
 	case "IssueComment":
-		// collect all comment edits
-		var commentEdits []userContentEdit
-		for gi.iterator.NextCommentEdit() {
-			commentEdits = append(commentEdits, gi.iterator.CommentEditValue())
-		}
-
-		// ensureTimelineComment send import events over out chanel
-		err := gi.ensureTimelineComment(repo, b, item.IssueComment, commentEdits)
+		err := gi.ensureComment(ctx, repo, b, &item.IssueComment, nil)
 		if err != nil {
 			return fmt.Errorf("timeline comment creation: %v", err)
 		}
@@ -234,7 +261,7 @@ func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.Bug
 		if err != cache.ErrNoMatchingOp {
 			return err
 		}
-		author, err := gi.ensurePerson(repo, item.LabeledEvent.Actor)
+		author, err := gi.ensurePerson(ctx, repo, item.LabeledEvent.Actor)
 		if err != nil {
 			return err
 		}
@@ -263,7 +290,7 @@ func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.Bug
 		if err != cache.ErrNoMatchingOp {
 			return err
 		}
-		author, err := gi.ensurePerson(repo, item.UnlabeledEvent.Actor)
+		author, err := gi.ensurePerson(ctx, repo, item.UnlabeledEvent.Actor)
 		if err != nil {
 			return err
 		}
@@ -293,7 +320,7 @@ func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.Bug
 		if err == nil {
 			return nil
 		}
-		author, err := gi.ensurePerson(repo, item.ClosedEvent.Actor)
+		author, err := gi.ensurePerson(ctx, repo, item.ClosedEvent.Actor)
 		if err != nil {
 			return err
 		}
@@ -319,7 +346,7 @@ func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.Bug
 		if err == nil {
 			return nil
 		}
-		author, err := gi.ensurePerson(repo, item.ReopenedEvent.Actor)
+		author, err := gi.ensurePerson(ctx, repo, item.ReopenedEvent.Actor)
 		if err != nil {
 			return err
 		}
@@ -345,14 +372,25 @@ func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.Bug
 		if err == nil {
 			return nil
 		}
-		author, err := gi.ensurePerson(repo, item.RenamedTitleEvent.Actor)
+		author, err := gi.ensurePerson(ctx, repo, item.RenamedTitleEvent.Actor)
 		if err != nil {
 			return err
 		}
+
+		// At Github there exist issues with seemingly empty titles. An example is
+		// https://github.com/NixOS/nixpkgs/issues/72730 .
+		// The title provided by the GraphQL API actually consists of a space followed
+		// by a zero width space (U+200B). This title would cause the NewBugRaw()
+		// function to return an error: empty title.
+		title := string(item.RenamedTitleEvent.CurrentTitle)
+		if title == " \u200b" { // U+200B == zero width space
+			title = EmptyTitlePlaceholder
+		}
+
 		op, err := b.SetTitleRaw(
 			author,
 			item.RenamedTitleEvent.CreatedAt.Unix(),
-			string(item.RenamedTitleEvent.CurrentTitle),
+			title,
 			map[string]string{metaKeyGithubId: id},
 		)
 		if err != nil {
@@ -366,97 +404,62 @@ func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.Bug
 	return nil
 }
 
-func (gi *githubImporter) ensureTimelineComment(repo *cache.RepoCache, b *cache.BugCache, item issueComment, edits []userContentEdit) error {
-	// ensure person
-	author, err := gi.ensurePerson(repo, item.Author)
+func (gi *githubImporter) ensureCommentEdit(ctx context.Context, repo *cache.RepoCache, b *cache.BugCache, ghTargetId githubv4.ID, edit *userContentEdit) error {
+	// find comment
+	target, err := b.ResolveOperationWithMetadata(metaKeyGithubId, parseId(ghTargetId))
 	if err != nil {
 		return err
 	}
-
-	targetOpID, err := b.ResolveOperationWithMetadata(metaKeyGithubId, parseId(item.Id))
-	if err != nil && err != cache.ErrNoMatchingOp {
+	_, err = b.ResolveOperationWithMetadata(metaKeyGithubId, parseId(edit.Id))
+	if err == nil {
+		return nil
+	}
+	if err != cache.ErrNoMatchingOp {
 		// real error
 		return err
 	}
 
-	// if no edits are given we create the comment
-	if len(edits) == 0 {
-		if err == cache.ErrNoMatchingOp {
-			cleanText, err := text.Cleanup(string(item.Body))
-			if err != nil {
-				return err
-			}
-
-			// add comment operation
-			op, err := b.AddCommentRaw(
-				author,
-				item.CreatedAt.Unix(),
-				cleanText,
-				nil,
-				map[string]string{
-					metaKeyGithubId:  parseId(item.Id),
-					metaKeyGithubUrl: parseId(item.Url.String()),
-				},
-			)
-			if err != nil {
-				return err
-			}
-
-			gi.out <- core.NewImportComment(op.Id())
-			return nil
-		}
-
-	} else {
-		for i, edit := range edits {
-			if i == 0 && targetOpID != "" {
-				// The first edit in the github result is the comment creation itself, we already have that
-				continue
-			}
-
-			// ensure editor identity
-			editor, err := gi.ensurePerson(repo, edit.Editor)
-			if err != nil {
-				return err
-			}
+	editor, err := gi.ensurePerson(ctx, repo, edit.Editor)
+	if err != nil {
+		return err
+	}
 
-			// create comment when target is empty
-			if targetOpID == "" {
-				cleanText, err := text.Cleanup(string(*edit.Diff))
-				if err != nil {
-					return err
-				}
+	if edit.DeletedAt != nil {
+		// comment deletion, not supported yet
+		return nil
+	}
 
-				op, err := b.AddCommentRaw(
-					editor,
-					edit.CreatedAt.Unix(),
-					cleanText,
-					nil,
-					map[string]string{
-						metaKeyGithubId:  parseId(item.Id),
-						metaKeyGithubUrl: item.Url.String(),
-					},
-				)
-				if err != nil {
-					return err
-				}
-				gi.out <- core.NewImportComment(op.Id())
+	cleanText, err := text.Cleanup(string(*edit.Diff))
+	if err != nil {
+		return err
+	}
 
-				// set target for the next edit now that the comment is created
-				targetOpID = op.Id()
-				continue
-			}
+	// comment edition
+	op, err := b.EditCommentRaw(
+		editor,
+		edit.CreatedAt.Unix(),
+		target,
+		cleanText,
+		map[string]string{
+			metaKeyGithubId: parseId(edit.Id),
+		},
+	)
 
-			err = gi.ensureCommentEdit(repo, b, targetOpID, edit)
-			if err != nil {
-				return err
-			}
-		}
+	if err != nil {
+		return err
 	}
+
+	gi.out <- core.NewImportCommentEdition(op.Id())
 	return nil
 }
 
-func (gi *githubImporter) ensureCommentEdit(repo *cache.RepoCache, b *cache.BugCache, target entity.Id, edit userContentEdit) error {
-	_, err := b.ResolveOperationWithMetadata(metaKeyGithubId, parseId(edit.Id))
+func (gi *githubImporter) ensureComment(ctx context.Context, repo *cache.RepoCache, b *cache.BugCache, comment *issueComment, firstEdit *userContentEdit) error {
+	author, err := gi.ensurePerson(ctx, repo, comment.Author)
+	if err != nil {
+		return err
+	}
+
+	_, err = b.ResolveOperationWithMetadata(metaKeyGithubId, parseId(comment.Id))
 	if err == nil {
 		return nil
 	}
@@ -465,50 +468,44 @@ func (gi *githubImporter) ensureCommentEdit(repo *cache.RepoCache, b *cache.BugC
 		return err
 	}
 
-	editor, err := gi.ensurePerson(repo, edit.Editor)
+	var textInput string
+	if firstEdit != nil {
+		// use the first comment edit: it represents the comment creation itself
+		textInput = string(*firstEdit.Diff)
+	} else {
+		// if there are not comment edits, then the comment struct holds the comment creation
+		textInput = string(comment.Body)
+	}
+	cleanText, err := text.Cleanup(textInput)
 	if err != nil {
 		return err
 	}
 
-	switch {
-	case edit.DeletedAt != nil:
-		// comment deletion, not supported yet
-		return nil
-
-	case edit.DeletedAt == nil:
-
-		cleanText, err := text.Cleanup(string(*edit.Diff))
-		if err != nil {
-			return err
-		}
-
-		// comment edition
-		op, err := b.EditCommentRaw(
-			editor,
-			edit.CreatedAt.Unix(),
-			target,
-			cleanText,
-			map[string]string{
-				metaKeyGithubId: parseId(edit.Id),
-			},
-		)
-
-		if err != nil {
-			return err
-		}
-
-		gi.out <- core.NewImportCommentEdition(op.Id())
-		return nil
+	// add comment operation
+	op, err := b.AddCommentRaw(
+		author,
+		comment.CreatedAt.Unix(),
+		cleanText,
+		nil,
+		map[string]string{
+			metaKeyGithubId:  parseId(comment.Id),
+			metaKeyGithubUrl: comment.Url.String(),
+		},
+	)
+	if err != nil {
+		return err
 	}
+
+	gi.out <- core.NewImportComment(op.Id())
 	return nil
 }
 
 // ensurePerson create a bug.Person from the Github data
-func (gi *githubImporter) ensurePerson(repo *cache.RepoCache, actor *actor) (*cache.IdentityCache, error) {
+func (gi *githubImporter) ensurePerson(ctx context.Context, repo *cache.RepoCache, actor *actor) (*cache.IdentityCache, error) {
 	// When a user has been deleted, Github return a null actor, while displaying a profile named "ghost"
 	// in it's UI. So we need a special case to get it.
 	if actor == nil {
-		return gi.getGhost(repo)
+		return gi.getGhost(ctx, repo)
 	}
 
 	// Look first in the cache
@@ -521,7 +518,6 @@ func (gi *githubImporter) ensurePerson(repo *cache.RepoCache, actor *actor) (*ca
 	}
 
 	// importing a new identity
-
 	var name string
 	var email string
 
@@ -565,48 +561,37 @@ func (gi *githubImporter) ensurePerson(repo *cache.RepoCache, actor *actor) (*ca
 	return i, nil
 }
 
-func (gi *githubImporter) getGhost(repo *cache.RepoCache) (*cache.IdentityCache, error) {
+func (gi *githubImporter) getGhost(ctx context.Context, repo *cache.RepoCache) (*cache.IdentityCache, error) {
+	loginName := "ghost"
 	// Look first in the cache
-	i, err := repo.ResolveIdentityImmutableMetadata(metaKeyGithubLogin, "ghost")
+	i, err := repo.ResolveIdentityImmutableMetadata(metaKeyGithubLogin, loginName)
 	if err == nil {
 		return i, nil
 	}
 	if entity.IsErrMultipleMatch(err) {
 		return nil, err
 	}
-
-	var q ghostQuery
-
-	variables := map[string]interface{}{
-		"login": githubv4.String("ghost"),
-	}
-
-	ctx, cancel := context.WithTimeout(gi.iterator.ctx, defaultTimeout)
-	defer cancel()
-
-	err = gi.client.Query(ctx, &q, variables)
+	user, err := gi.mediator.User(ctx, loginName)
 	if err != nil {
 		return nil, err
 	}
-
-	var name string
-	if q.User.Name != nil {
-		name = string(*q.User.Name)
+	userName := ""
+	if user.Name != nil {
+		userName = string(*user.Name)
 	}
-
 	return repo.NewIdentityRaw(
-		name,
+		userName,
 		"",
-		string(q.User.Login),
-		string(q.User.AvatarUrl),
+		string(user.Login),
+		string(user.AvatarUrl),
 		nil,
 		map[string]string{
-			metaKeyGithubLogin: string(q.User.Login),
+			metaKeyGithubLogin: string(user.Login),
 		},
 	)
 }
 
-// parseId convert the unusable githubv4.ID (an interface{}) into a string
+// parseId converts the unusable githubv4.ID (an interface{}) into a string
 func parseId(id githubv4.ID) string {
 	return fmt.Sprintf("%v", id)
 }

bridge/github/import_mediator.go 🔗

@@ -0,0 +1,436 @@
+package github
+
+import (
+	"context"
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/shurcooL/githubv4"
+)
+
+const (
+	// These values influence how fast the github graphql rate limit is exhausted.
+	NumIssues        = 40
+	NumIssueEdits    = 100
+	NumTimelineItems = 100
+	NumCommentEdits  = 100
+
+	ChanCapacity = 128
+)
+
+// importMediator provides a convenient interface to retrieve issues from the Github GraphQL API.
+type importMediator struct {
+	// Github graphql client
+	gc *githubv4.Client
+
+	// name of the repository owner on Github
+	owner string
+
+	// name of the Github repository
+	project string
+
+	// since specifies which issues to import. Issues that have been updated at or after the
+	// given date should be imported.
+	since time.Time
+
+	// importEvents holds events representing issues, comments, edits, ...
+	// In this channel issues are immediately followed by their issue edits and comments are
+	// immediately followed by their comment edits.
+	importEvents chan ImportEvent
+
+	// Sticky error
+	err error
+}
+
+type ImportEvent interface {
+	isImportEvent()
+}
+
+type RateLimitingEvent struct {
+	msg string
+}
+
+func (RateLimitingEvent) isImportEvent() {}
+
+type IssueEvent struct {
+	issue
+}
+
+func (IssueEvent) isImportEvent() {}
+
+type IssueEditEvent struct {
+	issueId githubv4.ID
+	userContentEdit
+}
+
+func (IssueEditEvent) isImportEvent() {}
+
+type TimelineEvent struct {
+	issueId githubv4.ID
+	timelineItem
+}
+
+func (TimelineEvent) isImportEvent() {}
+
+type CommentEditEvent struct {
+	commentId githubv4.ID
+	userContentEdit
+}
+
+func (CommentEditEvent) isImportEvent() {}
+
+func (mm *importMediator) NextImportEvent() ImportEvent {
+	return <-mm.importEvents
+}
+
+func NewImportMediator(ctx context.Context, client *githubv4.Client, owner, project string, since time.Time) *importMediator {
+	mm := importMediator{
+		gc:           client,
+		owner:        owner,
+		project:      project,
+		since:        since,
+		importEvents: make(chan ImportEvent, ChanCapacity),
+		err:          nil,
+	}
+	go func() {
+		mm.fillImportEvents(ctx)
+		close(mm.importEvents)
+	}()
+	return &mm
+}
+
+type varmap map[string]interface{}
+
+func newIssueVars(owner, project string, since time.Time) varmap {
+	return varmap{
+		"owner":             githubv4.String(owner),
+		"name":              githubv4.String(project),
+		"issueSince":        githubv4.DateTime{Time: since},
+		"issueFirst":        githubv4.Int(NumIssues),
+		"issueEditLast":     githubv4.Int(NumIssueEdits),
+		"issueEditBefore":   (*githubv4.String)(nil),
+		"timelineFirst":     githubv4.Int(NumTimelineItems),
+		"timelineAfter":     (*githubv4.String)(nil),
+		"commentEditLast":   githubv4.Int(NumCommentEdits),
+		"commentEditBefore": (*githubv4.String)(nil),
+	}
+}
+
+func newIssueEditVars() varmap {
+	return varmap{
+		"issueEditLast": githubv4.Int(NumIssueEdits),
+	}
+}
+
+func newTimelineVars() varmap {
+	return varmap{
+		"timelineFirst":     githubv4.Int(NumTimelineItems),
+		"commentEditLast":   githubv4.Int(NumCommentEdits),
+		"commentEditBefore": (*githubv4.String)(nil),
+	}
+}
+
+func newCommentEditVars() varmap {
+	return varmap{
+		"commentEditLast": githubv4.Int(NumCommentEdits),
+	}
+}
+
+func (mm *importMediator) Error() error {
+	return mm.err
+}
+
+func (mm *importMediator) User(ctx context.Context, loginName string) (*user, error) {
+	query := userQuery{}
+	vars := varmap{"login": githubv4.String(loginName)}
+	if err := mm.mQuery(ctx, &query, vars); err != nil {
+		return nil, err
+	}
+	return &query.User, nil
+}
+
+func (mm *importMediator) fillImportEvents(ctx context.Context) {
+	initialCursor := githubv4.String("")
+	issues, hasIssues := mm.queryIssue(ctx, initialCursor)
+	for hasIssues {
+		for _, node := range issues.Nodes {
+			select {
+			case <-ctx.Done():
+				return
+			case mm.importEvents <- IssueEvent{node.issue}:
+			}
+
+			// issue edit events follow the issue event
+			mm.fillIssueEditEvents(ctx, &node)
+			// last come the timeline events
+			mm.fillTimelineEvents(ctx, &node)
+		}
+		if !issues.PageInfo.HasNextPage {
+			break
+		}
+		issues, hasIssues = mm.queryIssue(ctx, issues.PageInfo.EndCursor)
+	}
+}
+
+func (mm *importMediator) fillIssueEditEvents(ctx context.Context, issueNode *issueNode) {
+	edits := &issueNode.UserContentEdits
+	hasEdits := true
+	for hasEdits {
+		for edit := range reverse(edits.Nodes) {
+			if edit.Diff == nil || string(*edit.Diff) == "" {
+				// issueEdit.Diff == nil happen if the event is older than early
+				// 2018, Github doesn't have the data before that. Best we can do is
+				// to ignore the event.
+				continue
+			}
+			select {
+			case <-ctx.Done():
+				return
+			case mm.importEvents <- IssueEditEvent{issueId: issueNode.issue.Id, userContentEdit: edit}:
+			}
+		}
+		if !edits.PageInfo.HasPreviousPage {
+			break
+		}
+		edits, hasEdits = mm.queryIssueEdits(ctx, issueNode.issue.Id, edits.PageInfo.EndCursor)
+	}
+}
+
+func (mm *importMediator) queryIssueEdits(ctx context.Context, nid githubv4.ID, cursor githubv4.String) (*userContentEditConnection, bool) {
+	vars := newIssueEditVars()
+	vars["gqlNodeId"] = nid
+	if cursor == "" {
+		vars["issueEditBefore"] = (*githubv4.String)(nil)
+	} else {
+		vars["issueEditBefore"] = cursor
+	}
+	query := issueEditQuery{}
+	if err := mm.mQuery(ctx, &query, vars); err != nil {
+		mm.err = err
+		return nil, false
+	}
+	connection := &query.Node.Issue.UserContentEdits
+	if len(connection.Nodes) <= 0 {
+		return nil, false
+	}
+	return connection, true
+}
+
+func (mm *importMediator) fillTimelineEvents(ctx context.Context, issueNode *issueNode) {
+	items := &issueNode.TimelineItems
+	hasItems := true
+	for hasItems {
+		for _, item := range items.Nodes {
+			select {
+			case <-ctx.Done():
+				return
+			case mm.importEvents <- TimelineEvent{issueId: issueNode.issue.Id, timelineItem: item}:
+			}
+			if item.Typename == "IssueComment" {
+				// Issue comments are different than other timeline items in that
+				// they may have associated user content edits.
+				// Right after the comment we send the comment edits.
+				mm.fillCommentEdits(ctx, &item)
+			}
+		}
+		if !items.PageInfo.HasNextPage {
+			break
+		}
+		items, hasItems = mm.queryTimeline(ctx, issueNode.issue.Id, items.PageInfo.EndCursor)
+	}
+}
+
+func (mm *importMediator) queryTimeline(ctx context.Context, nid githubv4.ID, cursor githubv4.String) (*timelineItemsConnection, bool) {
+	vars := newTimelineVars()
+	vars["gqlNodeId"] = nid
+	if cursor == "" {
+		vars["timelineAfter"] = (*githubv4.String)(nil)
+	} else {
+		vars["timelineAfter"] = cursor
+	}
+	query := timelineQuery{}
+	if err := mm.mQuery(ctx, &query, vars); err != nil {
+		mm.err = err
+		return nil, false
+	}
+	connection := &query.Node.Issue.TimelineItems
+	if len(connection.Nodes) <= 0 {
+		return nil, false
+	}
+	return connection, true
+}
+
+func (mm *importMediator) fillCommentEdits(ctx context.Context, item *timelineItem) {
+	// Here we are only concerned with timeline items of type issueComment.
+	if item.Typename != "IssueComment" {
+		return
+	}
+	// First: setup message handling while submitting GraphQL queries.
+	comment := &item.IssueComment
+	edits := &comment.UserContentEdits
+	hasEdits := true
+	for hasEdits {
+		for edit := range reverse(edits.Nodes) {
+			if edit.Diff == nil || string(*edit.Diff) == "" {
+				// issueEdit.Diff == nil happen if the event is older than early
+				// 2018, Github doesn't have the data before that. Best we can do is
+				// to ignore the event.
+				continue
+			}
+			select {
+			case <-ctx.Done():
+				return
+			case mm.importEvents <- CommentEditEvent{commentId: comment.Id, userContentEdit: edit}:
+			}
+		}
+		if !edits.PageInfo.HasPreviousPage {
+			break
+		}
+		edits, hasEdits = mm.queryCommentEdits(ctx, comment.Id, edits.PageInfo.EndCursor)
+	}
+}
+
+func (mm *importMediator) queryCommentEdits(ctx context.Context, nid githubv4.ID, cursor githubv4.String) (*userContentEditConnection, bool) {
+	vars := newCommentEditVars()
+	vars["gqlNodeId"] = nid
+	if cursor == "" {
+		vars["commentEditBefore"] = (*githubv4.String)(nil)
+	} else {
+		vars["commentEditBefore"] = cursor
+	}
+	query := commentEditQuery{}
+	if err := mm.mQuery(ctx, &query, vars); err != nil {
+		mm.err = err
+		return nil, false
+	}
+	connection := &query.Node.IssueComment.UserContentEdits
+	if len(connection.Nodes) <= 0 {
+		return nil, false
+	}
+	return connection, true
+}
+
+func (mm *importMediator) queryIssue(ctx context.Context, cursor githubv4.String) (*issueConnection, bool) {
+	vars := newIssueVars(mm.owner, mm.project, mm.since)
+	if cursor == "" {
+		vars["issueAfter"] = (*githubv4.String)(nil)
+	} else {
+		vars["issueAfter"] = cursor
+	}
+	query := issueQuery{}
+	if err := mm.mQuery(ctx, &query, vars); err != nil {
+		mm.err = err
+		return nil, false
+	}
+	connection := &query.Repository.Issues
+	if len(connection.Nodes) <= 0 {
+		return nil, false
+	}
+	return connection, true
+}
+
+func reverse(eds []userContentEdit) chan userContentEdit {
+	ret := make(chan userContentEdit)
+	go func() {
+		for i := range eds {
+			ret <- eds[len(eds)-1-i]
+		}
+		close(ret)
+	}()
+	return ret
+}
+
+// mQuery executes a single GraphQL query. The variable query is used to derive the GraphQL query
+// and it is used to populate the response into it. It should be a pointer to a struct that
+// corresponds to the Github graphql schema and it has to implement the rateLimiter interface. If
+// there is a Github rate limiting error, then the function sleeps and retries after the rate limit
+// is expired. If there is another error, then the method will retry before giving up.
+func (mm *importMediator) mQuery(ctx context.Context, query rateLimiter, vars map[string]interface{}) error {
+	if err := mm.queryOnce(ctx, query, vars); err == nil {
+		// success: done
+		return nil
+	}
+	// failure: we will retry
+	// To retry is important for importing projects with a big number of issues, because
+	// there may be temporary network errors or momentary internal errors of the github servers.
+	retries := 3
+	var err error
+	for i := 0; i < retries; i++ {
+		// wait a few seconds before retry
+		sleepTime := time.Duration(8*(i+1)) * time.Second
+		timer := time.NewTimer(sleepTime)
+		select {
+		case <-ctx.Done():
+			stop(timer)
+			return ctx.Err()
+		case <-timer.C:
+		}
+		err = mm.queryOnce(ctx, query, vars)
+		if err == nil {
+			// success: done
+			return nil
+		}
+	}
+	return err
+}
+
+func (mm *importMediator) queryOnce(ctx context.Context, query rateLimiter, vars map[string]interface{}) error {
+	// first: just send the query to the graphql api
+	vars["dryRun"] = githubv4.Boolean(false)
+	qctx, cancel := context.WithTimeout(ctx, defaultTimeout)
+	defer cancel()
+	err := mm.gc.Query(qctx, query, vars)
+	if err == nil {
+		// no error: done
+		return nil
+	}
+	// matching the error string
+	if !strings.Contains(err.Error(), "API rate limit exceeded") {
+		// an error, but not the API rate limit error: done
+		return err
+	}
+	// a rate limit error
+	// ask the graphql api for rate limiting information
+	vars["dryRun"] = githubv4.Boolean(true)
+	qctx, cancel = context.WithTimeout(ctx, defaultTimeout)
+	defer cancel()
+	if err := mm.gc.Query(qctx, query, vars); err != nil {
+		return err
+	}
+	rateLimit := query.rateLimit()
+	if rateLimit.Cost > rateLimit.Remaining {
+		// sleep
+		resetTime := rateLimit.ResetAt.Time
+		// Add a few seconds (8) for good measure
+		resetTime = resetTime.Add(8 * time.Second)
+		msg := fmt.Sprintf("Github GraphQL API: import will sleep until %s", resetTime.String())
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		case mm.importEvents <- RateLimitingEvent{msg}:
+		}
+		timer := time.NewTimer(time.Until(resetTime))
+		select {
+		case <-ctx.Done():
+			stop(timer)
+			return ctx.Err()
+		case <-timer.C:
+		}
+	}
+	// run the original query again
+	vars["dryRun"] = githubv4.Boolean(false)
+	qctx, cancel = context.WithTimeout(ctx, defaultTimeout)
+	defer cancel()
+	err = mm.gc.Query(qctx, query, vars)
+	return err // might be nil
+}
+
+func stop(t *time.Timer) {
+	if !t.Stop() {
+		select {
+		case <-t.C:
+		default:
+		}
+	}
+}

bridge/github/import_query.go 🔗

@@ -2,37 +2,136 @@ package github
 
 import "github.com/shurcooL/githubv4"
 
-type pageInfo struct {
-	EndCursor       githubv4.String
-	HasNextPage     bool
-	StartCursor     githubv4.String
-	HasPreviousPage bool
+type rateLimit struct {
+	Cost      githubv4.Int
+	Limit     githubv4.Int
+	NodeCount githubv4.Int
+	Remaining githubv4.Int
+	ResetAt   githubv4.DateTime
+	Used      githubv4.Int
 }
 
-type actor struct {
-	Typename  githubv4.String `graphql:"__typename"`
+type rateLimiter interface {
+	rateLimit() rateLimit
+}
+
+type userQuery struct {
+	RateLimit rateLimit `graphql:"rateLimit(dryRun: $dryRun)"`
+	User      user      `graphql:"user(login: $login)"`
+}
+
+func (q *userQuery) rateLimit() rateLimit {
+	return q.RateLimit
+}
+
+type labelsQuery struct {
+	//RateLimit rateLimit `graphql:"rateLimit(dryRun: $dryRun)"`
+	Repository struct {
+		Labels struct {
+			Nodes []struct {
+				ID          string `graphql:"id"`
+				Name        string `graphql:"name"`
+				Color       string `graphql:"color"`
+				Description string `graphql:"description"`
+			}
+			PageInfo pageInfo
+		} `graphql:"labels(first: $first, after: $after)"`
+	} `graphql:"repository(owner: $owner, name: $name)"`
+}
+
+type loginQuery struct {
+	//RateLimit rateLimit `graphql:"rateLimit(dryRun: $dryRun)"`
+	Viewer struct {
+		Login string `graphql:"login"`
+	} `graphql:"viewer"`
+}
+
+type issueQuery struct {
+	RateLimit  rateLimit `graphql:"rateLimit(dryRun: $dryRun)"`
+	Repository struct {
+		Issues issueConnection `graphql:"issues(first: $issueFirst, after: $issueAfter, orderBy: {field: CREATED_AT, direction: ASC}, filterBy: {since: $issueSince})"`
+	} `graphql:"repository(owner: $owner, name: $name)"`
+}
+
+func (q *issueQuery) rateLimit() rateLimit {
+	return q.RateLimit
+}
+
+type issueEditQuery struct {
+	RateLimit rateLimit `graphql:"rateLimit(dryRun: $dryRun)"`
+	Node      struct {
+		Typename githubv4.String `graphql:"__typename"`
+		Issue    struct {
+			UserContentEdits userContentEditConnection `graphql:"userContentEdits(last: $issueEditLast, before: $issueEditBefore)"`
+		} `graphql:"... on Issue"`
+	} `graphql:"node(id: $gqlNodeId)"`
+}
+
+func (q *issueEditQuery) rateLimit() rateLimit {
+	return q.RateLimit
+}
+
+type timelineQuery struct {
+	RateLimit rateLimit `graphql:"rateLimit(dryRun: $dryRun)"`
+	Node      struct {
+		Typename githubv4.String `graphql:"__typename"`
+		Issue    struct {
+			TimelineItems timelineItemsConnection `graphql:"timelineItems(first: $timelineFirst, after: $timelineAfter)"`
+		} `graphql:"... on Issue"`
+	} `graphql:"node(id: $gqlNodeId)"`
+}
+
+func (q *timelineQuery) rateLimit() rateLimit {
+	return q.RateLimit
+}
+
+type commentEditQuery struct {
+	RateLimit rateLimit `graphql:"rateLimit(dryRun: $dryRun)"`
+	Node      struct {
+		Typename     githubv4.String `graphql:"__typename"`
+		IssueComment struct {
+			UserContentEdits userContentEditConnection `graphql:"userContentEdits(last: $commentEditLast, before: $commentEditBefore)"`
+		} `graphql:"... on IssueComment"`
+	} `graphql:"node(id: $gqlNodeId)"`
+}
+
+func (q *commentEditQuery) rateLimit() rateLimit {
+	return q.RateLimit
+}
+
+type user struct {
 	Login     githubv4.String
 	AvatarUrl githubv4.String
-	User      struct {
-		Name  *githubv4.String
-		Email githubv4.String
-	} `graphql:"... on User"`
-	Organization struct {
-		Name  *githubv4.String
-		Email *githubv4.String
-	} `graphql:"... on Organization"`
+	Name      *githubv4.String
 }
 
-type actorEvent struct {
-	Id        githubv4.ID
-	CreatedAt githubv4.DateTime
-	Actor     *actor
+type issueConnection struct {
+	Nodes    []issueNode
+	PageInfo pageInfo
 }
 
-type authorEvent struct {
-	Id        githubv4.ID
-	CreatedAt githubv4.DateTime
-	Author    *actor
+type issueNode struct {
+	issue
+	UserContentEdits userContentEditConnection `graphql:"userContentEdits(last: $issueEditLast, before: $issueEditBefore)"`
+	TimelineItems    timelineItemsConnection   `graphql:"timelineItems(first: $timelineFirst, after: $timelineAfter)"`
+}
+
+type issue struct {
+	authorEvent
+	Title  githubv4.String
+	Number githubv4.Int
+	Body   githubv4.String
+	Url    githubv4.URI
+}
+
+type timelineItemsConnection struct {
+	Nodes    []timelineItem
+	PageInfo pageInfo
+}
+
+type userContentEditConnection struct {
+	Nodes    []userContentEdit
+	PageInfo pageInfo
 }
 
 type userContentEdit struct {
@@ -46,12 +145,6 @@ type userContentEdit struct {
 	Diff      *githubv4.String
 }
 
-type issueComment struct {
-	authorEvent // NOTE: contains Id
-	Body        githubv4.String
-	Url         githubv4.URI
-}
-
 type timelineItem struct {
 	Typename githubv4.String `graphql:"__typename"`
 
@@ -91,84 +184,43 @@ type timelineItem struct {
 	} `graphql:"... on RenamedTitleEvent"`
 }
 
-type ghostQuery struct {
-	User struct {
-		Login     githubv4.String
-		AvatarUrl githubv4.String
-		Name      *githubv4.String
-	} `graphql:"user(login: $login)"`
-}
-
-type labelsQuery struct {
-	Repository struct {
-		Labels struct {
-			Nodes []struct {
-				ID          string `graphql:"id"`
-				Name        string `graphql:"name"`
-				Color       string `graphql:"color"`
-				Description string `graphql:"description"`
-			}
-			PageInfo pageInfo
-		} `graphql:"labels(first: $first, after: $after)"`
-	} `graphql:"repository(owner: $owner, name: $name)"`
-}
-
-type loginQuery struct {
-	Viewer struct {
-		Login string `graphql:"login"`
-	} `graphql:"viewer"`
-}
+type issueComment struct {
+	authorEvent // NOTE: contains Id
+	Body        githubv4.String
+	Url         githubv4.URI
 
-type issueQuery struct {
-	Repository struct {
-		Issues struct {
-			Nodes    []issue
-			PageInfo pageInfo
-		} `graphql:"issues(first: $issueFirst, after: $issueAfter, orderBy: {field: CREATED_AT, direction: ASC}, filterBy: {since: $issueSince})"`
-	} `graphql:"repository(owner: $owner, name: $name)"`
+	UserContentEdits userContentEditConnection `graphql:"userContentEdits(last: $commentEditLast, before: $commentEditBefore)"`
 }
 
-type issue struct {
-	authorEvent
-	Title  string
-	Number githubv4.Int
-	Body   githubv4.String
-	Url    githubv4.URI
+type actor struct {
+	Typename  githubv4.String `graphql:"__typename"`
+	Login     githubv4.String
+	AvatarUrl githubv4.String
+	User      struct {
+		Name  *githubv4.String
+		Email githubv4.String
+	} `graphql:"... on User"`
+	Organization struct {
+		Name  *githubv4.String
+		Email *githubv4.String
+	} `graphql:"... on Organization"`
 }
 
-type issueEditQuery struct {
-	Node struct {
-		Typename githubv4.String `graphql:"__typename"`
-		Issue    struct {
-			UserContentEdits struct {
-				Nodes      []userContentEdit
-				TotalCount githubv4.Int
-				PageInfo   pageInfo
-			} `graphql:"userContentEdits(last: $issueEditLast, before: $issueEditBefore)"`
-		} `graphql:"... on Issue"`
-	} `graphql:"node(id: $gqlNodeId)"`
+type actorEvent struct {
+	Id        githubv4.ID
+	CreatedAt githubv4.DateTime
+	Actor     *actor
 }
 
-type timelineQuery struct {
-	Node struct {
-		Typename githubv4.String `graphql:"__typename"`
-		Issue    struct {
-			TimelineItems struct {
-				Nodes    []timelineItem
-				PageInfo pageInfo
-			} `graphql:"timelineItems(first: $timelineFirst, after: $timelineAfter)"`
-		} `graphql:"... on Issue"`
-	} `graphql:"node(id: $gqlNodeId)"`
+type authorEvent struct {
+	Id        githubv4.ID
+	CreatedAt githubv4.DateTime
+	Author    *actor
 }
 
-type commentEditQuery struct {
-	Node struct {
-		Typename     githubv4.String `graphql:"__typename"`
-		IssueComment struct {
-			UserContentEdits struct {
-				Nodes    []userContentEdit
-				PageInfo pageInfo
-			} `graphql:"userContentEdits(last: $commentEditLast, before: $commentEditBefore)"`
-		} `graphql:"... on IssueComment"`
-	} `graphql:"node(id: $gqlNodeId)"`
+type pageInfo struct {
+	EndCursor       githubv4.String
+	HasNextPage     bool
+	StartCursor     githubv4.String
+	HasPreviousPage bool
 }

bridge/github/iterator.go 🔗

@@ -1,423 +0,0 @@
-package github
-
-import (
-	"context"
-	"time"
-
-	"github.com/pkg/errors"
-	"github.com/shurcooL/githubv4"
-)
-
-type iterator struct {
-	// Github graphql client
-	gc *githubv4.Client
-
-	// The iterator will only query issues updated or created after the date given in
-	// the variable since.
-	since time.Time
-
-	// Shared context, which is used for all graphql queries.
-	ctx context.Context
-
-	// Sticky error
-	err error
-
-	// Issue iterator
-	issueIter issueIter
-}
-
-type issueIter struct {
-	iterVars
-	query         issueQuery
-	issueEditIter []issueEditIter
-	timelineIter  []timelineIter
-}
-
-type issueEditIter struct {
-	iterVars
-	query issueEditQuery
-}
-
-type timelineIter struct {
-	iterVars
-	query           timelineQuery
-	commentEditIter []commentEditIter
-}
-
-type commentEditIter struct {
-	iterVars
-	query commentEditQuery
-}
-
-type iterVars struct {
-	// Iterator index
-	index int
-
-	// capacity is the number of elements (issues, issue edits, timeline items, or
-	// comment edits) to query at a time. More capacity = more used memory =
-	// less queries to make.
-	capacity int
-
-	// Variable assignments for graphql query
-	variables varmap
-}
-
-type varmap map[string]interface{}
-
-func newIterVars(capacity int) iterVars {
-	return iterVars{
-		index:     -1,
-		capacity:  capacity,
-		variables: varmap{},
-	}
-}
-
-// NewIterator creates and initialize a new iterator.
-func NewIterator(ctx context.Context, client *githubv4.Client, capacity int, owner, project string, since time.Time) *iterator {
-	i := &iterator{
-		gc:    client,
-		since: since,
-		ctx:   ctx,
-		issueIter: issueIter{
-			iterVars:      newIterVars(capacity),
-			timelineIter:  make([]timelineIter, capacity),
-			issueEditIter: make([]issueEditIter, capacity),
-		},
-	}
-	i.issueIter.variables.setOwnerProject(owner, project)
-	for idx := range i.issueIter.issueEditIter {
-		ie := &i.issueIter.issueEditIter[idx]
-		ie.iterVars = newIterVars(capacity)
-	}
-	for i1 := range i.issueIter.timelineIter {
-		tli := &i.issueIter.timelineIter[i1]
-		tli.iterVars = newIterVars(capacity)
-		tli.commentEditIter = make([]commentEditIter, capacity)
-		for i2 := range tli.commentEditIter {
-			cei := &tli.commentEditIter[i2]
-			cei.iterVars = newIterVars(capacity)
-		}
-	}
-	i.resetIssueVars()
-	return i
-}
-
-func (v *varmap) setOwnerProject(owner, project string) {
-	(*v)["owner"] = githubv4.String(owner)
-	(*v)["name"] = githubv4.String(project)
-}
-
-func (i *iterator) resetIssueVars() {
-	vars := &i.issueIter.variables
-	(*vars)["issueFirst"] = githubv4.Int(i.issueIter.capacity)
-	(*vars)["issueAfter"] = (*githubv4.String)(nil)
-	(*vars)["issueSince"] = githubv4.DateTime{Time: i.since}
-	i.issueIter.query.Repository.Issues.PageInfo.HasNextPage = true
-	i.issueIter.query.Repository.Issues.PageInfo.EndCursor = ""
-}
-
-func (i *iterator) resetIssueEditVars() {
-	for idx := range i.issueIter.issueEditIter {
-		ie := &i.issueIter.issueEditIter[idx]
-		ie.variables["issueEditLast"] = githubv4.Int(ie.capacity)
-		ie.variables["issueEditBefore"] = (*githubv4.String)(nil)
-		ie.query.Node.Issue.UserContentEdits.PageInfo.HasNextPage = true
-		ie.query.Node.Issue.UserContentEdits.PageInfo.EndCursor = ""
-	}
-}
-
-func (i *iterator) resetTimelineVars() {
-	for idx := range i.issueIter.timelineIter {
-		ip := &i.issueIter.timelineIter[idx]
-		ip.variables["timelineFirst"] = githubv4.Int(ip.capacity)
-		ip.variables["timelineAfter"] = (*githubv4.String)(nil)
-		ip.query.Node.Issue.TimelineItems.PageInfo.HasNextPage = true
-		ip.query.Node.Issue.TimelineItems.PageInfo.EndCursor = ""
-	}
-}
-
-func (i *iterator) resetCommentEditVars() {
-	for i1 := range i.issueIter.timelineIter {
-		for i2 := range i.issueIter.timelineIter[i1].commentEditIter {
-			ce := &i.issueIter.timelineIter[i1].commentEditIter[i2]
-			ce.variables["commentEditLast"] = githubv4.Int(ce.capacity)
-			ce.variables["commentEditBefore"] = (*githubv4.String)(nil)
-			ce.query.Node.IssueComment.UserContentEdits.PageInfo.HasNextPage = true
-			ce.query.Node.IssueComment.UserContentEdits.PageInfo.EndCursor = ""
-		}
-	}
-}
-
-// Error return last encountered error
-func (i *iterator) Error() error {
-	if i.err != nil {
-		return i.err
-	}
-	return i.ctx.Err() // might return nil
-}
-
-func (i *iterator) HasError() bool {
-	return i.err != nil || i.ctx.Err() != nil
-}
-
-func (i *iterator) currIssueItem() *issue {
-	return &i.issueIter.query.Repository.Issues.Nodes[i.issueIter.index]
-}
-
-func (i *iterator) currIssueEditIter() *issueEditIter {
-	return &i.issueIter.issueEditIter[i.issueIter.index]
-}
-
-func (i *iterator) currTimelineIter() *timelineIter {
-	return &i.issueIter.timelineIter[i.issueIter.index]
-}
-
-func (i *iterator) currCommentEditIter() *commentEditIter {
-	timelineIter := i.currTimelineIter()
-	return &timelineIter.commentEditIter[timelineIter.index]
-}
-
-func (i *iterator) currIssueGqlNodeId() githubv4.ID {
-	return i.currIssueItem().Id
-}
-
-// NextIssue returns true if there exists a next issue and advances the iterator by one.
-// It is used to iterate over all issues. Queries to github are made when necessary.
-func (i *iterator) NextIssue() bool {
-	if i.HasError() {
-		return false
-	}
-	index := &i.issueIter.index
-	issues := &i.issueIter.query.Repository.Issues
-	issueItems := &issues.Nodes
-	if 0 <= *index && *index < len(*issueItems)-1 {
-		*index += 1
-		return true
-	}
-
-	if !issues.PageInfo.HasNextPage {
-		return false
-	}
-	nextIssue := i.queryIssue()
-	return nextIssue
-}
-
-// IssueValue returns the actual issue value.
-func (i *iterator) IssueValue() issue {
-	return *i.currIssueItem()
-}
-
-func (i *iterator) queryIssue() bool {
-	ctx, cancel := context.WithTimeout(i.ctx, defaultTimeout)
-	defer cancel()
-	if endCursor := i.issueIter.query.Repository.Issues.PageInfo.EndCursor; endCursor != "" {
-		i.issueIter.variables["issueAfter"] = endCursor
-	}
-	if err := i.gc.Query(ctx, &i.issueIter.query, i.issueIter.variables); err != nil {
-		i.err = err
-		return false
-	}
-	i.resetIssueEditVars()
-	i.resetTimelineVars()
-	issueItems := &i.issueIter.query.Repository.Issues.Nodes
-	if len(*issueItems) <= 0 {
-		i.issueIter.index = -1
-		return false
-	}
-	i.issueIter.index = 0
-	return true
-}
-
-// NextIssueEdit returns true if there exists a next issue edit and advances the iterator
-// by one. It is used to iterate over all the issue edits. Queries to github are made when
-// necessary.
-func (i *iterator) NextIssueEdit() bool {
-	if i.HasError() {
-		return false
-	}
-	ieIter := i.currIssueEditIter()
-	ieIdx := &ieIter.index
-	ieItems := ieIter.query.Node.Issue.UserContentEdits
-	if 0 <= *ieIdx && *ieIdx < len(ieItems.Nodes)-1 {
-		*ieIdx += 1
-		return i.nextValidIssueEdit()
-	}
-	if !ieItems.PageInfo.HasNextPage {
-		return false
-	}
-	querySucc := i.queryIssueEdit()
-	if !querySucc {
-		return false
-	}
-	return i.nextValidIssueEdit()
-}
-
-func (i *iterator) nextValidIssueEdit() bool {
-	// issueEdit.Diff == nil happen if the event is older than early 2018, Github doesn't have
-	// the data before that. Best we can do is to ignore the event.
-	if issueEdit := i.IssueEditValue(); issueEdit.Diff == nil || string(*issueEdit.Diff) == "" {
-		return i.NextIssueEdit()
-	}
-	return true
-}
-
-// IssueEditValue returns the actual issue edit value.
-func (i *iterator) IssueEditValue() userContentEdit {
-	iei := i.currIssueEditIter()
-	return iei.query.Node.Issue.UserContentEdits.Nodes[iei.index]
-}
-
-func (i *iterator) queryIssueEdit() bool {
-	ctx, cancel := context.WithTimeout(i.ctx, defaultTimeout)
-	defer cancel()
-	iei := i.currIssueEditIter()
-	if endCursor := iei.query.Node.Issue.UserContentEdits.PageInfo.EndCursor; endCursor != "" {
-		iei.variables["issueEditBefore"] = endCursor
-	}
-	iei.variables["gqlNodeId"] = i.currIssueGqlNodeId()
-	if err := i.gc.Query(ctx, &iei.query, iei.variables); err != nil {
-		i.err = err
-		return false
-	}
-	issueEditItems := iei.query.Node.Issue.UserContentEdits.Nodes
-	if len(issueEditItems) <= 0 {
-		iei.index = -1
-		return false
-	}
-	// The UserContentEditConnection in the Github API serves its elements in reverse chronological
-	// order. For our purpose we have to reverse the edits.
-	reverseEdits(issueEditItems)
-	iei.index = 0
-	return true
-}
-
-// NextTimelineItem returns true if there exists a next timeline item and advances the iterator
-// by one. It is used to iterate over all the timeline items. Queries to github are made when
-// necessary.
-func (i *iterator) NextTimelineItem() bool {
-	if i.HasError() {
-		return false
-	}
-	tlIter := &i.issueIter.timelineIter[i.issueIter.index]
-	tlIdx := &tlIter.index
-	tlItems := tlIter.query.Node.Issue.TimelineItems
-	if 0 <= *tlIdx && *tlIdx < len(tlItems.Nodes)-1 {
-		*tlIdx += 1
-		return true
-	}
-	if !tlItems.PageInfo.HasNextPage {
-		return false
-	}
-	nextTlItem := i.queryTimeline()
-	return nextTlItem
-}
-
-// TimelineItemValue returns the actual timeline item value.
-func (i *iterator) TimelineItemValue() timelineItem {
-	tli := i.currTimelineIter()
-	return tli.query.Node.Issue.TimelineItems.Nodes[tli.index]
-}
-
-func (i *iterator) queryTimeline() bool {
-	ctx, cancel := context.WithTimeout(i.ctx, defaultTimeout)
-	defer cancel()
-	tli := i.currTimelineIter()
-	if endCursor := tli.query.Node.Issue.TimelineItems.PageInfo.EndCursor; endCursor != "" {
-		tli.variables["timelineAfter"] = endCursor
-	}
-	tli.variables["gqlNodeId"] = i.currIssueGqlNodeId()
-	if err := i.gc.Query(ctx, &tli.query, tli.variables); err != nil {
-		i.err = err
-		return false
-	}
-	i.resetCommentEditVars()
-	timelineItems := &tli.query.Node.Issue.TimelineItems
-	if len(timelineItems.Nodes) <= 0 {
-		tli.index = -1
-		return false
-	}
-	tli.index = 0
-	return true
-}
-
-// NextCommentEdit returns true if there exists a next comment edit and advances the iterator
-// by one. It is used to iterate over all issue edits. Queries to github are made when
-// necessary.
-func (i *iterator) NextCommentEdit() bool {
-	if i.HasError() {
-		return false
-	}
-
-	tmlnVal := i.TimelineItemValue()
-	if tmlnVal.Typename != "IssueComment" {
-		// The timeline iterator does not point to a comment.
-		i.err = errors.New("Call to NextCommentEdit() while timeline item is not a comment")
-		return false
-	}
-
-	cei := i.currCommentEditIter()
-	ceIdx := &cei.index
-	ceItems := &cei.query.Node.IssueComment.UserContentEdits
-	if 0 <= *ceIdx && *ceIdx < len(ceItems.Nodes)-1 {
-		*ceIdx += 1
-		return i.nextValidCommentEdit()
-	}
-	if !ceItems.PageInfo.HasNextPage {
-		return false
-	}
-	querySucc := i.queryCommentEdit()
-	if !querySucc {
-		return false
-	}
-	return i.nextValidCommentEdit()
-}
-
-func (i *iterator) nextValidCommentEdit() bool {
-	// if comment edit diff is a nil pointer or points to an empty string look for next value
-	if commentEdit := i.CommentEditValue(); commentEdit.Diff == nil || string(*commentEdit.Diff) == "" {
-		return i.NextCommentEdit()
-	}
-	return true
-}
-
-// CommentEditValue returns the actual comment edit value.
-func (i *iterator) CommentEditValue() userContentEdit {
-	cei := i.currCommentEditIter()
-	return cei.query.Node.IssueComment.UserContentEdits.Nodes[cei.index]
-}
-
-func (i *iterator) queryCommentEdit() bool {
-	ctx, cancel := context.WithTimeout(i.ctx, defaultTimeout)
-	defer cancel()
-	cei := i.currCommentEditIter()
-
-	if endCursor := cei.query.Node.IssueComment.UserContentEdits.PageInfo.EndCursor; endCursor != "" {
-		cei.variables["commentEditBefore"] = endCursor
-	}
-	tmlnVal := i.TimelineItemValue()
-	if tmlnVal.Typename != "IssueComment" {
-		i.err = errors.New("Call to queryCommentEdit() while timeline item is not a comment")
-		return false
-	}
-	cei.variables["gqlNodeId"] = tmlnVal.IssueComment.Id
-	if err := i.gc.Query(ctx, &cei.query, cei.variables); err != nil {
-		i.err = err
-		return false
-	}
-	ceItems := cei.query.Node.IssueComment.UserContentEdits.Nodes
-	if len(ceItems) <= 0 {
-		cei.index = -1
-		return false
-	}
-	// The UserContentEditConnection in the Github API serves its elements in reverse chronological
-	// order. For our purpose we have to reverse the edits.
-	reverseEdits(ceItems)
-	cei.index = 0
-	return true
-}
-
-func reverseEdits(edits []userContentEdit) {
-	for i, j := 0, len(edits)-1; i < j; i, j = i+1, j-1 {
-		edits[i], edits[j] = edits[j], edits[i]
-	}
-}