Fix importer/exporter design errors

Amine Hilaly created

Change summary

bridge/github/import.go   | 82 ++++++++++++++++++++--------------------
bridge/github/iterator.go | 35 ++++++++++++-----
2 files changed, 66 insertions(+), 51 deletions(-)

Detailed changes

bridge/github/import.go 🔗

@@ -23,6 +23,15 @@ const (
 // githubImporter implement the Importer interface
 type githubImporter struct {
 	conf core.Configuration
+
+	// iterator used to walk issues, timeline items and their edits
+	iterator *iterator
+
+	// number of imported issues
+	importedIssues int
+
+	// number of imported identities
+	importedIdentities int
 }
 
 func (gi *githubImporter) Init(conf core.Configuration) error {
@@ -30,23 +39,25 @@ func (gi *githubImporter) Init(conf core.Configuration) error {
 	return nil
 }
 
+func (gi *githubImporter) Reset() {
+	gi.importedIssues = 0
+	gi.importedIdentities = 0
+}
+
 // ImportAll .
 func (gi *githubImporter) ImportAll(repo *cache.RepoCache, since time.Time) error {
-	iterator := NewIterator(gi.conf[keyUser], gi.conf[keyProject], gi.conf[keyToken], since)
+	gi.iterator = NewIterator(gi.conf[keyUser], gi.conf[keyProject], gi.conf[keyToken], since)
 
 	// Loop over all matching issues
-	for iterator.NextIssue() {
-		issue := iterator.IssueValue()
+	for gi.iterator.NextIssue() {
+		issue := gi.iterator.IssueValue()
+		fmt.Printf("importing issue: %v %v\n", gi.importedIssues+1, issue.Title)
+		gi.importedIssues++
 
-		fmt.Printf("importing issue: %v %v\n", iterator.importedIssues, issue.Title)
 		// get issue edits
 		issueEdits := []userContentEdit{}
-		for iterator.NextIssueEdit() {
-			// issueEdit.Diff == nil happen if the event is older than early 2018, Github doesn't have the data before that.
-			// Best we can do is to ignore the event.
-			if issueEdit := iterator.IssueEditValue(); issueEdit.Diff != nil && string(*issueEdit.Diff) != "" {
-				issueEdits = append(issueEdits, issueEdit)
-			}
+		for gi.iterator.NextIssueEdit() {
+			issueEdits = append(issueEdits, gi.iterator.IssueEditValue())
 		}
 
 		// create issue
@@ -56,28 +67,10 @@ func (gi *githubImporter) ImportAll(repo *cache.RepoCache, since time.Time) erro
 		}
 
 		// loop over timeline items
-		for iterator.NextTimeline() {
-			item := iterator.TimelineValue()
-
-			// if item is comment
-			if item.Typename == "IssueComment" {
-				// collect all edits
-				commentEdits := []userContentEdit{}
-				for iterator.NextCommentEdit() {
-					if commentEdit := iterator.CommentEditValue(); commentEdit.Diff != nil && string(*commentEdit.Diff) != "" {
-						commentEdits = append(commentEdits, commentEdit)
-					}
-				}
-
-				err := gi.ensureTimelineComment(repo, b, item.IssueComment, commentEdits)
-				if err != nil {
-					return fmt.Errorf("timeline comment creation: %v", err)
-				}
-
-			} else {
-				if err := gi.ensureTimelineItem(repo, b, item); err != nil {
-					return fmt.Errorf("timeline event creation: %v", err)
-				}
+		for gi.iterator.NextTimeline() {
+			item := gi.iterator.TimelineValue()
+			if err := gi.ensureTimelineItem(repo, b, item); err != nil {
+				return fmt.Errorf("timeline event creation: %v", err)
 			}
 		}
 
@@ -87,12 +80,13 @@ func (gi *githubImporter) ImportAll(repo *cache.RepoCache, since time.Time) erro
 		}
 	}
 
-	if err := iterator.Error(); err != nil {
+	if err := gi.iterator.Error(); err != nil {
 		fmt.Printf("import error: %v\n", err)
 		return err
 	}
 
-	fmt.Printf("Successfully imported %v issues from Github\n", iterator.ImportedIssues())
+	fmt.Printf("Successfully imported %d issues from Github\n", gi.importedIssues)
+	fmt.Printf("Total imported identities: %d\n", gi.importedIdentities)
 	return nil
 }
 
@@ -189,6 +183,16 @@ func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.Bug
 
 	switch item.Typename {
 	case "IssueComment":
+		// collect all comment edits
+		commentEdits := []userContentEdit{}
+		for gi.iterator.NextCommentEdit() {
+			commentEdits = append(commentEdits, gi.iterator.CommentEditValue())
+		}
+
+		err := gi.ensureTimelineComment(repo, b, item.IssueComment, commentEdits)
+		if err != nil {
+			return fmt.Errorf("timeline comment creation: %v", err)
+		}
 
 	case "LabeledEvent":
 		id := parseId(item.LabeledEvent.Id)
@@ -455,6 +459,9 @@ func (gi *githubImporter) ensurePerson(repo *cache.RepoCache, actor *actor) (*ca
 		return nil, err
 	}
 
+	// importing a new identity
+	gi.importedIdentities++
+
 	var name string
 	var email string
 
@@ -528,10 +535,3 @@ func (gi *githubImporter) getGhost(repo *cache.RepoCache) (*cache.IdentityCache,
 func parseId(id githubv4.ID) string {
 	return fmt.Sprintf("%v", id)
 }
-
-func reverseEdits(edits []userContentEdit) []userContentEdit {
-	for i, j := 0, len(edits)-1; i < j; i, j = i+1, j-1 {
-		edits[i], edits[j] = edits[j], edits[i]
-	}
-	return edits
-}

bridge/github/iterator.go 🔗

@@ -49,8 +49,8 @@ type iterator struct {
 	// sticky error
 	err error
 
-	// number of imported issues
-	importedIssues int
+	// started is set once NextIssue has made its first move
+	started bool
 
 	// timeline iterator
 	timeline timelineIterator
@@ -145,11 +145,6 @@ func (i *iterator) Error() error {
 	return i.err
 }
 
-// ImportedIssues return the number of issues we iterated over
-func (i *iterator) ImportedIssues() int {
-	return i.importedIssues
-}
-
 func (i *iterator) queryIssue() bool {
 	if err := i.gc.Query(context.TODO(), &i.timeline.query, i.timeline.variables); err != nil {
 		i.err = err
@@ -161,17 +156,16 @@ func (i *iterator) queryIssue() bool {
 	}
 
 	i.reverseTimelineEditNodes()
-	i.importedIssues++
 	return true
 }
 
 // Next issue
 func (i *iterator) NextIssue() bool {
 	// we make the first move
-	if i.importedIssues == 0 {
-
+	if !i.started {
 		// init variables and goto queryIssue block
 		i.initTimelineQueryVariables()
+		i.started = true
 		return i.queryIssue()
 	}
 
@@ -266,6 +260,9 @@ func (i *iterator) NextIssueEdit() bool {
 	if i.timeline.issueEdit.index == -2 {
 		if i.issueEdit.index < min(i.capacity, len(i.issueEdit.query.Repository.Issues.Nodes[0].UserContentEdits.Nodes))-1 {
 			i.issueEdit.index++
+			if issueEdit := i.IssueEditValue(); issueEdit.Diff == nil || string(*issueEdit.Diff) == "" {
+				return i.NextIssueEdit()
+			}
 			return true
 		}
 
@@ -297,6 +294,11 @@ func (i *iterator) NextIssueEdit() bool {
 	// loop over them timeline comment edits
 	if i.timeline.issueEdit.index < min(i.capacity, len(i.timeline.query.Repository.Issues.Nodes[0].UserContentEdits.Nodes))-1 {
 		i.timeline.issueEdit.index++
+		// issueEdit.Diff == nil happens if the event is older than early 2018; GitHub doesn't have the data before that.
+		// The best we can do is to ignore the event.
+		if issueEdit := i.IssueEditValue(); issueEdit.Diff == nil || string(*issueEdit.Diff) == "" {
+			return i.NextIssueEdit()
+		}
 		return true
 	}
 
@@ -350,6 +352,9 @@ func (i *iterator) NextCommentEdit() bool {
 
 		if i.commentEdit.index < min(i.capacity, len(i.commentEdit.query.Repository.Issues.Nodes[0].Timeline.Nodes[0].IssueComment.UserContentEdits.Nodes))-1 {
 			i.commentEdit.index++
+			if commentEdit := i.CommentEditValue(); commentEdit.Diff == nil || string(*commentEdit.Diff) == "" {
+				return i.NextCommentEdit()
+			}
 			return true
 		}
 
@@ -372,6 +377,10 @@ func (i *iterator) NextCommentEdit() bool {
 	// loop over them timeline comment edits
 	if i.timeline.commentEdit.index < min(i.capacity, len(i.timeline.query.Repository.Issues.Nodes[0].Timeline.Edges[i.timeline.index].Node.IssueComment.UserContentEdits.Nodes))-1 {
 		i.timeline.commentEdit.index++
+		// if the comment edit diff is nil or points to an empty string, look for the next value
+		if commentEdit := i.CommentEditValue(); commentEdit.Diff == nil || string(*commentEdit.Diff) == "" {
+			return i.NextCommentEdit()
+		}
 		return true
 	}
 
@@ -407,3 +416,9 @@ func min(a, b int) int {
 
 	return a
 }
+
+func reverseEdits(edits []userContentEdit) {
+	for i, j := 0, len(edits)-1; i < j; i, j = i+1, j-1 {
+		edits[i], edits[j] = edits[j], edits[i]
+	}
+}