import.go

  1package github
  2
  3import (
  4	"context"
  5	"fmt"
  6	"time"
  7
  8	"github.com/shurcooL/githubv4"
  9
 10	"github.com/MichaelMure/git-bug/bridge/core"
 11	"github.com/MichaelMure/git-bug/bridge/core/auth"
 12	"github.com/MichaelMure/git-bug/bug"
 13	"github.com/MichaelMure/git-bug/cache"
 14	"github.com/MichaelMure/git-bug/entity"
 15	"github.com/MichaelMure/git-bug/util/text"
 16)
 17
// EmptyTitlePlaceholder is the title used in place of a Github title that consists only
// of a space followed by a zero width space (U+200B), i.e. a seemingly empty title.
const EmptyTitlePlaceholder = "<empty string>"
 19
// githubImporter implements the Importer interface
type githubImporter struct {
	// bridge configuration, stored by Init
	conf core.Configuration

	// mediator to access the Github API, created by ImportAll
	mediator *importMediator

	// send only channel on which the import results are reported, created by ImportAll
	out chan<- core.ImportResult
}
 30
// Init stores the bridge configuration for later use by ImportAll. The context and the
// repository arguments are ignored, and the returned error is always nil.
func (gi *githubImporter) Init(_ context.Context, _ *cache.RepoCache, conf core.Configuration) error {
	gi.conf = conf
	return nil
}
 35
 36// ImportAll iterate over all the configured repository issues and ensure the creation of the
 37// missing issues / timeline items / edits / label events ...
 38func (gi *githubImporter) ImportAll(ctx context.Context, repo *cache.RepoCache, since time.Time) (<-chan core.ImportResult, error) {
 39	creds, err := auth.List(repo,
 40		auth.WithTarget(target),
 41		auth.WithKind(auth.KindToken),
 42		auth.WithMeta(auth.MetaKeyLogin, gi.conf[confKeyDefaultLogin]),
 43	)
 44	if err != nil {
 45		return nil, err
 46	}
 47	if len(creds) <= 0 {
 48		return nil, ErrMissingIdentityToken
 49	}
 50	client := buildClient(creds[0].(*auth.Token))
 51	gi.mediator = NewImportMediator(ctx, client, gi.conf[confKeyOwner], gi.conf[confKeyProject], since)
 52	out := make(chan core.ImportResult)
 53	gi.out = out
 54
 55	go func() {
 56		defer close(gi.out)
 57		var currBug *cache.BugCache
 58		var currEvent ImportEvent
 59		var nextEvent ImportEvent
 60		var err error
 61		for {
 62			// An IssueEvent contains the issue in its most recent state. If an issue
 63			// has at least one issue edit, then the history of the issue edits is
 64			// represented by IssueEditEvents. That is, the unedited (original) issue
 65			// might be saved only in the IssueEditEvent following the IssueEvent.
 66			// Since we replicate the edit history we need to either use the IssueEvent
 67			// (if there are no edits) or the IssueEvent together with its first
 68			// IssueEditEvent (if there are edits).
 69			// Exactly the same is true for comments and comment edits.
 70			// As a consequence we need to look at the current event and one look ahead
 71			// event.
 72			currEvent = nextEvent
 73			if currEvent == nil {
 74				currEvent = gi.getEventHandleMsgs()
 75			}
 76			if currEvent == nil {
 77				break
 78			}
 79			nextEvent = gi.getEventHandleMsgs()
 80
 81			switch event := currEvent.(type) {
 82			case RateLimitingEvent:
 83				out <- core.NewImportRateLimiting(event.msg)
 84			case IssueEvent:
 85				// first: commit what is being held in currBug
 86				if err = gi.commit(currBug, out); err != nil {
 87					out <- core.NewImportError(err, "")
 88					return
 89				}
 90				// second: create new issue
 91				switch next := nextEvent.(type) {
 92				case IssueEditEvent:
 93					// consuming and using next event
 94					nextEvent = nil
 95					currBug, err = gi.ensureIssue(ctx, repo, &event.issue, &next.userContentEdit)
 96				default:
 97					currBug, err = gi.ensureIssue(ctx, repo, &event.issue, nil)
 98				}
 99				if err != nil {
100					err := fmt.Errorf("issue creation: %v", err)
101					out <- core.NewImportError(err, "")
102					return
103				}
104			case IssueEditEvent:
105				err = gi.ensureIssueEdit(ctx, repo, currBug, event.issueId, &event.userContentEdit)
106				if err != nil {
107					err = fmt.Errorf("issue edit: %v", err)
108					out <- core.NewImportError(err, "")
109					return
110				}
111			case TimelineEvent:
112				if next, ok := nextEvent.(CommentEditEvent); ok && event.Typename == "IssueComment" {
113					// consuming and using next event
114					nextEvent = nil
115					err = gi.ensureComment(ctx, repo, currBug, &event.timelineItem.IssueComment, &next.userContentEdit)
116				} else {
117					err = gi.ensureTimelineItem(ctx, repo, currBug, &event.timelineItem)
118				}
119				if err != nil {
120					err = fmt.Errorf("timeline item creation: %v", err)
121					out <- core.NewImportError(err, "")
122					return
123				}
124			case CommentEditEvent:
125				err = gi.ensureCommentEdit(ctx, repo, currBug, event.commentId, &event.userContentEdit)
126				if err != nil {
127					err = fmt.Errorf("comment edit: %v", err)
128					out <- core.NewImportError(err, "")
129					return
130				}
131			default:
132				panic("Unknown event type")
133			}
134		}
135		// commit what is being held in currBug before returning
136		if err = gi.commit(currBug, out); err != nil {
137			out <- core.NewImportError(err, "")
138		}
139		if err = gi.mediator.Error(); err != nil {
140			gi.out <- core.NewImportError(err, "")
141		}
142	}()
143
144	return out, nil
145}
146
147func (gi *githubImporter) getEventHandleMsgs() ImportEvent {
148	for {
149		// read event from import mediator
150		event := gi.mediator.NextImportEvent()
151		// consume (and use) all rate limiting events
152		if e, ok := event.(RateLimitingEvent); ok {
153			gi.out <- core.NewImportRateLimiting(e.msg)
154			continue
155		}
156		return event
157	}
158}
159
160func (gi *githubImporter) commit(b *cache.BugCache, out chan<- core.ImportResult) error {
161	if b == nil {
162		return nil
163	}
164	if !b.NeedCommit() {
165		out <- core.NewImportNothing(b.Id(), "no imported operation")
166		return nil
167	} else if err := b.Commit(); err != nil {
168		// commit bug state
169		return fmt.Errorf("bug commit: %v", err)
170	}
171	return nil
172}
173
174func (gi *githubImporter) ensureIssue(ctx context.Context, repo *cache.RepoCache, issue *issue, issueEdit *userContentEdit) (*cache.BugCache, error) {
175	author, err := gi.ensurePerson(ctx, repo, issue.Author)
176	if err != nil {
177		return nil, err
178	}
179
180	// resolve bug
181	b, err := repo.ResolveBugMatcher(func(excerpt *cache.BugExcerpt) bool {
182		return excerpt.CreateMetadata[core.MetaKeyOrigin] == target &&
183			excerpt.CreateMetadata[metaKeyGithubId] == parseId(issue.Id)
184	})
185	if err == nil {
186		return b, nil
187	}
188	if err != bug.ErrBugNotExist {
189		return nil, err
190	}
191
192	// At Github there exist issues with seemingly empty titles. An example is
193	// https://github.com/NixOS/nixpkgs/issues/72730 .
194	// The title provided by the GraphQL API actually consists of a space followed by a
195	// zero width space (U+200B). This title would cause the NewBugRaw() function to
196	// return an error: empty title.
197	title := string(issue.Title)
198	if title == " \u200b" { // U+200B == zero width space
199		title = EmptyTitlePlaceholder
200	}
201
202	var textInput string
203	if issueEdit != nil {
204		// use the first issue edit: it represents the bug creation itself
205		textInput = string(*issueEdit.Diff)
206	} else {
207		// if there are no issue edits then the issue struct holds the bug creation
208		textInput = string(issue.Body)
209	}
210	cleanText, err := text.Cleanup(textInput)
211	if err != nil {
212		return nil, err
213	}
214
215	// create bug
216	b, _, err = repo.NewBugRaw(
217		author,
218		issue.CreatedAt.Unix(),
219		title, // TODO: this is the *current* title, not the original one
220		cleanText,
221		nil,
222		map[string]string{
223			core.MetaKeyOrigin: target,
224			metaKeyGithubId:    parseId(issue.Id),
225			metaKeyGithubUrl:   issue.Url.String(),
226		})
227	if err != nil {
228		return nil, err
229	}
230	// importing a new bug
231	gi.out <- core.NewImportBug(b.Id())
232
233	return b, nil
234}
235
// ensureIssueEdit imports an edit of an issue. It delegates to ensureCommentEdit, using
// the Github id of the issue as the id of the edited target.
func (gi *githubImporter) ensureIssueEdit(ctx context.Context, repo *cache.RepoCache, bug *cache.BugCache, ghIssueId githubv4.ID, edit *userContentEdit) error {
	return gi.ensureCommentEdit(ctx, repo, bug, ghIssueId, edit)
}
239
240func (gi *githubImporter) ensureTimelineItem(ctx context.Context, repo *cache.RepoCache, b *cache.BugCache, item *timelineItem) error {
241
242	switch item.Typename {
243	case "IssueComment":
244		err := gi.ensureComment(ctx, repo, b, &item.IssueComment, nil)
245		if err != nil {
246			return fmt.Errorf("timeline comment creation: %v", err)
247		}
248		return nil
249
250	case "LabeledEvent":
251		id := parseId(item.LabeledEvent.Id)
252		_, err := b.ResolveOperationWithMetadata(metaKeyGithubId, id)
253		if err == nil {
254			return nil
255		}
256
257		if err != cache.ErrNoMatchingOp {
258			return err
259		}
260		author, err := gi.ensurePerson(ctx, repo, item.LabeledEvent.Actor)
261		if err != nil {
262			return err
263		}
264		op, err := b.ForceChangeLabelsRaw(
265			author,
266			item.LabeledEvent.CreatedAt.Unix(),
267			[]string{
268				string(item.LabeledEvent.Label.Name),
269			},
270			nil,
271			map[string]string{metaKeyGithubId: id},
272		)
273		if err != nil {
274			return err
275		}
276
277		gi.out <- core.NewImportLabelChange(op.Id())
278		return nil
279
280	case "UnlabeledEvent":
281		id := parseId(item.UnlabeledEvent.Id)
282		_, err := b.ResolveOperationWithMetadata(metaKeyGithubId, id)
283		if err == nil {
284			return nil
285		}
286		if err != cache.ErrNoMatchingOp {
287			return err
288		}
289		author, err := gi.ensurePerson(ctx, repo, item.UnlabeledEvent.Actor)
290		if err != nil {
291			return err
292		}
293
294		op, err := b.ForceChangeLabelsRaw(
295			author,
296			item.UnlabeledEvent.CreatedAt.Unix(),
297			nil,
298			[]string{
299				string(item.UnlabeledEvent.Label.Name),
300			},
301			map[string]string{metaKeyGithubId: id},
302		)
303		if err != nil {
304			return err
305		}
306
307		gi.out <- core.NewImportLabelChange(op.Id())
308		return nil
309
310	case "ClosedEvent":
311		id := parseId(item.ClosedEvent.Id)
312		_, err := b.ResolveOperationWithMetadata(metaKeyGithubId, id)
313		if err != cache.ErrNoMatchingOp {
314			return err
315		}
316		if err == nil {
317			return nil
318		}
319		author, err := gi.ensurePerson(ctx, repo, item.ClosedEvent.Actor)
320		if err != nil {
321			return err
322		}
323		op, err := b.CloseRaw(
324			author,
325			item.ClosedEvent.CreatedAt.Unix(),
326			map[string]string{metaKeyGithubId: id},
327		)
328
329		if err != nil {
330			return err
331		}
332
333		gi.out <- core.NewImportStatusChange(op.Id())
334		return nil
335
336	case "ReopenedEvent":
337		id := parseId(item.ReopenedEvent.Id)
338		_, err := b.ResolveOperationWithMetadata(metaKeyGithubId, id)
339		if err != cache.ErrNoMatchingOp {
340			return err
341		}
342		if err == nil {
343			return nil
344		}
345		author, err := gi.ensurePerson(ctx, repo, item.ReopenedEvent.Actor)
346		if err != nil {
347			return err
348		}
349		op, err := b.OpenRaw(
350			author,
351			item.ReopenedEvent.CreatedAt.Unix(),
352			map[string]string{metaKeyGithubId: id},
353		)
354
355		if err != nil {
356			return err
357		}
358
359		gi.out <- core.NewImportStatusChange(op.Id())
360		return nil
361
362	case "RenamedTitleEvent":
363		id := parseId(item.RenamedTitleEvent.Id)
364		_, err := b.ResolveOperationWithMetadata(metaKeyGithubId, id)
365		if err != cache.ErrNoMatchingOp {
366			return err
367		}
368		if err == nil {
369			return nil
370		}
371		author, err := gi.ensurePerson(ctx, repo, item.RenamedTitleEvent.Actor)
372		if err != nil {
373			return err
374		}
375
376		// At Github there exist issues with seemingly empty titles. An example is
377		// https://github.com/NixOS/nixpkgs/issues/72730 .
378		// The title provided by the GraphQL API actually consists of a space followed
379		// by a zero width space (U+200B). This title would cause the NewBugRaw()
380		// function to return an error: empty title.
381		title := string(item.RenamedTitleEvent.CurrentTitle)
382		if title == " \u200b" { // U+200B == zero width space
383			title = EmptyTitlePlaceholder
384		}
385
386		op, err := b.SetTitleRaw(
387			author,
388			item.RenamedTitleEvent.CreatedAt.Unix(),
389			title,
390			map[string]string{metaKeyGithubId: id},
391		)
392		if err != nil {
393			return err
394		}
395
396		gi.out <- core.NewImportTitleEdition(op.Id())
397		return nil
398	}
399
400	return nil
401}
402
403func (gi *githubImporter) ensureCommentEdit(ctx context.Context, repo *cache.RepoCache, b *cache.BugCache, ghTargetId githubv4.ID, edit *userContentEdit) error {
404	// find comment
405	target, err := b.ResolveOperationWithMetadata(metaKeyGithubId, parseId(ghTargetId))
406	if err != nil {
407		return err
408	}
409	_, err = b.ResolveOperationWithMetadata(metaKeyGithubId, parseId(edit.Id))
410	if err == nil {
411		return nil
412	}
413	if err != cache.ErrNoMatchingOp {
414		// real error
415		return err
416	}
417
418	editor, err := gi.ensurePerson(ctx, repo, edit.Editor)
419	if err != nil {
420		return err
421	}
422
423	if edit.DeletedAt != nil {
424		// comment deletion, not supported yet
425		return nil
426	}
427
428	cleanText, err := text.Cleanup(string(*edit.Diff))
429	if err != nil {
430		return err
431	}
432
433	// comment edition
434	op, err := b.EditCommentRaw(
435		editor,
436		edit.CreatedAt.Unix(),
437		target,
438		cleanText,
439		map[string]string{
440			metaKeyGithubId: parseId(edit.Id),
441		},
442	)
443
444	if err != nil {
445		return err
446	}
447
448	gi.out <- core.NewImportCommentEdition(op.Id())
449	return nil
450}
451
452func (gi *githubImporter) ensureComment(ctx context.Context, repo *cache.RepoCache, b *cache.BugCache, comment *issueComment, firstEdit *userContentEdit) error {
453	author, err := gi.ensurePerson(ctx, repo, comment.Author)
454	if err != nil {
455		return err
456	}
457
458	_, err = b.ResolveOperationWithMetadata(metaKeyGithubId, parseId(comment.Id))
459	if err == nil {
460		return nil
461	}
462	if err != cache.ErrNoMatchingOp {
463		// real error
464		return err
465	}
466
467	var textInput string
468	if firstEdit != nil {
469		// use the first comment edit: it represents the comment creation itself
470		textInput = string(*firstEdit.Diff)
471	} else {
472		// if there are not comment edits, then the comment struct holds the comment creation
473		textInput = string(comment.Body)
474	}
475	cleanText, err := text.Cleanup(textInput)
476	if err != nil {
477		return err
478	}
479
480	// add comment operation
481	op, err := b.AddCommentRaw(
482		author,
483		comment.CreatedAt.Unix(),
484		cleanText,
485		nil,
486		map[string]string{
487			metaKeyGithubId:  parseId(comment.Id),
488			metaKeyGithubUrl: comment.Url.String(),
489		},
490	)
491	if err != nil {
492		return err
493	}
494
495	gi.out <- core.NewImportComment(op.Id())
496	return nil
497}
498
499// ensurePerson create a bug.Person from the Github data
500func (gi *githubImporter) ensurePerson(ctx context.Context, repo *cache.RepoCache, actor *actor) (*cache.IdentityCache, error) {
501	// When a user has been deleted, Github return a null actor, while displaying a profile named "ghost"
502	// in it's UI. So we need a special case to get it.
503	if actor == nil {
504		return gi.getGhost(ctx, repo)
505	}
506
507	// Look first in the cache
508	i, err := repo.ResolveIdentityImmutableMetadata(metaKeyGithubLogin, string(actor.Login))
509	if err == nil {
510		return i, nil
511	}
512	if entity.IsErrMultipleMatch(err) {
513		return nil, err
514	}
515
516	// importing a new identity
517	var name string
518	var email string
519
520	switch actor.Typename {
521	case "User":
522		if actor.User.Name != nil {
523			name = string(*(actor.User.Name))
524		}
525		email = string(actor.User.Email)
526	case "Organization":
527		if actor.Organization.Name != nil {
528			name = string(*(actor.Organization.Name))
529		}
530		if actor.Organization.Email != nil {
531			email = string(*(actor.Organization.Email))
532		}
533	case "Bot":
534	}
535
536	// Name is not necessarily set, fallback to login as a name is required in the identity
537	if name == "" {
538		name = string(actor.Login)
539	}
540
541	i, err = repo.NewIdentityRaw(
542		name,
543		email,
544		string(actor.Login),
545		string(actor.AvatarUrl),
546		nil,
547		map[string]string{
548			metaKeyGithubLogin: string(actor.Login),
549		},
550	)
551
552	if err != nil {
553		return nil, err
554	}
555
556	gi.out <- core.NewImportIdentity(i.Id())
557	return i, nil
558}
559
560func (gi *githubImporter) getGhost(ctx context.Context, repo *cache.RepoCache) (*cache.IdentityCache, error) {
561	loginName := "ghost"
562	// Look first in the cache
563	i, err := repo.ResolveIdentityImmutableMetadata(metaKeyGithubLogin, loginName)
564	if err == nil {
565		return i, nil
566	}
567	if entity.IsErrMultipleMatch(err) {
568		return nil, err
569	}
570	user, err := gi.mediator.User(ctx, loginName)
571	if err != nil {
572		return nil, err
573	}
574	userName := ""
575	if user.Name != nil {
576		userName = string(*user.Name)
577	}
578	return repo.NewIdentityRaw(
579		userName,
580		"",
581		string(user.Login),
582		string(user.AvatarUrl),
583		nil,
584		map[string]string{
585			metaKeyGithubLogin: string(user.Login),
586		},
587	)
588}
589
590// parseId converts the unusable githubv4.ID (an interface{}) into a string
591func parseId(id githubv4.ID) string {
592	return fmt.Sprintf("%v", id)
593}