import.go

  1package github
  2
  3import (
  4	"context"
  5	"fmt"
  6	"strings"
  7
  8	"github.com/MichaelMure/git-bug/bridge/core"
  9	"github.com/MichaelMure/git-bug/bug"
 10	"github.com/MichaelMure/git-bug/cache"
 11	"github.com/MichaelMure/git-bug/identity"
 12	"github.com/MichaelMure/git-bug/util/git"
 13	"github.com/shurcooL/githubv4"
 14)
 15
 16const keyGithubId = "github-id"
 17const keyGithubUrl = "github-url"
 18const keyGithubLogin = "github-login"
 19
// githubImporter implements the Importer interface
type githubImporter struct {
	// client is the Github GraphQL client, built from the configuration in Init
	client *githubv4.Client
	// conf is the bridge configuration received in Init
	conf   core.Configuration
}
 25
 26func (gi *githubImporter) Init(conf core.Configuration) error {
 27	gi.conf = conf
 28	gi.client = buildClient(conf)
 29
 30	return nil
 31}
 32
 33func (gi *githubImporter) ImportAll(repo *cache.RepoCache) error {
 34	q := &issueTimelineQuery{}
 35	variables := map[string]interface{}{
 36		"owner":         githubv4.String(gi.conf[keyUser]),
 37		"name":          githubv4.String(gi.conf[keyProject]),
 38		"issueFirst":    githubv4.Int(1),
 39		"issueAfter":    (*githubv4.String)(nil),
 40		"timelineFirst": githubv4.Int(10),
 41		"timelineAfter": (*githubv4.String)(nil),
 42
 43		// Fun fact, github provide the comment edition in reverse chronological
 44		// order, because haha. Look at me, I'm dying of laughter.
 45		"issueEditLast":     githubv4.Int(10),
 46		"issueEditBefore":   (*githubv4.String)(nil),
 47		"commentEditLast":   githubv4.Int(10),
 48		"commentEditBefore": (*githubv4.String)(nil),
 49	}
 50
 51	var b *cache.BugCache
 52
 53	for {
 54		err := gi.client.Query(context.TODO(), &q, variables)
 55		if err != nil {
 56			return err
 57		}
 58
 59		if len(q.Repository.Issues.Nodes) == 0 {
 60			return nil
 61		}
 62
 63		issue := q.Repository.Issues.Nodes[0]
 64
 65		if b == nil {
 66			b, err = gi.ensureIssue(repo, issue, variables)
 67			if err != nil {
 68				return err
 69			}
 70		}
 71
 72		for _, itemEdge := range q.Repository.Issues.Nodes[0].Timeline.Edges {
 73			err = gi.ensureTimelineItem(repo, b, itemEdge.Cursor, itemEdge.Node, variables)
 74			if err != nil {
 75				return err
 76			}
 77		}
 78
 79		if !issue.Timeline.PageInfo.HasNextPage {
 80			err = b.CommitAsNeeded()
 81			if err != nil {
 82				return err
 83			}
 84
 85			b = nil
 86
 87			if !q.Repository.Issues.PageInfo.HasNextPage {
 88				break
 89			}
 90
 91			variables["issueAfter"] = githubv4.NewString(q.Repository.Issues.PageInfo.EndCursor)
 92			variables["timelineAfter"] = (*githubv4.String)(nil)
 93			continue
 94		}
 95
 96		variables["timelineAfter"] = githubv4.NewString(issue.Timeline.PageInfo.EndCursor)
 97	}
 98
 99	return nil
100}
101
102func (gi *githubImporter) Import(repo *cache.RepoCache, id string) error {
103	fmt.Println("IMPORT")
104
105	return nil
106}
107
108func (gi *githubImporter) ensureIssue(repo *cache.RepoCache, issue issueTimeline, rootVariables map[string]interface{}) (*cache.BugCache, error) {
109	fmt.Printf("import issue: %s\n", issue.Title)
110
111	b, err := repo.ResolveBugCreateMetadata(keyGithubId, parseId(issue.Id))
112	if err != nil && err != bug.ErrBugNotExist {
113		return nil, err
114	}
115
116	author, err := gi.makePerson(repo, issue.Author)
117	if err != nil {
118		return nil, err
119	}
120
121	// if there is no edit, the UserContentEdits given by github is empty. That
122	// means that the original message is given by the issue message.
123	//
124	// if there is edits, the UserContentEdits given by github contains both the
125	// original message and the following edits. The issue message give the last
126	// version so we don't care about that.
127	//
128	// the tricky part: for an issue older than the UserContentEdits API, github
129	// doesn't have the previous message version anymore and give an edition
130	// with .Diff == nil. We have to filter them.
131
132	if len(issue.UserContentEdits.Nodes) == 0 {
133		if err == bug.ErrBugNotExist {
134			b, err = repo.NewBugRaw(
135				author,
136				issue.CreatedAt.Unix(),
137				// Todo: this might not be the initial title, we need to query the
138				// timeline to be sure
139				issue.Title,
140				cleanupText(string(issue.Body)),
141				nil,
142				map[string]string{
143					keyGithubId:  parseId(issue.Id),
144					keyGithubUrl: issue.Url.String(),
145				},
146			)
147			if err != nil {
148				return nil, err
149			}
150		}
151
152		return b, nil
153	}
154
155	// reverse the order, because github
156	reverseEdits(issue.UserContentEdits.Nodes)
157
158	for i, edit := range issue.UserContentEdits.Nodes {
159		if b != nil && i == 0 {
160			// The first edit in the github result is the creation itself, we already have that
161			continue
162		}
163
164		if b == nil {
165			if edit.Diff == nil {
166				// not enough data given by github for old edit, ignore them
167				continue
168			}
169
170			// we create the bug as soon as we have a legit first edition
171			b, err = repo.NewBugRaw(
172				author,
173				issue.CreatedAt.Unix(),
174				// Todo: this might not be the initial title, we need to query the
175				// timeline to be sure
176				issue.Title,
177				cleanupText(string(*edit.Diff)),
178				nil,
179				map[string]string{
180					keyGithubId:  parseId(issue.Id),
181					keyGithubUrl: issue.Url.String(),
182				},
183			)
184			if err != nil {
185				return nil, err
186			}
187			continue
188		}
189
190		target, err := b.ResolveTargetWithMetadata(keyGithubId, parseId(issue.Id))
191		if err != nil {
192			return nil, err
193		}
194
195		err = gi.ensureCommentEdit(repo, b, target, edit)
196		if err != nil {
197			return nil, err
198		}
199	}
200
201	if !issue.UserContentEdits.PageInfo.HasNextPage {
202		// if we still didn't get a legit edit, create the bug from the issue data
203		if b == nil {
204			return repo.NewBugRaw(
205				author,
206				issue.CreatedAt.Unix(),
207				// Todo: this might not be the initial title, we need to query the
208				// timeline to be sure
209				issue.Title,
210				cleanupText(string(issue.Body)),
211				nil,
212				map[string]string{
213					keyGithubId:  parseId(issue.Id),
214					keyGithubUrl: issue.Url.String(),
215				},
216			)
217		}
218		return b, nil
219	}
220
221	// We have more edit, querying them
222
223	q := &issueEditQuery{}
224	variables := map[string]interface{}{
225		"owner":           rootVariables["owner"],
226		"name":            rootVariables["name"],
227		"issueFirst":      rootVariables["issueFirst"],
228		"issueAfter":      rootVariables["issueAfter"],
229		"issueEditLast":   githubv4.Int(10),
230		"issueEditBefore": issue.UserContentEdits.PageInfo.StartCursor,
231	}
232
233	for {
234		err := gi.client.Query(context.TODO(), &q, variables)
235		if err != nil {
236			return nil, err
237		}
238
239		edits := q.Repository.Issues.Nodes[0].UserContentEdits
240
241		if len(edits.Nodes) == 0 {
242			return b, nil
243		}
244
245		for _, edit := range edits.Nodes {
246			if b == nil {
247				if edit.Diff == nil {
248					// not enough data given by github for old edit, ignore them
249					continue
250				}
251
252				// we create the bug as soon as we have a legit first edition
253				b, err = repo.NewBugRaw(
254					author,
255					issue.CreatedAt.Unix(),
256					// Todo: this might not be the initial title, we need to query the
257					// timeline to be sure
258					issue.Title,
259					cleanupText(string(*edit.Diff)),
260					nil,
261					map[string]string{
262						keyGithubId:  parseId(issue.Id),
263						keyGithubUrl: issue.Url.String(),
264					},
265				)
266				if err != nil {
267					return nil, err
268				}
269				continue
270			}
271
272			target, err := b.ResolveTargetWithMetadata(keyGithubId, parseId(issue.Id))
273			if err != nil {
274				return nil, err
275			}
276
277			err = gi.ensureCommentEdit(repo, b, target, edit)
278			if err != nil {
279				return nil, err
280			}
281		}
282
283		if !edits.PageInfo.HasNextPage {
284			break
285		}
286
287		variables["issueEditBefore"] = edits.PageInfo.StartCursor
288	}
289
290	// TODO: check + import files
291
292	// if we still didn't get a legit edit, create the bug from the issue data
293	if b == nil {
294		return repo.NewBugRaw(
295			author,
296			issue.CreatedAt.Unix(),
297			// Todo: this might not be the initial title, we need to query the
298			// timeline to be sure
299			issue.Title,
300			cleanupText(string(issue.Body)),
301			nil,
302			map[string]string{
303				keyGithubId:  parseId(issue.Id),
304				keyGithubUrl: issue.Url.String(),
305			},
306		)
307	}
308
309	return b, nil
310}
311
312func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.BugCache, cursor githubv4.String, item timelineItem, rootVariables map[string]interface{}) error {
313	fmt.Printf("import %s\n", item.Typename)
314
315	switch item.Typename {
316	case "IssueComment":
317		return gi.ensureComment(repo, b, cursor, item.IssueComment, rootVariables)
318
319	case "LabeledEvent":
320		id := parseId(item.LabeledEvent.Id)
321		_, err := b.ResolveTargetWithMetadata(keyGithubId, id)
322		if err != cache.ErrNoMatchingOp {
323			return err
324		}
325		author, err := gi.makePerson(repo, item.LabeledEvent.Actor)
326		if err != nil {
327			return err
328		}
329		_, err = b.ChangeLabelsRaw(
330			author,
331			item.LabeledEvent.CreatedAt.Unix(),
332			[]string{
333				string(item.LabeledEvent.Label.Name),
334			},
335			nil,
336			map[string]string{keyGithubId: id},
337		)
338		return err
339
340	case "UnlabeledEvent":
341		id := parseId(item.UnlabeledEvent.Id)
342		_, err := b.ResolveTargetWithMetadata(keyGithubId, id)
343		if err != cache.ErrNoMatchingOp {
344			return err
345		}
346		author, err := gi.makePerson(repo, item.UnlabeledEvent.Actor)
347		if err != nil {
348			return err
349		}
350		_, err = b.ChangeLabelsRaw(
351			author,
352			item.UnlabeledEvent.CreatedAt.Unix(),
353			nil,
354			[]string{
355				string(item.UnlabeledEvent.Label.Name),
356			},
357			map[string]string{keyGithubId: id},
358		)
359		return err
360
361	case "ClosedEvent":
362		id := parseId(item.ClosedEvent.Id)
363		_, err := b.ResolveTargetWithMetadata(keyGithubId, id)
364		if err != cache.ErrNoMatchingOp {
365			return err
366		}
367		author, err := gi.makePerson(repo, item.ClosedEvent.Actor)
368		if err != nil {
369			return err
370		}
371		return b.CloseRaw(
372			author,
373			item.ClosedEvent.CreatedAt.Unix(),
374			map[string]string{keyGithubId: id},
375		)
376
377	case "ReopenedEvent":
378		id := parseId(item.ReopenedEvent.Id)
379		_, err := b.ResolveTargetWithMetadata(keyGithubId, id)
380		if err != cache.ErrNoMatchingOp {
381			return err
382		}
383		author, err := gi.makePerson(repo, item.ReopenedEvent.Actor)
384		if err != nil {
385			return err
386		}
387		return b.OpenRaw(
388			author,
389			item.ReopenedEvent.CreatedAt.Unix(),
390			map[string]string{keyGithubId: id},
391		)
392
393	case "RenamedTitleEvent":
394		id := parseId(item.RenamedTitleEvent.Id)
395		_, err := b.ResolveTargetWithMetadata(keyGithubId, id)
396		if err != cache.ErrNoMatchingOp {
397			return err
398		}
399		author, err := gi.makePerson(repo, item.RenamedTitleEvent.Actor)
400		if err != nil {
401			return err
402		}
403		return b.SetTitleRaw(
404			author,
405			item.RenamedTitleEvent.CreatedAt.Unix(),
406			string(item.RenamedTitleEvent.CurrentTitle),
407			map[string]string{keyGithubId: id},
408		)
409
410	default:
411		fmt.Println("ignore event ", item.Typename)
412	}
413
414	return nil
415}
416
417func (gi *githubImporter) ensureComment(repo *cache.RepoCache, b *cache.BugCache, cursor githubv4.String, comment issueComment, rootVariables map[string]interface{}) error {
418	target, err := b.ResolveTargetWithMetadata(keyGithubId, parseId(comment.Id))
419	if err != nil && err != cache.ErrNoMatchingOp {
420		// real error
421		return err
422	}
423
424	author, err := gi.makePerson(repo, comment.Author)
425	if err != nil {
426		return err
427	}
428
429	// if there is no edit, the UserContentEdits given by github is empty. That
430	// means that the original message is given by the comment message.
431	//
432	// if there is edits, the UserContentEdits given by github contains both the
433	// original message and the following edits. The comment message give the last
434	// version so we don't care about that.
435	//
436	// the tricky part: for a comment older than the UserContentEdits API, github
437	// doesn't have the previous message version anymore and give an edition
438	// with .Diff == nil. We have to filter them.
439
440	if len(comment.UserContentEdits.Nodes) == 0 {
441		if err == cache.ErrNoMatchingOp {
442			err = b.AddCommentRaw(
443				author,
444				comment.CreatedAt.Unix(),
445				cleanupText(string(comment.Body)),
446				nil,
447				map[string]string{
448					keyGithubId: parseId(comment.Id),
449				},
450			)
451
452			if err != nil {
453				return err
454			}
455		}
456
457		return nil
458	}
459
460	// reverse the order, because github
461	reverseEdits(comment.UserContentEdits.Nodes)
462
463	for i, edit := range comment.UserContentEdits.Nodes {
464		if target != "" && i == 0 {
465			// The first edit in the github result is the comment creation itself, we already have that
466			continue
467		}
468
469		if target == "" {
470			if edit.Diff == nil {
471				// not enough data given by github for old edit, ignore them
472				continue
473			}
474
475			err = b.AddCommentRaw(
476				author,
477				comment.CreatedAt.Unix(),
478				cleanupText(string(*edit.Diff)),
479				nil,
480				map[string]string{
481					keyGithubId:  parseId(comment.Id),
482					keyGithubUrl: comment.Url.String(),
483				},
484			)
485			if err != nil {
486				return err
487			}
488		}
489
490		err := gi.ensureCommentEdit(repo, b, target, edit)
491		if err != nil {
492			return err
493		}
494	}
495
496	if !comment.UserContentEdits.PageInfo.HasNextPage {
497		return nil
498	}
499
500	// We have more edit, querying them
501
502	q := &commentEditQuery{}
503	variables := map[string]interface{}{
504		"owner":             rootVariables["owner"],
505		"name":              rootVariables["name"],
506		"issueFirst":        rootVariables["issueFirst"],
507		"issueAfter":        rootVariables["issueAfter"],
508		"timelineFirst":     githubv4.Int(1),
509		"timelineAfter":     cursor,
510		"commentEditLast":   githubv4.Int(10),
511		"commentEditBefore": comment.UserContentEdits.PageInfo.StartCursor,
512	}
513
514	for {
515		err := gi.client.Query(context.TODO(), &q, variables)
516		if err != nil {
517			return err
518		}
519
520		edits := q.Repository.Issues.Nodes[0].Timeline.Nodes[0].IssueComment.UserContentEdits
521
522		if len(edits.Nodes) == 0 {
523			return nil
524		}
525
526		for i, edit := range edits.Nodes {
527			if i == 0 {
528				// The first edit in the github result is the creation itself, we already have that
529				continue
530			}
531
532			err := gi.ensureCommentEdit(repo, b, target, edit)
533			if err != nil {
534				return err
535			}
536		}
537
538		if !edits.PageInfo.HasNextPage {
539			break
540		}
541
542		variables["commentEditBefore"] = edits.PageInfo.StartCursor
543	}
544
545	// TODO: check + import files
546
547	return nil
548}
549
550func (gi *githubImporter) ensureCommentEdit(repo *cache.RepoCache, b *cache.BugCache, target git.Hash, edit userContentEdit) error {
551	if edit.Diff == nil {
552		// this happen if the event is older than early 2018, Github doesn't have the data before that.
553		// Best we can do is to ignore the event.
554		return nil
555	}
556
557	if edit.Editor == nil {
558		return fmt.Errorf("no editor")
559	}
560
561	_, err := b.ResolveTargetWithMetadata(keyGithubId, parseId(edit.Id))
562	if err == nil {
563		// already imported
564		return nil
565	}
566	if err != cache.ErrNoMatchingOp {
567		// real error
568		return err
569	}
570
571	fmt.Println("import edition")
572
573	editor, err := gi.makePerson(repo, edit.Editor)
574	if err != nil {
575		return err
576	}
577
578	switch {
579	case edit.DeletedAt != nil:
580		// comment deletion, not supported yet
581
582	case edit.DeletedAt == nil:
583		// comment edition
584		err := b.EditCommentRaw(
585			editor,
586			edit.CreatedAt.Unix(),
587			target,
588			cleanupText(string(*edit.Diff)),
589			map[string]string{
590				keyGithubId: parseId(edit.Id),
591			},
592		)
593		if err != nil {
594			return err
595		}
596	}
597
598	return nil
599}
600
601// makePerson create a bug.Person from the Github data
602func (gi *githubImporter) makePerson(repo *cache.RepoCache, actor *actor) (*identity.Identity, error) {
603	// When a user has been deleted, Github return a null actor, while displaying a profile named "ghost"
604	// in it's UI. So we need a special case to get it.
605	if actor == nil {
606		return gi.getGhost(repo)
607	}
608
609	// Look first in the cache
610	i, err := repo.ResolveIdentityImmutableMetadata(keyGithubLogin, string(actor.Login))
611	if err == nil {
612		return i, nil
613	}
614	if _, ok := err.(identity.ErrMultipleMatch); ok {
615		return nil, err
616	}
617
618	var name string
619	var email string
620
621	switch actor.Typename {
622	case "User":
623		if actor.User.Name != nil {
624			name = string(*(actor.User.Name))
625		}
626		email = string(actor.User.Email)
627	case "Organization":
628		if actor.Organization.Name != nil {
629			name = string(*(actor.Organization.Name))
630		}
631		if actor.Organization.Email != nil {
632			email = string(*(actor.Organization.Email))
633		}
634	case "Bot":
635	}
636
637	return repo.NewIdentityRaw(
638		name,
639		email,
640		string(actor.Login),
641		string(actor.AvatarUrl),
642		map[string]string{
643			keyGithubLogin: string(actor.Login),
644		},
645	)
646}
647
648func (gi *githubImporter) getGhost(repo *cache.RepoCache) (*identity.Identity, error) {
649	// Look first in the cache
650	i, err := repo.ResolveIdentityImmutableMetadata(keyGithubLogin, "ghost")
651	if err == nil {
652		return i, nil
653	}
654	if _, ok := err.(identity.ErrMultipleMatch); ok {
655		return nil, err
656	}
657
658	var q userQuery
659
660	variables := map[string]interface{}{
661		"login": githubv4.String("ghost"),
662	}
663
664	err = gi.client.Query(context.TODO(), &q, variables)
665	if err != nil {
666		return nil, err
667	}
668
669	var name string
670	if q.User.Name != nil {
671		name = string(*q.User.Name)
672	}
673
674	return repo.NewIdentityRaw(
675		name,
676		string(q.User.Email),
677		string(q.User.Login),
678		string(q.User.AvatarUrl),
679		map[string]string{
680			keyGithubLogin: string(q.User.Login),
681		},
682	)
683}
684
685// parseId convert the unusable githubv4.ID (an interface{}) into a string
686func parseId(id githubv4.ID) string {
687	return fmt.Sprintf("%v", id)
688}
689
// cleanupText normalizes a text coming from Github: the windows new lines
// ("\r\n") become "\n", and the leading and trailing white space, which is
// present in the data but not displayed in the Github UI, is trimmed.
func cleanupText(text string) string {
	unixNewLines := strings.NewReplacer("\r\n", "\n").Replace(text)
	return strings.TrimSpace(unixNewLines)
}
697
698func reverseEdits(edits []userContentEdit) []userContentEdit {
699	for i, j := 0, len(edits)-1; i < j; i, j = i+1, j-1 {
700		edits[i], edits[j] = edits[j], edits[i]
701	}
702	return edits
703}