1package github
  2
  3import (
  4	"context"
  5	"time"
  6
  7	"github.com/shurcooL/githubv4"
  8)
  9
 10const (
 11	// These values influence how fast the github graphql rate limit is exhausted.
 12
 13	NumIssues        = 40
 14	NumIssueEdits    = 100
 15	NumTimelineItems = 100
 16	NumCommentEdits  = 100
 17
 18	ChanCapacity = 128
 19)
 20
 21// importMediator provides a convenient interface to retrieve issues from the Github GraphQL API.
 22type importMediator struct {
 23	// Github graphql client
 24	gh *rateLimitHandlerClient
 25
 26	// name of the repository owner on Github
 27	owner string
 28
 29	// name of the Github repository
 30	project string
 31
 32	// since specifies which issues to import. Issues that have been updated at or after the
 33	// given date should be imported.
 34	since time.Time
 35
 36	// importEvents holds events representing issues, comments, edits, ...
 37	// In this channel issues are immediately followed by their issue edits and comments are
 38	// immediately followed by their comment edits.
 39	importEvents chan ImportEvent
 40
 41	// Sticky error
 42	err error
 43}
 44
 45func NewImportMediator(ctx context.Context, client *rateLimitHandlerClient, owner, project string, since time.Time) *importMediator {
 46	mm := importMediator{
 47		gh:           client,
 48		owner:        owner,
 49		project:      project,
 50		since:        since,
 51		importEvents: make(chan ImportEvent, ChanCapacity),
 52		err:          nil,
 53	}
 54
 55	go mm.start(ctx)
 56
 57	return &mm
 58}
 59
 60func (mm *importMediator) start(ctx context.Context) {
 61	ctx, cancel := context.WithCancel(ctx)
 62	mm.fillImportEvents(ctx)
 63	// Make sure we cancel everything when we are done, instead of relying on the parent context
 64	// This should unblock pending send to the channel if the capacity was reached and avoid a panic/race when closing.
 65	cancel()
 66	close(mm.importEvents)
 67}
 68
 69// NextImportEvent returns the next ImportEvent, or nil if done.
 70func (mm *importMediator) NextImportEvent() ImportEvent {
 71	return <-mm.importEvents
 72}
 73
// Error returns the error recorded by the most recent failed issue, timeline or
// edit query, or nil if none of them failed. Errors returned by User are not
// recorded here.
//
// NOTE(review): mm.err is written without synchronization by the goroutine
// running start; reading it is presumably only safe after NextImportEvent has
// returned nil — confirm against the callers.
func (mm *importMediator) Error() error {
	return mm.err
}
 77
 78func (mm *importMediator) User(ctx context.Context, loginName string) (*user, error) {
 79	query := userQuery{}
 80	vars := varmap{"login": githubv4.String(loginName)}
 81	if err := mm.gh.queryImport(ctx, &query, vars, mm.importEvents); err != nil {
 82		return nil, err
 83	}
 84	return &query.User, nil
 85}
 86
 87func (mm *importMediator) fillImportEvents(ctx context.Context) {
 88	initialCursor := githubv4.String("")
 89	issues, hasIssues := mm.queryIssue(ctx, initialCursor)
 90	for hasIssues {
 91		for _, node := range issues.Nodes {
 92			select {
 93			case <-ctx.Done():
 94				return
 95			case mm.importEvents <- IssueEvent{node.issue}:
 96			}
 97
 98			// issue edit events follow the issue event
 99			mm.fillIssueEditEvents(ctx, &node)
100			// last come the timeline events
101			mm.fillTimelineEvents(ctx, &node)
102		}
103		if !issues.PageInfo.HasNextPage {
104			break
105		}
106		issues, hasIssues = mm.queryIssue(ctx, issues.PageInfo.EndCursor)
107	}
108}
109
110func (mm *importMediator) fillIssueEditEvents(ctx context.Context, issueNode *issueNode) {
111	edits := &issueNode.UserContentEdits
112	hasEdits := true
113	for hasEdits {
114		for edit := range reverse(edits.Nodes) {
115			if edit.Diff == nil || string(*edit.Diff) == "" {
116				// issueEdit.Diff == nil happen if the event is older than early
117				// 2018, Github doesn't have the data before that. Best we can do is
118				// to ignore the event.
119				continue
120			}
121			select {
122			case <-ctx.Done():
123				return
124			case mm.importEvents <- IssueEditEvent{issueId: issueNode.issue.Id, userContentEdit: edit}:
125			}
126		}
127		if !edits.PageInfo.HasPreviousPage {
128			break
129		}
130		edits, hasEdits = mm.queryIssueEdits(ctx, issueNode.issue.Id, edits.PageInfo.EndCursor)
131	}
132}
133
134func (mm *importMediator) queryIssueEdits(ctx context.Context, nid githubv4.ID, cursor githubv4.String) (*userContentEditConnection, bool) {
135	vars := newIssueEditVars()
136	vars["gqlNodeId"] = nid
137	if cursor == "" {
138		vars["issueEditBefore"] = (*githubv4.String)(nil)
139	} else {
140		vars["issueEditBefore"] = cursor
141	}
142	query := issueEditQuery{}
143	if err := mm.gh.queryImport(ctx, &query, vars, mm.importEvents); err != nil {
144		mm.err = err
145		return nil, false
146	}
147	connection := &query.Node.Issue.UserContentEdits
148	if len(connection.Nodes) <= 0 {
149		return nil, false
150	}
151	return connection, true
152}
153
154func (mm *importMediator) fillTimelineEvents(ctx context.Context, issueNode *issueNode) {
155	items := &issueNode.TimelineItems
156	hasItems := true
157	for hasItems {
158		for _, item := range items.Nodes {
159			select {
160			case <-ctx.Done():
161				return
162			case mm.importEvents <- TimelineEvent{issueId: issueNode.issue.Id, timelineItem: item}:
163			}
164			if item.Typename == "IssueComment" {
165				// Issue comments are different than other timeline items in that
166				// they may have associated user content edits.
167				// Right after the comment we send the comment edits.
168				mm.fillCommentEdits(ctx, &item)
169			}
170		}
171		if !items.PageInfo.HasNextPage {
172			break
173		}
174		items, hasItems = mm.queryTimeline(ctx, issueNode.issue.Id, items.PageInfo.EndCursor)
175	}
176}
177
178func (mm *importMediator) queryTimeline(ctx context.Context, nid githubv4.ID, cursor githubv4.String) (*timelineItemsConnection, bool) {
179	vars := newTimelineVars()
180	vars["gqlNodeId"] = nid
181	if cursor == "" {
182		vars["timelineAfter"] = (*githubv4.String)(nil)
183	} else {
184		vars["timelineAfter"] = cursor
185	}
186	query := timelineQuery{}
187	if err := mm.gh.queryImport(ctx, &query, vars, mm.importEvents); err != nil {
188		mm.err = err
189		return nil, false
190	}
191	connection := &query.Node.Issue.TimelineItems
192	if len(connection.Nodes) <= 0 {
193		return nil, false
194	}
195	return connection, true
196}
197
198func (mm *importMediator) fillCommentEdits(ctx context.Context, item *timelineItem) {
199	// Here we are only concerned with timeline items of type issueComment.
200	if item.Typename != "IssueComment" {
201		return
202	}
203	// First: setup message handling while submitting GraphQL queries.
204	comment := &item.IssueComment
205	edits := &comment.UserContentEdits
206	hasEdits := true
207	for hasEdits {
208		for edit := range reverse(edits.Nodes) {
209			if edit.Diff == nil || string(*edit.Diff) == "" {
210				// issueEdit.Diff == nil happen if the event is older than early
211				// 2018, Github doesn't have the data before that. Best we can do is
212				// to ignore the event.
213				continue
214			}
215			select {
216			case <-ctx.Done():
217				return
218			case mm.importEvents <- CommentEditEvent{commentId: comment.Id, userContentEdit: edit}:
219			}
220		}
221		if !edits.PageInfo.HasPreviousPage {
222			break
223		}
224		edits, hasEdits = mm.queryCommentEdits(ctx, comment.Id, edits.PageInfo.EndCursor)
225	}
226}
227
228func (mm *importMediator) queryCommentEdits(ctx context.Context, nid githubv4.ID, cursor githubv4.String) (*userContentEditConnection, bool) {
229	vars := newCommentEditVars()
230	vars["gqlNodeId"] = nid
231	if cursor == "" {
232		vars["commentEditBefore"] = (*githubv4.String)(nil)
233	} else {
234		vars["commentEditBefore"] = cursor
235	}
236	query := commentEditQuery{}
237	if err := mm.gh.queryImport(ctx, &query, vars, mm.importEvents); err != nil {
238		mm.err = err
239		return nil, false
240	}
241	connection := &query.Node.IssueComment.UserContentEdits
242	if len(connection.Nodes) <= 0 {
243		return nil, false
244	}
245	return connection, true
246}
247
248func (mm *importMediator) queryIssue(ctx context.Context, cursor githubv4.String) (*issueConnection, bool) {
249	vars := newIssueVars(mm.owner, mm.project, mm.since)
250	if cursor == "" {
251		vars["issueAfter"] = (*githubv4.String)(nil)
252	} else {
253		vars["issueAfter"] = cursor
254	}
255	query := issueQuery{}
256	if err := mm.gh.queryImport(ctx, &query, vars, mm.importEvents); err != nil {
257		mm.err = err
258		return nil, false
259	}
260	connection := &query.Repository.Issues
261	if len(connection.Nodes) <= 0 {
262		return nil, false
263	}
264	return connection, true
265}
266
267func reverse(eds []userContentEdit) chan userContentEdit {
268	ret := make(chan userContentEdit)
269	go func() {
270		for i := range eds {
271			ret <- eds[len(eds)-1-i]
272		}
273		close(ret)
274	}()
275	return ret
276}
277
// varmap holds the variables of a Github GraphQL query, keyed by variable
// name: page sizes, pagination cursors, node ids, repository owner and name, ...
type varmap map[string]interface{}
280
281func newIssueVars(owner, project string, since time.Time) varmap {
282	return varmap{
283		"owner":             githubv4.String(owner),
284		"name":              githubv4.String(project),
285		"issueSince":        githubv4.DateTime{Time: since},
286		"issueFirst":        githubv4.Int(NumIssues),
287		"issueEditLast":     githubv4.Int(NumIssueEdits),
288		"issueEditBefore":   (*githubv4.String)(nil),
289		"timelineFirst":     githubv4.Int(NumTimelineItems),
290		"timelineAfter":     (*githubv4.String)(nil),
291		"commentEditLast":   githubv4.Int(NumCommentEdits),
292		"commentEditBefore": (*githubv4.String)(nil),
293	}
294}
295
296func newIssueEditVars() varmap {
297	return varmap{
298		"issueEditLast": githubv4.Int(NumIssueEdits),
299	}
300}
301
302func newTimelineVars() varmap {
303	return varmap{
304		"timelineFirst":     githubv4.Int(NumTimelineItems),
305		"commentEditLast":   githubv4.Int(NumCommentEdits),
306		"commentEditBefore": (*githubv4.String)(nil),
307	}
308}
309
310func newCommentEditVars() varmap {
311	return varmap{
312		"commentEditLast": githubv4.Int(NumCommentEdits),
313	}
314}