import_mediator.go

  1package github
  2
  3import (
  4	"context"
  5	"time"
  6
  7	"github.com/shurcooL/githubv4"
  8)
  9
 10const (
 11	// These values influence how fast the github graphql rate limit is exhausted.
 12	NumIssues        = 40
 13	NumIssueEdits    = 100
 14	NumTimelineItems = 100
 15	NumCommentEdits  = 100
 16
 17	ChanCapacity = 128
 18)
 19
// importMediator provides a convenient interface to retrieve issues from the Github GraphQL API.
//
// A background goroutine started by NewImportMediator runs the queries and
// pushes the results into importEvents; consumers pull them one at a time with
// NextImportEvent and check Error for a failed query.
type importMediator struct {
	// Github graphql client
	gh *rateLimitHandlerClient

	// name of the repository owner on Github
	owner string

	// name of the Github repository
	project string

	// since specifies which issues to import. Issues that have been updated at or after the
	// given date should be imported.
	since time.Time

	// importEvents holds events representing issues, comments, edits, ...
	// In this channel issues are immediately followed by their issue edits and comments are
	// immediately followed by their comment edits.
	// The channel is buffered (ChanCapacity) and is closed by the filling
	// goroutine once it returns.
	importEvents chan ImportEvent

	// Sticky error. It is set by the query helpers whenever a GraphQL query
	// fails and is exposed through Error.
	err error
}
 43
// ImportEvent is the common type of the values sent through the import event
// channel and returned by NextImportEvent. The unexported marker method keeps
// the set of implementing types under the control of this package.
type ImportEvent interface {
	isImportEvent()
}
 47
// isImportEvent marks RateLimitingEvent as an ImportEvent so that it can be
// delivered on the same channel as the issue related events.
func (RateLimitingEvent) isImportEvent() {}
 49
// IssueEvent is the ImportEvent emitted once per retrieved issue. It embeds
// the issue data returned by the issue query.
type IssueEvent struct {
	issue
}

// isImportEvent marks IssueEvent as an ImportEvent.
func (IssueEvent) isImportEvent() {}
 55
// IssueEditEvent is the ImportEvent emitted for one edit of an issue's
// content. It is sent after the IssueEvent of the issue it belongs to.
type IssueEditEvent struct {
	// issueId is the Github node id of the edited issue.
	issueId githubv4.ID
	userContentEdit
}

// isImportEvent marks IssueEditEvent as an ImportEvent.
func (IssueEditEvent) isImportEvent() {}
 62
// TimelineEvent is the ImportEvent emitted for one item of an issue's
// timeline. It is sent after the issue's edit events.
type TimelineEvent struct {
	// issueId is the Github node id of the issue owning the timeline item.
	issueId githubv4.ID
	timelineItem
}

// isImportEvent marks TimelineEvent as an ImportEvent.
func (TimelineEvent) isImportEvent() {}
 69
// CommentEditEvent is the ImportEvent emitted for one edit of an issue
// comment. It is sent after the TimelineEvent carrying the comment itself.
type CommentEditEvent struct {
	// commentId is the Github node id of the edited comment.
	commentId githubv4.ID
	userContentEdit
}

// isImportEvent marks CommentEditEvent as an ImportEvent.
func (CommentEditEvent) isImportEvent() {}
 76
 77func (mm *importMediator) NextImportEvent() ImportEvent {
 78	return <-mm.importEvents
 79}
 80
 81func NewImportMediator(ctx context.Context, client *rateLimitHandlerClient, owner, project string, since time.Time) *importMediator {
 82	mm := importMediator{
 83		gh:           client,
 84		owner:        owner,
 85		project:      project,
 86		since:        since,
 87		importEvents: make(chan ImportEvent, ChanCapacity),
 88		err:          nil,
 89	}
 90	go func() {
 91		mm.fillImportEvents(ctx)
 92		close(mm.importEvents)
 93	}()
 94	return &mm
 95}
 96
// varmap holds the variables of a GraphQL query, keyed by variable name.
type varmap map[string]interface{}
 98
 99func newIssueVars(owner, project string, since time.Time) varmap {
100	return varmap{
101		"owner":             githubv4.String(owner),
102		"name":              githubv4.String(project),
103		"issueSince":        githubv4.DateTime{Time: since},
104		"issueFirst":        githubv4.Int(NumIssues),
105		"issueEditLast":     githubv4.Int(NumIssueEdits),
106		"issueEditBefore":   (*githubv4.String)(nil),
107		"timelineFirst":     githubv4.Int(NumTimelineItems),
108		"timelineAfter":     (*githubv4.String)(nil),
109		"commentEditLast":   githubv4.Int(NumCommentEdits),
110		"commentEditBefore": (*githubv4.String)(nil),
111	}
112}
113
114func newIssueEditVars() varmap {
115	return varmap{
116		"issueEditLast": githubv4.Int(NumIssueEdits),
117	}
118}
119
120func newTimelineVars() varmap {
121	return varmap{
122		"timelineFirst":     githubv4.Int(NumTimelineItems),
123		"commentEditLast":   githubv4.Int(NumCommentEdits),
124		"commentEditBefore": (*githubv4.String)(nil),
125	}
126}
127
128func newCommentEditVars() varmap {
129	return varmap{
130		"commentEditLast": githubv4.Int(NumCommentEdits),
131	}
132}
133
// Error returns the sticky error: the error of the most recent failed GraphQL
// query issued while filling the import event channel, or nil if none failed.
// NOTE(review): err is written by the goroutine started in NewImportMediator;
// presumably callers are expected to read it only after NextImportEvent has
// returned nil (channel closed) — confirm.
func (mm *importMediator) Error() error {
	return mm.err
}
137
138func (mm *importMediator) User(ctx context.Context, loginName string) (*user, error) {
139	query := userQuery{}
140	vars := varmap{"login": githubv4.String(loginName)}
141	if err := mm.gh.queryWithImportEvents(ctx, &query, vars, mm.importEvents); err != nil {
142		return nil, err
143	}
144	return &query.User, nil
145}
146
147func (mm *importMediator) fillImportEvents(ctx context.Context) {
148	initialCursor := githubv4.String("")
149	issues, hasIssues := mm.queryIssue(ctx, initialCursor)
150	for hasIssues {
151		for _, node := range issues.Nodes {
152			select {
153			case <-ctx.Done():
154				return
155			case mm.importEvents <- IssueEvent{node.issue}:
156			}
157
158			// issue edit events follow the issue event
159			mm.fillIssueEditEvents(ctx, &node)
160			// last come the timeline events
161			mm.fillTimelineEvents(ctx, &node)
162		}
163		if !issues.PageInfo.HasNextPage {
164			break
165		}
166		issues, hasIssues = mm.queryIssue(ctx, issues.PageInfo.EndCursor)
167	}
168}
169
170func (mm *importMediator) fillIssueEditEvents(ctx context.Context, issueNode *issueNode) {
171	edits := &issueNode.UserContentEdits
172	hasEdits := true
173	for hasEdits {
174		for edit := range reverse(edits.Nodes) {
175			if edit.Diff == nil || string(*edit.Diff) == "" {
176				// issueEdit.Diff == nil happen if the event is older than early
177				// 2018, Github doesn't have the data before that. Best we can do is
178				// to ignore the event.
179				continue
180			}
181			select {
182			case <-ctx.Done():
183				return
184			case mm.importEvents <- IssueEditEvent{issueId: issueNode.issue.Id, userContentEdit: edit}:
185			}
186		}
187		if !edits.PageInfo.HasPreviousPage {
188			break
189		}
190		edits, hasEdits = mm.queryIssueEdits(ctx, issueNode.issue.Id, edits.PageInfo.EndCursor)
191	}
192}
193
194func (mm *importMediator) queryIssueEdits(ctx context.Context, nid githubv4.ID, cursor githubv4.String) (*userContentEditConnection, bool) {
195	vars := newIssueEditVars()
196	vars["gqlNodeId"] = nid
197	if cursor == "" {
198		vars["issueEditBefore"] = (*githubv4.String)(nil)
199	} else {
200		vars["issueEditBefore"] = cursor
201	}
202	query := issueEditQuery{}
203	if err := mm.gh.queryWithImportEvents(ctx, &query, vars, mm.importEvents); err != nil {
204		mm.err = err
205		return nil, false
206	}
207	connection := &query.Node.Issue.UserContentEdits
208	if len(connection.Nodes) <= 0 {
209		return nil, false
210	}
211	return connection, true
212}
213
214func (mm *importMediator) fillTimelineEvents(ctx context.Context, issueNode *issueNode) {
215	items := &issueNode.TimelineItems
216	hasItems := true
217	for hasItems {
218		for _, item := range items.Nodes {
219			select {
220			case <-ctx.Done():
221				return
222			case mm.importEvents <- TimelineEvent{issueId: issueNode.issue.Id, timelineItem: item}:
223			}
224			if item.Typename == "IssueComment" {
225				// Issue comments are different than other timeline items in that
226				// they may have associated user content edits.
227				// Right after the comment we send the comment edits.
228				mm.fillCommentEdits(ctx, &item)
229			}
230		}
231		if !items.PageInfo.HasNextPage {
232			break
233		}
234		items, hasItems = mm.queryTimeline(ctx, issueNode.issue.Id, items.PageInfo.EndCursor)
235	}
236}
237
238func (mm *importMediator) queryTimeline(ctx context.Context, nid githubv4.ID, cursor githubv4.String) (*timelineItemsConnection, bool) {
239	vars := newTimelineVars()
240	vars["gqlNodeId"] = nid
241	if cursor == "" {
242		vars["timelineAfter"] = (*githubv4.String)(nil)
243	} else {
244		vars["timelineAfter"] = cursor
245	}
246	query := timelineQuery{}
247	if err := mm.gh.queryWithImportEvents(ctx, &query, vars, mm.importEvents); err != nil {
248		mm.err = err
249		return nil, false
250	}
251	connection := &query.Node.Issue.TimelineItems
252	if len(connection.Nodes) <= 0 {
253		return nil, false
254	}
255	return connection, true
256}
257
258func (mm *importMediator) fillCommentEdits(ctx context.Context, item *timelineItem) {
259	// Here we are only concerned with timeline items of type issueComment.
260	if item.Typename != "IssueComment" {
261		return
262	}
263	// First: setup message handling while submitting GraphQL queries.
264	comment := &item.IssueComment
265	edits := &comment.UserContentEdits
266	hasEdits := true
267	for hasEdits {
268		for edit := range reverse(edits.Nodes) {
269			if edit.Diff == nil || string(*edit.Diff) == "" {
270				// issueEdit.Diff == nil happen if the event is older than early
271				// 2018, Github doesn't have the data before that. Best we can do is
272				// to ignore the event.
273				continue
274			}
275			select {
276			case <-ctx.Done():
277				return
278			case mm.importEvents <- CommentEditEvent{commentId: comment.Id, userContentEdit: edit}:
279			}
280		}
281		if !edits.PageInfo.HasPreviousPage {
282			break
283		}
284		edits, hasEdits = mm.queryCommentEdits(ctx, comment.Id, edits.PageInfo.EndCursor)
285	}
286}
287
288func (mm *importMediator) queryCommentEdits(ctx context.Context, nid githubv4.ID, cursor githubv4.String) (*userContentEditConnection, bool) {
289	vars := newCommentEditVars()
290	vars["gqlNodeId"] = nid
291	if cursor == "" {
292		vars["commentEditBefore"] = (*githubv4.String)(nil)
293	} else {
294		vars["commentEditBefore"] = cursor
295	}
296	query := commentEditQuery{}
297	if err := mm.gh.queryWithImportEvents(ctx, &query, vars, mm.importEvents); err != nil {
298		mm.err = err
299		return nil, false
300	}
301	connection := &query.Node.IssueComment.UserContentEdits
302	if len(connection.Nodes) <= 0 {
303		return nil, false
304	}
305	return connection, true
306}
307
308func (mm *importMediator) queryIssue(ctx context.Context, cursor githubv4.String) (*issueConnection, bool) {
309	vars := newIssueVars(mm.owner, mm.project, mm.since)
310	if cursor == "" {
311		vars["issueAfter"] = (*githubv4.String)(nil)
312	} else {
313		vars["issueAfter"] = cursor
314	}
315	query := issueQuery{}
316	if err := mm.gh.queryWithImportEvents(ctx, &query, vars, mm.importEvents); err != nil {
317		mm.err = err
318		return nil, false
319	}
320	connection := &query.Repository.Issues
321	if len(connection.Nodes) <= 0 {
322		return nil, false
323	}
324	return connection, true
325}
326
327func reverse(eds []userContentEdit) chan userContentEdit {
328	ret := make(chan userContentEdit)
329	go func() {
330		for i := range eds {
331			ret <- eds[len(eds)-1-i]
332		}
333		close(ret)
334	}()
335	return ret
336}