1package github
2
3import (
4 "context"
5 "time"
6
7 "github.com/shurcooL/githubv4"
8)
9
// Page sizes used when querying Github.
// These values influence how fast the github graphql rate limit is exhausted.
const (
	// NumIssues is passed as the "issueFirst" query variable.
	NumIssues = 40
	// NumIssueEdits is passed as the "issueEditLast" query variable.
	NumIssueEdits = 100
	// NumTimelineItems is passed as the "timelineFirst" query variable.
	NumTimelineItems = 100
	// NumCommentEdits is passed as the "commentEditLast" query variable.
	NumCommentEdits = 100
)

// ChanCapacity is the buffer size of the channel carrying the import events.
const ChanCapacity = 128
20
// importMediator provides a convenient interface to retrieve issues from the Github GraphQL API.
//
// NewImportMediator starts a goroutine that queries the API page by page and publishes
// what it finds on importEvents; consumers read the events one at a time with
// NextImportEvent and check Error for a recorded query failure.
type importMediator struct {
	// Github graphql client; every query goes through its queryImport method.
	gh *rateLimitHandlerClient

	// name of the repository owner on Github
	owner string

	// name of the Github repository
	project string

	// since specifies which issues to import. Issues that have been updated at or after the
	// given date should be imported.
	since time.Time

	// importEvents holds events representing issues, comments, edits, ...
	// In this channel issues are immediately followed by their issue edits and comments are
	// immediately followed by their comment edits.
	// The channel is buffered (ChanCapacity) and is closed by start once filling is over.
	importEvents chan ImportEvent

	// Sticky error: set by the query helpers when a GraphQL query fails and returned by
	// Error. A later failure overwrites an earlier one.
	err error
}
44
45func NewImportMediator(ctx context.Context, client *rateLimitHandlerClient, owner, project string, since time.Time) *importMediator {
46 mm := importMediator{
47 gh: client,
48 owner: owner,
49 project: project,
50 since: since,
51 importEvents: make(chan ImportEvent, ChanCapacity),
52 err: nil,
53 }
54
55 go mm.start(ctx)
56
57 return &mm
58}
59
60func (mm *importMediator) start(ctx context.Context) {
61 ctx, cancel := context.WithCancel(ctx)
62 mm.fillImportEvents(ctx)
63 // Make sure we cancel everything when we are done, instead of relying on the parent context
64 // This should unblock pending send to the channel if the capacity was reached and avoid a panic/race when closing.
65 cancel()
66 close(mm.importEvents)
67}
68
69// NextImportEvent returns the next ImportEvent, or nil if done.
70func (mm *importMediator) NextImportEvent() ImportEvent {
71 return <-mm.importEvents
72}
73
// Error returns the error recorded in mm.err by the query helpers, or nil if no
// query failure was recorded.
// NOTE(review): mm.err is written by the goroutine started in NewImportMediator
// without any locking; presumably callers only read it after NextImportEvent has
// returned nil — confirm.
func (mm *importMediator) Error() error {
	return mm.err
}
77
78func (mm *importMediator) User(ctx context.Context, loginName string) (*user, error) {
79 query := userQuery{}
80 vars := varmap{"login": githubv4.String(loginName)}
81 if err := mm.gh.queryImport(ctx, &query, vars, mm.importEvents); err != nil {
82 return nil, err
83 }
84 return &query.User, nil
85}
86
87func (mm *importMediator) fillImportEvents(ctx context.Context) {
88 initialCursor := githubv4.String("")
89 issues, hasIssues := mm.queryIssue(ctx, initialCursor)
90 for hasIssues {
91 for _, node := range issues.Nodes {
92 select {
93 case <-ctx.Done():
94 return
95 case mm.importEvents <- IssueEvent{node.issue}:
96 }
97
98 // issue edit events follow the issue event
99 mm.fillIssueEditEvents(ctx, &node)
100 // last come the timeline events
101 mm.fillTimelineEvents(ctx, &node)
102 }
103 if !issues.PageInfo.HasNextPage {
104 break
105 }
106 issues, hasIssues = mm.queryIssue(ctx, issues.PageInfo.EndCursor)
107 }
108}
109
110func (mm *importMediator) fillIssueEditEvents(ctx context.Context, issueNode *issueNode) {
111 edits := &issueNode.UserContentEdits
112 hasEdits := true
113 for hasEdits {
114 for edit := range reverse(edits.Nodes) {
115 if edit.Diff == nil || string(*edit.Diff) == "" {
116 // issueEdit.Diff == nil happen if the event is older than early
117 // 2018, Github doesn't have the data before that. Best we can do is
118 // to ignore the event.
119 continue
120 }
121 select {
122 case <-ctx.Done():
123 return
124 case mm.importEvents <- IssueEditEvent{issueId: issueNode.issue.Id, userContentEdit: edit}:
125 }
126 }
127 if !edits.PageInfo.HasPreviousPage {
128 break
129 }
130 edits, hasEdits = mm.queryIssueEdits(ctx, issueNode.issue.Id, edits.PageInfo.EndCursor)
131 }
132}
133
134func (mm *importMediator) queryIssueEdits(ctx context.Context, nid githubv4.ID, cursor githubv4.String) (*userContentEditConnection, bool) {
135 vars := newIssueEditVars()
136 vars["gqlNodeId"] = nid
137 if cursor == "" {
138 vars["issueEditBefore"] = (*githubv4.String)(nil)
139 } else {
140 vars["issueEditBefore"] = cursor
141 }
142 query := issueEditQuery{}
143 if err := mm.gh.queryImport(ctx, &query, vars, mm.importEvents); err != nil {
144 mm.err = err
145 return nil, false
146 }
147 connection := &query.Node.Issue.UserContentEdits
148 if len(connection.Nodes) <= 0 {
149 return nil, false
150 }
151 return connection, true
152}
153
154func (mm *importMediator) fillTimelineEvents(ctx context.Context, issueNode *issueNode) {
155 items := &issueNode.TimelineItems
156 hasItems := true
157 for hasItems {
158 for _, item := range items.Nodes {
159 select {
160 case <-ctx.Done():
161 return
162 case mm.importEvents <- TimelineEvent{issueId: issueNode.issue.Id, timelineItem: item}:
163 }
164 if item.Typename == "IssueComment" {
165 // Issue comments are different than other timeline items in that
166 // they may have associated user content edits.
167 // Right after the comment we send the comment edits.
168 mm.fillCommentEdits(ctx, &item)
169 }
170 }
171 if !items.PageInfo.HasNextPage {
172 break
173 }
174 items, hasItems = mm.queryTimeline(ctx, issueNode.issue.Id, items.PageInfo.EndCursor)
175 }
176}
177
178func (mm *importMediator) queryTimeline(ctx context.Context, nid githubv4.ID, cursor githubv4.String) (*timelineItemsConnection, bool) {
179 vars := newTimelineVars()
180 vars["gqlNodeId"] = nid
181 if cursor == "" {
182 vars["timelineAfter"] = (*githubv4.String)(nil)
183 } else {
184 vars["timelineAfter"] = cursor
185 }
186 query := timelineQuery{}
187 if err := mm.gh.queryImport(ctx, &query, vars, mm.importEvents); err != nil {
188 mm.err = err
189 return nil, false
190 }
191 connection := &query.Node.Issue.TimelineItems
192 if len(connection.Nodes) <= 0 {
193 return nil, false
194 }
195 return connection, true
196}
197
198func (mm *importMediator) fillCommentEdits(ctx context.Context, item *timelineItem) {
199 // Here we are only concerned with timeline items of type issueComment.
200 if item.Typename != "IssueComment" {
201 return
202 }
203 // First: setup message handling while submitting GraphQL queries.
204 comment := &item.IssueComment
205 edits := &comment.UserContentEdits
206 hasEdits := true
207 for hasEdits {
208 for edit := range reverse(edits.Nodes) {
209 if edit.Diff == nil || string(*edit.Diff) == "" {
210 // issueEdit.Diff == nil happen if the event is older than early
211 // 2018, Github doesn't have the data before that. Best we can do is
212 // to ignore the event.
213 continue
214 }
215 select {
216 case <-ctx.Done():
217 return
218 case mm.importEvents <- CommentEditEvent{commentId: comment.Id, userContentEdit: edit}:
219 }
220 }
221 if !edits.PageInfo.HasPreviousPage {
222 break
223 }
224 edits, hasEdits = mm.queryCommentEdits(ctx, comment.Id, edits.PageInfo.EndCursor)
225 }
226}
227
228func (mm *importMediator) queryCommentEdits(ctx context.Context, nid githubv4.ID, cursor githubv4.String) (*userContentEditConnection, bool) {
229 vars := newCommentEditVars()
230 vars["gqlNodeId"] = nid
231 if cursor == "" {
232 vars["commentEditBefore"] = (*githubv4.String)(nil)
233 } else {
234 vars["commentEditBefore"] = cursor
235 }
236 query := commentEditQuery{}
237 if err := mm.gh.queryImport(ctx, &query, vars, mm.importEvents); err != nil {
238 mm.err = err
239 return nil, false
240 }
241 connection := &query.Node.IssueComment.UserContentEdits
242 if len(connection.Nodes) <= 0 {
243 return nil, false
244 }
245 return connection, true
246}
247
248func (mm *importMediator) queryIssue(ctx context.Context, cursor githubv4.String) (*issueConnection, bool) {
249 vars := newIssueVars(mm.owner, mm.project, mm.since)
250 if cursor == "" {
251 vars["issueAfter"] = (*githubv4.String)(nil)
252 } else {
253 vars["issueAfter"] = cursor
254 }
255 query := issueQuery{}
256 if err := mm.gh.queryImport(ctx, &query, vars, mm.importEvents); err != nil {
257 mm.err = err
258 return nil, false
259 }
260 connection := &query.Repository.Issues
261 if len(connection.Nodes) <= 0 {
262 return nil, false
263 }
264 return connection, true
265}
266
267func reverse(eds []userContentEdit) chan userContentEdit {
268 ret := make(chan userContentEdit)
269 go func() {
270 for i := range eds {
271 ret <- eds[len(eds)-1-i]
272 }
273 close(ret)
274 }()
275 return ret
276}
277
// varmap holds the variables sent along with a Github GraphQL query, keyed by
// variable name: pagination sizes and cursors as well as values such as the
// repository owner and name.
type varmap map[string]interface{}
280
281func newIssueVars(owner, project string, since time.Time) varmap {
282 return varmap{
283 "owner": githubv4.String(owner),
284 "name": githubv4.String(project),
285 "issueSince": githubv4.DateTime{Time: since},
286 "issueFirst": githubv4.Int(NumIssues),
287 "issueEditLast": githubv4.Int(NumIssueEdits),
288 "issueEditBefore": (*githubv4.String)(nil),
289 "timelineFirst": githubv4.Int(NumTimelineItems),
290 "timelineAfter": (*githubv4.String)(nil),
291 "commentEditLast": githubv4.Int(NumCommentEdits),
292 "commentEditBefore": (*githubv4.String)(nil),
293 }
294}
295
296func newIssueEditVars() varmap {
297 return varmap{
298 "issueEditLast": githubv4.Int(NumIssueEdits),
299 }
300}
301
302func newTimelineVars() varmap {
303 return varmap{
304 "timelineFirst": githubv4.Int(NumTimelineItems),
305 "commentEditLast": githubv4.Int(NumCommentEdits),
306 "commentEditBefore": (*githubv4.String)(nil),
307 }
308}
309
310func newCommentEditVars() varmap {
311 return varmap{
312 "commentEditLast": githubv4.Int(NumCommentEdits),
313 }
314}