1import json
2import os
3import pathlib
4from collections import defaultdict
5from datetime import datetime, timedelta
6from typing import Optional
7
8import typer
9from github import Github
10from github.Issue import Issue
11from github.Repository import Repository
12from pytz import timezone
13from typer import Typer
14
# Typer application object; `main` is registered on it as the CLI command.
app: Typer = typer.Typer()

# strftime pattern used for the timestamps this script formats.
DATETIME_FORMAT: str = "%m/%d/%Y %I:%M %p"
# label_data.json lives in the parent of the directory containing this file.
LABEL_DATA_FILE_PATH = pathlib.Path(__file__).parent.parent / "label_data.json"
# Upper bound on the number of issues collected for each label.
ISSUES_PER_LABEL: int = 20
# Key under which issues lacking a core label are grouped.
MISSING_LABEL_ERROR_MESSAGE: str = "missing core label"

# JSON text is UTF-8; name the encoding explicitly so the read does not depend
# on the platform's locale default.
with open(LABEL_DATA_FILE_PATH, "r", encoding="utf-8") as label_data_file:
    label_data = json.load(label_data_file)
    CORE_LABELS: set[str] = set(label_data["core_labels"])
    # A set of labels that we want present in the final report, but that we
    # don't want defined as core labels, since issues without core labels are
    # flagged as errors.
    ADDITIONAL_LABELS: set[str] = set(label_data["additional_labels"])
    # Labels applied to issues that were flagged as missing a core label.
    NEW_ISSUE_LABELS: set[str] = set(label_data["new_issue_labels"])
    # Issues carrying this label are excluded from every query.
    IGNORED_LABEL: str = label_data["ignored_label"]
31
32
class IssueData:
    """Plain record of the issue fields this script reads and reports on.

    The like count and label names come from the raw payload attached to the
    ``Issue`` object; the creation time is stored already formatted with
    ``DATETIME_FORMAT``.
    """

    def __init__(self, issue: Issue) -> None:
        # The wrapped issue is kept so callers can still act on it.
        self._issue = issue
        self.title = issue.title
        self.url: str = issue.html_url

        thumbs_up: int = issue._rawData["reactions"]["+1"]  # type: ignore [attr-defined]
        self.like_count: int = thumbs_up

        created_at = issue.created_at
        self.creation_datetime: str = created_at.strftime(DATETIME_FORMAT)

        # TODO: Change script to support storing labels here, rather than directly in the script
        raw_labels = issue._rawData["labels"]  # type: ignore [attr-defined]
        self.labels: set[str] = set(raw_label["name"] for raw_label in raw_labels)
42
43
@app.command()
def main(
    github_token: Optional[str] = None,
    issue_reference_number: Optional[int] = None,
    query_day_interval: Optional[int] = None,
) -> None:
    """Build the top-ranking-issues report, then publish it or print it.

    When ``issue_reference_number`` is given, the report replaces the body of
    that issue and every issue flagged as missing a core label receives
    ``NEW_ISSUE_LABELS``. Without it the run is a dry run: the report is only
    printed. ``query_day_interval`` restricts the per-label ranking to issues
    created on or after the date that many days before today (New York time).
    ``github_token`` falls back to the ``GITHUB_ACCESS_TOKEN`` environment
    variable.
    """
    started_at: datetime = datetime.now()

    start_date: datetime | None = None
    if query_day_interval:
        eastern = timezone("america/new_york")
        midnight_today = datetime.now(eastern).replace(
            hour=0, minute=0, second=0, microsecond=0
        )
        start_date = midnight_today - timedelta(days=query_day_interval)

    # The GitHub Workflow supplies the token through the environment; reading
    # it from there also makes local runs convenient.
    if not github_token:
        github_token = os.getenv("GITHUB_ACCESS_TOKEN")
    github = Github(github_token)

    requests_left_at_start: int = github.rate_limiting[0]
    print(f"Remaining requests before: {requests_left_at_start}")

    repository: Repository = github.get_repo("zed-industries/zed")

    issue_maps = get_issue_maps(github, repository, start_date)
    label_to_issue_data: dict[str, list[IssueData]] = issue_maps[0]
    error_message_to_erroneous_issue_data: dict[str, list[IssueData]] = issue_maps[1]

    issue_text: str = get_issue_text(
        label_to_issue_data,
        error_message_to_erroneous_issue_data,
    )

    # The reference number doubles as the dry-run flag: nothing is written to
    # GitHub unless it was provided.
    if issue_reference_number:
        report_issue: Issue = repository.get_issue(issue_reference_number)
        report_issue.edit(body=issue_text)
    else:
        print(issue_text)

    if issue_reference_number:
        issues_missing_core_label = error_message_to_erroneous_issue_data.get(
            MISSING_LABEL_ERROR_MESSAGE, []
        )
        for flagged in issues_missing_core_label:
            flagged._issue.add_to_labels(*NEW_ISSUE_LABELS)

    requests_left_at_end: int = github.rate_limiting[0]
    print(f"Remaining requests after: {requests_left_at_end}")
    print(f"Requests used: {requests_left_at_start - requests_left_at_end}")

    print(datetime.now() - started_at)
104
105
def get_issue_maps(
    github: Github,
    repository: Repository,
    start_date: datetime | None = None,
) -> tuple[dict[str, list[IssueData]], dict[str, list[IssueData]]]:
    """Collect both maps the report is rendered from.

    Returns a pair: a label -> issue data mapping whose labels are ordered by
    the summed like count of their issues (highest first), and an
    error message -> erroneous issue data mapping. ``start_date`` is forwarded
    to the per-label search only.
    """
    ranked_by_label = get_label_to_issue_data(
        get_label_to_issues(github, repository, start_date)
    )
    erroneous_by_message = get_error_message_to_erroneous_issue_data(
        get_error_message_to_erroneous_issues(github, repository)
    )

    def total_likes(label: str) -> int:
        return sum(entry.like_count for entry in ranked_by_label[label])

    # Rebuild the dict so that iteration order follows the like totals.
    ordered_by_likes: dict[str, list[IssueData]] = {}
    for label in sorted(ranked_by_label, key=total_likes, reverse=True):
        ordered_by_likes[label] = ranked_by_label[label]

    return ordered_by_likes, erroneous_by_message
143
144
def get_label_to_issues(
    github: Github,
    repository: Repository,
    start_date: datetime | None = None,
) -> defaultdict[str, list[Issue]]:
    """Search open issues for every core and additional label.

    Each search excludes issues carrying ``IGNORED_LABEL`` and asks for results
    sorted by +1 reactions, descending; at most ``ISSUES_PER_LABEL`` results
    are kept per label. When ``start_date`` is given, only issues created on or
    after that date are searched. Labels whose search is empty get no entry.
    """
    if start_date:
        date_query: str = f"created:>={start_date.strftime('%Y-%m-%d')}"
    else:
        date_query = ""

    found_by_label: defaultdict[str, list[Issue]] = defaultdict(list)

    for label in CORE_LABELS | ADDITIONAL_LABELS:
        query: str = f'repo:{repository.full_name} is:open is:issue {date_query} label:"{label}" -label:"{IGNORED_LABEL}" sort:reactions-+1-desc'
        search_results = github.search_issues(query)

        # Skip before touching the mapping so empty labels never get a key.
        if not search_results.totalCount > 0:
            continue

        found_by_label[label].extend(search_results[0:ISSUES_PER_LABEL])

    return found_by_label
168
169
def get_label_to_issue_data(
    label_to_issues: defaultdict[str, list[Issue]],
) -> dict[str, list[IssueData]]:
    """Convert each label's issues to ``IssueData`` and rank them.

    Within a label, issues are ordered by like count (highest first); issues
    with the same like count are ordered by creation time, earliest first.
    Labels that have no issues are left out of the result.
    """
    label_to_issue_data: dict[str, list[IssueData]] = {}

    for label, issues in label_to_issues.items():
        if not issues:
            continue

        issue_data: list[IssueData] = [IssueData(issue) for issue in issues]
        # creation_datetime is a month-first, 12-hour display string, so
        # comparing it as text is not chronological. Parse it back with the
        # format it was written in before using it as the tie-break.
        issue_data.sort(
            key=lambda data: (
                -data.like_count,
                datetime.strptime(data.creation_datetime, DATETIME_FORMAT),
            )
        )
        label_to_issue_data[label] = issue_data

    return label_to_issue_data
189
190
def get_error_message_to_erroneous_issues(
    github: Github, repository: Repository
) -> defaultdict[str, list[Issue]]:
    """Find open issues that carry neither a core label nor the ignored label.

    Every match is filed under ``MISSING_LABEL_ERROR_MESSAGE``. When the search
    returns nothing, the mapping is returned without that key.
    """
    # One negative label filter per core label, plus one for the ignored label.
    excluded_labels: set[str] = CORE_LABELS | {IGNORED_LABEL}
    exclusion_filters: str = " ".join(
        f'-label:"{label}"' for label in excluded_labels
    )
    query: str = f"repo:{repository.full_name} is:open is:issue {exclusion_filters}"

    erroneous_by_message: defaultdict[str, list[Issue]] = defaultdict(list)

    unlabeled_issues = list(github.search_issues(query))
    if unlabeled_issues:
        erroneous_by_message[MISSING_LABEL_ERROR_MESSAGE].extend(unlabeled_issues)

    return erroneous_by_message
205
206
def get_error_message_to_erroneous_issue_data(
    error_message_to_erroneous_issues: defaultdict[str, list[Issue]],
) -> dict[str, list[IssueData]]:
    """Wrap every erroneous issue in ``IssueData``, keyed by its error message.

    Keys and the order of issues under each key are carried over unchanged.
    """
    return {
        error_message: [IssueData(issue) for issue in issues]
        for error_message, issues in error_message_to_erroneous_issues.items()
    }
218
219
def get_issue_text(
    label_to_issue_data: dict[str, list[IssueData]],
    error_message_to_erroneous_issue_data: dict[str, list[IssueData]],
) -> str:
    """Assemble the Markdown body of the top-ranking-issues report.

    The text starts with an "Updated on" timestamp (New York time), followed by
    the ranked issues per label. A section describing erroneous issues is
    included only when there are any, and a link to the script closes the text.

    Returns the lines joined with newlines.
    """
    tz = timezone("america/new_york")
    current_datetime: str = datetime.now(tz).strftime(f"{DATETIME_FORMAT} (%Z)")

    issue_text_lines: list[str] = [
        f"*Updated on {current_datetime}*",
        *get_highest_ranking_issues_lines(label_to_issue_data),
        "",
        "---\n",
    ]

    erroneous_issues_lines: list[str] = get_erroneous_issues_lines(
        error_message_to_erroneous_issue_data
    )

    if erroneous_issues_lines:
        # CORE_LABELS is a set, whose iteration order can differ between runs;
        # sort it so the generated sentence is the same every time.
        core_labels_text: str = ", ".join(
            f'"{core_label}"' for core_label in sorted(CORE_LABELS)
        )

        issue_text_lines.extend(
            [
                "## errors with issues (this section only shows when there are errors with issues)\n",
                f"This script expects every issue to have at least one of the following core labels: {core_labels_text}",
                f"This script currently ignores issues that have the following label: {IGNORED_LABEL}\n",
                "### what to do?\n",
                "- Adjust the core labels on an issue to put it into a correct state or add a currently-ignored label to the issue",
                "- Adjust the core and ignored labels registered in this script",
                *erroneous_issues_lines,
                "",
                "---\n",
            ]
        )

    issue_text_lines.append(
        "*For details on how this issue is generated, [see the script](https://github.com/zed-industries/zed/blob/main/script/update_top_ranking_issues/main.py)*"
    )

    return "\n".join(issue_text_lines)
268
269
def get_highest_ranking_issues_lines(
    label_to_issue_data: dict[str, list[IssueData]],
) -> list[str]:
    """Render the ranked issues as Markdown lines.

    Each label contributes a ``## label`` heading followed by a numbered list
    (starting at 1) of issue URLs with their like counts, in the order given.
    An empty mapping yields an empty list.
    """
    if not label_to_issue_data:
        return []

    lines: list[str] = []

    for label, ranked_issues in label_to_issue_data.items():
        lines.append(f"\n## {label}\n")
        lines.extend(
            f"{rank}. {entry.url} ({entry.like_count} :thumbsup:)"
            for rank, entry in enumerate(ranked_issues, start=1)
        )

    return lines
288
289
def get_erroneous_issues_lines(
    error_message_to_erroneous_issue_data,
) -> list[str]:
    """Render the erroneous issues as Markdown lines.

    Each error message contributes a ``#### message`` heading followed by one
    bullet per issue URL, in the order given. An empty mapping yields an empty
    list.
    """
    if not error_message_to_erroneous_issue_data:
        return []

    lines: list[str] = []

    for error_message, flagged_issues in error_message_to_erroneous_issue_data.items():
        lines.append(f"\n#### {error_message}\n")
        lines += [f"- {entry.url}" for entry in flagged_issues]

    return lines
306
307
# Invoke the Typer application only when this file is run as a script.
if __name__ == "__main__":
    app()
310
# TODO: Sort label output into core and non-core sections