1import os
2from datetime import datetime, timedelta
3from typing import Optional
4
5import typer
6from github import Github
7from github.Issue import Issue
8from github.Repository import Repository
9from pytz import timezone
10from typer import Typer
11
# Typer CLI application; `main` below is registered as its single command.
app: Typer = typer.Typer()

# Display format for issue creation timestamps, e.g. "01/31/2025 09:30 AM".
DATETIME_FORMAT: str = "%m/%d/%Y %I:%M %p"
# Maximum number of issues listed under each label section of the report.
ISSUES_PER_LABEL: int = 50
16
17
class IssueData:
    """Snapshot of the fields this script needs from a single GitHub issue."""

    def __init__(self, issue: Issue) -> None:
        self.title: str = issue.title
        self.url: str = issue.html_url
        # +1 reaction count, read from PyGithub's private raw JSON payload —
        # NOTE(review): `_rawData` is a private attribute; presumably used to
        # avoid an extra API request per issue. Confirm before upgrading PyGithub.
        self.like_count: int = issue._rawData["reactions"]["+1"] # type: ignore [attr-defined]
        # Pre-formatted creation timestamp (see DATETIME_FORMAT).
        self.creation_datetime: str = issue.created_at.strftime(DATETIME_FORMAT)
        # TODO: Change script to support storing labels here, rather than directly in the script
        self.labels: set[str] = {label["name"] for label in issue._rawData["labels"]} # type: ignore [attr-defined]
        # Keep a handle on the underlying PyGithub issue.
        self._issue: Issue = issue
27
28
@app.command()
def main(
    github_token: Optional[str] = None,
    issue_reference_number: Optional[int] = None,
    query_day_interval: Optional[int] = None,
) -> None:
    """Build the top-ranking-issues report.

    The report is written into the issue numbered `issue_reference_number`
    when given, otherwise printed to stdout. `query_day_interval` restricts
    the search to issues created within the last N days.
    """
    run_started_at: datetime = datetime.now()

    start_date: datetime | None = None

    if query_day_interval:
        eastern = timezone("america/new_york")
        # Midnight (Eastern) today, then step back the requested number of days.
        midnight_today = datetime.now(eastern).replace(
            hour=0, minute=0, second=0, microsecond=0
        )
        start_date = midnight_today - timedelta(days=query_day_interval)

    # GitHub Workflow will pass in the token as an environment variable,
    # but we can place it in our env when running the script locally, for convenience
    if not github_token:
        github_token = os.getenv("GITHUB_ACCESS_TOKEN")

    with Github(github_token, per_page=100) as github:
        requests_before: int = github.rate_limiting[0]
        print(f"Remaining requests before: {requests_before}")

        repository: Repository = github.get_repo("zed-industries/zed")

        label_to_issue_data: dict[str, list[IssueData]] = get_issue_maps(
            github, repository, start_date
        )
        issue_text: str = get_issue_text(label_to_issue_data)

        if issue_reference_number:
            tracking_issue: Issue = repository.get_issue(issue_reference_number)
            tracking_issue.edit(body=issue_text)
        else:
            print(issue_text)

        requests_after: int = github.rate_limiting[0]
        print(f"Remaining requests after: {requests_after}")
        print(f"Requests used: {requests_before - requests_after}")

    elapsed: timedelta = datetime.now() - run_started_at
    print(elapsed)
75
76
def get_issue_maps(
    github: Github,
    repository: Repository,
    start_date: datetime | None = None,
) -> dict[str, list[IssueData]]:
    """Collect issue data per label, ordered by the combined like count of each label."""
    label_to_issue_data: dict[str, list[IssueData]] = get_label_to_issue_data(
        github,
        repository,
        start_date,
    )

    # A label's rank is the sum of likes across all of its issues.
    def combined_likes(label: str) -> int:
        return sum(data.like_count for data in label_to_issue_data[label])

    # Rebuild the dictionary so the highest-ranked labels come first
    # (dicts preserve insertion order).
    ordered_labels = sorted(label_to_issue_data, key=combined_likes, reverse=True)

    return {label: label_to_issue_data[label] for label in ordered_labels}
101
102
def get_label_to_issue_data(
    github: Github,
    repository: Repository,
    start_date: datetime | None = None,
) -> dict[str, list[IssueData]]:
    """Search GitHub for open issues per section and return the top issues for each.

    Each section maps to at most ISSUES_PER_LABEL issues, sorted by +1
    reactions (most first) with ties broken by creation time (oldest first).
    Sections whose queries return no issues are omitted. When `start_date` is
    given, only issues created on or after that date are considered.
    """
    common_queries = [
        f"repo:{repository.full_name}",
        "is:open",
        "is:issue",
        '-label:"ignore top-ranking issues"',
        "sort:reactions-+1-desc",
    ]

    # Restrict the search window when a start date was supplied.
    if start_date:
        common_queries.append(f"created:>={start_date.strftime('%Y-%m-%d')}")

    common_query = " ".join(common_queries)

    # Because PyGithub doesn't seem to support logical operators `AND` and `OR`
    # that GitHub issue queries can use, we use lists as values, rather than
    # using `(label:bug OR type:Bug)`. This is not as efficient, as we might
    # query the same issue multiple times. Issues that are potentially queried
    # multiple times are deduplicated in the `label_to_issues` dictionary. If
    # PyGithub ever supports logical operators, we should definitely make the
    # switch.
    section_queries: dict[str, list[str]] = {
        "bug": ["label:bug", "type:Bug"],
        "crash": ["label:crash", "type:Crash"],
        "feature": ["label:feature", "type:Feature"],
        "meta": ["type:Meta"],
        "windows": ["label:windows"],
        "unlabeled": ["no:label no:type"],
    }

    label_to_issue_data: dict[str, list[IssueData]] = {}

    for section, queries in section_queries.items():
        # A set deduplicates issues matched by more than one of the section's queries.
        unique_issues = set()

        for section_query in queries:
            full_query = f"{common_query} {section_query}"

            for issue in github.search_issues(full_query):
                unique_issues.add(issue)

        if not unique_issues:
            continue

        issue_data: list[IssueData] = [IssueData(issue) for issue in unique_issues]
        # Most likes first; ties broken chronologically, oldest first. The
        # formatted `creation_datetime` string does not sort chronologically
        # ("%m/%d/%Y" puts the month before the year and the clock is
        # 12-hour), so parse it back into a datetime for the comparison.
        issue_data.sort(
            key=lambda data: (
                -data.like_count,
                datetime.strptime(data.creation_datetime, DATETIME_FORMAT),
            )
        )

        label_to_issue_data[section] = issue_data[:ISSUES_PER_LABEL]

    return label_to_issue_data
167
168
def get_issue_text(
    label_to_issue_data: dict[str, list[IssueData]],
) -> str:
    """Render the complete markdown body for the top-ranking-issues report."""
    eastern = timezone("america/new_york")
    timestamp: str = datetime.now(eastern).strftime(f"{DATETIME_FORMAT} (%Z)")

    lines: list[str] = [f"*Updated on {timestamp}*"]
    lines.extend(get_highest_ranking_issues_lines(label_to_issue_data))
    lines.append("\n---\n")
    lines.append(
        "*For details on how this issue is generated, [see the script](https://github.com/zed-industries/zed/blob/main/script/update_top_ranking_issues/main.py)*"
    )

    return "\n".join(lines)
187
188
def get_highest_ranking_issues_lines(
    label_to_issue_data: "dict[str, list[IssueData]]",
) -> list[str]:
    """Return markdown lines: one heading per label followed by a numbered issue list.

    Each issue line has the form ``N. <url> (<likes> :thumbsup:)``. An empty
    input dictionary yields an empty list.

    Fixes over the previous version: the inner loop variable no longer
    shadows the per-label `issue_data` list, the redundant truthiness guard
    around the loop is dropped (iterating an empty dict is a no-op), and
    numbering uses `enumerate(..., start=1)` instead of manual `i + 1`.
    """
    lines: list[str] = []

    for label, issue_data_list in label_to_issue_data.items():
        lines.append(f"\n## {label}\n")

        # Issues arrive pre-sorted by like count; number them from 1.
        for rank, issue_data in enumerate(issue_data_list, start=1):
            lines.append(f"{rank}. {issue_data.url} ({issue_data.like_count} :thumbsup:)")

    return lines
207
208
# Script entry point: dispatch to the Typer CLI defined above.
if __name__ == "__main__":
    app()
211
# TODO: Sort label output into core and non-core sections