1import os
2from collections import defaultdict
3from datetime import datetime, timedelta
4from typing import Optional
5
6import typer
7from github import Github
8from github.Issue import Issue
9from github.Repository import Repository
10from pytz import timezone
11from typer import Typer
12
# Typer application object; `main` is registered on it through `@app.command()`
# and it is invoked when the file is run as a script.
app: Typer = typer.Typer()

# strftime pattern (month/day/year, 12-hour clock with AM/PM) used for issue
# creation times and for the "Updated on" stamp in the generated text.
DATETIME_FORMAT: str = "%m/%d/%Y %I:%M %p"
# Maximum number of issues kept for each section of the generated text.
ISSUES_PER_LABEL: int = 50
17
18
class IssueData:
    """Snapshot of the fields of a GitHub issue that this script reports on.

    Attributes are copied from the PyGithub ``Issue`` at construction time:
    the title, the HTML URL, the number of "+1" reactions, the creation time
    rendered with ``DATETIME_FORMAT``, and the set of label names. The
    original issue object is retained on ``_issue``.
    """

    def __init__(self, issue: Issue) -> None:
        self.title = issue.title
        self.url: str = issue.html_url

        # The "+1" reaction count is read from the raw API payload that
        # PyGithub keeps on the issue object.
        reaction_counts = issue._rawData["reactions"]  # type: ignore [attr-defined]
        self.like_count: int = reaction_counts["+1"]

        created = issue.created_at
        self.creation_datetime: str = created.strftime(DATETIME_FORMAT)

        # TODO: Change script to support storing labels here, rather than directly in the script
        raw_labels = issue._rawData["labels"]  # type: ignore [attr-defined]
        self.labels: set[str] = {raw_label["name"] for raw_label in raw_labels}

        self._issue = issue
28
29
@app.command()
def main(
    github_token: Optional[str] = None,
    issue_reference_number: Optional[int] = None,
    query_day_interval: Optional[int] = None,
) -> None:
    # Build the top-ranking-issues text for zed-industries/zed, then either
    # write it into the body of an existing issue or print it.
    #
    #   github_token:           token handed to PyGithub; when not given, the
    #                           GITHUB_ACCESS_TOKEN environment variable is used.
    #   issue_reference_number: number of the issue whose body is replaced with
    #                           the text; when not given, the text is printed.
    #   query_day_interval:     when given, only issues created within this many
    #                           days before today's midnight are considered.
    #
    # Documented with comments rather than a docstring because Typer would
    # surface a docstring as the command's --help text.
    script_started: datetime = datetime.now()

    start_date: datetime | None = None
    if query_day_interval:
        # Midnight today in the New York zone, moved back by the interval.
        new_york = timezone("america/new_york")
        midnight_today = datetime.now(new_york).replace(
            hour=0,
            minute=0,
            second=0,
            microsecond=0,
        )
        start_date = midnight_today - timedelta(days=query_day_interval)

    # GitHub Workflow will pass in the token as an environment variable,
    # but we can place it in our env when running the script locally, for convenience
    if not github_token:
        github_token = os.getenv("GITHUB_ACCESS_TOKEN")

    with Github(github_token, per_page=100) as github:
        # Remaining API requests are sampled before and after the work so the
        # number of requests consumed can be reported.
        requests_left_at_start: int = github.rate_limiting[0]
        print(f"Remaining requests before: {requests_left_at_start}")

        repository: Repository = github.get_repo("zed-industries/zed")

        sections: dict[str, list[IssueData]] = get_issue_maps(
            github, repository, start_date
        )
        report: str = get_issue_text(sections)

        if not issue_reference_number:
            print(report)
        else:
            target_issue: Issue = repository.get_issue(issue_reference_number)
            target_issue.edit(body=report)

        requests_left_at_end: int = github.rate_limiting[0]
        print(f"Remaining requests after: {requests_left_at_end}")
        print(f"Requests used: {requests_left_at_start - requests_left_at_end}")

    elapsed: timedelta = datetime.now() - script_started
    print(elapsed)
76
77
def get_issue_maps(
    github: Github,
    repository: Repository,
    start_date: datetime | None = None,
) -> dict[str, list[IssueData]]:
    """Return the per-label issue data, with labels ordered by total likes.

    The mapping produced by ``get_label_to_issue_data`` is rebuilt so that
    labels whose issues have the largest summed like count come first.
    Labels with equal totals keep their original relative order.
    """
    unordered: dict[str, list[IssueData]] = get_label_to_issue_data(
        github,
        repository,
        start_date,
    )

    def total_likes(label: str) -> int:
        # Sum of the likes across every issue filed under this label.
        return sum(entry.like_count for entry in unordered[label])

    ranked_labels = sorted(unordered, key=total_likes, reverse=True)

    # Dicts preserve insertion order, so inserting in ranked order is enough.
    return {label: unordered[label] for label in ranked_labels}
102
103
def get_label_to_issue_data(
    github: Github,
    repository: Repository,
    start_date: datetime | None = None,
) -> dict[str, list[IssueData]]:
    """Search the repository's open issues and collect the most-liked per section.

    Args:
        github: PyGithub client used to run the issue searches.
        repository: Repository whose ``full_name`` scopes every search.
        start_date: When given, only issues created on or after this date
            (formatted as ``YYYY-MM-DD``) are searched.

    Returns:
        A mapping from section name ("bug", "crash", "feature", "meta",
        "unlabeled") to at most ``ISSUES_PER_LABEL`` ``IssueData`` entries,
        ordered by like count (highest first) and then by creation time
        (oldest first). Sections with no matching issues are omitted.
    """
    common_filters = [
        f"repo:{repository.full_name}",
        "is:open",
        "is:issue",
        '-label:"ignore top-ranking issues"',
        "sort:reactions-+1-desc",
    ]

    if start_date:
        common_filters.append(f"created:>={start_date.strftime('%Y-%m-%d')}")

    common_filter_string = " ".join(common_filters)

    # Because PyGithub doesn't seem to support logical operators `AND` and `OR`
    # that GitHub issue queries can use, we use lists as values, rather than
    # using `(label:bug OR type:Bug)`. This is not as efficient, as we might
    # query the same issue multiple times. Issues that are potentially queried
    # multiple times are deduplicated through the `unique_issues` set below. If
    # PyGithub ever supports logical operators, we should definitely make the
    # switch.
    section_queries: dict[str, list[str]] = {
        "bug": ["label:bug", "type:Bug"],
        "crash": ["label:crash", "type:Crash"],
        "feature": ["label:feature", "type:Feature"],
        "meta": ["type:Meta"],
        "unlabeled": ["no:label no:type"],
    }

    label_to_issue_data: dict[str, list[IssueData]] = {}

    # The loop target gets its own name so the dict being iterated is not
    # rebound halfway through the function.
    for section, queries in section_queries.items():
        unique_issues = set()

        for section_query in queries:
            query: str = f"{common_filter_string} {section_query}"
            unique_issues.update(github.search_issues(query))

        if not unique_issues:
            continue

        issue_data: list[IssueData] = [IssueData(issue) for issue in unique_issues]
        issue_data.sort(
            key=lambda data: (
                -data.like_count,
                # `creation_datetime` is a "%m/%d/%Y %I:%M %p" string, which
                # does not sort chronologically as text (month comes first and
                # the clock is 12-hour). Parse it back so ties on like count
                # are broken by actual creation time.
                datetime.strptime(data.creation_datetime, DATETIME_FORMAT),
            )
        )

        label_to_issue_data[section] = issue_data[:ISSUES_PER_LABEL]

    return label_to_issue_data
167
168
def get_issue_text(
    label_to_issue_data: dict[str, list[IssueData]],
) -> str:
    """Assemble the full Markdown text for the top-ranking-issues report.

    The text starts with an "Updated on" line stamped with the current time
    in the New York zone, continues with the per-label issue lists, and ends
    with a horizontal rule and a link to the generating script.
    """
    new_york = timezone("america/new_york")
    timestamp: str = datetime.now(new_york).strftime(f"{DATETIME_FORMAT} (%Z)")

    body_lines: list[str] = [f"*Updated on {timestamp}*"]
    body_lines.extend(get_highest_ranking_issues_lines(label_to_issue_data))
    body_lines.append("\n---\n")
    body_lines.append(
        "*For details on how this issue is generated, [see the script](https://github.com/zed-industries/zed/blob/main/script/update_top_ranking_issues/main.py)*"
    )

    return "\n".join(body_lines)
187
188
def get_highest_ranking_issues_lines(
    label_to_issue_data: dict[str, list[IssueData]],
) -> list[str]:
    """Render the per-label issue lists as Markdown lines.

    Each label contributes a level-two heading followed by one numbered line
    per issue, giving the issue URL and its like count. An empty (or falsy)
    mapping yields an empty list.
    """
    if not label_to_issue_data:
        return []

    lines: list[str] = []

    for label, entries in label_to_issue_data.items():
        lines.append(f"\n## {label}\n")

        # Numbering restarts at 1 under every heading.
        for rank, entry in enumerate(entries, start=1):
            lines.append(f"{rank}. {entry.url} ({entry.like_count} :thumbsup:)")

    return lines
207
208
# Run the Typer application only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    app()
211
# TODO: Sort label output into core and non-core sections