main.py

  1import os
  2from collections import defaultdict
  3from datetime import datetime, timedelta
  4from typing import Optional
  5
  6from github import Github
  7from github.Issue import Issue
  8from github.Repository import Repository
  9from pytz import timezone
 10
 11import typer
 12from typer import Typer
 13
# Typer application; `main` is registered on it as the command-line entry point.
app: Typer = typer.Typer()

# strftime format used for issue creation times and the report's "Updated on" stamp.
DATETIME_FORMAT: str = "%m/%d/%Y %I:%M %p"
# Every open issue is expected to carry at least one of these labels; open
# issues that have none of them (and are not ignored) are reported as errors.
CORE_LABELS: set[str] = {
    "defect",
    "design",
    "documentation",
    "duplicate",
    "enhancement",
    "panic / crash",
    "support",
}
# Labels that we want present in the final report, but that we don't want to
# define as core labels, since issues without any core label are flagged as
# errors.
ADDITIONAL_LABELS: set[str] = {
    "ai",
    "linux",
    "vim",
    "windows",
}
# Issues carrying this label are excluded from both search queries in this script.
IGNORED_LABEL_TEXT: str = "ignore top-ranking issues"
# Maximum number of issues collected per label.
ISSUES_PER_LABEL: int = 20
 37
 38
 39class IssueData:
 40    def __init__(self, issue: Issue) -> None:
 41        self.url: str = issue.html_url
 42        self.like_count: int = issue._rawData["reactions"]["+1"]  # type: ignore [attr-defined]
 43        self.creation_datetime: str = issue.created_at.strftime(DATETIME_FORMAT)
 44        # TODO: Change script to support storing labels here, rather than directly in the script
 45        self.labels: set[str] = {label["name"] for label in issue._rawData["labels"]}  # type: ignore [attr-defined]
 46
 47
 48@app.command()
 49def main(
 50    github_token: Optional[str] = None,
 51    issue_reference_number: Optional[int] = None,
 52    query_day_interval: Optional[int] = None,
 53) -> None:
 54    start_time: datetime = datetime.now()
 55
 56    start_date: datetime | None = None
 57
 58    if query_day_interval:
 59        tz = timezone("america/new_york")
 60        current_time = datetime.now(tz).replace(
 61            hour=0, minute=0, second=0, microsecond=0
 62        )
 63        start_date = current_time - timedelta(days=query_day_interval)
 64
 65    # GitHub Workflow will pass in the token as an environment variable,
 66    # but we can place it in our env when running the script locally, for convenience
 67    github_token = github_token or os.getenv("GITHUB_ACCESS_TOKEN")
 68    github = Github(github_token)
 69
 70    remaining_requests_before: int = github.rate_limiting[0]
 71    print(f"Remaining requests before: {remaining_requests_before}")
 72
 73    repo_name: str = "zed-industries/zed"
 74    repository: Repository = github.get_repo(repo_name)
 75
 76    # There has to be a nice way of adding types to tuple unpacking
 77    label_to_issue_data: dict[str, list[IssueData]]
 78    error_message_to_erroneous_issue_data: dict[str, list[IssueData]]
 79    (
 80        label_to_issue_data,
 81        error_message_to_erroneous_issue_data,
 82    ) = get_issue_maps(github, repository, start_date)
 83
 84    issue_text: str = get_issue_text(
 85        label_to_issue_data,
 86        error_message_to_erroneous_issue_data,
 87    )
 88
 89    if issue_reference_number:
 90        top_ranking_issues_issue: Issue = repository.get_issue(issue_reference_number)
 91        top_ranking_issues_issue.edit(body=issue_text)
 92    else:
 93        print(issue_text)
 94
 95    remaining_requests_after: int = github.rate_limiting[0]
 96    print(f"Remaining requests after: {remaining_requests_after}")
 97    print(f"Requests used: {remaining_requests_before - remaining_requests_after}")
 98
 99    run_duration: timedelta = datetime.now() - start_time
100    print(run_duration)
101
102
def get_issue_maps(
    github: Github,
    repository: Repository,
    start_date: datetime | None = None,
) -> tuple[dict[str, list[IssueData]], dict[str, list[IssueData]]]:
    """Collect the two maps that the report is rendered from.

    Args:
        github: Client used to run the issue searches.
        repository: Repository whose issues are searched.
        start_date: Optional creation-date lower bound, forwarded to the
            per-label search only.

    Returns:
        A pair ``(label_to_issue_data, error_message_to_erroneous_issue_data)``.
        The first map's keys are ordered by the total number of likes across
        each label's issues, highest first.
    """
    unranked: dict[str, list[IssueData]] = get_label_to_issue_data(
        get_label_to_issues(github, repository, start_date)
    )
    erroneous: dict[str, list[IssueData]] = get_error_message_to_erroneous_issue_data(
        get_error_message_to_erroneous_issues(github, repository)
    )

    def total_likes(label: str) -> int:
        # Sum of likes over every issue gathered for this label.
        return sum(entry.like_count for entry in unranked[label])

    # Rebuild the dictionary so that its insertion order follows the ranking.
    ranked_labels: list[str] = sorted(unranked, key=total_likes, reverse=True)
    ranked: dict[str, list[IssueData]] = {
        label: unranked[label] for label in ranked_labels
    }

    return ranked, erroneous
140
141
def get_label_to_issues(
    github: Github,
    repository: Repository,
    start_date: datetime | None = None,
) -> defaultdict[str, list[Issue]]:
    """Search the most-liked open issues for every core and additional label.

    Args:
        github: Client used to run the searches.
        repository: Repository whose issues are searched.
        start_date: When given, only issues created on or after this date
            (day precision) are considered.

    Returns:
        A mapping from label to at most ``ISSUES_PER_LABEL`` issues, as
        returned by a search sorted by thumbs-up reactions, descending.
        Labels whose search has no results get no entry.
    """
    label_to_issues: defaultdict[str, list[Issue]] = defaultdict(list)

    date_query: str = (
        f"created:>={start_date.strftime('%Y-%m-%d')}" if start_date else ""
    )

    # Iterate the labels in sorted order: set iteration order can differ
    # between runs, which would otherwise make the insertion order of the
    # result (and any ordering derived from it) non-reproducible.
    for label in sorted(CORE_LABELS | ADDITIONAL_LABELS):
        query: str = f'repo:{repository.full_name} is:open is:issue {date_query} label:"{label}" -label:"{IGNORED_LABEL_TEXT}" sort:reactions-+1-desc'

        issues = github.search_issues(query)

        # NOTE(review): the emptiness check is kept deliberately before
        # slicing; confirm how the installed PyGithub handles slicing an
        # empty result set before removing it.
        if issues.totalCount > 0:
            label_to_issues[label].extend(issues[0:ISSUES_PER_LABEL])

    return label_to_issues
165
166
def get_label_to_issue_data(
    label_to_issues: defaultdict[str, list[Issue]],
) -> dict[str, list[IssueData]]:
    """Convert each label's issues to ``IssueData``, ranked within the label.

    Issues are ordered by like count, highest first; issues with the same
    like count are ordered oldest first.

    Args:
        label_to_issues: Mapping from label to the issues found for it.

    Returns:
        A mapping from label to its ranked ``IssueData`` list. Labels with
        no issues are omitted.
    """
    label_to_issue_data: dict[str, list[IssueData]] = {}

    for label, issues in label_to_issues.items():
        if not issues:
            continue

        # Break ties on the issue's actual creation datetime. The formatted
        # `creation_datetime` string starts with the month and uses a 12-hour
        # clock, so comparing those strings does not give chronological order.
        ranked = sorted(
            ((IssueData(issue), issue.created_at) for issue in issues),
            key=lambda pair: (-pair[0].like_count, pair[1]),
        )
        label_to_issue_data[label] = [issue_data for issue_data, _ in ranked]

    return label_to_issue_data
186
187
def get_error_message_to_erroneous_issues(
    github: Github, repository: Repository
) -> defaultdict[str, list[Issue]]:
    """Find open issues that break this script's labelling expectations.

    Args:
        github: Client used to run the search.
        repository: Repository whose issues are searched.

    Returns:
        A mapping from error message to the offending issues. Currently the
        only message is ``"missing core label"``; it is present only when at
        least one such issue exists.
    """
    # An issue is erroneous when it is open and carries neither a core label
    # nor the ignored label, so the search excludes every one of those labels.
    excluded_labels: set[str] = CORE_LABELS | {IGNORED_LABEL_TEXT}
    exclusion_filters: str = " ".join(
        f'-label:"{excluded}"' for excluded in excluded_labels
    )
    query: str = f"repo:{repository.full_name} is:open is:issue {exclusion_filters}"

    erroneous: defaultdict[str, list[Issue]] = defaultdict(list)

    # Append inside the loop so the key only comes into existence when the
    # search actually yields an issue.
    for offending_issue in github.search_issues(query):
        erroneous["missing core label"].append(offending_issue)

    return erroneous
202
203
def get_error_message_to_erroneous_issue_data(
    error_message_to_erroneous_issues: defaultdict[str, list[Issue]],
) -> dict[str, list[IssueData]]:
    """Wrap every erroneous issue in ``IssueData``, keeping keys and order.

    Args:
        error_message_to_erroneous_issues: Mapping from error message to the
            issues it applies to.

    Returns:
        A plain dict with the same keys, each mapped to the corresponding
        list of ``IssueData`` objects.
    """
    return {
        error_message: [IssueData(flagged_issue) for flagged_issue in flagged_issues]
        for error_message, flagged_issues in error_message_to_erroneous_issues.items()
    }
215
216
def get_issue_text(
    label_to_issue_data: dict[str, list[IssueData]],
    error_message_to_erroneous_issue_data: dict[str, list[IssueData]],
) -> str:
    """Render the full Markdown body of the top-ranking issues report.

    The body consists of an "Updated on" stamp (America/New_York time), the
    ranked issues per label, an errors section that is only included when
    erroneous issues exist, and a footer linking to this script.

    Args:
        label_to_issue_data: Ranked issues per label, in display order.
        error_message_to_erroneous_issue_data: Erroneous issues per error
            message.

    Returns:
        The report as a single newline-joined string.
    """
    tz = timezone("america/new_york")
    current_datetime: str = datetime.now(tz).strftime(f"{DATETIME_FORMAT} (%Z)")

    highest_ranking_issues_lines: list[str] = get_highest_ranking_issues_lines(
        label_to_issue_data
    )

    issue_text_lines: list[str] = [
        f"*Updated on {current_datetime}*",
        *highest_ranking_issues_lines,
        "",
        "---\n",
    ]

    erroneous_issues_lines: list[str] = get_erroneous_issues_lines(
        error_message_to_erroneous_issue_data
    )

    if erroneous_issues_lines:
        # CORE_LABELS is a set, and set iteration order can differ between
        # runs; sort it so the generated text does not reshuffle the label
        # list every time the report is regenerated.
        core_labels_text: str = ", ".join(
            f'"{core_label}"' for core_label in sorted(CORE_LABELS)
        )

        issue_text_lines.extend(
            [
                "## errors with issues (this section only shows when there are errors with issues)\n",
                f"This script expects every issue to have at least one of the following core labels: {core_labels_text}",
                f"This script currently ignores issues that have the following label: {IGNORED_LABEL_TEXT}\n",
                "### what to do?\n",
                "- Adjust the core labels on an issue to put it into a correct state or add a currently-ignored label to the issue",
                "- Adjust the core and ignored labels registered in this script",
                *erroneous_issues_lines,
                "",
                "---\n",
            ]
        )

    issue_text_lines.append(
        "*For details on how this issue is generated, [see the script](https://github.com/zed-industries/zed/blob/main/script/update_top_ranking_issues/main.py)*"
    )

    return "\n".join(issue_text_lines)
265
266
def get_highest_ranking_issues_lines(
    label_to_issue_data: dict[str, list[IssueData]],
) -> list[str]:
    """Render the ranked issues as Markdown lines, one section per label.

    Args:
        label_to_issue_data: Ranked issues per label, in display order.

    Returns:
        For each label, a ``## label`` heading followed by a numbered entry
        per issue showing its URL and like count. Empty when there is
        nothing to show.
    """
    lines: list[str] = []

    if not label_to_issue_data:
        return lines

    for label, ranked_issues in label_to_issue_data.items():
        lines.append(f"\n## {label}\n")
        # Numbering restarts at 1 under every label heading.
        lines.extend(
            f"{rank}. {entry.url} ({entry.like_count} :thumbsup:)"
            for rank, entry in enumerate(ranked_issues, start=1)
        )

    return lines
285
286
def get_erroneous_issues_lines(
    error_message_to_erroneous_issue_data,
) -> list[str]:
    """Render the erroneous issues as Markdown lines, grouped by error message.

    Args:
        error_message_to_erroneous_issue_data: Mapping from error message to
            objects exposing a ``url`` attribute.

    Returns:
        For each error message, a ``#### message`` heading followed by one
        bullet per issue URL. Empty when the mapping is falsy.
    """
    lines: list[str] = []

    if not error_message_to_erroneous_issue_data:
        return lines

    for error_message, flagged_issues in error_message_to_erroneous_issue_data.items():
        lines.append(f"\n#### {error_message}\n")
        lines.extend(f"- {flagged_issue.url}" for flagged_issue in flagged_issues)

    return lines
303
304
# Invoke the Typer application only when this file is executed as a script.
if __name__ == "__main__":
    app()
307
308# TODO: Sort label output into core and non core sections