main.py

import json
import os
import pathlib
from collections import defaultdict
from datetime import datetime, timedelta
from typing import Optional

import typer
from github import Github
from github.Issue import Issue
from github.Repository import Repository
from pytz import timezone
from typer import Typer

app: Typer = typer.Typer()

DATETIME_FORMAT: str = "%m/%d/%Y %I:%M %p"

label_data_file_path = pathlib.Path(__file__).parent.parent / "label_data.json"

with open(label_data_file_path, "r") as label_data_file:
    label_data = json.load(label_data_file)
    CORE_LABELS: set[str] = set(label_data["core_labels"])
    # Labels that we want present in the final report but that we don't want
    # defined as core labels, since issues without core labels are flagged as
    # errors.
    ADDITIONAL_LABELS: set[str] = set(label_data["additional_labels"])
    IGNORED_LABEL: str = label_data["ignored_label"]

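# For reference, a sketch of the shape this script expects label_data.json to
# have (placeholder values, not the real labels):
#
# {
#     "core_labels": ["<core label>", "..."],
#     "additional_labels": ["<additional label>", "..."],
#     "ignored_label": "<label that excludes an issue from the report>"
# }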

ISSUES_PER_LABEL: int = 20


class IssueData:
    def __init__(self, issue: Issue) -> None:
        self.url: str = issue.html_url
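        # NOTE: reaction and label data below are read from PyGithub's private
        # `_rawData` dict, presumably so the values come straight from the
        # already-fetched search payload instead of triggering extra API calls.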
        self.like_count: int = issue._rawData["reactions"]["+1"]  # type: ignore [attr-defined]
        self.creation_datetime: str = issue.created_at.strftime(DATETIME_FORMAT)
        # TODO: Change script to support storing labels here, rather than directly in the script
        self.labels: set[str] = {label["name"] for label in issue._rawData["labels"]}  # type: ignore [attr-defined]


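# Example invocations (a sketch; Typer derives the CLI flags from the parameter
# names of `main`, and `123` stands in for whichever issue hosts the report):
#
#   python main.py --query-day-interval 30
#   python main.py --github-token "$GITHUB_ACCESS_TOKEN" --issue-reference-number 123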
@app.command()
def main(
    github_token: Optional[str] = None,
    issue_reference_number: Optional[int] = None,
    query_day_interval: Optional[int] = None,
) -> None:
    start_time: datetime = datetime.now()

    start_date: datetime | None = None

    if query_day_interval:
        tz = timezone("America/New_York")
        current_time = datetime.now(tz).replace(
            hour=0, minute=0, second=0, microsecond=0
        )
        start_date = current_time - timedelta(days=query_day_interval)

    # GitHub Workflow will pass in the token as an environment variable,
    # but we can place it in our env when running the script locally, for convenience
    github_token = github_token or os.getenv("GITHUB_ACCESS_TOKEN")
    github = Github(github_token)

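    # `github.rate_limiting` is a (remaining, limit) pair, so index 0 below is
    # the number of requests still available in the current quota window.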
    remaining_requests_before: int = github.rate_limiting[0]
    print(f"Remaining requests before: {remaining_requests_before}")

    repo_name: str = "zed-industries/zed"
    repository: Repository = github.get_repo(repo_name)

    # There has to be a nice way of adding types to tuple unpacking
    label_to_issue_data: dict[str, list[IssueData]]
    error_message_to_erroneous_issue_data: dict[str, list[IssueData]]
    (
        label_to_issue_data,
        error_message_to_erroneous_issue_data,
    ) = get_issue_maps(github, repository, start_date)

    issue_text: str = get_issue_text(
        label_to_issue_data,
        error_message_to_erroneous_issue_data,
    )

    if issue_reference_number:
        top_ranking_issues_issue: Issue = repository.get_issue(issue_reference_number)
        top_ranking_issues_issue.edit(body=issue_text)
    else:
        print(issue_text)

    remaining_requests_after: int = github.rate_limiting[0]
    print(f"Remaining requests after: {remaining_requests_after}")
    print(f"Requests used: {remaining_requests_before - remaining_requests_after}")

    run_duration: timedelta = datetime.now() - start_time
    print(run_duration)


def get_issue_maps(
    github: Github,
    repository: Repository,
    start_date: datetime | None = None,
) -> tuple[dict[str, list[IssueData]], dict[str, list[IssueData]]]:
    label_to_issues: defaultdict[str, list[Issue]] = get_label_to_issues(
        github,
        repository,
        start_date,
    )
    label_to_issue_data: dict[str, list[IssueData]] = get_label_to_issue_data(
        label_to_issues
    )

    error_message_to_erroneous_issues: defaultdict[str, list[Issue]] = (
        get_error_message_to_erroneous_issues(github, repository)
    )
    error_message_to_erroneous_issue_data: dict[str, list[IssueData]] = (
        get_error_message_to_erroneous_issue_data(error_message_to_erroneous_issues)
    )

    # Create a new dictionary with labels ordered by the sum of likes on their associated issues
    labels = list(label_to_issue_data.keys())

    labels.sort(
        key=lambda label: sum(
            issue_data.like_count for issue_data in label_to_issue_data[label]
        ),
        reverse=True,
    )

    label_to_issue_data = {label: label_to_issue_data[label] for label in labels}

    return (
        label_to_issue_data,
        error_message_to_erroneous_issue_data,
    )


def get_label_to_issues(
    github: Github,
    repository: Repository,
    start_date: datetime | None = None,
) -> defaultdict[str, list[Issue]]:
    label_to_issues: defaultdict[str, list[Issue]] = defaultdict(list)

    labels: set[str] = CORE_LABELS | ADDITIONAL_LABELS

    date_query: str = (
        f"created:>={start_date.strftime('%Y-%m-%d')}" if start_date else ""
    )

    for label in labels:
        query: str = f'repo:{repository.full_name} is:open is:issue {date_query} label:"{label}" -label:"{IGNORED_LABEL}" sort:reactions-+1-desc'
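        # Rendered, the query looks roughly like this (the label names here are
        # placeholders, and the created:>= clause only appears when a start date
        # was computed from --query-day-interval):
        #   repo:zed-industries/zed is:open is:issue created:>=2024-01-01
        #   label:"some label" -label:"ignored label" sort:reactions-+1-desc
        # The sort qualifier orders results by the number of +1 reactions, descending.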

        issues = github.search_issues(query)

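        # Slicing the paginated search result below should keep the fetch to
        # roughly the first ISSUES_PER_LABEL results rather than walking every
        # page (an assumption about PyGithub's lazy pagination).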
        if issues.totalCount > 0:
            for issue in issues[0:ISSUES_PER_LABEL]:
                label_to_issues[label].append(issue)

    return label_to_issues


def get_label_to_issue_data(
    label_to_issues: defaultdict[str, list[Issue]],
) -> dict[str, list[IssueData]]:
    label_to_issue_data: dict[str, list[IssueData]] = {}

    for label in label_to_issues:
        issues: list[Issue] = label_to_issues[label]
        issue_data: list[IssueData] = [IssueData(issue) for issue in issues]
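        # Sort by like count (descending); ties fall back to the formatted
        # creation-date string, which compares lexicographically rather than
        # strictly chronologically.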
        issue_data.sort(
            key=lambda issue_data: (
                -issue_data.like_count,
                issue_data.creation_datetime,
            )
        )

        if issue_data:
            label_to_issue_data[label] = issue_data

    return label_to_issue_data


def get_error_message_to_erroneous_issues(
    github: Github, repository: Repository
) -> defaultdict[str, list[Issue]]:
    error_message_to_erroneous_issues: defaultdict[str, list[Issue]] = defaultdict(list)

    # Query for all open issues that don't have either a core or the ignored label and mark those as erroneous
    filter_labels: set[str] = CORE_LABELS | {IGNORED_LABEL}
    filter_labels_text: str = " ".join([f'-label:"{label}"' for label in filter_labels])
    query: str = f"repo:{repository.full_name} is:open is:issue {filter_labels_text}"
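    # Illustratively, the assembled query is something like (placeholder labels,
    # in whatever order the set iterates):
    #   repo:zed-industries/zed is:open is:issue -label:"core label A" -label:"core label B" -label:"ignored label"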

    for issue in github.search_issues(query):
        error_message_to_erroneous_issues["missing core label"].append(issue)

    return error_message_to_erroneous_issues


def get_error_message_to_erroneous_issue_data(
    error_message_to_erroneous_issues: defaultdict[str, list[Issue]],
) -> dict[str, list[IssueData]]:
    error_message_to_erroneous_issue_data: dict[str, list[IssueData]] = {}

    for label in error_message_to_erroneous_issues:
        issues: list[Issue] = error_message_to_erroneous_issues[label]
        issue_data: list[IssueData] = [IssueData(issue) for issue in issues]
        error_message_to_erroneous_issue_data[label] = issue_data

    return error_message_to_erroneous_issue_data


def get_issue_text(
    label_to_issue_data: dict[str, list[IssueData]],
    error_message_to_erroneous_issue_data: dict[str, list[IssueData]],
) -> str:
    tz = timezone("America/New_York")
    current_datetime: str = datetime.now(tz).strftime(f"{DATETIME_FORMAT} (%Z)")

    highest_ranking_issues_lines: list[str] = get_highest_ranking_issues_lines(
        label_to_issue_data
    )

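    # The assembled issue body ends up roughly in this shape (a sketch):
    #
    #   *Updated on <timestamp>*
    #
    #   ## <label>
    #
    #   1. <issue url> (<count> :thumbsup:)
    #   ...
    #   ---
    #   (an errors section, only when erroneous issues were found)
    #   *For details on how this issue is generated, [see the script](...)*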
    issue_text_lines: list[str] = [
        f"*Updated on {current_datetime}*",
        *highest_ranking_issues_lines,
        "",
        "---\n",
    ]

    erroneous_issues_lines: list[str] = get_erroneous_issues_lines(
        error_message_to_erroneous_issue_data
    )

    if erroneous_issues_lines:
        core_labels_text: str = ", ".join(
            f'"{core_label}"' for core_label in CORE_LABELS
        )

        issue_text_lines.extend(
            [
                "## errors with issues (this section only shows when there are errors with issues)\n",
                f"This script expects every issue to have at least one of the following core labels: {core_labels_text}",
                f"This script currently ignores issues that have the following label: {IGNORED_LABEL}\n",
                "### what to do?\n",
                "- Adjust the core labels on an issue to put it into a correct state or add a currently-ignored label to the issue",
                "- Adjust the core and ignored labels registered in this script",
                *erroneous_issues_lines,
                "",
                "---\n",
            ]
        )

    issue_text_lines.extend(
        [
            "*For details on how this issue is generated, [see the script](https://github.com/zed-industries/zed/blob/main/script/update_top_ranking_issues/main.py)*",
        ]
    )

    return "\n".join(issue_text_lines)


def get_highest_ranking_issues_lines(
    label_to_issue_data: dict[str, list[IssueData]],
) -> list[str]:
    highest_ranking_issues_lines: list[str] = []

    if label_to_issue_data:
        for label, issue_data in label_to_issue_data.items():
            highest_ranking_issues_lines.append(f"\n## {label}\n")

            for i, issue_data in enumerate(issue_data):
                markdown_bullet_point: str = (
                    f"{issue_data.url} ({issue_data.like_count} :thumbsup:)"
                )

                markdown_bullet_point = f"{i + 1}. {markdown_bullet_point}"
                highest_ranking_issues_lines.append(markdown_bullet_point)

    return highest_ranking_issues_lines


def get_erroneous_issues_lines(
    error_message_to_erroneous_issue_data: dict[str, list[IssueData]],
) -> list[str]:
    erroneous_issues_lines: list[str] = []

    if error_message_to_erroneous_issue_data:
        for (
            error_message,
            erroneous_issue_data,
        ) in error_message_to_erroneous_issue_data.items():
            erroneous_issues_lines.append(f"\n#### {error_message}\n")

            for erroneous_issue_data in erroneous_issue_data:
                erroneous_issues_lines.append(f"- {erroneous_issue_data.url}")

    return erroneous_issues_lines


if __name__ == "__main__":
    app()

# TODO: Sort label output into core and non-core sections