main.py

  1import os
  2from collections import defaultdict
  3from datetime import datetime, timedelta
  4from typing import Optional
  5
  6from github import Github
  7from github.Issue import Issue
  8from github.Repository import Repository
  9from pytz import timezone
 10
 11import typer
 12from typer import Typer
 13
# Typer application object: `main` is registered on it as a command, and the
# script entry point at the bottom of the file invokes it.
app: Typer = typer.Typer()
 15
 16DATETIME_FORMAT: str = "%m/%d/%Y %I:%M %p"
 17CORE_LABELS: set[str] = set(
 18    [
 19        "defect",
 20        "design",
 21        "documentation",
 22        "enhancement",
 23        "panic / crash",
 24        "platform support",
 25    ]
 26)
 27# A set of labels for adding in labels that we want present in the final
 28# report, but that we don't want being defined as a core label, since issues
 29# with without core labels are flagged as errors.
 30ADDITIONAL_LABELS: set[str] = set(["ai", "vim"])
 31IGNORED_LABELS: set[str] = set(
 32    [
 33        "meta",
 34    ]
 35)
 36ISSUES_PER_LABEL: int = 20
 37
 38
 39class IssueData:
 40    def __init__(self, issue: Issue) -> None:
 41        self.url: str = issue.html_url
 42        self.like_count: int = issue._rawData["reactions"]["+1"]  # type: ignore [attr-defined]
 43        self.creation_datetime: str = issue.created_at.strftime(DATETIME_FORMAT)
 44        # TODO: Change script to support storing labels here, rather than directly in the script
 45        self.labels: set[str] = set(label["name"] for label in issue._rawData["labels"])  # type: ignore [attr-defined]
 46
 47
 48@app.command()
 49def main(
 50    issue_reference_number: int,
 51    github_token: Optional[str] = None,
 52    prod: bool = False,
 53    query_day_interval: Optional[int] = None,
 54) -> None:
 55    start_time: datetime = datetime.now()
 56
 57    start_date: datetime | None = None
 58
 59    if query_day_interval:
 60        tz = timezone("america/new_york")
 61        current_time = datetime.now(tz).replace(hour=0, minute=0, second=0, microsecond=0)
 62        start_date = current_time - timedelta(days=query_day_interval)
 63
 64    # GitHub Workflow will pass in the token as an environment variable,
 65    # but we can place it in our env when running the script locally, for convenience
 66    github_token = github_token or os.getenv("GITHUB_ACCESS_TOKEN")
 67    github = Github(github_token)
 68
 69    remaining_requests_before: int = github.rate_limiting[0]
 70    print(f"Remaining requests before: {remaining_requests_before}")
 71
 72    repo_name: str = "zed-industries/zed"
 73    repository: Repository = github.get_repo(repo_name)
 74
 75    # There has to be a nice way of adding types to tuple unpacking
 76    label_to_issue_data: dict[str, list[IssueData]]
 77    error_message_to_erroneous_issue_data: dict[str, list[IssueData]]
 78    (
 79        label_to_issue_data,
 80        error_message_to_erroneous_issue_data,
 81    ) = get_issue_maps(github, repository, start_date)
 82
 83    issue_text: str = get_issue_text(
 84        label_to_issue_data,
 85        error_message_to_erroneous_issue_data,
 86    )
 87
 88    if prod:
 89        top_ranking_issues_issue: Issue = repository.get_issue(issue_reference_number)
 90        top_ranking_issues_issue.edit(body=issue_text)
 91    else:
 92        print(issue_text)
 93
 94    remaining_requests_after: int = github.rate_limiting[0]
 95    print(f"Remaining requests after: {remaining_requests_after}")
 96    print(f"Requests used: {remaining_requests_before - remaining_requests_after}")
 97
 98    run_duration: timedelta = datetime.now() - start_time
 99    print(run_duration)
100
101
def get_issue_maps(
    github: Github,
    repository: Repository,
    start_date: datetime | None = None,
) -> tuple[dict[str, list[IssueData]], dict[str, list[IssueData]]]:
    """Gather the per-label issue data and the erroneous-issue data.

    Returns a pair ``(label_to_issue_data, error_message_to_issue_data)``.
    The first dict's keys are ordered by the total like count of each
    label's issues, highest first.
    """
    unordered: dict[str, list[IssueData]] = get_label_to_issue_data(
        get_label_to_issues(github, repository, start_date)
    )
    error_message_to_erroneous_issue_data: dict[
        str, list[IssueData]
    ] = get_error_message_to_erroneous_issue_data(
        get_error_message_to_erroneous_issues(github, repository)
    )

    def total_likes(label: str) -> int:
        return sum(entry.like_count for entry in unordered[label])

    # Rebuild the dictionary so labels are ordered by the summation of the
    # likes on their associated issues.
    ranked_labels = sorted(unordered, key=total_likes, reverse=True)
    label_to_issue_data = {label: unordered[label] for label in ranked_labels}

    return label_to_issue_data, error_message_to_erroneous_issue_data
139
140
def get_label_to_issues(
    github: Github,
    repository: Repository,
    start_date: datetime | None = None,
) -> defaultdict[str, list[Issue]]:
    """Search the repository's open issues once per reported label.

    A search is run for every label in CORE_LABELS | ADDITIONAL_LABELS. Each
    query excludes the IGNORED_LABELS, asks for results sorted by "+1"
    reactions (descending) and, when `start_date` is given, restricts results
    to issues created on or after that date. At most ISSUES_PER_LABEL issues
    are kept per label; a label whose search reports no results gets no entry.
    """
    ignored_labels_text: str = " ".join(
        f'-label:"{ignored}"' for ignored in IGNORED_LABELS
    )

    date_query: str = ""
    if start_date:
        date_query = f"created:>={start_date.strftime('%Y-%m-%d')}"

    label_to_issues: defaultdict[str, list[Issue]] = defaultdict(list)

    for label in CORE_LABELS | ADDITIONAL_LABELS:
        query: str = f'repo:{repository.full_name} is:open is:issue {date_query} label:"{label}" {ignored_labels_text} sort:reactions-+1-desc'
        results = github.search_issues(query)

        if results.totalCount <= 0:
            continue

        for found_issue in results[0:ISSUES_PER_LABEL]:
            label_to_issues[label].append(found_issue)

    return label_to_issues
167
168
def get_label_to_issue_data(
    label_to_issues: defaultdict[str, list[Issue]]
) -> dict[str, list[IssueData]]:
    """Convert each label's issues to IssueData, ranked within the label.

    Issues are ordered by like count, highest first; issues with the same
    like count are ordered by creation time, oldest first. Labels that have
    no issues are left out of the result.
    """
    label_to_issue_data: dict[str, list[IssueData]] = {}

    for label, issues in label_to_issues.items():
        # Break ties on the issue's real `created_at` datetime. The formatted
        # `IssueData.creation_datetime` string is month-first and uses a
        # 12-hour clock, so comparing it as text does not give chronological
        # order (e.g. "01/05/2024 ..." sorts before "12/01/2023 ...").
        ranked = sorted(
            ((IssueData(issue), issue.created_at) for issue in issues),
            key=lambda pair: (-pair[0].like_count, pair[1]),
        )

        if ranked:
            label_to_issue_data[label] = [data for data, _ in ranked]

    return label_to_issue_data
188
189
def get_error_message_to_erroneous_issues(
    github: Github, repository: Repository
) -> defaultdict[str, list[Issue]]:
    """Find open issues that have neither a core label nor an ignored label.

    Every match is filed under the "missing core label" message. When the
    search yields nothing, the returned mapping has no entries at all.
    """
    # Exclude every core and ignored label; whatever is left is erroneous.
    exclusions: str = " ".join(
        f'-label:"{label}"' for label in CORE_LABELS | IGNORED_LABELS
    )
    query: str = f"repo:{repository.full_name} is:open is:issue {exclusions}"

    error_message_to_erroneous_issues: defaultdict[str, list[Issue]] = defaultdict(list)

    unlabeled: list[Issue] = list(github.search_issues(query))
    # Only create the key when there is something to report, so an empty
    # search leaves the mapping empty.
    if unlabeled:
        error_message_to_erroneous_issues["missing core label"] = unlabeled

    return error_message_to_erroneous_issues
204
205
def get_error_message_to_erroneous_issue_data(
    error_message_to_erroneous_issues: defaultdict[str, list[Issue]],
) -> dict[str, list[IssueData]]:
    """Wrap every erroneous issue in IssueData, keeping keys and order."""
    return {
        error_message: [IssueData(issue) for issue in issues]
        for error_message, issues in error_message_to_erroneous_issues.items()
    }
217
218
def get_issue_text(
    label_to_issue_data: dict[str, list[IssueData]],
    error_message_to_erroneous_issue_data: dict[str, list[IssueData]],
) -> str:
    """Render the complete markdown text of the report.

    The text consists of an "Updated on" timestamp (New York time), the
    ranked issues per label, an errors section that is included only when
    there are erroneous issues, and a closing link to the script.
    """
    now_in_new_york = datetime.now(timezone("america/new_york"))
    current_datetime: str = now_in_new_york.strftime(f"{DATETIME_FORMAT} (%Z)")

    lines: list[str] = [f"*Updated on {current_datetime}*"]
    lines += get_highest_ranking_issues_lines(label_to_issue_data)
    lines += ["", "---\n"]

    error_lines: list[str] = get_erroneous_issues_lines(
        error_message_to_erroneous_issue_data
    )

    if error_lines:
        core_labels_text: str = ", ".join(f'"{name}"' for name in CORE_LABELS)
        ignored_labels_text: str = ", ".join(f'"{name}"' for name in IGNORED_LABELS)

        lines += [
            "## errors with issues (this section only shows when there are errors with issues)\n",
            f"This script expects every issue to have at least one of the following core labels: {core_labels_text}",
            f"This script currently ignores issues that have one of the following labels: {ignored_labels_text}\n",
            "### what to do?\n",
            "- Adjust the core labels on an issue to put it into a correct state or add a currently-ignored label to the issue",
            "- Adjust the core and ignored labels registered in this script",
        ]
        lines += error_lines
        lines += ["", "---\n"]

    lines.append(
        "*For details on how this issue is generated, [see the script](https://github.com/zed-industries/zed/blob/main/script/update_top_ranking_issues/main.py)*"
    )

    return "\n".join(lines)
270
271
def get_highest_ranking_issues_lines(
    label_to_issue_data: dict[str, list[IssueData]],
) -> list[str]:
    """Build the markdown lines listing each label's issues.

    Every label contributes a "## <label>" heading followed by one numbered
    line per issue, showing the issue URL and its like count. An empty (or
    otherwise falsy) mapping yields an empty list.
    """
    if not label_to_issue_data:
        return []

    lines: list[str] = []

    for label, ranked_issues in label_to_issue_data.items():
        lines.append(f"\n## {label}\n")

        for position, entry in enumerate(ranked_issues, start=1):
            bullet: str = f"{entry.url} ({entry.like_count} :thumbsup:)"
            lines.append(f"{position}. {bullet}")

    return lines
290
291
def get_erroneous_issues_lines(
    error_message_to_erroneous_issue_data,
) -> list[str]:
    """Build the markdown lines listing erroneous issues per error message.

    Every error message contributes a "#### <message>" heading followed by
    one "- <url>" bullet per issue. An empty (or otherwise falsy) mapping
    yields an empty list.
    """
    if not error_message_to_erroneous_issue_data:
        return []

    lines: list[str] = []

    for error_message, entries in error_message_to_erroneous_issue_data.items():
        lines.append(f"\n#### {error_message}\n")
        lines.extend(f"- {entry.url}" for entry in entries)

    return lines
308
309
# Invoke the Typer application only when this file is executed directly.
if __name__ == "__main__":
    app()
312
313# TODO: Sort label output into core and non core sections