main.py

  1import os
  2from collections import defaultdict
  3from datetime import datetime, timedelta
  4from typing import Optional
  5
  6from github import Github
  7from github.Issue import Issue
  8from github.Repository import Repository
  9from pytz import timezone
 10
 11import typer
 12from typer import Typer
 13
# Typer application object: `main` is registered on it as a command, and it is
# invoked when this file is run as a script.
app: Typer = typer.Typer()
 15
 16DATETIME_FORMAT: str = "%m/%d/%Y %I:%M %p"
 17CORE_LABELS: set[str] = set(
 18    [
 19        "defect",
 20        "design",
 21        "documentation",
 22        "enhancement",
 23        "panic / crash",
 24    ]
 25)
 26# A set of labels for adding in labels that we want present in the final
 27# report, but that we don't want being defined as a core label, since issues
 28# with without core labels are flagged as errors.
 29ADDITIONAL_LABELS: set[str] = set(
 30    [
 31        "ai",
 32        "linux",
 33        "vim",
 34        "windows",
 35    ]
 36)
 37IGNORED_LABELS: set[str] = set(
 38    [
 39        "ignore top-ranking issues",
 40        "meta",
 41    ]
 42)
 43ISSUES_PER_LABEL: int = 20
 44
 45
 46class IssueData:
 47    def __init__(self, issue: Issue) -> None:
 48        self.url: str = issue.html_url
 49        self.like_count: int = issue._rawData["reactions"]["+1"]  # type: ignore [attr-defined]
 50        self.creation_datetime: str = issue.created_at.strftime(DATETIME_FORMAT)
 51        # TODO: Change script to support storing labels here, rather than directly in the script
 52        self.labels: set[str] = set(label["name"] for label in issue._rawData["labels"])  # type: ignore [attr-defined]
 53
 54
 55@app.command()
 56def main(
 57    issue_reference_number: Optional[int] = None,
 58    github_token: Optional[str] = None,
 59    prod: bool = False,
 60    query_day_interval: Optional[int] = None,
 61) -> None:
 62    if prod and not issue_reference_number:
 63        raise ValueError("Issue reference number must be provided in prod mode")
 64
 65    start_time: datetime = datetime.now()
 66
 67    start_date: datetime | None = None
 68
 69    if query_day_interval:
 70        tz = timezone("america/new_york")
 71        current_time = datetime.now(tz).replace(
 72            hour=0, minute=0, second=0, microsecond=0
 73        )
 74        start_date = current_time - timedelta(days=query_day_interval)
 75
 76    # GitHub Workflow will pass in the token as an environment variable,
 77    # but we can place it in our env when running the script locally, for convenience
 78    github_token = github_token or os.getenv("GITHUB_ACCESS_TOKEN")
 79    github = Github(github_token)
 80
 81    remaining_requests_before: int = github.rate_limiting[0]
 82    print(f"Remaining requests before: {remaining_requests_before}")
 83
 84    repo_name: str = "zed-industries/zed"
 85    repository: Repository = github.get_repo(repo_name)
 86
 87    # There has to be a nice way of adding types to tuple unpacking
 88    label_to_issue_data: dict[str, list[IssueData]]
 89    error_message_to_erroneous_issue_data: dict[str, list[IssueData]]
 90    (
 91        label_to_issue_data,
 92        error_message_to_erroneous_issue_data,
 93    ) = get_issue_maps(github, repository, start_date)
 94
 95    issue_text: str = get_issue_text(
 96        label_to_issue_data,
 97        error_message_to_erroneous_issue_data,
 98    )
 99
100    if prod:
101        top_ranking_issues_issue: Issue = repository.get_issue(issue_reference_number)
102        top_ranking_issues_issue.edit(body=issue_text)
103    else:
104        print(issue_text)
105
106    remaining_requests_after: int = github.rate_limiting[0]
107    print(f"Remaining requests after: {remaining_requests_after}")
108    print(f"Requests used: {remaining_requests_before - remaining_requests_after}")
109
110    run_duration: timedelta = datetime.now() - start_time
111    print(run_duration)
112
113
def get_issue_maps(
    github: Github,
    repository: Repository,
    start_date: datetime | None = None,
) -> tuple[dict[str, list[IssueData]], dict[str, list[IssueData]]]:
    """Collect the data for both sections of the report.

    Returns a pair ``(label_to_issue_data, error_message_to_erroneous_issue_data)``.
    The first maps each label to its issue data, with the labels ordered by
    the total like count of their issues (highest first). The second maps an
    error message to the data of the issues flagged with that error.
    """
    issue_data_by_label = get_label_to_issue_data(
        get_label_to_issues(github, repository, start_date)
    )
    erroneous_issue_data_by_message = get_error_message_to_erroneous_issue_data(
        get_error_message_to_erroneous_issues(github, repository)
    )

    def total_likes(entry):
        _, issue_data_list = entry
        return sum(issue_data.like_count for issue_data in issue_data_list)

    # Rebuild the dictionary with labels ordered by the sum of the likes on
    # the associated issues.
    ranked_by_likes = dict(
        sorted(issue_data_by_label.items(), key=total_likes, reverse=True)
    )

    return ranked_by_likes, erroneous_issue_data_by_message
151
152
def get_label_to_issues(
    github: Github,
    repository: Repository,
    start_date: datetime | None = None,
) -> defaultdict[str, list[Issue]]:
    """Search the repository's open issues once per core/additional label.

    Each search excludes issues carrying an ignored label, is optionally
    restricted to issues created on or after ``start_date``, and asks for
    results sorted by "+1" reactions, descending. At most ``ISSUES_PER_LABEL``
    issues are kept per label; labels with no results get no entry.
    """
    exclusions = " ".join(f'-label:"{ignored}"' for ignored in IGNORED_LABELS)
    created_filter = (
        "" if not start_date else f"created:>={start_date.strftime('%Y-%m-%d')}"
    )

    issues_by_label: defaultdict[str, list[Issue]] = defaultdict(list)

    for label in CORE_LABELS | ADDITIONAL_LABELS:
        search_query = f'repo:{repository.full_name} is:open is:issue {created_filter} label:"{label}" {exclusions} sort:reactions-+1-desc'
        results = github.search_issues(search_query)

        if results.totalCount <= 0:
            continue

        for issue in results[0:ISSUES_PER_LABEL]:
            issues_by_label[label].append(issue)

    return issues_by_label
179
180
def get_label_to_issue_data(
    label_to_issues: defaultdict[str, list[Issue]],
) -> dict[str, list[IssueData]]:
    """Convert each label's issues to ``IssueData`` and order them for display.

    Within a label, issues are ordered by like count (highest first); ties are
    broken by creation time (earliest first). Labels whose issue list is empty
    are omitted from the result.

    Args:
        label_to_issues: Mapping of label name to the issues found for it.

    Returns:
        Mapping of label name to the sorted, non-empty list of issue data.
    """
    label_to_issue_data: dict[str, list[IssueData]] = {}

    for label, issues in label_to_issues.items():
        issue_data: list[IssueData] = [IssueData(issue) for issue in issues]

        if not issue_data:
            continue

        # `creation_datetime` is a string in DATETIME_FORMAT (month first,
        # 12-hour clock), so comparing the strings directly is not
        # chronological. Parse it back into a datetime for the tie-break.
        issue_data.sort(
            key=lambda data: (
                -data.like_count,
                datetime.strptime(data.creation_datetime, DATETIME_FORMAT),
            )
        )
        label_to_issue_data[label] = issue_data

    return label_to_issue_data
200
201
def get_error_message_to_erroneous_issues(
    github: Github, repository: Repository
) -> defaultdict[str, list[Issue]]:
    """Find open issues that have neither a core label nor an ignored label.

    Every issue returned by the search is filed under the error message
    "missing core label". The key is only created when at least one such
    issue exists, so an empty result means there is nothing to report.
    """
    # Query for all open issues that don't have either a core or ignored label and mark those as erroneous
    excluded_labels_text = " ".join(
        f'-label:"{label}"' for label in CORE_LABELS | IGNORED_LABELS
    )
    search_query = (
        f"repo:{repository.full_name} is:open is:issue {excluded_labels_text}"
    )

    erroneous_by_message: defaultdict[str, list[Issue]] = defaultdict(list)

    for unlabeled_issue in github.search_issues(search_query):
        erroneous_by_message["missing core label"].append(unlabeled_issue)

    return erroneous_by_message
216
217
def get_error_message_to_erroneous_issue_data(
    error_message_to_erroneous_issues: defaultdict[str, list[Issue]],
) -> dict[str, list[IssueData]]:
    """Wrap every erroneous issue in an ``IssueData``, keyed by error message.

    Every key of the input is kept, in the same order, even when its issue
    list is empty.
    """
    return {
        error_message: [IssueData(issue) for issue in issues]
        for error_message, issues in error_message_to_erroneous_issues.items()
    }
229
230
def get_issue_text(
    label_to_issue_data: dict[str, list[IssueData]],
    error_message_to_erroneous_issue_data: dict[str, list[IssueData]],
) -> str:
    """Render the complete Markdown body of the report.

    The text consists of an "Updated on" line stamped with the current New
    York time, the per-label ranking sections, an errors section that is only
    included when there are erroneous issues, and a footer linking to the
    script. Lines are joined with newlines.
    """
    eastern = timezone("america/new_york")
    updated_on = datetime.now(eastern).strftime(f"{DATETIME_FORMAT} (%Z)")

    lines: list[str] = [f"*Updated on {updated_on}*"]
    lines += get_highest_ranking_issues_lines(label_to_issue_data)
    lines += ["", "---\n"]

    error_lines = get_erroneous_issues_lines(error_message_to_erroneous_issue_data)

    if error_lines:
        quoted_core_labels = ", ".join([f'"{label}"' for label in CORE_LABELS])
        quoted_ignored_labels = ", ".join(
            [f'"{label}"' for label in IGNORED_LABELS]
        )

        lines += [
            "## errors with issues (this section only shows when there are errors with issues)\n",
            f"This script expects every issue to have at least one of the following core labels: {quoted_core_labels}",
            f"This script currently ignores issues that have one of the following labels: {quoted_ignored_labels}\n",
            "### what to do?\n",
            "- Adjust the core labels on an issue to put it into a correct state or add a currently-ignored label to the issue",
            "- Adjust the core and ignored labels registered in this script",
        ]
        lines += error_lines
        lines += ["", "---\n"]

    lines.append(
        "*For details on how this issue is generated, [see the script](https://github.com/zed-industries/zed/blob/main/script/update_top_ranking_issues/main.py)*"
    )

    return "\n".join(lines)
282
283
def get_highest_ranking_issues_lines(
    label_to_issue_data: dict[str, list[IssueData]],
) -> list[str]:
    """Render one Markdown section per label with a numbered list of issues.

    Each label contributes a ``## <label>`` heading followed by one line per
    issue of the form ``<rank>. <url> (<likes> :thumbsup:)``, with ranks
    starting at 1 and following the order of the given lists. A falsy mapping
    yields an empty list.
    """
    if not label_to_issue_data:
        return []

    lines: list[str] = []

    for label, ranked_issues in label_to_issue_data.items():
        lines.append(f"\n## {label}\n")
        lines.extend(
            f"{rank}. {entry.url} ({entry.like_count} :thumbsup:)"
            for rank, entry in enumerate(ranked_issues, start=1)
        )

    return lines
302
303
def get_erroneous_issues_lines(
    error_message_to_erroneous_issue_data,
) -> list[str]:
    """Render one Markdown subsection per error message with a bullet per issue.

    The argument maps an error message to a list of objects exposing a ``url``
    attribute. Each message contributes a ``#### <message>`` heading followed
    by ``- <url>`` lines. A falsy mapping yields an empty list.
    """
    if not error_message_to_erroneous_issue_data:
        return []

    lines: list[str] = []

    for error_message, flagged_issues in error_message_to_erroneous_issue_data.items():
        lines.append(f"\n#### {error_message}\n")
        lines.extend(f"- {entry.url}" for entry in flagged_issues)

    return lines
320
321
# Run the Typer app only when this file is executed directly, not when it is
# imported.
if __name__ == "__main__":
    app()
324
325# TODO: Sort label output into core and non core sections