main.py

  1import os
  2from datetime import datetime, timedelta
  3from typing import Optional
  4
  5import typer
  6from github import Github
  7from github.Issue import Issue
  8from github.Repository import Repository
  9from pytz import timezone
 10from typer import Typer
 11
# Typer CLI application; `main` below is registered as its only command.
app: Typer = typer.Typer()

# Display format for issue-creation timestamps, e.g. "01/31/2025 04:30 PM".
DATETIME_FORMAT: str = "%m/%d/%Y %I:%M %p"
# Maximum number of issues listed under each label section of the report.
ISSUES_PER_LABEL: int = 50
 16
 17
 18class IssueData:
 19    def __init__(self, issue: Issue) -> None:
 20        self.title = issue.title
 21        self.url: str = issue.html_url
 22        self.like_count: int = issue._rawData["reactions"]["+1"]  # type: ignore [attr-defined]
 23        self.creation_datetime: str = issue.created_at.strftime(DATETIME_FORMAT)
 24        # TODO: Change script to support storing labels here, rather than directly in the script
 25        self.labels: set[str] = {label["name"] for label in issue._rawData["labels"]}  # type: ignore [attr-defined]
 26        self._issue = issue
 27
 28
 29@app.command()
 30def main(
 31    github_token: Optional[str] = None,
 32    issue_reference_number: Optional[int] = None,
 33    query_day_interval: Optional[int] = None,
 34) -> None:
 35    start_time: datetime = datetime.now()
 36
 37    start_date: datetime | None = None
 38
 39    if query_day_interval:
 40        tz = timezone("america/new_york")
 41        current_time = datetime.now(tz).replace(
 42            hour=0, minute=0, second=0, microsecond=0
 43        )
 44        start_date = current_time - timedelta(days=query_day_interval)
 45
 46    # GitHub Workflow will pass in the token as an environment variable,
 47    # but we can place it in our env when running the script locally, for convenience
 48    github_token = github_token or os.getenv("GITHUB_ACCESS_TOKEN")
 49
 50    with Github(github_token, per_page=100) as github:
 51        remaining_requests_before: int = github.rate_limiting[0]
 52        print(f"Remaining requests before: {remaining_requests_before}")
 53
 54        repo_name: str = "zed-industries/zed"
 55        repository: Repository = github.get_repo(repo_name)
 56
 57        label_to_issue_data: dict[str, list[IssueData]] = get_issue_maps(
 58            github, repository, start_date
 59        )
 60
 61        issue_text: str = get_issue_text(label_to_issue_data)
 62
 63        if issue_reference_number:
 64            top_ranking_issues_issue: Issue = repository.get_issue(issue_reference_number)
 65            top_ranking_issues_issue.edit(body=issue_text)
 66        else:
 67            print(issue_text)
 68
 69        remaining_requests_after: int = github.rate_limiting[0]
 70        print(f"Remaining requests after: {remaining_requests_after}")
 71        print(f"Requests used: {remaining_requests_before - remaining_requests_after}")
 72
 73    run_duration: timedelta = datetime.now() - start_time
 74    print(run_duration)
 75
 76
 77def get_issue_maps(
 78    github: Github,
 79    repository: Repository,
 80    start_date: datetime | None = None,
 81) -> dict[str, list[IssueData]]:
 82    label_to_issue_data: dict[str, list[IssueData]] = get_label_to_issue_data(
 83        github,
 84        repository,
 85        start_date,
 86    )
 87
 88    # Create a new dictionary with labels ordered by the summation the of likes on the associated issues
 89    labels = list(label_to_issue_data.keys())
 90
 91    labels.sort(
 92        key=lambda label: sum(
 93            issue_data.like_count for issue_data in label_to_issue_data[label]
 94        ),
 95        reverse=True,
 96    )
 97
 98    label_to_issue_data = {label: label_to_issue_data[label] for label in labels}
 99
100    return label_to_issue_data
101
102
def get_label_to_issue_data(
    github: Github,
    repository: Repository,
    start_date: datetime | None = None,
) -> dict[str, list[IssueData]]:
    """Search GitHub and return the top issues for each report section.

    Returns a mapping of section name ("bug", "crash", ...) to at most
    `ISSUES_PER_LABEL` `IssueData` entries, most-liked first. Sections with
    no matching issues are omitted. `start_date`, when given, restricts the
    search to issues created on or after that date.
    """
    common_queries = [
        f"repo:{repository.full_name}",
        "is:open",
        "is:issue",
        '-label:"ignore top-ranking issues"',
        "sort:reactions-+1-desc",
    ]

    if start_date:
        common_queries.append(f"created:>={start_date.strftime('%Y-%m-%d')}")

    common_query = " ".join(common_queries)

    # Because PyGithub doesn't seem to support logical operators `AND` and `OR`
    # that GitHub issue queries can use, we use lists as values, rather than
    # using `(label:bug OR type:Bug)`. This is not as efficient, as we might
    # query the same issue multiple times. Issues that are potentially queried
    # multiple times are deduplicated in the `label_to_issues` dictionary. If
    # PyGithub ever supports logical operators, we should definitely make the
    # switch.
    section_queries: dict[str, list[str]] = {
        "bug": ["label:bug", "type:Bug"],
        "crash": ["label:crash", "type:Crash"],
        "feature": ["label:feature", "type:Feature"],
        "meta": ["type:Meta"],
        "windows": ["label:windows"],
        "unlabeled": ["no:label no:type"],
    }

    label_to_issue_data: dict[str, list[IssueData]] = {}

    for section, section_query_parts in section_queries.items():
        # Issues matched by more than one query for this section are
        # deduplicated via the set.
        unique_issues: set[Issue] = set()

        for query_part in section_query_parts:
            unique_issues.update(github.search_issues(f"{common_query} {query_part}"))

        if not unique_issues:
            continue

        issue_data: list[IssueData] = [IssueData(issue) for issue in unique_issues]
        # Most-liked first. NOTE(review): ties are broken by the formatted
        # `creation_datetime` string ("%m/%d/%Y ..."), which sorts
        # lexicographically rather than chronologically — confirm whether a
        # chronological tie-break was intended.
        issue_data.sort(key=lambda data: (-data.like_count, data.creation_datetime))

        label_to_issue_data[section] = issue_data[:ISSUES_PER_LABEL]

    return label_to_issue_data
167
168
def get_issue_text(
    label_to_issue_data: dict[str, list[IssueData]],
) -> str:
    """Render the complete markdown body for the top-ranking-issues issue."""
    new_york = timezone("america/new_york")
    current_datetime: str = datetime.now(new_york).strftime(f"{DATETIME_FORMAT} (%Z)")

    # Header timestamp, then the per-label sections, then the footer.
    lines: list[str] = [f"*Updated on {current_datetime}*"]
    lines.extend(get_highest_ranking_issues_lines(label_to_issue_data))
    lines.append("\n---\n")
    lines.append(
        "*For details on how this issue is generated, [see the script](https://github.com/zed-industries/zed/blob/main/script/update_top_ranking_issues/main.py)*"
    )

    return "\n".join(lines)
187
188
def get_highest_ranking_issues_lines(
    label_to_issue_data: dict[str, list[IssueData]],
) -> list[str]:
    """Return markdown lines: a heading per label plus its numbered issue list.

    Each issue renders as `N. <url> (<likes> :thumbsup:)`, numbered from 1.
    Returns an empty list when there is no issue data.
    """
    lines: list[str] = []

    # An empty dict simply yields no iterations, so no emptiness guard needed.
    for label, section_issues in label_to_issue_data.items():
        lines.append(f"\n## {label}\n")

        for rank, issue_data in enumerate(section_issues, start=1):
            lines.append(
                f"{rank}. {issue_data.url} ({issue_data.like_count} :thumbsup:)"
            )

    return lines
207
208
# Run the Typer CLI when executed as a script (e.g. by the GitHub workflow).
if __name__ == "__main__":
    app()

# TODO: Sort label output into core and non core sections