#!/usr/bin/env python3
"""
GitHub PR Analyzer for the zed-industries/zed repository.
Downloads all open PRs and groups them by first assignee, with status, open date,
and last-updated date.
"""

import urllib.request
import urllib.parse
import urllib.error
import json
from datetime import datetime
from collections import defaultdict
import sys
import os

# GitHub API configuration
GITHUB_API_BASE = "https://api.github.com"
REPO_OWNER = "zed-industries"
REPO_NAME = "zed"
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")

def make_github_request(url, params=None):
    """Make a request to the GitHub API with proper headers and query-parameter
    support; returns the response object, or None on error."""
    if params:
        url_parts = list(urllib.parse.urlparse(url))
        query = dict(urllib.parse.parse_qsl(url_parts[4]))
        query.update(params)
        url_parts[4] = urllib.parse.urlencode(query)
        url = urllib.parse.urlunparse(url_parts)

    req = urllib.request.Request(url)
    req.add_header("Accept", "application/vnd.github.v3+json")
    req.add_header("User-Agent", "GitHub-PR-Analyzer")

    if GITHUB_TOKEN:
        req.add_header("Authorization", f"token {GITHUB_TOKEN}")
    try:
        response = urllib.request.urlopen(req)
        return response
    except urllib.error.HTTPError as e:
        # HTTPError is a subclass of URLError, so it must be caught first.
        print(f"HTTP error {e.code} for {url}: {e.reason}")
        return None
    except urllib.error.URLError as e:
        print(f"Error making request to {url}: {e}")
        return None
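
# A minimal, optional helper (the name is illustrative; nothing below calls it):
# GitHub API responses carry X-RateLimit-Remaining and X-RateLimit-Reset
# headers, which can be surfaced before starting a long pagination run.
def print_rate_limit(response):
    """Print the remaining request budget from a GitHub API response."""
    remaining = response.getheader("X-RateLimit-Remaining")
    reset = response.getheader("X-RateLimit-Reset")
    if remaining is not None and reset is not None:
        # X-RateLimit-Reset is a Unix timestamp in UTC seconds.
        reset_at = datetime.fromtimestamp(int(reset))
        print(f"Rate limit remaining: {remaining} (resets at {reset_at})")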

def fetch_all_prs():
    """Fetch all open PRs from the repository using pagination."""
    prs = []
    page = 1
    per_page = 100

    print("Fetching PRs from GitHub API...")

    while True:
        url = f"{GITHUB_API_BASE}/repos/{REPO_OWNER}/{REPO_NAME}/pulls"
        params = {
            "state": "open",
            "sort": "updated",
            "direction": "desc",
            "per_page": per_page,
            "page": page
        }

        response = make_github_request(url, params)
        if not response:
            break

        try:
            data = response.read().decode('utf-8')
            page_prs = json.loads(data)
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            print(f"Error parsing response: {e}")
            break

        if not page_prs:
            break

        prs.extend(page_prs)
        print(f"Fetched page {page}: {len(page_prs)} PRs (Total: {len(prs)})")

        # Check whether more pages remain.
        link_header = response.getheader('Link', '')
        if 'rel="next"' not in link_header:
            break

        page += 1

    print(f"Total PRs fetched: {len(prs)}")
    return prs
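
# A sketch of a stricter alternative to the substring check above: parse the
# Link header into a rel -> URL map. The helper name is illustrative; it is
# not wired into fetch_all_prs.
def parse_link_header(link_header):
    """Parse '<url>; rel="next", <url>; rel="last"' into a rel -> URL dict."""
    links = {}
    for part in link_header.split(","):
        section = part.split(";")
        if len(section) < 2:
            continue
        url = section[0].strip().strip("<>")
        for param in section[1:]:
            key, _, value = param.strip().partition("=")
            if key == "rel":
                links[value.strip('"')] = url
    return links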

def format_date_as_days_ago(date_string):
    """Format an ISO date string as 'X days ago'."""
    if not date_string:
        return "N/A"

    try:
        dt = datetime.fromisoformat(date_string.replace('Z', '+00:00'))
        now = datetime.now(dt.tzinfo)
        days_diff = (now - dt).days

        if days_diff == 0:
            return "today"
        elif days_diff == 1:
            return "1 day ago"
        else:
            return f"{days_diff} days ago"
    except ValueError:
        # Malformed timestamps get a placeholder rather than crashing.
        return "N/A"

def get_first_assignee(pr):
    """Get the first assignee from a PR, or return 'Unassigned' if none."""
    assignees = pr.get('assignees', [])
    if assignees:
        return assignees[0].get('login', 'Unknown')
    return 'Unassigned'

def get_pr_status(pr):
    """Determine if a PR is a draft or ready for review."""
    if pr.get('draft', False):
        return "Draft"
    return "Ready"

def analyze_prs(prs):
    """Group PRs by first assignee and organize the data."""
    grouped_prs = defaultdict(list)

    for pr in prs:
        assignee = get_first_assignee(pr)

        pr_info = {
            'number': pr['number'],
            'title': pr['title'],
            'status': get_pr_status(pr),
            'state': pr['state'],
            'created_at': format_date_as_days_ago(pr['created_at']),
            'updated_at': format_date_as_days_ago(pr['updated_at']),
            'updated_at_raw': pr['updated_at'],
            'url': pr['html_url'],
            'author': pr['user']['login']
        }

        grouped_prs[assignee].append(pr_info)

    # Sort PRs within each group by update date (newest first).
    for assignee in grouped_prs:
        grouped_prs[assignee].sort(key=lambda x: x['updated_at_raw'], reverse=True)

    return dict(grouped_prs)
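
# Illustrative shape of analyze_prs' return value (names and numbers are
# made-up examples, not real data):
# {"alice": [{"number": 123, "title": "...", "status": "Ready", ...}],
#  "Unassigned": [{...}]}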

def print_pr_report(grouped_prs):
    """Print a formatted report of PRs grouped by assignee."""
    print(f"OPEN PR REPORT FOR {REPO_OWNER}/{REPO_NAME}")
    print()

    # Sort assignees alphabetically, but put 'Unassigned' last.
    assignees = sorted(grouped_prs.keys())
    if 'Unassigned' in assignees:
        assignees.remove('Unassigned')
        assignees.append('Unassigned')

    total_prs = sum(len(prs) for prs in grouped_prs.values())
    print(f"Total Open PRs: {total_prs}")
    print()

    for assignee in assignees:
        prs = grouped_prs[assignee]
        assignee_display = f"@{assignee}" if assignee != 'Unassigned' else assignee
        print(f"Assigned to {assignee_display} ({len(prs)} PRs):")

        for pr in prs:
            print(f"- {pr['author']}: [{pr['title']}]({pr['url']}) opened:{pr['created_at']} updated:{pr['updated_at']}")

        print()
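
# Example of the rendered output (values are illustrative):
#   Assigned to @alice (2 PRs):
#   - bob: [Fix editor panic](https://github.com/...) opened:3 days ago updated:today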

def save_json_report(grouped_prs, filename="pr_report.json"):
    """Save the PR data to a JSON file."""
    try:
        with open(filename, 'w') as f:
            json.dump(grouped_prs, f, indent=2)
        print(f"📄 Report saved to {filename}")
    except Exception as e:
        print(f"Error saving JSON report: {e}")

def main():
    """Main function to orchestrate the PR analysis."""
    print("GitHub PR Analyzer")
    print("==================")

    if not GITHUB_TOKEN:
        print("⚠️ Warning: GITHUB_TOKEN not set. You may hit rate limits.")
        print("   Set the GITHUB_TOKEN environment variable for authenticated requests.")
        print()

    # Fetch all open PRs
    prs = fetch_all_prs()

    if not prs:
        print("❌ Failed to fetch PRs. Please check your connection and try again.")
        sys.exit(1)

    # Analyze and group PRs
    grouped_prs = analyze_prs(prs)

    # Print the report
    print_pr_report(grouped_prs)
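
    # Optional: persist the grouped data for downstream tooling.
    # save_json_report is defined above but otherwise never called.
    save_json_report(grouped_prs)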

if __name__ == "__main__":
    main()