triage_project_sync.py

  1#!/usr/bin/env python3
  2"""
  3triage_project_sync.py
  4======================
  5
  6Sync triage state from `zed-industries/zed` issues into the
  7"Zed weekly triage" project (#84).
  8
  9Auto-derives `Status`, `Stale since`, `Aged?`, `Intake week` from issue labels
 10+ comment activity + assignees. Mutates the project to
 11reflect the derived state.
 12
 13The labels and the issue thread are the source of truth. The project is a
*derived view* — manual edits to the synced fields will be overwritten on the
 15next sync.
 16
 17Modes
 18-----
 19    --issue N        Sync a single issue. Used by GH Actions on issue events.
 20    --all            Sync every item currently in the project. Used by daily
 21                     cron as a safety net.
 22    --dry-run        Compute derivations and log them, but don't mutate the
 23                     project. Safe for local testing / first deploy.
 24
 25Auth
 26----
 27Reads `GITHUB_TOKEN` from env. For production, this is an installation token
 28from the `ZED_COMMUNITY_BOT_APP_ID` GitHub App, scoped to
 29`owner: zed-industries`, with `Organization Projects: Read and write`.
 30
 31For local `--dry-run` testing, a personal token with `repo, read:org,
 32read:project` is sufficient.
 33
 34Idempotency / safety
 35--------------------
 36- Every run re-derives all fields from current issue state. Running twice
 37  produces the same result as once.
 38- Failures on a single issue (in `--all` mode) are logged and the run
 39  continues. One bad item doesn't poison the batch.
 40- `--dry-run` makes no GraphQL mutations and no REST writes.
 41
 42Dependencies
 43------------
 44    pip install requests
 45"""
 46
 47from __future__ import annotations
 48
 49import argparse
 50import json
 51import os
 52import sys
 53import time
 54from dataclasses import dataclass
 55from datetime import datetime, timedelta, timezone
 56
 57import requests
 58
# ---------------------------------------------------------------------------
# Constants

REPO_OWNER = "zed-industries"
REPO_NAME = "zed"
REPO = f"{REPO_OWNER}/{REPO_NAME}"

# "Zed weekly triage" org project. The number is stable config; field ids are
# discovered at runtime (see fetch_project_schema) so a recreated project
# doesn't break the script.
PROJECT_NUMBER = 84
PROJECT_OWNER = REPO_OWNER

# Org team whose members count as "staff" for the derivation rules.
STAFF_TEAM_SLUG = "staff"

# Status names. MUST match the option names configured in project #84.
# (Casing matters -- GH Projects single-select option matching is case-sensitive.)
STATUS_NEEDS_LABELS = "Needs labels"
STATUS_NEEDS_REPRO_ATTEMPT = "Needs repro attempt"
STATUS_NEEDS_ASK = "Needs ask"
STATUS_USER_REPLIED = "User replied (review)"
STATUS_AWAITING_USER = "Awaiting user"
STATUS_RESPONDED_NO_REPRO = "Responded, no repro"
STATUS_AWAITING_EXTERNAL_REPRO = "Awaiting external repro"  # not auto-set; placeholder
STATUS_REPRODUCIBLE = "Reproducible"
STATUS_HANDOFF = "Handoff"
STATUS_HANDOFF_INCOMPLETE = "Handoff (incomplete)"
STATUS_CLAIMED_COMMUNITY = "Claimed by community"
STATUS_CLOSED = "Closed"
STATUS_UNKNOWN = "Unknown"

# Aging thresholds (days) per spec.
# A staff comment shorter than this (and without a media attachment) does not
# count as "substantive" -- see is_substantive_staff_comment.
SUBSTANTIVE_COMMENT_MIN_LEN = 50
AGE_THRESHOLDS_DAYS = {
    STATUS_NEEDS_LABELS: 7,
    STATUS_NEEDS_REPRO_ATTEMPT: 7,
    STATUS_AWAITING_USER: 14,
    STATUS_USER_REPLIED: 3,
    # Needs ask is handled explicitly in derive_aged (always flagged), so
    # it doesn't need a threshold here.
}

# Statuses that never age: the triage work is done, parked, or owned elsewhere.
TERMINAL_OR_RESTING_STATUSES = {
    STATUS_REPRODUCIBLE,
    STATUS_HANDOFF,
    STATUS_CLOSED,
    STATUS_RESPONDED_NO_REPRO,
    STATUS_CLAIMED_COMMUNITY,
}

# Issue types that aren't triage work items -- administrative collections,
# dashboards, and trackers. The sync detects these and skips field updates;
# they remain in the project (auto-add put them there) but with empty fields,
# invisible in any status-filtered view. Manually remove them in the UI if
# they're cluttering the all-items list.
SKIP_ISSUE_TYPES = {"Meta", "Tracking"}

REST_API = "https://api.github.com"
GRAPHQL_API = "https://api.github.com/graphql"

# Captured once at import so every age computation in a run measures against
# the same instant.
NOW = datetime.now(timezone.utc)
117
118
119# ---------------------------------------------------------------------------
120# Logging
121
122
def log(msg: str, level: str = "INFO") -> None:
    """Write one timestamped line to stderr and flush immediately."""
    stamp = datetime.now(timezone.utc).strftime("%H:%M:%S")
    sys.stderr.write(f"[{stamp}] [{level}] {msg}\n")
    sys.stderr.flush()
126
127
128# ---------------------------------------------------------------------------
129# Auth
130
131
def get_token() -> str:
    """Read GITHUB_TOKEN from the environment; exit with an error if unset."""
    token = os.environ.get("GITHUB_TOKEN", "").strip()
    if token:
        return token
    sys.exit("ERROR: GITHUB_TOKEN env var is required")
137
138
# Module-level token; populated once in main() before any API call.
_TOKEN: str | None = None


def headers_rest() -> dict[str, str]:
    """Standard GitHub REST headers, bearer-authorized with the module token."""
    return {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
        "Authorization": f"Bearer {_TOKEN}",
    }
148
149
def headers_graphql() -> dict[str, str]:
    """Headers for GraphQL POSTs, bearer-authorized with the module token."""
    auth = f"Bearer {_TOKEN}"
    return {"Authorization": auth, "Content-Type": "application/json"}
152
153
154# ---------------------------------------------------------------------------
155# REST
156
157
def rest_get(path: str, params: dict | None = None, retries: int = 3) -> dict | list:
    """GET a REST endpoint with bounded retries and exponential backoff.

    Retries on transport errors and on transient HTTP statuses
    (429/502/503/504). Any other non-200 status is treated as permanent
    and raised immediately.

    Args:
        path: path below the API root (leading slash optional).
        params: query parameters passed through to requests.
        retries: number of attempts before giving up.

    Raises:
        requests.HTTPError: on a non-retryable HTTP error status.
        RuntimeError: when all retry attempts are exhausted.
    """
    url = f"{REST_API}/{path.lstrip('/')}"
    last_err: Exception | None = None
    for attempt in range(retries):
        try:
            r = requests.get(url, headers=headers_rest(), params=params, timeout=30)
        except requests.RequestException as e:
            # Transport-level failure (DNS, timeout, reset): back off and retry.
            last_err = e
            wait = 2**attempt * 2
            log(f"REST GET {path} threw {e}; retry in {wait}s", "WARN")
            time.sleep(wait)
            continue
        if r.status_code == 200:
            return r.json()
        if r.status_code in (429, 502, 503, 504):
            wait = 2**attempt * 2
            log(f"REST {r.status_code} on {path}; retry in {wait}s", "WARN")
            time.sleep(wait)
            continue
        # Non-retryable status (401, 404, ...). BUGFIX: previously this raise
        # happened inside the try block, so the HTTPError (a RequestException
        # subclass) was caught by the handler and retried anyway. Raising
        # outside the try makes permanent errors fail fast as intended.
        log(f"REST GET {path} failed: {r.status_code} {r.text[:200]}", "ERROR")
        r.raise_for_status()
    raise RuntimeError(f"REST GET {path} failed after {retries} retries: {last_err}")
179
180
def rest_get_paginated(path: str, params: dict | None = None, max_pages: int = 20) -> list:
    """Fetch up to max_pages pages of 100 items each and concatenate them."""
    query = {**(params or {}), "per_page": 100}
    collected: list = []
    page = 0
    while page < max_pages:
        page += 1
        query["page"] = page
        batch = rest_get(path, query)
        if not batch:
            break
        if not isinstance(batch, list):
            log(f"REST {path} page {page} returned non-list", "WARN")
            break
        collected.extend(batch)
        if len(batch) < 100:
            # Short page means this was the final one; skip the extra request.
            break
    return collected
197
198
199# ---------------------------------------------------------------------------
200# GraphQL
201
202
def graphql(query: str, variables: dict | None = None, retries: int = 3) -> dict:
    """POST a GraphQL query/mutation with bounded retries and backoff.

    Retries on transport errors and transient HTTP statuses
    (429/502/503/504). GraphQL-level errors arrive with HTTP 200 and are
    treated as fatal (no retry).

    Raises:
        RuntimeError: on GraphQL-level errors or when retries are exhausted.
        requests.HTTPError: on a non-retryable HTTP error status.
    """
    payload = {"query": query, "variables": variables or {}}
    last_err: Exception | None = None
    for attempt in range(retries):
        try:
            r = requests.post(GRAPHQL_API, headers=headers_graphql(), json=payload, timeout=30)
        except requests.RequestException as e:
            last_err = e
            wait = 2**attempt * 2
            log(f"GraphQL threw {e}; retry in {wait}s", "WARN")
            time.sleep(wait)
            continue
        if r.status_code == 200:
            data = r.json()
            if "errors" in data:
                log(f"GraphQL errors: {json.dumps(data['errors'])[:400]}", "ERROR")
                raise RuntimeError("GraphQL returned errors")
            return data["data"]
        if r.status_code in (429, 502, 503, 504):
            wait = 2**attempt * 2
            log(f"GraphQL {r.status_code}; retry in {wait}s", "WARN")
            time.sleep(wait)
            continue
        # BUGFIX: raise outside the try so the HTTPError (a RequestException
        # subclass) isn't swallowed by the handler above and pointlessly
        # retried for permanent errors.
        log(f"GraphQL HTTP {r.status_code}: {r.text[:300]}", "ERROR")
        r.raise_for_status()
    raise RuntimeError(f"GraphQL failed after {retries} retries: {last_err}")
228
229
230# ---------------------------------------------------------------------------
231# Issue data fetch
232
233
@dataclass
class IssueData:
    """Flattened snapshot of one issue plus its full comment thread.

    Built by fetch_issue() from the REST representation; carries everything
    the derivation rules need so they never re-fetch.
    """

    number: int  # issue number within the repo
    node_id: str  # GraphQL node id, used for project item lookup/add
    title: str
    state: str  # "open" / "closed"
    closed_at: datetime | None  # None while the issue is open
    created_at: datetime
    reporter: str  # login of the issue author
    assignees: list[str]  # assignee logins; empty when unassigned
    labels: list[str]  # label names only
    issue_type: str | None  # e.g. "Bug", "Crash", "Meta", "Tracking", or None
    is_pull_request: bool  # REST issues endpoint also returns PRs
    comments: list[dict]  # raw REST comment dicts (assumed oldest-first -- TODO confirm API ordering)
248
249
def parse_dt(s: str | None) -> datetime | None:
    """Parse an ISO-8601 timestamp (GitHub's trailing 'Z' form included).

    None / empty string passes through as None.
    """
    if not s:
        return None
    normalized = s.replace("Z", "+00:00")
    return datetime.fromisoformat(normalized)
254
255
def fetch_issue(number: int) -> IssueData:
    """Pull one issue plus its full comment thread via REST into an IssueData."""
    raw = rest_get(f"repos/{REPO}/issues/{number}")
    if not isinstance(raw, dict):
        raise RuntimeError(f"unexpected response for issue {number}")
    thread = rest_get_paginated(f"repos/{REPO}/issues/{number}/comments")

    created = parse_dt(raw["created_at"])
    if created is None:
        raise RuntimeError(f"issue {number} has no created_at")

    # The "type" field is a nested object when set; absent/None otherwise.
    type_obj = raw.get("type")
    kind = type_obj.get("name") if isinstance(type_obj, dict) else None

    return IssueData(
        number=number,
        node_id=raw["node_id"],
        title=raw["title"],
        state=raw["state"],
        closed_at=parse_dt(raw.get("closed_at")),
        created_at=created,
        reporter=raw["user"]["login"],
        assignees=[a["login"] for a in (raw.get("assignees") or [])],
        labels=[lbl["name"] for lbl in raw["labels"]],
        issue_type=kind,
        is_pull_request="pull_request" in raw,
        comments=thread,
    )
281
282
283# ---------------------------------------------------------------------------
284# Staff team
285
286
# Process-lifetime cache of staff logins; populated on first use.
_STAFF: set[str] | None = None


def fetch_staff() -> set[str]:
    """Return the login set of the org staff team, fetching once and caching."""
    global _STAFF
    if _STAFF is None:
        members = rest_get_paginated(f"orgs/{REPO_OWNER}/teams/{STAFF_TEAM_SLUG}/members")
        _STAFF = {m["login"] for m in members}
        log(f"loaded {len(_STAFF)} staff members")
    return _STAFF
298
299
def is_bot(user: dict) -> bool:
    """True for app accounts: GitHub marks them type=Bot and/or a [bot] login suffix."""
    if user.get("type") == "Bot":
        return True
    return user.get("login", "").endswith("[bot]")
302
303
def is_substantive_staff_comment(comment: dict, staff: set[str]) -> bool:
    """True when a human staff member left a long comment or one with media.

    "Substantive" means either at least SUBSTANTIVE_COMMENT_MIN_LEN chars,
    or a body containing an attachment/media token.
    """
    author = comment.get("user", {})
    if is_bot(author) or author.get("login") not in staff:
        return False
    body = comment.get("body") or ""
    if len(body) >= SUBSTANTIVE_COMMENT_MIN_LEN:
        return True
    # Cheap attachment heuristic: media file extensions or the GitHub
    # attachment host path anywhere in the body.
    media_markers = (
        "user-attachments/assets",
        ".png",
        ".jpg",
        ".jpeg",
        ".gif",
        ".mp4",
        ".webm",
        ".mov",
    )
    return any(marker in body for marker in media_markers)
327
328
def latest_reporter_activity(issue: IssueData) -> datetime:
    """Most recent timestamp the reporter did anything: filing or commenting."""
    latest = issue.created_at
    for comment in issue.comments:
        if comment["user"]["login"] != issue.reporter:
            continue
        when = parse_dt(comment["created_at"])
        if when is not None and when > latest:
            latest = when
    return latest
337
338
339# ---------------------------------------------------------------------------
340# Derivation rules
# (Mirrors the spec's R0-R6 cascade. Keep in sync with
# spec.md -> "Status derivation rules".)
343
344
def derive_status(issue: IssueData, staff: set[str]) -> tuple[str, str, str]:
    """Returns (status, rule_id, why).

    Implements the spec's R0-R6 cascade; earlier rules win, and ordering is
    deliberate (see inline comments). `rule_id` and `why` are audit strings
    surfaced in the sync logs so a surprising status can be traced back to
    the rule that produced it.
    """
    L = set(issue.labels)

    # R1: a closed issue wins over everything else.
    if issue.closed_at is not None:
        return STATUS_CLOSED, "R1", "issue is closed"

    # R0: an explicit community claim wins over the remaining open-issue rules.
    if "state:claimed by community" in L:
        return STATUS_CLAIMED_COMMUNITY, "R0", "state:claimed by community label"

    if "state:reproducible" in L:
        if issue.assignees:
            return STATUS_REPRODUCIBLE, "R2a", f"reproducible, assignee={','.join(issue.assignees)}"
        # R2b vs R2c: any substantive staff comment in the thread?
        # (The loop keeps the LAST matching comment in thread order.)
        substantive = None
        for c in issue.comments:
            if is_substantive_staff_comment(c, staff):
                substantive = c
        if substantive:
            return (
                STATUS_HANDOFF,
                "R2b",
                f"reproducible, no assignee, staff context @ {substantive['created_at']} "
                f"({len(substantive['body'])} chars by @{substantive['user']['login']})",
            )
        return (
            STATUS_HANDOFF_INCOMPLETE,
            "R2c",
            "reproducible, no assignee, no substantive staff comment β€” close the loop",
        )

    # R4 (state:needs info) and R5 (state:needs repro) intentionally come
    # before R3 (state:needs triage). Per the team's actual practice,
    # state:needs triage is often left on while triage is in progress; only
    # when no other state label is more specific should we treat the issue
    # as "needs initial labels."
    if "state:needs info" in L:
        # R4 splits into three sub-cases based on whether we've actually
        # asked anything (substantive staff comment) and whether the reporter
        # or a third-party has responded.
        substantive_staff = None
        for c in issue.comments:
            if is_substantive_staff_comment(c, staff):
                substantive_staff = c
        if substantive_staff is None:
            # state:needs info applied without an actual question to the user.
            # Runbook violation -- we owe the reporter a comment explaining
            # what info we need.
            return (
                STATUS_NEEDS_ASK,
                "R4c",
                "state:needs info present but no substantive staff comment exists β€” we haven't asked anything",
            )
        last_comment = issue.comments[-1] if issue.comments else None
        if last_comment is not None:
            author = last_comment["user"]["login"]
            non_staff = author not in staff and not is_bot(last_comment["user"])
            if non_staff:
                # Only counts as a reply if it landed AFTER our latest ask.
                ct = parse_dt(last_comment["created_at"])
                st = parse_dt(substantive_staff["created_at"])
                if ct and st and ct > st:
                    relation = "reporter" if author == issue.reporter else "third-party"
                    return (
                        STATUS_USER_REPLIED,
                        "R4b",
                        f"{relation} (@{author}) replied {ct.isoformat()} after substantive staff @ {st.isoformat()}",
                    )
        return (
            STATUS_AWAITING_USER,
            "R4a",
            f"substantive staff comment @ {substantive_staff['created_at']}, no non-staff reply since",
        )

    if "state:needs repro" in L:
        # R5: only a substantive staff comment NEWER than the reporter's
        # latest activity counts as "we responded".
        cutoff = latest_reporter_activity(issue)
        for c in reversed(issue.comments):
            ct = parse_dt(c["created_at"])
            if ct and ct > cutoff and is_substantive_staff_comment(c, staff):
                return (
                    STATUS_RESPONDED_NO_REPRO,
                    "R5b",
                    f"staff comment {len(c['body'])} chars by @{c['user']['login']} @ {c['created_at']}",
                )
        return STATUS_NEEDS_REPRO_ATTEMPT, "R5a", "no substantive staff comment after reporter's last activity"

    # R3 (state:needs triage) is checked LAST among recognized state labels.
    # If state:needs triage is the only state label, the issue genuinely needs
    # initial labeling. If any other state label is also present, that state
    # has already been matched above and won.
    if "state:needs triage" in L:
        return STATUS_NEEDS_LABELS, "R3", "state:needs triage label present (no other state:* matched)"

    # R6: fallthrough -- open with no recognized state label at all.
    return STATUS_UNKNOWN, "R6", f"open with no recognized state label (labels: {sorted(L) or '<none>'})"
438
439
def derive_stale_since(issue: IssueData, status: str, staff: set[str]) -> datetime | None:
    """Returns the timestamp anchor used to measure aging, or None."""
    if status == STATUS_UNKNOWN or status in TERMINAL_OR_RESTING_STATUSES:
        return None

    if status in (STATUS_NEEDS_LABELS, STATUS_NEEDS_ASK, STATUS_HANDOFF_INCOMPLETE):
        # Needs labels: age from filing time.
        # Needs ask: measures how long the runbook violation has gone
        # unaddressed (aging threshold is 0 -- always flagged).
        # Handoff (incomplete): spec wants "when state:reproducible was
        # applied"; created_at is the v0 proxy until timeline event lookup
        # lands (parked item).
        return issue.created_at

    if status == STATUS_NEEDS_REPRO_ATTEMPT:
        return latest_reporter_activity(issue)

    if status == STATUS_AWAITING_USER:
        # Anchor on the most recent SUBSTANTIVE staff comment (the actual
        # "ask"), consistent with R4's substantive-comment requirement.
        anchor = None
        for c in issue.comments:
            if is_substantive_staff_comment(c, staff):
                anchor = c
        if anchor is None:
            return issue.created_at
        return parse_dt(anchor["created_at"])

    if status == STATUS_USER_REPLIED:
        # Anchor on the newest non-staff, non-bot comment.
        newest = None
        for c in issue.comments:
            author = c["user"]
            if author["login"] not in staff and not is_bot(author):
                newest = c
        if newest is None:
            return None
        return parse_dt(newest["created_at"])

    return None
475
476
def derive_aged(status: str, stale_since: datetime | None) -> tuple[str, str]:
    """Returns ('Yes' | 'No', why)."""
    # Always-flagged statuses come first; they age regardless of timestamps.
    if status == STATUS_HANDOFF_INCOMPLETE:
        return "Yes", "always-flagged for loop closure"
    if status == STATUS_NEEDS_ASK:
        return "Yes", "always-flagged: state:needs info applied without a substantive staff comment"
    if status in TERMINAL_OR_RESTING_STATUSES or status == STATUS_UNKNOWN:
        return "No", "terminal/resting"
    if not stale_since:
        return "No", "no stale_since (status not aged-tracked)"
    threshold = AGE_THRESHOLDS_DAYS.get(status)
    if threshold is None:
        return "No", f"status {status} not aged-tracked"
    age = NOW - stale_since
    if age > timedelta(days=threshold):
        return "Yes", f"{status} for {age.days}d (>{threshold}d)"
    return "No", f"{status} for {age.days}d (≀{threshold}d)"
494
495
496# ---------------------------------------------------------------------------
497# Project schema cache
498# Discovered at runtime by name so the script doesn't break if field IDs
499# change (e.g., project recreated). Project number is stable config.
500
501
# Process-lifetime cache: the schema is fetched at most once per run.
_PROJECT_SCHEMA: dict | None = None


def fetch_project_schema() -> dict:
    """Returns {'id', 'fields_by_name'}; fields_by_name maps field name -> field dict.

    Exits the process (sys.exit) if the project is missing or lacks any of
    the four fields the sync writes -- better to fail loudly than silently
    sync nothing.
    """
    global _PROJECT_SCHEMA
    if _PROJECT_SCHEMA is not None:
        return _PROJECT_SCHEMA
    # Inline fragments per field type: single-select fields carry options,
    # iteration fields carry their iteration configuration.
    query = """
    query($owner: String!, $number: Int!) {
      organization(login: $owner) {
        projectV2(number: $number) {
          id
          fields(first: 30) {
            nodes {
              __typename
              ... on ProjectV2Field { id name dataType }
              ... on ProjectV2SingleSelectField {
                id name dataType options { id name }
              }
              ... on ProjectV2IterationField {
                id name dataType
                configuration {
                  duration startDay
                  iterations { id title startDate duration }
                  completedIterations { id title startDate duration }
                }
              }
            }
          }
        }
      }
    }
    """
    data = graphql(query, {"owner": PROJECT_OWNER, "number": PROJECT_NUMBER})
    proj = data["organization"]["projectV2"]
    if not proj:
        sys.exit(f"ERROR: project #{PROJECT_NUMBER} not found in {PROJECT_OWNER}")
    fields_by_name = {f["name"]: f for f in proj["fields"]["nodes"]}
    # Validate up front that every field the sync writes actually exists.
    required = ["Status", "Intake week", "Stale since", "Aged?"]
    missing = [n for n in required if n not in fields_by_name]
    if missing:
        sys.exit(f"ERROR: project missing required fields: {missing}")
    _PROJECT_SCHEMA = {"id": proj["id"], "fields_by_name": fields_by_name}
    log(f"loaded project schema: id={proj['id']}, fields={list(fields_by_name)}")
    return _PROJECT_SCHEMA
548
549
def status_option_id(status_name: str) -> str | None:
    """Look up the option id for a Status value; None if the project lacks it."""
    options = fetch_project_schema()["fields_by_name"]["Status"]["options"]
    matches = [o["id"] for o in options if o["name"] == status_name]
    return matches[0] if matches else None
556
557
def aged_option_id(value: str) -> str | None:
    """Look up the option id for an Aged? value ('Yes'/'No'); None if absent."""
    options = fetch_project_schema()["fields_by_name"]["Aged?"]["options"]
    matches = [o["id"] for o in options if o["name"] == value]
    return matches[0] if matches else None
564
565
def iteration_id_for_date(d: datetime) -> str | None:
    """Find the iteration (active or completed) whose window contains d, if any."""
    cfg = fetch_project_schema()["fields_by_name"]["Intake week"]["configuration"]
    candidates = list(cfg.get("iterations") or []) + list(cfg.get("completedIterations") or [])
    for it in candidates:
        # startDate is a bare date; pin it to midnight UTC for comparison.
        start = parse_dt(it["startDate"] + "T00:00:00+00:00")
        if start is None:
            continue
        end = start + timedelta(days=int(it["duration"]))
        if start <= d < end:  # half-open window: [start, start + duration)
            return it["id"]
    return None
579
580
581# ---------------------------------------------------------------------------
582# Project item lookup / mutation
583
584
def get_project_item_id(issue_node_id: str) -> str | None:
    """Returns the ProjectV2Item.id for the issue in our project, or None."""
    project_id = fetch_project_schema()["id"]
    query = """
    query($issueId: ID!) {
      node(id: $issueId) {
        ... on Issue {
          projectItems(first: 100) {
            pageInfo { hasNextPage }
            nodes { id project { id } }
          }
        }
      }
    }
    """
    result = graphql(query, {"issueId": issue_node_id})
    issue_node = result["node"]
    if not issue_node:
        return None
    items_block = issue_node["projectItems"]
    match = next(
        (it["id"] for it in items_block["nodes"] if it["project"]["id"] == project_id),
        None,
    )
    if match is not None:
        return match
    if items_block["pageInfo"]["hasNextPage"]:
        # Issue is on >100 projects; very unlikely. Log + return None.
        log(f"issue {issue_node_id} on >100 projects, can't find ours in first page", "WARN")
    return None
613
614
def add_to_project(issue_node_id: str) -> str:
    """Add the issue to the project; returns the freshly-created item id."""
    mutation = """
    mutation($projectId: ID!, $issueId: ID!) {
      addProjectV2ItemById(input: { projectId: $projectId, contentId: $issueId }) {
        item { id }
      }
    }
    """
    project_id = fetch_project_schema()["id"]
    result = graphql(mutation, {"projectId": project_id, "issueId": issue_node_id})
    return result["addProjectV2ItemById"]["item"]["id"]
626
627
628
629
def update_single_select(item_id: str, field_id: str, option_id: str, dry_run: bool) -> None:
    """Set a single-select project field to the given option (logged no-op in dry-run)."""
    if dry_run:
        log(f"  [DRY] single-select field={field_id} option={option_id} on item={item_id}")
        return
    mutation = """
    mutation($projectId: ID!, $itemId: ID!, $fieldId: ID!, $optionId: String!) {
      updateProjectV2ItemFieldValue(input: {
        projectId: $projectId, itemId: $itemId, fieldId: $fieldId,
        value: { singleSelectOptionId: $optionId }
      }) { projectV2Item { id } }
    }
    """
    variables = {
        "projectId": fetch_project_schema()["id"],
        "itemId": item_id,
        "fieldId": field_id,
        "optionId": option_id,
    }
    graphql(mutation, variables)
652
653
def update_date(item_id: str, field_id: str, date_iso: str, dry_run: bool) -> None:
    """Write a date project field (YYYY-MM-DD string); logged no-op in dry-run."""
    if dry_run:
        log(f"  [DRY] date field={field_id} value={date_iso} on item={item_id}")
        return
    mutation = """
    mutation($projectId: ID!, $itemId: ID!, $fieldId: ID!, $date: Date!) {
      updateProjectV2ItemFieldValue(input: {
        projectId: $projectId, itemId: $itemId, fieldId: $fieldId,
        value: { date: $date }
      }) { projectV2Item { id } }
    }
    """
    variables = {
        "projectId": fetch_project_schema()["id"],
        "itemId": item_id,
        "fieldId": field_id,
        "date": date_iso,
    }
    graphql(mutation, variables)
671
672
def update_iteration(item_id: str, field_id: str, iteration_id: str, dry_run: bool) -> None:
    """Point an iteration project field at the given iteration; logged no-op in dry-run."""
    if dry_run:
        log(f"  [DRY] iteration field={field_id} value={iteration_id} on item={item_id}")
        return
    mutation = """
    mutation($projectId: ID!, $itemId: ID!, $fieldId: ID!, $iterId: String!) {
      updateProjectV2ItemFieldValue(input: {
        projectId: $projectId, itemId: $itemId, fieldId: $fieldId,
        value: { iterationId: $iterId }
      }) { projectV2Item { id } }
    }
    """
    variables = {
        "projectId": fetch_project_schema()["id"],
        "itemId": item_id,
        "fieldId": field_id,
        "iterId": iteration_id,
    }
    graphql(mutation, variables)
695
696
697# ---------------------------------------------------------------------------
698# Sync
699
700
def sync_issue(number: int, dry_run: bool = False) -> None:
    """Sync a single issue. Adds to project if missing, then updates fields.

    Idempotent -- running twice with the same issue state has no effect after
    the first run.

    Args:
        number: issue number in the configured repo.
        dry_run: when True, log the derived values but perform no mutations.
    """
    log(f"sync #{number} (dry_run={dry_run})")
    issue = fetch_issue(number)

    # PRs come back from the REST issues endpoint too; the project only
    # tracks issues.
    if issue.is_pull_request:
        log(f"  #{number} is a PR; skipping (project tracks issues)")
        return

    # Skip administrative issue types (Meta, Tracking, etc.). These are
    # collections / dashboards, not triage work. The script doesn't have
    # permission to remove items from the project (intentional -- narrows blast
    # radius). Existing Meta/Tracking items in the project should be removed
    # manually one-time; new ones get auto-added by the project's auto-add
    # workflow but the sync below skips them, so they sit with no Status /
    # Aged? / Stale since fields set and don't appear in any status-filtered
    # view.
    if issue.issue_type in SKIP_ISSUE_TYPES:
        log(f"  #{number} is type={issue.issue_type}; not a triage item, skipping fields")
        return

    staff = fetch_staff()

    # Derive all four synced fields from the issue snapshot.
    status, rule, why = derive_status(issue, staff)
    stale_since = derive_stale_since(issue, status, staff)
    aged, aged_why = derive_aged(status, stale_since)
    intake_iter_id = iteration_id_for_date(issue.created_at)

    log(f"  status={status} ({rule}: {why})")
    log(f"  stale_since={stale_since.isoformat() if stale_since else 'none'}")
    log(f"  aged={aged} ({aged_why})")
    log(f"  intake_iteration_id={intake_iter_id or 'none (created_at outside iteration range)'}")

    schema = fetch_project_schema()
    item_id = get_project_item_id(issue.node_id)
    if not item_id:
        if dry_run:
            # Without a real item id there is nothing to address field
            # updates to, so dry-run stops here.
            log("  [DRY] would add to project (item not yet present)")
            return
        item_id = add_to_project(issue.node_id)
        log(f"  added to project as item={item_id}")

    # Status (always set)
    sid = status_option_id(status)
    if not sid:
        # Field-option mismatch is logged but not fatal: the remaining
        # fields still get synced.
        log(f"  ERROR: no Status option named '{status}' in project; skipping status update", "ERROR")
    else:
        update_single_select(
            item_id, schema["fields_by_name"]["Status"]["id"], sid, dry_run
        )

    # Aged? (always set)
    aged_id = aged_option_id(aged)
    if not aged_id:
        log(f"  ERROR: no Aged? option named '{aged}'; skipping", "ERROR")
    else:
        update_single_select(
            item_id, schema["fields_by_name"]["Aged?"]["id"], aged_id, dry_run
        )

    # Stale since (only set when meaningful)
    if stale_since:
        update_date(
            item_id,
            schema["fields_by_name"]["Stale since"]["id"],
            stale_since.date().isoformat(),
            dry_run,
        )

    # Intake week (only set when an iteration covers the created_at)
    if intake_iter_id:
        update_iteration(
            item_id,
            schema["fields_by_name"]["Intake week"]["id"],
            intake_iter_id,
            dry_run,
        )
782
783
def sync_all(dry_run: bool = False) -> None:
    """Sync every item currently in the project. Cron mode.

    Pages through the project's items via GraphQL (100 at a time) and runs
    sync_issue on each Issue. Per-item failures are logged and counted but
    never abort the batch (see module docstring, "Idempotency / safety").
    """
    log("fetching all project items…")
    # Hoisted out of the pagination loop: the query text is invariant across
    # pages; only the cursor variable changes.
    query = """
    query($owner: String!, $number: Int!, $cursor: String) {
      organization(login: $owner) {
        projectV2(number: $number) {
          items(first: 100, after: $cursor) {
            pageInfo { hasNextPage endCursor }
            nodes {
              id
              content {
                __typename
                ... on Issue { number }
                ... on PullRequest { number }
              }
            }
          }
        }
      }
    }
    """
    cursor: str | None = None
    total = 0
    failed = 0
    while True:
        data = graphql(
            query, {"owner": PROJECT_OWNER, "number": PROJECT_NUMBER, "cursor": cursor}
        )
        items_block = data["organization"]["projectV2"]["items"]
        for item in items_block["nodes"]:
            content = item.get("content")
            if not content:
                # Draft / archived items have no linked content node.
                continue
            if content["__typename"] != "Issue":
                continue
            num = content["number"]
            try:
                sync_issue(num, dry_run=dry_run)
            except Exception as e:
                # Deliberately broad: one bad item must not poison the batch.
                log(f"sync #{num} failed: {e}", "ERROR")
                failed += 1
            total += 1
        if not items_block["pageInfo"]["hasNextPage"]:
            break
        cursor = items_block["pageInfo"]["endCursor"]
    log(f"done: synced {total} items, {failed} failed")
831
832
833# ---------------------------------------------------------------------------
834# Main
835
836
def main() -> int:
    """CLI entry point; returns the process exit code (0 on success)."""
    global _TOKEN

    ap = argparse.ArgumentParser(description=__doc__)
    grp = ap.add_mutually_exclusive_group(required=True)
    grp.add_argument("--issue", type=int, help="sync a single issue by number")
    grp.add_argument("--all", action="store_true", help="sync every project item")
    ap.add_argument("--dry-run", action="store_true", help="compute but don't mutate")
    args = ap.parse_args()

    _TOKEN = get_token()

    # BUGFIX: `is not None` instead of truthiness -- `--issue 0` previously
    # matched neither branch and the script exited 0 having done nothing.
    if args.issue is not None:
        sync_issue(args.issue, dry_run=args.dry_run)
    elif args.all:
        sync_all(dry_run=args.dry_run)

    return 0
855
856
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit code.
    sys.exit(main())