@@ -1,692 +0,0 @@
-name: Identify potential duplicates among new bug/crash reports
-
-on:
- issues:
- types: [opened]
- workflow_dispatch:
- inputs:
- issue_number:
- description: "Issue number to analyze (for testing)"
- required: true
- type: number
-
-concurrency:
- group: potential-duplicate-check-${{ github.event.issue.number || inputs.issue_number }}
- cancel-in-progress: true
-
-jobs:
- identify-duplicates:
- # For manual testing, allow running on any branch; for automatic runs, only on main repo
- if: github.event_name == 'workflow_dispatch' || github.repository == 'zed-industries/zed'
- runs-on: ubuntu-latest
- timeout-minutes: 5
-
- permissions:
- contents: read
- issues: read
-
- steps:
- - name: Get github app token
- id: get-app-token
- uses: actions/create-github-app-token@bef1eaf1c0ac2b148ee2a0a74c65fbe6db0631f1 # v2.1.4
- with:
- app-id: ${{ secrets.ZED_COMMUNITY_BOT_APP_ID }}
- private-key: ${{ secrets.ZED_COMMUNITY_BOT_PRIVATE_KEY }}
- owner: zed-industries
-
- - name: Fetch issue and check eligibility
- id: fetch-issue
- uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
- with:
- github-token: ${{ steps.get-app-token.outputs.token }}
- script: |
- const issueNumber = context.payload.issue?.number || ${{ inputs.issue_number || 0 }};
- if (!issueNumber) {
- core.setFailed('No issue number provided');
- return;
- }
-
- const { data: issue } = await github.rest.issues.get({
- owner: context.repo.owner,
- repo: context.repo.repo,
- issue_number: issueNumber
- });
-
- const typeName = issue.type?.name;
- const isTargetType = typeName === 'Bug' || typeName === 'Crash';
-
- console.log(`Issue #${issueNumber}: "${issue.title}"`);
- console.log(`Issue type: ${typeName || '(none)'}`);
- console.log(`Is target type (Bug/Crash): ${isTargetType}`);
-
- // Set default outputs for all paths
- core.setOutput('issue_number', issueNumber);
- core.setOutput('issue_title', issue.title);
- core.setOutput('issue_body', (issue.body || '').slice(0, 6000));
- core.setOutput('is_target_type', String(isTargetType));
- core.setOutput('is_staff', 'false');
- core.setOutput('should_continue', 'false');
-
- if (!isTargetType) {
- console.log('::notice::Skipping - issue type is not Bug or Crash');
- return;
- }
-
- // Check if author is staff (skip if so - they know what they're doing)
- const author = issue.user?.login || '';
- let isStaff = false;
- if (author) {
- try {
- const response = await github.rest.teams.getMembershipForUserInOrg({
- org: 'zed-industries',
- team_slug: 'staff',
- username: author
- });
- isStaff = response.data.state === 'active';
- } catch (error) {
- if (error.status !== 404) throw error;
- }
- }
-
- core.setOutput('is_staff', String(isStaff));
- if (isStaff) {
- console.log(`::notice::Skipping - author @${author} is a staff member`);
- return;
- }
-
- core.setOutput('should_continue', 'true');
-
- # ========================================================================
- # PASS 1: Detect areas using Claude with the full area taxonomy
- # ========================================================================
- - name: "Pass 1: Detect areas with Claude"
- if: steps.fetch-issue.outputs.should_continue == 'true'
- id: detect-areas
- env:
- ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY_ISSUE_DEDUP }}
- ISSUE_TITLE: ${{ steps.fetch-issue.outputs.issue_title }}
- ISSUE_BODY: ${{ steps.fetch-issue.outputs.issue_body }}
- run: |
- # shellcheck disable=SC2016
- cat > /tmp/area_prompt.txt << 'PROMPT_EOF'
- You are classifying a GitHub issue for the Zed code editor into area categories.
-
- ## Issue Title
- ISSUE_TITLE_PLACEHOLDER
-
- ## Issue Body
- ISSUE_BODY_PLACEHOLDER
-
- ## Available Area Labels
- (descriptions provided only where the label name isn't self-explanatory)
-
- accessibility
- ai, ai/acp (Agent Communication Protocol), ai/agent thread, ai/anthropic, ai/assistant, ai/bedrock, ai/codex, ai/copilot, ai/deepseek, ai/edit prediction, ai/gemini, ai/inline assistant, ai/lmstudio, ai/mcp (Model Context Protocol), ai/mistral, ai/ollama, ai/openai, ai/openai compatible, ai/openrouter, ai/qwen, ai/supermaven, ai/text thread, ai/zeta
- auth
- autocompletions
- billing
- cli
- code actions
- code folding
- collab - real-time collaboration with other Zed users (screen sharing, shared editing). NOT for remote development over SSH.
- collab/audio, collab/chat
- command palette
- controls/ime, controls/keybinds, controls/mouse
- debugger, debugger/dap/CodeLLDB, debugger/dap/debugpy, debugger/dap/gdb, debugger/dap/javascript
- design papercut - small UI/UX polish issues
- dev containers - Docker-based development environments
- diagnostics - LSP errors/warnings display
- discoverability
- editor, editor/brackets, editor/linked edits
- extensions/infrastructure
- file finder - fuzzy file search (Cmd/Ctrl+P)
- gpui - Zed's internal UI rendering framework
- inlay hints - inline hints from LSP (type annotations, parameter names)
- installer-updater
- integrations/environment - shell environment, PATH, env vars
- integrations/git, integrations/git/blame, integrations/terminal
- internationalization, internationalization/rtl support
- keymap editor
- language server, language server/server failure
- languages/* - language-specific syntax, grammar, or LSP issues (e.g., languages/python, languages/rust, languages/typescript)
- legal
- logging
- multi-buffer - viewing multiple files or search results in a single editor pane
- multi-cursor
- navigation - go to definition, find references, symbol search
- network - proxy settings, connectivity, SSL certificates. NOT for collab.
- onboarding
- outline - document symbols/structure sidebar
- parity/* - feature parity requests comparing to other editors (parity/vscode, parity/vim, parity/emacs, parity/jetbrains, parity/helix)
- performance, performance/memory leak
- permissions
- popovers - hover cards, tooltips, autocomplete dropdowns
- preview/images, preview/markdown
- project panel - file tree sidebar
- release notes
- repl
- search - project-wide search, find/replace
- security & privacy, security & privacy/workspace trust
- serialization - saving/restoring workspace state, undo history, folding state across restarts
- settings, settings/ui
- snippets
- status bar
- tasks - task runner integration
- telemetry
- tooling/* - external tool integrations (tooling/emmet, tooling/eslint, tooling/prettier, tooling/flatpak, tooling/nix)
- tree-sitter - syntax parsing and highlighting engine
- ui/animations, ui/dock, ui/file icons, ui/font, ui/menus, ui/minimap, ui/panel, ui/scaling, ui/scrolling, ui/tabs, ui/themes
- workspace - window management, pane layout, project handling
- zed account
- zed.dev
-
- ## Your Task
-
- Based on the issue title and body, identify which areas this issue relates to.
- - Select 1-5 areas that best match the issue
- - Prefer more specific sub-areas when applicable (e.g., "ai/gemini" over just "ai")
- - Only select areas that are clearly relevant
-
- ## Response Format
-
- Return ONLY a JSON object (no markdown fences, no explanation):
- {
- "areas": ["area1", "area2"],
- "reasoning": "Brief explanation of why these areas were selected"
- }
- PROMPT_EOF
-
- # Single quotes are intentional to prevent bash expansion; node reads env vars via process.env
- # shellcheck disable=SC2016
- node << 'SCRIPT_EOF'
- const fs = require('fs');
- let prompt = fs.readFileSync('/tmp/area_prompt.txt', 'utf8');
- prompt = prompt.replace('ISSUE_TITLE_PLACEHOLDER', process.env.ISSUE_TITLE || '');
- prompt = prompt.replace('ISSUE_BODY_PLACEHOLDER', process.env.ISSUE_BODY || '');
- fs.writeFileSync('/tmp/area_prompt_final.txt', prompt);
- SCRIPT_EOF
-
- HTTP_CODE=$(curl -s -w "%{http_code}" -o /tmp/area_response.json -X POST "https://api.anthropic.com/v1/messages" \
- -H "Content-Type: application/json" \
- -H "x-api-key: $ANTHROPIC_API_KEY" \
- -H "anthropic-version: 2023-06-01" \
- --data-binary @- << EOF
- {
- "model": "claude-sonnet-4-5-20250929",
- "max_tokens": 256,
- "messages": [{"role": "user", "content": $(jq -Rs . < /tmp/area_prompt_final.txt)}]
- }
- EOF
- )
-
- RESPONSE=$(< /tmp/area_response.json)
-
- if [ "$HTTP_CODE" -lt 200 ] || [ "$HTTP_CODE" -ge 300 ]; then
- echo "HTTP Error: $HTTP_CODE"
- echo "$RESPONSE" | jq . 2>/dev/null || echo "$RESPONSE"
- exit 1
- fi
-
- if echo "$RESPONSE" | jq -e '.error' > /dev/null 2>&1; then
- echo "API Error:"
- echo "$RESPONSE" | jq .
- exit 1
- fi
-
- AREA_RESULT=$(echo "$RESPONSE" | jq -r '.content[0].text // empty')
-
- if [ -z "$AREA_RESULT" ]; then
- echo "Error: No response from Claude for area detection"
- echo "$RESPONSE" | jq .
- exit 1
- fi
-
- echo "Area detection result: $AREA_RESULT"
-
- # Extract just the areas array, handling potential markdown fences
- # shellcheck disable=SC2016
- CLEAN_JSON=$(echo "$AREA_RESULT" | sed 's/^```json//; s/^```//; s/```$//' | tr -d '\n')
- AREAS=$(echo "$CLEAN_JSON" | jq -r '.areas // [] | join(",")')
- echo "Detected areas: $AREAS"
-
- echo "detected_areas=$AREAS" >> "$GITHUB_OUTPUT"
-
- INPUT_TOKENS=$(echo "$RESPONSE" | jq -r '.usage.input_tokens')
- OUTPUT_TOKENS=$(echo "$RESPONSE" | jq -r '.usage.output_tokens')
- echo "Pass 1 token usage - Input: $INPUT_TOKENS, Output: $OUTPUT_TOKENS"
-
- # ========================================================================
- # Use detected areas to filter magnets and search for candidates
- # ========================================================================
- - name: Filter magnets and search for candidates
- if: steps.fetch-issue.outputs.should_continue == 'true'
- id: gather-candidates
- uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
- with:
- github-token: ${{ steps.get-app-token.outputs.token }}
- script: |
- // ============================================================
- // KNOWN DUPLICATE MAGNETS (from #46355)
- // ============================================================
- const DUPLICATE_MAGNETS = [
- { number: 37074, title: "Support history with external ACP agents", areas: ["ai", "ai/gemini", "ai/acp"] },
- { number: 35780, title: "Zed consumes a lot of memory and CPU when opening ~/ or other large file trees", areas: ["workspace", "performance", "performance/memory leak", "integrations/git"] },
- { number: 16965, title: "Support for non UTF-8 text encodings", areas: ["editor", "internationalization"] },
- { number: 38109, title: "Zed out of sync with changes made outside of editor", areas: ["workspace"] },
- { number: 16727, title: "Select text in markdown preview", areas: ["preview/markdown", "languages/markdown"] },
- { number: 31102, title: "RTL Right-to-Left Text Input/Rendering Support", areas: ["internationalization"] },
- { number: 7371, title: "Restarts should be non-destructive on workspace restore/reload", areas: ["workspace", "serialization"] },
- { number: 7992, title: "Font rendering on LoDPI displays", areas: ["ui/font"] },
- { number: 40018, title: "Windows Beta: Terminal overwrites text when resized and window overflow", areas: ["integrations/terminal"] },
- { number: 29962, title: "Agent Panel: Cannot access zed hosted models (via Cloudflare HKG)", areas: ["ai", "network"] },
- { number: 15097, title: "Serialize undo history (local and remote projects)", areas: ["workspace", "serialization"] },
- { number: 29846, title: "Collapsed code blocks are not restored properly", areas: ["editor", "serialization", "code folding"] },
- { number: 38799, title: "Poor search performance in large repositories", areas: ["performance", "search"] },
- { number: 27283, title: "Inefficient memory use when opening large file in Zed", areas: ["performance"] },
- { number: 39806, title: "Raspberry Pi OS (Trixie) Zed 0.207.3 Video Memory Corruption on Start", areas: ["gpui"] },
- { number: 29970, title: "Unable to download any extensions (due to potential DigitalOcean IP block or ISP block)", areas: ["network"] },
- { number: 29026, title: "Ability to copy/paste files from the system file manager", areas: ["workspace"] },
- { number: 7940, title: "Zed is sometimes unresponsive when the OS awakes from sleep", areas: ["workspace"] },
- { number: 37025, title: "Failed to generate thread summary", areas: ["ai"] },
- { number: 16156, title: "Support for project settings to enable/disable/control AI features", areas: ["ai", "settings"] },
- { number: 24752, title: "Extra horizontal scrolling when inline blame is enabled with soft wrapping", areas: ["editor"] },
- { number: 20970, title: "Excessive memory consumption on project search with large files present", areas: ["performance/memory leak", "search", "multi-buffer"] },
- { number: 12176, title: "Only some ligatures are being applied", areas: ["ui/font", "settings"] },
- { number: 13564, title: "blade: Text is rendered either too thick or too thin", areas: ["ui/font"] },
- { number: 38901, title: "Terminal freezes in Linux session when Ctrl+C is pressed before exit", areas: ["controls/keybinds", "integrations/terminal"] },
- { number: 20167, title: "Support unsetting default keybindings", areas: ["controls/keybinds"] },
- { number: 25469, title: "Tracking - Linux non-QWERTY keyboard support", areas: ["controls/keybinds"] },
- { number: 29598, title: "Manual refresh on unsupported filesystems (nfs, fuse, exfat) without inotify/fsevents", areas: ["project panel"] },
- { number: 14428, title: "Ordering of search tokens in file finder fuzzy match", areas: ["file finder"] },
- { number: 20771, title: "Workspace: Reload to respect the desktop/workspace Zed windows were in after reload", areas: ["workspace", "serialization"] },
- { number: 7465, title: "Lines with RTL text aren't rendered correctly", areas: ["editor", "internationalization/rtl support", "parity/vscode"] },
- { number: 16120, title: "Large files without newlines (all on one line) cause Zed to hang/crash", areas: ["editor"] },
- { number: 22703, title: "Syntax aware folding (folds.scm support)", areas: ["editor", "tree-sitter"] },
- { number: 38927, title: "Find & Replace memory leak on large files", areas: ["performance", "performance/memory leak"] },
- { number: 4560, title: "Improve streaming search speed", areas: ["performance", "search"] },
- { number: 14053, title: "Linux Shortcuts don't work with non-latin / international keyboard layouts", areas: ["internationalization", "controls/keybinds"] },
- { number: 31637, title: "High memory consumption in Project Search with large codebases", areas: ["performance/memory leak", "search"] },
- { number: 11744, title: "Incorrect spacing of terminal font", areas: ["ui/font", "integrations/terminal"] },
- { number: 4746, title: "Terminal Nerd Font rendering incorrect line height", areas: ["ui/font", "integrations/terminal"] },
- { number: 10647, title: "User configurable mouse bindings (like keymap for key+mouse)", areas: ["controls/keybinds", "controls/mouse", "accessibility"] },
- { number: 34865, title: "ctrl-w with pane::CloseActiveItem binding closes the project panel instead of the active pane", areas: ["controls/keybinds", "ui/panel"] },
- { number: 12163, title: "Cannot see list of installed extensions when offline / disconnected", areas: ["network"] },
- { number: 44630, title: "Tables do not render all columns in markdown preview", areas: ["preview/markdown"] },
- { number: 39435, title: "Windows: Low fps in many cases", areas: ["gpui"] },
- { number: 36227, title: "Zed becomes unresponsive when closing", areas: ["workspace"] },
- { number: 44962, title: "Can not open file in zed if filename includes (1)", areas: ["workspace"] },
- { number: 32318, title: "Zed hangs after exiting sleep mode in Linux", areas: ["workspace"] },
- { number: 5120, title: "Add options to hide title and status bar", areas: ["settings", "status bar"] },
- { number: 29323, title: "uv: Failed to detect Python venv correctly", areas: ["language server", "languages/python", "integrations/environment"] },
- { number: 7450, title: "Support LSP Semantic Tokens", areas: ["language server", "languages", "ui/themes"] },
- { number: 31846, title: "LSP: triggerCharacters for signature help declared by servers do not seem to be respected", areas: ["language server"] },
- { number: 32792, title: "[SWAY] Zed window flashes rapidly on Sway/wlroots", areas: ["gpui"] },
- { number: 28398, title: "Stale buffers should be removed from search multibuffer", areas: ["search", "multi-buffer"] },
- { number: 35011, title: "Delete Key against remote Hosts Doesn't Delete Folders", areas: ["project panel"] },
- { number: 8626, title: "Palette File Navigation - Preview File Content", areas: ["file finder"] },
- { number: 31468, title: "Certain LSP features are not activated till you trigger them manually when working with a remote project", areas: ["language server/server failure", "autocompletions"] },
- { number: 9789, title: "Zed checks for LSP updates when offline and disables LSPs irreversibly in the process", areas: ["language server/server failure"] },
- { number: 21403, title: "Completions and code actions should not use uniform lists", areas: ["autocompletions", "popovers", "diagnostics"] },
- { number: 15196, title: "Remote Project REPL support", areas: ["repl"] },
- ];
-
- const MAX_SEARCHES = 5;
-
- const issueNumber = parseInt('${{ steps.fetch-issue.outputs.issue_number }}', 10);
- const title = process.env.ISSUE_TITLE || '';
- const body = process.env.ISSUE_BODY || '';
- const detectedAreasStr = '${{ steps.detect-areas.outputs.detected_areas }}';
- const detectedAreas = new Set(detectedAreasStr.split(',').filter(a => a.trim()));
-
- console.log(`Detected areas from Claude: ${[...detectedAreas].join(', ') || '(none)'}`);
-
- // Helper: check if two areas match (handles hierarchy like "ai" matching "ai/gemini")
- function areasMatch(detected, magnetArea) {
- if (detected === magnetArea) return true;
- if (magnetArea.startsWith(detected + '/')) return true;
- if (detected.startsWith(magnetArea + '/')) return true;
- return false;
- }
-
- // Filter magnets based on detected areas
- const relevantMagnets = DUPLICATE_MAGNETS.filter(magnet => {
- if (detectedAreas.size === 0) return true;
- return magnet.areas.some(magnetArea =>
- [...detectedAreas].some(detected => areasMatch(detected, magnetArea))
- );
- }).slice(0, 20);
-
- console.log(`Relevant duplicate magnets: ${relevantMagnets.length}`);
-
- // Build search queries
- const searchQueries = [];
- const thirtyDaysAgo = new Date(Date.now() - 30 * 24 * 60 * 60 * 1000).toISOString().split('T')[0];
-
- // 1. Keyword search from title
- const stopwords = ['with', 'that', 'this', 'from', 'have', 'been', 'were', 'what', 'when',
- 'where', 'which', 'while', 'does', 'doesn', 'should', 'would', 'could',
- 'about', 'after', 'before', 'between', 'into', 'through', 'during',
- 'above', 'below', 'under', 'again', 'further', 'then', 'once', 'here',
- 'there', 'some', 'such', 'only', 'same', 'than', 'very', 'just', 'also',
- 'work', 'working', 'works', 'issue', 'problem', 'error', 'bug', 'zed'];
- const titleKeywords = title
- .toLowerCase()
- .replace(/[^\w\s]/g, ' ')
- .split(/\s+/)
- .filter(w => w.length >= 3 && !stopwords.includes(w))
- .slice(0, 5);
-
- if (titleKeywords.length >= 2) {
- searchQueries.push({
- type: 'keyword',
- query: `repo:zed-industries/zed is:issue created:>${thirtyDaysAgo} ${titleKeywords.join(' ')}`
- });
- }
-
- // 2. Area-based searches (using Claude-detected areas)
- for (const area of [...detectedAreas].slice(0, 3)) {
- searchQueries.push({
- type: 'area',
- query: `repo:zed-industries/zed is:issue is:open label:"area:${area}" created:>${thirtyDaysAgo}`
- });
- }
-
- // 3. Look for error patterns in the body
- const errorPatterns = body.match(/(?:error|panic|crash|failed|exception)[:\s]+[^\n]{10,100}/gi) || [];
- if (errorPatterns.length > 0) {
- const errorSnippet = errorPatterns[0]
- .slice(0, 60)
- .replace(/[^\w\s]/g, ' ')
- .replace(/\s+/g, ' ')
- .trim();
- if (errorSnippet.length > 15) {
- searchQueries.push({
- type: 'error',
- query: `repo:zed-industries/zed is:issue "${errorSnippet.slice(0, 40)}"`
- });
- }
- }
-
- // Execute searches and collect candidates
- const candidates = [];
- const seenIssues = new Set([issueNumber]);
-
- for (const { type, query } of searchQueries.slice(0, MAX_SEARCHES)) {
- try {
- console.log(`Search (${type}): ${query}`);
- const { data: results } = await github.rest.search.issuesAndPullRequests({
- q: query,
- sort: 'created',
- order: 'desc',
- per_page: 10
- });
-
- for (const item of results.items) {
- if (!seenIssues.has(item.number) && !item.pull_request) {
- seenIssues.add(item.number);
- candidates.push({
- number: item.number,
- title: item.title,
- state: item.state,
- created_at: item.created_at,
- body_preview: (item.body || '').slice(0, 800),
- source: type
- });
- }
- }
- } catch (error) {
- console.log(`Search failed (${type}): ${error.message}`);
- }
- }
-
- console.log(`Found ${candidates.length} candidates from searches`);
-
- // Prepare issue data for Claude
- const issueData = {
- number: issueNumber,
- title: title,
- body: body.slice(0, 4000),
- };
-
- // Prepare output
- core.setOutput('issue_data', JSON.stringify(issueData));
- core.setOutput('duplicate_magnets', JSON.stringify(relevantMagnets));
- core.setOutput('candidates', JSON.stringify(candidates.slice(0, 12)));
- core.setOutput('detected_areas', [...detectedAreas].join(', '));
- core.setOutput('should_analyze', (relevantMagnets.length > 0 || candidates.length > 0) ? 'true' : 'false');
- env:
- ISSUE_TITLE: ${{ steps.fetch-issue.outputs.issue_title }}
- ISSUE_BODY: ${{ steps.fetch-issue.outputs.issue_body }}
-
- # ========================================================================
- # PASS 2: Analyze duplicates with Claude
- # ========================================================================
- - name: "Pass 2: Analyze duplicates with Claude"
- if: |
- steps.fetch-issue.outputs.should_continue == 'true' &&
- steps.gather-candidates.outputs.should_analyze == 'true'
- id: analyze
- env:
- ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY_ISSUE_DEDUP }}
- ISSUE_DATA: ${{ steps.gather-candidates.outputs.issue_data }}
- DUPLICATE_MAGNETS: ${{ steps.gather-candidates.outputs.duplicate_magnets }}
- CANDIDATES: ${{ steps.gather-candidates.outputs.candidates }}
- run: |
- # shellcheck disable=SC2016
- cat > /tmp/prompt.txt << 'PROMPT_EOF'
- You are analyzing a GitHub issue to determine if it might be a duplicate of an existing issue.
-
- ## New Issue Being Analyzed
- ISSUE_DATA_PLACEHOLDER
-
- ## Known Frequently-Duplicated Issues (High Priority)
- These issues have historically received many duplicate reports. Check these first.
- DUPLICATE_MAGNETS_PLACEHOLDER
-
- ## Recent Similar Issues Found by Search
- CANDIDATES_PLACEHOLDER
-
- ## Your Task
-
- 1. First, understand what the new issue is about:
- - What specific bug or problem is being reported?
- - What error messages, stack traces, or specific behaviors are mentioned?
- - What component/feature is affected?
-
- 2. Check against the frequently-duplicated issues first (high priority):
- - These are known "duplicate magnets" that often get re-reported
- - If the new issue describes the same problem, it's likely a duplicate
-
- 3. Then check the recent similar issues:
- - Look for issues describing the SAME bug, not just related topics
-
- ## Duplicate Criteria (be strict!)
-
- An issue IS a duplicate if:
- - It describes the EXACT same bug with the same root cause
- - It has the same error message or stack trace
- - It has the same reproduction steps leading to the same outcome
-
- An issue is NOT a duplicate if:
- - It's merely related to the same feature/area
- - It has similar symptoms but potentially different causes
- - It mentions similar things but describes a different problem
-
- Be VERY conservative. It's better to miss a duplicate than to incorrectly flag a unique issue.
-
- ## Response Format
-
- Return ONLY a JSON object (no markdown fences, no explanation before or after):
- {
- "is_potential_duplicate": boolean,
- "confidence": "high" | "medium" | "low" | "none",
- "potential_duplicates": [
- {"number": integer, "title": "string", "similarity_reason": "string explaining why this might be the same bug"}
- ],
- "analysis_summary": "Brief explanation of what the new issue is about and your conclusion",
- "recommendation": "flag_as_duplicate" | "needs_human_review" | "not_a_duplicate"
- }
- PROMPT_EOF
-
- # Single quotes are intentional to prevent bash expansion; node reads env vars via process.env
- # shellcheck disable=SC2016
- node << 'SCRIPT_EOF'
- const fs = require('fs');
-
- let prompt = fs.readFileSync('/tmp/prompt.txt', 'utf8');
- prompt = prompt.replace('ISSUE_DATA_PLACEHOLDER', process.env.ISSUE_DATA);
- prompt = prompt.replace('DUPLICATE_MAGNETS_PLACEHOLDER', process.env.DUPLICATE_MAGNETS);
- prompt = prompt.replace('CANDIDATES_PLACEHOLDER', process.env.CANDIDATES);
-
- fs.writeFileSync('/tmp/prompt_final.txt', prompt);
- SCRIPT_EOF
-
- HTTP_CODE=$(curl -s -w "%{http_code}" -o /tmp/response.json -X POST "https://api.anthropic.com/v1/messages" \
- -H "Content-Type: application/json" \
- -H "x-api-key: $ANTHROPIC_API_KEY" \
- -H "anthropic-version: 2023-06-01" \
- --data-binary @- << EOF
- {
- "model": "claude-sonnet-4-5-20250929",
- "max_tokens": 1024,
- "messages": [{"role": "user", "content": $(jq -Rs . < /tmp/prompt_final.txt)}]
- }
- EOF
- )
-
- RESPONSE=$(< /tmp/response.json)
-
- if [ "$HTTP_CODE" -lt 200 ] || [ "$HTTP_CODE" -ge 300 ]; then
- echo "HTTP Error: $HTTP_CODE"
- echo "$RESPONSE" | jq . 2>/dev/null || echo "$RESPONSE"
- exit 1
- fi
-
- if echo "$RESPONSE" | jq -e '.error' > /dev/null 2>&1; then
- echo "API Error:"
- echo "$RESPONSE" | jq .
- exit 1
- fi
-
- ANALYSIS=$(echo "$RESPONSE" | jq -r '.content[0].text // empty')
-
- if [ -z "$ANALYSIS" ]; then
- echo "Error: No response from Claude"
- echo "$RESPONSE" | jq .
- exit 1
- fi
-
- {
- echo "analysis<<ANALYSIS_EOF"
- echo "$ANALYSIS"
- echo "ANALYSIS_EOF"
- } >> "$GITHUB_OUTPUT"
-
- INPUT_TOKENS=$(echo "$RESPONSE" | jq -r '.usage.input_tokens')
- OUTPUT_TOKENS=$(echo "$RESPONSE" | jq -r '.usage.output_tokens')
- echo "Pass 2 token usage - Input: $INPUT_TOKENS, Output: $OUTPUT_TOKENS"
-
- # ========================================================================
- # Log results
- # ========================================================================
- - name: Log analysis results
- if: |
- steps.fetch-issue.outputs.should_continue == 'true' &&
- !cancelled()
- uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
- with:
- script: |
- const issueNumber = parseInt('${{ steps.fetch-issue.outputs.issue_number }}', 10) || 0;
- const issueTitle = process.env.ISSUE_TITLE || '';
- const detectedAreas = '${{ steps.gather-candidates.outputs.detected_areas }}' || '(none)';
- const shouldAnalyze = '${{ steps.gather-candidates.outputs.should_analyze }}' === 'true';
- const analysisRaw = process.env.ANALYSIS_OUTPUT || '';
-
- console.log('='.repeat(60));
- console.log('DUPLICATE DETECTION RESULTS (TWO-PASS)');
- console.log('='.repeat(60));
- console.log(`Issue: #${issueNumber} - ${issueTitle}`);
- console.log(`URL: https://github.com/zed-industries/zed/issues/${issueNumber}`);
- console.log(`Detected Areas: ${detectedAreas}`);
-
- if (!shouldAnalyze) {
- console.log('\nNo duplicate magnets or candidates found - skipping analysis');
- core.summary.addHeading(`✅ Issue #${issueNumber}: No similar issues found`, 2);
- core.summary.addRaw(`\n**Title:** ${issueTitle}\n\n`);
- core.summary.addRaw(`**Detected Areas:** ${detectedAreas}\n\n`);
- core.summary.addRaw('No potential duplicates were found by search or in the known duplicate magnets list.\n');
- await core.summary.write();
- return;
- }
-
- if (!analysisRaw) {
- console.log('\nNo analysis output received');
- core.summary.addHeading(`⚠️ Issue #${issueNumber}: Analysis incomplete`, 2);
- core.summary.addRaw(`**Detected Areas:** ${detectedAreas}\n\n`);
- core.summary.addRaw('The Claude analysis step did not produce output. Check workflow logs.\n');
- await core.summary.write();
- return;
- }
-
- try {
- let cleanJson = analysisRaw.trim();
- if (cleanJson.startsWith('```')) {
- cleanJson = cleanJson.replace(/^```(?:json)?\n?/, '').replace(/\n?```$/, '');
- }
-
- const analysis = JSON.parse(cleanJson);
-
- console.log(`\nIs Potential Duplicate: ${analysis.is_potential_duplicate}`);
- console.log(`Confidence: ${analysis.confidence}`);
- console.log(`Recommendation: ${analysis.recommendation}`);
- console.log(`\nAnalysis Summary:\n${analysis.analysis_summary}`);
-
- if (analysis.potential_duplicates && analysis.potential_duplicates.length > 0) {
- console.log(`\nPotential Duplicates Found: ${analysis.potential_duplicates.length}`);
- for (const dup of analysis.potential_duplicates) {
- console.log(` - #${dup.number}: ${dup.title}`);
- console.log(` Reason: ${dup.similarity_reason}`);
- }
- } else {
- console.log('\nNo potential duplicates identified by analysis.');
- }
-
- console.log('\n' + '='.repeat(60));
-
- const summaryIcon = analysis.is_potential_duplicate ? '⚠️' : '✅';
- const summaryText = analysis.is_potential_duplicate
- ? `Potential duplicate detected (${analysis.confidence} confidence)`
- : 'No likely duplicates found';
-
- core.summary.addHeading(`${summaryIcon} Issue #${issueNumber}: ${summaryText}`, 2);
- core.summary.addRaw(`\n**Title:** ${issueTitle}\n\n`);
- core.summary.addRaw(`**Detected Areas:** ${detectedAreas}\n\n`);
- core.summary.addRaw(`**Recommendation:** \`${analysis.recommendation}\`\n\n`);
- core.summary.addRaw(`**Summary:** ${analysis.analysis_summary}\n\n`);
-
- if (analysis.potential_duplicates && analysis.potential_duplicates.length > 0) {
- core.summary.addHeading('Potential Duplicates', 3);
- const rows = analysis.potential_duplicates.map(d => [
- `[#${d.number}](https://github.com/zed-industries/zed/issues/${d.number})`,
- d.title.slice(0, 60) + (d.title.length > 60 ? '...' : ''),
- d.similarity_reason
- ]);
- core.summary.addTable([
- [{data: 'Issue', header: true}, {data: 'Title', header: true}, {data: 'Similarity Reason', header: true}],
- ...rows
- ]);
- }
-
- await core.summary.write();
-
- } catch (e) {
- console.log('Failed to parse analysis output:', e.message);
- console.log('Raw output:', analysisRaw);
- core.summary.addHeading(`⚠️ Issue #${issueNumber}: Failed to parse analysis`, 2);
- core.summary.addRaw(`**Detected Areas:** ${detectedAreas}\n\n`);
- core.summary.addRaw(`Error: ${e.message}\n\nRaw output:\n\`\`\`\n${analysisRaw.slice(0, 1000)}\n\`\`\``);
- await core.summary.write();
- }
- env:
- ISSUE_TITLE: ${{ steps.fetch-issue.outputs.issue_title }}
- ANALYSIS_OUTPUT: ${{ steps.analyze.outputs.analysis }}
@@ -0,0 +1,534 @@
+#!/usr/bin/env python3
+"""
+Comment on newly opened issues that might be duplicates of an existing issue.
+
+This script is run by a GitHub Actions workflow when a new bug or crash report
+is opened. It:
+1. Checks eligibility (must be bug/crash type, non-staff author)
+2. Detects relevant areas using Claude + the area label taxonomy
+3. Parses known "duplicate magnets" from tracking issue #46355
+4. Searches for similar recent issues by title keywords, area labels, and error patterns
+5. Asks Claude to analyze potential duplicates (magnets + search results)
+6. Posts a comment on the issue if high-confidence duplicates are found
+
+Requires:
+ requests (pip install requests)
+
+Usage:
+ python github-check-new-issue-for-duplicates.py <issue_number>
+
+Environment variables:
+ GITHUB_TOKEN - GitHub token (org members: read, issues: read & write)
+ ANTHROPIC_API_KEY - Anthropic API key for Claude
+
+"""
+
+import argparse
+import json
+import os
+import re
+import sys
+from datetime import datetime, timedelta
+
+import requests
+
GITHUB_API = "https://api.github.com"
REPO_OWNER = "zed-industries"
REPO_NAME = "zed"
# Tracking issue whose body lists known "duplicate magnet" issues;
# parsed by parse_duplicate_magnets() below.
TRACKING_ISSUE_NUMBER = 46355
# Org team slug checked in should_skip(): issues filed by members are skipped.
STAFF_TEAM_SLUG = "staff"

# area prefixes to collapse in taxonomy (show summary instead of all sub-labels)
PREFIXES_TO_COLLAPSE = ["languages", "parity", "tooling"]

# stopwords to filter from title keyword searches (short words handled by len > 2 filter)
STOPWORDS = {
    "after", "all", "also", "and", "any", "but", "can't", "does", "doesn't",
    "don't", "for", "from", "have", "just", "not", "only", "some", "that",
    "the", "this", "when", "while", "with", "won't", "work", "working", "zed",
}
+
+
def log(message):
    """Write a progress message to stderr, keeping stdout clean for JSON output."""
    sys.stderr.write(f"{message}\n")
+
+
def github_api_get(path, params=None):
    """Fetch JSON from the GitHub API.

    Args:
        path: API path, with or without a leading slash.
        params: optional dict of query parameters.

    Returns:
        The parsed JSON response body.

    Raises:
        requests.HTTPError: on a non-2xx status.
        requests.Timeout: if the request exceeds the timeout.
    """
    url = f"{GITHUB_API}/{path.lstrip('/')}"
    # A bounded timeout prevents a stalled connection from hanging the job
    # until the workflow-level timeout kills it.
    response = requests.get(url, headers=GITHUB_HEADERS, params=params, timeout=30)
    response.raise_for_status()
    return response.json()
+
+
def github_search_issues(query, per_page=15):
    """Search issues, returning most recently created first."""
    search_params = {
        "q": query,
        "sort": "created",
        "order": "desc",
        "per_page": per_page,
    }
    result = github_api_get("/search/issues", search_params)
    return result.get("items", [])
+
+
def check_team_membership(org, team_slug, username):
    """Check if user is an active member of a team.

    A 404 from the membership endpoint means "not a member" and yields False;
    any other HTTP error propagates to the caller.
    """
    membership_path = f"/orgs/{org}/teams/{team_slug}/memberships/{username}"
    try:
        membership = github_api_get(membership_path)
    except requests.HTTPError as error:
        if error.response.status_code == 404:
            return False
        raise
    return membership.get("state") == "active"
+
+
def post_comment(issue_number: int, body: str):
    """Post `body` as a comment on the given issue.

    Raises:
        requests.HTTPError: on a non-2xx status.
        requests.Timeout: if the request exceeds the timeout.
    """
    url = f"{GITHUB_API.rstrip('/')}/repos/{REPO_OWNER}/{REPO_NAME}/issues/{issue_number}/comments"
    # Bounded timeout so a stalled request cannot hang the job indefinitely.
    response = requests.post(url, headers=GITHUB_HEADERS, json={"body": body}, timeout=30)
    response.raise_for_status()
    log(f" Posted comment on #{issue_number}")
+
+
def build_duplicate_comment(matches):
    """Build the comment body for potential duplicates.

    Args:
        matches: list of dicts with "number" and "explanation" keys.

    Returns:
        The markdown comment text.
    """
    bullet_lines = []
    explanation_blocks = []
    for match in matches:
        bullet_lines.append(f"- #{match['number']}")
        explanation_blocks.append(f"**#{match['number']}:** {match['explanation']}")

    match_list = "\n".join(bullet_lines)
    explanations = "\n\n".join(explanation_blocks)

    return f"""This issue appears to be a duplicate of:

{match_list}

**If this is indeed a duplicate:**
Please close this issue and subscribe to the linked issue for updates (select "Close as not planned" → "Duplicate")

**If this is a different issue:**
No action needed. A maintainer will review this shortly.

<details>
<summary>Why were these issues selected?</summary>

{explanations}

</details>

---
<sub>This is an automated analysis and might be incorrect.</sub>"""
+
+
def call_claude(api_key, system, user_content, max_tokens=1024):
    """Send a message to Claude and return the text response.

    Args:
        api_key: Anthropic API key.
        system: system prompt string.
        user_content: single user message content.
        max_tokens: response token cap.

    Returns:
        The first text block of the response, or "" when the response has no
        leading text block.

    Raises:
        requests.HTTPError: on a non-2xx status.
        requests.Timeout: if the request exceeds the timeout.
    """
    response = requests.post(
        "https://api.anthropic.com/v1/messages",
        headers={
            "x-api-key": api_key,
            "anthropic-version": "2023-06-01",
            "content-type": "application/json",
        },
        json={
            "model": "claude-sonnet-4-20250514",
            "max_tokens": max_tokens,
            # deterministic output, important for reproducible analysis
            "temperature": 0.0,
            "system": system,
            "messages": [{"role": "user", "content": user_content}],
        },
        # Generation can be slow; bound it so a hung request cannot stall the job.
        timeout=120,
    )
    response.raise_for_status()
    data = response.json()

    usage = data.get("usage", {})
    log(f" Token usage - Input: {usage.get('input_tokens', 'N/A')}, Output: {usage.get('output_tokens', 'N/A')}")

    content = data.get("content", [])
    if content and content[0].get("type") == "text":
        return content[0].get("text") or ""
    return ""
+
+
def fetch_issue(issue_number: int):
    """Fetch issue from GitHub and return as a dict."""
    log(f"Fetching issue #{issue_number}")

    raw = github_api_get(f"/repos/{REPO_OWNER}/{REPO_NAME}/issues/{issue_number}")
    user = raw.get("user") or {}
    type_info = raw.get("type") or {}
    issue = {
        "number": issue_number,
        "title": raw["title"],
        "body": raw.get("body") or "",
        "author": user.get("login") or "",
        "type": type_info.get("name"),
    }

    log(f" Title: {issue['title']}\n Type: {issue['type']}\n Author: {issue['author']}")
    return issue
+
+
def should_skip(issue):
    """Check if issue should be skipped in duplicate detection process.

    Skips issues that are not typed Bug/Crash, and issues whose author is a
    member of the staff team.
    """
    if issue["type"] not in ("Bug", "Crash"):
        log(f" Skipping: issue type '{issue['type']}' is not a bug/crash report")
        return True

    author = issue["author"]
    if author and check_team_membership(REPO_OWNER, STAFF_TEAM_SLUG, author):
        log(f" Skipping: author '{author}' is a {STAFF_TEAM_SLUG} member")
        return True

    return False
+
+
def fetch_area_labels():
    """Fetch area:* labels from the repository. Returns list of {name, description} dicts."""
    log("Fetching area labels")

    all_labels = []
    page = 1
    while True:
        batch = github_api_get(
            f"/repos/{REPO_OWNER}/{REPO_NAME}/labels",
            params={"per_page": 100, "page": page},
        )
        if not batch:
            break
        all_labels.extend(batch)
        page += 1

    # keep only "area:" labels, with the prefix stripped from the name
    area_labels = []
    for label in all_labels:
        if label["name"].startswith("area:"):
            area_labels.append({
                "name": label["name"][5:],
                "description": label.get("description") or "",
            })

    log(f" Found {len(area_labels)} area labels")
    return area_labels
+
+
def format_taxonomy_for_claude(area_labels):
    """Format area labels into a string for Claude, collapsing certain prefixes.

    Labels under a collapsible prefix (see PREFIXES_TO_COLLAPSE) are replaced
    by a single summary line; all lines are deduplicated and sorted.
    """
    entries = set()

    for area in area_labels:
        name = area["name"]
        collapsed = None
        for prefix in PREFIXES_TO_COLLAPSE:
            if name.startswith(f"{prefix}/"):
                collapsed = prefix
                break

        if collapsed is not None:
            entries.add(f"- {collapsed}/* (multiple specific sub-labels exist)")
        elif area["description"]:
            entries.add(f"- {name}: {area['description']}")
        else:
            entries.add(f"- {name}")

    return "\n".join(sorted(entries))
+
+
def detect_areas(anthropic_key, issue, taxonomy):
    """Use Claude to detect relevant areas for the issue.

    Returns a list of area names; empty when the model answers "none".
    """
    log("Detecting areas with Claude")

    system_prompt = """You analyze GitHub issues to identify which area labels apply.

Given an issue and a taxonomy of areas, output ONLY a comma-separated list of matching area names.
- Output at most 3 areas, ranked by relevance
- Use exact area names from the taxonomy
- If no areas clearly match, output: none
- For languages/*, tooling/*, or parity/*, use the specific sub-label (e.g., "languages/rust",
tooling/eslint, parity/vscode)

Example outputs:
- "editor, parity/vim"
- "ai, ai/agent panel"
- "none"
"""

    user_content = f"""## Area Taxonomy
{taxonomy}

# Issue Title
{issue['title']}

# Issue Body
{issue['body'][:4000]}"""

    answer = call_claude(anthropic_key, system_prompt, user_content, max_tokens=100).strip()
    log(f" Detected areas: {answer}")

    if answer.lower() == "none":
        return []

    detected = []
    for part in answer.split(","):
        detected.append(part.strip())
    return detected
+
+
def parse_duplicate_magnets():
    """Parse known duplicate magnets from tracking issue #46355.

    Returns a list of magnets sorted by duplicate count (most duplicated first).
    Magnets only have number, areas, and dupe_count — use enrich_magnets() to fetch
    title and body_preview for the ones you need.
    """
    log(f"Parsing duplicate magnets from #{TRACKING_ISSUE_NUMBER}")

    issue_data = github_api_get(f"/repos/{REPO_OWNER}/{REPO_NAME}/issues/{TRACKING_ISSUE_NUMBER}")
    body = issue_data.get("body") or ""

    # parse the issue body
    # format: ## area_name
    #         - [N dupes] https://github.com/zed-industries/zed/issues/NUMBER
    magnets = {}  # number -> {number, areas, dupe_count}
    current_area = None  # tracks the most recent "## " header seen

    for line in body.split("\n"):
        # check for area header
        if line.startswith("## "):
            current_area = line[3:].strip()
            continue

        # ignore anything before the first header, non-bullet lines,
        # and bullets that do not reference an issue URL
        if not current_area or not line.startswith("-") or "/issues/" not in line:
            continue

        # parse: - [N dupes] https://github.com/.../issues/NUMBER
        # rstrip(")") tolerates a trailing ')' (e.g. from a markdown link);
        # malformed bullets are silently skipped
        try:
            dupe_count = int(line.split("[")[1].split()[0])
            number = int(line.split("/issues/")[1].split()[0].rstrip(")"))
        except (ValueError, IndexError):
            continue

        # skip "(unlabeled)": these magnets should match everything
        is_unlabeled = current_area == "(unlabeled)"

        # a magnet can appear under several area headers; accumulate them.
        # An empty "areas" list marks an unlabeled (match-everything) magnet.
        if number in magnets:
            if not is_unlabeled:
                magnets[number]["areas"].append(current_area)
        else:
            magnets[number] = {
                "number": number,
                "areas": [] if is_unlabeled else [current_area],
                "dupe_count": dupe_count,
            }

    magnet_list = sorted(magnets.values(), key=lambda m: m["dupe_count"], reverse=True)
    log(f" Parsed {len(magnet_list)} duplicate magnets")
    return magnet_list
+
+
def enrich_magnets(magnets):
    """Fetch title and body_preview for magnets from the API (mutates `magnets` in place)."""
    log(f" Fetching details for {len(magnets)} magnets")
    for magnet in magnets:
        details = github_api_get(f"/repos/{REPO_OWNER}/{REPO_NAME}/issues/{magnet['number']}")
        magnet["title"] = details["title"]
        body_text = details.get("body") or ""
        magnet["body_preview"] = body_text[:500]
+
+
def areas_match(detected, magnet_area):
    """Check if detected area matches magnet area. Matches broadly across hierarchy levels."""
    if detected == magnet_area:
        return True
    if magnet_area.startswith(f"{detected}/"):
        return True
    return detected.startswith(f"{magnet_area}/")
+
+
def filter_magnets_by_areas(magnets, detected_areas):
    """Filter magnets based on detected areas.

    When no areas were detected, all magnets are kept. Magnets with an empty
    "areas" list (unlabeled) always match.
    """
    if not detected_areas:
        return magnets

    detected_set = set(detected_areas)

    def hierarchy_match(detected, magnet_area):
        # same area, or one is a "parent/child" ancestor of the other
        return (
            detected == magnet_area
            or magnet_area.startswith(f"{detected}/")
            or detected.startswith(f"{magnet_area}/")
        )

    selected = []
    for magnet in magnets:
        # unlabeled magnets (empty areas) match everything
        if not magnet["areas"]:
            selected.append(magnet)
            continue
        if any(
            hierarchy_match(detected, magnet_area)
            for detected in detected_set
            for magnet_area in magnet["areas"]
        ):
            selected.append(magnet)

    return selected
+
+
def search_for_similar_issues(issue, detected_areas, max_searches=6):
    """Search for similar issues that might be duplicates.

    Searches by title keywords, area labels (last 60 days), and error patterns.
    max_searches caps the total number of queries to keep token usage and context size under control.

    Args:
        issue: dict with "number", "title", "body".
        detected_areas: area names from detect_areas().
        max_searches: maximum number of search queries to run.

    Returns:
        Deduplicated list of candidate issue dicts (the issue itself excluded).
    """
    log("Searching for similar issues")

    sixty_days_ago = (datetime.now() - timedelta(days=60)).strftime("%Y-%m-%d")
    base_query = f"repo:{REPO_OWNER}/{REPO_NAME} is:issue is:open"
    seen_issues = {}
    queries = []

    title_keywords = [word for word in issue["title"].split() if word.lower() not in STOPWORDS and len(word) > 2]

    if title_keywords:
        keywords_query = " ".join(title_keywords)
        queries.append(("title_keywords", f"{base_query} {keywords_query}"))

    for area in detected_areas:
        queries.append(("area_label", f'{base_query} label:"area:{area}" created:>{sixty_days_ago}'))

    # error pattern search: capture 5–90 chars after keyword, colon optional
    error_pattern = r"(?i:\b(?:error|panicked|panic|failed)\b)\s*([^\n]{5,90})"
    match = re.search(error_pattern, issue["body"])
    if match:
        # Remove embedded double quotes: the snippet is wrapped in quotes in the
        # query below, and a stray '"' would terminate the quoted term early and
        # corrupt the search query.
        error_snippet = match.group(1).strip().replace('"', "")
        if error_snippet:
            queries.append(("error_pattern", f'{base_query} in:body "{error_snippet}"'))

    for search_type, query in queries[:max_searches]:
        log(f" Search ({search_type}): {query}")
        try:
            results = github_search_issues(query, per_page=15)
            for item in results:
                number = item["number"]
                # skip the issue being analyzed and anything already collected
                if number != issue["number"] and number not in seen_issues:
                    body = item.get("body") or ""
                    seen_issues[number] = {
                        "number": number,
                        "title": item["title"],
                        "state": item.get("state", ""),
                        "created_at": item.get("created_at", ""),
                        "body_preview": body[:500],
                        "source": search_type,
                    }
        except requests.RequestException as e:
            # best-effort: a failed query should not abort the other searches
            log(f" Search failed: {e}")

    similar_issues = list(seen_issues.values())
    log(f" Found {len(similar_issues)} similar issues")
    return similar_issues
+
+
def analyze_duplicates(anthropic_key, issue, magnets, search_results):
    """Use Claude to analyze potential duplicates.

    Args:
        anthropic_key: Anthropic API key.
        issue: the new issue dict ("number", "title", "body").
        magnets: magnets from filter_magnets_by_areas(), most-duplicated first.
        search_results: candidates from search_for_similar_issues().

    Returns:
        (matches, summary): matches is the model's list of
        {"number", "confidence", "explanation"} dicts; summary is a one-line
        status string. Returns ([], reason) when there are no candidates or the
        response is not valid JSON.
    """
    log("Analyzing duplicates with Claude")

    # cap candidates (10 magnets + up to 10 search results) to bound token usage;
    # magnet details are only fetched for the ones actually sent
    top_magnets = magnets[:10]
    enrich_magnets(top_magnets)
    magnet_numbers = {m["number"] for m in top_magnets}

    # magnets take priority; search results duplicating a magnet are dropped
    candidates = [
        {"number": m["number"], "title": m["title"], "body_preview": m["body_preview"], "source": "known_duplicate_magnet"}
        for m in top_magnets
    ] + [
        {"number": r["number"], "title": r["title"], "body_preview": r["body_preview"], "source": "search_result"}
        for r in search_results[:10]
        if r["number"] not in magnet_numbers
    ]

    if not candidates:
        return [], "No candidates to analyze"

    system_prompt = """You analyze GitHub issues to identify potential duplicates.

Given a new issue and a list of existing issues, identify which existing issues might be duplicates.

For each potential duplicate, assess confidence:
- "high": Very likely the same issue (same root cause, same symptoms)
- "medium": Possibly related (likely to be the same root cause)
- Do NOT include tangentially related issues (same general area but probably different issues)

Output only valid JSON (no markdown code blocks) with this structure:
{
  "matches": [
    {
      "number": 12345,
      "confidence": "high|medium",
      "explanation": "Brief explanation of why this might be a duplicate"
    }
  ],
  "summary": "One sentence summary of findings"
}

Only include matches with "high" or "medium" confidence. Return empty matches array if none found."""

    user_content = f"""## New Issue #{issue['number']}
**Title:** {issue['title']}

**Body:**
{issue['body'][:3000]}

## Existing Issues to Compare
{json.dumps(candidates, indent=2)}"""

    response = call_claude(anthropic_key, system_prompt, user_content, max_tokens=2048)

    # the prompt demands bare JSON; a non-JSON reply degrades to "no matches"
    try:
        data = json.loads(response)
    except json.JSONDecodeError as e:
        log(f" Failed to parse response: {e}")
        log(f" Raw response: {response}")
        return [], "Failed to parse analysis"

    matches = data.get("matches", [])
    summary = data.get("summary", "Analysis complete")
    log(f" Found {len(matches)} potential matches")
    return matches, summary
+
+
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Identify potential duplicate issues")
    parser.add_argument("issue_number", type=int, help="Issue number to analyze")
    parser.add_argument("--dry-run", action="store_true", help="Skip posting comment, just log what would be posted")
    args = parser.parse_args()

    github_token = os.environ.get("GITHUB_TOKEN")
    anthropic_key = os.environ.get("ANTHROPIC_API_KEY")

    if not github_token:
        log("Error: GITHUB_TOKEN not set")
        sys.exit(1)
    if not anthropic_key:
        log("Error: ANTHROPIC_API_KEY not set")
        sys.exit(1)

    # module-level headers used by github_api_get() / post_comment()
    GITHUB_HEADERS = {
        "Authorization": f"Bearer {github_token}",
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
    }

    issue = fetch_issue(args.issue_number)
    if should_skip(issue):
        print(json.dumps({"skipped": True}))
        sys.exit(0)

    # detect areas
    taxonomy = format_taxonomy_for_claude(fetch_area_labels())
    detected_areas = detect_areas(anthropic_key, issue, taxonomy)

    # search for potential duplicates
    all_magnets = parse_duplicate_magnets()
    relevant_magnets = filter_magnets_by_areas(all_magnets, detected_areas)
    search_results = search_for_similar_issues(issue, detected_areas)

    # analyze potential duplicates
    if relevant_magnets or search_results:
        matches, summary = analyze_duplicates(anthropic_key, issue, relevant_magnets, search_results)
    else:
        matches, summary = [], "No potential duplicates to analyze"

    # post comment if high-confidence matches found.
    # .get() guards against a model response that omits "confidence":
    # such matches are treated as not-high-confidence instead of raising KeyError.
    high_confidence_matches = [m for m in matches if m.get("confidence") == "high"]
    commented = False

    if high_confidence_matches:
        comment_body = build_duplicate_comment(high_confidence_matches)
        if args.dry_run:
            log("Dry run - would post comment:\n" + "-" * 40 + "\n" + comment_body + "\n" + "-" * 40)
        else:
            log("Posting comment for high-confidence match(es)")
            try:
                # best-effort: a failed comment still produces the JSON report below
                post_comment(issue["number"], comment_body)
                commented = True
            except requests.RequestException as e:
                log(f" Failed to post comment: {e}")

    # machine-readable result on stdout (all logging goes to stderr)
    print(json.dumps({
        "skipped": False,
        "issue": {
            "number": issue["number"],
            "title": issue["title"],
            "author": issue["author"],
            "type": issue["type"],
        },
        "detected_areas": detected_areas,
        "magnets_count": len(relevant_magnets),
        "search_results_count": len(search_results),
        "matches": matches,
        "summary": summary,
        "commented": commented,
    }))