# identify_potential_duplicate_issues.yml
#
# Dry-run duplicate detection for newly opened issues.
#
# Triggered when an issue is opened, or manually with an explicit issue number.
# The job looks up the issue, continues only if its type is Bug or Crash and its
# author is not an active member of the zed-industries `staff` team, then asks
# Claude to search for likely duplicates. The workflow's own steps only write to
# the run log and the job summary; the prompt instructs the model not to comment
# on or modify anything.

name: Identify potential duplicates among new bug/crash reports

on:
  issues:
    types: [opened]
  workflow_dispatch:
    inputs:
      issue_number:
        description: "Issue number to analyze (for testing)"
        required: true
        type: number

concurrency:
  group: potential-duplicate-check-${{ github.event.issue.number || inputs.issue_number }}
  # let's not overspend tokens on multiple parallel checks of the same issue
  cancel-in-progress: true

jobs:
  identify-duplicates:
    if: github.repository == 'zed-industries/zed'
    runs-on: ubuntu-latest
    # let's not overspend tokens on checks that went too deep into the rabbit hole
    timeout-minutes: 5
    permissions:
      contents: read
      issues: read

    steps:
      # Mint an installation token for the community bot app; the two lookup
      # steps below use it as their `github-token`.
      - name: Get github app token
        id: get-app-token
        uses: actions/create-github-app-token@bef1eaf1c0ac2b148ee2a0a74c65fbe6db0631f1 # v2.1.4
        with:
          app-id: ${{ secrets.ZED_COMMUNITY_BOT_APP_ID }}
          private-key: ${{ secrets.ZED_COMMUNITY_BOT_PRIVATE_KEY }}
          owner: zed-industries

      # Resolve the issue number (event payload first, manual input second),
      # fetch the issue, and publish three outputs for later steps:
      #   issue_number, issue_author, is_target_type ('true' for Bug/Crash).
      - name: Check issue type
        id: check-type
        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
        with:
          github-token: ${{ steps.get-app-token.outputs.token }}
          script: |
            // The manual input arrives through the environment instead of being
            // spliced into this script's source; it is empty for `issues` events.
            const dispatchNumber = Number(process.env.DISPATCH_ISSUE_NUMBER || 0);
            const issueNumber = context.payload.issue?.number || dispatchNumber;
            if (!Number.isInteger(issueNumber) || issueNumber <= 0) {
              core.setFailed('No issue number provided');
              return;
            }

            const { data: issue } = await github.rest.issues.get({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: issueNumber
            });

            const typeName = issue.type?.name;
            const isTargetType = typeName === 'Bug' || typeName === 'Crash';

            console.log(`Issue #${issueNumber}: "${issue.title}"`);
            console.log(`Issue type: ${typeName || '(none)'}`);
            console.log(`Is target type (Bug/Crash): ${isTargetType}`);

            core.setOutput('issue_number', issueNumber);
            core.setOutput('issue_author', issue.user?.login || '');
            core.setOutput('is_target_type', isTargetType);

            if (!isTargetType) {
              core.notice('Skipping - issue type is not Bug or Crash');
            }
        env:
          DISPATCH_ISSUE_NUMBER: ${{ inputs.issue_number }}

      # Sets `is_staff` to 'true' only when the author has an active membership
      # in the `staff` team. A 404 from the membership lookup, or an unknown
      # author, yields 'false'; any other API error fails the step.
      - name: Check if author is staff
        if: steps.check-type.outputs.is_target_type == 'true'
        id: check-staff
        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
        with:
          github-token: ${{ steps.get-app-token.outputs.token }}
          script: |
            const author = process.env.ISSUE_AUTHOR || '';
            if (!author) {
              console.log('Could not determine issue author, proceeding with check');
              core.setOutput('is_staff', 'false');
              return;
            }

            try {
              const response = await github.rest.teams.getMembershipForUserInOrg({
                org: 'zed-industries',
                team_slug: 'staff',
                username: author
              });
              const isStaff = response.data.state === 'active';
              core.setOutput('is_staff', String(isStaff));
              if (isStaff) {
                core.notice(`Skipping - author @${author} is a staff member`);
              }
            } catch (error) {
              if (error.status === 404) {
                core.setOutput('is_staff', 'false');
              } else {
                throw error;
              }
            }
        env:
          ISSUE_AUTHOR: ${{ steps.check-type.outputs.issue_author }}

      - name: Checkout repository
        if: |
          steps.check-type.outputs.is_target_type == 'true' &&
          steps.check-staff.outputs.is_staff == 'false'
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 1

      # Runs the model with a read-only tool allowlist and a JSON schema for its
      # answer; the "Log analysis results" step reads `structured_output`.
      #
      # NOTE(review): unlike the other actions in this file, this one is
      # referenced by a mutable tag rather than a pinned commit SHA - consider
      # pinning it.
      # NOTE(review): no `github_token` input is passed and the job does not
      # grant `id-token: write`; confirm the action can still authenticate the
      # mcp__github__* tools it is allowed to use.
      # NOTE(review): the prompt asks for a fetch plus several searches; confirm
      # that `--max-turns 3` leaves enough room to also return the final JSON.
      - name: Analyze for potential duplicates (DRY RUN)
        if: |
          steps.check-type.outputs.is_target_type == 'true' &&
          steps.check-staff.outputs.is_staff == 'false'
        id: analyze
        uses: anthropics/claude-code-action@v1
        with:
          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY_ISSUE_DEDUP }}

          prompt: |
            You are analyzing issue #${{ steps.check-type.outputs.issue_number }} in the zed-industries/zed repository to determine if it might be a duplicate of an existing issue.

            THIS IS A DRY RUN - do not post any comments or modify anything. Only analyze and return your findings.

            ## Instructions

            1. Use mcp__github__get_issue to fetch the full details of issue #${{ steps.check-type.outputs.issue_number }}

            2. Extract key identifying information:
               - Error messages (exact text)
               - Stack traces or panic messages
               - Affected features/components
               - Steps to reproduce
               - Platform/OS information

            3. Search for potential duplicates using mcp__github__search_issues with:
               - Key error messages or panic text (most reliable signal)
               - Specific feature names or components mentioned
               - Limit search to repo:zed-industries/zed and recent issues (last 90 days)
               - Search both open AND closed issues (duplicates may have been closed)

            4. For each potential match, evaluate similarity:
               - SAME error message or stack trace = high confidence
               - SAME steps to reproduce with same outcome = high confidence
               - Similar description but different error/context = low confidence
               - Vaguely related topic = NOT a duplicate

            ## Critical Guidelines

            - Be VERY conservative. When in doubt, conclude it is NOT a duplicate.
            - Only flag as potential duplicate if you have HIGH confidence (same error, same repro steps, same root cause).
            - "Similar topic" or "related feature" is NOT sufficient - the issues must describe the SAME bug.
            - False positives are worse than false negatives. Users finding their legitimate issue incorrectly flagged as duplicate is a poor experience.

            ## Output

            Return your analysis as JSON with this exact structure. Do not include any other text outside the JSON.

          claude_args: |
            --max-turns 3
            --allowedTools mcp__github__get_issue,mcp__github__search_issues,mcp__github__list_issues
            --json-schema {"type":"object","properties":{"issue_number":{"type":"integer"},"issue_title":{"type":"string"},"is_potential_duplicate":{"type":"boolean"},"confidence":{"type":"string","enum":["high","medium","low","none"]},"potential_duplicates":{"type":"array","items":{"type":"object","properties":{"number":{"type":"integer"},"title":{"type":"string"},"similarity_reason":{"type":"string"}},"required":["number","title","similarity_reason"]}},"analysis_summary":{"type":"string"},"recommendation":{"type":"string","enum":["flag_as_duplicate","needs_human_review","not_a_duplicate"]}},"required":["issue_number","is_potential_duplicate","confidence","potential_duplicates","analysis_summary","recommendation"]}

      # Prints the analysis to the run log and writes a job summary. The
      # `!cancelled()` term lets this step run even when the analyze step
      # failed, so an empty or unparseable result is still reported.
      - name: Log analysis results
        if: |
          steps.check-type.outputs.is_target_type == 'true' &&
          steps.check-staff.outputs.is_staff == 'false' &&
          !cancelled()
        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
        with:
          script: |
            const output = process.env.ANALYSIS_OUTPUT || '';

            console.log('='.repeat(60));
            console.log('DRY RUN ANALYSIS RESULTS');
            console.log('='.repeat(60));

            if (!output) {
              console.log('No structured output received from analysis');
              core.summary.addHeading('⚠️ Analysis did not produce output', 2);
              core.summary.addRaw('The duplicate detection analysis did not return structured output. Check the workflow logs for details.');
              await core.summary.write();
              return;
            }

            try {
              const analysis = JSON.parse(output);
              // Tolerate a missing or malformed list instead of throwing on `.length`.
              const duplicates = Array.isArray(analysis.potential_duplicates)
                ? analysis.potential_duplicates
                : [];

              console.log(`\nIssue: #${analysis.issue_number} - ${analysis.issue_title || 'N/A'}`);
              console.log(`Is Potential Duplicate: ${analysis.is_potential_duplicate}`);
              console.log(`Confidence: ${analysis.confidence}`);
              console.log(`Recommendation: ${analysis.recommendation}`);
              console.log(`\nAnalysis Summary:\n${analysis.analysis_summary}`);

              if (duplicates.length > 0) {
                console.log(`\nPotential Duplicates Found: ${duplicates.length}`);
                for (const dup of duplicates) {
                  console.log(`  - #${dup.number}: ${dup.title}`);
                  console.log(`    Reason: ${dup.similarity_reason}`);
                }
              } else {
                console.log('\nNo potential duplicates identified.');
              }

              console.log('\n' + '='.repeat(60));

              // set summary for workflow run
              const summaryIcon = analysis.is_potential_duplicate ? '⚠️' : '✅';
              const summaryText = analysis.is_potential_duplicate
                ? `Potential duplicate detected (${analysis.confidence} confidence)`
                : 'No duplicate detected';

              core.summary.addHeading(`${summaryIcon} Issue #${analysis.issue_number}: ${summaryText}`, 2);
              core.summary.addRaw(`\n**Recommendation:** ${analysis.recommendation}\n\n`);
              core.summary.addRaw(`**Summary:** ${analysis.analysis_summary}\n\n`);

              if (duplicates.length > 0) {
                core.summary.addHeading('Potential Duplicates', 3);
                // Table cells are coerced to strings so a missing field cannot break rendering.
                const rows = duplicates.map(d => [
                  `#${d.number}`,
                  String(d.title ?? ''),
                  String(d.similarity_reason ?? '')
                ]);
                core.summary.addTable([
                  [{data: 'Issue', header: true}, {data: 'Title', header: true}, {data: 'Similarity Reason', header: true}],
                  ...rows
                ]);
              }

              await core.summary.write();

            } catch (e) {
              console.log('Failed to parse analysis output:', e.message);
              console.log('Raw output:', output);
              core.summary.addHeading('⚠️ Failed to parse analysis output', 2);
              core.summary.addRaw(`Error: ${e.message}\n\nRaw output:\n\`\`\`\n${output}\n\`\`\``);
              await core.summary.write();
            }
        env:
          ANALYSIS_OUTPUT: ${{ steps.analyze.outputs.structured_output }}