# Looks for likely duplicates of newly filed bug/crash reports.
name: Identify potential duplicates among new bug/crash reports

on:
  # Automatic trigger: fires when an issue is opened.
  issues:
    types:
      - opened
  # Manual trigger: analyze one specific issue (intended for testing).
  workflow_dispatch:
    inputs:
      issue_number:
        description: 'Issue number to analyze (for testing)'
        type: number
        required: true
12
concurrency:
  # A newer run for the same issue cancels the one still in progress, so
  # tokens are not spent on several parallel checks of that issue.
  cancel-in-progress: true
  group: potential-duplicate-check-${{ github.event.issue.number || inputs.issue_number }}
17
jobs:
  # Dry-run duplicate detection: resolves the issue, skips anything that is
  # not a Bug/Crash report or was filed by a staff member, asks Claude for a
  # structured verdict, and writes that verdict to the log and run summary.
  identify-duplicates:
    if: github.repository == 'zed-industries/zed'
    runs-on: ubuntu-latest

    permissions:
      contents: read
      issues: read
      id-token: write

    steps:
      # Mint an app token; the later API calls and the analysis step use it.
      - name: Get github app token
        id: get-app-token
        uses: actions/create-github-app-token@bef1eaf1c0ac2b148ee2a0a74c65fbe6db0631f1  # v2.1.4
        with:
          app-id: ${{ secrets.ZED_COMMUNITY_BOT_APP_ID }}
          private-key: ${{ secrets.ZED_COMMUNITY_BOT_PRIVATE_KEY }}
          owner: zed-industries

      # Resolve the issue number (event payload first, dispatch input second),
      # fetch the issue and expose issue_number / issue_author / is_target_type.
      - name: Check issue type
        id: check-type
        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea  # v7.0.1
        with:
          github-token: ${{ steps.get-app-token.outputs.token }}
          script: |
            // The dispatch input is read from the environment instead of being
            // interpolated into this script's source text.
            const dispatchNumber = Number(process.env.DISPATCH_ISSUE_NUMBER || 0);
            const issueNumber = context.payload.issue?.number || dispatchNumber;
            if (!Number.isInteger(issueNumber) || issueNumber <= 0) {
              core.setFailed('No issue number provided');
              return;
            }

            const { data: issue } = await github.rest.issues.get({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: issueNumber
            });

            const typeName = issue.type?.name;
            const isTargetType = typeName === 'Bug' || typeName === 'Crash';

            console.log(`Issue #${issueNumber}: "${issue.title}"`);
            console.log(`Issue type: ${typeName || '(none)'}`);
            console.log(`Is target type (Bug/Crash): ${isTargetType}`);

            core.setOutput('issue_number', issueNumber);
            core.setOutput('issue_author', issue.user?.login || '');
            core.setOutput('is_target_type', isTargetType);

            if (!isTargetType) {
              console.log('::notice::Skipping - issue type is not Bug or Crash');
            }
        env:
          DISPATCH_ISSUE_NUMBER: ${{ inputs.issue_number }}

      # Sets is_staff to 'true' only when the author has an active membership
      # in the zed-industries "staff" team; a 404 from the API means "not staff".
      - name: Check if author is staff
        if: steps.check-type.outputs.is_target_type == 'true'
        id: check-staff
        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea  # v7.0.1
        with:
          github-token: ${{ steps.get-app-token.outputs.token }}
          script: |
            const author = process.env.ISSUE_AUTHOR || '';
            if (!author) {
              console.log('Could not determine issue author, proceeding with check');
              core.setOutput('is_staff', 'false');
              return;
            }

            try {
              const response = await github.rest.teams.getMembershipForUserInOrg({
                org: 'zed-industries',
                team_slug: 'staff',
                username: author
              });
              const isStaff = response.data.state === 'active';
              core.setOutput('is_staff', String(isStaff));
              if (isStaff) {
                console.log(`::notice::Skipping - author @${author} is a staff member`);
              }
            } catch (error) {
              if (error.status === 404) {
                core.setOutput('is_staff', 'false');
              } else {
                throw error;
              }
            }
        env:
          ISSUE_AUTHOR: ${{ steps.check-type.outputs.issue_author }}

      - name: Checkout repository
        if: |
          steps.check-type.outputs.is_target_type == 'true' &&
          steps.check-staff.outputs.is_staff == 'false'
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
        with:
          fetch-depth: 1

      # Ask Claude for a duplicate verdict; the result is exposed as the
      # step's structured_output and consumed by the logging step below.
      - name: Analyze for potential duplicates (DRY RUN)
        if: |
          steps.check-type.outputs.is_target_type == 'true' &&
          steps.check-staff.outputs.is_staff == 'false'
        id: analyze
        # let's not overspend tokens on checks that went too deep into the rabbit hole
        timeout-minutes: 5
        uses: anthropics/claude-code-action@231bd75b7196d48291c1498f1c6d277c2810d9a3  # v1
        with:
          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY_ISSUE_DEDUP }}
          github_token: ${{ steps.get-app-token.outputs.token }}
          # this is automatic analysis, not user-invoked, and Claude only has read-only tools, so should be safe
          allowed_non_write_users: "*"

          prompt: |
            You are analyzing issue #${{ steps.check-type.outputs.issue_number }} in the zed-industries/zed repository to determine if it might be a duplicate of an existing issue.

            THIS IS A DRY RUN - do not post any comments or modify anything. Only analyze and return your findings.

            ## Instructions

            1. Use mcp__github__get_issue to fetch the full details of issue #${{ steps.check-type.outputs.issue_number }}

            2. Extract key identifying information:
               - Error messages (exact text)
               - Stack traces or panic messages
               - Affected features/components
               - Steps to reproduce
               - Platform/OS information

            3. Search for potential duplicates using mcp__github__search_issues with:
               - Key error messages or panic text (most reliable signal)
               - Specific feature names or components mentioned
               - Limit search to repo:zed-industries/zed and recent issues (last 90 days)
               - Search both open AND closed issues (duplicates may have been closed)

            4. For each potential match, evaluate similarity:
               - SAME error message or stack trace = high confidence
               - SAME steps to reproduce with same outcome = high confidence
               - Similar description but different error/context = low confidence
               - Vaguely related topic = NOT a duplicate

            ## Critical Guidelines

            - Be VERY conservative. When in doubt, conclude it is NOT a duplicate.
            - Only flag as potential duplicate if you have HIGH confidence (same error, same repro steps, same root cause).
            - "Similar topic" or "related feature" is NOT sufficient - the issues must describe the SAME bug.
            - False positives are worse than false negatives. Users finding their legitimate issue incorrectly flagged as duplicate is a poor experience.

            ## Output

            Return your analysis as JSON matching the JSON schema supplied for this run. Do not include any other text outside the JSON.

          # The schema is wrapped in single quotes so that shell-style argument
          # splitting keeps the JSON's double quotes and passes it as one token.
          claude_args: |
            --max-turns 3
            --allowedTools mcp__github__get_issue,mcp__github__search_issues,mcp__github__list_issues
            --json-schema '{"type":"object","properties":{"issue_number":{"type":"integer"},"issue_title":{"type":"string"},"is_potential_duplicate":{"type":"boolean"},"confidence":{"type":"string","enum":["high","medium","low","none"]},"potential_duplicates":{"type":"array","items":{"type":"object","properties":{"number":{"type":"integer"},"title":{"type":"string"},"similarity_reason":{"type":"string"}},"required":["number","title","similarity_reason"]}},"analysis_summary":{"type":"string"},"recommendation":{"type":"string","enum":["flag_as_duplicate","needs_human_review","not_a_duplicate"]}},"required":["issue_number","is_potential_duplicate","confidence","potential_duplicates","analysis_summary","recommendation"]}'

      # Runs even when the analysis step failed (but not when the run was
      # cancelled) so that a missing or malformed result is still reported.
      - name: Log analysis results
        if: |
          steps.check-type.outputs.is_target_type == 'true' &&
          steps.check-staff.outputs.is_staff == 'false' &&
          !cancelled()
        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea  # v7.0.1
        with:
          script: |
            const output = process.env.ANALYSIS_OUTPUT || '';

            console.log('='.repeat(60));
            console.log('DRY RUN ANALYSIS RESULTS');
            console.log('='.repeat(60));

            if (!output || output === '') {
              console.log('No structured output received from analysis');
              core.summary.addHeading('⚠️ Analysis did not produce output', 2);
              core.summary.addRaw('The duplicate detection analysis did not return structured output. Check the workflow logs for details.');
              await core.summary.write();
              return;
            }

            try {
              const analysis = JSON.parse(output);
              // Normalise to an array so a missing field is not misreported
              // below as a JSON parse failure.
              const duplicates = Array.isArray(analysis.potential_duplicates)
                ? analysis.potential_duplicates
                : [];

              console.log(`\nIssue: #${analysis.issue_number} - ${analysis.issue_title || 'N/A'}`);
              console.log(`Is Potential Duplicate: ${analysis.is_potential_duplicate}`);
              console.log(`Confidence: ${analysis.confidence}`);
              console.log(`Recommendation: ${analysis.recommendation}`);
              console.log(`\nAnalysis Summary:\n${analysis.analysis_summary}`);

              if (duplicates.length > 0) {
                console.log(`\nPotential Duplicates Found: ${duplicates.length}`);
                for (const dup of duplicates) {
                  console.log(` - #${dup.number}: ${dup.title}`);
                  console.log(` Reason: ${dup.similarity_reason}`);
                }
              } else {
                console.log('\nNo potential duplicates identified.');
              }

              console.log('\n' + '='.repeat(60));

              // set summary for workflow run
              const summaryIcon = analysis.is_potential_duplicate ? '⚠️' : '✅';
              const summaryText = analysis.is_potential_duplicate
                ? `Potential duplicate detected (${analysis.confidence} confidence)`
                : 'No likely duplicates found';

              core.summary.addHeading(`${summaryIcon} Issue #${analysis.issue_number}: ${summaryText}`, 2);
              core.summary.addRaw(`\n**Recommendation:** ${analysis.recommendation}\n\n`);
              core.summary.addRaw(`**Summary:** ${analysis.analysis_summary}\n\n`);

              if (duplicates.length > 0) {
                core.summary.addHeading('Potential Duplicates', 3);
                const rows = duplicates.map(d => [
                  `#${d.number}`,
                  d.title,
                  d.similarity_reason
                ]);
                core.summary.addTable([
                  [{data: 'Issue', header: true}, {data: 'Title', header: true}, {data: 'Similarity Reason', header: true}],
                  ...rows
                ]);
              }

              await core.summary.write();

            } catch (e) {
              console.log('Failed to parse analysis output:', e.message);
              console.log('Raw output:', output);
              core.summary.addHeading('⚠️ Failed to parse analysis output', 2);
              core.summary.addRaw(`Error: ${e.message}\n\nRaw output:\n\`\`\`\n${output}\n\`\`\``);
              await core.summary.write();
            }
        env:
          ANALYSIS_OUTPUT: ${{ steps.analyze.outputs.structured_output }}