1package onstart
2
3import (
4 "bytes"
5 "context"
6 "os"
7 "os/exec"
8 "path/filepath"
9 "slices"
10 "testing"
11)
12
13func TestAnalyzeCodebase(t *testing.T) {
14 t.Run("Basic Analysis", func(t *testing.T) {
15 // Test basic functionality with regular ASCII filenames
16 codebase, err := AnalyzeCodebase(context.Background(), "..")
17 if err != nil {
18 t.Fatalf("AnalyzeCodebase failed: %v", err)
19 }
20
21 if codebase == nil {
22 t.Fatal("Expected non-nil codebase")
23 }
24
25 if codebase.TotalFiles == 0 {
26 t.Error("Expected some files to be analyzed")
27 }
28
29 if len(codebase.ExtensionCounts) == 0 {
30 t.Error("Expected extension counts to be populated")
31 }
32 })
33
34 t.Run("Non-ASCII Filenames", func(t *testing.T) {
35 // Create a temporary directory with unicode filenames for testing
36 tempDir := t.TempDir()
37
38 // Initialize git repository
39 cmd := exec.Command("git", "init")
40 cmd.Dir = tempDir
41 if err := cmd.Run(); err != nil {
42 t.Fatalf("Failed to init git repo: %v", err)
43 }
44
45 cmd = exec.Command("git", "config", "user.name", "Test User")
46 cmd.Dir = tempDir
47 if err := cmd.Run(); err != nil {
48 t.Fatalf("Failed to set git user.name: %v", err)
49 }
50
51 cmd = exec.Command("git", "config", "user.email", "test@example.com")
52 cmd.Dir = tempDir
53 if err := cmd.Run(); err != nil {
54 t.Fatalf("Failed to set git user.email: %v", err)
55 }
56
57 // Configure git to handle unicode filenames properly
58 cmd = exec.Command("git", "config", "core.quotepath", "false")
59 cmd.Dir = tempDir
60 if err := cmd.Run(); err != nil {
61 t.Fatalf("Failed to set git core.quotepath: %v", err)
62 }
63
64 cmd = exec.Command("git", "config", "core.precomposeunicode", "true")
65 cmd.Dir = tempDir
66 if err := cmd.Run(); err != nil {
67 t.Fatalf("Failed to set git core.precomposeunicode: %v", err)
68 }
69
70 // Create test files with unicode characters dynamically
71 testFiles := map[string]string{
72 "测试文件.go": "// Package test with Chinese characters in filename\npackage test\n\nfunc TestFunction() {\n\t// This is a test file\n}",
73 "café.js": "// JavaScript file with French characters\nconsole.log('Hello from café!');",
74 "русский.py": "# Python file with Russian characters\nprint('Привет мир!')",
75 "🚀rocket.md": "# README with Emoji\n\nThis file has an emoji in the filename.",
76 "readme-español.md": "# Spanish README\n\nEste es un archivo de documentación.",
77 "Übung.html": "<!DOCTYPE html>\n<html><head><title>German Exercise</title></head><body><h1>Übung</h1></body></html>",
78 "Makefile-日本語": "# Japanese Makefile\nall:\n\techo 'Japanese makefile'",
79 }
80
81 // Create subdirectory
82 subdir := filepath.Join(tempDir, "subdir")
83 err := os.MkdirAll(subdir, 0o755)
84 if err != nil {
85 t.Fatalf("Failed to create subdir: %v", err)
86 }
87
88 // Add file in subdirectory
89 testFiles["subdir/claude.한국어.md"] = "# Korean Claude file\n\nThis is a guidance file with Korean characters."
90
91 // Write all test files
92 for filename, content := range testFiles {
93 fullPath := filepath.Join(tempDir, filename)
94 dir := filepath.Dir(fullPath)
95 if dir != tempDir {
96 err := os.MkdirAll(dir, 0o755)
97 if err != nil {
98 t.Fatalf("Failed to create directory %s: %v", dir, err)
99 }
100 }
101 err := os.WriteFile(fullPath, []byte(content), 0o644)
102 if err != nil {
103 t.Fatalf("Failed to write file %s: %v", filename, err)
104 }
105 }
106
107 // Add all files to git at once
108 cmd = exec.Command("git", "add", ".")
109 cmd.Dir = tempDir
110 if err := cmd.Run(); err != nil {
111 t.Fatalf("Failed to add files to git: %v", err)
112 }
113
114 // Test with non-ASCII characters in filenames
115 codebase, err := AnalyzeCodebase(context.Background(), tempDir)
116 if err != nil {
117 t.Fatalf("AnalyzeCodebase failed with non-ASCII filenames: %v", err)
118 }
119
120 if codebase == nil {
121 t.Fatal("Expected non-nil codebase")
122 }
123
124 // We expect 8 files in our temp directory
125 expectedFiles := 8
126 if codebase.TotalFiles != expectedFiles {
127 t.Errorf("Expected %d files, got %d", expectedFiles, codebase.TotalFiles)
128 }
129
130 // Verify extension counts include our non-ASCII files
131 expectedExtensions := map[string]int{
132 ".go": 1, // 测试文件.go
133 ".js": 1, // café.js
134 ".py": 1, // русский.py
135 ".md": 3, // 🚀rocket.md, readme-español.md, claude.한국어.md
136 ".html": 1, // Übung.html
137 "<no-extension>": 1, // Makefile-日本語
138 }
139
140 for ext, expectedCount := range expectedExtensions {
141 actualCount, exists := codebase.ExtensionCounts[ext]
142 if !exists {
143 t.Errorf("Expected extension %s to be found", ext)
144 continue
145 }
146 if actualCount != expectedCount {
147 t.Errorf("Expected %d files with extension %s, got %d", expectedCount, ext, actualCount)
148 }
149 }
150
151 // Verify file categorization works with non-ASCII filenames
152 // Check build files
153 if !slices.Contains(codebase.BuildFiles, "Makefile-日本語") {
154 t.Error("Expected Makefile-日本語 to be categorized as a build file")
155 }
156
157 // Check documentation files
158 if !slices.Contains(codebase.DocumentationFiles, "readme-español.md") {
159 t.Error("Expected readme-español.md to be categorized as a documentation file")
160 }
161
162 // Check guidance files
163 if !slices.Contains(codebase.GuidanceFiles, "subdir/claude.한국어.md") {
164 t.Error("Expected subdir/claude.한국어.md to be categorized as a guidance file")
165 }
166 })
167}
168
169func TestCategorizeFile(t *testing.T) {
170 t.Run("Non-ASCII Filenames", func(t *testing.T) {
171 tests := []struct {
172 name string
173 path string
174 expected string
175 }{
176 {"Chinese Go file", "测试文件.go", ""},
177 {"French JS file", "café.js", ""},
178 {"Russian Python file", "русский.py", ""},
179 {"Emoji markdown file", "🚀rocket.md", ""},
180 {"German HTML file", "Übung.html", ""},
181 {"Japanese Makefile", "Makefile-日本語", "build"},
182 {"Spanish README", "readme-español.md", "documentation"},
183 {"Korean Claude file", "subdir/claude.한국어.md", "guidance"},
184 // Test edge cases with Unicode normalization and combining characters
185 {"Mixed Unicode file", "test中文🚀.txt", ""},
186 {"Combining characters", "filé̂.go", ""}, // file with combining acute and circumflex accents
187 {"Right-to-left script", "مرحبا.py", ""}, // Arabic "hello"
188 }
189
190 for _, tt := range tests {
191 t.Run(tt.name, func(t *testing.T) {
192 result := categorizeFile(tt.path)
193 if result != tt.expected {
194 t.Errorf("categorizeFile(%q) = %q, want %q", tt.path, result, tt.expected)
195 }
196 })
197 }
198 })
199}
200
201func TestTopExtensions(t *testing.T) {
202 t.Run("With Non-ASCII Files", func(t *testing.T) {
203 // Create a test codebase with known extension counts
204 codebase := &Codebase{
205 ExtensionCounts: map[string]int{
206 ".md": 5, // Most common
207 ".go": 3,
208 ".js": 2,
209 ".py": 1,
210 ".html": 1, // Least common
211 },
212 TotalFiles: 12,
213 }
214
215 topExt := codebase.TopExtensions()
216 if len(topExt) != 5 {
217 t.Errorf("Expected 5 top extensions, got %d", len(topExt))
218 }
219
220 // Check that extensions are sorted by count (descending)
221 expected := []string{
222 ".md: 5 (42%)",
223 ".go: 3 (25%)",
224 ".js: 2 (17%)",
225 ".html: 1 (8%)",
226 ".py: 1 (8%)",
227 }
228
229 for i, expectedExt := range expected {
230 if i >= len(topExt) {
231 t.Errorf("Missing expected extension at index %d: %s", i, expectedExt)
232 continue
233 }
234 if topExt[i] != expectedExt {
235 t.Errorf("Expected extension %q at index %d, got %q", expectedExt, i, topExt[i])
236 }
237 }
238 })
239}
240
241func TestAnalyzeCodebaseErrors(t *testing.T) {
242 // Test error handling for non-existent directory
243 _, err := AnalyzeCodebase(context.Background(), "/non/existent/path")
244 if err == nil {
245 t.Error("Expected error for non-existent path")
246 }
247
248 // Test with directory that doesn't have git
249 tempDir := t.TempDir()
250 _, err = AnalyzeCodebase(context.Background(), tempDir)
251 if err == nil {
252 t.Error("Expected error for directory without git")
253 }
254}
255
256func TestCategorizeFileEdgeCases(t *testing.T) {
257 tests := []struct {
258 name string
259 path string
260 expected string
261 }{
262 {
263 name: "copilot instructions",
264 path: ".github/copilot-instructions.md",
265 expected: "inject",
266 },
267 {
268 name: "agent md file",
269 path: "subdir/agent.config.md",
270 expected: "guidance",
271 },
272 {
273 name: "vscode tasks",
274 path: ".vscode/tasks.json",
275 expected: "build",
276 },
277 {
278 name: "contributing file",
279 path: "docs/contributing.md",
280 expected: "documentation",
281 },
282 {
283 name: "non matching file",
284 path: "src/main.go",
285 expected: "",
286 },
287 }
288
289 for _, tt := range tests {
290 t.Run(tt.name, func(t *testing.T) {
291 result := categorizeFile(tt.path)
292 if result != tt.expected {
293 t.Errorf("categorizeFile(%q) = %q, want %q", tt.path, result, tt.expected)
294 }
295 })
296 }
297}
298
299func TestScanZero(t *testing.T) {
300 tests := []struct {
301 name string
302 data []byte
303 atEOF bool
304 advance int
305 token []byte
306 hasError bool
307 }{
308 {
309 name: "empty at EOF",
310 data: []byte{},
311 atEOF: true,
312 advance: 0,
313 token: nil,
314 },
315 {
316 name: "data with NUL",
317 data: []byte("hello\x00world"),
318 atEOF: false,
319 advance: 6,
320 token: []byte("hello"),
321 },
322 {
323 name: "data without NUL at EOF",
324 data: []byte("hello"),
325 atEOF: true,
326 advance: 5,
327 token: []byte("hello"),
328 },
329 {
330 name: "data without NUL not at EOF",
331 data: []byte("hello"),
332 atEOF: false,
333 advance: 0,
334 token: nil,
335 },
336 }
337
338 for _, tt := range tests {
339 t.Run(tt.name, func(t *testing.T) {
340 advance, token, err := scanZero(tt.data, tt.atEOF)
341 if err != nil && !tt.hasError {
342 t.Errorf("scanZero() error = %v, want no error", err)
343 }
344 if err == nil && tt.hasError {
345 t.Error("scanZero() expected error, got none")
346 }
347 if advance != tt.advance {
348 t.Errorf("scanZero() advance = %v, want %v", advance, tt.advance)
349 }
350 if !bytes.Equal(token, tt.token) {
351 t.Errorf("scanZero() token = %v, want %v", token, tt.token)
352 }
353 })
354 }
355}
356
357func TestAnalyzeCodebaseInjectFileErrors(t *testing.T) {
358 // Create a temporary directory with a git repo
359 tempDir := t.TempDir()
360
361 // Initialize git repository
362 cmd := exec.Command("git", "init")
363 cmd.Dir = tempDir
364 if err := cmd.Run(); err != nil {
365 t.Fatalf("Failed to init git repo: %v", err)
366 }
367
368 cmd = exec.Command("git", "config", "user.name", "Test User")
369 cmd.Dir = tempDir
370 if err := cmd.Run(); err != nil {
371 t.Fatalf("Failed to set git user.name: %v", err)
372 }
373
374 cmd = exec.Command("git", "config", "user.email", "test@example.com")
375 cmd.Dir = tempDir
376 if err := cmd.Run(); err != nil {
377 t.Fatalf("Failed to set git user.email: %v", err)
378 }
379
380 // Create a test inject file
381 injectFilePath := filepath.Join(tempDir, "DEAR_LLM.md")
382 err := os.WriteFile(injectFilePath, []byte("# Test Content"), 0o644)
383 if err != nil {
384 t.Fatalf("Failed to create inject file: %v", err)
385 }
386
387 // Add to git
388 cmd = exec.Command("git", "add", ".")
389 cmd.Dir = tempDir
390 if err := cmd.Run(); err != nil {
391 t.Fatalf("Failed to add files to git: %v", err)
392 }
393
394 // Make the file unreadable by removing read permissions temporarily
395 // This test might not work on all systems, so we'll just test the basic functionality
396 codebase, err := AnalyzeCodebase(context.Background(), tempDir)
397 if err != nil {
398 t.Fatalf("AnalyzeCodebase failed: %v", err)
399 }
400
401 // Should have found the inject file
402 if len(codebase.InjectFiles) != 1 {
403 t.Errorf("Expected 1 inject file, got %d", len(codebase.InjectFiles))
404 }
405}
406
407func TestAnalyzeCodebaseEmptyRepo(t *testing.T) {
408 // Create a temporary directory with an empty git repo
409 tempDir := t.TempDir()
410
411 // Initialize git repository
412 cmd := exec.Command("git", "init")
413 cmd.Dir = tempDir
414 if err := cmd.Run(); err != nil {
415 t.Fatalf("Failed to init git repo: %v", err)
416 }
417
418 cmd = exec.Command("git", "config", "user.name", "Test User")
419 cmd.Dir = tempDir
420 if err := cmd.Run(); err != nil {
421 t.Fatalf("Failed to set git user.name: %v", err)
422 }
423
424 cmd = exec.Command("git", "config", "user.email", "test@example.com")
425 cmd.Dir = tempDir
426 if err := cmd.Run(); err != nil {
427 t.Fatalf("Failed to set git user.email: %v", err)
428 }
429
430 // Test with empty repo
431 codebase, err := AnalyzeCodebase(context.Background(), tempDir)
432 if err != nil {
433 t.Fatalf("AnalyzeCodebase failed: %v", err)
434 }
435
436 // Should have no files
437 if codebase.TotalFiles != 0 {
438 t.Errorf("Expected 0 files, got %d", codebase.TotalFiles)
439 }
440 if len(codebase.ExtensionCounts) != 0 {
441 t.Errorf("Expected 0 extension counts, got %d", len(codebase.ExtensionCounts))
442 }
443}