analyze_test.go

  1package onstart
  2
  3import (
  4	"bytes"
  5	"context"
  6	"os"
  7	"os/exec"
  8	"path/filepath"
  9	"slices"
 10	"testing"
 11)
 12
 13func TestAnalyzeCodebase(t *testing.T) {
 14	t.Run("Basic Analysis", func(t *testing.T) {
 15		// Test basic functionality with regular ASCII filenames
 16		codebase, err := AnalyzeCodebase(context.Background(), "..")
 17		if err != nil {
 18			t.Fatalf("AnalyzeCodebase failed: %v", err)
 19		}
 20
 21		if codebase == nil {
 22			t.Fatal("Expected non-nil codebase")
 23		}
 24
 25		if codebase.TotalFiles == 0 {
 26			t.Error("Expected some files to be analyzed")
 27		}
 28
 29		if len(codebase.ExtensionCounts) == 0 {
 30			t.Error("Expected extension counts to be populated")
 31		}
 32	})
 33
 34	t.Run("Non-ASCII Filenames", func(t *testing.T) {
 35		// Create a temporary directory with unicode filenames for testing
 36		tempDir := t.TempDir()
 37
 38		// Initialize git repository
 39		cmd := exec.Command("git", "init")
 40		cmd.Dir = tempDir
 41		if err := cmd.Run(); err != nil {
 42			t.Fatalf("Failed to init git repo: %v", err)
 43		}
 44
 45		cmd = exec.Command("git", "config", "user.name", "Test User")
 46		cmd.Dir = tempDir
 47		if err := cmd.Run(); err != nil {
 48			t.Fatalf("Failed to set git user.name: %v", err)
 49		}
 50
 51		cmd = exec.Command("git", "config", "user.email", "test@example.com")
 52		cmd.Dir = tempDir
 53		if err := cmd.Run(); err != nil {
 54			t.Fatalf("Failed to set git user.email: %v", err)
 55		}
 56
 57		// Configure git to handle unicode filenames properly
 58		cmd = exec.Command("git", "config", "core.quotepath", "false")
 59		cmd.Dir = tempDir
 60		if err := cmd.Run(); err != nil {
 61			t.Fatalf("Failed to set git core.quotepath: %v", err)
 62		}
 63
 64		cmd = exec.Command("git", "config", "core.precomposeunicode", "true")
 65		cmd.Dir = tempDir
 66		if err := cmd.Run(); err != nil {
 67			t.Fatalf("Failed to set git core.precomposeunicode: %v", err)
 68		}
 69
 70		// Create test files with unicode characters dynamically
 71		testFiles := map[string]string{
 72			"测试文件.go":           "// Package test with Chinese characters in filename\npackage test\n\nfunc TestFunction() {\n\t// This is a test file\n}",
 73			"café.js":           "// JavaScript file with French characters\nconsole.log('Hello from café!');",
 74			"русский.py":        "# Python file with Russian characters\nprint('Привет мир!')",
 75			"🚀rocket.md":        "# README with Emoji\n\nThis file has an emoji in the filename.",
 76			"readme-español.md": "# Spanish README\n\nEste es un archivo de documentación.",
 77			"Übung.html":        "<!DOCTYPE html>\n<html><head><title>German Exercise</title></head><body><h1>Übung</h1></body></html>",
 78			"Makefile-日本語":      "# Japanese Makefile\nall:\n\techo 'Japanese makefile'",
 79		}
 80
 81		// Create subdirectory
 82		subdir := filepath.Join(tempDir, "subdir")
 83		err := os.MkdirAll(subdir, 0o755)
 84		if err != nil {
 85			t.Fatalf("Failed to create subdir: %v", err)
 86		}
 87
 88		// Add file in subdirectory
 89		testFiles["subdir/claude.한국어.md"] = "# Korean Claude file\n\nThis is a guidance file with Korean characters."
 90
 91		// Write all test files
 92		for filename, content := range testFiles {
 93			fullPath := filepath.Join(tempDir, filename)
 94			dir := filepath.Dir(fullPath)
 95			if dir != tempDir {
 96				err := os.MkdirAll(dir, 0o755)
 97				if err != nil {
 98					t.Fatalf("Failed to create directory %s: %v", dir, err)
 99				}
100			}
101			err := os.WriteFile(fullPath, []byte(content), 0o644)
102			if err != nil {
103				t.Fatalf("Failed to write file %s: %v", filename, err)
104			}
105		}
106
107		// Add all files to git at once
108		cmd = exec.Command("git", "add", ".")
109		cmd.Dir = tempDir
110		if err := cmd.Run(); err != nil {
111			t.Fatalf("Failed to add files to git: %v", err)
112		}
113
114		// Test with non-ASCII characters in filenames
115		codebase, err := AnalyzeCodebase(context.Background(), tempDir)
116		if err != nil {
117			t.Fatalf("AnalyzeCodebase failed with non-ASCII filenames: %v", err)
118		}
119
120		if codebase == nil {
121			t.Fatal("Expected non-nil codebase")
122		}
123
124		// We expect 8 files in our temp directory
125		expectedFiles := 8
126		if codebase.TotalFiles != expectedFiles {
127			t.Errorf("Expected %d files, got %d", expectedFiles, codebase.TotalFiles)
128		}
129
130		// Verify extension counts include our non-ASCII files
131		expectedExtensions := map[string]int{
132			".go":            1, // 测试文件.go
133			".js":            1, // café.js
134			".py":            1, // русский.py
135			".md":            3, // 🚀rocket.md, readme-español.md, claude.한국어.md
136			".html":          1, // Übung.html
137			"<no-extension>": 1, // Makefile-日本語
138		}
139
140		for ext, expectedCount := range expectedExtensions {
141			actualCount, exists := codebase.ExtensionCounts[ext]
142			if !exists {
143				t.Errorf("Expected extension %s to be found", ext)
144				continue
145			}
146			if actualCount != expectedCount {
147				t.Errorf("Expected %d files with extension %s, got %d", expectedCount, ext, actualCount)
148			}
149		}
150
151		// Verify file categorization works with non-ASCII filenames
152		// Check build files
153		if !slices.Contains(codebase.BuildFiles, "Makefile-日本語") {
154			t.Error("Expected Makefile-日本語 to be categorized as a build file")
155		}
156
157		// Check documentation files
158		if !slices.Contains(codebase.DocumentationFiles, "readme-español.md") {
159			t.Error("Expected readme-español.md to be categorized as a documentation file")
160		}
161
162		// Check guidance files
163		if !slices.Contains(codebase.GuidanceFiles, "subdir/claude.한국어.md") {
164			t.Error("Expected subdir/claude.한국어.md to be categorized as a guidance file")
165		}
166	})
167}
168
169func TestCategorizeFile(t *testing.T) {
170	t.Run("Non-ASCII Filenames", func(t *testing.T) {
171		tests := []struct {
172			name     string
173			path     string
174			expected string
175		}{
176			{"Chinese Go file", "测试文件.go", ""},
177			{"French JS file", "café.js", ""},
178			{"Russian Python file", "русский.py", ""},
179			{"Emoji markdown file", "🚀rocket.md", ""},
180			{"German HTML file", "Übung.html", ""},
181			{"Japanese Makefile", "Makefile-日本語", "build"},
182			{"Spanish README", "readme-español.md", "documentation"},
183			{"Korean Claude file", "subdir/claude.한국어.md", "guidance"},
184			// Test edge cases with Unicode normalization and combining characters
185			{"Mixed Unicode file", "test中文🚀.txt", ""},
186			{"Combining characters", "filé̂.go", ""}, // file with combining acute and circumflex accents
187			{"Right-to-left script", "مرحبا.py", ""}, // Arabic "hello"
188		}
189
190		for _, tt := range tests {
191			t.Run(tt.name, func(t *testing.T) {
192				result := categorizeFile(tt.path)
193				if result != tt.expected {
194					t.Errorf("categorizeFile(%q) = %q, want %q", tt.path, result, tt.expected)
195				}
196			})
197		}
198	})
199}
200
201func TestTopExtensions(t *testing.T) {
202	t.Run("With Non-ASCII Files", func(t *testing.T) {
203		// Create a test codebase with known extension counts
204		codebase := &Codebase{
205			ExtensionCounts: map[string]int{
206				".md":   5, // Most common
207				".go":   3,
208				".js":   2,
209				".py":   1,
210				".html": 1, // Least common
211			},
212			TotalFiles: 12,
213		}
214
215		topExt := codebase.TopExtensions()
216		if len(topExt) != 5 {
217			t.Errorf("Expected 5 top extensions, got %d", len(topExt))
218		}
219
220		// Check that extensions are sorted by count (descending)
221		expected := []string{
222			".md: 5 (42%)",
223			".go: 3 (25%)",
224			".js: 2 (17%)",
225			".html: 1 (8%)",
226			".py: 1 (8%)",
227		}
228
229		for i, expectedExt := range expected {
230			if i >= len(topExt) {
231				t.Errorf("Missing expected extension at index %d: %s", i, expectedExt)
232				continue
233			}
234			if topExt[i] != expectedExt {
235				t.Errorf("Expected extension %q at index %d, got %q", expectedExt, i, topExt[i])
236			}
237		}
238	})
239}
240
241func TestAnalyzeCodebaseErrors(t *testing.T) {
242	// Test error handling for non-existent directory
243	_, err := AnalyzeCodebase(context.Background(), "/non/existent/path")
244	if err == nil {
245		t.Error("Expected error for non-existent path")
246	}
247
248	// Test with directory that doesn't have git
249	tempDir := t.TempDir()
250	_, err = AnalyzeCodebase(context.Background(), tempDir)
251	if err == nil {
252		t.Error("Expected error for directory without git")
253	}
254}
255
256func TestCategorizeFileEdgeCases(t *testing.T) {
257	tests := []struct {
258		name     string
259		path     string
260		expected string
261	}{
262		{
263			name:     "copilot instructions",
264			path:     ".github/copilot-instructions.md",
265			expected: "inject",
266		},
267		{
268			name:     "agent md file",
269			path:     "subdir/agent.config.md",
270			expected: "guidance",
271		},
272		{
273			name:     "vscode tasks",
274			path:     ".vscode/tasks.json",
275			expected: "build",
276		},
277		{
278			name:     "contributing file",
279			path:     "docs/contributing.md",
280			expected: "documentation",
281		},
282		{
283			name:     "non matching file",
284			path:     "src/main.go",
285			expected: "",
286		},
287	}
288
289	for _, tt := range tests {
290		t.Run(tt.name, func(t *testing.T) {
291			result := categorizeFile(tt.path)
292			if result != tt.expected {
293				t.Errorf("categorizeFile(%q) = %q, want %q", tt.path, result, tt.expected)
294			}
295		})
296	}
297}
298
299func TestScanZero(t *testing.T) {
300	tests := []struct {
301		name     string
302		data     []byte
303		atEOF    bool
304		advance  int
305		token    []byte
306		hasError bool
307	}{
308		{
309			name:    "empty at EOF",
310			data:    []byte{},
311			atEOF:   true,
312			advance: 0,
313			token:   nil,
314		},
315		{
316			name:    "data with NUL",
317			data:    []byte("hello\x00world"),
318			atEOF:   false,
319			advance: 6,
320			token:   []byte("hello"),
321		},
322		{
323			name:    "data without NUL at EOF",
324			data:    []byte("hello"),
325			atEOF:   true,
326			advance: 5,
327			token:   []byte("hello"),
328		},
329		{
330			name:    "data without NUL not at EOF",
331			data:    []byte("hello"),
332			atEOF:   false,
333			advance: 0,
334			token:   nil,
335		},
336	}
337
338	for _, tt := range tests {
339		t.Run(tt.name, func(t *testing.T) {
340			advance, token, err := scanZero(tt.data, tt.atEOF)
341			if err != nil && !tt.hasError {
342				t.Errorf("scanZero() error = %v, want no error", err)
343			}
344			if err == nil && tt.hasError {
345				t.Error("scanZero() expected error, got none")
346			}
347			if advance != tt.advance {
348				t.Errorf("scanZero() advance = %v, want %v", advance, tt.advance)
349			}
350			if !bytes.Equal(token, tt.token) {
351				t.Errorf("scanZero() token = %v, want %v", token, tt.token)
352			}
353		})
354	}
355}
356
357func TestAnalyzeCodebaseInjectFileErrors(t *testing.T) {
358	// Create a temporary directory with a git repo
359	tempDir := t.TempDir()
360
361	// Initialize git repository
362	cmd := exec.Command("git", "init")
363	cmd.Dir = tempDir
364	if err := cmd.Run(); err != nil {
365		t.Fatalf("Failed to init git repo: %v", err)
366	}
367
368	cmd = exec.Command("git", "config", "user.name", "Test User")
369	cmd.Dir = tempDir
370	if err := cmd.Run(); err != nil {
371		t.Fatalf("Failed to set git user.name: %v", err)
372	}
373
374	cmd = exec.Command("git", "config", "user.email", "test@example.com")
375	cmd.Dir = tempDir
376	if err := cmd.Run(); err != nil {
377		t.Fatalf("Failed to set git user.email: %v", err)
378	}
379
380	// Create a test inject file
381	injectFilePath := filepath.Join(tempDir, "DEAR_LLM.md")
382	err := os.WriteFile(injectFilePath, []byte("# Test Content"), 0o644)
383	if err != nil {
384		t.Fatalf("Failed to create inject file: %v", err)
385	}
386
387	// Add to git
388	cmd = exec.Command("git", "add", ".")
389	cmd.Dir = tempDir
390	if err := cmd.Run(); err != nil {
391		t.Fatalf("Failed to add files to git: %v", err)
392	}
393
394	// Make the file unreadable by removing read permissions temporarily
395	// This test might not work on all systems, so we'll just test the basic functionality
396	codebase, err := AnalyzeCodebase(context.Background(), tempDir)
397	if err != nil {
398		t.Fatalf("AnalyzeCodebase failed: %v", err)
399	}
400
401	// Should have found the inject file
402	if len(codebase.InjectFiles) != 1 {
403		t.Errorf("Expected 1 inject file, got %d", len(codebase.InjectFiles))
404	}
405}
406
407func TestAnalyzeCodebaseEmptyRepo(t *testing.T) {
408	// Create a temporary directory with an empty git repo
409	tempDir := t.TempDir()
410
411	// Initialize git repository
412	cmd := exec.Command("git", "init")
413	cmd.Dir = tempDir
414	if err := cmd.Run(); err != nil {
415		t.Fatalf("Failed to init git repo: %v", err)
416	}
417
418	cmd = exec.Command("git", "config", "user.name", "Test User")
419	cmd.Dir = tempDir
420	if err := cmd.Run(); err != nil {
421		t.Fatalf("Failed to set git user.name: %v", err)
422	}
423
424	cmd = exec.Command("git", "config", "user.email", "test@example.com")
425	cmd.Dir = tempDir
426	if err := cmd.Run(); err != nil {
427		t.Fatalf("Failed to set git user.email: %v", err)
428	}
429
430	// Test with empty repo
431	codebase, err := AnalyzeCodebase(context.Background(), tempDir)
432	if err != nil {
433		t.Fatalf("AnalyzeCodebase failed: %v", err)
434	}
435
436	// Should have no files
437	if codebase.TotalFiles != 0 {
438		t.Errorf("Expected 0 files, got %d", codebase.TotalFiles)
439	}
440	if len(codebase.ExtensionCounts) != 0 {
441		t.Errorf("Expected 0 extension counts, got %d", len(codebase.ExtensionCounts))
442	}
443}