shelley: sort autogenerated files last in diff viewer

Philip Zeyliger and Shelley created

Prompt: In the diff viewer in shelley, use https://github.com/boldsoftware/sketch/blob/ae3a8f22f7c47f507a7a30cc31c61b80dfc3491d/claudetool/patch.go#L541-L584 (or similar) to sort files that are "auto-generated" last in the sort order. Ideally prepare the sort order on the server side and have the UI display it in that order. Fine to copy that code into our code base but look to see if it's there already

Add detection for autogenerated files based on:
- Path patterns (vendor/, node_modules/, .pb.go, .min.js, etc.)
- Go file content analysis (Code generated, DO NOT EDIT, etc.)
- Lock files (go.sum, package-lock.json, etc.)

Files are now sorted with non-generated files first (alphabetically),
followed by generated files (alphabetically). The UI displays a
[generated] indicator for these files in the file picker.

Co-authored-by: Shelley <shelley@exe.dev>

Change summary

server/autogenerated.go          | 149 ++++++++++++++++++++++++++++++++++
server/autogenerated_test.go     | 126 ++++++++++++++++++++++++++++
server/git_handlers.go           |  38 ++++++-
ui/src/components/DiffViewer.tsx |   1 
ui/src/types.ts                  |   1 
5 files changed, 307 insertions(+), 8 deletions(-)

Detailed changes

server/autogenerated.go 🔗

@@ -0,0 +1,149 @@
+package server
+
+import (
+	"bytes"
+	"go/parser"
+	"go/token"
+	"path/filepath"
+	"strings"
+)
+
+// IsAutogeneratedPath reports whether a file path suggests it's autogenerated.
+// This checks for common autogenerated file patterns based on path alone.
+func IsAutogeneratedPath(path string) bool {
+	base := filepath.Base(path)
+
+	// Convert path separators to forward slashes for consistent matching
+	normPath := filepath.ToSlash(path)
+
+	// Check directory patterns first
+	// We check if any path component matches the autogenerated directory name
+	for _, d := range autogeneratedDirs {
+		// Match at start of path or after a /
+		if strings.HasPrefix(normPath, d) || strings.Contains(normPath, "/"+d) {
+			return true
+		}
+	}
+
+	// Check file extension patterns
+	for _, ext := range autogeneratedExtensions {
+		if strings.HasSuffix(base, ext) {
+			return true
+		}
+	}
+
+	// Check exact filename matches
+	for _, name := range autogeneratedFilenames {
+		if base == name {
+			return true
+		}
+	}
+
+	return false
+}
+
+// IsAutogeneratedFile reports whether a file is autogenerated based on its path and content.
+// For Go files, it also analyzes the content for autogeneration markers.
+func IsAutogeneratedFile(path string, content []byte) bool {
+	if IsAutogeneratedPath(path) {
+		return true
+	}
+
+	// For Go files, check content for autogeneration markers
+	if strings.HasSuffix(path, ".go") && content != nil {
+		return isAutogeneratedGoContent(content)
+	}
+
+	return false
+}
+
+// isAutogeneratedGoContent reports whether a Go file has markers indicating it was autogenerated.
+func isAutogeneratedGoContent(buf []byte) bool {
+	for _, sig := range autogeneratedSignals {
+		if bytes.Contains(buf, sig) {
+			return true
+		}
+	}
+
+	// https://pkg.go.dev/cmd/go#hdr-Generate_Go_files_by_processing_source
+	// "This line must appear before the first non-comment, non-blank text in the file."
+	// Approximate that by looking for it at the top of the file, before the last of the imports.
+	fset := token.NewFileSet()
+	f, err := parser.ParseFile(fset, "x.go", buf, parser.ImportsOnly|parser.ParseComments)
+	if err == nil {
+		for _, cg := range f.Comments {
+			t := strings.ToLower(cg.Text())
+			for _, sig := range autogeneratedHeaderSignals {
+				if strings.Contains(t, sig) {
+					return true
+				}
+			}
+		}
+	}
+
+	return false
+}
+
+// autogeneratedDirs are directory names that typically contain generated files.
+var autogeneratedDirs = []string{
+	"vendor/",
+	"node_modules/",
+	".git/",
+	"__pycache__/",
+	".next/",
+	"dist/",
+	"build/",
+	"generated/",
+	"gen/",
+}
+
+// autogeneratedExtensions are file suffixes that indicate autogenerated files.
+var autogeneratedExtensions = []string{
+	".pb.go",        // Protocol buffers
+	".pb.gw.go",     // gRPC gateway
+	"_string.go",    // stringer
+	".gen.go",       // general generated Go
+	".generated.go", // general generated Go
+	"_generated.go", // general generated Go
+	".mock.go",      // mocks
+	"_mock.go",      // mocks
+	".mocks.go",     // mocks
+	"_mocks.go",     // mocks
+	".min.js",       // minified JS
+	".min.css",      // minified CSS
+	".d.ts",         // TypeScript declarations
+	".pb.ts",        // Protocol buffers TypeScript
+	".generated.ts", // general generated TypeScript
+	"_generated.ts", // general generated TypeScript
+	".sql.go",       // sqlc generated
+	".enumer.go",    // enumer generated
+	"_easyjson.go",  // easyjson generated
+	".deepcopy.go",  // Kubernetes deepcopy
+}
+
+// autogeneratedFilenames are exact filenames that are typically autogenerated.
+var autogeneratedFilenames = []string{
+	"go.sum",
+	"package-lock.json",
+	"yarn.lock",
+	"pnpm-lock.yaml",
+	"Cargo.lock",
+	"Gemfile.lock",
+	"composer.lock",
+	"poetry.lock",
+	"uv.lock",
+}
+
+// autogeneratedSignals are signals that a Go file is autogenerated, when present anywhere in the file.
+var autogeneratedSignals = [][]byte{
+	[]byte("\nfunc bindataRead("), // pre-embed bindata packed file
+}
+
+// autogeneratedHeaderSignals are signals that a file is autogenerated, when present at the top of the file.
+var autogeneratedHeaderSignals = []string{
+	// canonical would be `(?m)^// Code generated .* DO NOT EDIT\.$`
+	// but people screw it up, a lot, so be more lenient
+	strings.ToLower("generate"),
+	strings.ToLower("DO NOT EDIT"),
+	strings.ToLower("export by"),
+}

server/autogenerated_test.go 🔗

@@ -0,0 +1,126 @@
+package server
+
+import (
+	"sort"
+	"testing"
+)
+
+func TestIsAutogeneratedPath(t *testing.T) {
+	tests := []struct {
+		path     string
+		expected bool
+	}{
+		// Not autogenerated
+		{"main.go", false},
+		{"server/handler.go", false},
+		{"README.md", false},
+		{"src/app.ts", false},
+
+		// Autogenerated by extension
+		{"api.pb.go", true},
+		{"api.pb.gw.go", true},
+		{"stringer_string.go", true}, // stringer output for type "stringer"
+		{"day_string.go", true},
+		{"types.gen.go", true},
+		{"types.generated.go", true},
+		{"types_generated.go", true},
+		{"service.mock.go", true},
+		{"service_mock.go", true},
+		{"bundle.min.js", true},
+		{"styles.min.css", true},
+		{"types.d.ts", true},
+		{"queries.sql.go", true},
+
+		// Autogenerated by directory
+		{"vendor/github.com/pkg/errors/errors.go", true},
+		{"node_modules/lodash/index.js", true},
+		{"__pycache__/module.pyc", true},
+		{"generated/models.go", true},
+		{"gen/api.go", true},
+
+		// Autogenerated lock files
+		{"go.sum", true},
+		{"package-lock.json", true},
+		{"yarn.lock", true},
+		{"pnpm-lock.yaml", true},
+		{"Cargo.lock", true},
+		{"uv.lock", true},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.path, func(t *testing.T) {
+			if got := IsAutogeneratedPath(tt.path); got != tt.expected {
+				t.Errorf("IsAutogeneratedPath(%q) = %v, want %v", tt.path, got, tt.expected)
+			}
+		})
+	}
+}
+
+func TestIsAutogeneratedGoContent(t *testing.T) {
+	tests := []struct {
+		name     string
+		content  string
+		expected bool
+	}{
+		{
+			name:     "regular go file",
+			content:  "package main\n\nfunc main() {}\n",
+			expected: false,
+		},
+		{
+			name:     "code generated comment",
+			content:  "// Code generated by stringer; DO NOT EDIT.\n\npackage main\n",
+			expected: true,
+		},
+		{
+			name:     "do not edit",
+			content:  "// DO NOT EDIT\n\npackage main\n",
+			expected: true,
+		},
+		{
+			name:     "generated in comment",
+			content:  "// auto-generated file\n\npackage main\n",
+			expected: true,
+		},
+		{
+			name:     "bindata",
+			content:  "package main\n\nfunc bindataRead(name string) ([]byte, error) {\n",
+			expected: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := isAutogeneratedGoContent([]byte(tt.content)); got != tt.expected {
+				t.Errorf("isAutogeneratedGoContent() = %v, want %v", got, tt.expected)
+			}
+		})
+	}
+}
+
+func TestGitFileInfoSortOrder(t *testing.T) {
+	// Test that files are sorted with non-generated first, then generated
+	files := []GitFileInfo{
+		{Path: "api.pb.go", IsGenerated: true},
+		{Path: "main.go", IsGenerated: false},
+		{Path: "go.sum", IsGenerated: true},
+		{Path: "handler.go", IsGenerated: false},
+		{Path: "types_generated.go", IsGenerated: true},
+	}
+
+	// Sort using the same logic as handleGitDiffFiles
+	sort.Slice(files, func(i, j int) bool {
+		if files[i].IsGenerated != files[j].IsGenerated {
+			return !files[i].IsGenerated
+		}
+		return files[i].Path < files[j].Path
+	})
+
+	// Expected order: handler.go, main.go, api.pb.go, go.sum, types_generated.go
+	expected := []string{"handler.go", "main.go", "api.pb.go", "go.sum", "types_generated.go"}
+	for i, exp := range expected {
+		if files[i].Path != exp {
+			t.Errorf("position %d: expected %s, got %s", i, exp, files[i].Path)
+		}
+	}
+}

server/git_handlers.go 🔗

@@ -26,10 +26,11 @@ type GitDiffInfo struct {
 
 // GitFileInfo represents a file in a diff
 type GitFileInfo struct {
-	Path      string `json:"path"`
-	Status    string `json:"status"` // added, modified, deleted
-	Additions int    `json:"additions"`
-	Deletions int    `json:"deletions"`
+	Path        string `json:"path"`
+	Status      string `json:"status"` // added, modified, deleted
+	Additions   int    `json:"additions"`
+	Deletions   int    `json:"deletions"`
+	IsGenerated bool   `json:"isGenerated"`
 }
 
 // GitFileDiff represents the content of a file diff
@@ -240,15 +241,36 @@ func (s *Server) handleGitDiffFiles(w http.ResponseWriter, r *http.Request) {
 			}
 		}
 
+		// Check if file is autogenerated based on path.
+		// For Go files, we could also check content, but that requires reading the file
+		// which is more expensive. Path-based detection covers most cases.
+		isGenerated := IsAutogeneratedPath(parts[1])
+
+		// For Go files that aren't obviously autogenerated by path,
+		// check the file content for autogeneration markers.
+		if !isGenerated && strings.HasSuffix(parts[1], ".go") && status != "deleted" {
+			fullPath := filepath.Join(gitRoot, parts[1])
+			if content, err := os.ReadFile(fullPath); err == nil {
+				isGenerated = isAutogeneratedGoContent(content)
+			}
+		}
+
 		files = append(files, GitFileInfo{
-			Path:      parts[1],
-			Status:    status,
-			Additions: additions,
-			Deletions: deletions,
+			Path:        parts[1],
+			Status:      status,
+			Additions:   additions,
+			Deletions:   deletions,
+			IsGenerated: isGenerated,
 		})
 	}
 
+	// Sort files: non-generated first (alphabetically), then generated (alphabetically)
 	sort.Slice(files, func(i, j int) bool {
+		// If one is generated and the other isn't, non-generated comes first
+		if files[i].IsGenerated != files[j].IsGenerated {
+			return !files[i].IsGenerated
+		}
+		// Otherwise, sort alphabetically by path
 		return files[i].Path < files[j].Path
 	})
 

ui/src/components/DiffViewer.tsx 🔗

@@ -840,6 +840,7 @@ function DiffViewer({ cwd, isOpen, onClose, onCommentTextChange, initialCommit }
             {getStatusSymbol(file.status)} {file.path}
             {file.additions > 0 && ` (+${file.additions})`}
             {file.deletions > 0 && ` (-${file.deletions})`}
+            {file.isGenerated && " [generated]"}
           </option>
         ))}
       </select>

ui/src/types.ts 🔗

@@ -108,6 +108,7 @@ export interface GitFileInfo {
   status: "added" | "modified" | "deleted";
   additions: number;
   deletions: number;
+  isGenerated: boolean;
 }
 
 export interface GitFileDiff {