patch.go

  1package claudetool
  2
  3import (
  4	"bytes"
  5	"context"
  6	"encoding/json"
  7	"errors"
  8	"fmt"
  9	"go/parser"
 10	"go/token"
 11	"log/slog"
 12	"os"
 13	"path/filepath"
 14	"strings"
 15
 16	"github.com/pkg/diff"
 17	"shelley.exe.dev/llm"
 18	"sketch.dev/claudetool/editbuf"
 19	"sketch.dev/claudetool/patchkit"
 20)
 21
 22// PatchCallback defines the signature for patch tool callbacks.
 23// It runs after the patch tool has executed.
 24// It receives the patch input and the tool output,
 25// and returns a new, possibly altered tool output.
 26type PatchCallback func(input PatchInput, output llm.ToolOut) llm.ToolOut
 27
 28// PatchTool specifies an llm.Tool for patching files.
 29// PatchTools are not concurrency-safe.
 30type PatchTool struct {
 31	Callback PatchCallback // may be nil
 32	// WorkingDir is the shared mutable working directory.
 33	WorkingDir *MutableWorkingDir
 34	// Simplified indicates whether to use the simplified input schema.
 35	// Helpful for weaker models.
 36	Simplified bool
 37	// ClipboardEnabled controls whether clipboard functionality is enabled.
 38	// Ignored if Simplified is true.
 39	// NB: The actual implementation of the patch tool is unchanged,
 40	// this flag merely extends the description and input schema to include the clipboard operations.
 41	ClipboardEnabled bool
 42	// clipboards stores clipboard name -> text
 43	clipboards map[string]string
 44}
 45
 46// getWorkingDir returns the current working directory.
 47func (p *PatchTool) getWorkingDir() string {
 48	return p.WorkingDir.Get()
 49}
 50
 51// Tool returns an llm.Tool based on p.
 52func (p *PatchTool) Tool() *llm.Tool {
 53	description := PatchBaseDescription + PatchUsageNotes
 54	schema := PatchStandardInputSchema
 55	switch {
 56	case p.Simplified:
 57		schema = PatchStandardSimplifiedSchema
 58	case p.ClipboardEnabled:
 59		description = PatchBaseDescription + PatchClipboardDescription + PatchUsageNotes
 60		schema = PatchClipboardInputSchema
 61	}
 62	return &llm.Tool{
 63		Name:        PatchName,
 64		Description: strings.TrimSpace(description),
 65		InputSchema: llm.MustSchema(schema),
 66		Run:         p.Run,
 67	}
 68}
 69
 70const (
 71	PatchName            = "patch"
 72	PatchBaseDescription = `
 73File modification tool for precise text edits.
 74
 75Operations:
 76- replace: Substitute unique text with new content
 77- append_eof: Append new text at the end of the file
 78- prepend_bof: Insert new text at the beginning of the file
 79- overwrite: Replace the entire file with new content (automatically creates the file)
 80`
 81
 82	PatchClipboardDescription = `
 83Clipboard:
 84- toClipboard: Store oldText to a named clipboard before the operation
 85- fromClipboard: Use clipboard content as newText (ignores provided newText)
 86- Clipboards persist across patch calls
 87- Always use clipboards when moving/copying code (within or across files), even when the moved/copied code will also have edits.
 88  This prevents transcription errors and distinguishes intentional changes from unintentional changes.
 89
 90Indentation adjustment:
 91- reindent applies to whatever text is being inserted
 92- First strips the specified prefix from each line, then adds the new prefix
 93- Useful when moving code from one indentation to another
 94
 95Recipes:
 96- cut: replace with empty newText and toClipboard
 97- copy: replace with toClipboard and fromClipboard using the same clipboard name
 98- paste: replace with fromClipboard
 99- in-place indentation change: same as copy, but add indentation adjustment
100`
101
102	PatchUsageNotes = `
103Usage notes:
104- All inputs are interpreted literally (no automatic newline or whitespace handling)
105- For replace operations, oldText must appear EXACTLY ONCE in the file
106
107IMPORTANT: Each patch call must be less than 60k tokens total. For large file
108changes, break them into multiple smaller patch operations rather than one
109large overwrite. Prefer incremental replace operations over full file overwrites.
110`
111
112	// If you modify this, update the termui template for prettier rendering.
113	PatchStandardInputSchema = `
114{
115  "type": "object",
116  "required": ["path", "patches"],
117  "properties": {
118    "path": {
119      "type": "string",
120      "description": "Path to the file to patch"
121    },
122    "patches": {
123      "type": "array",
124      "description": "List of patch requests to apply",
125      "items": {
126        "type": "object",
127        "required": ["operation", "newText"],
128        "properties": {
129          "operation": {
130            "type": "string",
131            "enum": ["replace", "append_eof", "prepend_bof", "overwrite"],
132            "description": "Type of operation to perform"
133          },
134          "oldText": {
135            "type": "string",
136            "description": "Text to locate for the operation (must be unique in file, required for replace)"
137          },
138          "newText": {
139            "type": "string",
140            "description": "The new text to use (empty for deletions)"
141          }
142        }
143      }
144    }
145  }
146}
147`
148
149	PatchStandardSimplifiedSchema = `{
150  "type": "object",
151  "required": ["path", "patch"],
152  "properties": {
153    "path": {
154      "type": "string",
155      "description": "Path to the file to patch"
156    },
157    "patch": {
158      "type": "object",
159      "required": ["operation", "newText"],
160      "properties": {
161        "operation": {
162          "type": "string",
163          "enum": ["replace", "append_eof", "prepend_bof", "overwrite"],
164          "description": "Type of operation to perform"
165        },
166        "oldText": {
167          "type": "string",
168          "description": "Text to locate for the operation (must be unique in file, required for replace)"
169        },
170        "newText": {
171          "type": "string",
172          "description": "The new text to use (empty for deletions)"
173        }
174      }
175    }
176  }
177}`
178
179	PatchClipboardInputSchema = `
180{
181  "type": "object",
182  "required": ["path", "patches"],
183  "properties": {
184    "path": {
185      "type": "string",
186      "description": "Path to the file to patch"
187    },
188    "patches": {
189      "type": "array",
190      "description": "List of patch requests to apply",
191      "items": {
192        "type": "object",
193        "required": ["operation"],
194        "properties": {
195          "operation": {
196            "type": "string",
197            "enum": ["replace", "append_eof", "prepend_bof", "overwrite"],
198            "description": "Type of operation to perform"
199          },
200          "oldText": {
201            "type": "string",
202            "description": "Text to locate (must be unique in file, required for replace)"
203          },
204          "newText": {
205            "type": "string",
206            "description": "The new text to use (empty for deletions, leave empty if fromClipboard is set)"
207          },
208          "toClipboard": {
209            "type": "string",
210            "description": "Save oldText to this named clipboard before the operation"
211          },
212          "fromClipboard": {
213            "type": "string",
214            "description": "Use content from this clipboard as newText (overrides newText field)"
215          },
216          "reindent": {
217            "type": "object",
218            "description": "Modify indentation of the inserted text (newText or fromClipboard) before insertion",
219            "properties": {
220              "strip": {
221                "type": "string",
222                "description": "Remove this prefix from each non-empty line before insertion"
223              },
224              "add": {
225                "type": "string",
226                "description": "Add this prefix to each non-empty line after stripping"
227              }
228            }
229          }
230        }
231      }
232    }
233  }
234}
235`
236)
237
// TODO: maybe rename PatchRequest to PatchOperation or PatchSpec or PatchPart or just Patch?
239
240// PatchInput represents the input structure for patch operations.
241type PatchInput struct {
242	Path    string         `json:"path"`
243	Patches []PatchRequest `json:"patches"`
244}
245
246// PatchInputOne is a simplified version of PatchInput for single patch operations.
247type PatchInputOne struct {
248	Path    string        `json:"path"`
249	Patches *PatchRequest `json:"patches"`
250}
251
252// PatchInputOneSingular is PatchInputOne with a better name for the singular case.
253type PatchInputOneSingular struct {
254	Path  string        `json:"path"`
255	Patch *PatchRequest `json:"patch"`
256}
257
// PatchInputOneString is the PatchInput variant in which the "patches" field
// is a string that itself contains JSON.
// patchParse decodes that string as either a single patch object or an array of patches.
type PatchInputOneString struct {
	Path    string `json:"path"`
	Patches string `json:"patches"` // contains Patches as a JSON string 🤦
}
262
// PatchDisplayData is the structured data sent to the UI for display.
// It carries the absolute path of the patched file, the complete file
// contents before and after patching, and a unified diff between the two.
type PatchDisplayData struct {
	Path       string `json:"path"`
	OldContent string `json:"oldContent"`
	NewContent string `json:"newContent"`
	Diff       string `json:"diff"`
}
270
271// PatchRequest represents a single patch operation.
272type PatchRequest struct {
273	Operation     string    `json:"operation"`
274	OldText       string    `json:"oldText,omitempty"`
275	NewText       string    `json:"newText,omitempty"`
276	ToClipboard   string    `json:"toClipboard,omitempty"`
277	FromClipboard string    `json:"fromClipboard,omitempty"`
278	Reindent      *Reindent `json:"reindent,omitempty"`
279}
280
// Reindent represents indentation adjustment configuration.
// Strip is the prefix removed from every non-empty line of the inserted text;
// Add is the prefix then put in front of every line that is still non-empty.
type Reindent struct {
	// TODO: it might be nice to make this more flexible,
	// so it can e.g. strip all whitespace,
	// or strip the prefix only on lines where it is present,
	// or strip based on a regex.
	Strip string `json:"strip,omitempty"`
	Add   string `json:"add,omitempty"`
}
290
291// Run implements the patch tool logic.
292func (p *PatchTool) Run(ctx context.Context, m json.RawMessage) llm.ToolOut {
293	if p.clipboards == nil {
294		p.clipboards = make(map[string]string)
295	}
296	input, err := p.patchParse(m)
297	var output llm.ToolOut
298	if err != nil {
299		output = llm.ErrorToolOut(err)
300	} else {
301		output = p.patchRun(ctx, &input)
302	}
303	if p.Callback != nil {
304		return p.Callback(input, output)
305	}
306	return output
307}
308
309// patchParse parses the input message into a PatchInput structure.
310// It accepts a few different formats, because empirically,
311// LLMs sometimes generate slightly different JSON structures,
312// and we may as well accept such near misses.
313func (p *PatchTool) patchParse(m json.RawMessage) (PatchInput, error) {
314	var input PatchInput
315	originalErr := json.Unmarshal(m, &input)
316	if originalErr == nil && len(input.Patches) > 0 {
317		return input, nil
318	}
319	var inputOne PatchInputOne
320	if err := json.Unmarshal(m, &inputOne); err == nil && inputOne.Patches != nil {
321		return PatchInput{Path: inputOne.Path, Patches: []PatchRequest{*inputOne.Patches}}, nil
322	} else if originalErr == nil {
323		originalErr = err
324	}
325	var inputOneSingular PatchInputOneSingular
326	if err := json.Unmarshal(m, &inputOneSingular); err == nil && inputOneSingular.Patch != nil {
327		return PatchInput{Path: inputOneSingular.Path, Patches: []PatchRequest{*inputOneSingular.Patch}}, nil
328	} else if originalErr == nil {
329		originalErr = err
330	}
331	var inputOneString PatchInputOneString
332	if err := json.Unmarshal(m, &inputOneString); err == nil && inputOneString.Patches != "" {
333		var onePatch PatchRequest
334		if err := json.Unmarshal([]byte(inputOneString.Patches), &onePatch); err == nil && onePatch.Operation != "" {
335			return PatchInput{Path: inputOneString.Path, Patches: []PatchRequest{onePatch}}, nil
336		} else if originalErr == nil {
337			originalErr = err
338		}
339		var patches []PatchRequest
340		if err := json.Unmarshal([]byte(inputOneString.Patches), &patches); err == nil {
341			return PatchInput{Path: inputOneString.Path, Patches: patches}, nil
342		} else if originalErr == nil {
343			originalErr = err
344		}
345	}
346	// If JSON parsed but patches field was missing/empty, provide a clear error
347	if originalErr == nil {
348		return PatchInput{}, fmt.Errorf("patches field is missing or empty (this may indicate a truncated LLM response)\nJSON: %s", string(m))
349	}
350	return PatchInput{}, fmt.Errorf("failed to unmarshal patch input: %w\nJSON: %s", originalErr, string(m))
351}
352
353// patchRun implements the guts of the patch tool.
354// It populates input from m.
355func (p *PatchTool) patchRun(ctx context.Context, input *PatchInput) llm.ToolOut {
356	path := input.Path
357	if !filepath.IsAbs(input.Path) {
358		// Use shared WorkingDir if available, then context, then Pwd fallback
359		pwd := p.getWorkingDir()
360		path = filepath.Join(pwd, input.Path)
361	}
362	input.Path = path
363	if len(input.Patches) == 0 {
364		return llm.ErrorToolOut(fmt.Errorf("no patches provided"))
365	}
366	// TODO: check whether the file is autogenerated, and if so, require a "force" flag to modify it.
367
368	orig, err := os.ReadFile(input.Path)
369	// If the file doesn't exist, we can still apply patches
370	// that don't require finding existing text.
371	switch {
372	case errors.Is(err, os.ErrNotExist):
373		for _, patch := range input.Patches {
374			switch patch.Operation {
375			case "prepend_bof", "append_eof", "overwrite":
376			default:
377				return llm.ErrorfToolOut("file %q does not exist", input.Path)
378			}
379		}
380	case err != nil:
381		return llm.ErrorfToolOut("failed to read file %q: %w", input.Path, err)
382	}
383
384	likelyGoFile := strings.HasSuffix(input.Path, ".go")
385
386	autogenerated := likelyGoFile && IsAutogeneratedGoFile(orig)
387
388	origStr := string(orig)
389	// Process the patches "simultaneously", minimizing them along the way.
390	// Claude generates patches that interact with each other.
391	buf := editbuf.NewBuffer(orig)
392
393	// TODO: is it better to apply the patches that apply cleanly and report on the failures?
394	// or instead have it be all-or-nothing?
395	// For now, it is all-or-nothing.
396	// TODO: when the model gets into a "cannot apply patch" cycle of doom, how do we get it unstuck?
397	// Also: how do we detect that it's in a cycle?
398	var patchErr error
399
400	var clipboardsModified []string
401	updateToClipboard := func(patch PatchRequest, spec *patchkit.Spec) {
402		if patch.ToClipboard == "" {
403			return
404		}
405		// Update clipboard with the actual matched text
406		matchedOldText := origStr[spec.Off : spec.Off+spec.Len]
407		p.clipboards[patch.ToClipboard] = matchedOldText
408		clipboardsModified = append(clipboardsModified, fmt.Sprintf(`<clipboard_modified name="%s"><message>clipboard contents altered in order to match uniquely</message><new_contents>%q</new_contents></clipboard_modified>`, patch.ToClipboard, matchedOldText))
409	}
410
411	for i, patch := range input.Patches {
412		// Process toClipboard first, so that copy works
413		if patch.ToClipboard != "" {
414			if patch.Operation != "replace" {
415				return llm.ErrorfToolOut("toClipboard (%s): can only be used with replace operation", patch.ToClipboard)
416			}
417			if patch.OldText == "" {
418				return llm.ErrorfToolOut("toClipboard (%s): oldText cannot be empty when using toClipboard", patch.ToClipboard)
419			}
420			p.clipboards[patch.ToClipboard] = patch.OldText
421		}
422
423		// Handle fromClipboard
424		newText := patch.NewText
425		if patch.FromClipboard != "" {
426			clipboardText, ok := p.clipboards[patch.FromClipboard]
427			if !ok {
428				return llm.ErrorfToolOut("fromClipboard (%s): no clipboard with that name", patch.FromClipboard)
429			}
430			newText = clipboardText
431		}
432
433		// Apply indentation adjustment if specified
434		if patch.Reindent != nil {
435			reindentedText, err := reindent(newText, patch.Reindent)
436			if err != nil {
437				return llm.ErrorfToolOut("reindent(%q -> %q): %w", patch.Reindent.Strip, patch.Reindent.Add, err)
438			}
439			newText = reindentedText
440		}
441
442		switch patch.Operation {
443		case "prepend_bof":
444			buf.Insert(0, newText)
445		case "append_eof":
446			buf.Insert(len(orig), newText)
447		case "overwrite":
448			buf.Replace(0, len(orig), newText)
449		case "replace":
450			if patch.OldText == "" {
451				return llm.ErrorfToolOut("patch %d: oldText cannot be empty for %s operation", i, patch.Operation)
452			}
453
454			// Attempt to apply the patch.
455			spec, count := patchkit.Unique(origStr, patch.OldText, newText)
456			switch count {
457			case 0:
458				// no matches, maybe recoverable, continued below
459			case 1:
460				// exact match, apply
461				slog.DebugContext(ctx, "patch_applied", "method", "unique")
462				spec.ApplyToEditBuf(buf)
463				continue
464			case 2:
465				// multiple matches
466				patchErr = errors.Join(patchErr, fmt.Errorf("old text not unique:\n%s", patch.OldText))
467				continue
468			default:
469				slog.ErrorContext(ctx, "unique returned unexpected count", "count", count)
470				patchErr = errors.Join(patchErr, fmt.Errorf("internal error"))
471				continue
472			}
473
474			// The following recovery mechanisms are heuristic.
475			// They aren't perfect, but they appear safe,
476			// and the cases they cover appear with some regularity.
477
478			// Try adjusting the whitespace prefix.
479			spec, ok := patchkit.UniqueDedent(origStr, patch.OldText, newText)
480			if ok {
481				slog.DebugContext(ctx, "patch_applied", "method", "unique_dedent")
482				spec.ApplyToEditBuf(buf)
483				updateToClipboard(patch, spec)
484				continue
485			}
486
487			// Try ignoring leading/trailing whitespace in a semantically safe way.
488			spec, ok = patchkit.UniqueInValidGo(origStr, patch.OldText, newText)
489			if ok {
490				slog.DebugContext(ctx, "patch_applied", "method", "unique_in_valid_go")
491				spec.ApplyToEditBuf(buf)
492				updateToClipboard(patch, spec)
493				continue
494			}
495
496			// Try ignoring semantically insignificant whitespace.
497			spec, ok = patchkit.UniqueGoTokens(origStr, patch.OldText, newText)
498			if ok {
499				slog.DebugContext(ctx, "patch_applied", "method", "unique_go_tokens")
500				spec.ApplyToEditBuf(buf)
501				updateToClipboard(patch, spec)
502				continue
503			}
504
505			// Try trimming the first line of the patch, if we can do so safely.
506			spec, ok = patchkit.UniqueTrim(origStr, patch.OldText, newText)
507			if ok {
508				slog.DebugContext(ctx, "patch_applied", "method", "unique_trim")
509				spec.ApplyToEditBuf(buf)
510				// Do NOT call updateToClipboard here,
511				// because the trimmed text may vary significantly from the original text.
512				continue
513			}
514
515			// No dice.
516			patchErr = errors.Join(patchErr, fmt.Errorf("old text not found:\n%s", patch.OldText))
517			continue
518		default:
519			return llm.ErrorfToolOut("unrecognized operation %q", patch.Operation)
520		}
521	}
522
523	if patchErr != nil {
524		errorMsg := patchErr.Error()
525		for _, msg := range clipboardsModified {
526			errorMsg += "\n" + msg
527		}
528		return llm.ErrorToolOut(fmt.Errorf("%s", errorMsg))
529	}
530
531	patched, err := buf.Bytes()
532	if err != nil {
533		return llm.ErrorToolOut(err)
534	}
535	if err := os.MkdirAll(filepath.Dir(input.Path), 0o700); err != nil {
536		return llm.ErrorfToolOut("failed to create directory %q: %w", filepath.Dir(input.Path), err)
537	}
538	if err := os.WriteFile(input.Path, patched, 0o600); err != nil {
539		return llm.ErrorfToolOut("failed to write patched contents to file %q: %w", input.Path, err)
540	}
541
542	response := new(strings.Builder)
543	fmt.Fprintf(response, "<patches_applied>all</patches_applied>\n")
544	for _, msg := range clipboardsModified {
545		fmt.Fprintln(response, msg)
546	}
547
548	if autogenerated {
549		fmt.Fprintf(response, "<warning>%q appears to be autogenerated. Patches were applied anyway.</warning>\n", input.Path)
550	}
551
552	diff := generateUnifiedDiff(input.Path, string(orig), string(patched))
553
554	// Display data for the UI includes structured content for Monaco diff editor
555	displayData := PatchDisplayData{
556		Path:       input.Path,
557		OldContent: string(orig),
558		NewContent: string(patched),
559		Diff:       diff,
560	}
561
562	return llm.ToolOut{
563		LLMContent: llm.TextContent(response.String()),
564		Display:    displayData,
565	}
566}
567
// IsAutogeneratedGoFile reports whether a Go file has markers indicating it was autogenerated.
//
// It returns true if buf contains any of autogeneratedSignals anywhere, or if
// buf parses (through its imports) as Go and one of the comments seen by then
// contains, case-insensitively, any of autogeneratedHeaderSignals.
// Input that does not parse as Go is reported as not autogenerated,
// unless it contains one of autogeneratedSignals.
func IsAutogeneratedGoFile(buf []byte) bool {
	for _, sig := range autogeneratedSignals {
		if bytes.Contains(buf, sig) {
			return true
		}
	}

	// https://pkg.go.dev/cmd/go#hdr-Generate_Go_files_by_processing_source
	// "This line must appear before the first non-comment, non-blank text in the file."
	// Approximate that by looking for it at the top of the file, before the last of the imports.
	// (Sometimes people put it after the package declaration, because of course they do.)
	// At least in the imports region we know it's not part of their actual code;
	// we don't want to ignore the generator (which also includes these strings!),
	// just the generated code.
	fset := token.NewFileSet()
	f, err := parser.ParseFile(fset, "x.go", buf, parser.ImportsOnly|parser.ParseComments)
	if err != nil {
		return false
	}
	for _, cg := range f.Comments {
		// The header signals are stored lower-cased, so lower-case the comment too.
		t := strings.ToLower(cg.Text())
		for _, sig := range autogeneratedHeaderSignals {
			if strings.Contains(t, sig) {
				return true
			}
		}
	}
	return false
}

// autogeneratedSignals are signals that a file is autogenerated, when present anywhere in the file.
var autogeneratedSignals = [][]byte{
	[]byte("\nfunc bindataRead("), // pre-embed bindata packed file
}

// autogeneratedHeaderSignals are signals that a file is autogenerated, when present at the top of the file.
// They are matched case-insensitively, so every entry must be lower-case.
var autogeneratedHeaderSignals = []string{
	// canonical would be `(?m)^// Code generated .* DO NOT EDIT\.$`
	// but people screw it up, a lot, so be more lenient
	strings.ToLower("generate"),
	strings.ToLower("DO NOT EDIT"),
	strings.ToLower("export by"),
}
612
613func generateUnifiedDiff(filePath, original, patched string) string {
614	buf := new(strings.Builder)
615	err := diff.Text(filePath, filePath, original, patched, buf)
616	if err != nil {
617		return fmt.Sprintf("(diff generation failed: %v)\n", err)
618	}
619	return buf.String()
620}
621
622// reindent applies indentation adjustments to text.
623func reindent(text string, adj *Reindent) (string, error) {
624	if adj == nil {
625		return text, nil
626	}
627
628	lines := strings.Split(text, "\n")
629
630	for i, line := range lines {
631		if line == "" {
632			continue
633		}
634		var ok bool
635		lines[i], ok = strings.CutPrefix(line, adj.Strip)
636		if !ok {
637			return "", fmt.Errorf("strip precondition failed: line %q does not start with %q", line, adj.Strip)
638		}
639	}
640
641	for i, line := range lines {
642		if line == "" {
643			continue
644		}
645		lines[i] = adj.Add + line
646	}
647
648	return strings.Join(lines, "\n"), nil
649}