skills.go

  1// Package skills implements the Agent Skills open standard.
  2// See https://agentskills.io for the specification.
  3package skills
  4
  5import (
  6	"context"
  7	"errors"
  8	"fmt"
  9	"log/slog"
 10	"os"
 11	"path/filepath"
 12	"regexp"
 13	"slices"
 14	"strings"
 15	"sync"
 16
 17	"github.com/charlievieth/fastwalk"
 18	"github.com/charmbracelet/crush/internal/pubsub"
 19	"gopkg.in/yaml.v3"
 20)
 21
// Limits and names used when discovering and validating skills.
const (
	SkillFileName          = "SKILL.md" // exact file name discovery looks for while walking skill paths
	MaxNameLength          = 64         // upper bound Validate enforces on Skill.Name
	MaxDescriptionLength   = 1024       // upper bound Validate enforces on Skill.Description
	MaxCompatibilityLength = 500        // upper bound Validate enforces on Skill.Compatibility
)
 28
var (
	// namePattern accepts one or more runs of ASCII letters/digits joined by
	// single hyphens (no leading, trailing or consecutive hyphens).
	// promptReplacer rewrites &, <, >, " and ' to their XML entity references.
	namePattern    = regexp.MustCompile(`^[a-zA-Z0-9]+(-[a-zA-Z0-9]+)*$`)
	promptReplacer = strings.NewReplacer("&", "&amp;", "<", "&lt;", ">", "&gt;", "\"", "&quot;", "'", "&apos;")

	// latestStates is the package-level cache written by SetLatestStates and
	// read by GetLatestStates; all access goes through latestStatesMu.
	latestStates   []*SkillState
	latestStatesMu sync.RWMutex
)
 36
// Skill represents a parsed SKILL.md file.
//
// Name, Description, UserInvocable, DisableModelInvocation, License,
// Compatibility and Metadata are decoded from the YAML frontmatter. The
// remaining fields are tagged `yaml:"-"` and are never read from the
// frontmatter: ParseContent sets Instructions to the whitespace-trimmed
// markdown body, and Parse sets Path to the directory containing the file and
// SkillFilePath to the path of the file itself. Builtin is not assigned
// anywhere in this file; it is presumably set by whatever loads bundled
// skills (TODO confirm). ToPromptXML reads it to tag builtin skills.
type Skill struct {
	Name                   string            `yaml:"name" json:"name"`
	Description            string            `yaml:"description" json:"description"`
	UserInvocable          bool              `yaml:"user-invocable" json:"user_invocable"`
	DisableModelInvocation bool              `yaml:"disable-model-invocation" json:"disable_model_invocation"`
	License                string            `yaml:"license,omitempty" json:"license,omitempty"`
	Compatibility          string            `yaml:"compatibility,omitempty" json:"compatibility,omitempty"`
	Metadata               map[string]string `yaml:"metadata,omitempty" json:"metadata,omitempty"`
	Instructions           string            `yaml:"-" json:"instructions"`
	Path                   string            `yaml:"-" json:"path"`
	SkillFilePath          string            `yaml:"-" json:"skill_file_path"`
	Builtin                bool              `yaml:"-" json:"builtin"`
}
 51
// DiscoveryState represents the outcome of discovering a single skill file.
// The zero value is StateNormal.
type DiscoveryState int

const (
	// StateNormal indicates the skill was parsed and validated successfully.
	StateNormal DiscoveryState = iota
	// StateError indicates discovery encountered a scan/parse/validate error;
	// the accompanying SkillState.Err holds the cause.
	StateError
)
 61
// SkillState represents the latest discovery status of a skill file.
type SkillState struct {
	Name  string         // skill name from the frontmatter; empty when walking or parsing failed
	Path  string         // path of the SKILL.md file (or of the walk entry that failed)
	State DiscoveryState // StateNormal on success, StateError otherwise
	Err   error          // cause of a StateError; nil for StateNormal
}
 69
// Event is the payload published by PublishStates, typically once skill
// discovery completes. States holds the copy of the discovery states that
// PublishStates made at publish time.
type Event struct {
	States []*SkillState
}
 74
// broker is the package-level pubsub broker that PublishStates publishes to
// and SubscribeEvents subscribes on.
var broker = pubsub.NewBroker[Event]()
 76
// SubscribeEvents returns a channel that receives events when skill discovery state changes.
// The subscription is created on the package-level broker with ctx; its
// lifetime is presumably bound to ctx (confirm against pubsub.Broker.Subscribe).
func SubscribeEvents(ctx context.Context) <-chan pubsub.Event[Event] {
	return broker.Subscribe(ctx)
}
 81
// PublishStates publishes a skill discovery event with the given states.
// The event is sent as pubsub.UpdatedEvent and carries a copy of states, so
// later changes to the caller's slice do not affect it. This function does
// not touch the cache behind GetLatestStates; use SetLatestStates for that.
func PublishStates(states []*SkillState) {
	broker.Publish(pubsub.UpdatedEvent, Event{States: cloneStates(states)})
}
 86
 87// cloneStates returns a deep copy of the given state slice so callers cannot
 88// accidentally mutate the source.
 89func cloneStates(states []*SkillState) []*SkillState {
 90	if states == nil {
 91		return nil
 92	}
 93	result := make([]*SkillState, len(states))
 94	for i, s := range states {
 95		clone := *s
 96		result[i] = &clone
 97	}
 98	return result
 99}
100
// GetLatestStates returns the latest discovery states.
// The result is a copy of what was last stored with SetLatestStates, taken
// while holding the read lock; it is nil if nothing has been stored yet.
func GetLatestStates() []*SkillState {
	latestStatesMu.RLock()
	defer latestStatesMu.RUnlock()
	return cloneStates(latestStates)
}
107
108// SetLatestStates stores the given states in the package-level cache so that
109// GetLatestStates can return them synchronously before the first pubsub event
110// arrives.
111func SetLatestStates(states []*SkillState) {
112	latestStatesMu.Lock()
113	latestStates = cloneStates(states)
114	latestStatesMu.Unlock()
115}
116
117// Validate checks if the skill meets spec requirements.
118func (s *Skill) Validate() error {
119	var errs []error
120
121	if s.Name == "" {
122		errs = append(errs, errors.New("name is required"))
123	} else {
124		if len(s.Name) > MaxNameLength {
125			errs = append(errs, fmt.Errorf("name exceeds %d characters", MaxNameLength))
126		}
127		if !namePattern.MatchString(s.Name) {
128			errs = append(errs, errors.New("name must be alphanumeric with hyphens, no leading/trailing/consecutive hyphens"))
129		}
130		if s.Path != "" && !strings.EqualFold(filepath.Base(s.Path), s.Name) {
131			errs = append(errs, fmt.Errorf("name %q must match directory %q", s.Name, filepath.Base(s.Path)))
132		}
133	}
134
135	if s.Description == "" {
136		errs = append(errs, errors.New("description is required"))
137	} else if len(s.Description) > MaxDescriptionLength {
138		errs = append(errs, fmt.Errorf("description exceeds %d characters", MaxDescriptionLength))
139	}
140
141	if len(s.Compatibility) > MaxCompatibilityLength {
142		errs = append(errs, fmt.Errorf("compatibility exceeds %d characters", MaxCompatibilityLength))
143	}
144
145	return errors.Join(errs...)
146}
147
148// Parse parses a SKILL.md file from disk.
149func Parse(path string) (*Skill, error) {
150	content, err := os.ReadFile(path)
151	if err != nil {
152		return nil, err
153	}
154
155	skill, err := ParseContent(content)
156	if err != nil {
157		return nil, err
158	}
159
160	skill.Path = filepath.Dir(path)
161	skill.SkillFilePath = path
162
163	return skill, nil
164}
165
166// ParseContent parses a SKILL.md from raw bytes.
167func ParseContent(content []byte) (*Skill, error) {
168	frontmatter, body, err := splitFrontmatter(string(content))
169	if err != nil {
170		return nil, err
171	}
172
173	var skill Skill
174	if err := yaml.Unmarshal([]byte(frontmatter), &skill); err != nil {
175		return nil, fmt.Errorf("parsing frontmatter: %w", err)
176	}
177
178	skill.Instructions = strings.TrimSpace(body)
179
180	return &skill, nil
181}
182
// splitFrontmatter separates markdown content into its YAML frontmatter and
// the body that follows it.
//
// A leading UTF-8 BOM is dropped and CRLF / lone CR line endings are converted
// to LF before splitting. The first non-blank line must be a "---" fence
// (surrounding whitespace ignored); the frontmatter runs up to the next such
// fence and the body is everything after it. Neither fence is included in the
// results. An error is returned when the opening fence is missing or the
// closing fence is never found.
func splitFrontmatter(content string) (frontmatter, body string, err error) {
	const fence = "---"

	// Some editors prepend a BOM; remove it so the fence check is not fooled.
	content = strings.TrimPrefix(content, "\uFEFF")
	// Collapse every line-ending flavor to "\n".
	content = strings.ReplaceAll(content, "\r\n", "\n")
	content = strings.ReplaceAll(content, "\r", "\n")

	lines := strings.Split(content, "\n")

	// The opening fence has to be the first line that is not blank.
	open := -1
	for i, line := range lines {
		trimmed := strings.TrimSpace(line)
		if trimmed == "" {
			continue
		}
		if trimmed == fence {
			open = i
		}
		break
	}
	if open < 0 {
		return "", "", errors.New("no YAML frontmatter found")
	}

	// The closing fence is the next fence line after the opening one.
	closing := -1
	for i := open + 1; i < len(lines); i++ {
		if strings.TrimSpace(lines[i]) == fence {
			closing = i
			break
		}
	}
	if closing < 0 {
		return "", "", errors.New("unclosed frontmatter")
	}

	return strings.Join(lines[open+1:closing], "\n"), strings.Join(lines[closing+1:], "\n"), nil
}
211
// Discover finds all valid skills in the given paths.
// It is a convenience wrapper around DiscoverWithStates that discards the
// per-file states; skills that fail to parse or validate are simply omitted.
func Discover(paths []string) []*Skill {
	skills, _ := DiscoverWithStates(paths)
	return skills
}
217
218// DiscoverWithStates finds all valid skills in the given paths and also
219// returns a per-file state slice describing parse/validation outcomes. Useful
220// for diagnostics and UI reporting.
221func DiscoverWithStates(paths []string) ([]*Skill, []*SkillState) {
222	var skills []*Skill
223	var states []*SkillState
224	var mu sync.Mutex
225	seen := make(map[string]bool)
226	addState := func(name, path string, state DiscoveryState, err error) {
227		mu.Lock()
228		states = append(states, &SkillState{
229			Name:  name,
230			Path:  path,
231			State: state,
232			Err:   err,
233		})
234		mu.Unlock()
235	}
236
237	for _, base := range paths {
238		// We use fastwalk with Follow: true instead of filepath.WalkDir because
239		// WalkDir doesn't follow symlinked directories at any depthโ€”only entry
240		// points. This ensures skills in symlinked subdirectories are discovered.
241		// fastwalk is concurrent, so we protect shared state (seen, skills) with mu.
242		conf := fastwalk.Config{
243			Follow:  true,
244			ToSlash: fastwalk.DefaultToSlash(),
245		}
246		err := fastwalk.Walk(&conf, base, func(path string, d os.DirEntry, err error) error {
247			if err != nil {
248				slog.Warn("Failed to walk skills path entry", "base", base, "path", path, "error", err)
249				addState("", path, StateError, err)
250				return nil
251			}
252			if d.IsDir() || d.Name() != SkillFileName {
253				return nil
254			}
255			mu.Lock()
256			if seen[path] {
257				mu.Unlock()
258				return nil
259			}
260			seen[path] = true
261			mu.Unlock()
262			skill, err := Parse(path)
263			if err != nil {
264				slog.Warn("Failed to parse skill file", "path", path, "error", err)
265				addState("", path, StateError, err)
266				return nil
267			}
268			if err := skill.Validate(); err != nil {
269				slog.Warn("Skill validation failed", "path", path, "error", err)
270				addState(skill.Name, path, StateError, err)
271				return nil
272			}
273			slog.Debug("Successfully loaded skill", "name", skill.Name, "path", path)
274			mu.Lock()
275			skills = append(skills, skill)
276			mu.Unlock()
277			addState(skill.Name, path, StateNormal, nil)
278			return nil
279		})
280		if err != nil && !os.IsNotExist(err) {
281			slog.Warn("Failed to walk skills path", "path", base, "error", err)
282		}
283	}
284
285	// fastwalk traversal order is non-deterministic, so sort for stable output.
286	// Sort by path first, then alphabetically by name within each path.
287	slices.SortStableFunc(skills, func(a, b *Skill) int {
288		if c := strings.Compare(strings.ToLower(a.Path), strings.ToLower(b.Path)); c != 0 {
289			return c
290		}
291		return strings.Compare(strings.ToLower(a.Name), strings.ToLower(b.Name))
292	})
293
294	return skills, states
295}
296
297// ToPromptXML generates XML for injection into the system prompt.
298// Skills with DisableModelInvocation set to true are excluded.
299func ToPromptXML(skills []*Skill) string {
300	if len(skills) == 0 {
301		return ""
302	}
303
304	var sb strings.Builder
305	sb.WriteString("<available_skills>\n")
306	for _, s := range skills {
307		// Skip skills that have disable-model-invocation set
308		if s.DisableModelInvocation {
309			continue
310		}
311		sb.WriteString("  <skill>\n")
312		fmt.Fprintf(&sb, "    <name>%s</name>\n", escape(s.Name))
313		fmt.Fprintf(&sb, "    <description>%s</description>\n", escape(s.Description))
314		fmt.Fprintf(&sb, "    <location>%s</location>\n", escape(s.SkillFilePath))
315		if s.Builtin {
316			sb.WriteString("    <type>builtin</type>\n")
317		}
318		sb.WriteString("  </skill>\n")
319	}
320	sb.WriteString("</available_skills>")
321	return sb.String()
322}
323
324// FormatInvocation generates XML for a skill when invoked as a user command.
325func (s *Skill) FormatInvocation() string {
326	var sb strings.Builder
327	sb.WriteString("<loaded_skill>\n")
328	fmt.Fprintf(&sb, "  <name>%s</name>\n", escape(s.Name))
329	fmt.Fprintf(&sb, "  <description>%s</description>\n", escape(s.Description))
330	fmt.Fprintf(&sb, "  <location>%s</location>\n", escape(s.SkillFilePath))
331	sb.WriteString("  <instructions>\n")
332	sb.WriteString(escape(s.Instructions))
333	sb.WriteString("\n  </instructions>\n")
334	sb.WriteString("</loaded_skill>")
335	return sb.String()
336}
337
// escape returns s with the characters &, <, >, " and ' replaced by their XML
// entity references (via promptReplacer), so that s can be embedded as text in
// the XML produced by ToPromptXML and FormatInvocation.
func escape(s string) string {
	return promptReplacer.Replace(s)
}
341
342// DeduplicateStates removes duplicate skill states by name. When duplicates exist,
343// the last occurrence wins (consistent with Deduplicate for skills).
344func DeduplicateStates(all []*SkillState) []*SkillState {
345	seen := make(map[string]int, len(all))
346	for i, s := range all {
347		if s.Name != "" {
348			seen[s.Name] = i
349		}
350	}
351
352	result := make([]*SkillState, 0, len(seen))
353	for i, s := range all {
354		// If it's the last occurrence of this name, or it has no name (error state), keep it
355		if s.Name == "" || seen[s.Name] == i {
356			result = append(result, s)
357		}
358	}
359	return result
360}
361
362// Deduplicate removes duplicate skills by name. When duplicates exist, the
363// last occurrence wins. This means user skills (appended after builtins)
364// override builtin skills with the same name.
365func Deduplicate(all []*Skill) []*Skill {
366	seen := make(map[string]int, len(all))
367	for i, s := range all {
368		seen[s.Name] = i
369	}
370
371	result := make([]*Skill, 0, len(seen))
372	for i, s := range all {
373		if seen[s.Name] == i {
374			result = append(result, s)
375		}
376	}
377	return result
378}
379
// ApproxTokenCount gives a rough estimate of the number of LLM tokens in s.
// It applies the common heuristic of about four characters per token, which is
// close enough to GPT/Claude tokenizers for diagnostic logging. The length is
// measured in bytes and rounded up, so any non-empty string counts as at least
// one token and the empty string counts as zero.
func ApproxTokenCount(s string) int {
	const bytesPerToken = 4

	// Ceiling division of the byte length by bytesPerToken.
	tokens := len(s) / bytesPerToken
	if len(s)%bytesPerToken != 0 {
		tokens++
	}
	return tokens
}
389
390// Filter removes skills whose names appear in the disabled list.
391func Filter(all []*Skill, disabled []string) []*Skill {
392	if len(disabled) == 0 {
393		return all
394	}
395
396	disabledSet := make(map[string]bool, len(disabled))
397	for _, name := range disabled {
398		disabledSet[name] = true
399	}
400
401	result := make([]*Skill, 0, len(all))
402	for _, s := range all {
403		if !disabledSet[s.Name] {
404			result = append(result, s)
405		}
406	}
407	return result
408}