skills.go

  1// Package skills implements the Agent Skills open standard.
  2// See https://agentskills.io for the specification.
  3package skills
  4
  5import (
  6	"context"
  7	"errors"
  8	"fmt"
  9	"log/slog"
 10	"os"
 11	"path/filepath"
 12	"regexp"
 13	"slices"
 14	"strings"
 15	"sync"
 16
 17	"github.com/charlievieth/fastwalk"
 18	"github.com/charmbracelet/crush/internal/pubsub"
 19	"gopkg.in/yaml.v3"
 20)
 21
// File-name convention and field-length limits used when discovering and
// validating skills.
const (
	SkillFileName          = "SKILL.md" // file name DiscoverWithStates matches against directory entries
	MaxNameLength          = 64         // upper bound Validate applies to len(Skill.Name)
	MaxDescriptionLength   = 1024       // upper bound Validate applies to len(Skill.Description)
	MaxCompatibilityLength = 500        // upper bound Validate applies to len(Skill.Compatibility)
)
 28
var (
	// namePattern accepts one or more ASCII alphanumeric segments joined by
	// single hyphens (no leading, trailing or consecutive hyphens).
	// promptReplacer substitutes the characters &, <, >, " and ' with their
	// XML entity references; it backs escape.
	namePattern    = regexp.MustCompile(`^[a-zA-Z0-9]+(-[a-zA-Z0-9]+)*$`)
	promptReplacer = strings.NewReplacer("&", "&amp;", "<", "&lt;", ">", "&gt;", "\"", "&quot;", "'", "&apos;")

	// latestStates is the package-level cache written by SetLatestStates and
	// read by GetLatestStates. All access goes through latestStatesMu.
	latestStates   []*SkillState
	latestStatesMu sync.RWMutex
)
 36
// Skill represents a parsed SKILL.md file.
//
// Name, Description, License, Compatibility and Metadata are decoded from the
// file's YAML frontmatter. The remaining fields are tagged `yaml:"-"` and are
// therefore never read from the frontmatter:
//
//   - Instructions is the markdown body that follows the frontmatter, with
//     surrounding whitespace trimmed (set by ParseContent).
//   - Path is the directory containing the skill file and SkillFilePath is
//     the path of the file itself (both set by Parse).
//   - Builtin makes ToPromptXML emit a <type>builtin</type> element when true.
//     NOTE(review): nothing in the code visible here assigns it; presumably
//     set by whatever loads bundled skills — confirm.
type Skill struct {
	Name          string            `yaml:"name" json:"name"`
	Description   string            `yaml:"description" json:"description"`
	License       string            `yaml:"license,omitempty" json:"license,omitempty"`
	Compatibility string            `yaml:"compatibility,omitempty" json:"compatibility,omitempty"`
	Metadata      map[string]string `yaml:"metadata,omitempty" json:"metadata,omitempty"`
	Instructions  string            `yaml:"-" json:"instructions"`
	Path          string            `yaml:"-" json:"path"`
	SkillFilePath string            `yaml:"-" json:"skill_file_path"`
	Builtin       bool              `yaml:"-" json:"builtin"`
}
 49
// DiscoveryState represents the outcome of discovering a single skill file.
type DiscoveryState int

const (
	// StateNormal indicates the skill was parsed and validated successfully.
	// It is the zero value of DiscoveryState.
	StateNormal DiscoveryState = iota
	// StateError indicates discovery encountered a scan/parse/validate error.
	// DiscoverWithStates stores the cause in the accompanying SkillState.Err.
	StateError
)
 59
// SkillState represents the latest discovery status of a skill file.
//
// As populated by DiscoverWithStates:
//
//   - Name is the skill name from the frontmatter; it is empty when the walk
//     or the parse failed before a name was available.
//   - Path is the path reported by the directory walk for the entry (the
//     SKILL.md file, or the entry that could not be walked).
//   - State is StateNormal for a loaded skill and StateError otherwise.
//   - Err is the walk, parse or validation error; nil when State is
//     StateNormal.
type SkillState struct {
	Name  string
	Path  string
	State DiscoveryState
	Err   error
}
 67
// Event is published when skill discovery completes.
//
// States holds the discovery states carried by the event; PublishStates fills
// it with a copy of the slice it is given.
type Event struct {
	States []*SkillState
}
 72
// broker is the package-level pubsub broker through which SubscribeEvents
// hands out subscriptions and PublishStates delivers Event values.
var broker = pubsub.NewBroker[Event]()
 74
 75// SubscribeEvents returns a channel that receives events when skill discovery state changes.
 76func SubscribeEvents(ctx context.Context) <-chan pubsub.Event[Event] {
 77	return broker.Subscribe(ctx)
 78}
 79
 80// PublishStates publishes a skill discovery event with the given states.
 81func PublishStates(states []*SkillState) {
 82	broker.Publish(pubsub.UpdatedEvent, Event{States: cloneStates(states)})
 83}
 84
 85// cloneStates returns a deep copy of the given state slice so callers cannot
 86// accidentally mutate the source.
 87func cloneStates(states []*SkillState) []*SkillState {
 88	if states == nil {
 89		return nil
 90	}
 91	result := make([]*SkillState, len(states))
 92	for i, s := range states {
 93		clone := *s
 94		result[i] = &clone
 95	}
 96	return result
 97}
 98
 99// GetLatestStates returns the latest discovery states.
100func GetLatestStates() []*SkillState {
101	latestStatesMu.RLock()
102	defer latestStatesMu.RUnlock()
103	return cloneStates(latestStates)
104}
105
106// SetLatestStates stores the given states in the package-level cache so that
107// GetLatestStates can return them synchronously before the first pubsub event
108// arrives.
109func SetLatestStates(states []*SkillState) {
110	latestStatesMu.Lock()
111	latestStates = cloneStates(states)
112	latestStatesMu.Unlock()
113}
114
115// Validate checks if the skill meets spec requirements.
116func (s *Skill) Validate() error {
117	var errs []error
118
119	if s.Name == "" {
120		errs = append(errs, errors.New("name is required"))
121	} else {
122		if len(s.Name) > MaxNameLength {
123			errs = append(errs, fmt.Errorf("name exceeds %d characters", MaxNameLength))
124		}
125		if !namePattern.MatchString(s.Name) {
126			errs = append(errs, errors.New("name must be alphanumeric with hyphens, no leading/trailing/consecutive hyphens"))
127		}
128		if s.Path != "" && !strings.EqualFold(filepath.Base(s.Path), s.Name) {
129			errs = append(errs, fmt.Errorf("name %q must match directory %q", s.Name, filepath.Base(s.Path)))
130		}
131	}
132
133	if s.Description == "" {
134		errs = append(errs, errors.New("description is required"))
135	} else if len(s.Description) > MaxDescriptionLength {
136		errs = append(errs, fmt.Errorf("description exceeds %d characters", MaxDescriptionLength))
137	}
138
139	if len(s.Compatibility) > MaxCompatibilityLength {
140		errs = append(errs, fmt.Errorf("compatibility exceeds %d characters", MaxCompatibilityLength))
141	}
142
143	return errors.Join(errs...)
144}
145
146// Parse parses a SKILL.md file from disk.
147func Parse(path string) (*Skill, error) {
148	content, err := os.ReadFile(path)
149	if err != nil {
150		return nil, err
151	}
152
153	skill, err := ParseContent(content)
154	if err != nil {
155		return nil, err
156	}
157
158	skill.Path = filepath.Dir(path)
159	skill.SkillFilePath = path
160
161	return skill, nil
162}
163
164// ParseContent parses a SKILL.md from raw bytes.
165func ParseContent(content []byte) (*Skill, error) {
166	frontmatter, body, err := splitFrontmatter(string(content))
167	if err != nil {
168		return nil, err
169	}
170
171	var skill Skill
172	if err := yaml.Unmarshal([]byte(frontmatter), &skill); err != nil {
173		return nil, fmt.Errorf("parsing frontmatter: %w", err)
174	}
175
176	skill.Instructions = strings.TrimSpace(body)
177
178	return &skill, nil
179}
180
// splitFrontmatter separates markdown content into its YAML frontmatter and
// the body that follows.
//
// A leading UTF-8 BOM is dropped and CRLF / lone CR line endings are turned
// into LF before the content is examined line by line. The first non-blank
// line must be a "---" fence (surrounding whitespace ignored); the frontmatter
// is everything up to the next such fence and the body is everything after
// it. Both are returned without the fence lines. An error is returned when the
// opening fence is missing or the frontmatter is never closed.
func splitFrontmatter(content string) (frontmatter, body string, err error) {
	isFence := func(line string) bool {
		return strings.TrimSpace(line) == "---"
	}

	// Strip the BOM some editors prepend, then normalize line endings.
	normalized := strings.TrimPrefix(content, "\uFEFF")
	normalized = strings.ReplaceAll(normalized, "\r\n", "\n")
	normalized = strings.ReplaceAll(normalized, "\r", "\n")
	lines := strings.Split(normalized, "\n")

	// Skip blank lines ahead of the opening fence.
	opening := 0
	for opening < len(lines) && strings.TrimSpace(lines[opening]) == "" {
		opening++
	}
	if opening == len(lines) || !isFence(lines[opening]) {
		return "", "", errors.New("no YAML frontmatter found")
	}

	// Everything after the opening fence; the closing fence is searched here.
	rest := lines[opening+1:]
	closing := slices.IndexFunc(rest, isFence)
	if closing < 0 {
		return "", "", errors.New("unclosed frontmatter")
	}

	return strings.Join(rest[:closing], "\n"), strings.Join(rest[closing+1:], "\n"), nil
}
209
210// Discover finds all valid skills in the given paths.
211func Discover(paths []string) []*Skill {
212	skills, _ := DiscoverWithStates(paths)
213	return skills
214}
215
216// DiscoverWithStates finds all valid skills in the given paths and also
217// returns a per-file state slice describing parse/validation outcomes. Useful
218// for diagnostics and UI reporting.
219func DiscoverWithStates(paths []string) ([]*Skill, []*SkillState) {
220	var skills []*Skill
221	var states []*SkillState
222	var mu sync.Mutex
223	seen := make(map[string]bool)
224	addState := func(name, path string, state DiscoveryState, err error) {
225		mu.Lock()
226		states = append(states, &SkillState{
227			Name:  name,
228			Path:  path,
229			State: state,
230			Err:   err,
231		})
232		mu.Unlock()
233	}
234
235	for _, base := range paths {
236		// We use fastwalk with Follow: true instead of filepath.WalkDir because
237		// WalkDir doesn't follow symlinked directories at any depthโ€”only entry
238		// points. This ensures skills in symlinked subdirectories are discovered.
239		// fastwalk is concurrent, so we protect shared state (seen, skills) with mu.
240		conf := fastwalk.Config{
241			Follow:  true,
242			ToSlash: fastwalk.DefaultToSlash(),
243		}
244		err := fastwalk.Walk(&conf, base, func(path string, d os.DirEntry, err error) error {
245			if err != nil {
246				slog.Warn("Failed to walk skills path entry", "base", base, "path", path, "error", err)
247				addState("", path, StateError, err)
248				return nil
249			}
250			if d.IsDir() || d.Name() != SkillFileName {
251				return nil
252			}
253			mu.Lock()
254			if seen[path] {
255				mu.Unlock()
256				return nil
257			}
258			seen[path] = true
259			mu.Unlock()
260			skill, err := Parse(path)
261			if err != nil {
262				slog.Warn("Failed to parse skill file", "path", path, "error", err)
263				addState("", path, StateError, err)
264				return nil
265			}
266			if err := skill.Validate(); err != nil {
267				slog.Warn("Skill validation failed", "path", path, "error", err)
268				addState(skill.Name, path, StateError, err)
269				return nil
270			}
271			slog.Debug("Successfully loaded skill", "name", skill.Name, "path", path)
272			mu.Lock()
273			skills = append(skills, skill)
274			mu.Unlock()
275			addState(skill.Name, path, StateNormal, nil)
276			return nil
277		})
278		if err != nil && !os.IsNotExist(err) {
279			slog.Warn("Failed to walk skills path", "path", base, "error", err)
280		}
281	}
282
283	// fastwalk traversal order is non-deterministic, so sort for stable output.
284	// Sort by path first, then alphabetically by name within each path.
285	slices.SortStableFunc(skills, func(a, b *Skill) int {
286		if c := strings.Compare(strings.ToLower(a.Path), strings.ToLower(b.Path)); c != 0 {
287			return c
288		}
289		return strings.Compare(strings.ToLower(a.Name), strings.ToLower(b.Name))
290	})
291
292	return skills, states
293}
294
295// ToPromptXML generates XML for injection into the system prompt.
296func ToPromptXML(skills []*Skill) string {
297	if len(skills) == 0 {
298		return ""
299	}
300
301	var sb strings.Builder
302	sb.WriteString("<available_skills>\n")
303	for _, s := range skills {
304		sb.WriteString("  <skill>\n")
305		fmt.Fprintf(&sb, "    <name>%s</name>\n", escape(s.Name))
306		fmt.Fprintf(&sb, "    <description>%s</description>\n", escape(s.Description))
307		fmt.Fprintf(&sb, "    <location>%s</location>\n", escape(s.SkillFilePath))
308		if s.Builtin {
309			sb.WriteString("    <type>builtin</type>\n")
310		}
311		sb.WriteString("  </skill>\n")
312	}
313	sb.WriteString("</available_skills>")
314	return sb.String()
315}
316
317func escape(s string) string {
318	return promptReplacer.Replace(s)
319}
320
321// DeduplicateStates removes duplicate skill states by name. When duplicates exist,
322// the last occurrence wins (consistent with Deduplicate for skills).
323func DeduplicateStates(all []*SkillState) []*SkillState {
324	seen := make(map[string]int, len(all))
325	for i, s := range all {
326		if s.Name != "" {
327			seen[s.Name] = i
328		}
329	}
330
331	result := make([]*SkillState, 0, len(seen))
332	for i, s := range all {
333		// If it's the last occurrence of this name, or it has no name (error state), keep it
334		if s.Name == "" || seen[s.Name] == i {
335			result = append(result, s)
336		}
337	}
338	return result
339}
340
341// Deduplicate removes duplicate skills by name. When duplicates exist, the
342// last occurrence wins. This means user skills (appended after builtins)
343// override builtin skills with the same name.
344func Deduplicate(all []*Skill) []*Skill {
345	seen := make(map[string]int, len(all))
346	for i, s := range all {
347		seen[s.Name] = i
348	}
349
350	result := make([]*Skill, 0, len(seen))
351	for i, s := range all {
352		if seen[s.Name] == i {
353			result = append(result, s)
354		}
355	}
356	return result
357}
358
// ApproxTokenCount gives a rough estimate of the number of LLM tokens in s
// using the common heuristic of about four characters per token: the byte
// length of s divided by four, rounded up. The empty string counts as zero
// tokens. The figure is only meant for diagnostic logging.
func ApproxTokenCount(s string) int {
	const bytesPerToken = 4

	tokens := len(s) / bytesPerToken
	if len(s)%bytesPerToken != 0 {
		// A trailing partial group still counts as one token.
		tokens++
	}
	return tokens
}
368
369// Filter removes skills whose names appear in the disabled list.
370func Filter(all []*Skill, disabled []string) []*Skill {
371	if len(disabled) == 0 {
372		return all
373	}
374
375	disabledSet := make(map[string]bool, len(disabled))
376	for _, name := range disabled {
377		disabledSet[name] = true
378	}
379
380	result := make([]*Skill, 0, len(all))
381	for _, s := range all {
382		if !disabledSet[s.Name] {
383			result = append(result, s)
384		}
385	}
386	return result
387}