1// Package skills implements the Agent Skills open standard.
2// See https://agentskills.io for the specification.
3package skills
4
5import (
6 "context"
7 "errors"
8 "fmt"
9 "log/slog"
10 "os"
11 "path/filepath"
12 "regexp"
13 "slices"
14 "strings"
15 "sync"
16
17 "github.com/charlievieth/fastwalk"
18 "github.com/charmbracelet/crush/internal/pubsub"
19 "gopkg.in/yaml.v3"
20)
21
// Limits and file naming used when discovering and validating skills.
const (
	// SkillFileName is the file name discovery looks for; a file with this
	// exact name is treated as a skill definition.
	SkillFileName = "SKILL.md"
	// MaxNameLength is the upper bound Validate enforces on Skill.Name.
	MaxNameLength = 64
	// MaxDescriptionLength is the upper bound Validate enforces on
	// Skill.Description.
	MaxDescriptionLength = 1024
	// MaxCompatibilityLength is the upper bound Validate enforces on
	// Skill.Compatibility.
	MaxCompatibilityLength = 500
)
28
29var (
30 namePattern = regexp.MustCompile(`^[a-zA-Z0-9]+(-[a-zA-Z0-9]+)*$`)
31 promptReplacer = strings.NewReplacer("&", "&", "<", "<", ">", ">", "\"", """, "'", "'")
32
33 latestStates []*SkillState
34 latestStatesMu sync.RWMutex
35)
36
// Skill is the in-memory form of a SKILL.md file: the metadata declared in
// its YAML frontmatter plus details recorded while loading it.
type Skill struct {
	// Decoded from the YAML frontmatter.
	Name          string            `yaml:"name" json:"name"`
	Description   string            `yaml:"description" json:"description"`
	License       string            `yaml:"license,omitempty" json:"license,omitempty"`
	Compatibility string            `yaml:"compatibility,omitempty" json:"compatibility,omitempty"`
	Metadata      map[string]string `yaml:"metadata,omitempty" json:"metadata,omitempty"`

	// Never read from YAML (yaml:"-"). Instructions holds the trimmed
	// markdown body, Path the directory containing the skill file,
	// SkillFilePath the skill file itself, and Builtin flags skills that
	// ToPromptXML reports with a <type>builtin</type> element.
	Instructions  string `yaml:"-" json:"instructions"`
	Path          string `yaml:"-" json:"path"`
	SkillFilePath string `yaml:"-" json:"skill_file_path"`
	Builtin       bool   `yaml:"-" json:"builtin"`
}
49
// DiscoveryState classifies the result of processing one skill file during
// discovery.
type DiscoveryState int

// Possible DiscoveryState values. StateNormal is the zero value.
const (
	// StateNormal means the skill file was parsed and passed validation.
	StateNormal DiscoveryState = iota
	// StateError means scanning, parsing or validating the file failed.
	StateError
)
59
60// SkillState represents the latest discovery status of a skill file.
61type SkillState struct {
62 Name string
63 Path string
64 State DiscoveryState
65 Err error
66}
67
68// Event is published when skill discovery completes.
69type Event struct {
70 States []*SkillState
71}
72
73var broker = pubsub.NewBroker[Event]()
74
75// SubscribeEvents returns a channel that receives events when skill discovery state changes.
76func SubscribeEvents(ctx context.Context) <-chan pubsub.Event[Event] {
77 return broker.Subscribe(ctx)
78}
79
80// PublishStates publishes a skill discovery event with the given states.
81func PublishStates(states []*SkillState) {
82 broker.Publish(pubsub.UpdatedEvent, Event{States: cloneStates(states)})
83}
84
85// cloneStates returns a deep copy of the given state slice so callers cannot
86// accidentally mutate the source.
87func cloneStates(states []*SkillState) []*SkillState {
88 if states == nil {
89 return nil
90 }
91 result := make([]*SkillState, len(states))
92 for i, s := range states {
93 clone := *s
94 result[i] = &clone
95 }
96 return result
97}
98
99// GetLatestStates returns the latest discovery states.
100func GetLatestStates() []*SkillState {
101 latestStatesMu.RLock()
102 defer latestStatesMu.RUnlock()
103 return cloneStates(latestStates)
104}
105
106// SetLatestStates stores the given states in the package-level cache so that
107// GetLatestStates can return them synchronously before the first pubsub event
108// arrives.
109func SetLatestStates(states []*SkillState) {
110 latestStatesMu.Lock()
111 latestStates = cloneStates(states)
112 latestStatesMu.Unlock()
113}
114
115// Validate checks if the skill meets spec requirements.
116func (s *Skill) Validate() error {
117 var errs []error
118
119 if s.Name == "" {
120 errs = append(errs, errors.New("name is required"))
121 } else {
122 if len(s.Name) > MaxNameLength {
123 errs = append(errs, fmt.Errorf("name exceeds %d characters", MaxNameLength))
124 }
125 if !namePattern.MatchString(s.Name) {
126 errs = append(errs, errors.New("name must be alphanumeric with hyphens, no leading/trailing/consecutive hyphens"))
127 }
128 if s.Path != "" && !strings.EqualFold(filepath.Base(s.Path), s.Name) {
129 errs = append(errs, fmt.Errorf("name %q must match directory %q", s.Name, filepath.Base(s.Path)))
130 }
131 }
132
133 if s.Description == "" {
134 errs = append(errs, errors.New("description is required"))
135 } else if len(s.Description) > MaxDescriptionLength {
136 errs = append(errs, fmt.Errorf("description exceeds %d characters", MaxDescriptionLength))
137 }
138
139 if len(s.Compatibility) > MaxCompatibilityLength {
140 errs = append(errs, fmt.Errorf("compatibility exceeds %d characters", MaxCompatibilityLength))
141 }
142
143 return errors.Join(errs...)
144}
145
146// Parse parses a SKILL.md file from disk.
147func Parse(path string) (*Skill, error) {
148 content, err := os.ReadFile(path)
149 if err != nil {
150 return nil, err
151 }
152
153 skill, err := ParseContent(content)
154 if err != nil {
155 return nil, err
156 }
157
158 skill.Path = filepath.Dir(path)
159 skill.SkillFilePath = path
160
161 return skill, nil
162}
163
164// ParseContent parses a SKILL.md from raw bytes.
165func ParseContent(content []byte) (*Skill, error) {
166 frontmatter, body, err := splitFrontmatter(string(content))
167 if err != nil {
168 return nil, err
169 }
170
171 var skill Skill
172 if err := yaml.Unmarshal([]byte(frontmatter), &skill); err != nil {
173 return nil, fmt.Errorf("parsing frontmatter: %w", err)
174 }
175
176 skill.Instructions = strings.TrimSpace(body)
177
178 return &skill, nil
179}
180
// splitFrontmatter separates a markdown document into its YAML frontmatter
// and the body that follows it.
//
// A leading UTF-8 BOM is dropped and all line endings are normalized to "\n".
// The first non-blank line must be a "---" fence (surrounding whitespace is
// ignored); the frontmatter runs up to the next "---" line and the body is
// everything after that. Neither result includes the fence lines. An error is
// returned when the opening fence is missing or the closing one never appears.
func splitFrontmatter(content string) (frontmatter, body string, err error) {
	// Editors may prepend a BOM and use CRLF or bare CR line endings.
	text := strings.TrimPrefix(content, "\uFEFF")
	text = strings.ReplaceAll(text, "\r\n", "\n")
	text = strings.ReplaceAll(text, "\r", "\n")
	lines := strings.Split(text, "\n")

	isBlank := func(line string) bool { return strings.TrimSpace(line) == "" }
	isFence := func(line string) bool { return strings.TrimSpace(line) == "---" }

	opening := slices.IndexFunc(lines, func(line string) bool { return !isBlank(line) })
	if opening < 0 || !isFence(lines[opening]) {
		return "", "", errors.New("no YAML frontmatter found")
	}

	afterOpening := lines[opening+1:]
	closing := slices.IndexFunc(afterOpening, isFence)
	if closing < 0 {
		return "", "", errors.New("unclosed frontmatter")
	}

	frontmatter = strings.Join(afterOpening[:closing], "\n")
	body = strings.Join(afterOpening[closing+1:], "\n")
	return frontmatter, body, nil
}
209
210// Discover finds all valid skills in the given paths.
211func Discover(paths []string) []*Skill {
212 skills, _ := DiscoverWithStates(paths)
213 return skills
214}
215
216// DiscoverWithStates finds all valid skills in the given paths and also
217// returns a per-file state slice describing parse/validation outcomes. Useful
218// for diagnostics and UI reporting.
219func DiscoverWithStates(paths []string) ([]*Skill, []*SkillState) {
220 var skills []*Skill
221 var states []*SkillState
222 var mu sync.Mutex
223 seen := make(map[string]bool)
224 addState := func(name, path string, state DiscoveryState, err error) {
225 mu.Lock()
226 states = append(states, &SkillState{
227 Name: name,
228 Path: path,
229 State: state,
230 Err: err,
231 })
232 mu.Unlock()
233 }
234
235 for _, base := range paths {
236 // We use fastwalk with Follow: true instead of filepath.WalkDir because
237 // WalkDir doesn't follow symlinked directories at any depthโonly entry
238 // points. This ensures skills in symlinked subdirectories are discovered.
239 // fastwalk is concurrent, so we protect shared state (seen, skills) with mu.
240 conf := fastwalk.Config{
241 Follow: true,
242 ToSlash: fastwalk.DefaultToSlash(),
243 }
244 err := fastwalk.Walk(&conf, base, func(path string, d os.DirEntry, err error) error {
245 if err != nil {
246 slog.Warn("Failed to walk skills path entry", "base", base, "path", path, "error", err)
247 addState("", path, StateError, err)
248 return nil
249 }
250 if d.IsDir() || d.Name() != SkillFileName {
251 return nil
252 }
253 mu.Lock()
254 if seen[path] {
255 mu.Unlock()
256 return nil
257 }
258 seen[path] = true
259 mu.Unlock()
260 skill, err := Parse(path)
261 if err != nil {
262 slog.Warn("Failed to parse skill file", "path", path, "error", err)
263 addState("", path, StateError, err)
264 return nil
265 }
266 if err := skill.Validate(); err != nil {
267 slog.Warn("Skill validation failed", "path", path, "error", err)
268 addState(skill.Name, path, StateError, err)
269 return nil
270 }
271 slog.Debug("Successfully loaded skill", "name", skill.Name, "path", path)
272 mu.Lock()
273 skills = append(skills, skill)
274 mu.Unlock()
275 addState(skill.Name, path, StateNormal, nil)
276 return nil
277 })
278 if err != nil && !os.IsNotExist(err) {
279 slog.Warn("Failed to walk skills path", "path", base, "error", err)
280 }
281 }
282
283 // fastwalk traversal order is non-deterministic, so sort for stable output.
284 // Sort by path first, then alphabetically by name within each path.
285 slices.SortStableFunc(skills, func(a, b *Skill) int {
286 if c := strings.Compare(strings.ToLower(a.Path), strings.ToLower(b.Path)); c != 0 {
287 return c
288 }
289 return strings.Compare(strings.ToLower(a.Name), strings.ToLower(b.Name))
290 })
291
292 return skills, states
293}
294
295// ToPromptXML generates XML for injection into the system prompt.
296func ToPromptXML(skills []*Skill) string {
297 if len(skills) == 0 {
298 return ""
299 }
300
301 var sb strings.Builder
302 sb.WriteString("<available_skills>\n")
303 for _, s := range skills {
304 sb.WriteString(" <skill>\n")
305 fmt.Fprintf(&sb, " <name>%s</name>\n", escape(s.Name))
306 fmt.Fprintf(&sb, " <description>%s</description>\n", escape(s.Description))
307 fmt.Fprintf(&sb, " <location>%s</location>\n", escape(s.SkillFilePath))
308 if s.Builtin {
309 sb.WriteString(" <type>builtin</type>\n")
310 }
311 sb.WriteString(" </skill>\n")
312 }
313 sb.WriteString("</available_skills>")
314 return sb.String()
315}
316
317func escape(s string) string {
318 return promptReplacer.Replace(s)
319}
320
321// DeduplicateStates removes duplicate skill states by name. When duplicates exist,
322// the last occurrence wins (consistent with Deduplicate for skills).
323func DeduplicateStates(all []*SkillState) []*SkillState {
324 seen := make(map[string]int, len(all))
325 for i, s := range all {
326 if s.Name != "" {
327 seen[s.Name] = i
328 }
329 }
330
331 result := make([]*SkillState, 0, len(seen))
332 for i, s := range all {
333 // If it's the last occurrence of this name, or it has no name (error state), keep it
334 if s.Name == "" || seen[s.Name] == i {
335 result = append(result, s)
336 }
337 }
338 return result
339}
340
341// Deduplicate removes duplicate skills by name. When duplicates exist, the
342// last occurrence wins. This means user skills (appended after builtins)
343// override builtin skills with the same name.
344func Deduplicate(all []*Skill) []*Skill {
345 seen := make(map[string]int, len(all))
346 for i, s := range all {
347 seen[s.Name] = i
348 }
349
350 result := make([]*Skill, 0, len(seen))
351 for i, s := range all {
352 if seen[s.Name] == i {
353 result = append(result, s)
354 }
355 }
356 return result
357}
358
// ApproxTokenCount gives a rough estimate of the number of LLM tokens in s,
// using the common heuristic of about four characters per token. The length is
// measured in bytes and rounded up, so any non-empty string counts as at least
// one token and the empty string as zero. Intended for diagnostic logging, not
// exact accounting.
func ApproxTokenCount(s string) int {
	const bytesPerToken = 4
	// Ceiling division; an empty string naturally yields 0.
	return (len(s) + bytesPerToken - 1) / bytesPerToken
}
368
369// Filter removes skills whose names appear in the disabled list.
370func Filter(all []*Skill, disabled []string) []*Skill {
371 if len(disabled) == 0 {
372 return all
373 }
374
375 disabledSet := make(map[string]bool, len(disabled))
376 for _, name := range disabled {
377 disabledSet[name] = true
378 }
379
380 result := make([]*Skill, 0, len(all))
381 for _, s := range all {
382 if !disabledSet[s.Name] {
383 result = append(result, s)
384 }
385 }
386 return result
387}