1// Package skills implements the Agent Skills open standard.
2// See https://agentskills.io for the specification.
3package skills
4
5import (
6 "context"
7 "errors"
8 "fmt"
9 "log/slog"
10 "os"
11 "path/filepath"
12 "regexp"
13 "slices"
14 "strings"
15 "sync"
16
17 "github.com/charlievieth/fastwalk"
18 "github.com/charmbracelet/crush/internal/pubsub"
19 "gopkg.in/yaml.v3"
20)
21
// File naming and field-size limits used during discovery and validation.
const (
	// SkillFileName is the file name DiscoverWithStates looks for while
	// walking a skills directory.
	SkillFileName = "SKILL.md"
	// MaxNameLength is the limit Validate enforces on Skill.Name.
	MaxNameLength = 64
	// MaxDescriptionLength is the limit Validate enforces on Skill.Description.
	MaxDescriptionLength = 1024
	// MaxCompatibilityLength is the limit Validate enforces on
	// Skill.Compatibility.
	MaxCompatibilityLength = 500
)
28
29var (
30 namePattern = regexp.MustCompile(`^[a-zA-Z0-9]+(-[a-zA-Z0-9]+)*$`)
31 promptReplacer = strings.NewReplacer("&", "&", "<", "<", ">", ">", "\"", """, "'", "'")
32
33 latestStates []*SkillState
34 latestStatesMu sync.RWMutex
35)
36
// Skill is the in-memory form of a SKILL.md file: the fields decoded from its
// YAML frontmatter plus information filled in by the parser.
type Skill struct {
	// Name is the skill identifier; Validate requires it and checks it against
	// namePattern and the containing directory name.
	Name string `yaml:"name" json:"name"`
	// Description is required by Validate and limited to MaxDescriptionLength.
	Description string `yaml:"description" json:"description"`
	// UserInvocable mirrors the "user-invocable" frontmatter key. It is not
	// read in this file; presumably callers use it to expose the skill as a
	// user command (confirm against callers).
	UserInvocable bool `yaml:"user-invocable" json:"user_invocable"`
	// DisableModelInvocation excludes the skill from ToPromptXML output.
	DisableModelInvocation bool `yaml:"disable-model-invocation" json:"disable_model_invocation"`
	// License is an optional frontmatter field.
	License string `yaml:"license,omitempty" json:"license,omitempty"`
	// Compatibility is optional and limited to MaxCompatibilityLength.
	Compatibility string `yaml:"compatibility,omitempty" json:"compatibility,omitempty"`
	// Metadata holds optional string key/value pairs from the frontmatter.
	Metadata map[string]string `yaml:"metadata,omitempty" json:"metadata,omitempty"`
	// Instructions is the whitespace-trimmed body that follows the
	// frontmatter; it is set by ParseContent, never decoded from YAML.
	Instructions string `yaml:"-" json:"instructions"`
	// Path is the directory containing the skill file; set by Parse.
	Path string `yaml:"-" json:"path"`
	// SkillFilePath is the path of the skill file itself; set by Parse.
	SkillFilePath string `yaml:"-" json:"skill_file_path"`
	// Builtin is never decoded from YAML; when true, ToPromptXML adds a
	// <type>builtin</type> element for the skill.
	Builtin bool `yaml:"-" json:"builtin"`
}
51
// DiscoveryState classifies what happened when a single skill file (or walk
// entry) was processed during discovery.
type DiscoveryState int

const (
	// StateNormal means the skill file parsed and passed validation. It is
	// the zero value of DiscoveryState.
	StateNormal DiscoveryState = iota
	// StateError means walking, parsing, or validating the entry failed.
	StateError
)
61
62// SkillState represents the latest discovery status of a skill file.
63type SkillState struct {
64 Name string
65 Path string
66 State DiscoveryState
67 Err error
68}
69
70// Event is published when skill discovery completes.
71type Event struct {
72 States []*SkillState
73}
74
75var broker = pubsub.NewBroker[Event]()
76
77// SubscribeEvents returns a channel that receives events when skill discovery state changes.
78func SubscribeEvents(ctx context.Context) <-chan pubsub.Event[Event] {
79 return broker.Subscribe(ctx)
80}
81
82// PublishStates publishes a skill discovery event with the given states.
83func PublishStates(states []*SkillState) {
84 broker.Publish(pubsub.UpdatedEvent, Event{States: cloneStates(states)})
85}
86
87// cloneStates returns a deep copy of the given state slice so callers cannot
88// accidentally mutate the source.
89func cloneStates(states []*SkillState) []*SkillState {
90 if states == nil {
91 return nil
92 }
93 result := make([]*SkillState, len(states))
94 for i, s := range states {
95 clone := *s
96 result[i] = &clone
97 }
98 return result
99}
100
101// GetLatestStates returns the latest discovery states.
102func GetLatestStates() []*SkillState {
103 latestStatesMu.RLock()
104 defer latestStatesMu.RUnlock()
105 return cloneStates(latestStates)
106}
107
108// SetLatestStates stores the given states in the package-level cache so that
109// GetLatestStates can return them synchronously before the first pubsub event
110// arrives.
111func SetLatestStates(states []*SkillState) {
112 latestStatesMu.Lock()
113 latestStates = cloneStates(states)
114 latestStatesMu.Unlock()
115}
116
117// Validate checks if the skill meets spec requirements.
118func (s *Skill) Validate() error {
119 var errs []error
120
121 if s.Name == "" {
122 errs = append(errs, errors.New("name is required"))
123 } else {
124 if len(s.Name) > MaxNameLength {
125 errs = append(errs, fmt.Errorf("name exceeds %d characters", MaxNameLength))
126 }
127 if !namePattern.MatchString(s.Name) {
128 errs = append(errs, errors.New("name must be alphanumeric with hyphens, no leading/trailing/consecutive hyphens"))
129 }
130 if s.Path != "" && !strings.EqualFold(filepath.Base(s.Path), s.Name) {
131 errs = append(errs, fmt.Errorf("name %q must match directory %q", s.Name, filepath.Base(s.Path)))
132 }
133 }
134
135 if s.Description == "" {
136 errs = append(errs, errors.New("description is required"))
137 } else if len(s.Description) > MaxDescriptionLength {
138 errs = append(errs, fmt.Errorf("description exceeds %d characters", MaxDescriptionLength))
139 }
140
141 if len(s.Compatibility) > MaxCompatibilityLength {
142 errs = append(errs, fmt.Errorf("compatibility exceeds %d characters", MaxCompatibilityLength))
143 }
144
145 return errors.Join(errs...)
146}
147
148// Parse parses a SKILL.md file from disk.
149func Parse(path string) (*Skill, error) {
150 content, err := os.ReadFile(path)
151 if err != nil {
152 return nil, err
153 }
154
155 skill, err := ParseContent(content)
156 if err != nil {
157 return nil, err
158 }
159
160 skill.Path = filepath.Dir(path)
161 skill.SkillFilePath = path
162
163 return skill, nil
164}
165
166// ParseContent parses a SKILL.md from raw bytes.
167func ParseContent(content []byte) (*Skill, error) {
168 frontmatter, body, err := splitFrontmatter(string(content))
169 if err != nil {
170 return nil, err
171 }
172
173 var skill Skill
174 if err := yaml.Unmarshal([]byte(frontmatter), &skill); err != nil {
175 return nil, fmt.Errorf("parsing frontmatter: %w", err)
176 }
177
178 skill.Instructions = strings.TrimSpace(body)
179
180 return &skill, nil
181}
182
// splitFrontmatter separates markdown content into its YAML frontmatter and
// the body that follows it.
//
// A leading UTF-8 BOM is dropped and all line endings are normalized to "\n"
// first. The first non-blank line must be a "---" fence and a later "---" line
// closes the frontmatter; fences are recognized after trimming surrounding
// whitespace. Neither fence is included in the returned strings. An error is
// returned when the opening fence is missing or the frontmatter is never
// closed.
func splitFrontmatter(content string) (frontmatter, body string, err error) {
	text := strings.TrimPrefix(content, "\uFEFF")
	text = strings.ReplaceAll(text, "\r\n", "\n")
	text = strings.ReplaceAll(text, "\r", "\n")
	lines := strings.Split(text, "\n")

	isFence := func(line string) bool { return strings.TrimSpace(line) == "---" }
	hasContent := func(line string) bool { return strings.TrimSpace(line) != "" }

	// Blank lines may precede the opening fence, but nothing else may.
	open := slices.IndexFunc(lines, hasContent)
	if open < 0 || !isFence(lines[open]) {
		return "", "", errors.New("no YAML frontmatter found")
	}

	rest := lines[open+1:]
	closing := slices.IndexFunc(rest, isFence)
	if closing < 0 {
		return "", "", errors.New("unclosed frontmatter")
	}

	frontmatter = strings.Join(rest[:closing], "\n")
	body = strings.Join(rest[closing+1:], "\n")
	return frontmatter, body, nil
}
211
212// Discover finds all valid skills in the given paths.
213func Discover(paths []string) []*Skill {
214 skills, _ := DiscoverWithStates(paths)
215 return skills
216}
217
218// DiscoverWithStates finds all valid skills in the given paths and also
219// returns a per-file state slice describing parse/validation outcomes. Useful
220// for diagnostics and UI reporting.
221func DiscoverWithStates(paths []string) ([]*Skill, []*SkillState) {
222 var skills []*Skill
223 var states []*SkillState
224 var mu sync.Mutex
225 seen := make(map[string]bool)
226 addState := func(name, path string, state DiscoveryState, err error) {
227 mu.Lock()
228 states = append(states, &SkillState{
229 Name: name,
230 Path: path,
231 State: state,
232 Err: err,
233 })
234 mu.Unlock()
235 }
236
237 for _, base := range paths {
238 // We use fastwalk with Follow: true instead of filepath.WalkDir because
239 // WalkDir doesn't follow symlinked directories at any depthโonly entry
240 // points. This ensures skills in symlinked subdirectories are discovered.
241 // fastwalk is concurrent, so we protect shared state (seen, skills) with mu.
242 conf := fastwalk.Config{
243 Follow: true,
244 ToSlash: fastwalk.DefaultToSlash(),
245 }
246 err := fastwalk.Walk(&conf, base, func(path string, d os.DirEntry, err error) error {
247 if err != nil {
248 slog.Warn("Failed to walk skills path entry", "base", base, "path", path, "error", err)
249 addState("", path, StateError, err)
250 return nil
251 }
252 if d.IsDir() || d.Name() != SkillFileName {
253 return nil
254 }
255 mu.Lock()
256 if seen[path] {
257 mu.Unlock()
258 return nil
259 }
260 seen[path] = true
261 mu.Unlock()
262 skill, err := Parse(path)
263 if err != nil {
264 slog.Warn("Failed to parse skill file", "path", path, "error", err)
265 addState("", path, StateError, err)
266 return nil
267 }
268 if err := skill.Validate(); err != nil {
269 slog.Warn("Skill validation failed", "path", path, "error", err)
270 addState(skill.Name, path, StateError, err)
271 return nil
272 }
273 slog.Debug("Successfully loaded skill", "name", skill.Name, "path", path)
274 mu.Lock()
275 skills = append(skills, skill)
276 mu.Unlock()
277 addState(skill.Name, path, StateNormal, nil)
278 return nil
279 })
280 if err != nil && !os.IsNotExist(err) {
281 slog.Warn("Failed to walk skills path", "path", base, "error", err)
282 }
283 }
284
285 // fastwalk traversal order is non-deterministic, so sort for stable output.
286 // Sort by path first, then alphabetically by name within each path.
287 slices.SortStableFunc(skills, func(a, b *Skill) int {
288 if c := strings.Compare(strings.ToLower(a.Path), strings.ToLower(b.Path)); c != 0 {
289 return c
290 }
291 return strings.Compare(strings.ToLower(a.Name), strings.ToLower(b.Name))
292 })
293
294 return skills, states
295}
296
297// ToPromptXML generates XML for injection into the system prompt.
298// Skills with DisableModelInvocation set to true are excluded.
299func ToPromptXML(skills []*Skill) string {
300 if len(skills) == 0 {
301 return ""
302 }
303
304 var sb strings.Builder
305 sb.WriteString("<available_skills>\n")
306 for _, s := range skills {
307 // Skip skills that have disable-model-invocation set
308 if s.DisableModelInvocation {
309 continue
310 }
311 sb.WriteString(" <skill>\n")
312 fmt.Fprintf(&sb, " <name>%s</name>\n", escape(s.Name))
313 fmt.Fprintf(&sb, " <description>%s</description>\n", escape(s.Description))
314 fmt.Fprintf(&sb, " <location>%s</location>\n", escape(s.SkillFilePath))
315 if s.Builtin {
316 sb.WriteString(" <type>builtin</type>\n")
317 }
318 sb.WriteString(" </skill>\n")
319 }
320 sb.WriteString("</available_skills>")
321 return sb.String()
322}
323
324// FormatInvocation generates XML for a skill when invoked as a user command.
325func (s *Skill) FormatInvocation() string {
326 var sb strings.Builder
327 sb.WriteString("<loaded_skill>\n")
328 fmt.Fprintf(&sb, " <name>%s</name>\n", escape(s.Name))
329 fmt.Fprintf(&sb, " <description>%s</description>\n", escape(s.Description))
330 fmt.Fprintf(&sb, " <location>%s</location>\n", escape(s.SkillFilePath))
331 sb.WriteString(" <instructions>\n")
332 sb.WriteString(escape(s.Instructions))
333 sb.WriteString("\n </instructions>\n")
334 sb.WriteString("</loaded_skill>")
335 return sb.String()
336}
337
338func escape(s string) string {
339 return promptReplacer.Replace(s)
340}
341
342// DeduplicateStates removes duplicate skill states by name. When duplicates exist,
343// the last occurrence wins (consistent with Deduplicate for skills).
344func DeduplicateStates(all []*SkillState) []*SkillState {
345 seen := make(map[string]int, len(all))
346 for i, s := range all {
347 if s.Name != "" {
348 seen[s.Name] = i
349 }
350 }
351
352 result := make([]*SkillState, 0, len(seen))
353 for i, s := range all {
354 // If it's the last occurrence of this name, or it has no name (error state), keep it
355 if s.Name == "" || seen[s.Name] == i {
356 result = append(result, s)
357 }
358 }
359 return result
360}
361
362// Deduplicate removes duplicate skills by name. When duplicates exist, the
363// last occurrence wins. This means user skills (appended after builtins)
364// override builtin skills with the same name.
365func Deduplicate(all []*Skill) []*Skill {
366 seen := make(map[string]int, len(all))
367 for i, s := range all {
368 seen[s.Name] = i
369 }
370
371 result := make([]*Skill, 0, len(seen))
372 for i, s := range all {
373 if seen[s.Name] == i {
374 result = append(result, s)
375 }
376 }
377 return result
378}
379
// ApproxTokenCount estimates how many tokens s occupies when sent to an LLM,
// using the common heuristic of roughly four characters per token. The length
// is measured in bytes and the result is rounded up, so any non-empty string
// counts as at least one token. The estimate is meant for diagnostic logging,
// not exact accounting.
func ApproxTokenCount(s string) int {
	const bytesPerToken = 4

	n := len(s)
	if n == 0 {
		return 0
	}
	return (n + bytesPerToken - 1) / bytesPerToken
}
389
390// Filter removes skills whose names appear in the disabled list.
391func Filter(all []*Skill, disabled []string) []*Skill {
392 if len(disabled) == 0 {
393 return all
394 }
395
396 disabledSet := make(map[string]bool, len(disabled))
397 for _, name := range disabled {
398 disabledSet[name] = true
399 }
400
401 result := make([]*Skill, 0, len(all))
402 for _, s := range all {
403 if !disabledSet[s.Name] {
404 result = append(result, s)
405 }
406 }
407 return result
408}