From 7a6a179ccc0fc1d5744bad3a7ccabd085a6474ca Mon Sep 17 00:00:00 2001 From: huaiyuWangh <34158348+huaiyuWangh@users.noreply.github.com> Date: Tue, 14 Apr 2026 04:22:43 +0800 Subject: [PATCH] refactor: simplify skills parsing and improve discovery visibility (#2350) --- internal/skills/skills.go | 48 +++++++++++++++++++++----- internal/skills/skills_test.go | 62 ++++++++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+), 9 deletions(-) diff --git a/internal/skills/skills.go b/internal/skills/skills.go index 607cdc779e10138ef1225323cfffc7d5884004ec..c14cdeb7231b8862841118dbee835c54dced1bd5 100644 --- a/internal/skills/skills.go +++ b/internal/skills/skills.go @@ -9,6 +9,8 @@ import ( "os" "path/filepath" "regexp" + "slices" + "sort" "strings" "sync" @@ -23,7 +25,10 @@ const ( MaxCompatibilityLength = 500 ) -var namePattern = regexp.MustCompile(`^[a-zA-Z0-9]+(-[a-zA-Z0-9]+)*$`) +var ( + namePattern = regexp.MustCompile(`^[a-zA-Z0-9]+(-[a-zA-Z0-9]+)*$`) + promptReplacer = strings.NewReplacer("&", "&", "<", "<", ">", ">", "\"", """, "'", "'") +) // Skill represents a parsed SKILL.md file. type Skill struct { @@ -106,19 +111,31 @@ func ParseContent(content []byte) (*Skill, error) { // splitFrontmatter extracts YAML frontmatter and body from markdown content. func splitFrontmatter(content string) (frontmatter, body string, err error) { + // Strip UTF-8 BOM for compatibility with editors that include it. + content = strings.TrimPrefix(content, "\uFEFF") // Normalize line endings to \n for consistent parsing. content = strings.ReplaceAll(content, "\r\n", "\n") - if !strings.HasPrefix(content, "---\n") { + content = strings.ReplaceAll(content, "\r", "\n") + + lines := strings.Split(content, "\n") + start := slices.IndexFunc(lines, func(line string) bool { + return strings.TrimSpace(line) != "" + }) + if start == -1 || strings.TrimSpace(lines[start]) != "---" { return "", "", errors.New("no YAML frontmatter found") } - rest := strings.TrimPrefix(content, "---\n") - before, after, ok := strings.Cut(rest, "\n---") - if !ok { + endOffset := slices.IndexFunc(lines[start+1:], func(line string) bool { + return strings.TrimSpace(line) == "---" + }) + if endOffset == -1 { return "", "", errors.New("unclosed frontmatter") } + end := start + 1 + endOffset - return before, after, nil + frontmatter = strings.Join(lines[start+1:end], "\n") + body = strings.Join(lines[end+1:], "\n") + return frontmatter, body, nil } // Discover finds all valid skills in the given paths. @@ -136,8 +153,9 @@ func Discover(paths []string) []*Skill { Follow: true, ToSlash: fastwalk.DefaultToSlash(), } - fastwalk.Walk(&conf, base, func(path string, d os.DirEntry, err error) error { + err := fastwalk.Walk(&conf, base, func(path string, d os.DirEntry, err error) error { if err != nil { + slog.Warn("Failed to walk skills path entry", "base", base, "path", path, "error", err) return nil } if d.IsDir() || d.Name() != SkillFileName { @@ -165,8 +183,21 @@ func Discover(paths []string) []*Skill { mu.Unlock() return nil }) + if err != nil { + slog.Warn("Failed to walk skills path", "path", base, "error", err) + } } + // fastwalk traversal order is non-deterministic, so sort for stable output. + sort.SliceStable(skills, func(i, j int) bool { + left := strings.ToLower(skills[i].SkillFilePath) + right := strings.ToLower(skills[j].SkillFilePath) + if left == right { + return skills[i].SkillFilePath < skills[j].SkillFilePath + } + return left < right + }) + return skills } @@ -193,8 +224,7 @@ func ToPromptXML(skills []*Skill) string { } func escape(s string) string { - r := strings.NewReplacer("&", "&", "<", "<", ">", ">", "\"", """, "'", "'") - return r.Replace(s) + return promptReplacer.Replace(s) } // Deduplicate removes duplicate skills by name. When duplicates exist, the diff --git a/internal/skills/skills_test.go b/internal/skills/skills_test.go index fcd5e71e2cb73ae5ba143ff62cc0628bda7dd9b0..2c3abd3933b1f49735d14266d96897f5c3b896d3 100644 --- a/internal/skills/skills_test.go +++ b/internal/skills/skills_test.go @@ -63,6 +63,39 @@ Instructions here. wantDesc: "A simple skill for testing.", wantInstr: "# My Skill\n\nInstructions here.", }, + { + name: "frontmatter with utf8 bom", + content: "\uFEFF---\n" + + "name: bom-skill\n" + + "description: Skill with bom.\n" + + "---\n\n" + + "# BOM Skill\n", + wantName: "bom-skill", + wantDesc: "Skill with bom.", + wantInstr: "# BOM Skill", + }, + { + name: "frontmatter with leading blank lines", + content: "\n\n---\n" + + "name: blank-prefix\n" + + "description: Skill with leading blank lines.\n" + + "---\n\n" + + "# Blank Prefix\n", + wantName: "blank-prefix", + wantDesc: "Skill with leading blank lines.", + wantInstr: "# Blank Prefix", + }, + { + name: "frontmatter delimiter with trailing spaces", + content: "--- \n" + + "name: spaced-delimiter\n" + + "description: Delimiter has spaces.\n" + + "--- \n\n" + + "# Spaced Delimiter\n", + wantName: "spaced-delimiter", + wantDesc: "Delimiter has spaces.", + wantInstr: "# Spaced Delimiter", + }, { name: "no frontmatter", content: "# Just Markdown\n\nNo frontmatter here.", @@ -217,6 +250,7 @@ description: Name doesn't match directory. skills := Discover([]string{tmpDir}) require.Len(t, skills, 2) + require.Equal(t, []string{"skill-two", "skill-one"}, []string{skills[0].Name, skills[1].Name}) names := make(map[string]bool) for _, s := range skills { @@ -248,6 +282,34 @@ func TestToPromptXMLEmpty(t *testing.T) { require.Empty(t, ToPromptXML([]*Skill{})) } +func TestEscape(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + in string + want string + }{ + { + name: "escape xml special chars", + in: `'z'`, + want: `<tag attr="x&y">'z'</tag>`, + }, + { + name: "plain text unchanged", + in: "hello world", + want: "hello world", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + require.Equal(t, tt.want, escape(tt.in)) + }) + } +} + func TestToPromptXMLBuiltinType(t *testing.T) { t.Parallel()