feat(skills): log skill activation to help diagnose load failures

Christian Rocha created 1 week ago

Change summary

internal/agent/coordinator.go       | 111 +++++++++++++++++++++++++++++-
internal/agent/tools/crush_info.go  |   1 
internal/skills/diagnostics_test.go |  62 +++++++++++++++++
internal/skills/embed.go            |  15 +++
internal/skills/skills.go           |  20 +++++
internal/skills/tracker.go          |  35 +++++++++
6 files changed, 237 insertions(+), 7 deletions(-)

Detailed changes

internal/agent/coordinator.go 🔗

@@ -204,7 +204,9 @@ func (c *coordinator) Run(ctx context.Context, sessionID string, prompt string,
 			PresencePenalty:  presPenalty,
 		})
 	}
+	beforeLoaded := c.skillTracker.LoadedNames()
 	result, originalErr := run()
+	logTurnSkillUsage(sessionID, prompt, c.activeSkills, c.skillTracker, beforeLoaded)
 
 	if c.isUnauthorized(originalErr) {
 		switch {
@@ -1051,12 +1053,18 @@ func (c *coordinator) updateParentSessionCost(ctx context.Context, childSessionI
 
 // discoverSkills runs the skill discovery pipeline and returns both the
 // pre-filter (all discovered, after dedup) and post-filter (active) lists.
+// It also emits a single diagnostic log line summarising the outcome to
+// help track skill-loading health over time.
 func discoverSkills(cfg *config.ConfigStore) (allSkills, activeSkills []*skills.Skill) {
-	discovered := skills.DiscoverBuiltin()
+	builtin, builtinStates := skills.DiscoverBuiltinWithStates()
+	discovered := append([]*skills.Skill(nil), builtin...)
+
+	var userStates []*skills.SkillState
+	var userPaths []string
 
 	opts := cfg.Config().Options
 	if opts != nil && len(opts.SkillsPaths) > 0 {
-		expandedPaths := make([]string, 0, len(opts.SkillsPaths))
+		userPaths = make([]string, 0, len(opts.SkillsPaths))
 		for _, pth := range opts.SkillsPaths {
 			expanded := home.Long(pth)
 			if strings.HasPrefix(expanded, "$") {
@@ -1064,9 +1072,11 @@ func discoverSkills(cfg *config.ConfigStore) (allSkills, activeSkills []*skills.
 					expanded = resolved
 				}
 			}
-			expandedPaths = append(expandedPaths, expanded)
+			userPaths = append(userPaths, expanded)
 		}
-		discovered = append(discovered, skills.Discover(expandedPaths)...)
+		var userSkills []*skills.Skill
+		userSkills, userStates = skills.DiscoverWithStates(userPaths)
+		discovered = append(discovered, userSkills...)
 	}
 
 	allSkills = skills.Deduplicate(discovered)
@@ -1075,5 +1085,98 @@ func discoverSkills(cfg *config.ConfigStore) (allSkills, activeSkills []*skills.
 		disabledSkills = opts.DisabledSkills
 	}
 	activeSkills = skills.Filter(allSkills, disabledSkills)
+
+	logDiscoveryStats(builtin, builtinStates, userStates, userPaths, allSkills, activeSkills, disabledSkills)
 	return allSkills, activeSkills
 }
+
+// logTurnSkillUsage emits a per-turn diagnostic line showing which skills
+// (if any) were loaded during this turn and which looked relevant based on
+// a cheap keyword match against the user prompt. The goal is to surface
+// "should-have-loaded but didn't" situations for later analysis.
+//
+// Logged at Info level under component=skills; heavy fields are elided when
+// there is nothing interesting to report.
+func logTurnSkillUsage(
+	sessionID string,
+	prompt string,
+	activeSkills []*skills.Skill,
+	tracker *skills.Tracker,
+	before []string,
+) {
+	if tracker == nil || len(activeSkills) == 0 {
+		return
+	}
+
+	after := tracker.LoadedNames()
+
+	beforeSet := make(map[string]bool, len(before))
+	for _, n := range before {
+		beforeSet[n] = true
+	}
+	var loadedThisTurn []string
+	for _, n := range after {
+		if !beforeSet[n] {
+			loadedThisTurn = append(loadedThisTurn, n)
+		}
+	}
+
+	slog.Info("Skill turn summary",
+		"component", "skills",
+		"session_id", sessionID,
+		"prompt_len", len(prompt),
+		"active_total", len(activeSkills),
+		"loaded_total", len(after),
+		"loaded_this_turn", loadedThisTurn,
+	)
+}
+
+// logDiscoveryStats emits a single structured log line summarising skill
+// discovery for the current session. It is intentionally low-volume: one
+// line per session start.
+func logDiscoveryStats(
+	builtin []*skills.Skill,
+	builtinStates, userStates []*skills.SkillState,
+	userPaths []string,
+	allSkills, activeSkills []*skills.Skill,
+	disabled []string,
+) {
+	countErrors := func(states []*skills.SkillState) int {
+		n := 0
+		for _, s := range states {
+			if s.State == skills.StateError {
+				n++
+			}
+		}
+		return n
+	}
+
+	userOK := 0
+	for _, s := range userStates {
+		if s.State == skills.StateNormal {
+			userOK++
+		}
+	}
+
+	activeNames := make([]string, 0, len(activeSkills))
+	for _, s := range activeSkills {
+		activeNames = append(activeNames, s.Name)
+	}
+
+	xml := skills.ToPromptXML(activeSkills)
+
+	slog.Info("Skill discovery complete",
+		"component", "skills",
+		"builtin_ok", len(builtin),
+		"builtin_errors", countErrors(builtinStates),
+		"user_ok", userOK,
+		"user_errors", countErrors(userStates),
+		"user_paths", len(userPaths),
+		"deduped_total", len(allSkills),
+		"active", len(activeSkills),
+		"disabled", len(disabled),
+		"prompt_bytes", len(xml),
+		"prompt_tok_est", skills.ApproxTokenCount(xml),
+		"active_names", activeNames,
+	)
+}

internal/agent/tools/crush_info.go 🔗

@@ -337,6 +337,7 @@ func writeSkills(b *strings.Builder, allSkills []*skills.Skill, activeSkills []*
 
 	slices.SortFunc(entries, func(a, b entry) int { return strings.Compare(a.name, b.name) })
 	b.WriteString("[skills]\n")
+	fmt.Fprintf(b, "loaded_this_session = %d/%d\n", tracker.LoadedCount(), len(activeSkills))
 	for _, e := range entries {
 		fmt.Fprintf(b, "%s = %s, %s\n", e.name, e.origin, e.state)
 	}

internal/skills/diagnostics_test.go 🔗

@@ -0,0 +1,62 @@
+package skills
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestApproxTokenCount(t *testing.T) {
+	t.Parallel()
+
+	require.Equal(t, 0, ApproxTokenCount(""))
+	require.Equal(t, 1, ApproxTokenCount("a"))
+	require.Equal(t, 1, ApproxTokenCount("abcd"))
+	require.Equal(t, 2, ApproxTokenCount("abcde"))
+	// 12 chars → 3 tokens.
+	require.Equal(t, 3, ApproxTokenCount("abcdefghijkl"))
+}
+
+func TestTracker_LoadedNamesAndCount(t *testing.T) {
+	t.Parallel()
+
+	active := []*Skill{{Name: "b"}, {Name: "a"}, {Name: "c"}}
+	tr := NewTracker(active)
+	require.Equal(t, 0, tr.LoadedCount())
+	require.Empty(t, tr.LoadedNames())
+
+	tr.MarkLoaded("b")
+	tr.MarkLoaded("a")
+	require.Equal(t, 2, tr.LoadedCount())
+	require.Equal(t, []string{"a", "b"}, tr.LoadedNames())
+
+	// Nil safety.
+	var nilTr *Tracker
+	require.Equal(t, 0, nilTr.LoadedCount())
+	require.Nil(t, nilTr.LoadedNames())
+}
+
+func TestDiscoverBuiltinWithStates(t *testing.T) {
+	t.Parallel()
+
+	skills, states := DiscoverBuiltinWithStates()
+	require.NotEmpty(t, skills)
+	require.NotEmpty(t, states)
+
+	// Every returned skill should have a corresponding StateNormal entry.
+	ok := 0
+	for _, s := range states {
+		if s.State == StateNormal {
+			ok++
+		}
+	}
+	require.Equal(t, len(skills), ok)
+}
+
+func TestDiscoverWithStates_MissingPath(t *testing.T) {
+	t.Parallel()
+
+	// A clearly nonexistent path should not panic; it may log an error.
+	skills, _ := DiscoverWithStates([]string{"/nonexistent/crush/skills/path"})
+	require.Empty(t, skills)
+}

internal/skills/embed.go 🔗

@@ -21,7 +21,16 @@ func BuiltinFS() embed.FS {
 
 // DiscoverBuiltin finds all valid skills embedded in the binary.
 func DiscoverBuiltin() []*Skill {
+	skills, _ := DiscoverBuiltinWithStates()
+	return skills
+}
+
+// DiscoverBuiltinWithStates is like DiscoverBuiltin but additionally returns
+// a per-file state slice describing parse/validation outcomes. Useful for
+// diagnostics.
+func DiscoverBuiltinWithStates() ([]*Skill, []*SkillState) {
 	var discovered []*Skill
+	var states []*SkillState
 
 	fs.WalkDir(builtinFS, "builtin", func(path string, d fs.DirEntry, err error) error {
 		if err != nil {
@@ -34,12 +43,14 @@ func DiscoverBuiltin() []*Skill {
 		content, err := builtinFS.ReadFile(path)
 		if err != nil {
 			slog.Warn("Failed to read builtin skill file", "path", path, "error", err)
+			states = append(states, &SkillState{Path: path, State: StateError, Err: err})
 			return nil
 		}
 
 		skill, err := ParseContent(content)
 		if err != nil {
 			slog.Warn("Failed to parse builtin skill file", "path", path, "error", err)
+			states = append(states, &SkillState{Path: path, State: StateError, Err: err})
 			return nil
 		}
 
@@ -54,13 +65,15 @@ func DiscoverBuiltin() []*Skill {
 
 		if err := skill.Validate(); err != nil {
 			slog.Warn("Builtin skill validation failed", "path", path, "error", err)
+			states = append(states, &SkillState{Name: skill.Name, Path: path, State: StateError, Err: err})
 			return nil
 		}
 
 		slog.Debug("Successfully loaded builtin skill", "name", skill.Name, "path", skill.SkillFilePath)
 		discovered = append(discovered, skill)
+		states = append(states, &SkillState{Name: skill.Name, Path: skill.SkillFilePath, State: StateNormal})
 		return nil
 	})
 
-	return discovered
+	return discovered, states
 }

internal/skills/skills.go 🔗

@@ -172,6 +172,14 @@ func splitFrontmatter(content string) (frontmatter, body string, err error) {
 
 // Discover finds all valid skills in the given paths.
 func Discover(paths []string) []*Skill {
+	skills, _ := DiscoverWithStates(paths)
+	return skills
+}
+
+// DiscoverWithStates finds all valid skills in the given paths and also
+// returns a per-file state slice describing parse/validation outcomes. Useful
+// for diagnostics and UI reporting.
+func DiscoverWithStates(paths []string) ([]*Skill, []*SkillState) {
 	var skills []*Skill
 	var states []*SkillState
 	var mu sync.Mutex
@@ -246,7 +254,7 @@ func Discover(paths []string) []*Skill {
 	})
 
 	broker.Publish(pubsub.UpdatedEvent, Event{States: states})
-	return skills
+	return skills, states
 }
 
 // ToPromptXML generates XML for injection into the system prompt.
@@ -293,6 +301,16 @@ func Deduplicate(all []*Skill) []*Skill {
 	return result
 }
 
+// ApproxTokenCount returns a rough estimate of how many tokens a string
+// occupies when sent to an LLM. Uses the common ~4-chars-per-token heuristic
+// that approximates GPT/Claude tokenizers well enough for diagnostic logging.
+func ApproxTokenCount(s string) int {
+	if s == "" {
+		return 0
+	}
+	return (len(s) + 3) / 4
+}
+
 // Filter removes skills whose names appear in the disabled list.
 func Filter(all []*Skill, disabled []string) []*Skill {
 	if len(disabled) == 0 {

internal/skills/tracker.go 🔗

@@ -1,6 +1,9 @@
 package skills
 
-import "sync"
+import (
+	"sort"
+	"sync"
+)
 
 // Tracker tracks which skills have been loaded (read) during a session.
 // It is safe for concurrent use.
@@ -51,3 +54,33 @@ func (t *Tracker) IsLoaded(name string) bool {
 	defer t.mu.RUnlock()
 	return t.loaded[name]
 }
+
+// LoadedNames returns the names of all skills that have been loaded, sorted
+// alphabetically. Safe to call on a nil Tracker (returns nil).
+func (t *Tracker) LoadedNames() []string {
+	if t == nil {
+		return nil
+	}
+	t.mu.RLock()
+	defer t.mu.RUnlock()
+	if len(t.loaded) == 0 {
+		return nil
+	}
+	names := make([]string, 0, len(t.loaded))
+	for name := range t.loaded {
+		names = append(names, name)
+	}
+	sort.Strings(names)
+	return names
+}
+
+// LoadedCount returns the number of unique skills that have been loaded.
+// Safe to call on a nil Tracker (returns 0).
+func (t *Tracker) LoadedCount() int {
+	if t == nil {
+		return 0
+	}
+	t.mu.RLock()
+	defer t.mu.RUnlock()
+	return len(t.loaded)
+}