feat(tools): add skill statistics analyzer

Amolith created

Adds skill-stats.go, a tool that analyzes skill files in the repository:
- Parses skill frontmatter and body content
- Counts tokens using the Synthetic API
- Validates skill structure and naming conventions
- Generates detailed reports with token breakdowns
- Supports comparison with git HEAD to track changes
- Includes parallel processing for efficiency

Can be run with the optional --compare flag to show how the current
skill files differ from the versions in the HEAD commit. Comparisons
are integrated into the main output rather than shown as a separate
section.

Assisted-by: Claude Opus 4.5 via Claude Code

Change summary

skill-stats.go | 653 ++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 653 insertions(+)

Detailed changes

skill-stats.go 🔗

@@ -0,0 +1,653 @@
+//usr/bin/env go run "$0" "$@"; exit
+package main
+
+import (
+	"bytes"
+	"encoding/json"
+	"flag"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"regexp"
+	"sort"
+	"strings"
+	"sync"
+	"time"
+)
+
+// syntheticAPI is the endpoint that token-count requests are POSTed to, model
+// is the model identifier sent with every request, and workerCount is the
+// default value of the -workers flag.
+const (
+	syntheticAPI = "https://api.synthetic.new/anthropic/v1/messages/count_tokens"
+	model        = "hf:deepseek-ai/DeepSeek-V3-0324"
+	workerCount  = 5 // Number of parallel API workers
+)
+
+// Frontmatter holds the two fields read from the frontmatter block at the top
+// of a SKILL.md file.
+type Frontmatter struct {
+	Name        string // value of the "name:" line
+	Description string // value of "description:", continuation lines joined by spaces
+}
+
+// TokenCount holds the token counts for each part of a skill.
+type TokenCount struct {
+	Name        int
+	Description int
+	Body        int
+	References  map[string]int // reference file name -> token count
+	Total       int            // Name + Description + Body + all References
+}
+
+// SkillInfo is everything the tool learns about one skill directory.
+type SkillInfo struct {
+	Dir         string // base name of the skill directory
+	Frontmatter Frontmatter
+	BodyLines   int // number of lines in the whitespace-trimmed SKILL.md body
+	Tokens      TokenCount
+	Errors      []string // read, parse and validation problems, as display text
+}
+
+// TokenJob is one piece of text queued for token counting. ID is chosen by
+// the submitter and is copied onto the matching TokenResult.
+type TokenJob struct {
+	ID   string
+	Text string
+}
+
+// TokenResult is the outcome of a TokenJob: the job's ID and the token count
+// obtained for its text.
+type TokenResult struct {
+	ID    string
+	Count int
+}
+
+// SkillComparison describes how a skill's total token count differs from the
+// version stored in HEAD.
+type SkillComparison struct {
+	PrevTotal int     // total tokens at HEAD (0 for a new skill)
+	Delta     int     // current total minus PrevTotal
+	Percent   float64 // Delta as a percentage of PrevTotal
+	IsNew     bool    // set when the HEAD version could not be obtained
+}
+
+// main parses the command-line flags, counts tokens for every skill under
+// skills/, optionally compares the totals against HEAD, and prints one report
+// per skill followed by a summary.
+//
+// Flags:
+//
+//	-compare  also count the HEAD version of each skill and show the change
+//	-workers  number of parallel API workers (must be at least 1)
+//
+// The API key is read from the SYNTHETIC_API_KEY environment variable.
+func main() {
+	compare := flag.Bool("compare", false, "Compare with HEAD commit")
+	workers := flag.Int("workers", workerCount, "Number of parallel API workers")
+	flag.Parse()
+
+	// With no workers nothing ever drains the job queue, so the first wait
+	// for a result would block forever. Reject the value up front instead.
+	if *workers < 1 {
+		fmt.Fprintln(os.Stderr, "Error: -workers must be at least 1")
+		os.Exit(1)
+	}
+
+	apiKey := os.Getenv("SYNTHETIC_API_KEY")
+	if apiKey == "" {
+		fmt.Fprintln(os.Stderr, "Error: SYNTHETIC_API_KEY environment variable not set")
+		os.Exit(1)
+	}
+
+	// Start worker pool
+	counter := newTokenCounter(apiKey, *workers)
+	defer counter.Close()
+
+	skills, err := analyzeSkills(counter)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
+		os.Exit(1)
+	}
+
+	// Build comparison map if requested. It stays nil otherwise, which the
+	// printing code uses to tell "not comparing" from "nothing changed".
+	var comparisons map[string]SkillComparison
+	if *compare {
+		comparisons = buildComparisons(skills, counter)
+	}
+
+	// Sort skills by name for consistent output
+	sort.Slice(skills, func(i, j int) bool {
+		return skills[i].Dir < skills[j].Dir
+	})
+
+	// Print reports
+	for _, skill := range skills {
+		var comp *SkillComparison
+		if c, ok := comparisons[skill.Dir]; ok {
+			comp = &c
+		}
+		printSkillReport(skill, comp)
+	}
+
+	// Print summary
+	printSummary(skills, comparisons)
+}
+
+// TokenCounter manages a pool of workers for parallel token counting.
+// Text is queued with Count and finished counts are read back with GetResult.
+// Workers publish results as they finish, so callers match results to jobs
+// by ID rather than by order.
+type TokenCounter struct {
+	apiKey  string           // bearer token passed to countTokensAPI
+	jobs    chan TokenJob    // pending work; closed by Close
+	results chan TokenResult // finished work; closed by Close after the workers exit
+	wg      sync.WaitGroup   // tracks running workers
+}
+
+// newTokenCounter creates a TokenCounter and starts its worker goroutines.
+// Both the job queue and the result queue are buffered to 100 entries.
+//
+// A workers value below 1 is raised to 1: a pool without workers would never
+// produce a result, so any caller waiting in GetResult would block forever.
+func newTokenCounter(apiKey string, workers int) *TokenCounter {
+	if workers < 1 {
+		workers = 1
+	}
+
+	tc := &TokenCounter{
+		apiKey:  apiKey,
+		jobs:    make(chan TokenJob, 100),
+		results: make(chan TokenResult, 100),
+	}
+
+	// Start workers
+	tc.wg.Add(workers)
+	for i := 0; i < workers; i++ {
+		go tc.worker()
+	}
+
+	return tc
+}
+
+// worker takes jobs off the queue until it is closed, counts the tokens of
+// each job's text and publishes the outcome on the results channel.
+func (tc *TokenCounter) worker() {
+	defer tc.wg.Done()
+	for {
+		job, open := <-tc.jobs
+		if !open {
+			return
+		}
+		tc.results <- TokenResult{
+			ID:    job.ID,
+			Count: countTokensAPI(tc.apiKey, job.Text),
+		}
+	}
+}
+
+// Count queues text for token counting under the given id. It blocks while
+// the job queue is full.
+func (tc *TokenCounter) Count(id, text string) {
+	job := TokenJob{
+		ID:   id,
+		Text: text,
+	}
+	tc.jobs <- job
+}
+
+// GetResult blocks until a worker has published a result and returns it.
+// Once the results channel has been closed and drained it returns the zero
+// TokenResult.
+func (tc *TokenCounter) GetResult() TokenResult {
+	result := <-tc.results
+	return result
+}
+
+// Close shuts the pool down. The order matters: closing the job queue lets
+// the workers' receive loops end, Wait blocks until every worker has
+// returned, and only then is the results channel closed, so no worker can
+// send on a closed channel. Count must not be called after Close.
+func (tc *TokenCounter) Close() {
+	close(tc.jobs)
+	tc.wg.Wait()
+	close(tc.results)
+}
+
+// analyzeSkills analyzes every subdirectory of ./skills (relative to the
+// working directory) and returns one SkillInfo per skill. Plain files in
+// skills/ are ignored. A skill whose analysis returns an error is reported
+// on stderr and left out of the result; only a failure to list the skills
+// directory itself is returned as an error.
+func analyzeSkills(counter *TokenCounter) ([]SkillInfo, error) {
+	const root = "skills"
+
+	dirEntries, err := os.ReadDir(root)
+	if err != nil {
+		return nil, fmt.Errorf("cannot read skills directory: %w", err)
+	}
+
+	var found []SkillInfo
+	for _, de := range dirEntries {
+		if !de.IsDir() {
+			continue
+		}
+
+		info, skillErr := analyzeSkill(filepath.Join(root, de.Name()), counter)
+		if skillErr != nil {
+			fmt.Fprintf(os.Stderr, "Warning: error analyzing %s: %v\n", de.Name(), skillErr)
+			continue
+		}
+		found = append(found, info)
+	}
+
+	return found, nil
+}
+
+func analyzeSkill(path string, counter *TokenCounter) (SkillInfo, error) {
+	skill := SkillInfo{
+		Dir: filepath.Base(path),
+		Tokens: TokenCount{
+			References: make(map[string]int),
+		},
+	}
+
+	// Read SKILL.md
+	skillMdPath := filepath.Join(path, "SKILL.md")
+	content, err := os.ReadFile(skillMdPath)
+	if err != nil {
+		skill.Errors = append(skill.Errors, fmt.Sprintf("Cannot read SKILL.md: %v", err))
+		return skill, nil
+	}
+
+	// Parse frontmatter and body
+	fm, body, err := parseFrontmatter(string(content))
+	if err != nil {
+		skill.Errors = append(skill.Errors, fmt.Sprintf("Cannot parse frontmatter: %v", err))
+		return skill, nil
+	}
+	skill.Frontmatter = fm
+	skill.BodyLines = len(strings.Split(strings.TrimSpace(body), "\n"))
+
+	// Validate
+	skill.Errors = append(skill.Errors, validateSkill(skill)...)
+
+	fmt.Fprintf(os.Stderr, "Analyzing %s...\n", skill.Dir)
+
+	// Submit token counting jobs
+	jobCount := 0
+	counter.Count(fmt.Sprintf("%s:name", skill.Dir), fm.Name)
+	jobCount++
+	counter.Count(fmt.Sprintf("%s:description", skill.Dir), fm.Description)
+	jobCount++
+	counter.Count(fmt.Sprintf("%s:body", skill.Dir), body)
+	jobCount++
+
+	// Count reference files
+	refsPath := filepath.Join(path, "references")
+	var refFiles []string
+	if entries, err := os.ReadDir(refsPath); err == nil {
+		for _, entry := range entries {
+			if entry.IsDir() {
+				continue
+			}
+			refPath := filepath.Join(refsPath, entry.Name())
+			refContent, err := os.ReadFile(refPath)
+			if err == nil {
+				counter.Count(fmt.Sprintf("%s:ref:%s", skill.Dir, entry.Name()), string(refContent))
+				refFiles = append(refFiles, entry.Name())
+				jobCount++
+			}
+		}
+	}
+
+	// Collect results
+	for i := 0; i < jobCount; i++ {
+		result := counter.GetResult()
+		parts := strings.SplitN(result.ID, ":", 3)
+		if len(parts) < 2 {
+			continue
+		}
+
+		switch parts[1] {
+		case "name":
+			skill.Tokens.Name = result.Count
+		case "description":
+			skill.Tokens.Description = result.Count
+		case "body":
+			skill.Tokens.Body = result.Count
+		case "ref":
+			if len(parts) == 3 {
+				skill.Tokens.References[parts[2]] = result.Count
+			}
+		}
+	}
+
+	// Calculate total
+	skill.Tokens.Total = skill.Tokens.Name + skill.Tokens.Description + skill.Tokens.Body
+	for _, count := range skill.Tokens.References {
+		skill.Tokens.Total += count
+	}
+
+	return skill, nil
+}
+
+// parseFrontmatter splits the content of a SKILL.md file into its frontmatter
+// fields and its body.
+//
+// The first line must be "---" and the frontmatter must be closed by a second
+// "---" line. This is a line-oriented scan, not a YAML parser: only the
+// "name" and "description" keys are read. A description may continue on
+// following lines indented by at least two spaces; the pieces are joined with
+// single spaces. A YAML block scalar header after "description:" (such as
+// ">" or "|-") is not treated as part of the text. A trailing carriage return
+// is ignored when interpreting frontmatter lines, so files with CRLF line
+// endings are accepted.
+//
+// It returns the frontmatter and everything after the closing delimiter
+// (unmodified) as the body, or an error when the frontmatter is missing or
+// never closed.
+func parseFrontmatter(content string) (Frontmatter, string, error) {
+	lines := strings.Split(content, "\n")
+	if len(lines) < 3 || strings.TrimSuffix(lines[0], "\r") != "---" {
+		return Frontmatter{}, "", fmt.Errorf("missing frontmatter")
+	}
+
+	var fm Frontmatter
+	var descriptionLines []string
+	inDescription := false
+	endIdx := 0 // index of the closing "---"; 0 means it was never found
+
+	for i := 1; i < len(lines); i++ {
+		line := strings.TrimSuffix(lines[i], "\r")
+		if line == "---" {
+			endIdx = i
+			break
+		}
+
+		switch {
+		case strings.HasPrefix(line, "name:"):
+			fm.Name = strings.TrimSpace(strings.TrimPrefix(line, "name:"))
+			// A new top-level key ends any description in progress.
+			inDescription = false
+
+		case strings.HasPrefix(line, "description:"):
+			// The text may start on this line, on the following indented
+			// lines, or both.
+			descPart := strings.TrimSpace(strings.TrimPrefix(line, "description:"))
+			if descPart != "" && !isBlockScalarIndicator(descPart) {
+				descriptionLines = append(descriptionLines, descPart)
+			}
+			inDescription = true
+
+		case inDescription && strings.HasPrefix(line, "  "):
+			descriptionLines = append(descriptionLines, strings.TrimSpace(line))
+
+		default:
+			inDescription = false
+		}
+	}
+
+	if endIdx == 0 {
+		return Frontmatter{}, "", fmt.Errorf("unclosed frontmatter")
+	}
+
+	fm.Description = strings.Join(descriptionLines, " ")
+	body := strings.Join(lines[endIdx+1:], "\n")
+	return fm, body, nil
+}
+
+// isBlockScalarIndicator reports whether s is a YAML block scalar header:
+// "|" or ">" optionally followed by a chomping indicator ("+" or "-") and/or
+// an indentation indicator (1-9).
+func isBlockScalarIndicator(s string) bool {
+	if s == "" || (s[0] != '|' && s[0] != '>') {
+		return false
+	}
+	return len(s) <= 3 && strings.Trim(s[1:], "+-123456789") == ""
+}
+
+// skillNamePattern matches lowercase alphanumeric words joined by single
+// hyphens. It is compiled once rather than on every validation.
+var skillNamePattern = regexp.MustCompile(`^[a-z0-9]+(-[a-z0-9]+)*$`)
+
+// validateSkill checks a skill's frontmatter and body size against the naming
+// and length rules and returns one message per violated rule (nil when the
+// skill is valid).
+//
+// Rules: the name is 1-64 characters, consists of lowercase letters, digits
+// and single hyphens, and equals the directory name; the description is
+// 1-1024 characters; the body is at most 500 lines. Lengths are counted in
+// characters (runes), not bytes, so non-ASCII text is measured the way the
+// messages describe it.
+func validateSkill(skill SkillInfo) []string {
+	var problems []string
+
+	// Validate name
+	name := skill.Frontmatter.Name
+	if n := len([]rune(name)); n < 1 || n > 64 {
+		problems = append(problems, "name must be 1-64 characters")
+	}
+
+	if !skillNamePattern.MatchString(name) {
+		problems = append(problems, "name must be lowercase letters, numbers, and hyphens only")
+	}
+
+	// Already rejected by the pattern, but reported separately so the user
+	// gets the more specific message as well.
+	if strings.Contains(name, "--") {
+		problems = append(problems, "name cannot contain consecutive hyphens")
+	}
+
+	if name != skill.Dir {
+		problems = append(problems, fmt.Sprintf("name '%s' doesn't match directory '%s'", name, skill.Dir))
+	}
+
+	// Validate description
+	switch descLen := len([]rune(skill.Frontmatter.Description)); {
+	case descLen < 1:
+		problems = append(problems, "description is empty")
+	case descLen > 1024:
+		problems = append(problems, fmt.Sprintf("description is %d characters (max 1024)", descLen))
+	}
+
+	// Check body line count
+	if skill.BodyLines > 500 {
+		problems = append(problems, fmt.Sprintf("body has %d lines (recommended: < 500)", skill.BodyLines))
+	}
+
+	return problems
+}
+
+// apiClient is shared by all workers so that connections are reused. The
+// timeout keeps a stalled request from hanging the whole run.
+var apiClient = &http.Client{Timeout: 30 * time.Second}
+
+// countTokensAPI asks the token-counting endpoint how many input tokens text
+// occupies when sent as a single user message, authenticating with apiKey.
+//
+// A failure never aborts the run: a warning is written to stderr and 0 is
+// returned, so a failed count shows up as 0 tokens in the report.
+func countTokensAPI(apiKey string, text string) int {
+	reqBody := map[string]interface{}{
+		"model": model,
+		"messages": []map[string]string{
+			{
+				"role":    "user",
+				"content": text,
+			},
+		},
+	}
+
+	jsonData, err := json.Marshal(reqBody)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Warning: cannot encode token count request: %v\n", err)
+		return 0
+	}
+
+	req, err := http.NewRequest(http.MethodPost, syntheticAPI, bytes.NewReader(jsonData))
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Warning: cannot build token count request: %v\n", err)
+		return 0
+	}
+
+	req.Header.Set("Authorization", "Bearer "+apiKey)
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("anthropic-version", "2023-06-01")
+
+	resp, err := apiClient.Do(req)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Warning: token count request failed: %v\n", err)
+		return 0
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		// The body is only used for the diagnostic, so cap how much of it
+		// is read; a read error just shortens the message.
+		body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
+		fmt.Fprintf(os.Stderr, "Warning: token count API returned %d: %s\n", resp.StatusCode, body)
+		return 0
+	}
+
+	var result struct {
+		InputTokens int `json:"input_tokens"`
+	}
+
+	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
+		fmt.Fprintf(os.Stderr, "Warning: cannot decode token count response: %v\n", err)
+		return 0
+	}
+
+	return result.InputTokens
+}
+
+func buildComparisons(currentSkills []SkillInfo, counter *TokenCounter) map[string]SkillComparison {
+	comparisons := make(map[string]SkillComparison)
+
+	for _, skill := range currentSkills {
+		prevTotal, err := getSkillTokensFromGit(skill.Dir, counter)
+		if err != nil {
+			// Skill is new
+			if skill.Tokens.Total > 0 {
+				comparisons[skill.Dir] = SkillComparison{
+					PrevTotal: 0,
+					Delta:     skill.Tokens.Total,
+					Percent:   100.0,
+					IsNew:     true,
+				}
+			}
+			continue
+		}
+
+		delta := skill.Tokens.Total - prevTotal
+		var percent float64
+		if prevTotal > 0 {
+			percent = (float64(delta) / float64(prevTotal)) * 100
+		}
+
+		if delta != 0 {
+			comparisons[skill.Dir] = SkillComparison{
+				PrevTotal: prevTotal,
+				Delta:     delta,
+				Percent:   percent,
+				IsNew:     false,
+			}
+		}
+	}
+
+	return comparisons
+}
+
+// getSkillTokensFromGit returns the total token count of the HEAD version of
+// the skill in skills/<skillDir>: the name, description and body of its
+// SKILL.md plus every file directly inside its references/ directory.
+//
+// It shells out to git and must run from the repository root. An error is
+// returned when SKILL.md does not exist at HEAD (or git fails) or when its
+// frontmatter cannot be parsed. Failures to list or read reference files are
+// ignored, so those files just do not contribute to the total.
+func getSkillTokensFromGit(skillDir string, counter *TokenCounter) (int, error) {
+	// Get file from HEAD
+	skillPath := fmt.Sprintf("skills/%s/SKILL.md", skillDir)
+	output, err := exec.Command("git", "show", "HEAD:"+skillPath).Output()
+	if err != nil {
+		return 0, err
+	}
+
+	// Parse frontmatter and body
+	fm, body, err := parseFrontmatter(string(output))
+	if err != nil {
+		return 0, err
+	}
+
+	jobs := []TokenJob{
+		{ID: fmt.Sprintf("prev:%s:name", skillDir), Text: fm.Name},
+		{ID: fmt.Sprintf("prev:%s:description", skillDir), Text: fm.Description},
+		{ID: fmt.Sprintf("prev:%s:body", skillDir), Text: body},
+	}
+
+	// Get reference files from HEAD. -z makes git emit NUL-terminated,
+	// unquoted paths, so unusual file names survive the round trip to
+	// "git show".
+	refsPath := fmt.Sprintf("skills/%s/references", skillDir)
+	listing, err := exec.Command("git", "ls-tree", "-r", "-z", "--name-only", "HEAD", refsPath).Output()
+	if err == nil {
+		for _, refPath := range strings.Split(string(listing), "\x00") {
+			if refPath == "" {
+				continue
+			}
+			// The working-tree analysis only counts files directly inside
+			// references/, so skip anything in a nested directory to keep
+			// the two totals comparable.
+			if strings.Contains(strings.TrimPrefix(refPath, refsPath+"/"), "/") {
+				continue
+			}
+			refContent, err := exec.Command("git", "show", "HEAD:"+refPath).Output()
+			if err != nil {
+				continue
+			}
+			jobs = append(jobs, TokenJob{ID: fmt.Sprintf("prev:%s:ref", skillDir), Text: string(refContent)})
+		}
+	}
+
+	// Submit from a separate goroutine while this one collects; submitting
+	// everything first would deadlock once the job and result buffers fill.
+	go func() {
+		for _, job := range jobs {
+			counter.Count(job.ID, job.Text)
+		}
+	}()
+
+	// Collect results: only the sum matters here, so IDs are not inspected.
+	total := 0
+	for range jobs {
+		total += counter.GetResult().Count
+	}
+
+	return total, nil
+}
+
+func printSkillReport(skill SkillInfo, comp *SkillComparison) {
+	fmt.Printf("\n=== %s ===\n", skill.Dir)
+
+	if len(skill.Errors) > 0 {
+		fmt.Println("\nValidation errors:")
+		for _, err := range skill.Errors {
+			fmt.Printf("  ✗ %s\n", err)
+		}
+	}
+
+	fmt.Println("\nToken breakdown:")
+	fmt.Printf("  Name:        %5d tokens\n", skill.Tokens.Name)
+	fmt.Printf("  Description: %5d tokens\n", skill.Tokens.Description)
+	fmt.Printf("  Body:        %5d tokens (%d lines)\n", skill.Tokens.Body, skill.BodyLines)
+
+	if len(skill.Tokens.References) > 0 {
+		fmt.Println("  References:")
+		// Sort reference names for consistent output
+		refNames := make([]string, 0, len(skill.Tokens.References))
+		for name := range skill.Tokens.References {
+			refNames = append(refNames, name)
+		}
+		sort.Strings(refNames)
+
+		for _, name := range refNames {
+			count := skill.Tokens.References[name]
+			fmt.Printf("    %-40s %5d tokens\n", name, count)
+		}
+	}
+
+	fmt.Println("  ───────────────────────────────────────────────")
+
+	// Print total with comparison if available
+	if comp != nil {
+		sign := "+"
+		if comp.Delta < 0 {
+			sign = ""
+		}
+		indicator := ""
+		if comp.IsNew {
+			indicator = " [NEW]"
+		} else if comp.Percent > 20 {
+			indicator = " ⚠️"
+		} else if comp.Percent < -20 {
+			indicator = " ✓"
+		}
+		fmt.Printf("  Total:       %5d tokens (%s%d, %s%.1f%% from HEAD)%s\n",
+			skill.Tokens.Total, sign, comp.Delta, sign, comp.Percent, indicator)
+	} else {
+		fmt.Printf("  Total:       %5d tokens\n", skill.Tokens.Total)
+	}
+
+	// Warn if approaching budget
+	if skill.Tokens.Body > 5000 {
+		fmt.Println("  ⚠️  Body exceeds recommended 5000 token budget!")
+	} else if skill.Tokens.Body > 4000 {
+		fmt.Println("  ⚠️  Body approaching 5000 token budget")
+	}
+}
+
+// printSummary writes the overall summary to stdout: the number of skills,
+// the total token count (with the change from HEAD when comparing), the
+// number of validation errors, the five largest skills, and - when
+// comparisons holds entries - the five biggest changes.
+//
+// comparisons may be nil, meaning no comparison was requested. The skills
+// slice is not modified. Ties in either ranking are broken by name so the
+// output is the same on every run.
+func printSummary(skills []SkillInfo, comparisons map[string]SkillComparison) {
+	fmt.Println("\n" + strings.Repeat("=", 60))
+	fmt.Println("SUMMARY")
+	fmt.Println(strings.Repeat("=", 60))
+
+	totalTokens := 0
+	totalErrors := 0
+	totalDelta := 0
+
+	for _, skill := range skills {
+		totalTokens += skill.Tokens.Total
+		totalErrors += len(skill.Errors)
+		// Looking up a key in a nil map is safe and reports "not found".
+		if comp, ok := comparisons[skill.Dir]; ok {
+			totalDelta += comp.Delta
+		}
+	}
+
+	fmt.Printf("\nTotal skills: %d\n", len(skills))
+	if comparisons != nil && totalDelta != 0 {
+		fmt.Printf("Total tokens: %d (%+d from HEAD)\n", totalTokens, totalDelta)
+	} else {
+		fmt.Printf("Total tokens: %d\n", totalTokens)
+	}
+	fmt.Printf("Validation errors: %d\n", totalErrors)
+
+	// Find largest skills. Rank a copy so the caller's ordering is kept.
+	ranked := make([]SkillInfo, len(skills))
+	copy(ranked, skills)
+	sort.Slice(ranked, func(i, j int) bool {
+		if ranked[i].Tokens.Total != ranked[j].Tokens.Total {
+			return ranked[i].Tokens.Total > ranked[j].Tokens.Total
+		}
+		return ranked[i].Dir < ranked[j].Dir
+	})
+
+	fmt.Println("\nLargest skills (by total tokens):")
+	for i := 0; i < 5 && i < len(ranked); i++ {
+		skill := ranked[i]
+		if comp, ok := comparisons[skill.Dir]; ok {
+			sign := "+"
+			if comp.Delta < 0 {
+				sign = ""
+			}
+			fmt.Printf("  %d. %-40s %5d tokens (%s%d)\n",
+				i+1, skill.Dir, skill.Tokens.Total, sign, comp.Delta)
+		} else {
+			fmt.Printf("  %d. %-40s %5d tokens\n", i+1, skill.Dir, skill.Tokens.Total)
+		}
+	}
+
+	// Show biggest changes if comparing
+	if len(comparisons) == 0 {
+		return
+	}
+
+	type changeEntry struct {
+		name string
+		comp SkillComparison
+	}
+	changes := make([]changeEntry, 0, len(comparisons))
+	for name, comp := range comparisons {
+		changes = append(changes, changeEntry{name, comp})
+	}
+
+	magnitude := func(delta int) int {
+		if delta < 0 {
+			return -delta
+		}
+		return delta
+	}
+	// Largest absolute change first; the name tie-break makes the order
+	// independent of map iteration order.
+	sort.Slice(changes, func(i, j int) bool {
+		absI, absJ := magnitude(changes[i].comp.Delta), magnitude(changes[j].comp.Delta)
+		if absI != absJ {
+			return absI > absJ
+		}
+		return changes[i].name < changes[j].name
+	})
+
+	fmt.Println("\nBiggest changes:")
+	for i := 0; i < 5 && i < len(changes); i++ {
+		change := changes[i]
+
+		sign := "+"
+		if change.comp.Delta < 0 {
+			sign = ""
+		}
+
+		indicator := ""
+		switch {
+		case change.comp.IsNew:
+			indicator = " [NEW]"
+		case change.comp.Percent > 20:
+			indicator = " ⚠️"
+		case change.comp.Percent < -20:
+			indicator = " ✓"
+		}
+
+		fmt.Printf("  %-40s %s%-5d tokens (%s%.1f%%)%s\n",
+			change.name, sign, change.comp.Delta, sign, change.comp.Percent, indicator)
+	}
+}