skill-stats.go

  1//usr/bin/env go run "$0" "$@"; exit
  2package main
  3
  4import (
  5	"bytes"
  6	"encoding/json"
  7	"flag"
  8	"fmt"
  9	"io"
 10	"net/http"
 11	"os"
 12	"os/exec"
 13	"path/filepath"
 14	"regexp"
 15	"sort"
 16	"strings"
 17	"sync"
 18)
 19
const (
	// syntheticAPI is the token-counting endpoint that countTokensAPI POSTs to.
	syntheticAPI = "https://api.synthetic.new/anthropic/v1/messages/count_tokens"
	// model is the model identifier sent in the body of every token-count request.
	model        = "hf:deepseek-ai/DeepSeek-V3-0324"
	workerCount  = 5 // Default number of parallel API workers (the -workers flag overrides it)
)
 25
// Frontmatter holds the fields that parseFrontmatter extracts from the header
// of a SKILL.md file.
type Frontmatter struct {
	Name        string // value of the "name:" line
	Description string // value of the "description:" entry; continuation lines are joined with spaces
}
 30
// TokenCount is the per-skill token breakdown produced by analyzeSkill.
type TokenCount struct {
	Name        int            // tokens in the frontmatter name
	Description int            // tokens in the frontmatter description
	Body        int            // tokens in the text after the frontmatter
	References  map[string]int // tokens per file in the skill's references directory, keyed by file name
	Total       int            // Name + Description + Body + every References entry
}
 38
// SkillInfo is everything the tool reports about one skill directory.
type SkillInfo struct {
	Dir         string      // base name of the skill directory
	Frontmatter Frontmatter // parsed SKILL.md header
	BodyLines   int         // number of lines in the whitespace-trimmed body
	Tokens      TokenCount  // token breakdown
	Errors      []string    // read, parse and validation problems, as printable messages
}
 46
// TokenJob is one unit of work for the TokenCounter pool.
type TokenJob struct {
	ID   string // caller-chosen label, copied unchanged into the matching TokenResult
	Text string // text whose tokens should be counted
}
 51
// TokenResult is the outcome of one TokenJob.
type TokenResult struct {
	ID    string // ID of the job this result belongs to
	Count int    // value returned by countTokensAPI for the job's text (0 when the request failed)
}
 56
// SkillComparison describes how a skill's total token count differs from the
// version stored in the HEAD commit.
type SkillComparison struct {
	PrevTotal int     // total tokens of the HEAD version (0 for a new skill)
	Delta     int     // current total minus PrevTotal
	Percent   float64 // Delta as a percentage of PrevTotal; 100 for new skills, 0 when PrevTotal is 0
	IsNew     bool    // true when the HEAD version could not be loaded
}
 63
 64func main() {
 65	compare := flag.Bool("compare", false, "Compare with HEAD commit")
 66	workers := flag.Int("workers", workerCount, "Number of parallel API workers")
 67	flag.Parse()
 68
 69	apiKey := os.Getenv("SYNTHETIC_API_KEY")
 70	if apiKey == "" {
 71		fmt.Fprintln(os.Stderr, "Error: SYNTHETIC_API_KEY environment variable not set")
 72		os.Exit(1)
 73	}
 74
 75	// Start worker pool
 76	counter := newTokenCounter(apiKey, *workers)
 77	defer counter.Close()
 78
 79	skills, err := analyzeSkills(counter)
 80	if err != nil {
 81		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
 82		os.Exit(1)
 83	}
 84
 85	// Build comparison map if requested
 86	var comparisons map[string]SkillComparison
 87	if *compare {
 88		comparisons = buildComparisons(skills, counter)
 89	}
 90
 91	// Sort skills by name for consistent output
 92	sort.Slice(skills, func(i, j int) bool {
 93		return skills[i].Dir < skills[j].Dir
 94	})
 95
 96	// Print reports
 97	for _, skill := range skills {
 98		var comp *SkillComparison
 99		if comparisons != nil {
100			if c, ok := comparisons[skill.Dir]; ok {
101				comp = &c
102			}
103		}
104		printSkillReport(skill, comp)
105	}
106
107	// Print summary
108	printSummary(skills, comparisons)
109}
110
// TokenCounter manages a pool of workers for parallel token counting.
// Jobs are queued with Count and their results are read back, one per job and
// in completion order, with GetResult.
type TokenCounter struct {
	apiKey  string           // bearer token passed to countTokensAPI
	jobs    chan TokenJob    // work queue consumed by the workers (buffered)
	results chan TokenResult // finished counts produced by the workers (buffered)
	wg      sync.WaitGroup   // tracks running workers so Close can wait for them
}
118
119func newTokenCounter(apiKey string, workers int) *TokenCounter {
120	tc := &TokenCounter{
121		apiKey:  apiKey,
122		jobs:    make(chan TokenJob, 100),
123		results: make(chan TokenResult, 100),
124	}
125
126	// Start workers
127	for i := 0; i < workers; i++ {
128		tc.wg.Add(1)
129		go tc.worker()
130	}
131
132	return tc
133}
134
135func (tc *TokenCounter) worker() {
136	defer tc.wg.Done()
137	for job := range tc.jobs {
138		count := countTokensAPI(tc.apiKey, job.Text)
139		tc.results <- TokenResult{ID: job.ID, Count: count}
140	}
141}
142
143func (tc *TokenCounter) Count(id, text string) {
144	tc.jobs <- TokenJob{ID: id, Text: text}
145}
146
147func (tc *TokenCounter) GetResult() TokenResult {
148	return <-tc.results
149}
150
// Close shuts the worker pool down. No further Count calls may be made after
// it: sending on the closed jobs channel would panic.
func (tc *TokenCounter) Close() {
	close(tc.jobs)    // ends each worker's receive loop once the queue is drained
	tc.wg.Wait()      // wait until every worker has returned
	close(tc.results) // only now is it certain that no worker will send again
}
156
157func analyzeSkills(counter *TokenCounter) ([]SkillInfo, error) {
158	skillsDir := "skills"
159	entries, err := os.ReadDir(skillsDir)
160	if err != nil {
161		return nil, fmt.Errorf("cannot read skills directory: %w", err)
162	}
163
164	var skills []SkillInfo
165	for _, entry := range entries {
166		if !entry.IsDir() {
167			continue
168		}
169
170		skillPath := filepath.Join(skillsDir, entry.Name())
171		skill, err := analyzeSkill(skillPath, counter)
172		if err != nil {
173			fmt.Fprintf(os.Stderr, "Warning: error analyzing %s: %v\n", entry.Name(), err)
174			continue
175		}
176		skills = append(skills, skill)
177	}
178
179	return skills, nil
180}
181
182func analyzeSkill(path string, counter *TokenCounter) (SkillInfo, error) {
183	skill := SkillInfo{
184		Dir: filepath.Base(path),
185		Tokens: TokenCount{
186			References: make(map[string]int),
187		},
188	}
189
190	// Read SKILL.md
191	skillMdPath := filepath.Join(path, "SKILL.md")
192	content, err := os.ReadFile(skillMdPath)
193	if err != nil {
194		skill.Errors = append(skill.Errors, fmt.Sprintf("Cannot read SKILL.md: %v", err))
195		return skill, nil
196	}
197
198	// Parse frontmatter and body
199	fm, body, err := parseFrontmatter(string(content))
200	if err != nil {
201		skill.Errors = append(skill.Errors, fmt.Sprintf("Cannot parse frontmatter: %v", err))
202		return skill, nil
203	}
204	skill.Frontmatter = fm
205	skill.BodyLines = len(strings.Split(strings.TrimSpace(body), "\n"))
206
207	// Validate
208	skill.Errors = append(skill.Errors, validateSkill(skill)...)
209
210	fmt.Fprintf(os.Stderr, "Analyzing %s...\n", skill.Dir)
211
212	// Submit token counting jobs
213	jobCount := 0
214	counter.Count(fmt.Sprintf("%s:name", skill.Dir), fm.Name)
215	jobCount++
216	counter.Count(fmt.Sprintf("%s:description", skill.Dir), fm.Description)
217	jobCount++
218	counter.Count(fmt.Sprintf("%s:body", skill.Dir), body)
219	jobCount++
220
221	// Count reference files
222	refsPath := filepath.Join(path, "references")
223	var refFiles []string
224	if entries, err := os.ReadDir(refsPath); err == nil {
225		for _, entry := range entries {
226			if entry.IsDir() {
227				continue
228			}
229			refPath := filepath.Join(refsPath, entry.Name())
230			refContent, err := os.ReadFile(refPath)
231			if err == nil {
232				counter.Count(fmt.Sprintf("%s:ref:%s", skill.Dir, entry.Name()), string(refContent))
233				refFiles = append(refFiles, entry.Name())
234				jobCount++
235			}
236		}
237	}
238
239	// Collect results
240	for i := 0; i < jobCount; i++ {
241		result := counter.GetResult()
242		parts := strings.SplitN(result.ID, ":", 3)
243		if len(parts) < 2 {
244			continue
245		}
246
247		switch parts[1] {
248		case "name":
249			skill.Tokens.Name = result.Count
250		case "description":
251			skill.Tokens.Description = result.Count
252		case "body":
253			skill.Tokens.Body = result.Count
254		case "ref":
255			if len(parts) == 3 {
256				skill.Tokens.References[parts[2]] = result.Count
257			}
258		}
259	}
260
261	// Calculate total
262	skill.Tokens.Total = skill.Tokens.Name + skill.Tokens.Description + skill.Tokens.Body
263	for _, count := range skill.Tokens.References {
264		skill.Tokens.Total += count
265	}
266
267	return skill, nil
268}
269
270func parseFrontmatter(content string) (Frontmatter, string, error) {
271	lines := strings.Split(content, "\n")
272	if len(lines) < 3 || lines[0] != "---" {
273		return Frontmatter{}, "", fmt.Errorf("missing frontmatter")
274	}
275
276	var fm Frontmatter
277	var endIdx int
278	var inDescription bool
279	var descriptionLines []string
280
281	for i := 1; i < len(lines); i++ {
282		if lines[i] == "---" {
283			endIdx = i
284			break
285		}
286
287		line := lines[i]
288
289		// Parse name
290		if strings.HasPrefix(line, "name:") {
291			fm.Name = strings.TrimSpace(strings.TrimPrefix(line, "name:"))
292			continue
293		}
294
295		// Parse description (might be multi-line)
296		if strings.HasPrefix(line, "description:") {
297			descPart := strings.TrimSpace(strings.TrimPrefix(line, "description:"))
298			if descPart != "" {
299				descriptionLines = append(descriptionLines, descPart)
300			}
301			inDescription = true
302			continue
303		}
304
305		// Continue multi-line description
306		if inDescription && strings.HasPrefix(line, "  ") {
307			descriptionLines = append(descriptionLines, strings.TrimSpace(line))
308			continue
309		}
310
311		// End of description
312		if inDescription && !strings.HasPrefix(line, "  ") {
313			inDescription = false
314		}
315	}
316
317	fm.Description = strings.Join(descriptionLines, " ")
318
319	if endIdx == 0 {
320		return Frontmatter{}, "", fmt.Errorf("unclosed frontmatter")
321	}
322
323	body := strings.Join(lines[endIdx+1:], "\n")
324	return fm, body, nil
325}
326
327func validateSkill(skill SkillInfo) []string {
328	var errors []string
329
330	// Validate name
331	if len(skill.Frontmatter.Name) < 1 || len(skill.Frontmatter.Name) > 64 {
332		errors = append(errors, "name must be 1-64 characters")
333	}
334
335	namePattern := regexp.MustCompile(`^[a-z0-9]+(-[a-z0-9]+)*$`)
336	if !namePattern.MatchString(skill.Frontmatter.Name) {
337		errors = append(errors, "name must be lowercase letters, numbers, and hyphens only")
338	}
339
340	if strings.Contains(skill.Frontmatter.Name, "--") {
341		errors = append(errors, "name cannot contain consecutive hyphens")
342	}
343
344	if skill.Frontmatter.Name != skill.Dir {
345		errors = append(errors, fmt.Sprintf("name '%s' doesn't match directory '%s'", skill.Frontmatter.Name, skill.Dir))
346	}
347
348	// Validate description
349	if len(skill.Frontmatter.Description) < 1 {
350		errors = append(errors, "description is empty")
351	} else if len(skill.Frontmatter.Description) > 1024 {
352		errors = append(errors, fmt.Sprintf("description is %d characters (max 1024)", len(skill.Frontmatter.Description)))
353	}
354
355	// Check body line count
356	if skill.BodyLines > 500 {
357		errors = append(errors, fmt.Sprintf("body has %d lines (recommended: < 500)", skill.BodyLines))
358	}
359
360	return errors
361}
362
363func countTokensAPI(apiKey string, text string) int {
364	reqBody := map[string]interface{}{
365		"model": model,
366		"messages": []map[string]string{
367			{
368				"role":    "user",
369				"content": text,
370			},
371		},
372	}
373
374	jsonData, err := json.Marshal(reqBody)
375	if err != nil {
376		return 0
377	}
378
379	req, err := http.NewRequest("POST", syntheticAPI, bytes.NewBuffer(jsonData))
380	if err != nil {
381		return 0
382	}
383
384	req.Header.Set("Authorization", "Bearer "+apiKey)
385	req.Header.Set("Content-Type", "application/json")
386	req.Header.Set("anthropic-version", "2023-06-01")
387
388	client := &http.Client{}
389	resp, err := client.Do(req)
390	if err != nil {
391		return 0
392	}
393	defer resp.Body.Close()
394
395	if resp.StatusCode != http.StatusOK {
396		body, _ := io.ReadAll(resp.Body)
397		fmt.Fprintf(os.Stderr, "Warning: token count API returned %d: %s\n", resp.StatusCode, body)
398		return 0
399	}
400
401	var result struct {
402		InputTokens int `json:"input_tokens"`
403	}
404
405	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
406		return 0
407	}
408
409	return result.InputTokens
410}
411
412func buildComparisons(currentSkills []SkillInfo, counter *TokenCounter) map[string]SkillComparison {
413	comparisons := make(map[string]SkillComparison)
414
415	for _, skill := range currentSkills {
416		prevTotal, err := getSkillTokensFromGit(skill.Dir, counter)
417		if err != nil {
418			// Skill is new
419			if skill.Tokens.Total > 0 {
420				comparisons[skill.Dir] = SkillComparison{
421					PrevTotal: 0,
422					Delta:     skill.Tokens.Total,
423					Percent:   100.0,
424					IsNew:     true,
425				}
426			}
427			continue
428		}
429
430		delta := skill.Tokens.Total - prevTotal
431		var percent float64
432		if prevTotal > 0 {
433			percent = (float64(delta) / float64(prevTotal)) * 100
434		}
435
436		if delta != 0 {
437			comparisons[skill.Dir] = SkillComparison{
438				PrevTotal: prevTotal,
439				Delta:     delta,
440				Percent:   percent,
441				IsNew:     false,
442			}
443		}
444	}
445
446	return comparisons
447}
448
449func getSkillTokensFromGit(skillDir string, counter *TokenCounter) (int, error) {
450	// Get file from HEAD
451	skillPath := fmt.Sprintf("skills/%s/SKILL.md", skillDir)
452	cmd := exec.Command("git", "show", fmt.Sprintf("HEAD:%s", skillPath))
453	output, err := cmd.Output()
454	if err != nil {
455		return 0, err
456	}
457
458	// Parse frontmatter and body
459	fm, body, err := parseFrontmatter(string(output))
460	if err != nil {
461		return 0, err
462	}
463
464	// Count tokens for previous version
465	jobCount := 0
466	counter.Count(fmt.Sprintf("prev:%s:name", skillDir), fm.Name)
467	jobCount++
468	counter.Count(fmt.Sprintf("prev:%s:description", skillDir), fm.Description)
469	jobCount++
470	counter.Count(fmt.Sprintf("prev:%s:body", skillDir), body)
471	jobCount++
472
473	// Get reference files from HEAD
474	refsPath := fmt.Sprintf("skills/%s/references", skillDir)
475	cmd = exec.Command("git", "ls-tree", "-r", "--name-only", "HEAD", refsPath)
476	output, err = cmd.Output()
477	if err == nil {
478		refPaths := strings.Split(strings.TrimSpace(string(output)), "\n")
479		for _, refPath := range refPaths {
480			if refPath == "" {
481				continue
482			}
483			cmd = exec.Command("git", "show", fmt.Sprintf("HEAD:%s", refPath))
484			refContent, err := cmd.Output()
485			if err == nil {
486				counter.Count(fmt.Sprintf("prev:%s:ref", skillDir), string(refContent))
487				jobCount++
488			}
489		}
490	}
491
492	// Collect results
493	total := 0
494	for i := 0; i < jobCount; i++ {
495		result := counter.GetResult()
496		total += result.Count
497	}
498
499	return total, nil
500}
501
502func printSkillReport(skill SkillInfo, comp *SkillComparison) {
503	fmt.Printf("\n=== %s ===\n", skill.Dir)
504
505	if len(skill.Errors) > 0 {
506		fmt.Println("\nValidation errors:")
507		for _, err := range skill.Errors {
508			fmt.Printf("  ✗ %s\n", err)
509		}
510	}
511
512	fmt.Println("\nToken breakdown:")
513	fmt.Printf("  Name:        %5d tokens\n", skill.Tokens.Name)
514	fmt.Printf("  Description: %5d tokens\n", skill.Tokens.Description)
515	fmt.Printf("  Body:        %5d tokens (%d lines)\n", skill.Tokens.Body, skill.BodyLines)
516
517	if len(skill.Tokens.References) > 0 {
518		fmt.Println("  References:")
519		// Sort reference names for consistent output
520		refNames := make([]string, 0, len(skill.Tokens.References))
521		for name := range skill.Tokens.References {
522			refNames = append(refNames, name)
523		}
524		sort.Strings(refNames)
525
526		for _, name := range refNames {
527			count := skill.Tokens.References[name]
528			fmt.Printf("    %-40s %5d tokens\n", name, count)
529		}
530	}
531
532	fmt.Println("  ───────────────────────────────────────────────")
533
534	// Print total with comparison if available
535	if comp != nil {
536		sign := "+"
537		if comp.Delta < 0 {
538			sign = ""
539		}
540		indicator := ""
541		if comp.IsNew {
542			indicator = " [NEW]"
543		} else if comp.Percent > 20 {
544			indicator = " ⚠️"
545		} else if comp.Percent < -20 {
546			indicator = " ✓"
547		}
548		fmt.Printf("  Total:       %5d tokens (%s%d, %s%.1f%% from HEAD)%s\n",
549			skill.Tokens.Total, sign, comp.Delta, sign, comp.Percent, indicator)
550	} else {
551		fmt.Printf("  Total:       %5d tokens\n", skill.Tokens.Total)
552	}
553
554	// Warn if approaching budget
555	if skill.Tokens.Body > 5000 {
556		fmt.Println("  ⚠️  Body exceeds recommended 5000 token budget!")
557	} else if skill.Tokens.Body > 4000 {
558		fmt.Println("  ⚠️  Body approaching 5000 token budget")
559	}
560}
561
562func printSummary(skills []SkillInfo, comparisons map[string]SkillComparison) {
563	fmt.Println("\n" + strings.Repeat("=", 60))
564	fmt.Println("SUMMARY")
565	fmt.Println(strings.Repeat("=", 60))
566
567	totalTokens := 0
568	totalErrors := 0
569	totalDelta := 0
570
571	for _, skill := range skills {
572		totalTokens += skill.Tokens.Total
573		totalErrors += len(skill.Errors)
574		if comp, ok := comparisons[skill.Dir]; ok {
575			totalDelta += comp.Delta
576		}
577	}
578
579	fmt.Printf("\nTotal skills: %d\n", len(skills))
580	if comparisons != nil && totalDelta != 0 {
581		fmt.Printf("Total tokens: %d (%+d from HEAD)\n", totalTokens, totalDelta)
582	} else {
583		fmt.Printf("Total tokens: %d\n", totalTokens)
584	}
585	fmt.Printf("Validation errors: %d\n", totalErrors)
586
587	// Find largest skills
588	sort.Slice(skills, func(i, j int) bool {
589		return skills[i].Tokens.Total > skills[j].Tokens.Total
590	})
591
592	fmt.Println("\nLargest skills (by total tokens):")
593	for i := 0; i < 5 && i < len(skills); i++ {
594		skill := skills[i]
595		if comp, ok := comparisons[skill.Dir]; ok {
596			sign := "+"
597			if comp.Delta < 0 {
598				sign = ""
599			}
600			fmt.Printf("  %d. %-40s %5d tokens (%s%d)\n",
601				i+1, skill.Dir, skill.Tokens.Total, sign, comp.Delta)
602		} else {
603			fmt.Printf("  %d. %-40s %5d tokens\n", i+1, skill.Dir, skill.Tokens.Total)
604		}
605	}
606
607	// Show biggest changes if comparing
608	if comparisons != nil && len(comparisons) > 0 {
609		type changeEntry struct {
610			name string
611			comp SkillComparison
612		}
613		var changes []changeEntry
614		for name, comp := range comparisons {
615			changes = append(changes, changeEntry{name, comp})
616		}
617
618		sort.Slice(changes, func(i, j int) bool {
619			absI := changes[i].comp.Delta
620			if absI < 0 {
621				absI = -absI
622			}
623			absJ := changes[j].comp.Delta
624			if absJ < 0 {
625				absJ = -absJ
626			}
627			return absI > absJ
628		})
629
630		fmt.Println("\nBiggest changes:")
631		displayed := 0
632		for _, change := range changes {
633			if displayed >= 5 {
634				break
635			}
636			sign := "+"
637			if change.comp.Delta < 0 {
638				sign = ""
639			}
640			indicator := ""
641			if change.comp.IsNew {
642				indicator = " [NEW]"
643			} else if change.comp.Percent > 20 {
644				indicator = " ⚠️"
645			} else if change.comp.Percent < -20 {
646				indicator = " ✓"
647			}
648			fmt.Printf("  %-40s %s%-5d tokens (%s%.1f%%)%s\n",
649				change.name, sign, change.comp.Delta, sign, change.comp.Percent, indicator)
650			displayed++
651		}
652	}
653}