feat(skill-stats): fix deadlock and add breakdown

Amolith created

- Track metadata and body token deltas separately
- Fix deadlock by interleaving enqueue and drain operations
- Add HTTP timeout to prevent hanging requests
- Improve error handling throughout token counting
- Update summary output to show metadata/body/overall breakdown
- Fix empty body line counting
- Simplify name validation error message

Assisted-by: GLM 4.7 via Crush

Change summary

skill-stats.go | 283 +++++++++++++++++++++++++++++++++++++--------------
1 file changed, 203 insertions(+), 80 deletions(-)

Detailed changes

skill-stats.go 🔗

@@ -15,6 +15,7 @@ import (
 	"sort"
 	"strings"
 	"sync"
+	"time"
 )
 
 const (
@@ -23,6 +24,10 @@ const (
 	workerCount  = 5 // Number of parallel API workers
 )
 
+var httpClient = &http.Client{
+	Timeout: 30 * time.Second,
+}
+
 type Frontmatter struct {
 	Name        string
 	Description string
@@ -52,13 +57,18 @@ type TokenJob struct {
 type TokenResult struct {
 	ID    string
 	Count int
+	Err   error
 }
 
 type SkillComparison struct {
-	PrevTotal int
-	Delta     int
-	Percent   float64
-	IsNew     bool
+	PrevTotal      int
+	PrevMetadata   int
+	PrevBody       int
+	Delta          int
+	MetadataDelta  int
+	BodyDelta      int
+	Percent        float64
+	IsNew          bool
 }
 
 func main() {
@@ -135,8 +145,8 @@ func newTokenCounter(apiKey string, workers int) *TokenCounter {
 func (tc *TokenCounter) worker() {
 	defer tc.wg.Done()
 	for job := range tc.jobs {
-		count := countTokensAPI(tc.apiKey, job.Text)
-		tc.results <- TokenResult{ID: job.ID, Count: count}
+		count, err := countTokensAPI(tc.apiKey, job.Text)
+		tc.results <- TokenResult{ID: job.ID, Count: count, Err: err}
 	}
 }
 
@@ -148,6 +158,15 @@ func (tc *TokenCounter) GetResult() TokenResult {
 	return <-tc.results
 }
 
+func (tc *TokenCounter) TryGetResult() (TokenResult, bool) {
+	select {
+	case r := <-tc.results:
+		return r, true
+	default:
+		return TokenResult{}, false
+	}
+}
+
 func (tc *TokenCounter) Close() {
 	close(tc.jobs)
 	tc.wg.Wait()
@@ -202,48 +221,59 @@ func analyzeSkill(path string, counter *TokenCounter) (SkillInfo, error) {
 		return skill, nil
 	}
 	skill.Frontmatter = fm
-	skill.BodyLines = len(strings.Split(strings.TrimSpace(body), "\n"))
+	trimmedBody := strings.TrimSpace(body)
+	if trimmedBody == "" {
+		skill.BodyLines = 0
+	} else {
+		skill.BodyLines = len(strings.Split(trimmedBody, "\n"))
+	}
 
 	// Validate
 	skill.Errors = append(skill.Errors, validateSkill(skill)...)
 
 	fmt.Fprintf(os.Stderr, "Analyzing %s...\n", skill.Dir)
 
-	// Submit token counting jobs
-	jobCount := 0
-	counter.Count(fmt.Sprintf("%s:name", skill.Dir), fm.Name)
-	jobCount++
-	counter.Count(fmt.Sprintf("%s:description", skill.Dir), fm.Description)
-	jobCount++
-	counter.Count(fmt.Sprintf("%s:body", skill.Dir), body)
-	jobCount++
+	// Collect all jobs first (no channel use yet)
+	var jobs []TokenJob
+	jobs = append(jobs, TokenJob{ID: fmt.Sprintf("%s:name", skill.Dir), Text: fm.Name})
+	jobs = append(jobs, TokenJob{ID: fmt.Sprintf("%s:description", skill.Dir), Text: fm.Description})
+	jobs = append(jobs, TokenJob{ID: fmt.Sprintf("%s:body", skill.Dir), Text: body})
 
-	// Count reference files
+	// Collect reference file jobs
 	refsPath := filepath.Join(path, "references")
-	var refFiles []string
-	if entries, err := os.ReadDir(refsPath); err == nil {
+	entries, err := os.ReadDir(refsPath)
+	if err != nil {
+		if !os.IsNotExist(err) {
+			skill.Errors = append(skill.Errors, fmt.Sprintf("Cannot read references directory: %v", err))
+		}
+	} else {
 		for _, entry := range entries {
 			if entry.IsDir() {
 				continue
 			}
 			refPath := filepath.Join(refsPath, entry.Name())
 			refContent, err := os.ReadFile(refPath)
-			if err == nil {
-				counter.Count(fmt.Sprintf("%s:ref:%s", skill.Dir, entry.Name()), string(refContent))
-				refFiles = append(refFiles, entry.Name())
-				jobCount++
+			if err != nil {
+				skill.Errors = append(skill.Errors, fmt.Sprintf("Cannot read reference %s: %v", entry.Name(), err))
+				continue
 			}
+			jobs = append(jobs, TokenJob{
+				ID:   fmt.Sprintf("%s:ref:%s", skill.Dir, entry.Name()),
+				Text: string(refContent),
+			})
 		}
 	}
 
-	// Collect results
-	for i := 0; i < jobCount; i++ {
-		result := counter.GetResult()
+	// Interleave enqueue and drain to prevent deadlock
+	processResult := func(result TokenResult) {
+		if result.Err != nil {
+			skill.Errors = append(skill.Errors, fmt.Sprintf("Token count failed for %s: %v", result.ID, result.Err))
+			return
+		}
 		parts := strings.SplitN(result.ID, ":", 3)
 		if len(parts) < 2 {
-			continue
+			return
 		}
-
 		switch parts[1] {
 		case "name":
 			skill.Tokens.Name = result.Count
@@ -258,6 +288,28 @@ func analyzeSkill(path string, counter *TokenCounter) (SkillInfo, error) {
 		}
 	}
 
+	outstanding := 0
+	for _, job := range jobs {
+		counter.Count(job.ID, job.Text)
+		outstanding++
+		// Drain any available results to prevent backpressure
+		for {
+			if result, ok := counter.TryGetResult(); ok {
+				processResult(result)
+				outstanding--
+			} else {
+				break
+			}
+		}
+	}
+
+	// Drain remaining results
+	for outstanding > 0 {
+		result := counter.GetResult()
+		processResult(result)
+		outstanding--
+	}
+
 	// Calculate total
 	skill.Tokens.Total = skill.Tokens.Name + skill.Tokens.Description + skill.Tokens.Body
 	for _, count := range skill.Tokens.References {
@@ -334,11 +386,7 @@ func validateSkill(skill SkillInfo) []string {
 
 	namePattern := regexp.MustCompile(`^[a-z0-9]+(-[a-z0-9]+)*$`)
 	if !namePattern.MatchString(skill.Frontmatter.Name) {
-		errors = append(errors, "name must be lowercase letters, numbers, and hyphens only")
-	}
-
-	if strings.Contains(skill.Frontmatter.Name, "--") {
-		errors = append(errors, "name cannot contain consecutive hyphens")
+		errors = append(errors, "name must be lowercase letters, numbers, hyphens only; no leading/trailing/consecutive hyphens")
 	}
 
 	if skill.Frontmatter.Name != skill.Dir {
@@ -360,7 +408,7 @@ func validateSkill(skill SkillInfo) []string {
 	return errors
 }
 
-func countTokensAPI(apiKey string, text string) int {
+func countTokensAPI(apiKey string, text string) (int, error) {
 	reqBody := map[string]interface{}{
 		"model": model,
 		"messages": []map[string]string{
@@ -373,29 +421,27 @@ func countTokensAPI(apiKey string, text string) int {
 
 	jsonData, err := json.Marshal(reqBody)
 	if err != nil {
-		return 0
+		return 0, fmt.Errorf("marshal request: %w", err)
 	}
 
 	req, err := http.NewRequest("POST", syntheticAPI, bytes.NewBuffer(jsonData))
 	if err != nil {
-		return 0
+		return 0, fmt.Errorf("create request: %w", err)
 	}
 
 	req.Header.Set("Authorization", "Bearer "+apiKey)
 	req.Header.Set("Content-Type", "application/json")
 	req.Header.Set("anthropic-version", "2023-06-01")
 
-	client := &http.Client{}
-	resp, err := client.Do(req)
+	resp, err := httpClient.Do(req)
 	if err != nil {
-		return 0
+		return 0, fmt.Errorf("HTTP request: %w", err)
 	}
 	defer resp.Body.Close()
 
-	if resp.StatusCode != http.StatusOK {
-		body, _ := io.ReadAll(resp.Body)
-		fmt.Fprintf(os.Stderr, "Warning: token count API returned %d: %s\n", resp.StatusCode, body)
-		return 0
+	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+		body, _ := io.ReadAll(io.LimitReader(resp.Body, 1024))
+		return 0, fmt.Errorf("API status %d: %s", resp.StatusCode, string(body))
 	}
 
 	var result struct {
@@ -403,42 +449,58 @@ func countTokensAPI(apiKey string, text string) int {
 	}
 
 	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
-		return 0
+		return 0, fmt.Errorf("decode response: %w", err)
 	}
 
-	return result.InputTokens
+	return result.InputTokens, nil
+}
+
+type PrevTokens struct {
+	Total    int
+	Metadata int
+	Body     int
 }
 
 func buildComparisons(currentSkills []SkillInfo, counter *TokenCounter) map[string]SkillComparison {
 	comparisons := make(map[string]SkillComparison)
 
 	for _, skill := range currentSkills {
-		prevTotal, err := getSkillTokensFromGit(skill.Dir, counter)
+		prev, err := getSkillTokensFromGit(skill.Dir, counter)
 		if err != nil {
 			// Skill is new
 			if skill.Tokens.Total > 0 {
 				comparisons[skill.Dir] = SkillComparison{
-					PrevTotal: 0,
-					Delta:     skill.Tokens.Total,
-					Percent:   100.0,
-					IsNew:     true,
+					PrevTotal:     0,
+					PrevMetadata:  0,
+					PrevBody:      0,
+					Delta:         skill.Tokens.Total,
+					MetadataDelta: skill.Tokens.Name + skill.Tokens.Description,
+					BodyDelta:     skill.Tokens.Body,
+					Percent:       100.0,
+					IsNew:         true,
 				}
 			}
 			continue
 		}
 
-		delta := skill.Tokens.Total - prevTotal
+		delta := skill.Tokens.Total - prev.Total
+		metadataDelta := (skill.Tokens.Name + skill.Tokens.Description) - prev.Metadata
+		bodyDelta := skill.Tokens.Body - prev.Body
 		var percent float64
-		if prevTotal > 0 {
-			percent = (float64(delta) / float64(prevTotal)) * 100
+		if prev.Total > 0 {
+			percent = (float64(delta) / float64(prev.Total)) * 100
 		}
 
-		if delta != 0 {
+		if delta != 0 || metadataDelta != 0 || bodyDelta != 0 {
 			comparisons[skill.Dir] = SkillComparison{
-				PrevTotal: prevTotal,
-				Delta:     delta,
-				Percent:   percent,
-				IsNew:     false,
+				PrevTotal:     prev.Total,
+				PrevMetadata:  prev.Metadata,
+				PrevBody:      prev.Body,
+				Delta:         delta,
+				MetadataDelta: metadataDelta,
+				BodyDelta:     bodyDelta,
+				Percent:       percent,
+				IsNew:         false,
 			}
 		}
 	}
@@ -446,35 +508,37 @@ func buildComparisons(currentSkills []SkillInfo, counter *TokenCounter) map[stri
 	return comparisons
 }
 
-func getSkillTokensFromGit(skillDir string, counter *TokenCounter) (int, error) {
+func getSkillTokensFromGit(skillDir string, counter *TokenCounter) (PrevTokens, error) {
 	// Get file from HEAD
 	skillPath := fmt.Sprintf("skills/%s/SKILL.md", skillDir)
 	cmd := exec.Command("git", "show", fmt.Sprintf("HEAD:%s", skillPath))
 	output, err := cmd.Output()
 	if err != nil {
-		return 0, err
+		return PrevTokens{}, err
 	}
 
 	// Parse frontmatter and body
 	fm, body, err := parseFrontmatter(string(output))
 	if err != nil {
-		return 0, err
+		return PrevTokens{}, err
 	}
 
-	// Count tokens for previous version
-	jobCount := 0
-	counter.Count(fmt.Sprintf("prev:%s:name", skillDir), fm.Name)
-	jobCount++
-	counter.Count(fmt.Sprintf("prev:%s:description", skillDir), fm.Description)
-	jobCount++
-	counter.Count(fmt.Sprintf("prev:%s:body", skillDir), body)
-	jobCount++
+	// Collect all jobs first (no channel use yet)
+	var jobs []TokenJob
+	jobs = append(jobs, TokenJob{ID: fmt.Sprintf("prev:%s:name", skillDir), Text: fm.Name})
+	jobs = append(jobs, TokenJob{ID: fmt.Sprintf("prev:%s:description", skillDir), Text: fm.Description})
+	jobs = append(jobs, TokenJob{ID: fmt.Sprintf("prev:%s:body", skillDir), Text: body})
 
 	// Get reference files from HEAD
 	refsPath := fmt.Sprintf("skills/%s/references", skillDir)
 	cmd = exec.Command("git", "ls-tree", "-r", "--name-only", "HEAD", refsPath)
 	output, err = cmd.Output()
-	if err == nil {
+	if err != nil {
+		// Log non-fatal git errors (e.g., refs directory doesn't exist in HEAD)
+		if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() != 0 {
+			fmt.Fprintf(os.Stderr, "Warning: cannot list git refs for %s (may not exist in HEAD)\n", refsPath)
+		}
+	} else {
 		refPaths := strings.Split(strings.TrimSpace(string(output)), "\n")
 		for _, refPath := range refPaths {
 			if refPath == "" {
@@ -482,21 +546,62 @@ func getSkillTokensFromGit(skillDir string, counter *TokenCounter) (int, error)
 			}
 			cmd = exec.Command("git", "show", fmt.Sprintf("HEAD:%s", refPath))
 			refContent, err := cmd.Output()
-			if err == nil {
-				counter.Count(fmt.Sprintf("prev:%s:ref", skillDir), string(refContent))
-				jobCount++
+			if err != nil {
+				fmt.Fprintf(os.Stderr, "Warning: cannot read %s from HEAD: %v\n", refPath, err)
+				continue
+			}
+			jobs = append(jobs, TokenJob{ID: fmt.Sprintf("prev:%s:ref", skillDir), Text: string(refContent)})
+		}
+	}
+
+	// Interleave enqueue and drain to prevent deadlock
+	prev := PrevTokens{}
+	processResult := func(result TokenResult) {
+		if result.Err != nil {
+			fmt.Fprintf(os.Stderr, "Warning: token count failed for %s: %v\n", result.ID, result.Err)
+			return
+		}
+		parts := strings.SplitN(result.ID, ":", 3)
+		if len(parts) < 3 {
+			prev.Total += result.Count
+			return
+		}
+		switch parts[2] {
+		case "name":
+			prev.Metadata += result.Count
+		case "description":
+			prev.Metadata += result.Count
+		case "body":
+			prev.Body += result.Count
+		case "ref":
+			// References are counted in total but not metadata or body
+		}
+		prev.Total += result.Count
+	}
+
+	outstanding := 0
+	for _, job := range jobs {
+		counter.Count(job.ID, job.Text)
+		outstanding++
+		// Drain any available results to prevent backpressure
+		for {
+			if result, ok := counter.TryGetResult(); ok {
+				processResult(result)
+				outstanding--
+			} else {
+				break
 			}
 		}
 	}
 
-	// Collect results
-	total := 0
-	for i := 0; i < jobCount; i++ {
+	// Drain remaining results
+	for outstanding > 0 {
 		result := counter.GetResult()
-		total += result.Count
+		processResult(result)
+		outstanding--
 	}
 
-	return total, nil
+	return prev, nil
 }
 
 func printSkillReport(skill SkillInfo, comp *SkillComparison) {
@@ -565,22 +670,40 @@ func printSummary(skills []SkillInfo, comparisons map[string]SkillComparison) {
 	fmt.Println(strings.Repeat("=", 60))
 
 	totalTokens := 0
+	totalMetadataTokens := 0
+	totalBodyTokens := 0
 	totalErrors := 0
 	totalDelta := 0
+	metadataDelta := 0
+	bodyDelta := 0
 
 	for _, skill := range skills {
 		totalTokens += skill.Tokens.Total
+		totalMetadataTokens += skill.Tokens.Name + skill.Tokens.Description
+		totalBodyTokens += skill.Tokens.Body
 		totalErrors += len(skill.Errors)
 		if comp, ok := comparisons[skill.Dir]; ok {
 			totalDelta += comp.Delta
+			metadataDelta += comp.MetadataDelta
+			bodyDelta += comp.BodyDelta
 		}
 	}
 
-	fmt.Printf("\nTotal skills: %d\n", len(skills))
+	fmt.Printf("\nSkills: %d\n", len(skills))
+	if comparisons != nil && metadataDelta != 0 {
+		fmt.Printf("Metadata: %d tokens (%+d)\n", totalMetadataTokens, metadataDelta)
+	} else {
+		fmt.Printf("Metadata: %d tokens\n", totalMetadataTokens)
+	}
+	if comparisons != nil && bodyDelta != 0 {
+		fmt.Printf("Combined bodies: %d tokens (%+d)\n", totalBodyTokens, bodyDelta)
+	} else {
+		fmt.Printf("Combined bodies: %d tokens\n", totalBodyTokens)
+	}
 	if comparisons != nil && totalDelta != 0 {
-		fmt.Printf("Total tokens: %d (%+d from HEAD)\n", totalTokens, totalDelta)
+		fmt.Printf("Overall: %d tokens (%+d from HEAD)\n", totalTokens, totalDelta)
 	} else {
-		fmt.Printf("Total tokens: %d\n", totalTokens)
+		fmt.Printf("Overall: %d tokens\n", totalTokens)
 	}
 	fmt.Printf("Validation errors: %d\n", totalErrors)