diff --git a/skill-stats.go b/skill-stats.go index 80e06ccdeb0505dec46f9a9c43aa875ad9fa96c0..61be7ecbc3c6ab6813004f713cc93f3332b9597d 100755 --- a/skill-stats.go +++ b/skill-stats.go @@ -15,6 +15,7 @@ import ( "sort" "strings" "sync" + "time" ) const ( @@ -23,6 +24,10 @@ const ( workerCount = 5 // Number of parallel API workers ) +var httpClient = &http.Client{ + Timeout: 30 * time.Second, +} + type Frontmatter struct { Name string Description string @@ -52,13 +57,18 @@ type TokenJob struct { type TokenResult struct { ID string Count int + Err error } type SkillComparison struct { - PrevTotal int - Delta int - Percent float64 - IsNew bool + PrevTotal int + PrevMetadata int + PrevBody int + Delta int + MetadataDelta int + BodyDelta int + Percent float64 + IsNew bool } func main() { @@ -135,8 +145,8 @@ func newTokenCounter(apiKey string, workers int) *TokenCounter { func (tc *TokenCounter) worker() { defer tc.wg.Done() for job := range tc.jobs { - count := countTokensAPI(tc.apiKey, job.Text) - tc.results <- TokenResult{ID: job.ID, Count: count} + count, err := countTokensAPI(tc.apiKey, job.Text) + tc.results <- TokenResult{ID: job.ID, Count: count, Err: err} } } @@ -148,6 +158,15 @@ func (tc *TokenCounter) GetResult() TokenResult { return <-tc.results } +func (tc *TokenCounter) TryGetResult() (TokenResult, bool) { + select { + case r := <-tc.results: + return r, true + default: + return TokenResult{}, false + } +} + func (tc *TokenCounter) Close() { close(tc.jobs) tc.wg.Wait() @@ -202,48 +221,59 @@ func analyzeSkill(path string, counter *TokenCounter) (SkillInfo, error) { return skill, nil } skill.Frontmatter = fm - skill.BodyLines = len(strings.Split(strings.TrimSpace(body), "\n")) + trimmedBody := strings.TrimSpace(body) + if trimmedBody == "" { + skill.BodyLines = 0 + } else { + skill.BodyLines = len(strings.Split(trimmedBody, "\n")) + } // Validate skill.Errors = append(skill.Errors, validateSkill(skill)...) fmt.Fprintf(os.Stderr, "Analyzing %s...\n", skill.Dir) - // Submit token counting jobs - jobCount := 0 - counter.Count(fmt.Sprintf("%s:name", skill.Dir), fm.Name) - jobCount++ - counter.Count(fmt.Sprintf("%s:description", skill.Dir), fm.Description) - jobCount++ - counter.Count(fmt.Sprintf("%s:body", skill.Dir), body) - jobCount++ + // Collect all jobs first (no channel use yet) + var jobs []TokenJob + jobs = append(jobs, TokenJob{ID: fmt.Sprintf("%s:name", skill.Dir), Text: fm.Name}) + jobs = append(jobs, TokenJob{ID: fmt.Sprintf("%s:description", skill.Dir), Text: fm.Description}) + jobs = append(jobs, TokenJob{ID: fmt.Sprintf("%s:body", skill.Dir), Text: body}) - // Count reference files + // Collect reference file jobs refsPath := filepath.Join(path, "references") - var refFiles []string - if entries, err := os.ReadDir(refsPath); err == nil { + entries, err := os.ReadDir(refsPath) + if err != nil { + if !os.IsNotExist(err) { + skill.Errors = append(skill.Errors, fmt.Sprintf("Cannot read references directory: %v", err)) + } + } else { for _, entry := range entries { if entry.IsDir() { continue } refPath := filepath.Join(refsPath, entry.Name()) refContent, err := os.ReadFile(refPath) - if err == nil { - counter.Count(fmt.Sprintf("%s:ref:%s", skill.Dir, entry.Name()), string(refContent)) - refFiles = append(refFiles, entry.Name()) - jobCount++ + if err != nil { + skill.Errors = append(skill.Errors, fmt.Sprintf("Cannot read reference %s: %v", entry.Name(), err)) + continue } + jobs = append(jobs, TokenJob{ + ID: fmt.Sprintf("%s:ref:%s", skill.Dir, entry.Name()), + Text: string(refContent), + }) } } - // Collect results - for i := 0; i < jobCount; i++ { - result := counter.GetResult() + // Interleave enqueue and drain to prevent deadlock + processResult := func(result TokenResult) { + if result.Err != nil { + skill.Errors = append(skill.Errors, fmt.Sprintf("Token count failed for %s: %v", result.ID, result.Err)) + return + } parts := strings.SplitN(result.ID, ":", 3) if len(parts) < 2 { - continue + return } - switch parts[1] { case "name": skill.Tokens.Name = result.Count @@ -258,6 +288,28 @@ func analyzeSkill(path string, counter *TokenCounter) (SkillInfo, error) { } } + outstanding := 0 + for _, job := range jobs { + counter.Count(job.ID, job.Text) + outstanding++ + // Drain any available results to prevent backpressure + for { + if result, ok := counter.TryGetResult(); ok { + processResult(result) + outstanding-- + } else { + break + } + } + } + + // Drain remaining results + for outstanding > 0 { + result := counter.GetResult() + processResult(result) + outstanding-- + } + // Calculate total skill.Tokens.Total = skill.Tokens.Name + skill.Tokens.Description + skill.Tokens.Body for _, count := range skill.Tokens.References { @@ -334,11 +386,7 @@ func validateSkill(skill SkillInfo) []string { namePattern := regexp.MustCompile(`^[a-z0-9]+(-[a-z0-9]+)*$`) if !namePattern.MatchString(skill.Frontmatter.Name) { - errors = append(errors, "name must be lowercase letters, numbers, and hyphens only") - } - - if strings.Contains(skill.Frontmatter.Name, "--") { - errors = append(errors, "name cannot contain consecutive hyphens") + errors = append(errors, "name must be lowercase letters, numbers, hyphens only; no leading/trailing/consecutive hyphens") } if skill.Frontmatter.Name != skill.Dir { @@ -360,7 +408,7 @@ func validateSkill(skill SkillInfo) []string { return errors } -func countTokensAPI(apiKey string, text string) int { +func countTokensAPI(apiKey string, text string) (int, error) { reqBody := map[string]interface{}{ "model": model, "messages": []map[string]string{ @@ -373,29 +421,27 @@ func countTokensAPI(apiKey string, text string) int { jsonData, err := json.Marshal(reqBody) if err != nil { - return 0 + return 0, fmt.Errorf("marshal request: %w", err) } req, err := http.NewRequest("POST", syntheticAPI, bytes.NewBuffer(jsonData)) if err != nil { - return 0 + return 0, fmt.Errorf("create request: %w", err) } req.Header.Set("Authorization", "Bearer "+apiKey) req.Header.Set("Content-Type", "application/json") req.Header.Set("anthropic-version", "2023-06-01") - client := &http.Client{} - resp, err := client.Do(req) + resp, err := httpClient.Do(req) if err != nil { - return 0 + return 0, fmt.Errorf("HTTP request: %w", err) } defer resp.Body.Close() - if resp.StatusCode != http.StatusOK { - body, _ := io.ReadAll(resp.Body) - fmt.Fprintf(os.Stderr, "Warning: token count API returned %d: %s\n", resp.StatusCode, body) - return 0 + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + body, _ := io.ReadAll(io.LimitReader(resp.Body, 1024)) + return 0, fmt.Errorf("API status %d: %s", resp.StatusCode, string(body)) } var result struct { @@ -403,42 +449,58 @@ func countTokensAPI(apiKey string, text string) int { } if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { - return 0 + return 0, fmt.Errorf("decode response: %w", err) } - return result.InputTokens + return result.InputTokens, nil +} + +type PrevTokens struct { + Total int + Metadata int + Body int } func buildComparisons(currentSkills []SkillInfo, counter *TokenCounter) map[string]SkillComparison { comparisons := make(map[string]SkillComparison) for _, skill := range currentSkills { - prevTotal, err := getSkillTokensFromGit(skill.Dir, counter) + prev, err := getSkillTokensFromGit(skill.Dir, counter) if err != nil { // Skill is new if skill.Tokens.Total > 0 { comparisons[skill.Dir] = SkillComparison{ - PrevTotal: 0, - Delta: skill.Tokens.Total, - Percent: 100.0, - IsNew: true, + PrevTotal: 0, + PrevMetadata: 0, + PrevBody: 0, + Delta: skill.Tokens.Total, + MetadataDelta: skill.Tokens.Name + skill.Tokens.Description, + BodyDelta: skill.Tokens.Body, + Percent: 100.0, + IsNew: true, } } continue } - delta := skill.Tokens.Total - prevTotal + delta := skill.Tokens.Total - prev.Total + metadataDelta := (skill.Tokens.Name + skill.Tokens.Description) - prev.Metadata + bodyDelta := skill.Tokens.Body - prev.Body var percent float64 - if prevTotal > 0 { - percent = (float64(delta) / float64(prevTotal)) * 100 + if prev.Total > 0 { + percent = (float64(delta) / float64(prev.Total)) * 100 } - if delta != 0 { + if delta != 0 || metadataDelta != 0 || bodyDelta != 0 { comparisons[skill.Dir] = SkillComparison{ - PrevTotal: prevTotal, - Delta: delta, - Percent: percent, - IsNew: false, + PrevTotal: prev.Total, + PrevMetadata: prev.Metadata, + PrevBody: prev.Body, + Delta: delta, + MetadataDelta: metadataDelta, + BodyDelta: bodyDelta, + Percent: percent, + IsNew: false, } } } @@ -446,35 +508,37 @@ func buildComparisons(currentSkills []SkillInfo, counter *TokenCounter) map[stri return comparisons } -func getSkillTokensFromGit(skillDir string, counter *TokenCounter) (int, error) { +func getSkillTokensFromGit(skillDir string, counter *TokenCounter) (PrevTokens, error) { // Get file from HEAD skillPath := fmt.Sprintf("skills/%s/SKILL.md", skillDir) cmd := exec.Command("git", "show", fmt.Sprintf("HEAD:%s", skillPath)) output, err := cmd.Output() if err != nil { - return 0, err + return PrevTokens{}, err } // Parse frontmatter and body fm, body, err := parseFrontmatter(string(output)) if err != nil { - return 0, err + return PrevTokens{}, err } - // Count tokens for previous version - jobCount := 0 - counter.Count(fmt.Sprintf("prev:%s:name", skillDir), fm.Name) - jobCount++ - counter.Count(fmt.Sprintf("prev:%s:description", skillDir), fm.Description) - jobCount++ - counter.Count(fmt.Sprintf("prev:%s:body", skillDir), body) - jobCount++ + // Collect all jobs first (no channel use yet) + var jobs []TokenJob + jobs = append(jobs, TokenJob{ID: fmt.Sprintf("prev:%s:name", skillDir), Text: fm.Name}) + jobs = append(jobs, TokenJob{ID: fmt.Sprintf("prev:%s:description", skillDir), Text: fm.Description}) + jobs = append(jobs, TokenJob{ID: fmt.Sprintf("prev:%s:body", skillDir), Text: body}) // Get reference files from HEAD refsPath := fmt.Sprintf("skills/%s/references", skillDir) cmd = exec.Command("git", "ls-tree", "-r", "--name-only", "HEAD", refsPath) output, err = cmd.Output() - if err == nil { + if err != nil { + // Log non-fatal git errors (e.g., refs directory doesn't exist in HEAD) + if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() != 0 { + fmt.Fprintf(os.Stderr, "Warning: cannot list git refs for %s (may not exist in HEAD)\n", refsPath) + } + } else { refPaths := strings.Split(strings.TrimSpace(string(output)), "\n") for _, refPath := range refPaths { if refPath == "" { @@ -482,21 +546,62 @@ func getSkillTokensFromGit(skillDir string, counter *TokenCounter) (int, error) } cmd = exec.Command("git", "show", fmt.Sprintf("HEAD:%s", refPath)) refContent, err := cmd.Output() - if err == nil { - counter.Count(fmt.Sprintf("prev:%s:ref", skillDir), string(refContent)) - jobCount++ + if err != nil { + fmt.Fprintf(os.Stderr, "Warning: cannot read %s from HEAD: %v\n", refPath, err) + continue + } + jobs = append(jobs, TokenJob{ID: fmt.Sprintf("prev:%s:ref", skillDir), Text: string(refContent)}) + } + } + + // Interleave enqueue and drain to prevent deadlock + prev := PrevTokens{} + processResult := func(result TokenResult) { + if result.Err != nil { + fmt.Fprintf(os.Stderr, "Warning: token count failed for %s: %v\n", result.ID, result.Err) + return + } + parts := strings.SplitN(result.ID, ":", 3) + if len(parts) < 3 { + prev.Total += result.Count + return + } + switch parts[2] { + case "name": + prev.Metadata += result.Count + case "description": + prev.Metadata += result.Count + case "body": + prev.Body += result.Count + case "ref": + // References are counted in total but not metadata or body + } + prev.Total += result.Count + } + + outstanding := 0 + for _, job := range jobs { + counter.Count(job.ID, job.Text) + outstanding++ + // Drain any available results to prevent backpressure + for { + if result, ok := counter.TryGetResult(); ok { + processResult(result) + outstanding-- + } else { + break } } } - // Collect results - total := 0 - for i := 0; i < jobCount; i++ { + // Drain remaining results + for outstanding > 0 { result := counter.GetResult() - total += result.Count + processResult(result) + outstanding-- } - return total, nil + return prev, nil } func printSkillReport(skill SkillInfo, comp *SkillComparison) { @@ -565,22 +670,40 @@ func printSummary(skills []SkillInfo, comparisons map[string]SkillComparison) { fmt.Println(strings.Repeat("=", 60)) totalTokens := 0 + totalMetadataTokens := 0 + totalBodyTokens := 0 totalErrors := 0 totalDelta := 0 + metadataDelta := 0 + bodyDelta := 0 for _, skill := range skills { totalTokens += skill.Tokens.Total + totalMetadataTokens += skill.Tokens.Name + skill.Tokens.Description + totalBodyTokens += skill.Tokens.Body totalErrors += len(skill.Errors) if comp, ok := comparisons[skill.Dir]; ok { totalDelta += comp.Delta + metadataDelta += comp.MetadataDelta + bodyDelta += comp.BodyDelta } } - fmt.Printf("\nTotal skills: %d\n", len(skills)) + fmt.Printf("\nSkills: %d\n", len(skills)) + if comparisons != nil && metadataDelta != 0 { + fmt.Printf("Metadata: %d tokens (%+d)\n", totalMetadataTokens, metadataDelta) + } else { + fmt.Printf("Metadata: %d tokens\n", totalMetadataTokens) + } + if comparisons != nil && bodyDelta != 0 { + fmt.Printf("Combined bodies: %d tokens (%+d)\n", totalBodyTokens, bodyDelta) + } else { + fmt.Printf("Combined bodies: %d tokens\n", totalBodyTokens) + } if comparisons != nil && totalDelta != 0 { - fmt.Printf("Total tokens: %d (%+d from HEAD)\n", totalTokens, totalDelta) + fmt.Printf("Overall: %d tokens (%+d from HEAD)\n", totalTokens, totalDelta) } else { - fmt.Printf("Total tokens: %d\n", totalTokens) + fmt.Printf("Overall: %d tokens\n", totalTokens) } fmt.Printf("Validation errors: %d\n", totalErrors)