diff --git a/server/context_window_test.go b/server/context_window_test.go index 08f383a689975ad677db765e6f352237c2dcb6f6..cbd078eb141061b817a27c5c7c0d50a81285094e 100644 --- a/server/context_window_test.go +++ b/server/context_window_test.go @@ -114,6 +114,48 @@ func TestContextWindowSizeCalculation(t *testing.T) { t.Errorf("calculateContextWindowSize() = %d, want %d", got, want) } }) + + t.Run("skips_zero_usage_messages", func(t *testing.T) { + // Test that we skip messages with zero usage data (common for user/tool messages) + // and find the last message with actual usage + validUsage := llm.Usage{ + InputTokens: 200, + OutputTokens: 50, + } + validUsageJSON, _ := json.Marshal(validUsage) + validUsageStr := string(validUsageJSON) + + zeroUsage := llm.Usage{} // All zeros + zeroUsageJSON, _ := json.Marshal(zeroUsage) + zeroUsageStr := string(zeroUsageJSON) + + messages := []APIMessage{ + { + Type: string(db.MessageTypeSystem), + UsageData: &zeroUsageStr, // System message with zero usage + }, + { + Type: string(db.MessageTypeUser), + UsageData: &zeroUsageStr, // User message with zero usage + }, + { + Type: string(db.MessageTypeAgent), + UsageData: &validUsageStr, // Agent message with valid usage + }, + { + Type: string(db.MessageTypeUser), + UsageData: &zeroUsageStr, // User message after agent (zero usage) + }, + } + + // Should find the agent message's usage (200 + 50 = 250), not the last message's zero usage + got := calculateContextWindowSize(messages) + want := uint64(250) + + if got != want { + t.Errorf("calculateContextWindowSize() = %d, want %d", got, want) + } + }) } // TestContextWindowGrowsWithConversation tests that the context window size grows diff --git a/server/server.go b/server/server.go index 9cb5f6ade00e061601b22dbc7987aad93e319801..3ddce141a1e8794c917e5c1052042a69a12b886f 100644 --- a/server/server.go +++ b/server/server.go @@ -120,12 +120,13 @@ func extractEndOfTurn(raw string) (bool, bool) { return message.EndOfTurn, true } -// calculateContextWindowSize returns the context window usage from the most recent message. +// calculateContextWindowSize returns the context window usage from the most recent message with non-zero usage. // Each API call's input tokens represent the full conversation history sent to the model, // so we only need the last message's tokens (not accumulated across all messages). // The total input includes regular input tokens plus cached tokens (both read and created). +// Messages without usage data (user messages, tool messages, etc.) are skipped. func calculateContextWindowSize(messages []APIMessage) uint64 { - // Find the last message with usage data + // Find the last message with non-zero usage data for i := len(messages) - 1; i >= 0; i-- { msg := messages[i] if msg.UsageData == nil { @@ -135,9 +136,13 @@ func calculateContextWindowSize(messages []APIMessage) uint64 { if err := json.Unmarshal([]byte(*msg.UsageData), &usage); err != nil { continue } + ctxUsed := usage.ContextWindowUsed() + if ctxUsed == 0 { + continue + } // Return total context window used: all input tokens + output tokens // This represents the full context that would be sent for the next turn - return usage.ContextWindowUsed() + return ctxUsed } return 0 }