@@ -211,8 +211,10 @@ func DefaultUsageFunc(response openai.ChatCompletion) (fantasy.Usage, fantasy.Pr
providerMetadata.RejectedPredictionTokens = completionTokenDetails.RejectedPredictionTokens
}
}
+ // OpenAI's prompt_tokens INCLUDES cached tokens; subtract them so they are counted only once, via CacheReadTokens. (TotalTokens below is left as the raw API total, which still includes cached tokens.)
+ inputTokens := max(response.Usage.PromptTokens-promptTokenDetails.CachedTokens, 0)
return fantasy.Usage{
- InputTokens: response.Usage.PromptTokens,
+ InputTokens: inputTokens,
OutputTokens: response.Usage.CompletionTokens,
TotalTokens: response.Usage.TotalTokens,
ReasoningTokens: completionTokenDetails.ReasoningTokens,
@@ -237,8 +239,10 @@ func DefaultStreamUsageFunc(chunk openai.ChatCompletionChunk, _ map[string]any,
// we do this here because the acc does not add prompt details
completionTokenDetails := chunk.Usage.CompletionTokensDetails
promptTokenDetails := chunk.Usage.PromptTokensDetails
+ // OpenAI's prompt_tokens INCLUDES cached tokens; subtract them so they are counted only once, via CacheReadTokens. (TotalTokens below is left as the raw API total, which still includes cached tokens.)
+ inputTokens := max(chunk.Usage.PromptTokens-promptTokenDetails.CachedTokens, 0)
usage := fantasy.Usage{
- InputTokens: chunk.Usage.PromptTokens,
+ InputTokens: inputTokens,
OutputTokens: chunk.Usage.CompletionTokens,
TotalTokens: chunk.Usage.TotalTokens,
ReasoningTokens: completionTokenDetails.ReasoningTokens,
@@ -1425,7 +1425,8 @@ func TestDoGenerate(t *testing.T) {
require.NoError(t, err)
require.Equal(t, int64(1152), result.Usage.CacheReadTokens)
- require.Equal(t, int64(15), result.Usage.InputTokens)
+ // InputTokens = prompt_tokens - cached_tokens = 15 - 1152 = -1137, clamped to 0. (The fixture's cached count exceeds its prompt count, so this exercises the clamp path.)
+ require.Equal(t, int64(0), result.Usage.InputTokens)
require.Equal(t, int64(20), result.Usage.OutputTokens)
require.Equal(t, int64(35), result.Usage.TotalTokens)
})
@@ -2594,7 +2595,8 @@ func TestDoStream(t *testing.T) {
require.NotNil(t, finishPart)
require.Equal(t, int64(1152), finishPart.Usage.CacheReadTokens)
- require.Equal(t, int64(15), finishPart.Usage.InputTokens)
+ // InputTokens = prompt_tokens - cached_tokens = 15 - 1152 = -1137, clamped to 0. (The fixture's cached count exceeds its prompt count, so this exercises the clamp path.)
+ require.Equal(t, int64(0), finishPart.Usage.InputTokens)
require.Equal(t, int64(20), finishPart.Usage.OutputTokens)
require.Equal(t, int64(35), finishPart.Usage.TotalTokens)
})
@@ -375,8 +375,10 @@ func responsesProviderMetadata(responseID string) fantasy.ProviderMetadata {
}
func responsesUsage(resp responses.Response) fantasy.Usage {
+ // The Responses API's input_tokens INCLUDES cached tokens; subtract them so they are counted only once. (TotalTokens below is still computed from the raw input_tokens, so it includes cached tokens.)
+ inputTokens := max(resp.Usage.InputTokens-resp.Usage.InputTokensDetails.CachedTokens, 0)
usage := fantasy.Usage{
- InputTokens: resp.Usage.InputTokens,
+ InputTokens: inputTokens,
OutputTokens: resp.Usage.OutputTokens,
TotalTokens: resp.Usage.InputTokens + resp.Usage.OutputTokens,
}