agent: Fix bug with double-counting tokens in Gemini (#31885)

Oleksiy Syvokon created

We report the total number of input tokens by summing two counts:
1. Prompt tokens
2. Cached tokens

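However, the Google API returns a prompt token count (1) that already includes
the cached tokens (2), so we were double-counting tokens in some cases. This
change subtracts the cached tokens from the prompt tokens before reporting the
input token count.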

Release Notes:

- Fixed a bug with double-counting tokens in Gemini

Change summary

crates/language_models/src/provider/google.rs | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)

Detailed changes

crates/language_models/src/provider/google.rs 🔗

@@ -685,10 +685,15 @@ fn update_usage(usage: &mut UsageMetadata, new: &UsageMetadata) {
 }
 
 fn convert_usage(usage: &UsageMetadata) -> language_model::TokenUsage {
+    let prompt_tokens = usage.prompt_token_count.unwrap_or(0) as u32;
+    let cached_tokens = usage.cached_content_token_count.unwrap_or(0) as u32;
+    let input_tokens = prompt_tokens - cached_tokens;
+    let output_tokens = usage.candidates_token_count.unwrap_or(0) as u32;
+
     language_model::TokenUsage {
-        input_tokens: usage.prompt_token_count.unwrap_or(0) as u32,
-        output_tokens: usage.candidates_token_count.unwrap_or(0) as u32,
-        cache_read_input_tokens: usage.cached_content_token_count.unwrap_or(0) as u32,
+        input_tokens,
+        output_tokens,
+        cache_read_input_tokens: cached_tokens,
         cache_creation_input_tokens: 0,
     }
 }