Token count fix (#2935)

Kyle Caverly created

Fix token count for OpenAIEmbeddings

Release Notes (Preview Only)

- Updated the token count calculation for truncated spans in OpenAIEmbeddings
- Increased the request timeout for OpenAI embedding requests

Change summary

crates/semantic_index/src/embedding.rs | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)

Detailed changes

crates/semantic_index/src/embedding.rs 🔗

@@ -181,18 +181,17 @@ impl EmbeddingProvider for OpenAIEmbeddings {
 
     fn truncate(&self, span: &str) -> (String, usize) {
         let mut tokens = OPENAI_BPE_TOKENIZER.encode_with_special_tokens(span);
-        let token_count = tokens.len();
-        let output = if token_count > OPENAI_INPUT_LIMIT {
+        let output = if tokens.len() > OPENAI_INPUT_LIMIT {
             tokens.truncate(OPENAI_INPUT_LIMIT);
             OPENAI_BPE_TOKENIZER
-                .decode(tokens)
+                .decode(tokens.clone())
                 .ok()
                 .unwrap_or_else(|| span.to_string())
         } else {
             span.to_string()
         };
 
-        (output, token_count)
+        (output, tokens.len())
     }
 
     async fn embed_batch(&self, spans: Vec<String>) -> Result<Vec<Embedding>> {
@@ -204,7 +203,7 @@ impl EmbeddingProvider for OpenAIEmbeddings {
             .ok_or_else(|| anyhow!("no api key"))?;
 
         let mut request_number = 0;
-        let mut request_timeout: u64 = 10;
+        let mut request_timeout: u64 = 15;
         let mut response: Response<AsyncBody>;
         while request_number < MAX_RETRIES {
             response = self