Slightly tidy up vector_db code (#2744)
Kirill Bulatov
created 2 years ago
Code snippet
```rust
fn main() {
//√√√√√√√√√√√√√√√√√√√√√√√√√√√√√√√√√√√√√√√√√√√√√√√√√√√√
}
```
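has a length of 191 bytes but consists of only 87 chars, so the debug code calling
`.truncate(100)` panicked: `String::truncate` takes a byte length and panics when that offset does not fall on a char boundary.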
Fixed that issue, cc @KCaverly
Release Notes:
- N/A
Change summary
crates/vector_store/src/embedding.rs | 12 +++++++-----
crates/vector_store/src/parsing.rs | 7 ++-----
2 files changed, 9 insertions(+), 10 deletions(-)
Detailed changes
@@ -67,11 +67,13 @@ impl EmbeddingProvider for DummyEmbeddings {
}
}
+const INPUT_LIMIT: usize = 8190;
+
impl OpenAIEmbeddings {
- async fn truncate(span: String) -> String {
+ fn truncate(span: String) -> String {
let mut tokens = OPENAI_BPE_TOKENIZER.encode_with_special_tokens(span.as_ref());
- if tokens.len() > 8190 {
- tokens.truncate(8190);
+ if tokens.len() > INPUT_LIMIT {
+ tokens.truncate(INPUT_LIMIT);
let result = OPENAI_BPE_TOKENIZER.decode(tokens.clone());
if result.is_ok() {
let transformed = result.unwrap();
@@ -80,7 +82,7 @@ impl OpenAIEmbeddings {
}
}
- return span.to_string();
+ span
}
async fn send_request(&self, api_key: &str, spans: Vec<&str>) -> Result<Response<AsyncBody>> {
@@ -137,7 +139,7 @@ impl EmbeddingProvider for OpenAIEmbeddings {
// Don't worry about delaying bad request, as we can assume
// we haven't been rate limited yet.
for span in spans.iter_mut() {
- *span = Self::truncate(span.to_string()).await;
+ *span = Self::truncate(span.to_string());
}
}
StatusCode::OK => {
@@ -63,7 +63,7 @@ impl CodeContextRetriever {
) {
// log::info!("-----MATCH-----");
- let mut name: Vec<&str> = vec![];
+ let mut name = Vec::new();
let mut item: Option<&str> = None;
let mut offset: Option<usize> = None;
for capture in mat.captures {
@@ -91,11 +91,8 @@ impl CodeContextRetriever {
.replace("<language>", &pending_file.language.name().to_lowercase())
.replace("<item>", item.unwrap());
- let mut truncated_span = context_span.clone();
- truncated_span.truncate(100);
-
// log::info!("Name: {:?}", name);
- // log::info!("Span: {:?}", truncated_span);
+ // log::info!("Span: {:?}", util::truncate(&context_span, 100));
context_spans.push(context_span);
documents.push(Document {