From 507852af0920e1c56edc37ecf7596113cd6f473b Mon Sep 17 00:00:00 2001 From: Philip Zeyliger Date: Wed, 21 Jan 2026 19:21:18 -0800 Subject: [PATCH] shelley: Add LLM request tracking with prefix deduplication So, storing LLM messages for debugging/history turned out to be accidentally quadratic. This tries to fix it. The debug page is still not usable, but that's ok. Prompt: Track LLM HTTP requests in database with prefix deduplication for space efficiency, then add a /debug/llm_requests page to view the data - Track LLM HTTP requests in database with custom headers - Add prefix deduplication for llm_requests table to reduce storage - Reorder Anthropic and Gemini request struct fields for better prefix matching - Pass conversation ID through context for prefix dedup (was missing) - Add /debug/llm_requests page with: - Table showing recent requests with size info - Lazy loading of request/response bodies - Collapsible JSON viewer with syntax highlighting - Prefix deduplication info display Tested: prefix deduplication working for both Anthropic and OpenAI providers Co-authored-by: Shelley --- db/db.go | 149 +++++++ db/db_test.go | 413 ++++++++++++++++++ db/generated/llm_requests.sql.go | 181 +++++++- db/generated/models.go | 24 +- db/query/llm_requests.sql | 42 +- db/schema/011-add-llm-request-prefix.sql | 9 + llm/ant/ant.go | 9 +- llm/conversation/testdata/basic_convo.httprr | 4 +- llm/gem/gemini/gemini.go | 11 +- server/convo.go | 5 +- server/debug_handlers.go | 419 +++++++++++++++++++ server/server.go | 6 +- 12 files changed, 1236 insertions(+), 36 deletions(-) create mode 100644 db/schema/011-add-llm-request-prefix.sql create mode 100644 server/debug_handlers.go diff --git a/db/db.go b/db/db.go index 0e4c653c6432ac37dad2dde866574577b2d8fdf2..d95a2baf0f718203d33f842dea9c7af0def7a032 100644 --- a/db/db.go +++ b/db/db.go @@ -695,9 +695,158 @@ func (db *DB) InsertLLMRequest(ctx context.Context, params generated.InsertLLMRe var request generated.LlmRequest err := 
db.pool.Tx(ctx, func(ctx context.Context, tx *Tx) error { q := generated.New(tx.Conn()) + + // If we have a conversation ID and request body, try to find common prefix + if params.ConversationID != nil && params.RequestBody != nil { + // Get the last request for this conversation + lastReq, err := q.GetLastRequestForConversation(ctx, params.ConversationID) + if err == nil { + // Found a previous request - compute common prefix + prefixLen, fullPrevBody := computeSharedPrefixLength(lastReq, *params.RequestBody) + if prefixLen > 0 { + // Store only the suffix + suffix := (*params.RequestBody)[prefixLen:] + params.RequestBody = &suffix + params.PrefixRequestID = &lastReq.ID + prefixLen64 := int64(prefixLen) + params.PrefixLength = &prefixLen64 + _ = fullPrevBody // silence unused warning, used for computing prefix + } + } + // If no previous request found or error, just store the full body + } + var err error request, err = q.InsertLLMRequest(ctx, params) return err }) return &request, err } + +// computeSharedPrefixLength computes the length of the shared prefix between +// the full previous request body (reconstructed by walking the chain) and the new request body. +// It returns the prefix length and the fully reconstructed previous body. +func computeSharedPrefixLength(prevReq generated.LlmRequest, newBody string) (int, string) { + // Get the stored body (which may be just a suffix if prevReq has a prefix reference) + prevBody := "" + if prevReq.RequestBody != nil { + prevBody = *prevReq.RequestBody + } + + // If the previous request has a prefix reference, we need to account for that + // by prepending the prefix length worth of bytes from the new body. + // This works because in a conversation, request N+1 typically starts with + // all of request N plus new content at the end. 
+ if prevReq.PrefixLength != nil && *prevReq.PrefixLength > 0 { + // The previous request's full body would be: + // [first prefix_length bytes that match its parent] + [stored suffix] + // If the new body is a continuation, its first prefix_length bytes + // should match those same bytes. + prefixLen := int(*prevReq.PrefixLength) + if prefixLen <= len(newBody) { + prevBody = newBody[:prefixLen] + prevBody + } + } + + // Compute byte-by-byte shared prefix between reconstructed prevBody and newBody + minLen := len(prevBody) + if len(newBody) < minLen { + minLen = len(newBody) + } + + prefixLen := 0 + for i := 0; i < minLen; i++ { + if prevBody[i] != newBody[i] { + break + } + prefixLen++ + } + + // Only use prefix deduplication if we save meaningful space + // (at least 100 bytes saved) + if prefixLen < 100 { + return 0, prevBody + } + + return prefixLen, prevBody +} + +// ListRecentLLMRequests returns the most recent LLM requests +func (db *DB) ListRecentLLMRequests(ctx context.Context, limit int64) ([]generated.ListRecentLLMRequestsRow, error) { + var requests []generated.ListRecentLLMRequestsRow + err := db.pool.Rx(ctx, func(ctx context.Context, rx *Rx) error { + q := generated.New(rx.Conn()) + var err error + requests, err = q.ListRecentLLMRequests(ctx, limit) + return err + }) + return requests, err +} + +// GetLLMRequestBody returns the raw request body for a request +func (db *DB) GetLLMRequestBody(ctx context.Context, id int64) (*string, error) { + var body *string + err := db.pool.Rx(ctx, func(ctx context.Context, rx *Rx) error { + q := generated.New(rx.Conn()) + var err error + body, err = q.GetLLMRequestBody(ctx, id) + return err + }) + return body, err +} + +// GetLLMResponseBody returns the raw response body for a request +func (db *DB) GetLLMResponseBody(ctx context.Context, id int64) (*string, error) { + var body *string + err := db.pool.Rx(ctx, func(ctx context.Context, rx *Rx) error { + q := generated.New(rx.Conn()) + var err error + body, err = 
q.GetLLMResponseBody(ctx, id) + return err + }) + return body, err +} + +// GetFullLLMRequestBody reconstructs the full request body for a request, +// following the prefix chain if necessary. +func (db *DB) GetFullLLMRequestBody(ctx context.Context, requestID int64) (string, error) { + var result string + err := db.pool.Rx(ctx, func(ctx context.Context, rx *Rx) error { + q := generated.New(rx.Conn()) + return reconstructRequestBody(ctx, q, requestID, &result) + }) + return result, err +} + +// reconstructRequestBody recursively reconstructs the full request body +func reconstructRequestBody(ctx context.Context, q *generated.Queries, requestID int64, result *string) error { + req, err := q.GetLLMRequestByID(ctx, requestID) + if err != nil { + return err + } + + suffix := "" + if req.RequestBody != nil { + suffix = *req.RequestBody + } + + if req.PrefixRequestID == nil || req.PrefixLength == nil || *req.PrefixLength == 0 { + // No prefix reference - the stored body is the full body + *result = suffix + return nil + } + + // Recursively get the parent's full body + var parentBody string + if err := reconstructRequestBody(ctx, q, *req.PrefixRequestID, &parentBody); err != nil { + return err + } + + // The full body is the first prefix_length bytes from the parent + our suffix + prefixLen := int(*req.PrefixLength) + if prefixLen > len(parentBody) { + prefixLen = len(parentBody) + } + *result = parentBody[:prefixLen] + suffix + return nil +} diff --git a/db/db_test.go b/db/db_test.go index 44e027475b1bb3be5043481c8b0eadee8df2312c..23becd394d77408533a889f1209500bf6f2679c4 100644 --- a/db/db_test.go +++ b/db/db_test.go @@ -217,3 +217,416 @@ func TestDB_WithTxRes(t *testing.T) { t.Error("Expected error from WithTxRes, got none") } } + +func TestLLMRequestPrefixDeduplication(t *testing.T) { + db := setupTestDB(t) + defer db.Close() + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + // Create a conversation first + slug := 
"test-prefix-conv" + conv, err := db.CreateConversation(ctx, &slug, true, nil) + if err != nil { + t.Fatalf("Failed to create conversation: %v", err) + } + + // Create a long shared prefix (must be > 100 bytes for deduplication to kick in) + sharedPrefix := strings.Repeat("A", 200) // 200 bytes of 'A's + + // First request - full body stored + req1Body := sharedPrefix + "_suffix1" + req1, err := db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{ + ConversationID: &conv.ConversationID, + Model: "test-model", + Provider: "test-provider", + Url: "http://example.com", + RequestBody: &req1Body, + }) + if err != nil { + t.Fatalf("Failed to insert first request: %v", err) + } + + // First request should have full body, no prefix reference + if req1.PrefixRequestID != nil { + t.Errorf("First request should not have prefix reference, got %v", *req1.PrefixRequestID) + } + if req1.PrefixLength != nil && *req1.PrefixLength != 0 { + t.Errorf("First request should have no prefix length, got %v", *req1.PrefixLength) + } + if req1.RequestBody == nil || *req1.RequestBody != req1Body { + t.Errorf("First request body mismatch: expected %q, got %q", req1Body, safeDeref(req1.RequestBody)) + } + + // Second request - shares prefix with first + req2Body := sharedPrefix + "_suffix2_longer" + req2, err := db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{ + ConversationID: &conv.ConversationID, + Model: "test-model", + Provider: "test-provider", + Url: "http://example.com", + RequestBody: &req2Body, + }) + if err != nil { + t.Fatalf("Failed to insert second request: %v", err) + } + + // Second request should have prefix reference + if req2.PrefixRequestID == nil || *req2.PrefixRequestID != req1.ID { + t.Errorf("Second request should reference first request, got prefix_request_id=%v", safeDeref64(req2.PrefixRequestID)) + } + // Common prefix is sharedPrefix + "_suffix" = 200 + 7 = 207 bytes + expectedPrefixLen := len(sharedPrefix) + len("_suffix") + if req2.PrefixLength == 
nil || *req2.PrefixLength != int64(expectedPrefixLen) { + t.Errorf("Second request prefix length should be %d, got %v", expectedPrefixLen, safeDeref64(req2.PrefixLength)) + } + // Stored body should only be the suffix after the shared prefix ("1" vs "2_longer") + expectedSuffix := "2_longer" + if req2.RequestBody == nil || *req2.RequestBody != expectedSuffix { + t.Errorf("Second request should only store suffix %q, got %q", expectedSuffix, safeDeref(req2.RequestBody)) + } + + // Third request - shares even longer prefix with second + req3Body := sharedPrefix + "_suffix2_longer_and_more" + req3, err := db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{ + ConversationID: &conv.ConversationID, + Model: "test-model", + Provider: "test-provider", + Url: "http://example.com", + RequestBody: &req3Body, + }) + if err != nil { + t.Fatalf("Failed to insert third request: %v", err) + } + + // Third request should reference second request + if req3.PrefixRequestID == nil || *req3.PrefixRequestID != req2.ID { + t.Errorf("Third request should reference second request, got prefix_request_id=%v", safeDeref64(req3.PrefixRequestID)) + } + // The prefix length should be the full length of req2Body (since req3Body starts with req2Body) + if req3.PrefixLength == nil || *req3.PrefixLength != int64(len(sharedPrefix)+len("_suffix2_longer")) { + t.Errorf("Third request prefix length should be %d, got %v", len(sharedPrefix)+len("_suffix2_longer"), safeDeref64(req3.PrefixLength)) + } + + // Test reconstruction of full bodies + reconstructed1, err := db.GetFullLLMRequestBody(ctx, req1.ID) + if err != nil { + t.Fatalf("Failed to reconstruct first request: %v", err) + } + if reconstructed1 != req1Body { + t.Errorf("Reconstructed first request mismatch: expected %q, got %q", req1Body, reconstructed1) + } + + reconstructed2, err := db.GetFullLLMRequestBody(ctx, req2.ID) + if err != nil { + t.Fatalf("Failed to reconstruct second request: %v", err) + } + if reconstructed2 != req2Body { + 
t.Errorf("Reconstructed second request mismatch: expected %q, got %q", req2Body, reconstructed2) + } + + reconstructed3, err := db.GetFullLLMRequestBody(ctx, req3.ID) + if err != nil { + t.Fatalf("Failed to reconstruct third request: %v", err) + } + if reconstructed3 != req3Body { + t.Errorf("Reconstructed third request mismatch: expected %q, got %q", req3Body, reconstructed3) + } +} + +func TestLLMRequestNoPrefixForShortOverlap(t *testing.T) { + db := setupTestDB(t) + defer db.Close() + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + slug := "test-short-conv" + conv, err := db.CreateConversation(ctx, &slug, true, nil) + if err != nil { + t.Fatalf("Failed to create conversation: %v", err) + } + + // Short prefix (< 100 bytes) - should NOT deduplicate + shortPrefix := strings.Repeat("B", 50) + + req1Body := shortPrefix + "_first" + _, err = db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{ + ConversationID: &conv.ConversationID, + Model: "test-model", + Provider: "test-provider", + Url: "http://example.com", + RequestBody: &req1Body, + }) + if err != nil { + t.Fatalf("Failed to insert first request: %v", err) + } + + req2Body := shortPrefix + "_second" + req2, err := db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{ + ConversationID: &conv.ConversationID, + Model: "test-model", + Provider: "test-provider", + Url: "http://example.com", + RequestBody: &req2Body, + }) + if err != nil { + t.Fatalf("Failed to insert second request: %v", err) + } + + // With short prefix, should NOT have prefix reference (full body stored) + if req2.PrefixRequestID != nil { + t.Errorf("Short overlap should not have prefix reference, got %v", *req2.PrefixRequestID) + } + if req2.RequestBody == nil || *req2.RequestBody != req2Body { + t.Errorf("Short overlap should store full body %q, got %q", req2Body, safeDeref(req2.RequestBody)) + } +} + +func TestLLMRequestNoConversationID(t *testing.T) { + db := setupTestDB(t) + defer 
db.Close() + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + // Request without conversation_id - should store full body + reqBody := strings.Repeat("C", 300) + req, err := db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{ + ConversationID: nil, + Model: "test-model", + Provider: "test-provider", + Url: "http://example.com", + RequestBody: &reqBody, + }) + if err != nil { + t.Fatalf("Failed to insert request: %v", err) + } + + // Should not have prefix reference + if req.PrefixRequestID != nil { + t.Errorf("Request without conversation_id should not have prefix reference") + } + if req.RequestBody == nil || *req.RequestBody != reqBody { + t.Errorf("Request should store full body") + } +} + +func safeDeref(s *string) string { + if s == nil { + return "" + } + return *s +} + +func safeDeref64(i *int64) int64 { + if i == nil { + return -1 + } + return *i +} + +func TestLLMRequestRealisticConversation(t *testing.T) { + // This test simulates realistic LLM API request patterns where each + // subsequent request includes all previous messages plus new ones + db := setupTestDB(t) + defer db.Close() + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + slug := "test-realistic-conv" + conv, err := db.CreateConversation(ctx, &slug, true, nil) + if err != nil { + t.Fatalf("Failed to create conversation: %v", err) + } + + // Simulate Anthropic-style messages array growing over conversation + // Each request adds to the previous messages + baseRequest := `{"model":"claude-sonnet-4-5-20250929","system":[{"type":"text","text":"You are a helpful assistant."}],"messages":[` + + message1 := `{"role":"user","content":[{"type":"text","text":"Hello, how are you?"}]}` + req1Body := baseRequest + message1 + `],"max_tokens":8192}` + + req1, err := db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{ + ConversationID: &conv.ConversationID, + Model: "claude-sonnet-4-5-20250929", + 
Provider: "anthropic", + Url: "https://api.anthropic.com/v1/messages", + RequestBody: &req1Body, + }) + if err != nil { + t.Fatalf("Failed to insert first request: %v", err) + } + + // First request stored in full + if req1.PrefixRequestID != nil { + t.Errorf("First request should not have prefix reference") + } + + // Second request: user message + assistant response + new user message + message2 := `{"role":"assistant","content":[{"type":"text","text":"I'm doing well, thank you for asking!"}]}` + message3 := `{"role":"user","content":[{"type":"text","text":"Can you help me write some code?"}]}` + req2Body := baseRequest + message1 + `,` + message2 + `,` + message3 + `],"max_tokens":8192}` + + req2, err := db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{ + ConversationID: &conv.ConversationID, + Model: "claude-sonnet-4-5-20250929", + Provider: "anthropic", + Url: "https://api.anthropic.com/v1/messages", + RequestBody: &req2Body, + }) + if err != nil { + t.Fatalf("Failed to insert second request: %v", err) + } + + // Second request should have prefix deduplication + if req2.PrefixRequestID == nil { + t.Errorf("Second request should have prefix reference") + } else if *req2.PrefixRequestID != req1.ID { + t.Errorf("Second request should reference first request") + } + + // Verify prefix length is reasonable (should be at least the base + message1 length) + minExpectedPrefix := len(baseRequest) + len(message1) + if req2.PrefixLength == nil || *req2.PrefixLength < int64(minExpectedPrefix) { + t.Errorf("Second request prefix length should be at least %d, got %v", minExpectedPrefix, safeDeref64(req2.PrefixLength)) + } + + // Verify we saved significant space + req2StoredLen := len(safeDeref(req2.RequestBody)) + req2FullLen := len(req2Body) + if req2StoredLen >= req2FullLen { + t.Errorf("Second request should store less than full body: stored %d, full %d", req2StoredLen, req2FullLen) + } + t.Logf("Space saved for request 2: %d bytes (%.1f%% reduction)", + 
req2FullLen-req2StoredLen, + 100.0*float64(req2FullLen-req2StoredLen)/float64(req2FullLen)) + + // Third request: even more messages + message4 := `{"role":"assistant","content":[{"type":"text","text":"Of course! What kind of code would you like me to help you with?"}]}` + message5 := `{"role":"user","content":[{"type":"text","text":"I need a function to calculate fibonacci numbers."}]}` + req3Body := baseRequest + message1 + `,` + message2 + `,` + message3 + `,` + message4 + `,` + message5 + `],"max_tokens":8192}` + + req3, err := db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{ + ConversationID: &conv.ConversationID, + Model: "claude-sonnet-4-5-20250929", + Provider: "anthropic", + Url: "https://api.anthropic.com/v1/messages", + RequestBody: &req3Body, + }) + if err != nil { + t.Fatalf("Failed to insert third request: %v", err) + } + + // Third request should reference second + if req3.PrefixRequestID == nil || *req3.PrefixRequestID != req2.ID { + t.Errorf("Third request should reference second request") + } + + req3StoredLen := len(safeDeref(req3.RequestBody)) + req3FullLen := len(req3Body) + t.Logf("Space saved for request 3: %d bytes (%.1f%% reduction)", + req3FullLen-req3StoredLen, + 100.0*float64(req3FullLen-req3StoredLen)/float64(req3FullLen)) + + // Verify reconstruction works for all requests + reconstructed1, err := db.GetFullLLMRequestBody(ctx, req1.ID) + if err != nil { + t.Fatalf("Failed to reconstruct request 1: %v", err) + } + if reconstructed1 != req1Body { + t.Errorf("Reconstructed request 1 mismatch") + } + + reconstructed2, err := db.GetFullLLMRequestBody(ctx, req2.ID) + if err != nil { + t.Fatalf("Failed to reconstruct request 2: %v", err) + } + if reconstructed2 != req2Body { + t.Errorf("Reconstructed request 2 mismatch") + } + + reconstructed3, err := db.GetFullLLMRequestBody(ctx, req3.ID) + if err != nil { + t.Fatalf("Failed to reconstruct request 3: %v", err) + } + if reconstructed3 != req3Body { + t.Errorf("Reconstructed request 
3 mismatch") + } + + // Calculate total storage savings + totalOriginal := len(req1Body) + len(req2Body) + len(req3Body) + totalStored := len(safeDeref(req1.RequestBody)) + len(safeDeref(req2.RequestBody)) + len(safeDeref(req3.RequestBody)) + t.Logf("Total space: original %d bytes, stored %d bytes, saved %d bytes (%.1f%% reduction)", + totalOriginal, totalStored, totalOriginal-totalStored, + 100.0*float64(totalOriginal-totalStored)/float64(totalOriginal)) +} + +func TestLLMRequestOpenAIStyle(t *testing.T) { + // Test with OpenAI-style request format + db := setupTestDB(t) + defer db.Close() + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + slug := "test-openai-conv" + conv, err := db.CreateConversation(ctx, &slug, true, nil) + if err != nil { + t.Fatalf("Failed to create conversation: %v", err) + } + + // OpenAI-style request format + baseRequest := `{"model":"gpt-4","messages":[` + message1 := `{"role":"system","content":"You are a helpful assistant."},{"role":"user","content":"Hello!"}` + req1Body := baseRequest + message1 + `],"stream":true}` + + req1, err := db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{ + ConversationID: &conv.ConversationID, + Model: "gpt-4", + Provider: "openai", + Url: "https://api.openai.com/v1/chat/completions", + RequestBody: &req1Body, + }) + if err != nil { + t.Fatalf("Failed to insert first request: %v", err) + } + + // Second request with more messages + message2 := `{"role":"assistant","content":"Hello! 
How can I help you today?"},{"role":"user","content":"What's the weather like?"}` + req2Body := baseRequest + message1 + `,` + message2 + `],"stream":true}` + + req2, err := db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{ + ConversationID: &conv.ConversationID, + Model: "gpt-4", + Provider: "openai", + Url: "https://api.openai.com/v1/chat/completions", + RequestBody: &req2Body, + }) + if err != nil { + t.Fatalf("Failed to insert second request: %v", err) + } + + // Should have prefix deduplication + if req2.PrefixRequestID == nil || *req2.PrefixRequestID != req1.ID { + t.Errorf("Second request should reference first request") + } + + // Verify reconstruction + reconstructed2, err := db.GetFullLLMRequestBody(ctx, req2.ID) + if err != nil { + t.Fatalf("Failed to reconstruct second request: %v", err) + } + if reconstructed2 != req2Body { + t.Errorf("Reconstructed request mismatch:\nexpected: %s\ngot: %s", req2Body, reconstructed2) + } + + // Calculate savings + req2StoredLen := len(safeDeref(req2.RequestBody)) + req2FullLen := len(req2Body) + t.Logf("OpenAI-style space saved: %d bytes (%.1f%% reduction)", + req2FullLen-req2StoredLen, + 100.0*float64(req2FullLen-req2StoredLen)/float64(req2FullLen)) +} diff --git a/db/generated/llm_requests.sql.go b/db/generated/llm_requests.sql.go index 0b6b88f0004e42d856be98e770eaeb099c051530..599d6023040b91d281b84f32b1d87048be6e1ac5 100644 --- a/db/generated/llm_requests.sql.go +++ b/db/generated/llm_requests.sql.go @@ -7,8 +7,84 @@ package generated import ( "context" + "time" ) +const getLLMRequestBody = `-- name: GetLLMRequestBody :one +SELECT request_body FROM llm_requests WHERE id = ? 
+` + +func (q *Queries) GetLLMRequestBody(ctx context.Context, id int64) (*string, error) { + row := q.db.QueryRowContext(ctx, getLLMRequestBody, id) + var request_body *string + err := row.Scan(&request_body) + return request_body, err +} + +const getLLMRequestByID = `-- name: GetLLMRequestByID :one +SELECT id, conversation_id, model, provider, url, request_body, response_body, status_code, error, duration_ms, created_at, prefix_request_id, prefix_length FROM llm_requests WHERE id = ? +` + +func (q *Queries) GetLLMRequestByID(ctx context.Context, id int64) (LlmRequest, error) { + row := q.db.QueryRowContext(ctx, getLLMRequestByID, id) + var i LlmRequest + err := row.Scan( + &i.ID, + &i.ConversationID, + &i.Model, + &i.Provider, + &i.Url, + &i.RequestBody, + &i.ResponseBody, + &i.StatusCode, + &i.Error, + &i.DurationMs, + &i.CreatedAt, + &i.PrefixRequestID, + &i.PrefixLength, + ) + return i, err +} + +const getLLMResponseBody = `-- name: GetLLMResponseBody :one +SELECT response_body FROM llm_requests WHERE id = ? +` + +func (q *Queries) GetLLMResponseBody(ctx context.Context, id int64) (*string, error) { + row := q.db.QueryRowContext(ctx, getLLMResponseBody, id) + var response_body *string + err := row.Scan(&response_body) + return response_body, err +} + +const getLastRequestForConversation = `-- name: GetLastRequestForConversation :one +SELECT id, conversation_id, model, provider, url, request_body, response_body, status_code, error, duration_ms, created_at, prefix_request_id, prefix_length FROM llm_requests +WHERE conversation_id = ? 
+ORDER BY id DESC +LIMIT 1 +` + +func (q *Queries) GetLastRequestForConversation(ctx context.Context, conversationID *string) (LlmRequest, error) { + row := q.db.QueryRowContext(ctx, getLastRequestForConversation, conversationID) + var i LlmRequest + err := row.Scan( + &i.ID, + &i.ConversationID, + &i.Model, + &i.Provider, + &i.Url, + &i.RequestBody, + &i.ResponseBody, + &i.StatusCode, + &i.Error, + &i.DurationMs, + &i.CreatedAt, + &i.PrefixRequestID, + &i.PrefixLength, + ) + return i, err +} + const insertLLMRequest = `-- name: InsertLLMRequest :one INSERT INTO llm_requests ( conversation_id, @@ -19,21 +95,25 @@ INSERT INTO llm_requests ( response_body, status_code, error, - duration_ms -) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) -RETURNING id, conversation_id, model, provider, url, request_body, response_body, status_code, error, duration_ms, created_at + duration_ms, + prefix_request_id, + prefix_length +) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) +RETURNING id, conversation_id, model, provider, url, request_body, response_body, status_code, error, duration_ms, created_at, prefix_request_id, prefix_length ` type InsertLLMRequestParams struct { - ConversationID *string `json:"conversation_id"` - Model string `json:"model"` - Provider string `json:"provider"` - Url string `json:"url"` - RequestBody *string `json:"request_body"` - ResponseBody *string `json:"response_body"` - StatusCode *int64 `json:"status_code"` - Error *string `json:"error"` - DurationMs *int64 `json:"duration_ms"` + ConversationID *string `json:"conversation_id"` + Model string `json:"model"` + Provider string `json:"provider"` + Url string `json:"url"` + RequestBody *string `json:"request_body"` + ResponseBody *string `json:"response_body"` + StatusCode *int64 `json:"status_code"` + Error *string `json:"error"` + DurationMs *int64 `json:"duration_ms"` + PrefixRequestID *int64 `json:"prefix_request_id"` + PrefixLength *int64 `json:"prefix_length"` } func (q *Queries) InsertLLMRequest(ctx 
context.Context, arg InsertLLMRequestParams) (LlmRequest, error) { @@ -47,6 +127,8 @@ func (q *Queries) InsertLLMRequest(ctx context.Context, arg InsertLLMRequestPara arg.StatusCode, arg.Error, arg.DurationMs, + arg.PrefixRequestID, + arg.PrefixLength, ) var i LlmRequest err := row.Scan( @@ -61,6 +143,81 @@ func (q *Queries) InsertLLMRequest(ctx context.Context, arg InsertLLMRequestPara &i.Error, &i.DurationMs, &i.CreatedAt, + &i.PrefixRequestID, + &i.PrefixLength, ) return i, err } + +const listRecentLLMRequests = `-- name: ListRecentLLMRequests :many +SELECT + id, + conversation_id, + model, + provider, + url, + LENGTH(request_body) as request_body_length, + LENGTH(response_body) as response_body_length, + status_code, + error, + duration_ms, + created_at, + prefix_request_id, + prefix_length +FROM llm_requests +ORDER BY id DESC +LIMIT ? +` + +type ListRecentLLMRequestsRow struct { + ID int64 `json:"id"` + ConversationID *string `json:"conversation_id"` + Model string `json:"model"` + Provider string `json:"provider"` + Url string `json:"url"` + RequestBodyLength *int64 `json:"request_body_length"` + ResponseBodyLength *int64 `json:"response_body_length"` + StatusCode *int64 `json:"status_code"` + Error *string `json:"error"` + DurationMs *int64 `json:"duration_ms"` + CreatedAt time.Time `json:"created_at"` + PrefixRequestID *int64 `json:"prefix_request_id"` + PrefixLength *int64 `json:"prefix_length"` +} + +func (q *Queries) ListRecentLLMRequests(ctx context.Context, limit int64) ([]ListRecentLLMRequestsRow, error) { + rows, err := q.db.QueryContext(ctx, listRecentLLMRequests, limit) + if err != nil { + return nil, err + } + defer rows.Close() + items := []ListRecentLLMRequestsRow{} + for rows.Next() { + var i ListRecentLLMRequestsRow + if err := rows.Scan( + &i.ID, + &i.ConversationID, + &i.Model, + &i.Provider, + &i.Url, + &i.RequestBodyLength, + &i.ResponseBodyLength, + &i.StatusCode, + &i.Error, + &i.DurationMs, + &i.CreatedAt, + &i.PrefixRequestID, + 
&i.PrefixLength, + ); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} diff --git a/db/generated/models.go b/db/generated/models.go index 8b43efda0cb45058a5955f4e1d238c7b1a42a99f..4bd0ad9a39e9e498850b1886f7aeb214291b8c49 100644 --- a/db/generated/models.go +++ b/db/generated/models.go @@ -20,17 +20,19 @@ type Conversation struct { } type LlmRequest struct { - ID int64 `json:"id"` - ConversationID *string `json:"conversation_id"` - Model string `json:"model"` - Provider string `json:"provider"` - Url string `json:"url"` - RequestBody *string `json:"request_body"` - ResponseBody *string `json:"response_body"` - StatusCode *int64 `json:"status_code"` - Error *string `json:"error"` - DurationMs *int64 `json:"duration_ms"` - CreatedAt time.Time `json:"created_at"` + ID int64 `json:"id"` + ConversationID *string `json:"conversation_id"` + Model string `json:"model"` + Provider string `json:"provider"` + Url string `json:"url"` + RequestBody *string `json:"request_body"` + ResponseBody *string `json:"response_body"` + StatusCode *int64 `json:"status_code"` + Error *string `json:"error"` + DurationMs *int64 `json:"duration_ms"` + CreatedAt time.Time `json:"created_at"` + PrefixRequestID *int64 `json:"prefix_request_id"` + PrefixLength *int64 `json:"prefix_length"` } type Message struct { diff --git a/db/query/llm_requests.sql b/db/query/llm_requests.sql index 90aeab858de6a87378673a9d786370304ef2f295..3f1b8c10db355687a4f1859e5522bf9fbc59655f 100644 --- a/db/query/llm_requests.sql +++ b/db/query/llm_requests.sql @@ -8,6 +8,44 @@ INSERT INTO llm_requests ( response_body, status_code, error, - duration_ms -) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + duration_ms, + prefix_request_id, + prefix_length +) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
RETURNING *; + +-- name: GetLastRequestForConversation :one +SELECT * FROM llm_requests +WHERE conversation_id = ? +ORDER BY id DESC +LIMIT 1; + +-- name: GetLLMRequestByID :one +SELECT * FROM llm_requests WHERE id = ?; + +-- name: ListRecentLLMRequests :many +SELECT + id, + conversation_id, + model, + provider, + url, + LENGTH(request_body) as request_body_length, + LENGTH(response_body) as response_body_length, + status_code, + error, + duration_ms, + created_at, + prefix_request_id, + prefix_length +FROM llm_requests +ORDER BY id DESC +LIMIT ?; + +-- name: GetLLMRequestBody :one +SELECT request_body FROM llm_requests WHERE id = ?; + +-- name: GetLLMResponseBody :one +SELECT response_body FROM llm_requests WHERE id = ?; + + diff --git a/db/schema/011-add-llm-request-prefix.sql b/db/schema/011-add-llm-request-prefix.sql new file mode 100644 index 0000000000000000000000000000000000000000..e8397143ed945b70059743934ddf3713124325f4 --- /dev/null +++ b/db/schema/011-add-llm-request-prefix.sql @@ -0,0 +1,9 @@ +-- Add prefix deduplication columns to llm_requests table +-- This allows storing only the suffix of request_body when there's a shared prefix +-- with a previous request in the same conversation. + +ALTER TABLE llm_requests ADD COLUMN prefix_request_id INTEGER REFERENCES llm_requests(id); +ALTER TABLE llm_requests ADD COLUMN prefix_length INTEGER; + +-- Index for efficient prefix lookups +CREATE INDEX idx_llm_requests_prefix_request_id ON llm_requests(prefix_request_id) WHERE prefix_request_id IS NOT NULL; diff --git a/llm/ant/ant.go b/llm/ant/ant.go index 3b2c2579bfe38fc7107e809dcf19dc53a8ad6d8c..aa3929b7ebc47c4e6efa1f1e06bfc051c2f4ef0f 100644 --- a/llm/ant/ant.go +++ b/llm/ant/ant.go @@ -218,17 +218,20 @@ type systemContent struct { // request represents the request payload for creating a message. 
type request struct { + // Field order matters for JSON serialization - stable fields should come first + // to maximize prefix deduplication when storing LLM requests. Model string `json:"model"` - Messages []message `json:"messages"` - ToolChoice *toolChoice `json:"tool_choice,omitempty"` MaxTokens int `json:"max_tokens"` - Tools []*tool `json:"tools,omitempty"` Stream bool `json:"stream,omitempty"` System []systemContent `json:"system,omitempty"` + Tools []*tool `json:"tools,omitempty"` + ToolChoice *toolChoice `json:"tool_choice,omitempty"` Temperature float64 `json:"temperature,omitempty"` TopK int `json:"top_k,omitempty"` TopP float64 `json:"top_p,omitempty"` StopSequences []string `json:"stop_sequences,omitempty"` + // Messages comes last since it grows with each request in a conversation + Messages []message `json:"messages"` } func mapped[Slice ~[]E, E, T any](s Slice, f func(E) T) []T { diff --git a/llm/conversation/testdata/basic_convo.httprr b/llm/conversation/testdata/basic_convo.httprr index 4a3d3aa868791ff94e8cec8402de4e4208329e67..ad8af0a17518295671a8e1f5691cc538515cbf31 100644 --- a/llm/conversation/testdata/basic_convo.httprr +++ b/llm/conversation/testdata/basic_convo.httprr @@ -7,7 +7,7 @@ Content-Length: 183 Anthropic-Version: 2023-06-01 Content-Type: application/json -{"model":"claude-sonnet-4-20250514","messages":[{"role":"user","content":[{"type":"text","text":"Hi, my name is Cornelius","cache_control":{"type":"ephemeral"}}]}],"max_tokens":8192} +{"model":"claude-sonnet-4-20250514","max_tokens":8192,"messages":[{"role":"user","content":[{"type":"text","text":"Hi, my name is Cornelius","cache_control":{"type":"ephemeral"}}]}]} HTTP/2.0 200 OK Anthropic-Organization-Id: 3c473a21-7208-450a-a9f8-80aebda45c1b Anthropic-Ratelimit-Input-Tokens-Limit: 4000000 @@ -37,7 +37,7 @@ Content-Length: 454 Anthropic-Version: 2023-06-01 Content-Type: application/json 
-{"model":"claude-sonnet-4-20250514","messages":[{"role":"user","content":[{"type":"text","text":"Hi, my name is Cornelius"}]},{"role":"assistant","content":[{"type":"text","text":"Hello Cornelius! It's nice to meet you. That's a distinctive and classic name. How are you doing today? Is there anything I can help you with?"}]},{"role":"user","content":[{"type":"text","text":"What is my name?","cache_control":{"type":"ephemeral"}}]}],"max_tokens":8192} +{"model":"claude-sonnet-4-20250514","max_tokens":8192,"messages":[{"role":"user","content":[{"type":"text","text":"Hi, my name is Cornelius"}]},{"role":"assistant","content":[{"type":"text","text":"Hello Cornelius! It's nice to meet you. That's a distinctive and classic name. How are you doing today? Is there anything I can help you with?"}]},{"role":"user","content":[{"type":"text","text":"What is my name?","cache_control":{"type":"ephemeral"}}]}]} HTTP/2.0 200 OK Anthropic-Organization-Id: 3c473a21-7208-450a-a9f8-80aebda45c1b Anthropic-Ratelimit-Input-Tokens-Limit: 4000000 diff --git a/llm/gem/gemini/gemini.go b/llm/gem/gemini/gemini.go index eeafbaf25cbdf2e3c0a9dfb090fe9a47e1231f95..609fa8e5da19c68a45a4a4bc4e108e83bfc3d600 100644 --- a/llm/gem/gemini/gemini.go +++ b/llm/gem/gemini/gemini.go @@ -11,12 +11,15 @@ import ( // https://ai.google.dev/api/generate-content#request-body type Request struct { - Contents []Content `json:"contents"` - Tools []Tool `json:"tools,omitempty"` - SystemInstruction *Content `json:"systemInstruction,omitempty"` - GenerationConfig *GenerationConfig `json:"generationConfig,omitempty"` + // Field order matters for JSON serialization - stable fields should come first + // to maximize prefix deduplication when storing LLM requests. 
CachedContent string `json:"cachedContent,omitempty"` // format: "cachedContents/{name}" + GenerationConfig *GenerationConfig `json:"generationConfig,omitempty"` + SystemInstruction *Content `json:"systemInstruction,omitempty"` + Tools []Tool `json:"tools,omitempty"` // ToolConfig has been left out because it does not appear to be useful. + // Contents comes last since it grows with each request in a conversation + Contents []Content `json:"contents"` } // https://ai.google.dev/api/generate-content#response-body diff --git a/server/convo.go b/server/convo.go index 534c741c3f5225436da4a4c0bfb8fc31ca2f914d..b94b37526b4db470b5883e66449af74ddcf9cfe2 100644 --- a/server/convo.go +++ b/server/convo.go @@ -13,6 +13,7 @@ import ( "shelley.exe.dev/db/generated" "shelley.exe.dev/gitstate" "shelley.exe.dev/llm" + "shelley.exe.dev/llm/llmhttp" "shelley.exe.dev/loop" "shelley.exe.dev/subpub" ) @@ -372,7 +373,9 @@ func (cm *ConversationManager) ensureLoop(service llm.Service, modelID string) e } } - processCtx, cancel := context.WithTimeout(context.Background(), 12*time.Hour) + // Create a context with the conversation ID for LLM request recording/prefix dedup + baseCtx := llmhttp.WithConversationID(context.Background(), conversationID) + processCtx, cancel := context.WithTimeout(baseCtx, 12*time.Hour) toolSet := claudetool.NewToolSet(processCtx, toolSetConfig) loopInstance := loop.NewLoop(loop.Config{ diff --git a/server/debug_handlers.go b/server/debug_handlers.go new file mode 100644 index 0000000000000000000000000000000000000000..3ff6e156cf81651209d1a66e92f2d3b775b613e4 --- /dev/null +++ b/server/debug_handlers.go @@ -0,0 +1,419 @@ +package server + +import ( + "encoding/json" + "net/http" + "strconv" +) + +// handleDebugLLMRequests serves the debug page for LLM requests +func (s *Server) handleDebugLLMRequests(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html") + w.Write([]byte(debugLLMRequestsHTML)) +} + +// handleDebugLLMRequestsAPI 
returns recent LLM requests as JSON +func (s *Server) handleDebugLLMRequestsAPI(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + + limit := int64(100) + if limitStr := r.URL.Query().Get("limit"); limitStr != "" { + if l, err := strconv.ParseInt(limitStr, 10, 64); err == nil && l > 0 { + limit = l + } + } + + requests, err := s.db.ListRecentLLMRequests(ctx, limit) + if err != nil { + s.logger.Error("Failed to list LLM requests", "error", err) + http.Error(w, "Internal server error", http.StatusInternalServerError) + return + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(requests) +} + +// handleDebugLLMRequestBody returns the stored request body for a specific LLM request; when prefix deduplication applied, this is only the suffix after the shared prefix +func (s *Server) handleDebugLLMRequestBody(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + + idStr := r.PathValue("id") + id, err := strconv.ParseInt(idStr, 10, 64) + if err != nil { + http.Error(w, "Invalid ID", http.StatusBadRequest) + return + } + + body, err := s.db.GetLLMRequestBody(ctx, id) + if err != nil { + s.logger.Error("Failed to get LLM request body", "error", err, "id", id) + http.Error(w, "Not found", http.StatusNotFound) + return + } + + if body == nil { + w.Header().Set("Content-Type", "application/json") + w.Write([]byte("null")) + return + } + + w.Header().Set("Content-Type", "application/json") + w.Write([]byte(*body)) +} + +// handleDebugLLMResponseBody returns the response body for a specific LLM request +func (s *Server) handleDebugLLMResponseBody(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + + idStr := r.PathValue("id") + id, err := strconv.ParseInt(idStr, 10, 64) + if err != nil { + http.Error(w, "Invalid ID", http.StatusBadRequest) + return + } + + body, err := s.db.GetLLMResponseBody(ctx, id) + if err != nil { + s.logger.Error("Failed to get LLM response body", "error", err, "id", id) + http.Error(w, "Not found", http.StatusNotFound) + return + } + + if body == nil { + 
w.Header().Set("Content-Type", "application/json") + w.Write([]byte("null")) + return + } + + w.Header().Set("Content-Type", "application/json") + w.Write([]byte(*body)) +} + +const debugLLMRequestsHTML = ` + + + + +Debug: LLM Requests + + + +

LLM Requests

+ + + + + + + + + + + + + + + + + + +
IDTimeModelProviderStatusDurationRequest SizeResponse SizePrefix InfoActions
Loading...
+ + + + +` diff --git a/server/server.go b/server/server.go index fc29c0e54af1db25a46d8caf55f1453cc2953d10..d2fe39610827e4baa65ea103e4591781aafba40b 100644 --- a/server/server.go +++ b/server/server.go @@ -268,7 +268,11 @@ func (s *Server) RegisterRoutes(mux *http.ServeMux) { mux.Handle("POST /upgrade", http.HandlerFunc(s.handleUpgrade)) mux.Handle("POST /exit", http.HandlerFunc(s.handleExit)) - // Debug routes + // Debug endpoints + mux.Handle("GET /debug/llm_requests", http.HandlerFunc(s.handleDebugLLMRequests)) + mux.Handle("GET /debug/llm_requests/api", http.HandlerFunc(s.handleDebugLLMRequestsAPI)) + mux.Handle("GET /debug/llm_requests/{id}/request", http.HandlerFunc(s.handleDebugLLMRequestBody)) + mux.Handle("GET /debug/llm_requests/{id}/response", http.HandlerFunc(s.handleDebugLLMResponseBody)) // Serve embedded UI assets mux.Handle("/", s.staticHandler(ui.Assets()))