Detailed changes
@@ -695,9 +695,158 @@ func (db *DB) InsertLLMRequest(ctx context.Context, params generated.InsertLLMRe
var request generated.LlmRequest
err := db.pool.Tx(ctx, func(ctx context.Context, tx *Tx) error {
q := generated.New(tx.Conn())
+
+ // If we have a conversation ID and request body, try to find common prefix
+ if params.ConversationID != nil && params.RequestBody != nil {
+ // Get the last request for this conversation
+ lastReq, err := q.GetLastRequestForConversation(ctx, params.ConversationID)
+ if err == nil {
+ // Found a previous request - compute common prefix
+ prefixLen, fullPrevBody := computeSharedPrefixLength(lastReq, *params.RequestBody)
+ if prefixLen > 0 {
+ // Store only the suffix
+ suffix := (*params.RequestBody)[prefixLen:]
+ params.RequestBody = &suffix
+ params.PrefixRequestID = &lastReq.ID
+ prefixLen64 := int64(prefixLen)
+ params.PrefixLength = &prefixLen64
+ _ = fullPrevBody // silence unused warning, used for computing prefix
+ }
+ }
+ // If no previous request found or error, just store the full body
+ }
+
var err error
request, err = q.InsertLLMRequest(ctx, params)
return err
})
return &request, err
}
+
+// computeSharedPrefixLength computes the length of the shared prefix between
+// the full previous request body (reconstructed by walking the chain) and the new request body.
+// It returns the prefix length and the fully reconstructed previous body.
+func computeSharedPrefixLength(prevReq generated.LlmRequest, newBody string) (int, string) {
+ // Get the stored body (which may be just a suffix if prevReq has a prefix reference)
+ prevBody := ""
+ if prevReq.RequestBody != nil {
+ prevBody = *prevReq.RequestBody
+ }
+
+ // If the previous request has a prefix reference, we need to account for that
+ // by prepending the prefix length worth of bytes from the new body.
+ // This works because in a conversation, request N+1 typically starts with
+ // all of request N plus new content at the end.
+ if prevReq.PrefixLength != nil && *prevReq.PrefixLength > 0 {
+ // The previous request's full body would be:
+ // [first prefix_length bytes that match its parent] + [stored suffix]
+ // If the new body is a continuation, its first prefix_length bytes
+ // should match those same bytes.
+ prefixLen := int(*prevReq.PrefixLength)
+ if prefixLen <= len(newBody) {
+ prevBody = newBody[:prefixLen] + prevBody
+ }
+ }
+
+ // Compute byte-by-byte shared prefix between reconstructed prevBody and newBody
+ minLen := len(prevBody)
+ if len(newBody) < minLen {
+ minLen = len(newBody)
+ }
+
+ prefixLen := 0
+ for i := 0; i < minLen; i++ {
+ if prevBody[i] != newBody[i] {
+ break
+ }
+ prefixLen++
+ }
+
+ // Only use prefix deduplication if we save meaningful space
+ // (at least 100 bytes saved)
+ if prefixLen < 100 {
+ return 0, prevBody
+ }
+
+ return prefixLen, prevBody
+}
+
+// ListRecentLLMRequests returns the most recent LLM requests
+func (db *DB) ListRecentLLMRequests(ctx context.Context, limit int64) ([]generated.ListRecentLLMRequestsRow, error) {
+ var requests []generated.ListRecentLLMRequestsRow
+ err := db.pool.Rx(ctx, func(ctx context.Context, rx *Rx) error {
+ q := generated.New(rx.Conn())
+ var err error
+ requests, err = q.ListRecentLLMRequests(ctx, limit)
+ return err
+ })
+ return requests, err
+}
+
+// GetLLMRequestBody returns the raw request body for a request
+func (db *DB) GetLLMRequestBody(ctx context.Context, id int64) (*string, error) {
+ var body *string
+ err := db.pool.Rx(ctx, func(ctx context.Context, rx *Rx) error {
+ q := generated.New(rx.Conn())
+ var err error
+ body, err = q.GetLLMRequestBody(ctx, id)
+ return err
+ })
+ return body, err
+}
+
+// GetLLMResponseBody returns the raw response body for a request
+func (db *DB) GetLLMResponseBody(ctx context.Context, id int64) (*string, error) {
+ var body *string
+ err := db.pool.Rx(ctx, func(ctx context.Context, rx *Rx) error {
+ q := generated.New(rx.Conn())
+ var err error
+ body, err = q.GetLLMResponseBody(ctx, id)
+ return err
+ })
+ return body, err
+}
+
+// GetFullLLMRequestBody reconstructs the full request body for a request,
+// following the prefix chain if necessary.
+func (db *DB) GetFullLLMRequestBody(ctx context.Context, requestID int64) (string, error) {
+ var result string
+ err := db.pool.Rx(ctx, func(ctx context.Context, rx *Rx) error {
+ q := generated.New(rx.Conn())
+ return reconstructRequestBody(ctx, q, requestID, &result)
+ })
+ return result, err
+}
+
+// reconstructRequestBody recursively reconstructs the full request body
+func reconstructRequestBody(ctx context.Context, q *generated.Queries, requestID int64, result *string) error {
+ req, err := q.GetLLMRequestByID(ctx, requestID)
+ if err != nil {
+ return err
+ }
+
+ suffix := ""
+ if req.RequestBody != nil {
+ suffix = *req.RequestBody
+ }
+
+ if req.PrefixRequestID == nil || req.PrefixLength == nil || *req.PrefixLength == 0 {
+ // No prefix reference - the stored body is the full body
+ *result = suffix
+ return nil
+ }
+
+ // Recursively get the parent's full body
+ var parentBody string
+ if err := reconstructRequestBody(ctx, q, *req.PrefixRequestID, &parentBody); err != nil {
+ return err
+ }
+
+ // The full body is the first prefix_length bytes from the parent + our suffix
+ prefixLen := int(*req.PrefixLength)
+ if prefixLen > len(parentBody) {
+ prefixLen = len(parentBody)
+ }
+ *result = parentBody[:prefixLen] + suffix
+ return nil
+}
@@ -217,3 +217,416 @@ func TestDB_WithTxRes(t *testing.T) {
t.Error("Expected error from WithTxRes, got none")
}
}
+
// TestLLMRequestPrefixDeduplication exercises insert-time prefix
// deduplication end to end: three requests in one conversation with growing
// bodies, verifying the stored suffixes, the prefix metadata, and full-body
// reconstruction through the chain.
func TestLLMRequestPrefixDeduplication(t *testing.T) {
	db := setupTestDB(t)
	defer db.Close()

	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	// Create a conversation first
	slug := "test-prefix-conv"
	conv, err := db.CreateConversation(ctx, &slug, true, nil)
	if err != nil {
		t.Fatalf("Failed to create conversation: %v", err)
	}

	// Create a long shared prefix (must be > 100 bytes for deduplication to kick in)
	sharedPrefix := strings.Repeat("A", 200) // 200 bytes of 'A's

	// First request - full body stored
	req1Body := sharedPrefix + "_suffix1"
	req1, err := db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{
		ConversationID: &conv.ConversationID,
		Model:          "test-model",
		Provider:       "test-provider",
		Url:            "http://example.com",
		RequestBody:    &req1Body,
	})
	if err != nil {
		t.Fatalf("Failed to insert first request: %v", err)
	}

	// First request should have full body, no prefix reference
	if req1.PrefixRequestID != nil {
		t.Errorf("First request should not have prefix reference, got %v", *req1.PrefixRequestID)
	}
	if req1.PrefixLength != nil && *req1.PrefixLength != 0 {
		t.Errorf("First request should have no prefix length, got %v", *req1.PrefixLength)
	}
	if req1.RequestBody == nil || *req1.RequestBody != req1Body {
		t.Errorf("First request body mismatch: expected %q, got %q", req1Body, safeDeref(req1.RequestBody))
	}

	// Second request - shares prefix with first
	req2Body := sharedPrefix + "_suffix2_longer"
	req2, err := db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{
		ConversationID: &conv.ConversationID,
		Model:          "test-model",
		Provider:       "test-provider",
		Url:            "http://example.com",
		RequestBody:    &req2Body,
	})
	if err != nil {
		t.Fatalf("Failed to insert second request: %v", err)
	}

	// Second request should have prefix reference
	if req2.PrefixRequestID == nil || *req2.PrefixRequestID != req1.ID {
		t.Errorf("Second request should reference first request, got prefix_request_id=%v", safeDeref64(req2.PrefixRequestID))
	}
	// Common prefix is sharedPrefix + "_suffix" = 200 + 7 = 207 bytes
	// (the bodies diverge at "1" vs "2").
	expectedPrefixLen := len(sharedPrefix) + len("_suffix")
	if req2.PrefixLength == nil || *req2.PrefixLength != int64(expectedPrefixLen) {
		t.Errorf("Second request prefix length should be %d, got %v", expectedPrefixLen, safeDeref64(req2.PrefixLength))
	}
	// Stored body should only be the suffix after the shared prefix ("1" vs "2_longer")
	expectedSuffix := "2_longer"
	if req2.RequestBody == nil || *req2.RequestBody != expectedSuffix {
		t.Errorf("Second request should only store suffix %q, got %q", expectedSuffix, safeDeref(req2.RequestBody))
	}

	// Third request - shares even longer prefix with second
	req3Body := sharedPrefix + "_suffix2_longer_and_more"
	req3, err := db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{
		ConversationID: &conv.ConversationID,
		Model:          "test-model",
		Provider:       "test-provider",
		Url:            "http://example.com",
		RequestBody:    &req3Body,
	})
	if err != nil {
		t.Fatalf("Failed to insert third request: %v", err)
	}

	// Third request should reference second request
	if req3.PrefixRequestID == nil || *req3.PrefixRequestID != req2.ID {
		t.Errorf("Third request should reference second request, got prefix_request_id=%v", safeDeref64(req3.PrefixRequestID))
	}
	// The prefix length should be the full length of req2Body (since req3Body starts with req2Body)
	if req3.PrefixLength == nil || *req3.PrefixLength != int64(len(sharedPrefix)+len("_suffix2_longer")) {
		t.Errorf("Third request prefix length should be %d, got %v", len(sharedPrefix)+len("_suffix2_longer"), safeDeref64(req3.PrefixLength))
	}

	// Test reconstruction of full bodies, including following the two-deep
	// chain req3 -> req2 -> req1.
	reconstructed1, err := db.GetFullLLMRequestBody(ctx, req1.ID)
	if err != nil {
		t.Fatalf("Failed to reconstruct first request: %v", err)
	}
	if reconstructed1 != req1Body {
		t.Errorf("Reconstructed first request mismatch: expected %q, got %q", req1Body, reconstructed1)
	}

	reconstructed2, err := db.GetFullLLMRequestBody(ctx, req2.ID)
	if err != nil {
		t.Fatalf("Failed to reconstruct second request: %v", err)
	}
	if reconstructed2 != req2Body {
		t.Errorf("Reconstructed second request mismatch: expected %q, got %q", req2Body, reconstructed2)
	}

	reconstructed3, err := db.GetFullLLMRequestBody(ctx, req3.ID)
	if err != nil {
		t.Fatalf("Failed to reconstruct third request: %v", err)
	}
	if reconstructed3 != req3Body {
		t.Errorf("Reconstructed third request mismatch: expected %q, got %q", req3Body, reconstructed3)
	}
}
+
// TestLLMRequestNoPrefixForShortOverlap verifies the minimum-savings
// threshold: a shared prefix under 100 bytes must not trigger deduplication,
// so the second request stores its full body with no prefix reference.
func TestLLMRequestNoPrefixForShortOverlap(t *testing.T) {
	db := setupTestDB(t)
	defer db.Close()

	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	slug := "test-short-conv"
	conv, err := db.CreateConversation(ctx, &slug, true, nil)
	if err != nil {
		t.Fatalf("Failed to create conversation: %v", err)
	}

	// Short prefix (< 100 bytes) - should NOT deduplicate
	shortPrefix := strings.Repeat("B", 50)

	req1Body := shortPrefix + "_first"
	_, err = db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{
		ConversationID: &conv.ConversationID,
		Model:          "test-model",
		Provider:       "test-provider",
		Url:            "http://example.com",
		RequestBody:    &req1Body,
	})
	if err != nil {
		t.Fatalf("Failed to insert first request: %v", err)
	}

	req2Body := shortPrefix + "_second"
	req2, err := db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{
		ConversationID: &conv.ConversationID,
		Model:          "test-model",
		Provider:       "test-provider",
		Url:            "http://example.com",
		RequestBody:    &req2Body,
	})
	if err != nil {
		t.Fatalf("Failed to insert second request: %v", err)
	}

	// With short prefix, should NOT have prefix reference (full body stored)
	if req2.PrefixRequestID != nil {
		t.Errorf("Short overlap should not have prefix reference, got %v", *req2.PrefixRequestID)
	}
	if req2.RequestBody == nil || *req2.RequestBody != req2Body {
		t.Errorf("Short overlap should store full body %q, got %q", req2Body, safeDeref(req2.RequestBody))
	}
}
+
// TestLLMRequestNoConversationID verifies that requests without a
// conversation id bypass deduplication entirely (there is no prior request to
// compare against) and store the full body.
func TestLLMRequestNoConversationID(t *testing.T) {
	db := setupTestDB(t)
	defer db.Close()

	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	// Request without conversation_id - should store full body
	reqBody := strings.Repeat("C", 300)
	req, err := db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{
		ConversationID: nil,
		Model:          "test-model",
		Provider:       "test-provider",
		Url:            "http://example.com",
		RequestBody:    &reqBody,
	})
	if err != nil {
		t.Fatalf("Failed to insert request: %v", err)
	}

	// Should not have prefix reference
	if req.PrefixRequestID != nil {
		t.Errorf("Request without conversation_id should not have prefix reference")
	}
	if req.RequestBody == nil || *req.RequestBody != reqBody {
		t.Errorf("Request should store full body")
	}
}
+
// safeDeref renders a possibly-nil string pointer for failure messages,
// substituting the placeholder "<nil>" when the pointer is nil.
func safeDeref(s *string) string {
	if s != nil {
		return *s
	}
	return "<nil>"
}
+
// safeDeref64 renders a possibly-nil int64 pointer for failure messages,
// substituting -1 when the pointer is nil.
func safeDeref64(i *int64) int64 {
	if i != nil {
		return *i
	}
	return -1
}
+
// TestLLMRequestRealisticConversation verifies deduplication and
// reconstruction against Anthropic-shaped JSON bodies where the messages
// array grows with each turn, and logs the achieved storage savings.
func TestLLMRequestRealisticConversation(t *testing.T) {
	// This test simulates realistic LLM API request patterns where each
	// subsequent request includes all previous messages plus new ones
	db := setupTestDB(t)
	defer db.Close()

	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	slug := "test-realistic-conv"
	conv, err := db.CreateConversation(ctx, &slug, true, nil)
	if err != nil {
		t.Fatalf("Failed to create conversation: %v", err)
	}

	// Simulate Anthropic-style messages array growing over conversation
	// Each request adds to the previous messages
	baseRequest := `{"model":"claude-sonnet-4-5-20250929","system":[{"type":"text","text":"You are a helpful assistant."}],"messages":[`

	message1 := `{"role":"user","content":[{"type":"text","text":"Hello, how are you?"}]}`
	req1Body := baseRequest + message1 + `],"max_tokens":8192}`

	req1, err := db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{
		ConversationID: &conv.ConversationID,
		Model:          "claude-sonnet-4-5-20250929",
		Provider:       "anthropic",
		Url:            "https://api.anthropic.com/v1/messages",
		RequestBody:    &req1Body,
	})
	if err != nil {
		t.Fatalf("Failed to insert first request: %v", err)
	}

	// First request stored in full
	if req1.PrefixRequestID != nil {
		t.Errorf("First request should not have prefix reference")
	}

	// Second request: user message + assistant response + new user message
	message2 := `{"role":"assistant","content":[{"type":"text","text":"I'm doing well, thank you for asking!"}]}`
	message3 := `{"role":"user","content":[{"type":"text","text":"Can you help me write some code?"}]}`
	req2Body := baseRequest + message1 + `,` + message2 + `,` + message3 + `],"max_tokens":8192}`

	req2, err := db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{
		ConversationID: &conv.ConversationID,
		Model:          "claude-sonnet-4-5-20250929",
		Provider:       "anthropic",
		Url:            "https://api.anthropic.com/v1/messages",
		RequestBody:    &req2Body,
	})
	if err != nil {
		t.Fatalf("Failed to insert second request: %v", err)
	}

	// Second request should have prefix deduplication
	if req2.PrefixRequestID == nil {
		t.Errorf("Second request should have prefix reference")
	} else if *req2.PrefixRequestID != req1.ID {
		t.Errorf("Second request should reference first request")
	}

	// Verify prefix length is reasonable (should be at least the base + message1 length)
	minExpectedPrefix := len(baseRequest) + len(message1)
	if req2.PrefixLength == nil || *req2.PrefixLength < int64(minExpectedPrefix) {
		t.Errorf("Second request prefix length should be at least %d, got %v", minExpectedPrefix, safeDeref64(req2.PrefixLength))
	}

	// Verify we saved significant space
	req2StoredLen := len(safeDeref(req2.RequestBody))
	req2FullLen := len(req2Body)
	if req2StoredLen >= req2FullLen {
		t.Errorf("Second request should store less than full body: stored %d, full %d", req2StoredLen, req2FullLen)
	}
	t.Logf("Space saved for request 2: %d bytes (%.1f%% reduction)",
		req2FullLen-req2StoredLen,
		100.0*float64(req2FullLen-req2StoredLen)/float64(req2FullLen))

	// Third request: even more messages
	message4 := `{"role":"assistant","content":[{"type":"text","text":"Of course! What kind of code would you like me to help you with?"}]}`
	message5 := `{"role":"user","content":[{"type":"text","text":"I need a function to calculate fibonacci numbers."}]}`
	req3Body := baseRequest + message1 + `,` + message2 + `,` + message3 + `,` + message4 + `,` + message5 + `],"max_tokens":8192}`

	req3, err := db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{
		ConversationID: &conv.ConversationID,
		Model:          "claude-sonnet-4-5-20250929",
		Provider:       "anthropic",
		Url:            "https://api.anthropic.com/v1/messages",
		RequestBody:    &req3Body,
	})
	if err != nil {
		t.Fatalf("Failed to insert third request: %v", err)
	}

	// Third request should reference second
	if req3.PrefixRequestID == nil || *req3.PrefixRequestID != req2.ID {
		t.Errorf("Third request should reference second request")
	}

	req3StoredLen := len(safeDeref(req3.RequestBody))
	req3FullLen := len(req3Body)
	t.Logf("Space saved for request 3: %d bytes (%.1f%% reduction)",
		req3FullLen-req3StoredLen,
		100.0*float64(req3FullLen-req3StoredLen)/float64(req3FullLen))

	// Verify reconstruction works for all requests
	reconstructed1, err := db.GetFullLLMRequestBody(ctx, req1.ID)
	if err != nil {
		t.Fatalf("Failed to reconstruct request 1: %v", err)
	}
	if reconstructed1 != req1Body {
		t.Errorf("Reconstructed request 1 mismatch")
	}

	reconstructed2, err := db.GetFullLLMRequestBody(ctx, req2.ID)
	if err != nil {
		t.Fatalf("Failed to reconstruct request 2: %v", err)
	}
	if reconstructed2 != req2Body {
		t.Errorf("Reconstructed request 2 mismatch")
	}

	reconstructed3, err := db.GetFullLLMRequestBody(ctx, req3.ID)
	if err != nil {
		t.Fatalf("Failed to reconstruct request 3: %v", err)
	}
	if reconstructed3 != req3Body {
		t.Errorf("Reconstructed request 3 mismatch")
	}

	// Calculate total storage savings
	totalOriginal := len(req1Body) + len(req2Body) + len(req3Body)
	totalStored := len(safeDeref(req1.RequestBody)) + len(safeDeref(req2.RequestBody)) + len(safeDeref(req3.RequestBody))
	t.Logf("Total space: original %d bytes, stored %d bytes, saved %d bytes (%.1f%% reduction)",
		totalOriginal, totalStored, totalOriginal-totalStored,
		100.0*float64(totalOriginal-totalStored)/float64(totalOriginal))
}
+
// TestLLMRequestOpenAIStyle verifies that deduplication also works for
// OpenAI-shaped request bodies (different JSON layout from the Anthropic
// test) and that reconstruction round-trips exactly.
func TestLLMRequestOpenAIStyle(t *testing.T) {
	// Test with OpenAI-style request format
	db := setupTestDB(t)
	defer db.Close()

	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	slug := "test-openai-conv"
	conv, err := db.CreateConversation(ctx, &slug, true, nil)
	if err != nil {
		t.Fatalf("Failed to create conversation: %v", err)
	}

	// OpenAI-style request format
	baseRequest := `{"model":"gpt-4","messages":[`
	message1 := `{"role":"system","content":"You are a helpful assistant."},{"role":"user","content":"Hello!"}`
	req1Body := baseRequest + message1 + `],"stream":true}`

	req1, err := db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{
		ConversationID: &conv.ConversationID,
		Model:          "gpt-4",
		Provider:       "openai",
		Url:            "https://api.openai.com/v1/chat/completions",
		RequestBody:    &req1Body,
	})
	if err != nil {
		t.Fatalf("Failed to insert first request: %v", err)
	}

	// Second request with more messages
	message2 := `{"role":"assistant","content":"Hello! How can I help you today?"},{"role":"user","content":"What's the weather like?"}`
	req2Body := baseRequest + message1 + `,` + message2 + `],"stream":true}`

	req2, err := db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{
		ConversationID: &conv.ConversationID,
		Model:          "gpt-4",
		Provider:       "openai",
		Url:            "https://api.openai.com/v1/chat/completions",
		RequestBody:    &req2Body,
	})
	if err != nil {
		t.Fatalf("Failed to insert second request: %v", err)
	}

	// Should have prefix deduplication
	if req2.PrefixRequestID == nil || *req2.PrefixRequestID != req1.ID {
		t.Errorf("Second request should reference first request")
	}

	// Verify reconstruction
	reconstructed2, err := db.GetFullLLMRequestBody(ctx, req2.ID)
	if err != nil {
		t.Fatalf("Failed to reconstruct second request: %v", err)
	}
	if reconstructed2 != req2Body {
		t.Errorf("Reconstructed request mismatch:\nexpected: %s\ngot: %s", req2Body, reconstructed2)
	}

	// Calculate savings
	req2StoredLen := len(safeDeref(req2.RequestBody))
	req2FullLen := len(req2Body)
	t.Logf("OpenAI-style space saved: %d bytes (%.1f%% reduction)",
		req2FullLen-req2StoredLen,
		100.0*float64(req2FullLen-req2StoredLen)/float64(req2FullLen))
}
@@ -7,8 +7,84 @@ package generated
import (
"context"
+ "time"
)
const getLLMRequestBody = `-- name: GetLLMRequestBody :one
SELECT request_body FROM llm_requests WHERE id = ?
`

// GetLLMRequestBody returns only the request_body column for the given row;
// a nil result means the column is NULL.
// NOTE(review): this file appears sqlc-generated — prefer regenerating from
// queries.sql over hand-editing.
func (q *Queries) GetLLMRequestBody(ctx context.Context, id int64) (*string, error) {
	row := q.db.QueryRowContext(ctx, getLLMRequestBody, id)
	var request_body *string
	err := row.Scan(&request_body)
	return request_body, err
}
+
const getLLMRequestByID = `-- name: GetLLMRequestByID :one
SELECT id, conversation_id, model, provider, url, request_body, response_body, status_code, error, duration_ms, created_at, prefix_request_id, prefix_length FROM llm_requests WHERE id = ?
`

// GetLLMRequestByID fetches a single llm_requests row, including the prefix
// deduplication columns (prefix_request_id, prefix_length) used when
// reconstructing full request bodies.
func (q *Queries) GetLLMRequestByID(ctx context.Context, id int64) (LlmRequest, error) {
	row := q.db.QueryRowContext(ctx, getLLMRequestByID, id)
	var i LlmRequest
	err := row.Scan(
		&i.ID,
		&i.ConversationID,
		&i.Model,
		&i.Provider,
		&i.Url,
		&i.RequestBody,
		&i.ResponseBody,
		&i.StatusCode,
		&i.Error,
		&i.DurationMs,
		&i.CreatedAt,
		&i.PrefixRequestID,
		&i.PrefixLength,
	)
	return i, err
}
+
const getLLMResponseBody = `-- name: GetLLMResponseBody :one
SELECT response_body FROM llm_requests WHERE id = ?
`

// GetLLMResponseBody returns only the response_body column for the given
// row; a nil result means the column is NULL.
func (q *Queries) GetLLMResponseBody(ctx context.Context, id int64) (*string, error) {
	row := q.db.QueryRowContext(ctx, getLLMResponseBody, id)
	var response_body *string
	err := row.Scan(&response_body)
	return response_body, err
}
+
const getLastRequestForConversation = `-- name: GetLastRequestForConversation :one
SELECT id, conversation_id, model, provider, url, request_body, response_body, status_code, error, duration_ms, created_at, prefix_request_id, prefix_length FROM llm_requests
WHERE conversation_id = ?
ORDER BY id DESC
LIMIT 1
`

// GetLastRequestForConversation returns the newest llm_requests row for a
// conversation (highest id). Used at insert time to find the candidate
// previous request for prefix deduplication.
func (q *Queries) GetLastRequestForConversation(ctx context.Context, conversationID *string) (LlmRequest, error) {
	row := q.db.QueryRowContext(ctx, getLastRequestForConversation, conversationID)
	var i LlmRequest
	err := row.Scan(
		&i.ID,
		&i.ConversationID,
		&i.Model,
		&i.Provider,
		&i.Url,
		&i.RequestBody,
		&i.ResponseBody,
		&i.StatusCode,
		&i.Error,
		&i.DurationMs,
		&i.CreatedAt,
		&i.PrefixRequestID,
		&i.PrefixLength,
	)
	return i, err
}
+
const insertLLMRequest = `-- name: InsertLLMRequest :one
INSERT INTO llm_requests (
conversation_id,
@@ -19,21 +95,25 @@ INSERT INTO llm_requests (
response_body,
status_code,
error,
- duration_ms
-) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
-RETURNING id, conversation_id, model, provider, url, request_body, response_body, status_code, error, duration_ms, created_at
+ duration_ms,
+ prefix_request_id,
+ prefix_length
+) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+RETURNING id, conversation_id, model, provider, url, request_body, response_body, status_code, error, duration_ms, created_at, prefix_request_id, prefix_length
`
type InsertLLMRequestParams struct {
- ConversationID *string `json:"conversation_id"`
- Model string `json:"model"`
- Provider string `json:"provider"`
- Url string `json:"url"`
- RequestBody *string `json:"request_body"`
- ResponseBody *string `json:"response_body"`
- StatusCode *int64 `json:"status_code"`
- Error *string `json:"error"`
- DurationMs *int64 `json:"duration_ms"`
+ ConversationID *string `json:"conversation_id"`
+ Model string `json:"model"`
+ Provider string `json:"provider"`
+ Url string `json:"url"`
+ RequestBody *string `json:"request_body"`
+ ResponseBody *string `json:"response_body"`
+ StatusCode *int64 `json:"status_code"`
+ Error *string `json:"error"`
+ DurationMs *int64 `json:"duration_ms"`
+ PrefixRequestID *int64 `json:"prefix_request_id"`
+ PrefixLength *int64 `json:"prefix_length"`
}
func (q *Queries) InsertLLMRequest(ctx context.Context, arg InsertLLMRequestParams) (LlmRequest, error) {
@@ -47,6 +127,8 @@ func (q *Queries) InsertLLMRequest(ctx context.Context, arg InsertLLMRequestPara
arg.StatusCode,
arg.Error,
arg.DurationMs,
+ arg.PrefixRequestID,
+ arg.PrefixLength,
)
var i LlmRequest
err := row.Scan(
@@ -61,6 +143,81 @@ func (q *Queries) InsertLLMRequest(ctx context.Context, arg InsertLLMRequestPara
&i.Error,
&i.DurationMs,
&i.CreatedAt,
+ &i.PrefixRequestID,
+ &i.PrefixLength,
)
return i, err
}
+
const listRecentLLMRequests = `-- name: ListRecentLLMRequests :many
SELECT
    id,
    conversation_id,
    model,
    provider,
    url,
    LENGTH(request_body) as request_body_length,
    LENGTH(response_body) as response_body_length,
    status_code,
    error,
    duration_ms,
    created_at,
    prefix_request_id,
    prefix_length
FROM llm_requests
ORDER BY id DESC
LIMIT ?
`

// ListRecentLLMRequestsRow carries list metadata only: the query selects
// LENGTH() of the bodies rather than the bodies themselves, so large payloads
// are never transferred for list views.
type ListRecentLLMRequestsRow struct {
	ID                 int64   `json:"id"`
	ConversationID     *string `json:"conversation_id"`
	Model              string  `json:"model"`
	Provider           string  `json:"provider"`
	Url                string  `json:"url"`
	RequestBodyLength  *int64  `json:"request_body_length"`
	ResponseBodyLength *int64  `json:"response_body_length"`
	StatusCode         *int64  `json:"status_code"`
	Error              *string `json:"error"`
	DurationMs         *int64  `json:"duration_ms"`
	CreatedAt          time.Time `json:"created_at"`
	PrefixRequestID    *int64  `json:"prefix_request_id"`
	PrefixLength       *int64  `json:"prefix_length"`
}

// ListRecentLLMRequests returns up to limit rows, newest first.
func (q *Queries) ListRecentLLMRequests(ctx context.Context, limit int64) ([]ListRecentLLMRequestsRow, error) {
	rows, err := q.db.QueryContext(ctx, listRecentLLMRequests, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	items := []ListRecentLLMRequestsRow{}
	for rows.Next() {
		var i ListRecentLLMRequestsRow
		if err := rows.Scan(
			&i.ID,
			&i.ConversationID,
			&i.Model,
			&i.Provider,
			&i.Url,
			&i.RequestBodyLength,
			&i.ResponseBodyLength,
			&i.StatusCode,
			&i.Error,
			&i.DurationMs,
			&i.CreatedAt,
			&i.PrefixRequestID,
			&i.PrefixLength,
		); err != nil {
			return nil, err
		}
		items = append(items, i)
	}
	// Explicit Close surfaces close errors; the deferred Close above then
	// becomes a no-op (sql.Rows.Close is idempotent per database/sql docs).
	if err := rows.Close(); err != nil {
		return nil, err
	}
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return items, nil
}
@@ -20,17 +20,19 @@ type Conversation struct {
}
type LlmRequest struct {
- ID int64 `json:"id"`
- ConversationID *string `json:"conversation_id"`
- Model string `json:"model"`
- Provider string `json:"provider"`
- Url string `json:"url"`
- RequestBody *string `json:"request_body"`
- ResponseBody *string `json:"response_body"`
- StatusCode *int64 `json:"status_code"`
- Error *string `json:"error"`
- DurationMs *int64 `json:"duration_ms"`
- CreatedAt time.Time `json:"created_at"`
+ ID int64 `json:"id"`
+ ConversationID *string `json:"conversation_id"`
+ Model string `json:"model"`
+ Provider string `json:"provider"`
+ Url string `json:"url"`
+ RequestBody *string `json:"request_body"`
+ ResponseBody *string `json:"response_body"`
+ StatusCode *int64 `json:"status_code"`
+ Error *string `json:"error"`
+ DurationMs *int64 `json:"duration_ms"`
+ CreatedAt time.Time `json:"created_at"`
+ PrefixRequestID *int64 `json:"prefix_request_id"`
+ PrefixLength *int64 `json:"prefix_length"`
}
type Message struct {
@@ -8,6 +8,44 @@ INSERT INTO llm_requests (
response_body,
status_code,
error,
- duration_ms
-) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+ duration_ms,
+ prefix_request_id,
+ prefix_length
+) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
RETURNING *;
+
+-- name: GetLastRequestForConversation :one
+SELECT * FROM llm_requests
+WHERE conversation_id = ?
+ORDER BY id DESC
+LIMIT 1;
+
+-- name: GetLLMRequestByID :one
+SELECT * FROM llm_requests WHERE id = ?;
+
+-- name: ListRecentLLMRequests :many
+SELECT
+ id,
+ conversation_id,
+ model,
+ provider,
+ url,
+ LENGTH(request_body) as request_body_length,
+ LENGTH(response_body) as response_body_length,
+ status_code,
+ error,
+ duration_ms,
+ created_at,
+ prefix_request_id,
+ prefix_length
+FROM llm_requests
+ORDER BY id DESC
+LIMIT ?;
+
+-- name: GetLLMRequestBody :one
+SELECT request_body FROM llm_requests WHERE id = ?;
+
+-- name: GetLLMResponseBody :one
+SELECT response_body FROM llm_requests WHERE id = ?;
+
+
@@ -0,0 +1,9 @@
-- Add prefix deduplication columns to llm_requests table
-- This allows storing only the suffix of request_body when there's a shared prefix
-- with a previous request in the same conversation.

-- Both columns are nullable by design: rows with no shared prefix leave them NULL.
ALTER TABLE llm_requests ADD COLUMN prefix_request_id INTEGER REFERENCES llm_requests(id);
ALTER TABLE llm_requests ADD COLUMN prefix_length INTEGER;

-- Index for efficient prefix lookups
-- (a partial index: only rows that actually reference a prefix are indexed)
CREATE INDEX idx_llm_requests_prefix_request_id ON llm_requests(prefix_request_id) WHERE prefix_request_id IS NOT NULL;
@@ -218,17 +218,20 @@ type systemContent struct {
// request represents the request payload for creating a message.
type request struct {
+ // Field order matters for JSON serialization - stable fields should come first
+ // to maximize prefix deduplication when storing LLM requests.
Model string `json:"model"`
- Messages []message `json:"messages"`
- ToolChoice *toolChoice `json:"tool_choice,omitempty"`
MaxTokens int `json:"max_tokens"`
- Tools []*tool `json:"tools,omitempty"`
Stream bool `json:"stream,omitempty"`
System []systemContent `json:"system,omitempty"`
+ Tools []*tool `json:"tools,omitempty"`
+ ToolChoice *toolChoice `json:"tool_choice,omitempty"`
Temperature float64 `json:"temperature,omitempty"`
TopK int `json:"top_k,omitempty"`
TopP float64 `json:"top_p,omitempty"`
StopSequences []string `json:"stop_sequences,omitempty"`
+ // Messages comes last since it grows with each request in a conversation
+ Messages []message `json:"messages"`
}
func mapped[Slice ~[]E, E, T any](s Slice, f func(E) T) []T {
@@ -7,7 +7,7 @@ Content-Length: 183
Anthropic-Version: 2023-06-01
Content-Type: application/json
-{"model":"claude-sonnet-4-20250514","messages":[{"role":"user","content":[{"type":"text","text":"Hi, my name is Cornelius","cache_control":{"type":"ephemeral"}}]}],"max_tokens":8192}
+{"model":"claude-sonnet-4-20250514","max_tokens":8192,"messages":[{"role":"user","content":[{"type":"text","text":"Hi, my name is Cornelius","cache_control":{"type":"ephemeral"}}]}]}
HTTP/2.0 200 OK
Anthropic-Organization-Id: 3c473a21-7208-450a-a9f8-80aebda45c1b
Anthropic-Ratelimit-Input-Tokens-Limit: 4000000
@@ -37,7 +37,7 @@ Content-Length: 454
Anthropic-Version: 2023-06-01
Content-Type: application/json
-{"model":"claude-sonnet-4-20250514","messages":[{"role":"user","content":[{"type":"text","text":"Hi, my name is Cornelius"}]},{"role":"assistant","content":[{"type":"text","text":"Hello Cornelius! It's nice to meet you. That's a distinctive and classic name. How are you doing today? Is there anything I can help you with?"}]},{"role":"user","content":[{"type":"text","text":"What is my name?","cache_control":{"type":"ephemeral"}}]}],"max_tokens":8192}
+{"model":"claude-sonnet-4-20250514","max_tokens":8192,"messages":[{"role":"user","content":[{"type":"text","text":"Hi, my name is Cornelius"}]},{"role":"assistant","content":[{"type":"text","text":"Hello Cornelius! It's nice to meet you. That's a distinctive and classic name. How are you doing today? Is there anything I can help you with?"}]},{"role":"user","content":[{"type":"text","text":"What is my name?","cache_control":{"type":"ephemeral"}}]}]}
HTTP/2.0 200 OK
Anthropic-Organization-Id: 3c473a21-7208-450a-a9f8-80aebda45c1b
Anthropic-Ratelimit-Input-Tokens-Limit: 4000000
@@ -11,12 +11,15 @@ import (
// https://ai.google.dev/api/generate-content#request-body
type Request struct {
- Contents []Content `json:"contents"`
- Tools []Tool `json:"tools,omitempty"`
- SystemInstruction *Content `json:"systemInstruction,omitempty"`
- GenerationConfig *GenerationConfig `json:"generationConfig,omitempty"`
+ // Field order matters for JSON serialization - stable fields should come first
+ // to maximize prefix deduplication when storing LLM requests.
CachedContent string `json:"cachedContent,omitempty"` // format: "cachedContents/{name}"
+ GenerationConfig *GenerationConfig `json:"generationConfig,omitempty"`
+ SystemInstruction *Content `json:"systemInstruction,omitempty"`
+ Tools []Tool `json:"tools,omitempty"`
// ToolConfig has been left out because it does not appear to be useful.
+ // Contents comes last since it grows with each request in a conversation
+ Contents []Content `json:"contents"`
}
// https://ai.google.dev/api/generate-content#response-body
@@ -13,6 +13,7 @@ import (
"shelley.exe.dev/db/generated"
"shelley.exe.dev/gitstate"
"shelley.exe.dev/llm"
+ "shelley.exe.dev/llm/llmhttp"
"shelley.exe.dev/loop"
"shelley.exe.dev/subpub"
)
@@ -372,7 +373,9 @@ func (cm *ConversationManager) ensureLoop(service llm.Service, modelID string) e
}
}
- processCtx, cancel := context.WithTimeout(context.Background(), 12*time.Hour)
+ // Create a context with the conversation ID for LLM request recording/prefix dedup
+ baseCtx := llmhttp.WithConversationID(context.Background(), conversationID)
+ processCtx, cancel := context.WithTimeout(baseCtx, 12*time.Hour)
toolSet := claudetool.NewToolSet(processCtx, toolSetConfig)
loopInstance := loop.NewLoop(loop.Config{
@@ -0,0 +1,419 @@
+package server
+
+import (
+ "encoding/json"
+ "net/http"
+ "strconv"
+)
+
+// handleDebugLLMRequests serves the debug page for LLM requests
+func (s *Server) handleDebugLLMRequests(w http.ResponseWriter, r *http.Request) {
+ w.Header().Set("Content-Type", "text/html")
+ w.Write([]byte(debugLLMRequestsHTML))
+}
+
+// handleDebugLLMRequestsAPI returns recent LLM requests as JSON
+func (s *Server) handleDebugLLMRequestsAPI(w http.ResponseWriter, r *http.Request) {
+ ctx := r.Context()
+
+ limit := int64(100)
+ if limitStr := r.URL.Query().Get("limit"); limitStr != "" {
+ if l, err := strconv.ParseInt(limitStr, 10, 64); err == nil && l > 0 {
+ limit = l
+ }
+ }
+
+ requests, err := s.db.ListRecentLLMRequests(ctx, limit)
+ if err != nil {
+ s.logger.Error("Failed to list LLM requests", "error", err)
+ http.Error(w, "Internal server error", http.StatusInternalServerError)
+ return
+ }
+
+ w.Header().Set("Content-Type", "application/json")
+ json.NewEncoder(w).Encode(requests)
+}
+
+// handleDebugLLMRequestBody returns the request body for a specific LLM request
+func (s *Server) handleDebugLLMRequestBody(w http.ResponseWriter, r *http.Request) {
+ ctx := r.Context()
+
+ idStr := r.PathValue("id")
+ id, err := strconv.ParseInt(idStr, 10, 64)
+ if err != nil {
+ http.Error(w, "Invalid ID", http.StatusBadRequest)
+ return
+ }
+
+ body, err := s.db.GetLLMRequestBody(ctx, id)
+ if err != nil {
+ s.logger.Error("Failed to get LLM request body", "error", err, "id", id)
+ http.Error(w, "Not found", http.StatusNotFound)
+ return
+ }
+
+ if body == nil {
+ w.Header().Set("Content-Type", "application/json")
+ w.Write([]byte("null"))
+ return
+ }
+
+ w.Header().Set("Content-Type", "application/json")
+ w.Write([]byte(*body))
+}
+
+// handleDebugLLMResponseBody returns the response body for a specific LLM request
+func (s *Server) handleDebugLLMResponseBody(w http.ResponseWriter, r *http.Request) {
+ ctx := r.Context()
+
+ idStr := r.PathValue("id")
+ id, err := strconv.ParseInt(idStr, 10, 64)
+ if err != nil {
+ http.Error(w, "Invalid ID", http.StatusBadRequest)
+ return
+ }
+
+ body, err := s.db.GetLLMResponseBody(ctx, id)
+ if err != nil {
+ s.logger.Error("Failed to get LLM response body", "error", err, "id", id)
+ http.Error(w, "Not found", http.StatusNotFound)
+ return
+ }
+
+ if body == nil {
+ w.Header().Set("Content-Type", "application/json")
+ w.Write([]byte("null"))
+ return
+ }
+
+ w.Header().Set("Content-Type", "application/json")
+ w.Write([]byte(*body))
+}
+
+const debugLLMRequestsHTML = `<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1.0">
+<title>Debug: LLM Requests</title>
+<style>
+* { box-sizing: border-box; }
+body {
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
+ margin: 0;
+ padding: 20px;
+ background: #1a1a1a;
+ color: #e0e0e0;
+}
+h1 { margin: 0 0 20px 0; font-size: 24px; color: #fff; }
+table {
+ width: 100%;
+ border-collapse: collapse;
+ font-size: 13px;
+}
+th, td {
+ padding: 8px 12px;
+ text-align: left;
+ border-bottom: 1px solid #333;
+}
+th {
+ background: #252525;
+ font-weight: 600;
+ position: sticky;
+ top: 0;
+}
+tr:hover { background: #252525; }
+.mono { font-family: 'SF Mono', Monaco, monospace; font-size: 12px; }
+.error { color: #ff6b6b; }
+.success { color: #69db7c; }
+.btn {
+ background: #333;
+ border: 1px solid #444;
+ color: #e0e0e0;
+ padding: 4px 8px;
+ border-radius: 4px;
+ cursor: pointer;
+ font-size: 12px;
+}
+.btn:hover { background: #444; }
+.btn:disabled { opacity: 0.5; cursor: not-allowed; }
+.json-viewer {
+ background: #1e1e1e;
+ border: 1px solid #333;
+ border-radius: 4px;
+ padding: 12px;
+ margin-top: 8px;
+ overflow-x: auto;
+ max-height: 400px;
+ overflow-y: auto;
+}
+.json-viewer pre {
+ margin: 0;
+ font-family: 'SF Mono', Monaco, monospace;
+ font-size: 12px;
+ white-space: pre-wrap;
+ word-wrap: break-word;
+}
+.collapsed { display: none; }
+.size { color: #888; font-size: 11px; }
+.prefix { color: #ffd43b; }
+.dedup-info { color: #74c0fc; font-size: 11px; }
+.loading { color: #888; font-style: italic; }
+.expand-row { background: #1e1e1e; }
+.expand-row td { padding: 0; }
+.expand-content { padding: 12px; }
+.expand-tabs {
+ display: flex;
+ gap: 8px;
+ margin-bottom: 12px;
+}
+.tab-btn {
+ background: transparent;
+ border: 1px solid #444;
+ color: #888;
+ padding: 6px 12px;
+ border-radius: 4px;
+ cursor: pointer;
+}
+.tab-btn.active {
+ background: #333;
+ color: #fff;
+ border-color: #555;
+}
+.tab-content { display: none; }
+.tab-content.active { display: block; }
+</style>
+</head>
+<body>
+<h1>LLM Requests</h1>
+<table id="requests-table">
+<thead>
+<tr>
+ <th>ID</th>
+ <th>Time</th>
+ <th>Model</th>
+ <th>Provider</th>
+ <th>Status</th>
+ <th>Duration</th>
+ <th>Request Size</th>
+ <th>Response Size</th>
+ <th>Prefix Info</th>
+ <th>Actions</th>
+</tr>
+</thead>
+<tbody id="requests-body">
+<tr><td colspan="10" class="loading">Loading...</td></tr>
+</tbody>
+</table>
+
+<script>
+const expandedRows = new Set();
+const loadedData = {};
+
+function formatSize(bytes) {
+ if (bytes === null || bytes === undefined) return '-';
+ if (bytes < 1024) return bytes + ' B';
+ if (bytes < 1024 * 1024) return (bytes / 1024).toFixed(1) + ' KB';
+ return (bytes / (1024 * 1024)).toFixed(2) + ' MB';
+}
+
+function formatDate(dateStr) {
+ const d = new Date(dateStr);
+ return d.toLocaleString();
+}
+
+function formatDuration(ms) {
+ if (ms === null || ms === undefined) return '-';
+ if (ms < 1000) return ms + 'ms';
+ return (ms / 1000).toFixed(2) + 's';
+}
+
+function syntaxHighlight(json) {
+ if (typeof json !== 'string') json = JSON.stringify(json, null, 2);
+ json = json.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
+ return json.replace(/("(\\u[a-zA-Z0-9]{4}|\\[^u]|[^\\"])*"(\s*:)?|\b(true|false|null)\b|-?\d+(?:\.\d*)?(?:[eE][+\-]?\d+)?)/g, function (match) {
+ let cls = 'number';
+ if (/^"/.test(match)) {
+ if (/:$/.test(match)) {
+ cls = 'key';
+ } else {
+ cls = 'string';
+ }
+ } else if (/true|false/.test(match)) {
+ cls = 'boolean';
+ } else if (/null/.test(match)) {
+ cls = 'null';
+ }
+ return '<span class="' + cls + '">' + match + '</span>';
+ });
+}
+
+async function loadRequests() {
+ try {
+ const resp = await fetch('/debug/llm_requests/api?limit=100');
+ const data = await resp.json();
+ renderTable(data);
+ } catch (e) {
+ document.getElementById('requests-body').innerHTML =
+ '<tr><td colspan="10" class="error">Error loading requests: ' + e.message + '</td></tr>';
+ }
+}
+
+function renderTable(requests) {
+ const tbody = document.getElementById('requests-body');
+ if (!requests || requests.length === 0) {
+ tbody.innerHTML = '<tr><td colspan="10">No requests found</td></tr>';
+ return;
+ }
+ tbody.innerHTML = '';
+ for (const req of requests) {
+ const tr = document.createElement('tr');
+ tr.id = 'row-' + req.id;
+
+ const statusClass = req.status_code && req.status_code >= 200 && req.status_code < 300 ? 'success' :
+ (req.status_code ? 'error' : '');
+
+ let prefixInfo = '-';
+ if (req.prefix_request_id) {
+ prefixInfo = '<span class="dedup-info">prefix from #' + req.prefix_request_id +
+ ' (' + formatSize(req.prefix_length) + ')</span>';
+ }
+
+ tr.innerHTML = ` + "`" + `
+ <td class="mono">${req.id}</td>
+ <td>${formatDate(req.created_at)}</td>
+ <td>${req.model}</td>
+ <td>${req.provider}</td>
+ <td class="${statusClass}">${req.status_code || '-'}${req.error ? ' ⚠' : ''}</td>
+ <td>${formatDuration(req.duration_ms)}</td>
+ <td class="size">${formatSize(req.request_body_length)}</td>
+ <td class="size">${formatSize(req.response_body_length)}</td>
+ <td>${prefixInfo}</td>
+ <td><button class="btn" onclick="toggleExpand(${req.id})">Expand</button></td>
+ ` + "`" + `;
+ tbody.appendChild(tr);
+ }
+}
+
+async function toggleExpand(id) {
+ const existingExpand = document.getElementById('expand-' + id);
+ if (existingExpand) {
+ existingExpand.remove();
+ expandedRows.delete(id);
+ return;
+ }
+
+ expandedRows.add(id);
+ const row = document.getElementById('row-' + id);
+ const expandRow = document.createElement('tr');
+ expandRow.id = 'expand-' + id;
+ expandRow.className = 'expand-row';
+ expandRow.innerHTML = ` + "`" + `
+ <td colspan="10">
+ <div class="expand-content">
+ <div class="expand-tabs">
+ <button class="tab-btn active" onclick="showTab(${id}, 'request')">Request</button>
+ <button class="tab-btn" onclick="showTab(${id}, 'response')">Response</button>
+ </div>
+ <div id="tab-request-${id}" class="tab-content active">
+ <div class="json-viewer"><pre class="loading">Loading request...</pre></div>
+ </div>
+ <div id="tab-response-${id}" class="tab-content">
+ <div class="json-viewer"><pre class="loading">Loading response...</pre></div>
+ </div>
+ </div>
+ </td>
+ ` + "`" + `;
+ row.after(expandRow);
+
+ // Load request body
+ loadBody(id, 'request');
+}
+
+async function loadBody(id, type) {
+ const key = id + '-' + type;
+ if (loadedData[key]) {
+ renderBody(id, type, loadedData[key]);
+ return;
+ }
+
+ try {
+ const url = type === 'request'
+ ? '/debug/llm_requests/' + id + '/request'
+ : '/debug/llm_requests/' + id + '/response';
+ const resp = await fetch(url);
+ const text = await resp.text();
+ let data;
+ try {
+ data = JSON.parse(text);
+ } catch {
+ data = text;
+ }
+ loadedData[key] = data;
+ renderBody(id, type, data);
+ } catch (e) {
+ const container = document.querySelector('#tab-' + type + '-' + id + ' pre');
+ if (container) {
+ container.className = 'error';
+ container.textContent = 'Error loading: ' + e.message;
+ }
+ }
+}
+
+function renderBody(id, type, data) {
+ const container = document.querySelector('#tab-' + type + '-' + id + ' pre');
+ if (!container) return;
+
+ if (data === null) {
+ container.className = '';
+ container.textContent = '(empty)';
+ return;
+ }
+
+ container.className = '';
+ if (typeof data === 'object') {
+ container.innerHTML = syntaxHighlight(JSON.stringify(data, null, 2));
+ } else {
+ container.textContent = data;
+ }
+}
+
+function showTab(id, tab) {
+ // Update tab buttons
+ const expandRow = document.getElementById('expand-' + id);
+ if (!expandRow) return;
+
+ expandRow.querySelectorAll('.tab-btn').forEach(btn => {
+ btn.classList.remove('active');
+ if (btn.textContent.toLowerCase() === tab) {
+ btn.classList.add('active');
+ }
+ });
+
+ // Update tab content
+ expandRow.querySelectorAll('.tab-content').forEach(content => {
+ content.classList.remove('active');
+ });
+ const activeTab = document.getElementById('tab-' + tab + '-' + id);
+ if (activeTab) {
+ activeTab.classList.add('active');
+ loadBody(id, tab);
+ }
+}
+
+// Add syntax highlighting styles
+const style = document.createElement('style');
+style.textContent = ` + "`" + `
+ .string { color: #98c379; }
+ .number { color: #d19a66; }
+ .boolean { color: #56b6c2; }
+ .null { color: #c678dd; }
+ .key { color: #e06c75; }
+` + "`" + `;
+document.head.appendChild(style);
+
+loadRequests();
+</script>
+</body>
+</html>
+`
@@ -268,7 +268,11 @@ func (s *Server) RegisterRoutes(mux *http.ServeMux) {
mux.Handle("POST /upgrade", http.HandlerFunc(s.handleUpgrade))
mux.Handle("POST /exit", http.HandlerFunc(s.handleExit))
- // Debug routes
+ // Debug endpoints
+ mux.Handle("GET /debug/llm_requests", http.HandlerFunc(s.handleDebugLLMRequests))
+ mux.Handle("GET /debug/llm_requests/api", http.HandlerFunc(s.handleDebugLLMRequestsAPI))
+ mux.Handle("GET /debug/llm_requests/{id}/request", http.HandlerFunc(s.handleDebugLLMRequestBody))
+ mux.Handle("GET /debug/llm_requests/{id}/response", http.HandlerFunc(s.handleDebugLLMResponseBody))
// Serve embedded UI assets
mux.Handle("/", s.staticHandler(ui.Assets()))