From 507852af0920e1c56edc37ecf7596113cd6f473b Mon Sep 17 00:00:00 2001 From: Philip Zeyliger Date: Wed, 21 Jan 2026 19:21:18 -0800 Subject: [PATCH] shelley: Add LLM request tracking with prefix deduplication So, storing LLM messages for debugging/history turned out to be accidentally quadratic. This tries to fix it. The debug page is still not usable, but that's ok. Prompt: Track LLM HTTP requests in database with prefix deduplication for space efficiency, then add a /debug/llm_requests page to view the data - Track LLM HTTP requests in database with custom headers - Add prefix deduplication for llm_requests table to reduce storage - Reorder Anthropic and Gemini request struct fields for better prefix matching - Pass conversation ID through context for prefix dedup (was missing) - Add /debug/llm_requests page with: - Table showing recent requests with size info - Lazy loading of request/response bodies - Collapsible JSON viewer with syntax highlighting - Prefix deduplication info display Tested: prefix deduplication working for both Anthropic and OpenAI providers Co-authored-by: Shelley --- db/db.go | 149 +++++++ db/db_test.go | 413 ++++++++++++++++++ db/generated/llm_requests.sql.go | 181 +++++++- db/generated/models.go | 24 +- db/query/llm_requests.sql | 42 +- db/schema/011-add-llm-request-prefix.sql | 9 + llm/ant/ant.go | 9 +- llm/conversation/testdata/basic_convo.httprr | 4 +- llm/gem/gemini/gemini.go | 11 +- server/convo.go | 5 +- server/debug_handlers.go | 419 +++++++++++++++++++ server/server.go | 6 +- 12 files changed, 1236 insertions(+), 36 deletions(-) create mode 100644 db/schema/011-add-llm-request-prefix.sql create mode 100644 server/debug_handlers.go diff --git a/db/db.go b/db/db.go index 0e4c653c6432ac37dad2dde866574577b2d8fdf2..d95a2baf0f718203d33f842dea9c7af0def7a032 100644 --- a/db/db.go +++ b/db/db.go @@ -695,9 +695,158 @@ func (db *DB) InsertLLMRequest(ctx context.Context, params generated.InsertLLMRe var request generated.LlmRequest err := 
db.pool.Tx(ctx, func(ctx context.Context, tx *Tx) error { q := generated.New(tx.Conn()) + + // If we have a conversation ID and request body, try to find common prefix + if params.ConversationID != nil && params.RequestBody != nil { + // Get the last request for this conversation + lastReq, err := q.GetLastRequestForConversation(ctx, params.ConversationID) + if err == nil { + // Found a previous request - compute common prefix + prefixLen, fullPrevBody := computeSharedPrefixLength(lastReq, *params.RequestBody) + if prefixLen > 0 { + // Store only the suffix + suffix := (*params.RequestBody)[prefixLen:] + params.RequestBody = &suffix + params.PrefixRequestID = &lastReq.ID + prefixLen64 := int64(prefixLen) + params.PrefixLength = &prefixLen64 + _ = fullPrevBody // silence unused warning, used for computing prefix + } + } + // If no previous request found or error, just store the full body + } + var err error request, err = q.InsertLLMRequest(ctx, params) return err }) return &request, err } + +// computeSharedPrefixLength computes the length of the shared prefix between +// the full previous request body (reconstructed by walking the chain) and the new request body. +// It returns the prefix length and the fully reconstructed previous body. +func computeSharedPrefixLength(prevReq generated.LlmRequest, newBody string) (int, string) { + // Get the stored body (which may be just a suffix if prevReq has a prefix reference) + prevBody := "" + if prevReq.RequestBody != nil { + prevBody = *prevReq.RequestBody + } + + // If the previous request has a prefix reference, we need to account for that + // by prepending the prefix length worth of bytes from the new body. + // This works because in a conversation, request N+1 typically starts with + // all of request N plus new content at the end. 
+ if prevReq.PrefixLength != nil && *prevReq.PrefixLength > 0 { + // The previous request's full body would be: + // [first prefix_length bytes that match its parent] + [stored suffix] + // If the new body is a continuation, its first prefix_length bytes + // should match those same bytes. + prefixLen := int(*prevReq.PrefixLength) + if prefixLen <= len(newBody) { + prevBody = newBody[:prefixLen] + prevBody + } + } + + // Compute byte-by-byte shared prefix between reconstructed prevBody and newBody + minLen := len(prevBody) + if len(newBody) < minLen { + minLen = len(newBody) + } + + prefixLen := 0 + for i := 0; i < minLen; i++ { + if prevBody[i] != newBody[i] { + break + } + prefixLen++ + } + + // Only use prefix deduplication if we save meaningful space + // (at least 100 bytes saved) + if prefixLen < 100 { + return 0, prevBody + } + + return prefixLen, prevBody +} + +// ListRecentLLMRequests returns the most recent LLM requests +func (db *DB) ListRecentLLMRequests(ctx context.Context, limit int64) ([]generated.ListRecentLLMRequestsRow, error) { + var requests []generated.ListRecentLLMRequestsRow + err := db.pool.Rx(ctx, func(ctx context.Context, rx *Rx) error { + q := generated.New(rx.Conn()) + var err error + requests, err = q.ListRecentLLMRequests(ctx, limit) + return err + }) + return requests, err +} + +// GetLLMRequestBody returns the raw request body for a request +func (db *DB) GetLLMRequestBody(ctx context.Context, id int64) (*string, error) { + var body *string + err := db.pool.Rx(ctx, func(ctx context.Context, rx *Rx) error { + q := generated.New(rx.Conn()) + var err error + body, err = q.GetLLMRequestBody(ctx, id) + return err + }) + return body, err +} + +// GetLLMResponseBody returns the raw response body for a request +func (db *DB) GetLLMResponseBody(ctx context.Context, id int64) (*string, error) { + var body *string + err := db.pool.Rx(ctx, func(ctx context.Context, rx *Rx) error { + q := generated.New(rx.Conn()) + var err error + body, err = 
q.GetLLMResponseBody(ctx, id) + return err + }) + return body, err +} + +// GetFullLLMRequestBody reconstructs the full request body for a request, +// following the prefix chain if necessary. +func (db *DB) GetFullLLMRequestBody(ctx context.Context, requestID int64) (string, error) { + var result string + err := db.pool.Rx(ctx, func(ctx context.Context, rx *Rx) error { + q := generated.New(rx.Conn()) + return reconstructRequestBody(ctx, q, requestID, &result) + }) + return result, err +} + +// reconstructRequestBody recursively reconstructs the full request body +func reconstructRequestBody(ctx context.Context, q *generated.Queries, requestID int64, result *string) error { + req, err := q.GetLLMRequestByID(ctx, requestID) + if err != nil { + return err + } + + suffix := "" + if req.RequestBody != nil { + suffix = *req.RequestBody + } + + if req.PrefixRequestID == nil || req.PrefixLength == nil || *req.PrefixLength == 0 { + // No prefix reference - the stored body is the full body + *result = suffix + return nil + } + + // Recursively get the parent's full body + var parentBody string + if err := reconstructRequestBody(ctx, q, *req.PrefixRequestID, &parentBody); err != nil { + return err + } + + // The full body is the first prefix_length bytes from the parent + our suffix + prefixLen := int(*req.PrefixLength) + if prefixLen > len(parentBody) { + prefixLen = len(parentBody) + } + *result = parentBody[:prefixLen] + suffix + return nil +} diff --git a/db/db_test.go b/db/db_test.go index 44e027475b1bb3be5043481c8b0eadee8df2312c..23becd394d77408533a889f1209500bf6f2679c4 100644 --- a/db/db_test.go +++ b/db/db_test.go @@ -217,3 +217,416 @@ func TestDB_WithTxRes(t *testing.T) { t.Error("Expected error from WithTxRes, got none") } } + +func TestLLMRequestPrefixDeduplication(t *testing.T) { + db := setupTestDB(t) + defer db.Close() + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + // Create a conversation first + slug := 
"test-prefix-conv" + conv, err := db.CreateConversation(ctx, &slug, true, nil) + if err != nil { + t.Fatalf("Failed to create conversation: %v", err) + } + + // Create a long shared prefix (must be > 100 bytes for deduplication to kick in) + sharedPrefix := strings.Repeat("A", 200) // 200 bytes of 'A's + + // First request - full body stored + req1Body := sharedPrefix + "_suffix1" + req1, err := db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{ + ConversationID: &conv.ConversationID, + Model: "test-model", + Provider: "test-provider", + Url: "http://example.com", + RequestBody: &req1Body, + }) + if err != nil { + t.Fatalf("Failed to insert first request: %v", err) + } + + // First request should have full body, no prefix reference + if req1.PrefixRequestID != nil { + t.Errorf("First request should not have prefix reference, got %v", *req1.PrefixRequestID) + } + if req1.PrefixLength != nil && *req1.PrefixLength != 0 { + t.Errorf("First request should have no prefix length, got %v", *req1.PrefixLength) + } + if req1.RequestBody == nil || *req1.RequestBody != req1Body { + t.Errorf("First request body mismatch: expected %q, got %q", req1Body, safeDeref(req1.RequestBody)) + } + + // Second request - shares prefix with first + req2Body := sharedPrefix + "_suffix2_longer" + req2, err := db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{ + ConversationID: &conv.ConversationID, + Model: "test-model", + Provider: "test-provider", + Url: "http://example.com", + RequestBody: &req2Body, + }) + if err != nil { + t.Fatalf("Failed to insert second request: %v", err) + } + + // Second request should have prefix reference + if req2.PrefixRequestID == nil || *req2.PrefixRequestID != req1.ID { + t.Errorf("Second request should reference first request, got prefix_request_id=%v", safeDeref64(req2.PrefixRequestID)) + } + // Common prefix is sharedPrefix + "_suffix" = 200 + 7 = 207 bytes + expectedPrefixLen := len(sharedPrefix) + len("_suffix") + if req2.PrefixLength == 
nil || *req2.PrefixLength != int64(expectedPrefixLen) { + t.Errorf("Second request prefix length should be %d, got %v", expectedPrefixLen, safeDeref64(req2.PrefixLength)) + } + // Stored body should only be the suffix after the shared prefix ("1" vs "2_longer") + expectedSuffix := "2_longer" + if req2.RequestBody == nil || *req2.RequestBody != expectedSuffix { + t.Errorf("Second request should only store suffix %q, got %q", expectedSuffix, safeDeref(req2.RequestBody)) + } + + // Third request - shares even longer prefix with second + req3Body := sharedPrefix + "_suffix2_longer_and_more" + req3, err := db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{ + ConversationID: &conv.ConversationID, + Model: "test-model", + Provider: "test-provider", + Url: "http://example.com", + RequestBody: &req3Body, + }) + if err != nil { + t.Fatalf("Failed to insert third request: %v", err) + } + + // Third request should reference second request + if req3.PrefixRequestID == nil || *req3.PrefixRequestID != req2.ID { + t.Errorf("Third request should reference second request, got prefix_request_id=%v", safeDeref64(req3.PrefixRequestID)) + } + // The prefix length should be the full length of req2Body (since req3Body starts with req2Body) + if req3.PrefixLength == nil || *req3.PrefixLength != int64(len(sharedPrefix)+len("_suffix2_longer")) { + t.Errorf("Third request prefix length should be %d, got %v", len(sharedPrefix)+len("_suffix2_longer"), safeDeref64(req3.PrefixLength)) + } + + // Test reconstruction of full bodies + reconstructed1, err := db.GetFullLLMRequestBody(ctx, req1.ID) + if err != nil { + t.Fatalf("Failed to reconstruct first request: %v", err) + } + if reconstructed1 != req1Body { + t.Errorf("Reconstructed first request mismatch: expected %q, got %q", req1Body, reconstructed1) + } + + reconstructed2, err := db.GetFullLLMRequestBody(ctx, req2.ID) + if err != nil { + t.Fatalf("Failed to reconstruct second request: %v", err) + } + if reconstructed2 != req2Body { + 
t.Errorf("Reconstructed second request mismatch: expected %q, got %q", req2Body, reconstructed2) + } + + reconstructed3, err := db.GetFullLLMRequestBody(ctx, req3.ID) + if err != nil { + t.Fatalf("Failed to reconstruct third request: %v", err) + } + if reconstructed3 != req3Body { + t.Errorf("Reconstructed third request mismatch: expected %q, got %q", req3Body, reconstructed3) + } +} + +func TestLLMRequestNoPrefixForShortOverlap(t *testing.T) { + db := setupTestDB(t) + defer db.Close() + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + slug := "test-short-conv" + conv, err := db.CreateConversation(ctx, &slug, true, nil) + if err != nil { + t.Fatalf("Failed to create conversation: %v", err) + } + + // Short prefix (< 100 bytes) - should NOT deduplicate + shortPrefix := strings.Repeat("B", 50) + + req1Body := shortPrefix + "_first" + _, err = db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{ + ConversationID: &conv.ConversationID, + Model: "test-model", + Provider: "test-provider", + Url: "http://example.com", + RequestBody: &req1Body, + }) + if err != nil { + t.Fatalf("Failed to insert first request: %v", err) + } + + req2Body := shortPrefix + "_second" + req2, err := db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{ + ConversationID: &conv.ConversationID, + Model: "test-model", + Provider: "test-provider", + Url: "http://example.com", + RequestBody: &req2Body, + }) + if err != nil { + t.Fatalf("Failed to insert second request: %v", err) + } + + // With short prefix, should NOT have prefix reference (full body stored) + if req2.PrefixRequestID != nil { + t.Errorf("Short overlap should not have prefix reference, got %v", *req2.PrefixRequestID) + } + if req2.RequestBody == nil || *req2.RequestBody != req2Body { + t.Errorf("Short overlap should store full body %q, got %q", req2Body, safeDeref(req2.RequestBody)) + } +} + +func TestLLMRequestNoConversationID(t *testing.T) { + db := setupTestDB(t) + defer 
db.Close() + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + // Request without conversation_id - should store full body + reqBody := strings.Repeat("C", 300) + req, err := db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{ + ConversationID: nil, + Model: "test-model", + Provider: "test-provider", + Url: "http://example.com", + RequestBody: &reqBody, + }) + if err != nil { + t.Fatalf("Failed to insert request: %v", err) + } + + // Should not have prefix reference + if req.PrefixRequestID != nil { + t.Errorf("Request without conversation_id should not have prefix reference") + } + if req.RequestBody == nil || *req.RequestBody != reqBody { + t.Errorf("Request should store full body") + } +} + +func safeDeref(s *string) string { + if s == nil { + return "" + } + return *s +} + +func safeDeref64(i *int64) int64 { + if i == nil { + return -1 + } + return *i +} + +func TestLLMRequestRealisticConversation(t *testing.T) { + // This test simulates realistic LLM API request patterns where each + // subsequent request includes all previous messages plus new ones + db := setupTestDB(t) + defer db.Close() + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + slug := "test-realistic-conv" + conv, err := db.CreateConversation(ctx, &slug, true, nil) + if err != nil { + t.Fatalf("Failed to create conversation: %v", err) + } + + // Simulate Anthropic-style messages array growing over conversation + // Each request adds to the previous messages + baseRequest := `{"model":"claude-sonnet-4-5-20250929","system":[{"type":"text","text":"You are a helpful assistant."}],"messages":[` + + message1 := `{"role":"user","content":[{"type":"text","text":"Hello, how are you?"}]}` + req1Body := baseRequest + message1 + `],"max_tokens":8192}` + + req1, err := db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{ + ConversationID: &conv.ConversationID, + Model: "claude-sonnet-4-5-20250929", + 
Provider: "anthropic", + Url: "https://api.anthropic.com/v1/messages", + RequestBody: &req1Body, + }) + if err != nil { + t.Fatalf("Failed to insert first request: %v", err) + } + + // First request stored in full + if req1.PrefixRequestID != nil { + t.Errorf("First request should not have prefix reference") + } + + // Second request: user message + assistant response + new user message + message2 := `{"role":"assistant","content":[{"type":"text","text":"I'm doing well, thank you for asking!"}]}` + message3 := `{"role":"user","content":[{"type":"text","text":"Can you help me write some code?"}]}` + req2Body := baseRequest + message1 + `,` + message2 + `,` + message3 + `],"max_tokens":8192}` + + req2, err := db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{ + ConversationID: &conv.ConversationID, + Model: "claude-sonnet-4-5-20250929", + Provider: "anthropic", + Url: "https://api.anthropic.com/v1/messages", + RequestBody: &req2Body, + }) + if err != nil { + t.Fatalf("Failed to insert second request: %v", err) + } + + // Second request should have prefix deduplication + if req2.PrefixRequestID == nil { + t.Errorf("Second request should have prefix reference") + } else if *req2.PrefixRequestID != req1.ID { + t.Errorf("Second request should reference first request") + } + + // Verify prefix length is reasonable (should be at least the base + message1 length) + minExpectedPrefix := len(baseRequest) + len(message1) + if req2.PrefixLength == nil || *req2.PrefixLength < int64(minExpectedPrefix) { + t.Errorf("Second request prefix length should be at least %d, got %v", minExpectedPrefix, safeDeref64(req2.PrefixLength)) + } + + // Verify we saved significant space + req2StoredLen := len(safeDeref(req2.RequestBody)) + req2FullLen := len(req2Body) + if req2StoredLen >= req2FullLen { + t.Errorf("Second request should store less than full body: stored %d, full %d", req2StoredLen, req2FullLen) + } + t.Logf("Space saved for request 2: %d bytes (%.1f%% reduction)", + 
req2FullLen-req2StoredLen, + 100.0*float64(req2FullLen-req2StoredLen)/float64(req2FullLen)) + + // Third request: even more messages + message4 := `{"role":"assistant","content":[{"type":"text","text":"Of course! What kind of code would you like me to help you with?"}]}` + message5 := `{"role":"user","content":[{"type":"text","text":"I need a function to calculate fibonacci numbers."}]}` + req3Body := baseRequest + message1 + `,` + message2 + `,` + message3 + `,` + message4 + `,` + message5 + `],"max_tokens":8192}` + + req3, err := db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{ + ConversationID: &conv.ConversationID, + Model: "claude-sonnet-4-5-20250929", + Provider: "anthropic", + Url: "https://api.anthropic.com/v1/messages", + RequestBody: &req3Body, + }) + if err != nil { + t.Fatalf("Failed to insert third request: %v", err) + } + + // Third request should reference second + if req3.PrefixRequestID == nil || *req3.PrefixRequestID != req2.ID { + t.Errorf("Third request should reference second request") + } + + req3StoredLen := len(safeDeref(req3.RequestBody)) + req3FullLen := len(req3Body) + t.Logf("Space saved for request 3: %d bytes (%.1f%% reduction)", + req3FullLen-req3StoredLen, + 100.0*float64(req3FullLen-req3StoredLen)/float64(req3FullLen)) + + // Verify reconstruction works for all requests + reconstructed1, err := db.GetFullLLMRequestBody(ctx, req1.ID) + if err != nil { + t.Fatalf("Failed to reconstruct request 1: %v", err) + } + if reconstructed1 != req1Body { + t.Errorf("Reconstructed request 1 mismatch") + } + + reconstructed2, err := db.GetFullLLMRequestBody(ctx, req2.ID) + if err != nil { + t.Fatalf("Failed to reconstruct request 2: %v", err) + } + if reconstructed2 != req2Body { + t.Errorf("Reconstructed request 2 mismatch") + } + + reconstructed3, err := db.GetFullLLMRequestBody(ctx, req3.ID) + if err != nil { + t.Fatalf("Failed to reconstruct request 3: %v", err) + } + if reconstructed3 != req3Body { + t.Errorf("Reconstructed request 
3 mismatch") + } + + // Calculate total storage savings + totalOriginal := len(req1Body) + len(req2Body) + len(req3Body) + totalStored := len(safeDeref(req1.RequestBody)) + len(safeDeref(req2.RequestBody)) + len(safeDeref(req3.RequestBody)) + t.Logf("Total space: original %d bytes, stored %d bytes, saved %d bytes (%.1f%% reduction)", + totalOriginal, totalStored, totalOriginal-totalStored, + 100.0*float64(totalOriginal-totalStored)/float64(totalOriginal)) +} + +func TestLLMRequestOpenAIStyle(t *testing.T) { + // Test with OpenAI-style request format + db := setupTestDB(t) + defer db.Close() + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + slug := "test-openai-conv" + conv, err := db.CreateConversation(ctx, &slug, true, nil) + if err != nil { + t.Fatalf("Failed to create conversation: %v", err) + } + + // OpenAI-style request format + baseRequest := `{"model":"gpt-4","messages":[` + message1 := `{"role":"system","content":"You are a helpful assistant."},{"role":"user","content":"Hello!"}` + req1Body := baseRequest + message1 + `],"stream":true}` + + req1, err := db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{ + ConversationID: &conv.ConversationID, + Model: "gpt-4", + Provider: "openai", + Url: "https://api.openai.com/v1/chat/completions", + RequestBody: &req1Body, + }) + if err != nil { + t.Fatalf("Failed to insert first request: %v", err) + } + + // Second request with more messages + message2 := `{"role":"assistant","content":"Hello! 
How can I help you today?"},{"role":"user","content":"What's the weather like?"}` + req2Body := baseRequest + message1 + `,` + message2 + `],"stream":true}` + + req2, err := db.InsertLLMRequest(ctx, generated.InsertLLMRequestParams{ + ConversationID: &conv.ConversationID, + Model: "gpt-4", + Provider: "openai", + Url: "https://api.openai.com/v1/chat/completions", + RequestBody: &req2Body, + }) + if err != nil { + t.Fatalf("Failed to insert second request: %v", err) + } + + // Should have prefix deduplication + if req2.PrefixRequestID == nil || *req2.PrefixRequestID != req1.ID { + t.Errorf("Second request should reference first request") + } + + // Verify reconstruction + reconstructed2, err := db.GetFullLLMRequestBody(ctx, req2.ID) + if err != nil { + t.Fatalf("Failed to reconstruct second request: %v", err) + } + if reconstructed2 != req2Body { + t.Errorf("Reconstructed request mismatch:\nexpected: %s\ngot: %s", req2Body, reconstructed2) + } + + // Calculate savings + req2StoredLen := len(safeDeref(req2.RequestBody)) + req2FullLen := len(req2Body) + t.Logf("OpenAI-style space saved: %d bytes (%.1f%% reduction)", + req2FullLen-req2StoredLen, + 100.0*float64(req2FullLen-req2StoredLen)/float64(req2FullLen)) +} diff --git a/db/generated/llm_requests.sql.go b/db/generated/llm_requests.sql.go index 0b6b88f0004e42d856be98e770eaeb099c051530..599d6023040b91d281b84f32b1d87048be6e1ac5 100644 --- a/db/generated/llm_requests.sql.go +++ b/db/generated/llm_requests.sql.go @@ -7,8 +7,84 @@ package generated import ( "context" + "time" ) +const getLLMRequestBody = `-- name: GetLLMRequestBody :one +SELECT request_body FROM llm_requests WHERE id = ? 
+` + +func (q *Queries) GetLLMRequestBody(ctx context.Context, id int64) (*string, error) { + row := q.db.QueryRowContext(ctx, getLLMRequestBody, id) + var request_body *string + err := row.Scan(&request_body) + return request_body, err +} + +const getLLMRequestByID = `-- name: GetLLMRequestByID :one +SELECT id, conversation_id, model, provider, url, request_body, response_body, status_code, error, duration_ms, created_at, prefix_request_id, prefix_length FROM llm_requests WHERE id = ? +` + +func (q *Queries) GetLLMRequestByID(ctx context.Context, id int64) (LlmRequest, error) { + row := q.db.QueryRowContext(ctx, getLLMRequestByID, id) + var i LlmRequest + err := row.Scan( + &i.ID, + &i.ConversationID, + &i.Model, + &i.Provider, + &i.Url, + &i.RequestBody, + &i.ResponseBody, + &i.StatusCode, + &i.Error, + &i.DurationMs, + &i.CreatedAt, + &i.PrefixRequestID, + &i.PrefixLength, + ) + return i, err +} + +const getLLMResponseBody = `-- name: GetLLMResponseBody :one +SELECT response_body FROM llm_requests WHERE id = ? +` + +func (q *Queries) GetLLMResponseBody(ctx context.Context, id int64) (*string, error) { + row := q.db.QueryRowContext(ctx, getLLMResponseBody, id) + var response_body *string + err := row.Scan(&response_body) + return response_body, err +} + +const getLastRequestForConversation = `-- name: GetLastRequestForConversation :one +SELECT id, conversation_id, model, provider, url, request_body, response_body, status_code, error, duration_ms, created_at, prefix_request_id, prefix_length FROM llm_requests +WHERE conversation_id = ? 
+ORDER BY id DESC +LIMIT 1 +` + +func (q *Queries) GetLastRequestForConversation(ctx context.Context, conversationID *string) (LlmRequest, error) { + row := q.db.QueryRowContext(ctx, getLastRequestForConversation, conversationID) + var i LlmRequest + err := row.Scan( + &i.ID, + &i.ConversationID, + &i.Model, + &i.Provider, + &i.Url, + &i.RequestBody, + &i.ResponseBody, + &i.StatusCode, + &i.Error, + &i.DurationMs, + &i.CreatedAt, + &i.PrefixRequestID, + &i.PrefixLength, + ) + return i, err +} + const insertLLMRequest = `-- name: InsertLLMRequest :one INSERT INTO llm_requests ( conversation_id, @@ -19,21 +95,25 @@ INSERT INTO llm_requests ( response_body, status_code, error, - duration_ms -) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) -RETURNING id, conversation_id, model, provider, url, request_body, response_body, status_code, error, duration_ms, created_at + duration_ms, + prefix_request_id, + prefix_length +) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) +RETURNING id, conversation_id, model, provider, url, request_body, response_body, status_code, error, duration_ms, created_at, prefix_request_id, prefix_length ` type InsertLLMRequestParams struct { - ConversationID *string `json:"conversation_id"` - Model string `json:"model"` - Provider string `json:"provider"` - Url string `json:"url"` - RequestBody *string `json:"request_body"` - ResponseBody *string `json:"response_body"` - StatusCode *int64 `json:"status_code"` - Error *string `json:"error"` - DurationMs *int64 `json:"duration_ms"` + ConversationID *string `json:"conversation_id"` + Model string `json:"model"` + Provider string `json:"provider"` + Url string `json:"url"` + RequestBody *string `json:"request_body"` + ResponseBody *string `json:"response_body"` + StatusCode *int64 `json:"status_code"` + Error *string `json:"error"` + DurationMs *int64 `json:"duration_ms"` + PrefixRequestID *int64 `json:"prefix_request_id"` + PrefixLength *int64 `json:"prefix_length"` } func (q *Queries) InsertLLMRequest(ctx 
context.Context, arg InsertLLMRequestParams) (LlmRequest, error) { @@ -47,6 +127,8 @@ func (q *Queries) InsertLLMRequest(ctx context.Context, arg InsertLLMRequestPara arg.StatusCode, arg.Error, arg.DurationMs, + arg.PrefixRequestID, + arg.PrefixLength, ) var i LlmRequest err := row.Scan( @@ -61,6 +143,81 @@ func (q *Queries) InsertLLMRequest(ctx context.Context, arg InsertLLMRequestPara &i.Error, &i.DurationMs, &i.CreatedAt, + &i.PrefixRequestID, + &i.PrefixLength, ) return i, err } + +const listRecentLLMRequests = `-- name: ListRecentLLMRequests :many +SELECT + id, + conversation_id, + model, + provider, + url, + LENGTH(request_body) as request_body_length, + LENGTH(response_body) as response_body_length, + status_code, + error, + duration_ms, + created_at, + prefix_request_id, + prefix_length +FROM llm_requests +ORDER BY id DESC +LIMIT ? +` + +type ListRecentLLMRequestsRow struct { + ID int64 `json:"id"` + ConversationID *string `json:"conversation_id"` + Model string `json:"model"` + Provider string `json:"provider"` + Url string `json:"url"` + RequestBodyLength *int64 `json:"request_body_length"` + ResponseBodyLength *int64 `json:"response_body_length"` + StatusCode *int64 `json:"status_code"` + Error *string `json:"error"` + DurationMs *int64 `json:"duration_ms"` + CreatedAt time.Time `json:"created_at"` + PrefixRequestID *int64 `json:"prefix_request_id"` + PrefixLength *int64 `json:"prefix_length"` +} + +func (q *Queries) ListRecentLLMRequests(ctx context.Context, limit int64) ([]ListRecentLLMRequestsRow, error) { + rows, err := q.db.QueryContext(ctx, listRecentLLMRequests, limit) + if err != nil { + return nil, err + } + defer rows.Close() + items := []ListRecentLLMRequestsRow{} + for rows.Next() { + var i ListRecentLLMRequestsRow + if err := rows.Scan( + &i.ID, + &i.ConversationID, + &i.Model, + &i.Provider, + &i.Url, + &i.RequestBodyLength, + &i.ResponseBodyLength, + &i.StatusCode, + &i.Error, + &i.DurationMs, + &i.CreatedAt, + &i.PrefixRequestID, + 
&i.PrefixLength, + ); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} diff --git a/db/generated/models.go b/db/generated/models.go index 8b43efda0cb45058a5955f4e1d238c7b1a42a99f..4bd0ad9a39e9e498850b1886f7aeb214291b8c49 100644 --- a/db/generated/models.go +++ b/db/generated/models.go @@ -20,17 +20,19 @@ type Conversation struct { } type LlmRequest struct { - ID int64 `json:"id"` - ConversationID *string `json:"conversation_id"` - Model string `json:"model"` - Provider string `json:"provider"` - Url string `json:"url"` - RequestBody *string `json:"request_body"` - ResponseBody *string `json:"response_body"` - StatusCode *int64 `json:"status_code"` - Error *string `json:"error"` - DurationMs *int64 `json:"duration_ms"` - CreatedAt time.Time `json:"created_at"` + ID int64 `json:"id"` + ConversationID *string `json:"conversation_id"` + Model string `json:"model"` + Provider string `json:"provider"` + Url string `json:"url"` + RequestBody *string `json:"request_body"` + ResponseBody *string `json:"response_body"` + StatusCode *int64 `json:"status_code"` + Error *string `json:"error"` + DurationMs *int64 `json:"duration_ms"` + CreatedAt time.Time `json:"created_at"` + PrefixRequestID *int64 `json:"prefix_request_id"` + PrefixLength *int64 `json:"prefix_length"` } type Message struct { diff --git a/db/query/llm_requests.sql b/db/query/llm_requests.sql index 90aeab858de6a87378673a9d786370304ef2f295..3f1b8c10db355687a4f1859e5522bf9fbc59655f 100644 --- a/db/query/llm_requests.sql +++ b/db/query/llm_requests.sql @@ -8,6 +8,44 @@ INSERT INTO llm_requests ( response_body, status_code, error, - duration_ms -) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + duration_ms, + prefix_request_id, + prefix_length +) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
RETURNING *; + +-- name: GetLastRequestForConversation :one +SELECT * FROM llm_requests +WHERE conversation_id = ? +ORDER BY id DESC +LIMIT 1; + +-- name: GetLLMRequestByID :one +SELECT * FROM llm_requests WHERE id = ?; + +-- name: ListRecentLLMRequests :many +SELECT + id, + conversation_id, + model, + provider, + url, + LENGTH(request_body) as request_body_length, + LENGTH(response_body) as response_body_length, + status_code, + error, + duration_ms, + created_at, + prefix_request_id, + prefix_length +FROM llm_requests +ORDER BY id DESC +LIMIT ?; + +-- name: GetLLMRequestBody :one +SELECT request_body FROM llm_requests WHERE id = ?; + +-- name: GetLLMResponseBody :one +SELECT response_body FROM llm_requests WHERE id = ?; + + diff --git a/db/schema/011-add-llm-request-prefix.sql b/db/schema/011-add-llm-request-prefix.sql new file mode 100644 index 0000000000000000000000000000000000000000..e8397143ed945b70059743934ddf3713124325f4 --- /dev/null +++ b/db/schema/011-add-llm-request-prefix.sql @@ -0,0 +1,9 @@ +-- Add prefix deduplication columns to llm_requests table +-- This allows storing only the suffix of request_body when there's a shared prefix +-- with a previous request in the same conversation. + +ALTER TABLE llm_requests ADD COLUMN prefix_request_id INTEGER REFERENCES llm_requests(id); +ALTER TABLE llm_requests ADD COLUMN prefix_length INTEGER; + +-- Index for efficient prefix lookups +CREATE INDEX idx_llm_requests_prefix_request_id ON llm_requests(prefix_request_id) WHERE prefix_request_id IS NOT NULL; diff --git a/llm/ant/ant.go b/llm/ant/ant.go index 3b2c2579bfe38fc7107e809dcf19dc53a8ad6d8c..aa3929b7ebc47c4e6efa1f1e06bfc051c2f4ef0f 100644 --- a/llm/ant/ant.go +++ b/llm/ant/ant.go @@ -218,17 +218,20 @@ type systemContent struct { // request represents the request payload for creating a message. 
type request struct { + // Field order matters for JSON serialization - stable fields should come first + // to maximize prefix deduplication when storing LLM requests. Model string `json:"model"` - Messages []message `json:"messages"` - ToolChoice *toolChoice `json:"tool_choice,omitempty"` MaxTokens int `json:"max_tokens"` - Tools []*tool `json:"tools,omitempty"` Stream bool `json:"stream,omitempty"` System []systemContent `json:"system,omitempty"` + Tools []*tool `json:"tools,omitempty"` + ToolChoice *toolChoice `json:"tool_choice,omitempty"` Temperature float64 `json:"temperature,omitempty"` TopK int `json:"top_k,omitempty"` TopP float64 `json:"top_p,omitempty"` StopSequences []string `json:"stop_sequences,omitempty"` + // Messages comes last since it grows with each request in a conversation + Messages []message `json:"messages"` } func mapped[Slice ~[]E, E, T any](s Slice, f func(E) T) []T { diff --git a/llm/conversation/testdata/basic_convo.httprr b/llm/conversation/testdata/basic_convo.httprr index 4a3d3aa868791ff94e8cec8402de4e4208329e67..ad8af0a17518295671a8e1f5691cc538515cbf31 100644 --- a/llm/conversation/testdata/basic_convo.httprr +++ b/llm/conversation/testdata/basic_convo.httprr @@ -7,7 +7,7 @@ Content-Length: 183 Anthropic-Version: 2023-06-01 Content-Type: application/json -{"model":"claude-sonnet-4-20250514","messages":[{"role":"user","content":[{"type":"text","text":"Hi, my name is Cornelius","cache_control":{"type":"ephemeral"}}]}],"max_tokens":8192} +{"model":"claude-sonnet-4-20250514","max_tokens":8192,"messages":[{"role":"user","content":[{"type":"text","text":"Hi, my name is Cornelius","cache_control":{"type":"ephemeral"}}]}]} HTTP/2.0 200 OK Anthropic-Organization-Id: 3c473a21-7208-450a-a9f8-80aebda45c1b Anthropic-Ratelimit-Input-Tokens-Limit: 4000000 @@ -37,7 +37,7 @@ Content-Length: 454 Anthropic-Version: 2023-06-01 Content-Type: application/json 
-{"model":"claude-sonnet-4-20250514","messages":[{"role":"user","content":[{"type":"text","text":"Hi, my name is Cornelius"}]},{"role":"assistant","content":[{"type":"text","text":"Hello Cornelius! It's nice to meet you. That's a distinctive and classic name. How are you doing today? Is there anything I can help you with?"}]},{"role":"user","content":[{"type":"text","text":"What is my name?","cache_control":{"type":"ephemeral"}}]}],"max_tokens":8192} +{"model":"claude-sonnet-4-20250514","max_tokens":8192,"messages":[{"role":"user","content":[{"type":"text","text":"Hi, my name is Cornelius"}]},{"role":"assistant","content":[{"type":"text","text":"Hello Cornelius! It's nice to meet you. That's a distinctive and classic name. How are you doing today? Is there anything I can help you with?"}]},{"role":"user","content":[{"type":"text","text":"What is my name?","cache_control":{"type":"ephemeral"}}]}]} HTTP/2.0 200 OK Anthropic-Organization-Id: 3c473a21-7208-450a-a9f8-80aebda45c1b Anthropic-Ratelimit-Input-Tokens-Limit: 4000000 diff --git a/llm/gem/gemini/gemini.go b/llm/gem/gemini/gemini.go index eeafbaf25cbdf2e3c0a9dfb090fe9a47e1231f95..609fa8e5da19c68a45a4a4bc4e108e83bfc3d600 100644 --- a/llm/gem/gemini/gemini.go +++ b/llm/gem/gemini/gemini.go @@ -11,12 +11,15 @@ import ( // https://ai.google.dev/api/generate-content#request-body type Request struct { - Contents []Content `json:"contents"` - Tools []Tool `json:"tools,omitempty"` - SystemInstruction *Content `json:"systemInstruction,omitempty"` - GenerationConfig *GenerationConfig `json:"generationConfig,omitempty"` + // Field order matters for JSON serialization - stable fields should come first + // to maximize prefix deduplication when storing LLM requests. 
CachedContent string `json:"cachedContent,omitempty"` // format: "cachedContents/{name}" + GenerationConfig *GenerationConfig `json:"generationConfig,omitempty"` + SystemInstruction *Content `json:"systemInstruction,omitempty"` + Tools []Tool `json:"tools,omitempty"` // ToolConfig has been left out because it does not appear to be useful. + // Contents comes last since it grows with each request in a conversation + Contents []Content `json:"contents"` } // https://ai.google.dev/api/generate-content#response-body diff --git a/server/convo.go b/server/convo.go index 534c741c3f5225436da4a4c0bfb8fc31ca2f914d..b94b37526b4db470b5883e66449af74ddcf9cfe2 100644 --- a/server/convo.go +++ b/server/convo.go @@ -13,6 +13,7 @@ import ( "shelley.exe.dev/db/generated" "shelley.exe.dev/gitstate" "shelley.exe.dev/llm" + "shelley.exe.dev/llm/llmhttp" "shelley.exe.dev/loop" "shelley.exe.dev/subpub" ) @@ -372,7 +373,9 @@ func (cm *ConversationManager) ensureLoop(service llm.Service, modelID string) e } } - processCtx, cancel := context.WithTimeout(context.Background(), 12*time.Hour) + // Create a context with the conversation ID for LLM request recording/prefix dedup + baseCtx := llmhttp.WithConversationID(context.Background(), conversationID) + processCtx, cancel := context.WithTimeout(baseCtx, 12*time.Hour) toolSet := claudetool.NewToolSet(processCtx, toolSetConfig) loopInstance := loop.NewLoop(loop.Config{ diff --git a/server/debug_handlers.go b/server/debug_handlers.go new file mode 100644 index 0000000000000000000000000000000000000000..3ff6e156cf81651209d1a66e92f2d3b775b613e4 --- /dev/null +++ b/server/debug_handlers.go @@ -0,0 +1,419 @@ +package server + +import ( + "encoding/json" + "net/http" + "strconv" +) + +// handleDebugLLMRequests serves the debug page for LLM requests +func (s *Server) handleDebugLLMRequests(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html") + w.Write([]byte(debugLLMRequestsHTML)) +} + +// handleDebugLLMRequestsAPI 
returns recent LLM requests as JSON +func (s *Server) handleDebugLLMRequestsAPI(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + + limit := int64(100) + if limitStr := r.URL.Query().Get("limit"); limitStr != "" { + if l, err := strconv.ParseInt(limitStr, 10, 64); err == nil && l > 0 { + limit = l + } + } + + requests, err := s.db.ListRecentLLMRequests(ctx, limit) + if err != nil { + s.logger.Error("Failed to list LLM requests", "error", err) + http.Error(w, "Internal server error", http.StatusInternalServerError) + return + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(requests) +} + +// handleDebugLLMRequestBody returns the stored request body for a specific LLM request; when prefix deduplication applied, this is only the suffix after the shared prefix +func (s *Server) handleDebugLLMRequestBody(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + + idStr := r.PathValue("id") + id, err := strconv.ParseInt(idStr, 10, 64) + if err != nil { + http.Error(w, "Invalid ID", http.StatusBadRequest) + return + } + + body, err := s.db.GetLLMRequestBody(ctx, id) + if err != nil { + s.logger.Error("Failed to get LLM request body", "error", err, "id", id) + http.Error(w, "Not found", http.StatusNotFound) + return + } + + if body == nil { + w.Header().Set("Content-Type", "application/json") + w.Write([]byte("null")) + return + } + + w.Header().Set("Content-Type", "application/json") + w.Write([]byte(*body)) +} + +// handleDebugLLMResponseBody returns the response body for a specific LLM request +func (s *Server) handleDebugLLMResponseBody(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + + idStr := r.PathValue("id") + id, err := strconv.ParseInt(idStr, 10, 64) + if err != nil { + http.Error(w, "Invalid ID", http.StatusBadRequest) + return + } + + body, err := s.db.GetLLMResponseBody(ctx, id) + if err != nil { + s.logger.Error("Failed to get LLM response body", "error", err, "id", id) + http.Error(w, "Not found", http.StatusNotFound) + return + } + + if body == nil { + 
w.Header().Set("Content-Type", "application/json") + w.Write([]byte("null")) + return + } + + w.Header().Set("Content-Type", "application/json") + w.Write([]byte(*body)) +} + +const debugLLMRequestsHTML = ` + + + + +Debug: LLM Requests + + + +

LLM Requests

+ + + + + + + + + + + + + + + + + + +
IDTimeModelProviderStatusDurationRequest SizeResponse SizePrefix InfoActions
Loading...
+ + + + +` diff --git a/server/server.go b/server/server.go index fc29c0e54af1db25a46d8caf55f1453cc2953d10..d2fe39610827e4baa65ea103e4591781aafba40b 100644 --- a/server/server.go +++ b/server/server.go @@ -268,7 +268,11 @@ func (s *Server) RegisterRoutes(mux *http.ServeMux) { mux.Handle("POST /upgrade", http.HandlerFunc(s.handleUpgrade)) mux.Handle("POST /exit", http.HandlerFunc(s.handleExit)) - // Debug routes + // Debug endpoints + mux.Handle("GET /debug/llm_requests", http.HandlerFunc(s.handleDebugLLMRequests)) + mux.Handle("GET /debug/llm_requests/api", http.HandlerFunc(s.handleDebugLLMRequestsAPI)) + mux.Handle("GET /debug/llm_requests/{id}/request", http.HandlerFunc(s.handleDebugLLMRequestBody)) + mux.Handle("GET /debug/llm_requests/{id}/response", http.HandlerFunc(s.handleDebugLLMResponseBody)) // Serve embedded UI assets mux.Handle("/", s.staticHandler(ui.Assets()))