shelley: Track LLM HTTP requests in database; new headers

Created by Philip Zeyliger and Shelley

Prompt: Add tracking of all LLM HTTP requests to a database table
(llm_requests) for debugging/auditing, plus add custom headers
(User-Agent with Shelley version, Shelley-ConversationId) to all
outgoing LLM requests. Remove existing HTTPRecorder callback and ant's
DumpLLM; we don't need them. Remove in-memory LLMRequestHistory and
/debug/llm endpoint. Make sure that the anthropic and openai and gemini
implementations all follow the same pattern/implementation here.

- Add llm_requests table for tracking/debugging LLM API calls
- Add custom HTTP headers: User-Agent with Shelley version, Shelley-Conversation-Id
- Create llmhttp package for HTTP transport with recording and headers
- Remove HTTPRecorder callback from ant.Service
- Remove DumpLLM from ant, oai, and gem services
- Remove in-memory LLMRequestHistory and /debug/llm endpoint
- Update models.Manager to use new database-based recording
- Pass http.Client through factory functions for consistent transport

The llm_requests table stores:
- conversation_id (optional)
- model and provider
- URL, request/response bodies
- status code, error, duration
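
Rows can then be inspected directly while debugging. A minimal sketch, assuming plain database/sql access to the Shelley SQLite database; the llmdebug package, RecentRequests helper, and RequestSummary type below are hypothetical, but the table and column names come from the new schema:

    // Illustrative debugging helper, not part of this change: the llmdebug
    // package, RecentRequests, and RequestSummary are hypothetical, but the
    // table and column names match db/schema/010-add-llm-requests.sql.
    package llmdebug

    import (
        "context"
        "database/sql"
    )

    type RequestSummary struct {
        CreatedAt  string
        Model      string
        Provider   string
        StatusCode sql.NullInt64
        DurationMs sql.NullInt64
    }

    // RecentRequests returns rows for one conversation, newest first, relying on
    // the idx_llm_requests_conversation_id and created_at indexes.
    func RecentRequests(ctx context.Context, db *sql.DB, conversationID string, limit int) ([]RequestSummary, error) {
        rows, err := db.QueryContext(ctx, `
            SELECT created_at, model, provider, status_code, duration_ms
            FROM llm_requests
            WHERE conversation_id = ?
            ORDER BY created_at DESC
            LIMIT ?`, conversationID, limit)
        if err != nil {
            return nil, err
        }
        defer rows.Close()

        var out []RequestSummary
        for rows.Next() {
            var r RequestSummary
            if err := rows.Scan(&r.CreatedAt, &r.Model, &r.Provider, &r.StatusCode, &r.DurationMs); err != nil {
                return nil, err
            }
            out = append(out, r)
        }
        return out, rows.Err()
    }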

Custom headers help with debugging and tracing:
- User-Agent: Shelley/<short_commit_hash>
- Shelley-Conversation-Id: <conversation_id>
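
A minimal usage sketch of how the pieces fit together from a caller's point of view, using the llmhttp and ant packages from this diff; the logging recorder, API-key handling, and conversation ID shown here are illustrative only (in Shelley, models.NewManager builds the recorder that writes to llm_requests):

    package main

    import (
        "context"
        "log"
        "os"
        "time"

        "shelley.exe.dev/llm"
        "shelley.exe.dev/llm/ant"
        "shelley.exe.dev/llm/llmhttp"
    )

    func main() {
        // The recorder is optional; this one just logs. Shelley's real recorder
        // (built in models.NewManager) inserts a row into llm_requests instead.
        httpc := llmhttp.NewClient(nil, func(ctx context.Context, url string, reqBody, respBody []byte,
            status int, err error, d time.Duration) {
            log.Printf("llm request url=%s status=%d duration=%s err=%v", url, status, d, err)
        })

        // Any of the ant/oai/gem services accepts the wrapped client via HTTPC.
        svc := &ant.Service{APIKey: os.Getenv("ANTHROPIC_API_KEY"), HTTPC: httpc}

        // The transport sets User-Agent unconditionally and emits
        // Shelley-Conversation-Id when a conversation ID is in the context.
        ctx := llmhttp.WithConversationID(context.Background(), "conv-123") // hypothetical ID
        resp, err := svc.Do(ctx, &llm.Request{
            Messages: []llm.Message{{
                Role:    llm.MessageRoleUser,
                Content: []llm.Content{{Type: llm.ContentTypeText, Text: "Hello"}},
            }},
        })
        if err != nil {
            log.Fatal(err)
        }
        _ = resp
    }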

Co-authored-by: Shelley <shelley@exe.dev>

Change summary

cmd/shelley/main.go                          |  10 
db/db.go                                     |  12 +
db/generated/llm_requests.sql.go             |  66 +++++
db/generated/models.go                       |  14 +
db/query/llm_requests.sql                    |  13 +
db/schema/010-add-llm-requests.sql           |  25 ++
llm/ant/ant.go                               |  57 ----
llm/ant/ant_test.go                          | 141 ------------
llm/conversation/convo_test.go               |   2 
llm/conversation/testdata/basic_convo.httprr | 126 +++-------
llm/gem/gem.go                               |  22 -
llm/llmhttp/llmhttp.go                       | 149 ++++++++++++
llm/llmhttp/llmhttp_test.go                  | 175 +++++++++++++++
llm/oai/oai.go                               |  18 -
models/models.go                             | 256 ++++++++++++---------
models/models_test.go                        | 143 +++--------
server/cancel_claude_test.go                 |  43 +++
server/handlers.go                           | 147 ------------
server/llmconfig.go                          |   9 
server/server.go                             |   6 
test/anthropic_test.go                       |   2 
test/server_test.go                          |  16 
22 files changed, 747 insertions(+), 705 deletions(-)

Detailed changes

cmd/shelley/main.go 🔗

@@ -99,13 +99,10 @@ func runServe(global GlobalConfig, args []string) {
 	server.DBPath = global.DBPath
 
 	// Build LLM configuration
-	llmConfig := buildLLMConfig(logger, global.ConfigPath, global.TerminalURL, global.DefaultModel)
-
-	// Create request history for debugging
-	llmHistory := models.NewLLMRequestHistory(10)
+	llmConfig := buildLLMConfig(logger, global.ConfigPath, global.TerminalURL, global.DefaultModel, database)
 
 	// Initialize LLM service manager
-	llmManager := server.NewLLMServiceManager(llmConfig, llmHistory)
+	llmManager := server.NewLLMServiceManager(llmConfig)
 
 	// Log available models
 	availableModels := llmManager.GetAvailableModels()
@@ -251,7 +248,7 @@ func setupToolSetConfig(llmProvider claudetool.LLMServiceProvider) claudetool.To
 }
 
 // buildLLMConfig constructs LLMConfig from environment variables and optional config file
-func buildLLMConfig(logger *slog.Logger, configPath, terminalURL, defaultModel string) *server.LLMConfig {
+func buildLLMConfig(logger *slog.Logger, configPath, terminalURL, defaultModel string, database *db.DB) *server.LLMConfig {
 	llmCfg := &server.LLMConfig{
 		AnthropicAPIKey: os.Getenv("ANTHROPIC_API_KEY"),
 		OpenAIAPIKey:    os.Getenv("OPENAI_API_KEY"),
@@ -259,6 +256,7 @@ func buildLLMConfig(logger *slog.Logger, configPath, terminalURL, defaultModel s
 		FireworksAPIKey: os.Getenv("FIREWORKS_API_KEY"),
 		TerminalURL:     terminalURL,
 		DefaultModel:    defaultModel,
+		DB:              database,
 		Logger:          logger,
 	}
 

db/db.go 🔗

@@ -689,3 +689,15 @@ func (a *SubagentDBAdapter) GetOrCreateSubagentConversation(ctx context.Context,
 
 	return "", "", fmt.Errorf("failed to create unique subagent slug after 100 attempts")
 }
+
+// InsertLLMRequest inserts a new LLM request record
+func (db *DB) InsertLLMRequest(ctx context.Context, params generated.InsertLLMRequestParams) (*generated.LlmRequest, error) {
+	var request generated.LlmRequest
+	err := db.pool.Tx(ctx, func(ctx context.Context, tx *Tx) error {
+		q := generated.New(tx.Conn())
+		var err error
+		request, err = q.InsertLLMRequest(ctx, params)
+		return err
+	})
+	return &request, err
+}

db/generated/llm_requests.sql.go 🔗

@@ -0,0 +1,66 @@
+// Code generated by sqlc. DO NOT EDIT.
+// versions:
+//   sqlc v1.30.0
+// source: llm_requests.sql
+
+package generated
+
+import (
+	"context"
+)
+
+const insertLLMRequest = `-- name: InsertLLMRequest :one
+INSERT INTO llm_requests (
+    conversation_id,
+    model,
+    provider,
+    url,
+    request_body,
+    response_body,
+    status_code,
+    error,
+    duration_ms
+) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+RETURNING id, conversation_id, model, provider, url, request_body, response_body, status_code, error, duration_ms, created_at
+`
+
+type InsertLLMRequestParams struct {
+	ConversationID *string `json:"conversation_id"`
+	Model          string  `json:"model"`
+	Provider       string  `json:"provider"`
+	Url            string  `json:"url"`
+	RequestBody    *string `json:"request_body"`
+	ResponseBody   *string `json:"response_body"`
+	StatusCode     *int64  `json:"status_code"`
+	Error          *string `json:"error"`
+	DurationMs     *int64  `json:"duration_ms"`
+}
+
+func (q *Queries) InsertLLMRequest(ctx context.Context, arg InsertLLMRequestParams) (LlmRequest, error) {
+	row := q.db.QueryRowContext(ctx, insertLLMRequest,
+		arg.ConversationID,
+		arg.Model,
+		arg.Provider,
+		arg.Url,
+		arg.RequestBody,
+		arg.ResponseBody,
+		arg.StatusCode,
+		arg.Error,
+		arg.DurationMs,
+	)
+	var i LlmRequest
+	err := row.Scan(
+		&i.ID,
+		&i.ConversationID,
+		&i.Model,
+		&i.Provider,
+		&i.Url,
+		&i.RequestBody,
+		&i.ResponseBody,
+		&i.StatusCode,
+		&i.Error,
+		&i.DurationMs,
+		&i.CreatedAt,
+	)
+	return i, err
+}

db/generated/models.go 🔗

@@ -19,6 +19,20 @@ type Conversation struct {
 	ParentConversationID *string   `json:"parent_conversation_id"`
 }
 
+type LlmRequest struct {
+	ID             int64     `json:"id"`
+	ConversationID *string   `json:"conversation_id"`
+	Model          string    `json:"model"`
+	Provider       string    `json:"provider"`
+	Url            string    `json:"url"`
+	RequestBody    *string   `json:"request_body"`
+	ResponseBody   *string   `json:"response_body"`
+	StatusCode     *int64    `json:"status_code"`
+	Error          *string   `json:"error"`
+	DurationMs     *int64    `json:"duration_ms"`
+	CreatedAt      time.Time `json:"created_at"`
+}
+
 type Message struct {
 	MessageID      string    `json:"message_id"`
 	ConversationID string    `json:"conversation_id"`

db/query/llm_requests.sql 🔗

@@ -0,0 +1,13 @@
+-- name: InsertLLMRequest :one
+INSERT INTO llm_requests (
+    conversation_id,
+    model,
+    provider,
+    url,
+    request_body,
+    response_body,
+    status_code,
+    error,
+    duration_ms
+) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+RETURNING *;

db/schema/010-add-llm-requests.sql 🔗

@@ -0,0 +1,25 @@
+-- LLM Requests table for tracking/debugging API calls
+-- Each row represents one HTTP request/response to an LLM provider
+
+CREATE TABLE llm_requests (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    conversation_id TEXT,  -- optional, may be NULL for requests outside conversations
+    model TEXT NOT NULL,   -- model ID used for the request
+    provider TEXT NOT NULL, -- e.g., "anthropic", "openai", "gemini"
+    url TEXT NOT NULL,
+    request_body TEXT,     -- JSON request body
+    response_body TEXT,    -- JSON response body
+    status_code INTEGER,
+    error TEXT,            -- error message if any
+    duration_ms INTEGER,   -- request duration in milliseconds
+    created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP
+);
+
+-- Index on conversation_id for debugging specific conversations
+CREATE INDEX idx_llm_requests_conversation_id ON llm_requests(conversation_id);
+
+-- Index on created_at for time-based queries
+CREATE INDEX idx_llm_requests_created_at ON llm_requests(created_at DESC);
+
+-- Index on model for filtering by model
+CREATE INDEX idx_llm_requests_model ON llm_requests(model);

llm/ant/ant.go 🔗

@@ -12,7 +12,6 @@ import (
 	"math/rand/v2"
 	"net/http"
 	"strings"
-	"testing"
 	"time"
 
 	"shelley.exe.dev/llm"
@@ -80,19 +79,14 @@ func (s *Service) MaxImageDimension() int {
 	return 2000
 }
 
-// HTTPRecorder is a callback for recording HTTP request/response data for debugging
-type HTTPRecorder func(url string, requestBody, responseBody []byte, statusCode int, err error, duration time.Duration)
-
 // Service provides Claude completions.
 // Fields should not be altered concurrently with calling any method on Service.
 type Service struct {
-	HTTPC        *http.Client // defaults to http.DefaultClient if nil
-	URL          string       // defaults to DefaultURL if empty
-	APIKey       string       // must be non-empty
-	Model        string       // defaults to DefaultModel if empty
-	MaxTokens    int          // defaults to DefaultMaxTokens if zero
-	DumpLLM      bool         // whether to dump request/response text to files for debugging; defaults to false
-	HTTPRecorder HTTPRecorder // optional callback for recording HTTP requests/responses
+	HTTPC     *http.Client // defaults to http.DefaultClient if nil
+	URL       string       // defaults to DefaultURL if empty
+	APIKey    string       // must be non-empty
+	Model     string       // defaults to DefaultModel if empty
+	MaxTokens int          // defaults to DefaultMaxTokens if zero
 }
 
 var _ llm.Service = (*Service)(nil)
@@ -462,37 +456,17 @@ func toLLMResponse(r *response) *llm.Response {
 func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error) {
 	startTime := time.Now()
 	request := s.fromLLMRequest(ir)
-	var payload []byte
-	var err error
-	if s.DumpLLM || testing.Testing() {
-		payload, err = json.MarshalIndent(request, "", " ")
-	} else {
-		payload, err = json.Marshal(request)
-		payload = append(payload, '\n')
-	}
+	payload, err := json.Marshal(request)
 	if err != nil {
 		return nil, err
 	}
-
-	if false {
-		fmt.Printf("claude request payload:\n%s\n", payload)
-	}
+	payload = append(payload, '\n')
 
 	backoff := []time.Duration{15 * time.Second, 30 * time.Second, time.Minute}
 
 	url := cmp.Or(s.URL, DefaultURL)
 	httpc := cmp.Or(s.HTTPC, http.DefaultClient)
 
-	// For recording the last attempt's response
-	var lastResponseBody []byte
-	var lastStatusCode int
-	var finalErr error
-	defer func() {
-		if s.HTTPRecorder != nil {
-			s.HTTPRecorder(url, payload, lastResponseBody, lastStatusCode, finalErr, time.Since(startTime))
-		}
-	}()
-
 	// retry loop
 	var errs error // accumulated errors across all attempts
 	for attempts := 0; ; attempts++ {
@@ -504,11 +478,6 @@ func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error
 			slog.WarnContext(ctx, "anthropic request sleep before retry", "sleep", sleep, "attempts", attempts)
 			time.Sleep(sleep)
 		}
-		if s.DumpLLM {
-			if err := llm.DumpToFile("request", url, payload); err != nil {
-				slog.WarnContext(ctx, "failed to dump request to file", "error", err)
-			}
-		}
 		req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(payload))
 		if err != nil {
 			return nil, errors.Join(errs, err)
@@ -534,17 +503,8 @@ func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error
 			continue
 		}
 
-		// Record response for HTTPRecorder callback
-		lastResponseBody = buf
-		lastStatusCode = resp.StatusCode
-
 		switch {
 		case resp.StatusCode == http.StatusOK:
-			if s.DumpLLM {
-				if err := llm.DumpToFile("response", "", buf); err != nil {
-					slog.WarnContext(ctx, "failed to dump response to file", "error", err)
-				}
-			}
 			var response response
 			err = json.NewDecoder(bytes.NewReader(buf)).Decode(&response)
 			if err != nil {
@@ -562,13 +522,11 @@ func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error
 			// server error, retry
 			slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode, "url", url, "model", s.Model)
 			errs = errors.Join(errs, fmt.Errorf("status %v (url=%s, model=%s): %s", resp.Status, url, cmp.Or(s.Model, DefaultModel), buf))
-			finalErr = errs
 			continue
 		case resp.StatusCode == 429:
 			// rate limited, retry
 			slog.WarnContext(ctx, "anthropic_request_rate_limited", "response", string(buf), "url", url, "model", s.Model)
 			errs = errors.Join(errs, fmt.Errorf("status %v (url=%s, model=%s): %s", resp.Status, url, cmp.Or(s.Model, DefaultModel), buf))
-			finalErr = errs
 			continue
 		case resp.StatusCode >= 400 && resp.StatusCode < 500:
 			// some other 400, probably unrecoverable
@@ -578,7 +536,6 @@ func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error
 			// ...retry, I guess?
 			slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode, "url", url, "model", s.Model)
 			errs = errors.Join(errs, fmt.Errorf("status %v (url=%s, model=%s): %s", resp.Status, url, cmp.Or(s.Model, DefaultModel), buf))
-			finalErr = errs
 			continue
 		}
 	}

llm/ant/ant_test.go 🔗

@@ -7,7 +7,6 @@ import (
 	"net/http"
 	"strings"
 	"testing"
-	"time"
 
 	"shelley.exe.dev/llm"
 )
@@ -1046,87 +1045,6 @@ func TestToLLMContentWithNestedToolResults(t *testing.T) {
 	}
 }
 
-func TestDoWithHTTPRecorder(t *testing.T) {
-	// Create a mock HTTP client that returns a predefined response
-	mockResponse := `{
-		"id": "msg_123",
-		"type": "message",
-		"role": "assistant",
-		"model": "claude-sonnet-4-5-20250929",
-		"content": [
-			{
-				"type": "text",
-				"text": "Hello, world!"
-			}
-		],
-		"stop_reason": "end_turn",
-		"usage": {
-			"input_tokens": 100,
-			"output_tokens": 50,
-			"cost_usd": 0.01
-		}
-	}`
-
-	// Variables to capture HTTPRecorder calls
-	var recorded bool
-	var recordedURL string
-	var recordedStatusCode int
-
-	// Create a service with a mock HTTP client and HTTPRecorder
-	client := &http.Client{
-		Transport: &mockHTTPTransport{responseBody: mockResponse, statusCode: 200},
-	}
-
-	s := &Service{
-		APIKey: "test-key",
-		HTTPC:  client,
-		HTTPRecorder: func(url string, payload, response []byte, statusCode int, err error, duration time.Duration) {
-			recorded = true
-			recordedURL = url
-			recordedStatusCode = statusCode
-		},
-	}
-
-	// Create a request
-	req := &llm.Request{
-		Messages: []llm.Message{
-			{
-				Role: llm.MessageRoleUser,
-				Content: []llm.Content{
-					{
-						Type: llm.ContentTypeText,
-						Text: "Hello, Claude!",
-					},
-				},
-			},
-		},
-	}
-
-	// Call Do
-	resp, err := s.Do(context.Background(), req)
-	if err != nil {
-		t.Fatalf("Do() error = %v, want nil", err)
-	}
-
-	// Check the response
-	if resp == nil {
-		t.Fatalf("Do() response = nil, want not nil")
-	}
-
-	// Check that HTTPRecorder was called
-	if !recorded {
-		t.Error("HTTPRecorder was not called")
-	}
-
-	if recordedURL == "" {
-		t.Error("HTTPRecorder did not record URL")
-	}
-
-	if recordedStatusCode != 200 {
-		t.Errorf("HTTPRecorder recordedStatusCode = %v, want %v", recordedStatusCode, 200)
-	}
-}
-
 func TestDoClientError(t *testing.T) {
 	// Create a mock HTTP client that returns a client error
 	mockResponse := `{"error": "bad request"}`
@@ -1167,65 +1085,6 @@ func TestDoClientError(t *testing.T) {
 	}
 }
 
-func TestDoWithDumpLLM(t *testing.T) {
-	// Create a mock HTTP client that returns a predefined response
-	mockResponse := `{
-		"id": "msg_123",
-		"type": "message",
-		"role": "assistant",
-		"model": "claude-sonnet-4-5-20250929",
-		"content": [
-			{
-				"type": "text",
-				"text": "Hello, world!"
-			}
-		],
-		"stop_reason": "end_turn",
-		"usage": {
-			"input_tokens": 100,
-			"output_tokens": 50,
-			"cost_usd": 0.01
-		}
-	}`
-
-	// Create a service with a mock HTTP client and DumpLLM enabled
-	client := &http.Client{
-		Transport: &mockHTTPTransport{responseBody: mockResponse, statusCode: 200},
-	}
-
-	s := &Service{
-		APIKey:  "test-key",
-		HTTPC:   client,
-		DumpLLM: true,
-	}
-
-	// Create a request
-	req := &llm.Request{
-		Messages: []llm.Message{
-			{
-				Role: llm.MessageRoleUser,
-				Content: []llm.Content{
-					{
-						Type: llm.ContentTypeText,
-						Text: "Hello, Claude!",
-					},
-				},
-			},
-		},
-	}
-
-	// Call Do
-	resp, err := s.Do(context.Background(), req)
-	if err != nil {
-		t.Fatalf("Do() error = %v, want nil", err)
-	}
-
-	// Check the response
-	if resp == nil {
-		t.Fatalf("Do() response = nil, want not nil")
-	}
-}
-
 func TestServiceConfigDetails(t *testing.T) {
 	tests := []struct {
 		name    string

llm/conversation/convo_test.go 🔗

@@ -25,6 +25,8 @@ func TestBasicConvo(t *testing.T) {
 	}
 	rr.ScrubReq(func(req *http.Request) error {
 		req.Header.Del("x-api-key")
+		req.Header.Del("User-Agent")
+		req.Header.Del("Shelley-Conversation-Id")
 		return nil
 	})
 

llm/conversation/testdata/basic_convo.httprr 🔗

@@ -1,118 +1,62 @@
 httprr trace v1
-455 1424
+379 1422
 POST https://api.anthropic.com/v1/messages HTTP/1.1
 Host: api.anthropic.com
 User-Agent: Go-http-client/1.1
-Content-Length: 259

+Content-Length: 183

 Anthropic-Version: 2023-06-01
 Content-Type: application/json
 
-{
- "model": "claude-sonnet-4-20250514",
- "messages": [
-  {
-   "role": "user",
-   "content": [
-    {
-     "type": "text",
-     "text": "Hi, my name is Cornelius",
-     "cache_control": {
-      "type": "ephemeral"
-     }
-    }
-   ]
-  }
- ],
- "max_tokens": 8192
-}HTTP/2.0 200 OK

+{"model":"claude-sonnet-4-20250514","messages":[{"role":"user","content":[{"type":"text","text":"Hi, my name is Cornelius","cache_control":{"type":"ephemeral"}}]}],"max_tokens":8192}
+HTTP/2.0 200 OK

 Anthropic-Organization-Id: 3c473a21-7208-450a-a9f8-80aebda45c1b
-Anthropic-Ratelimit-Input-Tokens-Limit: 200000

-Anthropic-Ratelimit-Input-Tokens-Remaining: 200000

-Anthropic-Ratelimit-Input-Tokens-Reset: 2025-05-24T19:27:38Z

-Anthropic-Ratelimit-Output-Tokens-Limit: 80000

-Anthropic-Ratelimit-Output-Tokens-Remaining: 80000

-Anthropic-Ratelimit-Output-Tokens-Reset: 2025-05-24T19:27:38Z

-Anthropic-Ratelimit-Requests-Limit: 4000

-Anthropic-Ratelimit-Requests-Remaining: 3999

-Anthropic-Ratelimit-Requests-Reset: 2025-05-24T19:27:36Z

-Anthropic-Ratelimit-Tokens-Limit: 280000

-Anthropic-Ratelimit-Tokens-Remaining: 280000

-Anthropic-Ratelimit-Tokens-Reset: 2025-05-24T19:27:38Z

+Anthropic-Ratelimit-Input-Tokens-Limit: 4000000

+Anthropic-Ratelimit-Input-Tokens-Remaining: 4000000

+Anthropic-Ratelimit-Input-Tokens-Reset: 2026-01-20T05:02:18Z

+Anthropic-Ratelimit-Output-Tokens-Limit: 400000

+Anthropic-Ratelimit-Output-Tokens-Remaining: 400000

+Anthropic-Ratelimit-Output-Tokens-Reset: 2026-01-20T05:02:19Z

+Anthropic-Ratelimit-Tokens-Limit: 4400000

+Anthropic-Ratelimit-Tokens-Remaining: 4400000

+Anthropic-Ratelimit-Tokens-Reset: 2026-01-20T05:02:18Z

 Cf-Cache-Status: DYNAMIC
-Cf-Ray: 944f30fd0f0a15d4-SJC

+Cf-Ray: 9c0c04d42abfefa4-PDX

 Content-Type: application/json
-Date: Sat, 24 May 2025 19:27:38 GMT

-Request-Id: req_011CPSuX337qwfNzNzGSwG3b

+Date: Tue, 20 Jan 2026 05:02:19 GMT

+Request-Id: req_011CXJ3Uban5HaKm7cjTPc4V

 Server: cloudflare
 Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
-Via: 1.1 google

+X-Envoy-Upstream-Service-Time: 1234

 X-Robots-Tag: none
 
-{"id":"msg_01L127Hi3H8X613Fh8HojDgk","type":"message","role":"assistant","model":"claude-sonnet-4-20250514","content":[{"type":"text","text":"Hello Cornelius! It's nice to meet you. How are you doing today? Is there anything I can help you with?"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":15,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":30,"service_tier":"standard"}}775 1394
+{"model":"claude-sonnet-4-20250514","id":"msg_01VwKDeEZjChwVGi6FdWjcWU","type":"message","role":"assistant","content":[{"type":"text","text":"Hello Cornelius! It's nice to meet you. That's a distinctive and classic name. How are you doing today? Is there anything I can help you with?"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":15,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":38,"service_tier":"standard"}}650 1341
 POST https://api.anthropic.com/v1/messages HTTP/1.1
 Host: api.anthropic.com
 User-Agent: Go-http-client/1.1
-Content-Length: 579

+Content-Length: 454

 Anthropic-Version: 2023-06-01
 Content-Type: application/json
 
-{
- "model": "claude-sonnet-4-20250514",
- "messages": [
-  {
-   "role": "user",
-   "content": [
-    {
-     "type": "text",
-     "text": "Hi, my name is Cornelius"
-    }
-   ]
-  },
-  {
-   "role": "assistant",
-   "content": [
-    {
-     "type": "text",
-     "text": "Hello Cornelius! It's nice to meet you. How are you doing today? Is there anything I can help you with?"
-    }
-   ]
-  },
-  {
-   "role": "user",
-   "content": [
-    {
-     "type": "text",
-     "text": "What is my name?",
-     "cache_control": {
-      "type": "ephemeral"
-     }
-    }
-   ]
-  }
- ],
- "max_tokens": 8192
-}HTTP/2.0 200 OK

+{"model":"claude-sonnet-4-20250514","messages":[{"role":"user","content":[{"type":"text","text":"Hi, my name is Cornelius"}]},{"role":"assistant","content":[{"type":"text","text":"Hello Cornelius! It's nice to meet you. That's a distinctive and classic name. How are you doing today? Is there anything I can help you with?"}]},{"role":"user","content":[{"type":"text","text":"What is my name?","cache_control":{"type":"ephemeral"}}]}],"max_tokens":8192}
+HTTP/2.0 200 OK

 Anthropic-Organization-Id: 3c473a21-7208-450a-a9f8-80aebda45c1b
-Anthropic-Ratelimit-Input-Tokens-Limit: 200000

-Anthropic-Ratelimit-Input-Tokens-Remaining: 200000

-Anthropic-Ratelimit-Input-Tokens-Reset: 2025-05-24T19:27:39Z

-Anthropic-Ratelimit-Output-Tokens-Limit: 80000

-Anthropic-Ratelimit-Output-Tokens-Remaining: 80000

-Anthropic-Ratelimit-Output-Tokens-Reset: 2025-05-24T19:27:40Z

-Anthropic-Ratelimit-Requests-Limit: 4000

-Anthropic-Ratelimit-Requests-Remaining: 3999

-Anthropic-Ratelimit-Requests-Reset: 2025-05-24T19:27:38Z

-Anthropic-Ratelimit-Tokens-Limit: 280000

-Anthropic-Ratelimit-Tokens-Remaining: 280000

-Anthropic-Ratelimit-Tokens-Reset: 2025-05-24T19:27:39Z

+Anthropic-Ratelimit-Input-Tokens-Limit: 4000000

+Anthropic-Ratelimit-Input-Tokens-Remaining: 4000000

+Anthropic-Ratelimit-Input-Tokens-Reset: 2026-01-20T05:02:21Z

+Anthropic-Ratelimit-Output-Tokens-Limit: 400000

+Anthropic-Ratelimit-Output-Tokens-Remaining: 400000

+Anthropic-Ratelimit-Output-Tokens-Reset: 2026-01-20T05:02:22Z

+Anthropic-Ratelimit-Tokens-Limit: 4400000

+Anthropic-Ratelimit-Tokens-Remaining: 4400000

+Anthropic-Ratelimit-Tokens-Reset: 2026-01-20T05:02:21Z

 Cf-Cache-Status: DYNAMIC
-Cf-Ray: 944f31098c9e15d4-SJC

+Cf-Ray: 9c0c04dc7fd5efa4-PDX

 Content-Type: application/json
-Date: Sat, 24 May 2025 19:27:40 GMT

-Request-Id: req_011CPSuXBim8ntiKJDjvFUWG

+Date: Tue, 20 Jan 2026 05:02:22 GMT

+Request-Id: req_011CXJ3UhEMLAi1P8AyXy9X3

 Server: cloudflare
 Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
-Via: 1.1 google

+X-Envoy-Upstream-Service-Time: 2424

 X-Robots-Tag: none
 
-{"id":"msg_01TiEuRrzLgJEfBUNhZ9Am3B","type":"message","role":"assistant","model":"claude-sonnet-4-20250514","content":[{"type":"text","text":"Your name is Cornelius, as you introduced yourself in your first message."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":53,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":19,"service_tier":"standard"}}
+{"model":"claude-sonnet-4-20250514","id":"msg_01XdV1M6Kpkvcjc3yDhPCj2u","type":"message","role":"assistant","content":[{"type":"text","text":"Your name is Cornelius, as you told me in your first message."}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":61,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":19,"service_tier":"standard"}}

llm/gem/gem.go 🔗

@@ -23,11 +23,10 @@ const (
 // Service provides Gemini completions.
 // Fields should not be altered concurrently with calling any method on Service.
 type Service struct {
-	HTTPC   *http.Client // defaults to http.DefaultClient if nil
-	URL     string       // Gemini API URL, uses the gemini package default if empty
-	APIKey  string       // must be non-empty
-	Model   string       // defaults to DefaultModel if empty
-	DumpLLM bool         // whether to dump request/response text to files for debugging; defaults to false
+	HTTPC  *http.Client // defaults to http.DefaultClient if nil
+	URL    string       // Gemini API URL, uses the gemini package default if empty
+	APIKey string       // must be non-empty
+	Model  string       // defaults to DefaultModel if empty
 }
 
 var _ llm.Service = (*Service)(nil)
@@ -520,14 +519,6 @@ func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error
 	// Log the structured Gemini request for debugging
 	if reqJSON, err := json.MarshalIndent(gemReq, "", "  "); err == nil {
 		slog.DebugContext(ctx, "gemini_request_json", "request", string(reqJSON))
-		if s.DumpLLM {
-			// Construct the same URL that the Gemini client will use
-			endpoint := cmp.Or(s.URL, "https://generativelanguage.googleapis.com/v1beta")
-			url := fmt.Sprintf("%s/models/%s:generateContent?key=%s", endpoint, cmp.Or(s.Model, DefaultModel), s.APIKey)
-			if err := llm.DumpToFile("request", url, reqJSON); err != nil {
-				slog.WarnContext(ctx, "failed to dump gemini request to file", "error", err)
-			}
-		}
 	}
 
 	// Create a Gemini model instance
@@ -555,11 +546,6 @@ func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error
 			// Log the structured Gemini response
 			if resJSON, err := json.MarshalIndent(gemRes, "", "  "); err == nil {
 				slog.DebugContext(ctx, "gemini_response_json", "response", string(resJSON))
-				if s.DumpLLM {
-					if err := llm.DumpToFile("response", "", resJSON); err != nil {
-						slog.WarnContext(ctx, "failed to dump gemini response to file", "error", err)
-					}
-				}
 			}
 			break
 		}

llm/llmhttp/llmhttp.go 🔗

@@ -0,0 +1,149 @@
+// Package llmhttp provides HTTP utilities for LLM requests including
+// custom headers and database recording.
+package llmhttp
+
+import (
+	"bytes"
+	"context"
+	"io"
+	"net/http"
+	"time"
+
+	"shelley.exe.dev/version"
+)
+
+// contextKey is the type for context keys in this package.
+type contextKey int
+
+const (
+	conversationIDKey contextKey = iota
+	modelIDKey
+	providerKey
+)
+
+// WithConversationID returns a context with the conversation ID attached.
+func WithConversationID(ctx context.Context, conversationID string) context.Context {
+	return context.WithValue(ctx, conversationIDKey, conversationID)
+}
+
+// ConversationIDFromContext returns the conversation ID from the context, if any.
+func ConversationIDFromContext(ctx context.Context) string {
+	if v := ctx.Value(conversationIDKey); v != nil {
+		return v.(string)
+	}
+	return ""
+}
+
+// WithModelID returns a context with the model ID attached.
+func WithModelID(ctx context.Context, modelID string) context.Context {
+	return context.WithValue(ctx, modelIDKey, modelID)
+}
+
+// ModelIDFromContext returns the model ID from the context, if any.
+func ModelIDFromContext(ctx context.Context) string {
+	if v := ctx.Value(modelIDKey); v != nil {
+		return v.(string)
+	}
+	return ""
+}
+
+// WithProvider returns a context with the provider name attached.
+func WithProvider(ctx context.Context, provider string) context.Context {
+	return context.WithValue(ctx, providerKey, provider)
+}
+
+// ProviderFromContext returns the provider name from the context, if any.
+func ProviderFromContext(ctx context.Context) string {
+	if v := ctx.Value(providerKey); v != nil {
+		return v.(string)
+	}
+	return ""
+}
+
+// Recorder is called after each LLM HTTP request with the request/response details.
+type Recorder func(ctx context.Context, url string, requestBody, responseBody []byte, statusCode int, err error, duration time.Duration)
+
+// Transport wraps an http.RoundTripper to add Shelley-specific headers
+// and optionally record requests to a database.
+type Transport struct {
+	Base     http.RoundTripper
+	Recorder Recorder
+}
+
+// RoundTrip implements http.RoundTripper.
+func (t *Transport) RoundTrip(req *http.Request) (*http.Response, error) {
+	start := time.Now()
+
+	// Clone the request to avoid modifying the original
+	req = req.Clone(req.Context())
+
+	// Add User-Agent with Shelley version
+	info := version.GetInfo()
+	userAgent := "Shelley"
+	if info.Commit != "" {
+		userAgent += "/" + info.Commit[:min(8, len(info.Commit))]
+	}
+	req.Header.Set("User-Agent", userAgent)
+
+	// Add conversation ID header if present
+	if conversationID := ConversationIDFromContext(req.Context()); conversationID != "" {
+		req.Header.Set("Shelley-Conversation-Id", conversationID)
+	}
+
+	// Read and store the request body for recording
+	var requestBody []byte
+	if t.Recorder != nil && req.Body != nil {
+		var err error
+		requestBody, err = io.ReadAll(req.Body)
+		if err != nil {
+			return nil, err
+		}
+		req.Body = io.NopCloser(bytes.NewReader(requestBody))
+	}
+
+	// Perform the actual request
+	base := t.Base
+	if base == nil {
+		base = http.DefaultTransport
+	}
+
+	resp, err := base.RoundTrip(req)
+
+	// Record the request if we have a recorder
+	if t.Recorder != nil {
+		var responseBody []byte
+		var statusCode int
+
+		if resp != nil {
+			statusCode = resp.StatusCode
+			// Read and restore the response body
+			responseBody, _ = io.ReadAll(resp.Body)
+			resp.Body.Close()
+			resp.Body = io.NopCloser(bytes.NewReader(responseBody))
+		}
+
+		t.Recorder(req.Context(), req.URL.String(), requestBody, responseBody, statusCode, err, time.Since(start))
+	}
+
+	return resp, err
+}
+
+// NewClient creates an http.Client with Shelley headers and optional recording.
+func NewClient(base *http.Client, recorder Recorder) *http.Client {
+	if base == nil {
+		base = http.DefaultClient
+	}
+
+	transport := base.Transport
+	if transport == nil {
+		transport = http.DefaultTransport
+	}
+
+	return &http.Client{
+		Transport: &Transport{
+			Base:     transport,
+			Recorder: recorder,
+		},
+		Timeout: base.Timeout,
+	}
+}

llm/llmhttp/llmhttp_test.go 🔗

@@ -0,0 +1,175 @@
+package llmhttp
+
+import (
+	"context"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+	"time"
+)
+
+func TestContextFunctions(t *testing.T) {
+	ctx := context.Background()
+
+	// Test ConversationID
+	ctx = WithConversationID(ctx, "conv-123")
+	if got := ConversationIDFromContext(ctx); got != "conv-123" {
+		t.Errorf("ConversationIDFromContext() = %q, want %q", got, "conv-123")
+	}
+
+	// Test ModelID
+	ctx = WithModelID(ctx, "model-456")
+	if got := ModelIDFromContext(ctx); got != "model-456" {
+		t.Errorf("ModelIDFromContext() = %q, want %q", got, "model-456")
+	}
+
+	// Test Provider
+	ctx = WithProvider(ctx, "anthropic")
+	if got := ProviderFromContext(ctx); got != "anthropic" {
+		t.Errorf("ProviderFromContext() = %q, want %q", got, "anthropic")
+	}
+
+	// Test empty context
+	emptyCtx := context.Background()
+	if got := ConversationIDFromContext(emptyCtx); got != "" {
+		t.Errorf("ConversationIDFromContext(empty) = %q, want empty", got)
+	}
+	if got := ModelIDFromContext(emptyCtx); got != "" {
+		t.Errorf("ModelIDFromContext(empty) = %q, want empty", got)
+	}
+	if got := ProviderFromContext(emptyCtx); got != "" {
+		t.Errorf("ProviderFromContext(empty) = %q, want empty", got)
+	}
+}
+
+func TestTransportAddsHeaders(t *testing.T) {
+	// Create a test server that echoes request headers
+	var receivedHeaders http.Header
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		receivedHeaders = r.Header.Clone()
+		w.WriteHeader(http.StatusOK)
+		w.Write([]byte("ok"))
+	}))
+	defer server.Close()
+
+	// Create client with our transport
+	client := NewClient(nil, nil)
+
+	// Make a request with conversation ID in context
+	ctx := WithConversationID(context.Background(), "test-conv-id")
+	req, _ := http.NewRequestWithContext(ctx, "GET", server.URL, nil)
+
+	resp, err := client.Do(req)
+	if err != nil {
+		t.Fatalf("Request failed: %v", err)
+	}
+	resp.Body.Close()
+
+	// Verify User-Agent header was added
+	if !strings.HasPrefix(receivedHeaders.Get("User-Agent"), "Shelley") {
+		t.Errorf("User-Agent = %q, want prefix 'Shelley'", receivedHeaders.Get("User-Agent"))
+	}
+
+	// Verify Shelley-Conversation-Id header was added
+	if got := receivedHeaders.Get("Shelley-Conversation-Id"); got != "test-conv-id" {
+		t.Errorf("Shelley-Conversation-Id = %q, want %q", got, "test-conv-id")
+	}
+}
+
+func TestTransportRecordsRequest(t *testing.T) {
+	// Create a test server
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		body, _ := io.ReadAll(r.Body)
+		w.WriteHeader(http.StatusOK)
+		w.Write([]byte("response body: " + string(body)))
+	}))
+	defer server.Close()
+
+	// Track recorded values
+	var (
+		recordedURL         string
+		recordedRequestBody []byte
+		recordedRespBody    []byte
+		recordedStatusCode  int
+		recordedDuration    time.Duration
+		recorderCalled      bool
+	)
+
+	recorder := func(ctx context.Context, url string, requestBody, responseBody []byte, statusCode int, err error, duration time.Duration) {
+		recorderCalled = true
+		recordedURL = url
+		recordedRequestBody = requestBody
+		recordedRespBody = responseBody
+		recordedStatusCode = statusCode
+		recordedDuration = duration
+	}
+
+	// Create client with recorder
+	client := NewClient(nil, recorder)
+
+	// Make a request with body
+	req, _ := http.NewRequest("POST", server.URL, strings.NewReader("test body"))
+	resp, err := client.Do(req)
+	if err != nil {
+		t.Fatalf("Request failed: %v", err)
+	}
+
+	// Read response body to ensure it's still accessible
+	respBody, _ := io.ReadAll(resp.Body)
+	resp.Body.Close()
+
+	if string(respBody) != "response body: test body" {
+		t.Errorf("Response body = %q, want %q", string(respBody), "response body: test body")
+	}
+
+	// Verify recorder was called with correct values
+	if !recorderCalled {
+		t.Fatal("Recorder was not called")
+	}
+
+	if recordedURL != server.URL {
+		t.Errorf("Recorded URL = %q, want %q", recordedURL, server.URL)
+	}
+
+	if string(recordedRequestBody) != "test body" {
+		t.Errorf("Recorded request body = %q, want %q", string(recordedRequestBody), "test body")
+	}
+
+	if string(recordedRespBody) != "response body: test body" {
+		t.Errorf("Recorded response body = %q, want %q", string(recordedRespBody), "response body: test body")
+	}
+
+	if recordedStatusCode != http.StatusOK {
+		t.Errorf("Recorded status code = %d, want %d", recordedStatusCode, http.StatusOK)
+	}
+
+	if recordedDuration <= 0 {
+		t.Error("Recorded duration should be positive")
+	}
+}
+
+func TestTransportWithoutRecorder(t *testing.T) {
+	// Create a test server
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+		w.Write([]byte("ok"))
+	}))
+	defer server.Close()
+
+	// Create client without recorder
+	client := NewClient(nil, nil)
+
+	// Make a request
+	req, _ := http.NewRequest("GET", server.URL, nil)
+	resp, err := client.Do(req)
+	if err != nil {
+		t.Fatalf("Request failed: %v", err)
+	}
+	resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		t.Errorf("Status code = %d, want %d", resp.StatusCode, http.StatusOK)
+	}
+}

llm/oai/oai.go 🔗

@@ -314,7 +314,6 @@ type Service struct {
 	ModelURL  string       // optional, overrides Model.URL
 	MaxTokens int          // defaults to DefaultMaxTokens if zero
 	Org       string       // optional - organization ID
-	DumpLLM   bool         // whether to dump request/response text to files for debugging; defaults to false
 }
 
 var _ llm.Service = (*Service)(nil)
@@ -826,15 +825,6 @@ func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error
 	// Construct the full URL for logging and debugging
 	fullURL := baseURL + "/chat/completions"
 
-	// Dump request if enabled
-	if s.DumpLLM {
-		if reqJSON, err := json.MarshalIndent(req, "", "  "); err == nil {
-			if err := llm.DumpToFile("request", fullURL, reqJSON); err != nil {
-				slog.WarnContext(ctx, "failed to dump openai request to file", "error", err)
-			}
-		}
-	}
-
 	// Retry mechanism
 	backoff := []time.Duration{1 * time.Second, 2 * time.Second, 5 * time.Second, 10 * time.Second, 15 * time.Second}
 
@@ -854,14 +844,6 @@ func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error
 
 		// Handle successful response
 		if err == nil {
-			// Dump response if enabled
-			if s.DumpLLM {
-				if respJSON, jsonErr := json.MarshalIndent(resp, "", "  "); jsonErr == nil {
-					if dumpErr := llm.DumpToFile("response", "", respJSON); dumpErr != nil {
-						slog.WarnContext(ctx, "failed to dump openai response to file", "error", dumpErr)
-					}
-				}
-			}
 			return s.toLLMResponse(&resp), nil
 		}
 

models/models.go 🔗

@@ -4,11 +4,15 @@ import (
 	"context"
 	"fmt"
 	"log/slog"
-	"sync"
+	"net/http"
 	"time"
 
+	"shelley.exe.dev/db"
+	"shelley.exe.dev/db/generated"
 	"shelley.exe.dev/llm"
 	"shelley.exe.dev/llm/ant"
+	"shelley.exe.dev/llm/gem"
+	"shelley.exe.dev/llm/llmhttp"
 	"shelley.exe.dev/llm/oai"
 	"shelley.exe.dev/loop"
 )
@@ -17,11 +21,11 @@ import (
 type Provider string
 
 const (
-	ProviderOpenAI    Provider = "OpenAI"
-	ProviderAnthropic Provider = "Anthropic"
-	ProviderFireworks Provider = "Fireworks"
-	ProviderGemini    Provider = "Gemini"
-	ProviderBuiltIn   Provider = "Built-in"
+	ProviderOpenAI    Provider = "openai"
+	ProviderAnthropic Provider = "anthropic"
+	ProviderFireworks Provider = "fireworks"
+	ProviderGemini    Provider = "gemini"
+	ProviderBuiltIn   Provider = "builtin"
 )
 
 // Model represents a configured LLM model in Shelley
@@ -39,7 +43,7 @@ type Model struct {
 	RequiredEnvVars []string
 
 	// Factory creates an llm.Service instance for this model
-	Factory func(config *Config) (llm.Service, error)
+	Factory func(config *Config, httpc *http.Client) (llm.Service, error)
 }
 
 // Config holds the configuration needed to create LLM services
@@ -55,6 +59,9 @@ type Config struct {
 	Gateway string
 
 	Logger *slog.Logger
+
+	// Database for recording LLM requests (optional)
+	DB *db.DB
 }
 
 // getAnthropicURL returns the Anthropic API URL, with gateway suffix if gateway is set
@@ -97,11 +104,11 @@ func All() []Model {
 			Provider:        ProviderAnthropic,
 			Description:     "Claude Opus 4.5 (default)",
 			RequiredEnvVars: []string{"ANTHROPIC_API_KEY"},
-			Factory: func(config *Config) (llm.Service, error) {
+			Factory: func(config *Config, httpc *http.Client) (llm.Service, error) {
 				if config.AnthropicAPIKey == "" {
 					return nil, fmt.Errorf("claude-opus-4.5 requires ANTHROPIC_API_KEY")
 				}
-				svc := &ant.Service{APIKey: config.AnthropicAPIKey, Model: ant.Claude45Opus}
+				svc := &ant.Service{APIKey: config.AnthropicAPIKey, Model: ant.Claude45Opus, HTTPC: httpc}
 				if url := config.getAnthropicURL(); url != "" {
 					svc.URL = url
 				}
@@ -113,11 +120,11 @@ func All() []Model {
 			Provider:        ProviderFireworks,
 			Description:     "Qwen3 Coder 480B on Fireworks",
 			RequiredEnvVars: []string{"FIREWORKS_API_KEY"},
-			Factory: func(config *Config) (llm.Service, error) {
+			Factory: func(config *Config, httpc *http.Client) (llm.Service, error) {
 				if config.FireworksAPIKey == "" {
 					return nil, fmt.Errorf("qwen3-coder-fireworks requires FIREWORKS_API_KEY")
 				}
-				svc := &oai.Service{Model: oai.Qwen3CoderFireworks, APIKey: config.FireworksAPIKey}
+				svc := &oai.Service{Model: oai.Qwen3CoderFireworks, APIKey: config.FireworksAPIKey, HTTPC: httpc}
 				if url := config.getFireworksURL(); url != "" {
 					svc.ModelURL = url
 				}
@@ -129,11 +136,11 @@ func All() []Model {
 			Provider:        ProviderFireworks,
 			Description:     "GLM-4P6 on Fireworks",
 			RequiredEnvVars: []string{"FIREWORKS_API_KEY"},
-			Factory: func(config *Config) (llm.Service, error) {
+			Factory: func(config *Config, httpc *http.Client) (llm.Service, error) {
 				if config.FireworksAPIKey == "" {
 					return nil, fmt.Errorf("glm-4p6-fireworks requires FIREWORKS_API_KEY")
 				}
-				svc := &oai.Service{Model: oai.GLM4P6Fireworks, APIKey: config.FireworksAPIKey}
+				svc := &oai.Service{Model: oai.GLM4P6Fireworks, APIKey: config.FireworksAPIKey, HTTPC: httpc}
 				if url := config.getFireworksURL(); url != "" {
 					svc.ModelURL = url
 				}
@@ -145,11 +152,11 @@ func All() []Model {
 			Provider:        ProviderOpenAI,
 			Description:     "GPT-5",
 			RequiredEnvVars: []string{"OPENAI_API_KEY"},
-			Factory: func(config *Config) (llm.Service, error) {
+			Factory: func(config *Config, httpc *http.Client) (llm.Service, error) {
 				if config.OpenAIAPIKey == "" {
 					return nil, fmt.Errorf("gpt-5 requires OPENAI_API_KEY")
 				}
-				svc := &oai.Service{Model: oai.GPT5, APIKey: config.OpenAIAPIKey}
+				svc := &oai.Service{Model: oai.GPT5, APIKey: config.OpenAIAPIKey, HTTPC: httpc}
 				if url := config.getOpenAIURL(); url != "" {
 					svc.ModelURL = url
 				}
@@ -161,11 +168,11 @@ func All() []Model {
 			Provider:        ProviderOpenAI,
 			Description:     "GPT-5 Nano",
 			RequiredEnvVars: []string{"OPENAI_API_KEY"},
-			Factory: func(config *Config) (llm.Service, error) {
+			Factory: func(config *Config, httpc *http.Client) (llm.Service, error) {
 				if config.OpenAIAPIKey == "" {
 					return nil, fmt.Errorf("gpt-5-nano requires OPENAI_API_KEY")
 				}
-				svc := &oai.Service{Model: oai.GPT5Nano, APIKey: config.OpenAIAPIKey}
+				svc := &oai.Service{Model: oai.GPT5Nano, APIKey: config.OpenAIAPIKey, HTTPC: httpc}
 				if url := config.getOpenAIURL(); url != "" {
 					svc.ModelURL = url
 				}
@@ -177,11 +184,11 @@ func All() []Model {
 			Provider:        ProviderOpenAI,
 			Description:     "GPT-5.1 Codex (uses Responses API)",
 			RequiredEnvVars: []string{"OPENAI_API_KEY"},
-			Factory: func(config *Config) (llm.Service, error) {
+			Factory: func(config *Config, httpc *http.Client) (llm.Service, error) {
 				if config.OpenAIAPIKey == "" {
 					return nil, fmt.Errorf("gpt-5.1-codex requires OPENAI_API_KEY")
 				}
-				svc := &oai.ResponsesService{Model: oai.GPT5Codex, APIKey: config.OpenAIAPIKey}
+				svc := &oai.ResponsesService{Model: oai.GPT5Codex, APIKey: config.OpenAIAPIKey, HTTPC: httpc}
 				if url := config.getOpenAIURL(); url != "" {
 					svc.ModelURL = url
 				}
@@ -193,11 +200,11 @@ func All() []Model {
 			Provider:        ProviderAnthropic,
 			Description:     "Claude Sonnet 4.5",
 			RequiredEnvVars: []string{"ANTHROPIC_API_KEY"},
-			Factory: func(config *Config) (llm.Service, error) {
+			Factory: func(config *Config, httpc *http.Client) (llm.Service, error) {
 				if config.AnthropicAPIKey == "" {
 					return nil, fmt.Errorf("claude-sonnet-4.5 requires ANTHROPIC_API_KEY")
 				}
-				svc := &ant.Service{APIKey: config.AnthropicAPIKey, Model: ant.Claude45Sonnet}
+				svc := &ant.Service{APIKey: config.AnthropicAPIKey, Model: ant.Claude45Sonnet, HTTPC: httpc}
 				if url := config.getAnthropicURL(); url != "" {
 					svc.URL = url
 				}
@@ -209,23 +216,39 @@ func All() []Model {
 			Provider:        ProviderAnthropic,
 			Description:     "Claude Haiku 4.5",
 			RequiredEnvVars: []string{"ANTHROPIC_API_KEY"},
-			Factory: func(config *Config) (llm.Service, error) {
+			Factory: func(config *Config, httpc *http.Client) (llm.Service, error) {
 				if config.AnthropicAPIKey == "" {
 					return nil, fmt.Errorf("claude-haiku-4.5 requires ANTHROPIC_API_KEY")
 				}
-				svc := &ant.Service{APIKey: config.AnthropicAPIKey, Model: ant.Claude45Haiku}
+				svc := &ant.Service{APIKey: config.AnthropicAPIKey, Model: ant.Claude45Haiku, HTTPC: httpc}
 				if url := config.getAnthropicURL(); url != "" {
 					svc.URL = url
 				}
 				return svc, nil
 			},
 		},
+		{
+			ID:              "gemini-2.5-pro",
+			Provider:        ProviderGemini,
+			Description:     "Gemini 2.5 Pro",
+			RequiredEnvVars: []string{"GEMINI_API_KEY"},
+			Factory: func(config *Config, httpc *http.Client) (llm.Service, error) {
+				if config.GeminiAPIKey == "" {
+					return nil, fmt.Errorf("gemini-2.5-pro requires GEMINI_API_KEY")
+				}
+				svc := &gem.Service{APIKey: config.GeminiAPIKey, Model: gem.DefaultModel, HTTPC: httpc}
+				if url := config.getGeminiURL(); url != "" {
+					svc.URL = url
+				}
+				return svc, nil
+			},
+		},
 		{
 			ID:              "predictable",
 			Provider:        ProviderBuiltIn,
 			Description:     "Deterministic test model (no API key)",
 			RequiredEnvVars: []string{},
-			Factory: func(config *Config) (llm.Service, error) {
+			Factory: func(config *Config, httpc *http.Client) (llm.Service, error) {
 				return loop.NewPredictableService(), nil
 			},
 		},
@@ -259,58 +282,15 @@ func Default() Model {
 
 // Manager manages LLM services for all configured models
 type Manager struct {
-	services map[string]llm.Service
+	services map[string]serviceEntry
 	logger   *slog.Logger
-	history  *LLMRequestHistory
-}
-
-// LLMRequestRecord stores a request/response pair for debugging
-type LLMRequestRecord struct {
-	Timestamp      time.Time `json:"timestamp"`
-	ModelID        string    `json:"model_id"`
-	URL            string    `json:"url"`
-	HTTPRequest    []byte    `json:"http_request,omitempty"`
-	HTTPResponse   []byte    `json:"http_response,omitempty"`
-	HTTPStatusCode int       `json:"http_status_code,omitempty"`
-	Error          string    `json:"error,omitempty"`
-	Duration       float64   `json:"duration_seconds"`
-}
-
-// LLMRequestHistory maintains a circular buffer of recent LLM requests
-type LLMRequestHistory struct {
-	mu      sync.RWMutex
-	records []LLMRequestRecord
-	maxSize int
-}
-
-// NewLLMRequestHistory creates a new request history with the given max size
-func NewLLMRequestHistory(maxSize int) *LLMRequestHistory {
-	return &LLMRequestHistory{
-		records: make([]LLMRequestRecord, 0, maxSize),
-		maxSize: maxSize,
-	}
+	db       *db.DB
 }
 
-// Add adds a new record to the history
-func (h *LLMRequestHistory) Add(record LLMRequestRecord) {
-	h.mu.Lock()
-	defer h.mu.Unlock()
-
-	if len(h.records) >= h.maxSize {
-		// Remove oldest record
-		h.records = h.records[1:]
-	}
-	h.records = append(h.records, record)
-}
-
-// GetRecords returns a copy of all records
-func (h *LLMRequestHistory) GetRecords() []LLMRequestRecord {
-	h.mu.RLock()
-	defer h.mu.RUnlock()
-
-	result := make([]LLMRequestRecord, len(h.records))
-	copy(result, h.records)
-	return result
+type serviceEntry struct {
+	service  llm.Service
+	provider Provider
+	modelID  string
 }
 
 // ConfigInfo is an optional interface that services can implement to provide configuration details for logging
@@ -321,25 +301,27 @@ type ConfigInfo interface {
 
 // loggingService wraps an llm.Service to log request completion with usage information
 type loggingService struct {
-	service llm.Service
-	logger  *slog.Logger
-	modelID string
-	history *LLMRequestHistory
+	service  llm.Service
+	logger   *slog.Logger
+	modelID  string
+	provider Provider
+	db       *db.DB
 }
 
-// Do wraps the underlying service's Do method with logging
+// Do wraps the underlying service's Do method with logging and database recording
 func (l *loggingService) Do(ctx context.Context, request *llm.Request) (*llm.Response, error) {
 	start := time.Now()
 
+	// Add model ID and provider to context for the HTTP transport
+	ctx = llmhttp.WithModelID(ctx, l.modelID)
+	ctx = llmhttp.WithProvider(ctx, string(l.provider))
+
 	// Call the underlying service
 	response, err := l.service.Do(ctx, request)
 
 	duration := time.Since(start)
 	durationSeconds := duration.Seconds()
 
-	// History recording now happens in the provider (e.g., ant.Service)
-	// to capture raw HTTP requests/responses
-
 	// Log the completion with usage information
 	if err != nil {
 		logAttrs := []any{
@@ -403,20 +385,86 @@ func (l *loggingService) UseSimplifiedPatch() bool {
 }
 
 // NewManager creates a new Manager with all models configured
-func NewManager(cfg *Config, history *LLMRequestHistory) (*Manager, error) {
+func NewManager(cfg *Config) (*Manager, error) {
 	manager := &Manager{
-		services: make(map[string]llm.Service),
+		services: make(map[string]serviceEntry),
 		logger:   cfg.Logger,
-		history:  history,
+		db:       cfg.DB,
+	}
+
+	// Create HTTP client with recording if database is available
+	var httpc *http.Client
+	if cfg.DB != nil {
+		recorder := func(ctx context.Context, url string, requestBody, responseBody []byte, statusCode int, err error, duration time.Duration) {
+			modelID := llmhttp.ModelIDFromContext(ctx)
+			provider := llmhttp.ProviderFromContext(ctx)
+			conversationID := llmhttp.ConversationIDFromContext(ctx)
+
+			var convIDPtr *string
+			if conversationID != "" {
+				convIDPtr = &conversationID
+			}
+
+			var reqBodyPtr, respBodyPtr *string
+			if len(requestBody) > 0 {
+				s := string(requestBody)
+				reqBodyPtr = &s
+			}
+			if len(responseBody) > 0 {
+				s := string(responseBody)
+				respBodyPtr = &s
+			}
+
+			var statusCodePtr *int64
+			if statusCode != 0 {
+				sc := int64(statusCode)
+				statusCodePtr = &sc
+			}
+
+			var errPtr *string
+			if err != nil {
+				s := err.Error()
+				errPtr = &s
+			}
+
+			durationMs := duration.Milliseconds()
+			durationMsPtr := &durationMs
+
+			// Insert into database (fire and forget, don't block the request)
+			go func() {
+				_, insertErr := cfg.DB.InsertLLMRequest(context.Background(), generated.InsertLLMRequestParams{
+					ConversationID: convIDPtr,
+					Model:          modelID,
+					Provider:       provider,
+					Url:            url,
+					RequestBody:    reqBodyPtr,
+					ResponseBody:   respBodyPtr,
+					StatusCode:     statusCodePtr,
+					Error:          errPtr,
+					DurationMs:     durationMsPtr,
+				})
+				if insertErr != nil && cfg.Logger != nil {
+					cfg.Logger.Warn("Failed to record LLM request", "error", insertErr)
+				}
+			}()
+		}
+		httpc = llmhttp.NewClient(nil, recorder)
+	} else {
+		// Still use the custom transport for headers, just without recording
+		httpc = llmhttp.NewClient(nil, nil)
 	}
 
 	for _, model := range All() {
-		svc, err := model.Factory(cfg)
+		svc, err := model.Factory(cfg, httpc)
 		if err != nil {
 			// Model not available (e.g., missing API key) - skip it
 			continue
 		}
-		manager.services[model.ID] = svc
+		manager.services[model.ID] = serviceEntry{
+			service:  svc,
+			provider: model.Provider,
+			modelID:  model.ID,
+		}
 	}
 
 	return manager, nil
@@ -424,44 +472,22 @@ func NewManager(cfg *Config, history *LLMRequestHistory) (*Manager, error) {
 
 // GetService returns the LLM service for the given model ID, wrapped with logging
 func (m *Manager) GetService(modelID string) (llm.Service, error) {
-	if svc, ok := m.services[modelID]; ok {
-		// Set HTTP recorder on ant.Service if we have history
-		if antSvc, ok := svc.(*ant.Service); ok && m.history != nil {
-			antSvc.HTTPRecorder = func(url string, requestBody, responseBody []byte, statusCode int, err error, duration time.Duration) {
-				record := LLMRequestRecord{
-					Timestamp:      time.Now().Add(-duration),
-					ModelID:        modelID,
-					URL:            url,
-					HTTPRequest:    requestBody,
-					HTTPResponse:   responseBody,
-					HTTPStatusCode: statusCode,
-					Duration:       duration.Seconds(),
-				}
-				if err != nil {
-					record.Error = err.Error()
-				}
-				m.history.Add(record)
-			}
-		}
+	if entry, ok := m.services[modelID]; ok {
 		// Wrap with logging if we have a logger
 		if m.logger != nil {
 			return &loggingService{
-				service: svc,
-				logger:  m.logger,
-				modelID: modelID,
-				history: m.history,
+				service:  entry.service,
+				logger:   m.logger,
+				modelID:  entry.modelID,
+				provider: entry.provider,
+				db:       m.db,
 			}, nil
 		}
-		return svc, nil
+		return entry.service, nil
 	}
 	return nil, fmt.Errorf("unsupported model: %s", modelID)
 }
 
-// GetHistory returns the LLM request history
-func (m *Manager) GetHistory() *LLMRequestHistory {
-	return m.history
-}
-
 // GetAvailableModels returns a list of available model IDs in the same order as All()
 func (m *Manager) GetAvailableModels() []string {
 	// Return IDs in the same order as All() for consistency

models/models_test.go 🔗

@@ -3,8 +3,8 @@ package models
 import (
 	"context"
 	"log/slog"
+	"net/http"
 	"testing"
-	"time"
 
 	"shelley.exe.dev/llm"
 )
@@ -95,7 +95,7 @@ func TestFactory(t *testing.T) {
 		t.Fatal("predictable model not found")
 	}
 
-	svc, err := m.Factory(cfg)
+	svc, err := m.Factory(cfg, nil)
 	if err != nil {
 		t.Fatalf("predictable Factory() failed: %v", err)
 	}
@@ -109,7 +109,7 @@ func TestManagerGetAvailableModelsOrder(t *testing.T) {
 	cfg := &Config{}
 
 	// Create manager - should only have predictable model since no API keys
-	manager, err := NewManager(cfg, nil)
+	manager, err := NewManager(cfg)
 	if err != nil {
 		t.Fatalf("NewManager failed: %v", err)
 	}
@@ -148,7 +148,7 @@ func TestManagerGetAvailableModelsMatchesAllOrder(t *testing.T) {
 		FireworksAPIKey: "test-key",
 	}
 
-	manager, err := NewManager(cfg, nil)
+	manager, err := NewManager(cfg)
 	if err != nil {
 		t.Fatalf("NewManager failed: %v", err)
 	}
@@ -176,80 +176,16 @@ func TestManagerGetAvailableModelsMatchesAllOrder(t *testing.T) {
 	}
 }
 
-func TestLLMRequestHistory(t *testing.T) {
-	// Test NewLLMRequestHistory
-	history := NewLLMRequestHistory(3)
-	if history == nil {
-		t.Fatal("NewLLMRequestHistory returned nil")
-	}
-
-	// Test Add and GetRecords
-	record1 := LLMRequestRecord{
-		Timestamp: time.Now(),
-		ModelID:   "test-model-1",
-		URL:       "http://test.com/1",
-	}
-
-	record2 := LLMRequestRecord{
-		Timestamp: time.Now(),
-		ModelID:   "test-model-2",
-		URL:       "http://test.com/2",
-	}
-
-	history.Add(record1)
-	history.Add(record2)
-
-	records := history.GetRecords()
-	if len(records) != 2 {
-		t.Errorf("Expected 2 records, got %d", len(records))
-	}
-
-	if records[0].ModelID != "test-model-1" {
-		t.Errorf("Expected first record model ID 'test-model-1', got %s", records[0].ModelID)
-	}
-
-	if records[1].ModelID != "test-model-2" {
-		t.Errorf("Expected second record model ID 'test-model-2', got %s", records[1].ModelID)
-	}
-
-	// Test circular buffer behavior
-	record3 := LLMRequestRecord{
-		Timestamp: time.Now(),
-		ModelID:   "test-model-3",
-		URL:       "http://test.com/3",
-	}
-
-	record4 := LLMRequestRecord{
-		Timestamp: time.Now(),
-		ModelID:   "test-model-4",
-		URL:       "http://test.com/4",
-	}
-
-	history.Add(record3)
-	history.Add(record4) // This should remove record1
-
-	records = history.GetRecords()
-	if len(records) != 3 {
-		t.Errorf("Expected 3 records (circular buffer), got %d", len(records))
-	}
-
-	// First record should now be record2 (record1 was removed)
-	if records[0].ModelID != "test-model-2" {
-		t.Errorf("Expected first record model ID 'test-model-2', got %s", records[0].ModelID)
-	}
-}
-
-func TestHistoryRecordingService(t *testing.T) {
+func TestLoggingService(t *testing.T) {
 	// Create a mock service for testing
 	mockService := &mockLLMService{}
-	history := NewLLMRequestHistory(10)
 	logger := slog.Default()
 
 	loggingSvc := &loggingService{
-		service: mockService,
-		logger:  logger,
-		modelID: "test-model",
-		history: history,
+		service:  mockService,
+		logger:   logger,
+		modelID:  "test-model",
+		provider: ProviderBuiltIn,
 	}
 
 	// Test Do method
@@ -327,9 +263,8 @@ func (m *mockLLMService) UseSimplifiedPatch() bool {
 func TestManagerGetService(t *testing.T) {
 	// Test with predictable model (no API keys needed)
 	cfg := &Config{}
-	history := NewLLMRequestHistory(10)
 
-	manager, err := NewManager(cfg, history)
+	manager, err := NewManager(cfg)
 	if err != nil {
 		t.Fatalf("NewManager failed: %v", err)
 	}
@@ -350,25 +285,10 @@ func TestManagerGetService(t *testing.T) {
 	}
 }
 
-func TestManagerGetHistory(t *testing.T) {
-	cfg := &Config{}
-	history := NewLLMRequestHistory(5)
-
-	manager, err := NewManager(cfg, history)
-	if err != nil {
-		t.Fatalf("NewManager failed: %v", err)
-	}
-
-	retrievedHistory := manager.GetHistory()
-	if retrievedHistory != history {
-		t.Error("GetHistory did not return the expected history instance")
-	}
-}
-
 func TestManagerHasModel(t *testing.T) {
 	cfg := &Config{}
 
-	manager, err := NewManager(cfg, nil)
+	manager, err := NewManager(cfg)
 	if err != nil {
 		t.Fatalf("NewManager failed: %v", err)
 	}
@@ -420,14 +340,13 @@ func TestConfigGetURLMethods(t *testing.T) {
 func TestUseSimplifiedPatch(t *testing.T) {
 	// Test with a service that doesn't implement SimplifiedPatcher
 	mockService := &mockLLMService{}
-	history := NewLLMRequestHistory(10)
 	logger := slog.Default()
 
 	loggingSvc := &loggingService{
-		service: mockService,
-		logger:  logger,
-		modelID: "test-model",
-		history: history,
+		service:  mockService,
+		logger:   logger,
+		modelID:  "test-model",
+		provider: ProviderBuiltIn,
 	}
 
 	// Should return false since mockService doesn't implement SimplifiedPatcher
@@ -439,10 +358,10 @@ func TestUseSimplifiedPatch(t *testing.T) {
 	// Test with a service that implements SimplifiedPatcher
 	mockSimplifiedService := &mockSimplifiedLLMService{useSimplified: true}
 	loggingSvc2 := &loggingService{
-		service: mockSimplifiedService,
-		logger:  logger,
-		modelID: "test-model-2",
-		history: history,
+		service:  mockSimplifiedService,
+		logger:   logger,
+		modelID:  "test-model-2",
+		provider: ProviderBuiltIn,
 	}
 
 	// Should return true since mockSimplifiedService implements SimplifiedPatcher and returns true
@@ -461,3 +380,27 @@ type mockSimplifiedLLMService struct {
 func (m *mockSimplifiedLLMService) UseSimplifiedPatch() bool {
 	return m.useSimplified
 }
+
+func TestHTTPClientPassedToFactory(t *testing.T) {
+	// Test that HTTP client is passed to factory and used by services
+	cfg := &Config{
+		AnthropicAPIKey: "test-key",
+	}
+
+	// Create a custom HTTP client
+	customClient := &http.Client{}
+
+	// Test that claude factory accepts HTTP client
+	m := ByID("claude-opus-4.5")
+	if m == nil {
+		t.Fatal("claude-opus-4.5 model not found")
+	}
+
+	svc, err := m.Factory(cfg, customClient)
+	if err != nil {
+		t.Fatalf("Factory with custom HTTP client failed: %v", err)
+	}
+	if svc == nil {
+		t.Fatal("Factory returned nil service")
+	}
+}
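
The new test exercises the two-argument factory. For reference, a minimal sketch of how a caller outside the package might pass its own client, assuming the models import path mirrors shelley.exe.dev/llm and using the ByID/Factory call shapes from the test above (model ID and timeout are illustrative):

package main

import (
	"log"
	"net/http"
	"os"
	"time"

	"shelley.exe.dev/models" // assumed import path
)

func main() {
	// Sketch: hand a custom *http.Client to a model factory. Passing nil
	// instead is also accepted, as the other tests in this file show.
	cfg := &models.Config{AnthropicAPIKey: os.Getenv("ANTHROPIC_API_KEY")}
	httpc := &http.Client{Timeout: 5 * time.Minute}

	m := models.ByID("claude-opus-4.5")
	if m == nil {
		log.Fatal("model not found")
	}
	svc, err := m.Factory(cfg, httpc)
	if err != nil {
		log.Fatalf("factory failed: %v", err)
	}
	_ = svc // use the service as usual
}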

server/cancel_claude_test.go 🔗

@@ -1,8 +1,10 @@
 package server
 
 import (
+	"bytes"
 	"context"
 	"encoding/json"
+	"io"
 	"log/slog"
 	"net/http"
 	"net/http/httptest"
@@ -53,10 +55,18 @@ func NewClaudeTestHarness(t *testing.T) *ClaudeTestHarness {
 		requestTokens: make([]uint64, 0),
 	}
 
+	// Create HTTP client with custom transport for token tracking
+	httpc := &http.Client{
+		Transport: &tokenTrackingTransport{
+			base:        http.DefaultTransport,
+			recordToken: h.recordHTTPResponse,
+		},
+	}
+
 	service := &ant.Service{
-		APIKey:       apiKey,
-		Model:        ant.Claude45Haiku, // Use cheaper model for testing
-		HTTPRecorder: h.recordHTTPRequest,
+		APIKey: apiKey,
+		Model:  ant.Claude45Haiku, // Use cheaper model for testing
+		HTTPC:  httpc,
 	}
 	h.llmService = service
 
@@ -75,9 +85,30 @@ func NewClaudeTestHarness(t *testing.T) *ClaudeTestHarness {
 	return h
 }
 
-// recordHTTPRequest is a callback to record HTTP requests for token tracking
-func (h *ClaudeTestHarness) recordHTTPRequest(url string, requestBody, responseBody []byte, statusCode int, err error, duration time.Duration) {
-	h.t.Logf("HTTP callback: status=%d, err=%v, responseLen=%d", statusCode, err, len(responseBody))
+// tokenTrackingTransport wraps an HTTP transport to track token usage from responses
+type tokenTrackingTransport struct {
+	base        http.RoundTripper
+	recordToken func(responseBody []byte, statusCode int)
+}
+
+func (t *tokenTrackingTransport) RoundTrip(req *http.Request) (*http.Response, error) {
+	resp, err := t.base.RoundTrip(req)
+	if err != nil {
+		return resp, err
+	}
+
+	// Read and restore the response body
+	body, _ := io.ReadAll(resp.Body)
+	resp.Body.Close()
+	resp.Body = io.NopCloser(bytes.NewReader(body))
+
+	t.recordToken(body, resp.StatusCode)
+	return resp, nil
+}
+
+// recordHTTPResponse is a callback to record HTTP responses for token tracking
+func (h *ClaudeTestHarness) recordHTTPResponse(responseBody []byte, statusCode int) {
+	h.t.Logf("HTTP callback: status=%d, responseLen=%d", statusCode, len(responseBody))
 
 	if statusCode != http.StatusOK || responseBody == nil {
 		return
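
The harness now instruments requests through an http.RoundTripper set on the service's HTTPC field instead of the removed HTTPRecorder callback. Below is a sketch of the same wrapping pattern generalized to capture request bodies and timing as well; the type and callback are illustrative only, not the actual llm/llmhttp implementation:

import (
	"bytes"
	"io"
	"net/http"
	"time"
)

// recordingTransport captures request/response bodies plus status and timing,
// then hands them to a caller-supplied record function.
type recordingTransport struct {
	base   http.RoundTripper
	record func(reqBody, respBody []byte, status int, err error, d time.Duration)
}

func (t *recordingTransport) RoundTrip(req *http.Request) (*http.Response, error) {
	var reqBody []byte
	if req.Body != nil {
		reqBody, _ = io.ReadAll(req.Body)
		req.Body = io.NopCloser(bytes.NewReader(reqBody)) // restore for the real transport
	}
	start := time.Now()
	resp, err := t.base.RoundTrip(req)
	if err != nil {
		t.record(reqBody, nil, 0, err, time.Since(start))
		return nil, err
	}
	respBody, _ := io.ReadAll(resp.Body)
	resp.Body.Close()
	resp.Body = io.NopCloser(bytes.NewReader(respBody)) // restore for the caller
	t.record(reqBody, respBody, resp.StatusCode, nil, time.Since(start))
	return resp, nil
}

It is installed the same way as tokenTrackingTransport above: set it as the Transport of an *http.Client and pass that client via the service's HTTPC field.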

server/handlers.go 🔗

@@ -902,153 +902,6 @@ func (s *Server) handleStreamConversation(w http.ResponseWriter, r *http.Request
 	}
 }
 
-// handleDebugLLM serves recent LLM requests and responses for debugging
-func (s *Server) handleDebugLLM(w http.ResponseWriter, r *http.Request) {
-	if r.Method != http.MethodGet {
-		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
-		return
-	}
-
-	// Check if requesting a specific record JSON
-	if idx := r.URL.Query().Get("index"); idx != "" {
-		var i int
-		if _, err := fmt.Sscanf(idx, "%d", &i); err != nil {
-			http.Error(w, "Invalid index", http.StatusBadRequest)
-			return
-		}
-
-		type historyProvider interface {
-			GetHistory() *models.LLMRequestHistory
-		}
-
-		var records []models.LLMRequestRecord
-		if hp, ok := s.llmManager.(historyProvider); ok && hp.GetHistory() != nil {
-			records = hp.GetHistory().GetRecords()
-		}
-
-		if i < 0 || i >= len(records) {
-			http.Error(w, "Index out of range", http.StatusNotFound)
-			return
-		}
-
-		record := records[i]
-		recordType := r.URL.Query().Get("type")
-
-		switch recordType {
-		case "request":
-			w.Header().Set("Content-Type", "application/json")
-			w.Write(record.HTTPRequest)
-		case "response":
-			w.Header().Set("Content-Type", "application/json")
-			w.Write(record.HTTPResponse)
-		default:
-			// Return the full record
-			w.Header().Set("Content-Type", "application/json")
-			json.NewEncoder(w).Encode(record)
-		}
-		return
-	}
-
-	// Get history from the LLM manager if it's a models.Manager
-	type historyProvider interface {
-		GetHistory() *models.LLMRequestHistory
-	}
-
-	var records []models.LLMRequestRecord
-	if hp, ok := s.llmManager.(historyProvider); ok && hp.GetHistory() != nil {
-		records = hp.GetHistory().GetRecords()
-	}
-
-	w.Header().Set("Content-Type", "text/html; charset=utf-8")
-	w.WriteHeader(http.StatusOK)
-
-	// Write simple HTML with links to JSON
-	fmt.Fprint(w, `<!DOCTYPE html>
-<html>
-<head>
-<meta charset="utf-8">
-<title>LLM Debug - Recent Requests</title>
-<style>
-body {
-	font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
-	margin: 20px;
-	background: #ffffff;
-	color: #000000;
-}
-h1 {
-	margin-bottom: 20px;
-}
-table {
-	border-collapse: collapse;
-	width: 100%;
-}
-th, td {
-	padding: 8px 12px;
-	text-align: left;
-	border-bottom: 1px solid #ddd;
-}
-th {
-	background: #f5f5f5;
-	font-weight: 600;
-}
-tr:hover {
-	background: #f9f9f9;
-}
-.error {
-	color: #d32f2f;
-}
-.success {
-	color: #388e3c;
-}
-a {
-	color: #1976d2;
-	text-decoration: none;
-}
-a:hover {
-	text-decoration: underline;
-}
-</style>
-</head>
-<body>
-<h1>LLM Debug - Recent Requests</h1>
-`)
-
-	if len(records) == 0 {
-		fmt.Fprint(w, "<p>No requests recorded yet.</p>")
-	} else {
-		fmt.Fprint(w, "<table>")
-		fmt.Fprint(w, "<tr><th>#</th><th>Time</th><th>Model</th><th>URL</th><th>Status</th><th>Duration</th><th>Request</th><th>Response</th></tr>")
-		for i := len(records) - 1; i >= 0; i-- {
-			record := records[i]
-			num := len(records) - i
-			statusClass := "success"
-			statusText := fmt.Sprintf("%d", record.HTTPStatusCode)
-			if record.Error != "" {
-				statusClass = "error"
-				statusText = record.Error
-			} else if record.HTTPStatusCode >= 400 {
-				statusClass = "error"
-			}
-			fmt.Fprintf(w, "<tr>")
-			fmt.Fprintf(w, "<td>%d</td>", num)
-			fmt.Fprintf(w, "<td>%s</td>", record.Timestamp.Format("15:04:05"))
-			fmt.Fprintf(w, "<td>%s</td>", record.ModelID)
-			fmt.Fprintf(w, "<td>%s</td>", record.URL)
-			fmt.Fprintf(w, "<td class=\"%s\">%s</td>", statusClass, statusText)
-			fmt.Fprintf(w, "<td>%.2fs</td>", record.Duration)
-			fmt.Fprintf(w, "<td><a href=\"/debug/llm?index=%d&type=request\" target=\"_blank\">json</a></td>", i)
-			fmt.Fprintf(w, "<td><a href=\"/debug/llm?index=%d&type=response\" target=\"_blank\">json</a></td>", i)
-			fmt.Fprintf(w, "</tr>")
-		}
-		fmt.Fprint(w, "</table>")
-	}
-
-	fmt.Fprint(w, `
-</body>
-</html>
-`)
-}
-
 // handleVersion returns version information as JSON
 func (s *Server) handleVersion(w http.ResponseWriter, r *http.Request) {
 	if r.Method != http.MethodGet {

server/llmconfig.go 🔗

@@ -1,6 +1,10 @@
 package server
 
-import "log/slog"
+import (
+	"log/slog"
+
+	"shelley.exe.dev/db"
+)
 
 // Link represents a custom link to be displayed in the UI
 type Link struct {
@@ -29,5 +33,8 @@ type LLMConfig struct {
 	// Links are custom links to be displayed in the UI (optional)
 	Links []Link
 
+	// DB is the database for recording LLM requests (optional)
+	DB *db.DB
+
 	Logger *slog.Logger
 }

server/server.go 🔗

@@ -72,7 +72,7 @@ type LLMProvider interface {
 }
 
 // NewLLMServiceManager creates a new LLM service manager from config
-func NewLLMServiceManager(cfg *LLMConfig, history *models.LLMRequestHistory) LLMProvider {
+func NewLLMServiceManager(cfg *LLMConfig) LLMProvider {
 	// Convert LLMConfig to models.Config
 	modelConfig := &models.Config{
 		AnthropicAPIKey: cfg.AnthropicAPIKey,
@@ -81,9 +81,10 @@ func NewLLMServiceManager(cfg *LLMConfig, history *models.LLMRequestHistory) LLM
 		FireworksAPIKey: cfg.FireworksAPIKey,
 		Gateway:         cfg.Gateway,
 		Logger:          cfg.Logger,
+		DB:              cfg.DB,
 	}
 
-	manager, err := models.NewManager(modelConfig, history)
+	manager, err := models.NewManager(modelConfig)
 	if err != nil {
 		// This shouldn't happen in practice, but handle it gracefully
 		cfg.Logger.Error("Failed to create models manager", "error", err)
@@ -262,7 +263,6 @@ func (s *Server) RegisterRoutes(mux *http.ServeMux) {
 	mux.Handle("/version", http.HandlerFunc(s.handleVersion)) // Small response
 
 	// Debug routes
-	mux.Handle("/debug/llm", gzipHandler(http.HandlerFunc(s.handleDebugLLM)))
 
 	// Serve embedded UI assets
 	mux.Handle("/", s.staticHandler(ui.Assets()))

test/anthropic_test.go 🔗

@@ -50,7 +50,7 @@ func TestWithAnthropicAPI(t *testing.T) {
 		FireworksAPIKey: os.Getenv("FIREWORKS_API_KEY"),
 		Logger:          logger,
 	}
-	llmManager := server.NewLLMServiceManager(llmConfig, nil)
+	llmManager := server.NewLLMServiceManager(llmConfig)
 
 	// Set up tools config
 	toolSetConfig := claudetool.ToolSetConfig{

test/server_test.go 🔗

@@ -47,7 +47,7 @@ func TestServerEndToEnd(t *testing.T) {
 	}))
 
 	// Create LLM service manager with predictable service
-	llmManager := server.NewLLMServiceManager(&server.LLMConfig{Logger: logger}, nil)
+	llmManager := server.NewLLMServiceManager(&server.LLMConfig{Logger: logger})
 	predictableService := loop.NewPredictableService()
 	// For testing, we'll override the manager's service selection
 	_ = predictableService // will need to mock this properly
@@ -384,7 +384,7 @@ func TestConversationCleanup(t *testing.T) {
 
 	// Create server with predictable service
 	logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelDebug}))
-	llmManager := server.NewLLMServiceManager(&server.LLMConfig{Logger: logger}, nil)
+	llmManager := server.NewLLMServiceManager(&server.LLMConfig{Logger: logger})
 	svr := server.NewServer(database, llmManager, claudetool.ToolSetConfig{}, logger, false, "", "", "", nil)
 
 	// Create a conversation
@@ -420,7 +420,7 @@ func TestSlugGeneration(t *testing.T) {
 
 	// Create server
 	logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelWarn}))
-	llmManager := server.NewLLMServiceManager(&server.LLMConfig{Logger: logger}, nil)
+	llmManager := server.NewLLMServiceManager(&server.LLMConfig{Logger: logger})
 	_ = server.NewServer(database, llmManager, claudetool.ToolSetConfig{}, logger, false, "", "", "", nil)
 
 	// Test slug generation directly to avoid timing issues
@@ -501,7 +501,7 @@ func TestSanitizeSlug(t *testing.T) {
 func TestSlugGenerationWithPredictableService(t *testing.T) {
 	// Create server with predictable service only
 	logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelWarn}))
-	llmManager := server.NewLLMServiceManager(&server.LLMConfig{Logger: logger}, nil)
+	llmManager := server.NewLLMServiceManager(&server.LLMConfig{Logger: logger})
 
 	// Create a temporary database
 	tempDB := t.TempDir() + "/test.db"
@@ -608,7 +608,7 @@ func TestSSEIncrementalUpdates(t *testing.T) {
 
 	// Create logger and LLM manager
 	logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelWarn}))
-	llmManager := server.NewLLMServiceManager(&server.LLMConfig{Logger: logger}, nil)
+	llmManager := server.NewLLMServiceManager(&server.LLMConfig{Logger: logger})
 
 	// Create server
 	serviceInstance := server.NewServer(database, llmManager, claudetool.ToolSetConfig{}, logger, false, "", "", "", nil)
@@ -921,7 +921,7 @@ func TestVersionEndpoint(t *testing.T) {
 	}
 
 	logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelError}))
-	llmManager := server.NewLLMServiceManager(&server.LLMConfig{Logger: logger}, nil)
+	llmManager := server.NewLLMServiceManager(&server.LLMConfig{Logger: logger})
 	svr := server.NewServer(database, llmManager, claudetool.ToolSetConfig{}, logger, true, "", "", "", nil)
 
 	mux := http.NewServeMux()
@@ -972,7 +972,7 @@ func TestScreenshotRouteServesImage(t *testing.T) {
 	}
 
 	logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelError}))
-	llmManager := server.NewLLMServiceManager(&server.LLMConfig{Logger: logger}, nil)
+	llmManager := server.NewLLMServiceManager(&server.LLMConfig{Logger: logger})
 	svr := server.NewServer(database, llmManager, claudetool.ToolSetConfig{}, logger, true, "", "", "", nil)
 
 	mux := http.NewServeMux()
@@ -1153,7 +1153,7 @@ func TestSubagentEndToEnd(t *testing.T) {
 	}))
 
 	// Create LLM service manager with predictable service
-	llmManager := server.NewLLMServiceManager(&server.LLMConfig{Logger: logger}, nil)
+	llmManager := server.NewLLMServiceManager(&server.LLMConfig{Logger: logger})
 
 	// Set up tools config
 	toolSetConfig := claudetool.ToolSetConfig{