shelley: update Gemini models and add thought signature support

Created by Philip Zeyliger and Shelley

Prompt: I think using Gemini isn't really working with shelley. Use the gemini key to see if you can get it to work. Use the pointers to the gemini3 series models to add to the model drop down etc.

Related to https://github.com/boldsoftware/shelley/issues/42

- Update default model from gemini-2.5-pro-preview-03-25 to gemini-2.5-pro
  (the preview model is no longer available)
- Add Gemini 3 models to the drop-down: gemini-3-pro and gemini-3-flash
  (backed by the gemini-3-pro-preview and gemini-3-flash-preview model IDs)
- Add Gemini 2.5 Flash model
- Update context window size mappings for new models
- Add thought signature support required by Gemini 3 for function calling:
  - Added ThoughtSignature field to gemini.Part struct
  - Capture thought signature when receiving function call responses
  - Pass thought signature back when building requests with function calls
- Update tests to use current model names

Gemini 3 models require thought signatures to be passed back during function
calling workflows, otherwise they return a 400 error. All models have been
tested and work with both simple text completion and multi-turn tool usage.

Co-authored-by: Shelley <shelley@exe.dev>

Change summary

llm/gem/gem.go           | 27 +++++++++++++++-------
llm/gem/gem_test.go      | 24 ++++++++++++++++++-
llm/gem/gemini/gemini.go |  3 ++
models/models.go         | 50 +++++++++++++++++++++++++++++++++++++++++
4 files changed, 92 insertions(+), 12 deletions(-)

Detailed changes

llm/gem/gem.go 🔗

@@ -16,7 +16,7 @@ import (
 )
 
 const (
-	DefaultModel    = "gemini-2.5-pro-preview-03-25"
+	DefaultModel    = "gemini-2.5-pro"
 	GeminiAPIKeyEnv = "GEMINI_API_KEY"
 )
 
@@ -210,13 +210,16 @@ func (s *Service) buildGeminiRequest(req *llm.Request) (*gemini.Request, error)
 				slog.DebugContext(context.Background(), "gemini_preparing_tool_use",
 					"tool_name", c.ToolName,
 					"tool_id", c.ID,
-					"input", string(c.ToolInput))
+					"input", string(c.ToolInput),
+					"thought_signature", c.Signature)
 
 				content.Parts = append(content.Parts, gemini.Part{
 					FunctionCall: &gemini.FunctionCall{
 						Name: c.ToolName,
 						Args: args,
 					},
+					// Gemini 3 requires thought signatures to be passed back for function calls
+					ThoughtSignature: c.Signature,
 				})
 			case llm.ContentTypeToolResult:
 				// Tool result becomes a function response
@@ -320,15 +323,16 @@ func convertGeminiResponseToContent(res *gemini.Response) []llm.Content {
 		if part.Text != "" {
 			// Simple text response
 			contents = append(contents, llm.Content{
-				Type: llm.ContentTypeText,
-				Text: part.Text,
+				Type:      llm.ContentTypeText,
+				Text:      part.Text,
+				Signature: part.ThoughtSignature, // Capture thought signature for text parts too
 			})
 		} else if part.FunctionCall != nil {
 			// Function call (tool use)
 			args, err := json.Marshal(part.FunctionCall.Args)
 			if err != nil {
 				// If we can't marshal, use empty args
-				slog.DebugContext(context.Background(), "gemini_failed_to_markshal_args",
+				slog.DebugContext(context.Background(), "gemini_failed_to_marshal_args",
 					"tool_name", part.FunctionCall.Name,
 					"args", string(args),
 					"err", err.Error(),
@@ -345,12 +349,15 @@ func convertGeminiResponseToContent(res *gemini.Response) []llm.Content {
 				Type:      llm.ContentTypeToolUse,
 				ToolName:  part.FunctionCall.Name,
 				ToolInput: json.RawMessage(args),
+				// Capture thought signature - required for Gemini 3 function calling
+				Signature: part.ThoughtSignature,
 			})
 
 			slog.DebugContext(context.Background(), "gemini_tool_call",
 				"tool_id", toolID,
 				"tool_name", part.FunctionCall.Name,
-				"args", string(args))
+				"args", string(args),
+				"thought_signature", part.ThoughtSignature)
 		} else if part.FunctionResponse != nil {
 			// We shouldn't normally get function responses from the model, but just in case
 			respData, _ := json.Marshal(part.FunctionResponse.Response)
@@ -446,9 +453,11 @@ func (s *Service) TokenContextWindow() int {
 
 	// Gemini models generally have large context windows
 	switch model {
-	case "gemini-2.5-pro-preview-03-25":
-		return 1000000 // 1M tokens for Gemini 2.5 Pro
-	case "gemini-2.0-flash-exp":
+	case "gemini-3-pro-preview", "gemini-3-flash-preview":
+		return 1000000 // 1M tokens for Gemini 3
+	case "gemini-2.5-pro", "gemini-2.5-flash":
+		return 1000000 // 1M tokens for Gemini 2.5
+	case "gemini-2.0-flash-exp", "gemini-2.0-flash":
 		return 1000000 // 1M tokens for Gemini 2.0 Flash
 	case "gemini-1.5-pro", "gemini-1.5-pro-latest":
 		return 2000000 // 2M tokens for Gemini 1.5 Pro

llm/gem/gem_test.go 🔗

@@ -372,8 +372,23 @@ func TestTokenContextWindow(t *testing.T) {
 		expected int
 	}{
 		{
-			name:     "gemini-2.5-pro-preview-03-25",
-			model:    "gemini-2.5-pro-preview-03-25",
+			name:     "gemini-3-pro-preview",
+			model:    "gemini-3-pro-preview",
+			expected: 1000000,
+		},
+		{
+			name:     "gemini-3-flash-preview",
+			model:    "gemini-3-flash-preview",
+			expected: 1000000,
+		},
+		{
+			name:     "gemini-2.5-pro",
+			model:    "gemini-2.5-pro",
+			expected: 1000000,
+		},
+		{
+			name:     "gemini-2.5-flash",
+			model:    "gemini-2.5-flash",
 			expected: 1000000,
 		},
 		{
@@ -381,6 +396,11 @@ func TestTokenContextWindow(t *testing.T) {
 			model:    "gemini-2.0-flash-exp",
 			expected: 1000000,
 		},
+		{
+			name:     "gemini-2.0-flash",
+			model:    "gemini-2.0-flash",
+			expected: 1000000,
+		},
 		{
 			name:     "gemini-1.5-pro",
 			model:    "gemini-1.5-pro",

llm/gem/gemini/gemini.go 🔗

@@ -50,6 +50,9 @@ type Part struct {
 	FunctionResponse    *FunctionResponse    `json:"functionResponse,omitempty"`
 	ExecutableCode      *ExecutableCode      `json:"executableCode,omitempty"`
 	CodeExecutionResult *CodeExecutionResult `json:"codeExecutionResult,omitempty"`
+	// ThoughtSignature is required for Gemini 3 models when using function calling.
+	// It must be passed back exactly as received when sending the conversation history.
+	ThoughtSignature string `json:"thoughtSignature,omitempty"`
 	// TODO inlineData
 	// TODO fileData
 }

models/models.go 🔗

@@ -227,6 +227,38 @@ func All() []Model {
 				return svc, nil
 			},
 		},
+		{
+			ID:              "gemini-3-pro",
+			Provider:        ProviderGemini,
+			Description:     "Gemini 3 Pro",
+			RequiredEnvVars: []string{"GEMINI_API_KEY"},
+			Factory: func(config *Config, httpc *http.Client) (llm.Service, error) {
+				if config.GeminiAPIKey == "" {
+					return nil, fmt.Errorf("gemini-3-pro requires GEMINI_API_KEY")
+				}
+				svc := &gem.Service{APIKey: config.GeminiAPIKey, Model: "gemini-3-pro-preview", HTTPC: httpc}
+				if url := config.getGeminiURL(); url != "" {
+					svc.URL = url
+				}
+				return svc, nil
+			},
+		},
+		{
+			ID:              "gemini-3-flash",
+			Provider:        ProviderGemini,
+			Description:     "Gemini 3 Flash",
+			RequiredEnvVars: []string{"GEMINI_API_KEY"},
+			Factory: func(config *Config, httpc *http.Client) (llm.Service, error) {
+				if config.GeminiAPIKey == "" {
+					return nil, fmt.Errorf("gemini-3-flash requires GEMINI_API_KEY")
+				}
+				svc := &gem.Service{APIKey: config.GeminiAPIKey, Model: "gemini-3-flash-preview", HTTPC: httpc}
+				if url := config.getGeminiURL(); url != "" {
+					svc.URL = url
+				}
+				return svc, nil
+			},
+		},
 		{
 			ID:              "gemini-2.5-pro",
 			Provider:        ProviderGemini,
@@ -236,7 +268,23 @@ func All() []Model {
 				if config.GeminiAPIKey == "" {
 					return nil, fmt.Errorf("gemini-2.5-pro requires GEMINI_API_KEY")
 				}
-				svc := &gem.Service{APIKey: config.GeminiAPIKey, Model: gem.DefaultModel, HTTPC: httpc}
+				svc := &gem.Service{APIKey: config.GeminiAPIKey, Model: "gemini-2.5-pro", HTTPC: httpc}
+				if url := config.getGeminiURL(); url != "" {
+					svc.URL = url
+				}
+				return svc, nil
+			},
+		},
+		{
+			ID:              "gemini-2.5-flash",
+			Provider:        ProviderGemini,
+			Description:     "Gemini 2.5 Flash",
+			RequiredEnvVars: []string{"GEMINI_API_KEY"},
+			Factory: func(config *Config, httpc *http.Client) (llm.Service, error) {
+				if config.GeminiAPIKey == "" {
+					return nil, fmt.Errorf("gemini-2.5-flash requires GEMINI_API_KEY")
+				}
+				svc := &gem.Service{APIKey: config.GeminiAPIKey, Model: "gemini-2.5-flash", HTTPC: httpc}
 				if url := config.getGeminiURL(); url != "" {
 					svc.URL = url
 				}