fix(venice): use `default_max_tokens` from the API list

Andrey Nering created

Fixes #225

Change summary

cmd/venice/main.go                     | 20 ------
internal/providers/configs/venice.json | 74 ++++++++++++++--------------
2 files changed, 39 insertions(+), 55 deletions(-)

Detailed changes

cmd/venice/main.go 🔗

@@ -33,6 +33,7 @@ type VeniceModel struct {
 
 type VeniceModelSpec struct {
 	AvailableContextTokens int64                   `json:"availableContextTokens"`
+	MaxCompletionTokens    int64                   `json:"maxCompletionTokens"`
 	Capabilities           VeniceModelCapabilities `json:"capabilities"`
 	Constraints            VeniceModelConstraints  `json:"constraints"`
 	Name                   string                  `json:"name"`
@@ -100,20 +101,6 @@ func fetchVeniceModels(apiEndpoint string) (*ModelsResponse, error) {
 	return &mr, nil
 }
 
-func minInt64(a, b int64) int64 {
-	if a < b {
-		return a
-	}
-	return b
-}
-
-func maxInt64(a, b int64) int64 {
-	if a > b {
-		return a
-	}
-	return b
-}
-
 func bestLargeModelID(models []catwalk.Model) string {
 	var best *catwalk.Model
 	for i := range models {
@@ -200,9 +187,6 @@ func main() {
 			continue
 		}
 
-		defaultMaxTokens := minInt64(contextWindow/4, 32768)
-		defaultMaxTokens = maxInt64(defaultMaxTokens, 2048)
-
 		canReason := model.ModelSpec.Capabilities.SupportsReasoning
 		var reasoningLevels []string
 		var defaultReasoning string
@@ -234,7 +218,7 @@ func main() {
 			CostPer1MInCached:      0,
 			CostPer1MOutCached:     0,
 			ContextWindow:          contextWindow,
-			DefaultMaxTokens:       defaultMaxTokens,
+			DefaultMaxTokens:       model.ModelSpec.MaxCompletionTokens,
 			CanReason:              canReason,
 			ReasoningLevels:        reasoningLevels,
 			DefaultReasoningEffort: defaultReasoning,

internal/providers/configs/venice.json 🔗

@@ -34,7 +34,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 1000000,
-      "default_max_tokens": 32768,
+      "default_max_tokens": 128000,
       "can_reason": true,
       "reasoning_levels": [
         "low",
@@ -53,7 +53,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 198000,
-      "default_max_tokens": 32768,
+      "default_max_tokens": 64000,
       "can_reason": true,
       "reasoning_levels": [
         "low",
@@ -72,7 +72,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 1000000,
-      "default_max_tokens": 32768,
+      "default_max_tokens": 64000,
       "can_reason": true,
       "reasoning_levels": [
         "low",
@@ -110,7 +110,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 198000,
-      "default_max_tokens": 32768,
+      "default_max_tokens": 16384,
       "can_reason": false,
       "supports_attachments": false,
       "options": {}
@@ -123,7 +123,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 198000,
-      "default_max_tokens": 32768,
+      "default_max_tokens": 16384,
       "can_reason": true,
       "reasoning_levels": [
         "low",
@@ -142,7 +142,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 128000,
-      "default_max_tokens": 32000,
+      "default_max_tokens": 16384,
       "can_reason": true,
       "reasoning_levels": [
         "low",
@@ -161,7 +161,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 200000,
-      "default_max_tokens": 32768,
+      "default_max_tokens": 24000,
       "can_reason": true,
       "reasoning_levels": [
         "low",
@@ -180,7 +180,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 198000,
-      "default_max_tokens": 32768,
+      "default_max_tokens": 32000,
       "can_reason": true,
       "reasoning_levels": [
         "low",
@@ -199,7 +199,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 128000,
-      "default_max_tokens": 32000,
+      "default_max_tokens": 16384,
       "can_reason": false,
       "supports_attachments": true,
       "options": {}
@@ -212,7 +212,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 128000,
-      "default_max_tokens": 32000,
+      "default_max_tokens": 16384,
       "can_reason": false,
       "supports_attachments": true,
       "options": {}
@@ -225,7 +225,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 256000,
-      "default_max_tokens": 32768,
+      "default_max_tokens": 65536,
       "can_reason": true,
       "reasoning_levels": [
         "low",
@@ -244,7 +244,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 256000,
-      "default_max_tokens": 32768,
+      "default_max_tokens": 65536,
       "can_reason": true,
       "reasoning_levels": [
         "low",
@@ -263,7 +263,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 400000,
-      "default_max_tokens": 32768,
+      "default_max_tokens": 128000,
       "can_reason": true,
       "reasoning_levels": [
         "low",
@@ -282,7 +282,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 1000000,
-      "default_max_tokens": 32768,
+      "default_max_tokens": 131072,
       "can_reason": true,
       "reasoning_levels": [
         "low",
@@ -301,7 +301,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 400000,
-      "default_max_tokens": 32768,
+      "default_max_tokens": 128000,
       "can_reason": true,
       "reasoning_levels": [
         "low",
@@ -320,7 +320,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 1000000,
-      "default_max_tokens": 32768,
+      "default_max_tokens": 128000,
       "can_reason": true,
       "reasoning_levels": [
         "low",
@@ -339,7 +339,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 256000,
-      "default_max_tokens": 32768,
+      "default_max_tokens": 65536,
       "can_reason": true,
       "reasoning_levels": [
         "low",
@@ -377,7 +377,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 198000,
-      "default_max_tokens": 32768,
+      "default_max_tokens": 16384,
       "can_reason": false,
       "supports_attachments": true,
       "options": {}
@@ -390,7 +390,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 1000000,
-      "default_max_tokens": 32768,
+      "default_max_tokens": 30000,
       "can_reason": true,
       "reasoning_levels": [
         "low",
@@ -409,7 +409,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 2000000,
-      "default_max_tokens": 32768,
+      "default_max_tokens": 128000,
       "can_reason": true,
       "reasoning_levels": [
         "low",
@@ -428,7 +428,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 256000,
-      "default_max_tokens": 32768,
+      "default_max_tokens": 10000,
       "can_reason": true,
       "reasoning_levels": [
         "low",
@@ -447,7 +447,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 256000,
-      "default_max_tokens": 32768,
+      "default_max_tokens": 65536,
       "can_reason": true,
       "reasoning_levels": [
         "low",
@@ -466,7 +466,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 256000,
-      "default_max_tokens": 32768,
+      "default_max_tokens": 65536,
       "can_reason": true,
       "reasoning_levels": [
         "low",
@@ -485,7 +485,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 128000,
-      "default_max_tokens": 32000,
+      "default_max_tokens": 4096,
       "can_reason": false,
       "supports_attachments": false,
       "options": {}
@@ -498,7 +498,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 128000,
-      "default_max_tokens": 32000,
+      "default_max_tokens": 4096,
       "can_reason": false,
       "supports_attachments": false,
       "options": {}
@@ -568,7 +568,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 256000,
-      "default_max_tokens": 32768,
+      "default_max_tokens": 16384,
       "can_reason": false,
       "supports_attachments": false,
       "options": {}
@@ -581,7 +581,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 128000,
-      "default_max_tokens": 32000,
+      "default_max_tokens": 16384,
       "can_reason": false,
       "supports_attachments": false,
       "options": {}
@@ -594,7 +594,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 128000,
-      "default_max_tokens": 32000,
+      "default_max_tokens": 16384,
       "can_reason": false,
       "supports_attachments": false,
       "options": {}
@@ -607,7 +607,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 128000,
-      "default_max_tokens": 32000,
+      "default_max_tokens": 16384,
       "can_reason": false,
       "supports_attachments": false,
       "options": {}
@@ -620,7 +620,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 128000,
-      "default_max_tokens": 32000,
+      "default_max_tokens": 16384,
       "can_reason": true,
       "reasoning_levels": [
         "low",
@@ -639,7 +639,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 256000,
-      "default_max_tokens": 32768,
+      "default_max_tokens": 65536,
       "can_reason": false,
       "supports_attachments": false,
       "options": {}
@@ -652,7 +652,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 256000,
-      "default_max_tokens": 32768,
+      "default_max_tokens": 65536,
       "can_reason": false,
       "supports_attachments": false,
       "options": {}
@@ -665,7 +665,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 256000,
-      "default_max_tokens": 32768,
+      "default_max_tokens": 16384,
       "can_reason": false,
       "supports_attachments": false,
       "options": {}
@@ -678,7 +678,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 256000,
-      "default_max_tokens": 32768,
+      "default_max_tokens": 65536,
       "can_reason": true,
       "reasoning_levels": [
         "low",
@@ -700,7 +700,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 256000,
-      "default_max_tokens": 32768,
+      "default_max_tokens": 65536,
       "can_reason": true,
       "reasoning_levels": [
         "low",
@@ -719,7 +719,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 256000,
-      "default_max_tokens": 32768,
+      "default_max_tokens": 16384,
       "can_reason": false,
       "supports_attachments": true,
       "options": {}
@@ -732,7 +732,7 @@
       "cost_per_1m_in_cached": 0,
       "cost_per_1m_out_cached": 0,
       "context_window": 128000,
-      "default_max_tokens": 32000,
+      "default_max_tokens": 4096,
       "can_reason": false,
       "supports_attachments": true,
       "options": {}