diff --git a/cmd/venice/main.go b/cmd/venice/main.go index c37a0add90731a2937c5c87c38b5e8917e882847..dfe306661d54506f58cbdf2ef45c81a3af783ddd 100644 --- a/cmd/venice/main.go +++ b/cmd/venice/main.go @@ -33,6 +33,7 @@ type VeniceModel struct { type VeniceModelSpec struct { AvailableContextTokens int64 `json:"availableContextTokens"` + MaxCompletionTokens int64 `json:"maxCompletionTokens"` Capabilities VeniceModelCapabilities `json:"capabilities"` Constraints VeniceModelConstraints `json:"constraints"` Name string `json:"name"` @@ -100,20 +101,6 @@ func fetchVeniceModels(apiEndpoint string) (*ModelsResponse, error) { return &mr, nil } -func minInt64(a, b int64) int64 { - if a < b { - return a - } - return b -} - -func maxInt64(a, b int64) int64 { - if a > b { - return a - } - return b -} - func bestLargeModelID(models []catwalk.Model) string { var best *catwalk.Model for i := range models { @@ -200,9 +187,6 @@ func main() { continue } - defaultMaxTokens := minInt64(contextWindow/4, 32768) - defaultMaxTokens = maxInt64(defaultMaxTokens, 2048) - canReason := model.ModelSpec.Capabilities.SupportsReasoning var reasoningLevels []string var defaultReasoning string @@ -234,7 +218,7 @@ func main() { CostPer1MInCached: 0, CostPer1MOutCached: 0, ContextWindow: contextWindow, - DefaultMaxTokens: defaultMaxTokens, + DefaultMaxTokens: model.ModelSpec.MaxCompletionTokens, CanReason: canReason, ReasoningLevels: reasoningLevels, DefaultReasoningEffort: defaultReasoning, diff --git a/internal/providers/configs/venice.json b/internal/providers/configs/venice.json index 6adf7032a97fe1584074fef9db03c624805956ec..657554a1eb9723cd8f9a74989b009c3aa1e26e5d 100644 --- a/internal/providers/configs/venice.json +++ b/internal/providers/configs/venice.json @@ -34,7 +34,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 1000000, - "default_max_tokens": 32768, + "default_max_tokens": 128000, "can_reason": true, "reasoning_levels": [ "low", @@ -53,7 +53,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 198000, - "default_max_tokens": 32768, + "default_max_tokens": 64000, "can_reason": true, "reasoning_levels": [ "low", @@ -72,7 +72,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 1000000, - "default_max_tokens": 32768, + "default_max_tokens": 64000, "can_reason": true, "reasoning_levels": [ "low", @@ -110,7 +110,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 198000, - "default_max_tokens": 32768, + "default_max_tokens": 16384, "can_reason": false, "supports_attachments": false, "options": {} @@ -123,7 +123,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 198000, - "default_max_tokens": 32768, + "default_max_tokens": 16384, "can_reason": true, "reasoning_levels": [ "low", @@ -142,7 +142,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 128000, - "default_max_tokens": 32000, + "default_max_tokens": 16384, "can_reason": true, "reasoning_levels": [ "low", @@ -161,7 +161,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 200000, - "default_max_tokens": 32768, + "default_max_tokens": 24000, "can_reason": true, "reasoning_levels": [ "low", @@ -180,7 +180,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 198000, - "default_max_tokens": 32768, + "default_max_tokens": 32000, "can_reason": true, "reasoning_levels": [ "low", @@ -199,7 +199,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 128000, - "default_max_tokens": 32000, + "default_max_tokens": 16384, "can_reason": false, "supports_attachments": true, "options": {} @@ -212,7 +212,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 128000, - "default_max_tokens": 32000, + "default_max_tokens": 16384, "can_reason": false, "supports_attachments": true, "options": {} @@ -225,7 +225,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 256000, - "default_max_tokens": 32768, + "default_max_tokens": 65536, "can_reason": true, "reasoning_levels": [ "low", @@ -244,7 +244,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 256000, - "default_max_tokens": 32768, + "default_max_tokens": 65536, "can_reason": true, "reasoning_levels": [ "low", @@ -263,7 +263,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 400000, - "default_max_tokens": 32768, + "default_max_tokens": 128000, "can_reason": true, "reasoning_levels": [ "low", @@ -282,7 +282,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 1000000, - "default_max_tokens": 32768, + "default_max_tokens": 131072, "can_reason": true, "reasoning_levels": [ "low", @@ -301,7 +301,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 400000, - "default_max_tokens": 32768, + "default_max_tokens": 128000, "can_reason": true, "reasoning_levels": [ "low", @@ -320,7 +320,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 1000000, - "default_max_tokens": 32768, + "default_max_tokens": 128000, "can_reason": true, "reasoning_levels": [ "low", @@ -339,7 +339,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 256000, - "default_max_tokens": 32768, + "default_max_tokens": 65536, "can_reason": true, "reasoning_levels": [ "low", @@ -377,7 +377,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 198000, - "default_max_tokens": 32768, + "default_max_tokens": 16384, "can_reason": false, "supports_attachments": true, "options": {} @@ -390,7 +390,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 1000000, - "default_max_tokens": 32768, + "default_max_tokens": 30000, "can_reason": true, "reasoning_levels": [ "low", @@ -409,7 +409,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 2000000, - "default_max_tokens": 32768, + "default_max_tokens": 128000, "can_reason": true, "reasoning_levels": [ "low", @@ -428,7 +428,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 256000, - "default_max_tokens": 32768, + "default_max_tokens": 10000, "can_reason": true, "reasoning_levels": [ "low", @@ -447,7 +447,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 256000, - "default_max_tokens": 32768, + "default_max_tokens": 65536, "can_reason": true, "reasoning_levels": [ "low", @@ -466,7 +466,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 256000, - "default_max_tokens": 32768, + "default_max_tokens": 65536, "can_reason": true, "reasoning_levels": [ "low", @@ -485,7 +485,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 128000, - "default_max_tokens": 32000, + "default_max_tokens": 4096, "can_reason": false, "supports_attachments": false, "options": {} @@ -498,7 +498,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 128000, - "default_max_tokens": 32000, + "default_max_tokens": 4096, "can_reason": false, "supports_attachments": false, "options": {} @@ -568,7 +568,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 256000, - "default_max_tokens": 32768, + "default_max_tokens": 16384, "can_reason": false, "supports_attachments": false, "options": {} @@ -581,7 +581,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 128000, - "default_max_tokens": 32000, + "default_max_tokens": 16384, "can_reason": false, "supports_attachments": false, "options": {} @@ -594,7 +594,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 128000, - "default_max_tokens": 32000, + "default_max_tokens": 16384, "can_reason": false, "supports_attachments": false, "options": {} @@ -607,7 +607,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 128000, - "default_max_tokens": 32000, + "default_max_tokens": 16384, "can_reason": false, "supports_attachments": false, "options": {} @@ -620,7 +620,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 128000, - "default_max_tokens": 32000, + "default_max_tokens": 16384, "can_reason": true, "reasoning_levels": [ "low", @@ -639,7 +639,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 256000, - "default_max_tokens": 32768, + "default_max_tokens": 65536, "can_reason": false, "supports_attachments": false, "options": {} @@ -652,7 +652,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 256000, - "default_max_tokens": 32768, + "default_max_tokens": 65536, "can_reason": false, "supports_attachments": false, "options": {} @@ -665,7 +665,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 256000, - "default_max_tokens": 32768, + "default_max_tokens": 16384, "can_reason": false, "supports_attachments": false, "options": {} @@ -678,7 +678,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 256000, - "default_max_tokens": 32768, + "default_max_tokens": 65536, "can_reason": true, "reasoning_levels": [ "low", @@ -700,7 +700,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 256000, - "default_max_tokens": 32768, + "default_max_tokens": 65536, "can_reason": true, "reasoning_levels": [ "low", @@ -719,7 +719,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 256000, - "default_max_tokens": 32768, + "default_max_tokens": 16384, "can_reason": false, "supports_attachments": true, "options": {} @@ -732,7 +732,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 128000, - "default_max_tokens": 32000, + "default_max_tokens": 4096, "can_reason": false, "supports_attachments": true, "options": {}