From 7468cef45728a92c6a3e92b9d5e8ef9ae707c742 Mon Sep 17 00:00:00 2001
From: Benson Schliesser
Date: Wed, 30 Jul 2025 22:56:58 -0400
Subject: [PATCH] adding Lambda inference API as a provider

---
 internal/providers/configs/lambda.json | 256 +++++++++++++++++++++++++
 internal/providers/providers.go        |   8 +
 pkg/catwalk/provider.go                |   2 +
 3 files changed, 266 insertions(+)
 create mode 100644 internal/providers/configs/lambda.json

diff --git a/internal/providers/configs/lambda.json b/internal/providers/configs/lambda.json
new file mode 100644
index 0000000000000000000000000000000000000000..e290c6aef9ff9a098c2ee1e28083f4616ac397c2
--- /dev/null
+++ b/internal/providers/configs/lambda.json
@@ -0,0 +1,256 @@
+{
+  "name": "Lambda",
+  "id": "lambda",
+  "type": "openai",
+  "api_key": "$LAMBDA_API_KEY",
+  "api_endpoint": "https://api.lambda.ai/v1",
+  "default_large_model_id": "qwen25-coder-32b-instruct",
+  "default_small_model_id": "llama3.1-8b-instruct",
+  "default_headers": {
+    "Authorization": "Bearer $LAMBDA_API_KEY"
+  },
+  "models": [
+    {
+      "id": "deepseek-r1-0528",
+      "name": "DeepSeek R1 0528 FP8",
+      "cost_per_1m_in": 0.5,
+      "cost_per_1m_out": 2.18,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 164000,
+      "default_max_tokens": 8192,
+      "can_reason": true,
+      "has_reasoning_effort": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "deepseek-r1-671b",
+      "name": "DeepSeek R1 671B",
+      "cost_per_1m_in": 0.5,
+      "cost_per_1m_out": 2.18,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 164000,
+      "default_max_tokens": 8192,
+      "can_reason": true,
+      "has_reasoning_effort": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "llama-4-maverick-17b-128e-instruct-fp8",
+      "name": "Llama 4 Maverick 17B",
+      "cost_per_1m_in": 0.18,
+      "cost_per_1m_out": 0.6,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 1000000,
+      "default_max_tokens": 8192,
+      "can_reason": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "llama3.1-405b-instruct-fp8",
+      "name": "Llama 3.1 405B Instruct FP8",
+      "cost_per_1m_in": 0.8,
+      "cost_per_1m_out": 0.8,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 131000,
+      "default_max_tokens": 8192,
+      "can_reason": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "llama3.3-70b-instruct-fp8",
+      "name": "Llama 3.3 70B Instruct FP8",
+      "cost_per_1m_in": 0.12,
+      "cost_per_1m_out": 0.3,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 131000,
+      "default_max_tokens": 8192,
+      "can_reason": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "llama3.1-70b-instruct-fp8",
+      "name": "Llama 3.1 70B Instruct FP8",
+      "cost_per_1m_in": 0.12,
+      "cost_per_1m_out": 0.3,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 131000,
+      "default_max_tokens": 8192,
+      "can_reason": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "llama3.1-8b-instruct",
+      "name": "Llama 3.1 8B Instruct",
+      "cost_per_1m_in": 0.025,
+      "cost_per_1m_out": 0.04,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 131000,
+      "default_max_tokens": 8192,
+      "can_reason": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "llama3.2-3b-instruct",
+      "name": "Llama 3.2 3B Instruct",
+      "cost_per_1m_in": 0.025,
+      "cost_per_1m_out": 0.04,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 131000,
+      "default_max_tokens": 8192,
+      "can_reason": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "llama3.2-11b-vision-instruct",
+      "name": "Llama 3.2 11B Vision Instruct",
+      "cost_per_1m_in": 0.025,
+      "cost_per_1m_out": 0.04,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 131000,
+      "default_max_tokens": 8192,
+      "can_reason": false,
+      "supports_attachments": true
+    },
+    {
+      "id": "hermes3-8b",
+      "name": "Hermes 3 8B",
+      "cost_per_1m_in": 0.025,
+      "cost_per_1m_out": 0.04,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 131000,
+      "default_max_tokens": 8192,
+      "can_reason": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "hermes3-70b",
+      "name": "Hermes 3 70B",
+      "cost_per_1m_in": 0.12,
+      "cost_per_1m_out": 0.3,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 131000,
+      "default_max_tokens": 8192,
+      "can_reason": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "hermes3-405b",
+      "name": "Hermes 3 405B",
+      "cost_per_1m_in": 0.8,
+      "cost_per_1m_out": 0.8,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 131000,
+      "default_max_tokens": 8192,
+      "can_reason": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "lfm-40b",
+      "name": "LFM 40B",
+      "cost_per_1m_in": 0.18,
+      "cost_per_1m_out": 0.6,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 65536,
+      "default_max_tokens": 8192,
+      "can_reason": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "qwen25-coder-32b-instruct",
+      "name": "Qwen 2.5 Coder 32B Instruct",
+      "cost_per_1m_in": 0.12,
+      "cost_per_1m_out": 0.3,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 131000,
+      "default_max_tokens": 8192,
+      "can_reason": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "llama3.1-nemotron-70b-instruct-fp8",
+      "name": "Llama 3.1 Nemotron 70B Instruct FP8",
+      "cost_per_1m_in": 0.12,
+      "cost_per_1m_out": 0.3,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 131000,
+      "default_max_tokens": 8192,
+      "can_reason": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "deepseek-llama3.3-70b",
+      "name": "DeepSeek Llama 3.3 70B",
+      "cost_per_1m_in": 0.12,
+      "cost_per_1m_out": 0.3,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 131000,
+      "default_max_tokens": 8192,
+      "can_reason": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "llama-4-scout-17b-16e-instruct",
+      "name": "Llama 4 Scout 17B",
+      "cost_per_1m_in": 0.18,
+      "cost_per_1m_out": 0.6,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 131000,
+      "default_max_tokens": 8192,
+      "can_reason": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "deepseek-v3-0324",
+      "name": "DeepSeek V3 0324",
+      "cost_per_1m_in": 0.5,
+      "cost_per_1m_out": 2.18,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 164000,
+      "default_max_tokens": 8192,
+      "can_reason": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "lfm-7b",
+      "name": "LFM 7B",
+      "cost_per_1m_in": 0.025,
+      "cost_per_1m_out": 0.04,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 65536,
+      "default_max_tokens": 8192,
+      "can_reason": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "qwen3-32b-fp8",
+      "name": "Qwen 3 32B FP8",
+      "cost_per_1m_in": 0.12,
+      "cost_per_1m_out": 0.3,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 131000,
+      "default_max_tokens": 8192,
+      "can_reason": false,
+      "supports_attachments": false
+    }
+  ]
+}
\ No newline at end of file
diff --git a/internal/providers/providers.go b/internal/providers/providers.go
index 0d89ba16950e0687a2af03c14df53a452d4b29a8..a3634474452f634b81f9aa1b3604269fc5d7ed9e 100644
--- a/internal/providers/providers.go
+++ b/internal/providers/providers.go
@@ -36,6 +36,9 @@ var bedrockConfig []byte
 //go:embed configs/groq.json
 var groqConfig []byte
 
+//go:embed configs/lambda.json
+var lambdaConfig []byte
+
 // ProviderFunc is a function that returns a Provider.
 type ProviderFunc func() catwalk.Provider
 
@@ -49,6 +52,7 @@ var providerRegistry = []ProviderFunc{
 	xAIProvider,
 	groqProvider,
 	openRouterProvider,
+	lambdaProvider,
 }
 
 // GetAll returns all registered providers.
@@ -104,3 +108,7 @@ func openRouterProvider() catwalk.Provider {
 func groqProvider() catwalk.Provider {
 	return loadProviderFromConfig(groqConfig)
 }
+
+func lambdaProvider() catwalk.Provider {
+	return loadProviderFromConfig(lambdaConfig)
+}
diff --git a/pkg/catwalk/provider.go b/pkg/catwalk/provider.go
index 4c441db8f83e6380dadff97d75fda07406e9e6b1..fa0094bb07a0d476b35e1858bf4ab79dc64b0647 100644
--- a/pkg/catwalk/provider.go
+++ b/pkg/catwalk/provider.go
@@ -27,6 +27,7 @@
 	InferenceProviderXAI        InferenceProvider = "xai"
 	InferenceProviderGROQ       InferenceProvider = "groq"
 	InferenceProviderOpenRouter InferenceProvider = "openrouter"
+	InferenceProviderLambda     InferenceProvider = "lambda"
 )
 
 // Provider represents an AI provider configuration.
@@ -70,5 +71,6 @@ func KnownProviders() []InferenceProvider {
 	InferenceProviderXAI,
 	InferenceProviderGROQ,
 	InferenceProviderOpenRouter,
+	InferenceProviderLambda,
 	}
 }