|
1 | 1 | {
|
2 | 2 | "replicate": {},
|
3 | 3 | "sambanova": {
|
4 |
| - "DeepSeek-R1": 32768, |
| 4 | + "DeepSeek-R1-0528": 32768, |
5 | 5 | "DeepSeek-R1-Distill-Llama-70B": 131072,
|
6 | 6 | "DeepSeek-V3-0324": 32768,
|
7 | 7 | "E5-Mistral-7B-Instruct": 4096,
|
|
15 | 15 | "Meta-Llama-Guard-3-8B": 16384,
|
16 | 16 | "QwQ-32B": 16384,
|
17 | 17 | "Qwen2-Audio-7B-Instruct": 4096,
|
18 |
| - "Qwen3-32B": 8192, |
| 18 | + "Qwen3-32B": 32768, |
19 | 19 | "Whisper-Large-v3": 4096
|
20 | 20 | },
|
21 | 21 | "nebius": {
|
|
46 | 46 | "microsoft/phi-4": 16384,
|
47 | 47 | "deepseek-ai/DeepSeek-V3": 163840,
|
48 | 48 | "deepseek-ai/DeepSeek-R1": 163840,
|
| 49 | + "deepseek-ai/DeepSeek-R1-0528": 163840, |
49 | 50 | "NousResearch/Hermes-3-Llama-405B": 131072,
|
50 | 51 | "deepseek-ai/DeepSeek-R1-Distill-Llama-70B": 131072,
|
51 | 52 | "deepseek-ai/DeepSeek-R1-fast": 163840,
|
|
70 | 71 | "stability-ai/sdxl": 0
|
71 | 72 | },
|
72 | 73 | "novita": {
|
| 74 | + "deepseek/deepseek-r1-0528": 128000, |
| 75 | + "deepseek/deepseek-r1-0528-qwen3-8b": 128000, |
73 | 76 | "deepseek/deepseek-v3-0324": 128000,
|
74 | 77 | "qwen/qwen3-235b-a22b-fp8": 40960,
|
75 | 78 | "qwen/qwen3-30b-a3b-fp8": 40960,
|
76 | 79 | "qwen/qwen3-32b-fp8": 40960,
|
77 |
| - "qwen/qwen2.5-vl-72b-instruct": 96000, |
| 80 | + "qwen/qwen2.5-vl-72b-instruct": 32768, |
78 | 81 | "deepseek/deepseek-v3-turbo": 64000,
|
79 | 82 | "meta-llama/llama-4-maverick-17b-128e-instruct-fp8": 1048576,
|
80 | 83 | "google/gemma-3-27b-it": 32000,
|
81 |
| - "qwen/qwq-32b": 32768, |
82 | 84 | "deepseek/deepseek-r1-turbo": 64000,
|
83 | 85 | "Sao10K/L3-8B-Stheno-v3.2": 8192,
|
84 | 86 | "gryphe/mythomax-l2-13b": 4096,
|
|
89 | 91 | "deepseek/deepseek-r1-distill-qwen-14b": 64000,
|
90 | 92 | "meta-llama/llama-3.3-70b-instruct": 131072,
|
91 | 93 | "qwen/qwen-2.5-72b-instruct": 32000,
|
92 |
| - "mistralai/mistral-nemo": 64000, |
| 94 | + "mistralai/mistral-nemo": 60288, |
93 | 95 | "deepseek/deepseek-r1-distill-qwen-32b": 64000,
|
94 | 96 | "meta-llama/llama-3-8b-instruct": 8192,
|
95 | 97 | "microsoft/wizardlm-2-8x22b": 65535,
|
96 | 98 | "deepseek/deepseek-r1-distill-llama-70b": 32000,
|
97 |
| - "meta-llama/llama-3.1-70b-instruct": 32768, |
98 |
| - "google/gemma-2-9b-it": 8192, |
99 | 99 | "mistralai/mistral-7b-instruct": 32768,
|
100 | 100 | "meta-llama/llama-3-70b-instruct": 8192,
|
101 | 101 | "nousresearch/hermes-2-pro-llama-3-8b": 8192,
|
102 | 102 | "sao10k/l3-70b-euryale-v2.1": 8192,
|
103 | 103 | "cognitivecomputations/dolphin-mixtral-8x22b": 16000,
|
104 | 104 | "sophosympatheia/midnight-rose-70b": 4096,
|
105 | 105 | "sao10k/l3-8b-lunaris": 8192,
|
106 |
| - "pa/cd-op-4-20250514": 200000, |
107 |
| - "qwen/qwen3-0.6b-fp8": 32000, |
108 |
| - "qwen/qwen3-1.7b-fp8": 32000, |
109 | 106 | "qwen/qwen3-8b-fp8": 128000,
|
110 | 107 | "qwen/qwen3-4b-fp8": 128000,
|
111 |
| - "qwen/qwen3-14b-fp8": 40960, |
112 | 108 | "thudm/glm-4-9b-0414": 32000,
|
113 | 109 | "thudm/glm-z1-9b-0414": 32000,
|
114 | 110 | "thudm/glm-z1-32b-0414": 32000,
|
115 | 111 | "thudm/glm-4-32b-0414": 32000,
|
116 | 112 | "thudm/glm-z1-rumination-32b-0414": 32000,
|
117 | 113 | "qwen/qwen2.5-7b-instruct": 32000,
|
118 | 114 | "meta-llama/llama-3.2-1b-instruct": 131000,
|
119 |
| - "meta-llama/llama-3.2-11b-vision-instruct": 32768, |
120 | 115 | "meta-llama/llama-3.2-3b-instruct": 32768,
|
121 | 116 | "meta-llama/llama-3.1-8b-instruct-bf16": 8192,
|
122 | 117 | "sao10k/l31-70b-euryale-v2.2": 8192
|
|
138 | 133 | "deepseek-ai/DeepSeek-V3": 131072,
|
139 | 134 | "deepseek-ai/DeepSeek-V3-0324": 163840,
|
140 | 135 | "meta-llama/Llama-3.3-70B-Instruct": 131072,
|
141 |
| - "Qwen/QwQ-32B-Preview": 32768, |
142 | 136 | "Qwen/Qwen2.5-Coder-32B-Instruct": 32768,
|
143 | 137 | "meta-llama/Llama-3.2-3B-Instruct": 131072,
|
144 | 138 | "NousResearch/Hermes-3-Llama-3.1-70B": 12288,
|
|
147 | 141 | "meta-llama/Meta-Llama-3.1-8B-Instruct": 131072,
|
148 | 142 | "mistralai/Pixtral-12B-2409": 32768,
|
149 | 143 | "Qwen/Qwen2.5-VL-7B-Instruct": 32768,
|
150 |
| - "meta-llama/Meta-Llama-3.1-405B": 32768, |
151 | 144 | "meta-llama/Meta-Llama-3.1-405B-FP8": 32768,
|
152 | 145 | "deepseek-ai/DeepSeek-R1": 163840,
|
153 | 146 | "Qwen/QwQ-32B": 131072
|
|
176 | 169 | "command": 4096
|
177 | 170 | },
|
178 | 171 | "together": {
|
179 |
| - "meta-llama/Llama-Guard-4-12B": 1048576, |
180 | 172 | "togethercomputer/m2-bert-80M-32k-retrieval": 32768,
|
181 | 173 | "cartesia/sonic": 0,
|
182 |
| - "scb10x/scb10x-llama3-1-typhoon2-8b-instruct": 8192, |
183 | 174 | "meta-llama-llama-2-70b-hf": 4096,
|
184 |
| - "togethercomputer/Refuel-Llm-V2": 16384, |
185 | 175 | "intfloat/multilingual-e5-large-instruct": 514,
|
186 |
| - "BAAI/bge-base-en-v1.5": 512, |
187 |
| - "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": 130815, |
188 | 176 | "Gryphe/MythoMax-L2-13b": 4096,
|
189 | 177 | "Alibaba-NLP/gte-modernbert-base": 8192,
|
190 |
| - "mistralai/Mistral-7B-Instruct-v0.1": 32768, |
191 |
| - "Qwen/Qwen2.5-7B-Instruct-Turbo": 32768, |
192 |
| - "Qwen/Qwen2.5-72B-Instruct-Turbo": 131072, |
193 |
| - "Qwen/QwQ-32B": 131072, |
194 |
| - "mistralai/Mixtral-8x7B-Instruct-v0.1": 32768, |
195 |
| - "google/gemma-2-27b-it": 8192, |
196 |
| - "Qwen/Qwen2-VL-72B-Instruct": 32768, |
197 |
| - "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": 131072, |
198 | 178 | "meta-llama/LlamaGuard-2-8b": 8192,
|
199 | 179 | "cartesia/sonic-2": 0,
|
200 | 180 | "togethercomputer/m2-bert-80M-8k-retrieval": 8192,
|
201 |
| - "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B": 131072, |
202 |
| - "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B": 131072, |
203 |
| - "deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free": 8192, |
204 |
| - "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free": 131072, |
205 |
| - "deepseek-ai/DeepSeek-R1-Distill-Llama-70B": 131072, |
206 |
| - "deepseek-ai/DeepSeek-V3": 131072, |
207 |
| - "togethercomputer/Refuel-Llm-V2-Small": 8192, |
| 181 | + "deepseek-ai/DeepSeek-R1": 163840, |
208 | 182 | "togethercomputer/MoA-1": 32768,
|
209 | 183 | "meta-llama/Meta-Llama-3-70B-Instruct-Turbo": 8192,
|
210 |
| - "google/gemma-2b-it": 8192, |
211 |
| - "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo": 131072, |
212 | 184 | "Gryphe/MythoMax-L2-13b-Lite": 4096,
|
213 | 185 | "meta-llama/Meta-Llama-Guard-3-8B": 8192,
|
214 |
| - "marin-community/marin-8b-instruct": 131072, |
215 |
| - "deepseek-ai/DeepSeek-R1": 163840, |
216 |
| - "Qwen/Qwen2.5-VL-72B-Instruct": 32768, |
217 |
| - "arcee-ai/arcee-blitz": 32768, |
218 |
| - "arcee-ai/caller": 32768, |
219 |
| - "arcee-ai/coder-large": 32768, |
220 |
| - "arcee-ai/maestro-reasoning": 131072, |
221 |
| - "arcee-ai/virtuoso-large": 131072, |
222 |
| - "arcee-ai/virtuoso-medium-v2": 131072, |
223 |
| - "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo": 131072, |
224 |
| - "meta-llama/Llama-3-70b-chat-hf": 8192, |
| 186 | + "deepseek-ai/DeepSeek-V3": 131072, |
| 187 | + "mistralai/Mixtral-8x7B-Instruct-v0.1": 32768, |
| 188 | + "meta-llama/Llama-Vision-Free": 131072, |
225 | 189 | "meta-llama/Llama-3-8b-chat-hf": 8192,
|
226 |
| - "mistralai/Mistral-Small-24B-Instruct-2501": 32768, |
227 |
| - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": 1048576, |
| 190 | + "mistralai/Mistral-7B-Instruct-v0.1": 32768, |
| 191 | + "BAAI/bge-base-en-v1.5-vllm": 512, |
| 192 | + "meta-llama/Llama-2-70b-hf": 4096, |
228 | 193 | "togethercomputer/MoA-1-Turbo": 32768,
|
229 |
| - "perplexity-ai/r1-1776": 163840, |
230 |
| - "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": 32768, |
231 | 194 | "meta-llama/Meta-Llama-3-8B-Instruct-Lite": 8192,
|
232 |
| - "Qwen/Qwen3-235B-A22B-fp8": 40960, |
233 |
| - "Qwen/Qwen3-235B-A22B-fp8-tput": 40960, |
234 |
| - "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": 131072, |
235 |
| - "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF": 32768, |
236 |
| - "scb10x/scb10x-llama3-1-typhoon2-70b-instruct": 8192, |
| 195 | + "black-forest-labs/FLUX.1-kontext-max": 0, |
237 | 196 | "mistralai/Mistral-7B-Instruct-v0.2": 32768,
|
238 | 197 | "deepseek-ai/DeepSeek-V3-p-dp": 131072,
|
239 |
| - "Qwen/Qwen2.5-Coder-32B-Instruct": 16384, |
240 | 198 | "Qwen/Qwen2-72B-Instruct": 32768,
|
241 | 199 | "mistralai/Mistral-7B-Instruct-v0.3": 32768,
|
242 |
| - "Salesforce/Llama-Rank-V1": 8192, |
243 |
| - "meta-llama/Llama-Vision-Free": 131072, |
| 200 | + "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": 32768, |
| 201 | + "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo": 131072, |
244 | 202 | "meta-llama/Llama-Guard-3-11B-Vision-Turbo": 131072,
|
| 203 | + "google/gemma-2-27b-it": 8192, |
| 204 | + "togethercomputer/Refuel-Llm-V2-Small": 8192, |
| 205 | + "Qwen/Qwen2-VL-72B-Instruct": 32768, |
| 206 | + "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": 131072, |
| 207 | + "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo": 131072, |
| 208 | + "scb10x/scb10x-llama3-1-typhoon2-70b-instruct": 8192, |
| 209 | + "arcee-ai/maestro-reasoning": 131072, |
| 210 | + "arcee_ai/arcee-spotlight": 131072, |
| 211 | + "togethercomputer/Refuel-Llm-V2": 16384, |
| 212 | + "arcee-ai/arcee-blitz": 32768, |
| 213 | + "perplexity-ai/r1-1776": 163840, |
| 214 | + "meta-llama/Llama-4-Scout-17B-16E-Instruct": 1048576, |
| 215 | + "Salesforce/Llama-Rank-V1": 8192, |
| 216 | + "Qwen/Qwen2.5-Coder-32B-Instruct": 16384, |
245 | 217 | "meta-llama/Llama-3.2-3B-Instruct-Turbo": 131072,
|
| 218 | + "arcee-ai/virtuoso-medium-v2": 131072, |
| 219 | + "arcee-ai/coder-large": 32768, |
| 220 | + "meta-llama/Llama-Guard-4-12B": 1048576, |
| 221 | + "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B": 131072, |
| 222 | + "arcee-ai/virtuoso-large": 131072, |
| 223 | + "Qwen/Qwen2.5-VL-72B-Instruct": 32768, |
| 224 | + "Qwen/Qwen2.5-72B-Instruct-Turbo": 131072, |
| 225 | + "meta-llama/Llama-3-70b-chat-hf": 8192, |
| 226 | + "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B": 131072, |
| 227 | + "deepseek-ai/DeepSeek-R1-Distill-Llama-70B": 131072, |
| 228 | + "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free": 131072, |
| 229 | + "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF": 32768, |
| 230 | + "Qwen/QwQ-32B": 131072, |
| 231 | + "deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free": 8192, |
| 232 | + "Qwen/Qwen3-235B-A22B-fp8-tput": 40960, |
| 233 | + "mistralai/Mistral-Small-24B-Instruct-2501": 32768, |
| 234 | + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": 1048576, |
| 235 | + "marin-community/marin-8b-instruct": 131072, |
| 236 | + "Qwen/Qwen2.5-7B-Instruct-Turbo": 32768, |
| 237 | + "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": 131072, |
246 | 238 | "meta-llama/Llama-3.3-70B-Instruct-Turbo": 131072,
|
247 |
| - "meta-llama/Llama-4-Scout-17B-16E-Instruct": 1048576, |
248 |
| - "meta-llama/Llama-2-70b-hf": 4096, |
249 |
| - "arcee_ai/arcee-spotlight": 131072 |
| 239 | + "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": 130815, |
| 240 | + "scb10x/scb10x-typhoon-2-1-gemma3-12b": 8192, |
| 241 | + "arcee-ai/caller": 32768, |
| 242 | + "black-forest-labs/FLUX.1-kontext-pro": 0, |
| 243 | + "lgai/exaone-deep-32b": 32768, |
| 244 | + "lgai/exaone-3-5-32b-instruct": 32768 |
250 | 245 | },
|
251 | 246 | "fireworks-ai": {
|
252 | 247 | "accounts/fireworks/models/qwq-32b": 131072,
|
|
258 | 253 | "accounts/fireworks/models/llama4-scout-instruct-basic": 1048576,
|
259 | 254 | "accounts/fireworks/models/qwen2-vl-72b-instruct": 32768,
|
260 | 255 | "accounts/fireworks/models/firesearch-ocr-v6": 131072,
|
| 256 | + "accounts/fireworks/models/deepseek-r1-0528": 163840, |
261 | 257 | "accounts/fireworks/models/deepseek-v3": 131072,
|
262 | 258 | "accounts/fireworks/models/llama-v3p1-8b-instruct": 131072,
|
263 | 259 | "accounts/fireworks/models/llama-v3p1-70b-instruct": 131072,
|
|
267 | 263 | "accounts/fireworks/models/deepseek-r1": 163840,
|
268 | 264 | "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new": 131072,
|
269 | 265 | "accounts/perplexity/models/r1-1776": 163840,
|
| 266 | + "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b": 131072, |
270 | 267 | "accounts/fireworks/models/llama-v3p1-405b-instruct": 131072,
|
271 | 268 | "accounts/fireworks/models/mixtral-8x22b-instruct": 65536,
|
272 | 269 | "accounts/fireworks/models/qwen2p5-72b-instruct": 32768
|
|
0 commit comments