|
21 | 21 | "nebius": {
|
22 | 22 | "meta-llama/Meta-Llama-3.1-8B-Instruct-fast": 131072,
|
23 | 23 | "meta-llama/Meta-Llama-3.1-8B-Instruct": 131072,
|
24 |
| - "meta-llama/Meta-Llama-3.1-70B-Instruct-fast": 131072, |
25 | 24 | "meta-llama/Meta-Llama-3.1-70B-Instruct": 131072,
|
26 | 25 | "meta-llama/Meta-Llama-3.1-405B-Instruct": 131072,
|
27 | 26 | "meta-llama/Llama-Guard-3-8B": 131072,
|
28 |
| - "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF-fast": 131072, |
29 |
| - "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF": 131072, |
30 | 27 | "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1": 131072,
|
31 |
| - "mistralai/Mistral-Nemo-Instruct-2407-fast": 128000, |
32 | 28 | "mistralai/Mistral-Nemo-Instruct-2407": 128000,
|
33 |
| - "mistralai/Mixtral-8x7B-Instruct-v0.1-fast": 32768, |
34 |
| - "mistralai/Mixtral-8x7B-Instruct-v0.1": 32768, |
35 |
| - "mistralai/Mixtral-8x22B-Instruct-v0.1-fast": 65536, |
36 |
| - "mistralai/Mixtral-8x22B-Instruct-v0.1": 65536, |
37 |
| - "allenai/OLMo-7B-Instruct-hf": 2048, |
38 |
| - "microsoft/Phi-3-mini-4k-instruct-fast": 4096, |
39 |
| - "microsoft/Phi-3-mini-4k-instruct": 4096, |
40 |
| - "microsoft/Phi-3-medium-128k-instruct-fast": 131072, |
41 |
| - "microsoft/Phi-3-medium-128k-instruct": 131072, |
42 |
| - "google/gemma-2-2b-it-fast": 8192, |
43 | 29 | "google/gemma-2-2b-it": 8192,
|
44 | 30 | "google/gemma-2-9b-it-fast": 8192,
|
45 |
| - "google/gemma-2-9b-it": 8192, |
46 |
| - "google/gemma-2-27b-it-fast": 8192, |
47 |
| - "google/gemma-2-27b-it": 8192, |
48 |
| - "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct-fast": 128000, |
49 |
| - "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct": 128000, |
50 | 31 | "Qwen/Qwen2.5-Coder-7B-fast": 32768,
|
51 | 32 | "Qwen/Qwen2.5-Coder-7B": 32768,
|
52 |
| - "Qwen/Qwen2.5-Coder-7B-Instruct-fast": 32768, |
53 |
| - "Qwen/Qwen2.5-Coder-7B-Instruct": 32768, |
54 | 33 | "Qwen/Qwen2.5-Coder-32B-Instruct-fast": 131072,
|
55 | 34 | "Qwen/Qwen2.5-Coder-32B-Instruct": 131072,
|
56 | 35 | "Qwen/Qwen2.5-32B-Instruct-fast": 131072,
|
57 | 36 | "Qwen/Qwen2.5-32B-Instruct": 131072,
|
58 | 37 | "Qwen/Qwen2.5-72B-Instruct-fast": 131072,
|
59 | 38 | "Qwen/Qwen2.5-72B-Instruct": 131072,
|
60 | 39 | "Qwen/Qwen2-VL-72B-Instruct": 32768,
|
61 |
| - "Qwen/Qwen2-VL-7B-Instruct": 32768, |
62 |
| - "llava-hf/llava-1.5-7b-hf": 4096, |
63 |
| - "llava-hf/llava-1.5-13b-hf": 4096, |
64 |
| - "aaditya/Llama3-OpenBioLLM-8B": 8192, |
65 | 40 | "aaditya/Llama3-OpenBioLLM-70B": 8192,
|
66 | 41 | "BAAI/bge-en-icl": 32768,
|
67 | 42 | "BAAI/bge-multilingual-gemma2": 8192,
|
68 | 43 | "intfloat/e5-mistral-7b-instruct": 32768,
|
69 |
| - "cognitivecomputations/dolphin-2.9.2-mixtral-8x22b": 65536, |
70 |
| - "microsoft/Phi-3.5-MoE-instruct": 131072, |
71 |
| - "microsoft/Phi-3.5-mini-instruct": 131072, |
72 |
| - "Qwen/Qwen2.5-1.5B-Instruct": 32768, |
73 | 44 | "meta-llama/Llama-3.3-70B-Instruct": 131072,
|
74 | 45 | "meta-llama/Llama-3.3-70B-Instruct-fast": 131072,
|
75 |
| - "meta-llama/Llama-3.2-1B-Instruct": 131072, |
76 |
| - "meta-llama/Llama-3.2-3B-Instruct": 131072, |
77 |
| - "Qwen/QwQ-32B-Preview": 32768, |
78 |
| - "Qwen/QVQ-72B-preview": 128000, |
79 | 46 | "microsoft/phi-4": 16384,
|
80 | 47 | "deepseek-ai/DeepSeek-V3": 163840,
|
81 | 48 | "deepseek-ai/DeepSeek-R1": 163840,
|
|
88 | 55 | "Qwen/Qwen3-30B-A3B": 40960,
|
89 | 56 | "Qwen/Qwen3-30B-A3B-fast": 40960,
|
90 | 57 | "Qwen/Qwen3-32B": 40960,
|
| 58 | + "Qwen/Qwen3-32B-fast": 40960, |
91 | 59 | "Qwen/Qwen3-14B": 40960,
|
92 | 60 | "Qwen/Qwen3-4B-fast": 40960,
|
93 | 61 | "nvidia/Llama-3_3-Nemotron-Super-49B-v1": 131072,
|
|
117 | 85 | "deepseek/deepseek-prover-v2-671b": 160000,
|
118 | 86 | "meta-llama/llama-4-scout-17b-16e-instruct": 131072,
|
119 | 87 | "deepseek/deepseek-r1-distill-llama-8b": 32000,
|
120 |
| - "deepseek/deepseek_v3": 64000, |
121 | 88 | "meta-llama/llama-3.1-8b-instruct": 16384,
|
122 | 89 | "deepseek/deepseek-r1-distill-qwen-14b": 64000,
|
123 | 90 | "meta-llama/llama-3.3-70b-instruct": 131072,
|
|
131 | 98 | "google/gemma-2-9b-it": 8192,
|
132 | 99 | "mistralai/mistral-7b-instruct": 32768,
|
133 | 100 | "meta-llama/llama-3-70b-instruct": 8192,
|
134 |
| - "deepseek/deepseek-r1": 64000, |
135 | 101 | "nousresearch/hermes-2-pro-llama-3-8b": 8192,
|
136 | 102 | "sao10k/l3-70b-euryale-v2.1": 8192,
|
137 | 103 | "cognitivecomputations/dolphin-mixtral-8x22b": 16000,
|
|
210 | 176 | "command": 4096
|
211 | 177 | },
|
212 | 178 | "together": {
|
213 |
| - "meta-llama/Llama-4-Scout-17B-16E-Instruct": 1048576, |
214 | 179 | "meta-llama/Llama-Guard-4-12B": 1048576,
|
215 | 180 | "togethercomputer/m2-bert-80M-32k-retrieval": 32768,
|
216 | 181 | "cartesia/sonic": 0,
|
217 | 182 | "scb10x/scb10x-llama3-1-typhoon2-8b-instruct": 8192,
|
218 |
| - "deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free": 8192, |
219 | 183 | "meta-llama-llama-2-70b-hf": 4096,
|
| 184 | + "togethercomputer/Refuel-Llm-V2": 16384, |
220 | 185 | "intfloat/multilingual-e5-large-instruct": 514,
|
221 | 186 | "BAAI/bge-base-en-v1.5": 512,
|
222 | 187 | "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": 130815,
|
|
233 | 198 | "meta-llama/LlamaGuard-2-8b": 8192,
|
234 | 199 | "cartesia/sonic-2": 0,
|
235 | 200 | "togethercomputer/m2-bert-80M-8k-retrieval": 8192,
|
| 201 | + "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B": 131072, |
| 202 | + "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B": 131072, |
| 203 | + "deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free": 8192, |
236 | 204 | "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free": 131072,
|
237 |
| - "togethercomputer/Refuel-Llm-V2": 16384, |
| 205 | + "deepseek-ai/DeepSeek-R1-Distill-Llama-70B": 131072, |
238 | 206 | "deepseek-ai/DeepSeek-V3": 131072,
|
239 | 207 | "togethercomputer/Refuel-Llm-V2-Small": 8192,
|
240 | 208 | "togethercomputer/MoA-1": 32768,
|
|
260 | 228 | "togethercomputer/MoA-1-Turbo": 32768,
|
261 | 229 | "perplexity-ai/r1-1776": 163840,
|
262 | 230 | "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": 32768,
|
263 |
| - "deepseek-ai/DeepSeek-R1-Distill-Llama-70B": 131072, |
264 |
| - "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B": 131072, |
265 | 231 | "meta-llama/Meta-Llama-3-8B-Instruct-Lite": 8192,
|
266 | 232 | "Qwen/Qwen3-235B-A22B-fp8": 40960,
|
267 | 233 | "Qwen/Qwen3-235B-A22B-fp8-tput": 40960,
|
|
270 | 236 | "scb10x/scb10x-llama3-1-typhoon2-70b-instruct": 8192,
|
271 | 237 | "mistralai/Mistral-7B-Instruct-v0.2": 32768,
|
272 | 238 | "deepseek-ai/DeepSeek-V3-p-dp": 131072,
|
273 |
| - "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B": 131072, |
274 | 239 | "Qwen/Qwen2.5-Coder-32B-Instruct": 16384,
|
275 | 240 | "Qwen/Qwen2-72B-Instruct": 32768,
|
276 | 241 | "mistralai/Mistral-7B-Instruct-v0.3": 32768,
|
277 | 242 | "Salesforce/Llama-Rank-V1": 8192,
|
278 | 243 | "meta-llama/Llama-Vision-Free": 131072,
|
279 | 244 | "meta-llama/Llama-Guard-3-11B-Vision-Turbo": 131072,
|
280 | 245 | "meta-llama/Llama-3.2-3B-Instruct-Turbo": 131072,
|
281 |
| - "arcee_ai/arcee-spotlight": 131072, |
282 | 246 | "meta-llama/Llama-3.3-70B-Instruct-Turbo": 131072,
|
283 |
| - "meta-llama/Llama-2-70b-hf": 4096 |
| 247 | + "meta-llama/Llama-4-Scout-17B-16E-Instruct": 1048576, |
| 248 | + "meta-llama/Llama-2-70b-hf": 4096, |
| 249 | + "arcee_ai/arcee-spotlight": 131072 |
284 | 250 | },
|
285 | 251 | "fireworks-ai": {
|
286 | 252 | "accounts/fireworks/models/qwq-32b": 131072,
|
|
0 commit comments