Skip to content

Commit b9be167

Browse files
committed
fix(models): Update token limits and pricing for Llama models; add GLM 4 and GLM Z1 models
1 parent 236a316 commit b9be167

File tree

2 files changed

+106
-16
lines changed

2 files changed

+106
-16
lines changed

docs/guides/available-models.md

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ For more information about working with models, see the [Working with Models](/g
4949
Last updated: 2025-04-25
5050
{: .label .label-green }
5151

52-
### Chat Models (439)
52+
### Chat Models (441)
5353

5454
| ID | Type | Name | Provider | Context | MaxTok | Family | In$/M | Out$/M |
5555
| :-- | :-- | :-- | :-- | --: | --: | :-- | --: | --: |
@@ -309,7 +309,7 @@ Last updated: 2025-04-25
309309
| liquid/lfm-40b | chat | Liquid: LFM 40B MoE | openrouter | 32768 | | other | 0.15 | 0.15 |
310310
| liquid/lfm-7b | chat | Liquid: LFM 7B | openrouter | 32768 | | other | 0.01 | 0.01 |
311311
| mancer/weaver | chat | Mancer: Weaver (alpha) | openrouter | 8000 | 1000 | other | 1.12 | 1.12 |
312-
| meta-llama/llama-2-13b-chat | chat | Meta: Llama 2 13B Chat | openrouter | 4096 | 2048 | other | 0.22 | 0.22 |
312+
| meta-llama/llama-2-13b-chat | chat | Meta: Llama 2 13B Chat | openrouter | 4096 | | other | 0.30 | 0.30 |
313313
| meta-llama/llama-2-70b-chat | chat | Meta: Llama 2 70B Chat | openrouter | 4096 | | other | 0.90 | 0.90 |
314314
| meta-llama/llama-3-70b-instruct | chat | Meta: Llama 3 70B Instruct | openrouter | 8192 | 16384 | other | 0.30 | 0.40 |
315315
| meta-llama/llama-3-8b-instruct | chat | Meta: Llama 3 8B Instruct | openrouter | 8192 | 16384 | other | 0.03 | 0.06 |
@@ -322,7 +322,7 @@ Last updated: 2025-04-25
322322
| meta-llama/llama-3.2-11b-vision-instruct | chat | Meta: Llama 3.2 11B Vision Instruct | openrouter | 131072 | 16384 | other | 0.05 | 0.05 |
323323
| meta-llama/llama-3.2-11b-vision-instruct:free | chat | Meta: Llama 3.2 11B Vision Instruct (free) | openrouter | 131072 | 2048 | other | 0.00 | 0.00 |
324324
| meta-llama/llama-3.2-1b-instruct | chat | Meta: Llama 3.2 1B Instruct | openrouter | 131072 | | other | 0.01 | 0.01 |
325-
| meta-llama/llama-3.2-1b-instruct:free | chat | Meta: Llama 3.2 1B Instruct (free) | openrouter | 131072 | 131072 | other | 0.00 | 0.00 |
325+
| meta-llama/llama-3.2-1b-instruct:free | chat | Meta: Llama 3.2 1B Instruct (free) | openrouter | 131000 | | other | 0.00 | 0.00 |
326326
| meta-llama/llama-3.2-3b-instruct | chat | Meta: Llama 3.2 3B Instruct | openrouter | 131072 | 131072 | other | 0.02 | 0.02 |
327327
| meta-llama/llama-3.2-3b-instruct:free | chat | Meta: Llama 3.2 3B Instruct (free) | openrouter | 20000 | 20000 | other | 0.00 | 0.00 |
328328
| meta-llama/llama-3.2-90b-vision-instruct | chat | Meta: Llama 3.2 90B Vision Instruct | openrouter | 131072 | | other | 0.90 | 0.90 |
@@ -482,7 +482,9 @@ Last updated: 2025-04-25
482482
| thedrummer/rocinante-12b | chat | Rocinante 12B | openrouter | 32768 | | other | 0.25 | 0.50 |
483483
| thedrummer/skyfall-36b-v2 | chat | TheDrummer: Skyfall 36B V2 | openrouter | 32768 | 32768 | other | 0.50 | 0.80 |
484484
| thedrummer/unslopnemo-12b | chat | Unslopnemo 12B | openrouter | 32000 | | other | 0.50 | 0.50 |
485+
| thudm/glm-4-32b | chat | THUDM: GLM 4 32B | openrouter | 32000 | | other | 0.24 | 0.24 |
485486
| thudm/glm-4-32b:free | chat | THUDM: GLM 4 32B (free) | openrouter | 32768 | | other | 0.00 | 0.00 |
487+
| thudm/glm-z1-32b | chat | THUDM: GLM Z1 32B | openrouter | 32000 | | other | 0.24 | 0.24 |
486488
| thudm/glm-z1-32b:free | chat | THUDM: GLM Z1 32B (free) | openrouter | 32768 | | other | 0.00 | 0.00 |
487489
| undi95/remm-slerp-l2-13b | chat | ReMM SLERP 13B | openrouter | 6144 | 1024 | other | 0.56 | 1.12 |
488490
| undi95/toppy-m-7b | chat | Toppy M 7B | openrouter | 4096 | | other | 0.07 | 0.07 |
@@ -720,7 +722,7 @@ Last updated: 2025-04-25
720722
| anthropic.claude-v2:1:200k | chat | Claude | bedrock | 200000 | 4096 | claude2 | 8.00 | 24.00 |
721723
| us.anthropic.claude-3-7-sonnet-20250219-v1:0 | chat | Claude 3.7 Sonnet | bedrock | 200000 | 4096 | claude3_sonnet | 3.00 | 15.00 |
722724

723-
### Openrouter Models (301)
725+
### Openrouter Models (303)
724726

725727
| ID | Type | Name | Provider | Context | MaxTok | Family | In$/M | Out$/M |
726728
| :-- | :-- | :-- | :-- | --: | --: | :-- | --: | --: |
@@ -842,7 +844,7 @@ Last updated: 2025-04-25
842844
| liquid/lfm-40b | chat | Liquid: LFM 40B MoE | openrouter | 32768 | | other | 0.15 | 0.15 |
843845
| liquid/lfm-7b | chat | Liquid: LFM 7B | openrouter | 32768 | | other | 0.01 | 0.01 |
844846
| mancer/weaver | chat | Mancer: Weaver (alpha) | openrouter | 8000 | 1000 | other | 1.12 | 1.12 |
845-
| meta-llama/llama-2-13b-chat | chat | Meta: Llama 2 13B Chat | openrouter | 4096 | 2048 | other | 0.22 | 0.22 |
847+
| meta-llama/llama-2-13b-chat | chat | Meta: Llama 2 13B Chat | openrouter | 4096 | | other | 0.30 | 0.30 |
846848
| meta-llama/llama-2-70b-chat | chat | Meta: Llama 2 70B Chat | openrouter | 4096 | | other | 0.90 | 0.90 |
847849
| meta-llama/llama-3-70b-instruct | chat | Meta: Llama 3 70B Instruct | openrouter | 8192 | 16384 | other | 0.30 | 0.40 |
848850
| meta-llama/llama-3-8b-instruct | chat | Meta: Llama 3 8B Instruct | openrouter | 8192 | 16384 | other | 0.03 | 0.06 |
@@ -855,7 +857,7 @@ Last updated: 2025-04-25
855857
| meta-llama/llama-3.2-11b-vision-instruct | chat | Meta: Llama 3.2 11B Vision Instruct | openrouter | 131072 | 16384 | other | 0.05 | 0.05 |
856858
| meta-llama/llama-3.2-11b-vision-instruct:free | chat | Meta: Llama 3.2 11B Vision Instruct (free) | openrouter | 131072 | 2048 | other | 0.00 | 0.00 |
857859
| meta-llama/llama-3.2-1b-instruct | chat | Meta: Llama 3.2 1B Instruct | openrouter | 131072 | | other | 0.01 | 0.01 |
858-
| meta-llama/llama-3.2-1b-instruct:free | chat | Meta: Llama 3.2 1B Instruct (free) | openrouter | 131072 | 131072 | other | 0.00 | 0.00 |
860+
| meta-llama/llama-3.2-1b-instruct:free | chat | Meta: Llama 3.2 1B Instruct (free) | openrouter | 131000 | | other | 0.00 | 0.00 |
859861
| meta-llama/llama-3.2-3b-instruct | chat | Meta: Llama 3.2 3B Instruct | openrouter | 131072 | 131072 | other | 0.02 | 0.02 |
860862
| meta-llama/llama-3.2-3b-instruct:free | chat | Meta: Llama 3.2 3B Instruct (free) | openrouter | 20000 | 20000 | other | 0.00 | 0.00 |
861863
| meta-llama/llama-3.2-90b-vision-instruct | chat | Meta: Llama 3.2 90B Vision Instruct | openrouter | 131072 | | other | 0.90 | 0.90 |
@@ -1015,7 +1017,9 @@ Last updated: 2025-04-25
10151017
| thedrummer/rocinante-12b | chat | Rocinante 12B | openrouter | 32768 | | other | 0.25 | 0.50 |
10161018
| thedrummer/skyfall-36b-v2 | chat | TheDrummer: Skyfall 36B V2 | openrouter | 32768 | 32768 | other | 0.50 | 0.80 |
10171019
| thedrummer/unslopnemo-12b | chat | Unslopnemo 12B | openrouter | 32000 | | other | 0.50 | 0.50 |
1020+
| thudm/glm-4-32b | chat | THUDM: GLM 4 32B | openrouter | 32000 | | other | 0.24 | 0.24 |
10181021
| thudm/glm-4-32b:free | chat | THUDM: GLM 4 32B (free) | openrouter | 32768 | | other | 0.00 | 0.00 |
1022+
| thudm/glm-z1-32b | chat | THUDM: GLM Z1 32B | openrouter | 32000 | | other | 0.24 | 0.24 |
10191023
| thudm/glm-z1-32b:free | chat | THUDM: GLM Z1 32B (free) | openrouter | 32768 | | other | 0.00 | 0.00 |
10201024
| undi95/remm-slerp-l2-13b | chat | ReMM SLERP 13B | openrouter | 6144 | 1024 | other | 0.56 | 1.12 |
10211025
| undi95/toppy-m-7b | chat | Toppy M 7B | openrouter | 4096 | | other | 0.07 | 0.07 |

lib/ruby_llm/models.json

Lines changed: 96 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8779,14 +8779,14 @@
87798779
"display_name": "Meta: Llama 2 13B Chat",
87808780
"provider": "openrouter",
87818781
"context_window": 4096,
8782-
"max_tokens": 2048,
8782+
"max_tokens": null,
87838783
"type": "chat",
87848784
"family": "other",
87858785
"supports_vision": false,
87868786
"supports_functions": true,
87878787
"supports_json_mode": true,
8788-
"input_price_per_million": 0.22,
8789-
"output_price_per_million": 0.22,
8788+
"input_price_per_million": 0.3,
8789+
"output_price_per_million": 0.3,
87908790
"metadata": {
87918791
"object": null,
87928792
"description": "A 13 billion parameter language model from Meta, fine tuned for chat completions",
@@ -8802,16 +8802,16 @@
88028802
"instruct_type": "llama2"
88038803
},
88048804
"pricing": {
8805-
"prompt": "0.00000022",
8806-
"completion": "0.00000022",
8805+
"prompt": "0.0000003",
8806+
"completion": "0.0000003",
88078807
"request": "0",
88088808
"image": "0",
88098809
"web_search": "0",
88108810
"internal_reasoning": "0"
88118811
},
88128812
"top_provider": {
88138813
"context_length": 4096,
8814-
"max_completion_tokens": 2048,
8814+
"max_completion_tokens": null,
88158815
"is_moderated": false
88168816
}
88178817
}
@@ -9339,8 +9339,8 @@
93399339
"created_at": "2024-09-25T02:00:00+02:00",
93409340
"display_name": "Meta: Llama 3.2 1B Instruct (free)",
93419341
"provider": "openrouter",
9342-
"context_window": 131072,
9343-
"max_tokens": 131072,
9342+
"context_window": 131000,
9343+
"max_tokens": null,
93449344
"type": "chat",
93459345
"family": "other",
93469346
"supports_vision": false,
@@ -9371,8 +9371,8 @@
93719371
"internal_reasoning": "0"
93729372
},
93739373
"top_provider": {
9374-
"context_length": 131072,
9375-
"max_completion_tokens": 131072,
9374+
"context_length": 131000,
9375+
"max_completion_tokens": null,
93769376
"is_moderated": false
93779377
}
93789378
}
@@ -16278,6 +16278,49 @@
1627816278
}
1627916279
}
1628016280
},
16281+
{
16282+
"id": "thudm/glm-4-32b",
16283+
"created_at": "2025-04-17T22:15:15+02:00",
16284+
"display_name": "THUDM: GLM 4 32B",
16285+
"provider": "openrouter",
16286+
"context_window": 32000,
16287+
"max_tokens": null,
16288+
"type": "chat",
16289+
"family": "other",
16290+
"supports_vision": false,
16291+
"supports_functions": true,
16292+
"supports_json_mode": true,
16293+
"input_price_per_million": 0.24,
16294+
"output_price_per_million": 0.24,
16295+
"metadata": {
16296+
"object": null,
16297+
"description": "GLM-4-32B-0414 is a 32B bilingual (Chinese-English) open-weight language model optimized for code generation, function calling, and agent-style tasks. Pretrained on 15T of high-quality and reasoning-heavy data, it was further refined using human preference alignment, rejection sampling, and reinforcement learning. The model excels in complex reasoning, artifact generation, and structured output tasks, achieving performance comparable to GPT-4o and DeepSeek-V3-0324 across several benchmarks.",
16298+
"architecture": {
16299+
"modality": "text->text",
16300+
"input_modalities": [
16301+
"text"
16302+
],
16303+
"output_modalities": [
16304+
"text"
16305+
],
16306+
"tokenizer": "Other",
16307+
"instruct_type": null
16308+
},
16309+
"pricing": {
16310+
"prompt": "0.00000024",
16311+
"completion": "0.00000024",
16312+
"request": "0",
16313+
"image": "0",
16314+
"web_search": "0",
16315+
"internal_reasoning": "0"
16316+
},
16317+
"top_provider": {
16318+
"context_length": 32000,
16319+
"max_completion_tokens": null,
16320+
"is_moderated": false
16321+
}
16322+
}
16323+
},
1628116324
{
1628216325
"id": "thudm/glm-4-32b:free",
1628316326
"created_at": "2025-04-17T22:15:15+02:00",
@@ -16321,6 +16364,49 @@
1632116364
}
1632216365
}
1632316366
},
16367+
{
16368+
"id": "thudm/glm-z1-32b",
16369+
"created_at": "2025-04-17T23:09:08+02:00",
16370+
"display_name": "THUDM: GLM Z1 32B",
16371+
"provider": "openrouter",
16372+
"context_window": 32000,
16373+
"max_tokens": null,
16374+
"type": "chat",
16375+
"family": "other",
16376+
"supports_vision": false,
16377+
"supports_functions": true,
16378+
"supports_json_mode": true,
16379+
"input_price_per_million": 0.24,
16380+
"output_price_per_million": 0.24,
16381+
"metadata": {
16382+
"object": null,
16383+
"description": "GLM-Z1-32B-0414 is an enhanced reasoning variant of GLM-4-32B, built for deep mathematical, logical, and code-oriented problem solving. It applies extended reinforcement learning—both task-specific and general pairwise preference-based—to improve performance on complex multi-step tasks. Compared to the base GLM-4-32B model, Z1 significantly boosts capabilities in structured reasoning and formal domains.\n\nThe model supports enforced “thinking” steps via prompt engineering and offers improved coherence for long-form outputs. It’s optimized for use in agentic workflows, and includes support for long context (via YaRN), JSON tool calling, and fine-grained sampling configuration for stable inference. Ideal for use cases requiring deliberate, multi-step reasoning or formal derivations.",
16384+
"architecture": {
16385+
"modality": "text->text",
16386+
"input_modalities": [
16387+
"text"
16388+
],
16389+
"output_modalities": [
16390+
"text"
16391+
],
16392+
"tokenizer": "Other",
16393+
"instruct_type": "deepseek-r1"
16394+
},
16395+
"pricing": {
16396+
"prompt": "0.00000024",
16397+
"completion": "0.00000024",
16398+
"request": "0",
16399+
"image": "0",
16400+
"web_search": "0",
16401+
"internal_reasoning": "0"
16402+
},
16403+
"top_provider": {
16404+
"context_length": 32000,
16405+
"max_completion_tokens": null,
16406+
"is_moderated": false
16407+
}
16408+
}
16409+
},
1632416410
{
1632516411
"id": "thudm/glm-z1-32b:free",
1632616412
"created_at": "2025-04-17T23:09:08+02:00",

0 commit comments

Comments
 (0)