diff --git a/CHANGELOG.md b/CHANGELOG.md
index ec13dd3..9fdce1d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,7 +2,12 @@
 
 This is the changelog for the open source version of tiktoken.
 
+## [v0.10.0]
+
+- Support for gpt-4.1 and gpt-4.5 models
+
 ## [v0.9.0]
+
 - Support for `o1` and `o3` models
 - Better error messages when loading invalid vocabulary files
 - Support for encoding to numpy arrays
diff --git a/tests/test_misc.py b/tests/test_misc.py
index 7da5389..01f7f03 100644
--- a/tests/test_misc.py
+++ b/tests/test_misc.py
@@ -15,8 +15,17 @@ def test_encoding_for_model():
     assert enc.name == "cl100k_base"
     enc = tiktoken.encoding_for_model("gpt-4")
     assert enc.name == "cl100k_base"
-    enc = tiktoken.encoding_for_model("gpt-4o")
-    assert enc.name == "o200k_base"
+
+    for model in [
+        "gpt-4o",
+        "gpt-4o-mini",
+        "gpt-4.1",
+        "gpt-4.1-mini",
+        "gpt-4.1-nano",
+        "gpt-4.5-preview",
+    ]:
+        enc = tiktoken.encoding_for_model(model)
+        assert enc.name == "o200k_base", f"{model} should use o200k_base"
 
 
 def test_optional_blobfile_dependency():
diff --git a/tiktoken/model.py b/tiktoken/model.py
index 4298ae7..a96cc9e 100644
--- a/tiktoken/model.py
+++ b/tiktoken/model.py
@@ -11,6 +11,8 @@
     "chatgpt-4o-": "o200k_base",
     "gpt-4o-": "o200k_base",  # e.g., gpt-4o-2024-05-13
     "gpt-4-": "cl100k_base",  # e.g., gpt-4-0314, etc., plus gpt-4-32k
+    "gpt-4.1-": "o200k_base",  # e.g., gpt-4.1-nano, gpt-4.1-mini
+    "gpt-4.5-": "o200k_base",  # e.g., gpt-4.5-preview
     "gpt-3.5-turbo-": "cl100k_base",  # e.g, gpt-3.5-turbo-0301, -0401, etc.
     "gpt-35-turbo-": "cl100k_base",  # Azure deployment name
     # fine-tuned
@@ -28,6 +30,8 @@
     # chat
     "gpt-4o": "o200k_base",
     "gpt-4": "cl100k_base",
+    "gpt-4.1": "o200k_base",
+    "gpt-4.5": "o200k_base",
     "gpt-3.5-turbo": "cl100k_base",
     "gpt-3.5": "cl100k_base",  # Common shorthand
     "gpt-35-turbo": "cl100k_base",  # Azure deployment name