Skip to content

Fix issue 10264 unicode docode error #10350

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion litellm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@
# Python 3.9+
with resources.files("litellm.litellm_core_utils.tokenizers").joinpath(
"anthropic_tokenizer.json"
).open("r") as f:
).open("r", encoding="utf-8") as f:
json_data = json.load(f)
except (ImportError, AttributeError, TypeError):
with resources.open_text(
Expand Down
49 changes: 48 additions & 1 deletion tests/litellm_utils_tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2179,4 +2179,51 @@ def test_get_valid_models_from_dynamic_api_key():
assert len(valid_models) > 0
assert "anthropic/claude-3-7-sonnet-20250219" in valid_models



def test_anthropic_tokenizer_json_utf8_readable(monkeypatch):
"""
Test that UTF-8 strings in anthropic_tokenizer.json are correctly read
"""
import sys
import types
import json

# JSON with UTF-8 strings for testing
test_json_content = '{"test": "日本語テキスト"}'

# Mock resources.files(...).joinpath(...).open(...)
class DummyFile:
def __enter__(self):
import io
return io.StringIO(test_json_content)
def __exit__(self, exc_type, exc_val, exc_tb):
pass

class DummyPath:
def open(self, mode, encoding=None):
assert encoding == "utf-8"
return DummyFile()

class DummyFiles:
def joinpath(self, filename):
assert filename == "anthropic_tokenizer.json"
return DummyPath()

class DummyResources:
@staticmethod
def files(pkg):
assert pkg == "litellm.litellm_core_utils.tokenizers"
return DummyFiles()

# Mock the resources module
monkeypatch.setattr("litellm.utils.resources", DummyResources)

# Reload utils.py to apply the monkeypatched resources
import importlib
import litellm.utils
importlib.reload(litellm.utils)

# Verify that json_data was read correctly
assert hasattr(litellm.utils, "json_data")
assert litellm.utils.json_data["test"] == "日本語テキスト"

Loading