Commit 697ef76

Authored by Aaron Pham
[Refactor][V1] Move outlines utils for V1 imports (#20878)
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
1 parent a99b9f7 commit 697ef76

File tree

vllm/v1/structured_output/backend_outlines.py
vllm/v1/structured_output/utils.py

2 files changed: +204 -5 lines changed

vllm/v1/structured_output/backend_outlines.py

Lines changed: 5 additions & 4 deletions
@@ -13,13 +13,14 @@
 import torch
 from regex import escape as regex_escape
 
-from vllm.model_executor.guided_decoding.outlines_logits_processors import (
-    OutlinesVocabulary, get_cache, get_vocabulary)
 from vllm.sampling_params import SamplingParams
 from vllm.utils import LazyLoader
 from vllm.v1.structured_output.backend_types import (StructuredOutputBackend,
                                                      StructuredOutputGrammar,
                                                      StructuredOutputOptions)
+from vllm.v1.structured_output.utils import (OutlinesVocabulary,
+                                             get_outlines_cache,
+                                             get_outlines_vocabulary)
 
 if TYPE_CHECKING:
     import outlines_core as oc
@@ -47,8 +48,8 @@
 class OutlinesBackend(StructuredOutputBackend):
 
     def __post_init__(self):
-        self.vocabulary = get_vocabulary(self.tokenizer)
-        self.cache = get_cache()
+        self.vocabulary = get_outlines_vocabulary(self.tokenizer)
+        self.cache = get_outlines_cache()
 
     def _compile_index(self, regex_string: str,
                        vocabulary: OutlinesVocabulary) -> oc.Index:
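
The helpers that backend_outlines.py previously imported from vllm.model_executor.guided_decoding.outlines_logits_processors are now provided by vllm.v1.structured_output.utils, so the V1 backend no longer depends on the V0 guided-decoding module. A minimal usage sketch of the relocated helpers (not part of this diff), assuming transformers is installed and "gpt2" resolves to a Hugging Face tokenizer:

# Sketch only: exercises the relocated helpers outside the backend.
from transformers import AutoTokenizer

from vllm.v1.structured_output.utils import (get_outlines_cache,
                                             get_outlines_vocabulary)

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # assumed model name
vocabulary = get_outlines_vocabulary(tokenizer)    # OutlinesVocabulary wrapper
cache = get_outlines_cache()                       # in-memory LRUCache by default
print(type(vocabulary).__name__, type(cache).__name__)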

vllm/v1/structured_output/utils.py

Lines changed: 199 additions & 1 deletion
@@ -3,7 +3,205 @@
 
 from __future__ import annotations
 
+import hashlib
+import importlib.metadata
+import os
+from typing import TYPE_CHECKING
+
 import regex as re
+from cachetools import LRUCache
+from diskcache import Cache
+
+import vllm.envs as envs
+from vllm.logger import init_logger
+from vllm.utils import LazyLoader
+
+if TYPE_CHECKING:
+    import outlines_core as oc
+    import transformers.file_utils as file_utils
+    import transformers.models.gpt2.tokenization_gpt2 as tokenization_gpt2
+
+    from vllm.transformers_utils.tokenizer import AnyTokenizer
+else:
+    oc = LazyLoader("oc", globals(), "outlines_core")
+    file_utils = LazyLoader("file_utils", globals(), "transformers.file_utils")
+    tokenization_gpt2 = LazyLoader(
+        "tokenization_gpt2",
+        globals(),
+        "transformers.models.gpt2.tokenization_gpt2",
+    )
+
+logger = init_logger(__name__)
+
+CACHE = None
+
+
+class OutlinesVocabulary:
+    """
+    Wrapper class for `outlines_core.Vocabulary`,
+    which allows us to store a hash with the vocabulary
+    """
+
+    def __init__(self, vocabulary: oc.Vocabulary) -> None:
+        # Actual vocabulary object
+        self.inner = vocabulary
+        # Use a sha256 digest of the repr as a stable, non-negative
+        # cache key (Python's built-in hash() can be negative).
+        hex_str = hashlib.sha256(
+            vocabulary.__repr__().encode('utf-8')).hexdigest()
+        hash_int = int(hex_str, 16)
+        self._hash = hash_int
+
+
+def get_outlines_cache_path() -> str:
+    """Get the directory path used to cache previously-computed indexes."""
+    outlines_cache_dir = os.getenv("OUTLINES_CACHE_DIR")
+    xdg_cache_home = os.getenv("XDG_CACHE_HOME")
+    home_dir = os.path.expanduser("~")
+
+    if outlines_cache_dir:
+        # OUTLINES_CACHE_DIR takes precedence
+        return outlines_cache_dir
+    elif xdg_cache_home:
+        return os.path.join(xdg_cache_home, ".cache", "outlines")
+    # If home_dir is "/", we may be inside a container, and writing to
+    # root would be problematic, so we fall back to a temp directory.
+    # Also validate that the path exists, since os.path.expanduser does
+    # not guarantee existence.
+    elif os.path.isdir(home_dir) and home_dir != "/":
+        # Default Unix fallback: ~/.cache/outlines
+        return os.path.join(home_dir, ".cache", "outlines")
+    else:
+        import tempfile
+
+        # home_dir may be "/" inside a docker container without an existing user
+        tempdir = tempfile.gettempdir()
+        return os.path.join(tempdir, ".cache", "outlines")
+
+
+def get_outlines_cache():
+    """Get the Cache instance to be used for index caching"""
+
+    cache_dir = get_outlines_cache_path()
+    if envs.VLLM_V1_USE_OUTLINES_CACHE:
+        logger.warning("Enabling outlines cache. This is an unbounded on-disk "
+                       "cache. It may consume a lot of disk space and should "
+                       "not be used with untrusted clients.")
+        cache = Cache(cache_dir, eviction_policy="none", cull_limit=0)
+        outlines_version = importlib.metadata.version("outlines_core")
+
+        cached_version = cache.get('__version__', None)
+        if cached_version != outlines_version:
+            cache.clear()
+        cache.set('__version__', outlines_version)
+        return cache
+    else:
+        return LRUCache(maxsize=128)
+
+
+re_llama_byte_token = re.compile(r"^<0x[0-9A-F]{2}>$")
+re_replacement_seq = re.compile(r"^.{0,6}�+.{0,6}$")
+
+
+def _reduced_vocabulary(
+    tokenizer: AnyTokenizer,
+    eos_token_id: int,
+) -> dict[bytes, list[int]]:
+    """Create a map from vocabulary tokens to lists of equivalent token ids.
+
+    Returns:
+        A dict of token bytes -> equivalent token ids
+    """
+
+    unicode_to_bytes = {
+        v: k
+        for k, v in tokenization_gpt2.bytes_to_unicode().items()
+    }
+
+    def convert_token_to_string(token: str) -> str:
+
+        string = tokenizer.convert_tokens_to_string([token])
+
+        # A hack to handle missing spaces in HF's Llama tokenizers
+        if (type(token) is str
+                and token.startswith(file_utils.SPIECE_UNDERLINE)
+                or token == "<0x20>"):
+            return " " + string
+
+        return string
+
+    vocabulary: dict[bytes, list[int]] = {}
+    empty_token_ids: list[int] = []
+    for token, token_idx in tokenizer.get_vocab().items():
+        if token in tokenizer.all_special_tokens:  # type: ignore
+            continue
+
+        token_str = convert_token_to_string(token)
+        if token_str:
+            if isinstance(token, (bytes, bytearray)):
+                # For BPE tokenizers where tokens are stored as bytes.
+
+                # safe to ignore since token_str is of type (bytearray, bytes)
+                # by this point.
+                token_bytes = bytes(token_str)  # type: ignore[arg-type]
+
+            elif "\ufffd" in token_str and not re_replacement_seq.match(
+                    token_str):
+                # Handle tokens with invalid UTF-8 sequences.
+                if re_llama_byte_token.match(token):
+                    # Llama-like tokenizers use <0xXX> for incomplete sequences.
+                    token_bytes = bytes([int(token[3:5], 16)])
+                else:
+                    # GPT2 tokenizers: map each byte back using unicode_to_bytes
+                    byte_vals = [unicode_to_bytes.get(c) for c in token]
+                    if None in byte_vals:
+                        raise RuntimeError(
+                            f"Cannot convert token `{token}`"
+                            f" ({token_idx}) to bytes: {token_str}")
+                    # safe to ignore, since if None is in byte_vals,
+                    # an error is raised above.
+                    token_bytes = bytes(byte_vals)  # type: ignore[arg-type]
+            else:
+                token_bytes = token_str.encode('utf-8')
+
+            if token_idx != eos_token_id:
+                vocabulary.setdefault(token_bytes, []).append(token_idx)
+        else:
+            empty_token_ids.append(token_idx)
+
+    return vocabulary
+
+
+def get_outlines_vocabulary(tokenizer: AnyTokenizer) -> oc.Vocabulary:
+    """Get the `Vocabulary` object for a given tokenizer."""
+    if hasattr(tokenizer, "_outlines_vocabulary"):
+        return tokenizer._outlines_vocabulary  # type: ignore
+
+    try:
+        if hasattr(
+                tokenizer,
+                "eos_token_id",
+        ) and tokenizer.eos_token_id is not None:
+            eos_token_id = tokenizer.eos_token_id
+        else:
+            raise ValueError(
+                f"Error during structured outputs setup for outlines: Tokenizer ({type(tokenizer)}) has no `eos_token_id` property, but `eos_token_id` is required for structured outputs to work properly."  # noqa: E501
+            )
+
+        reduced_vocab = _reduced_vocabulary(
+            tokenizer,
+            eos_token_id  # type: ignore
+        )
+        vocabulary = OutlinesVocabulary(
+            oc.Vocabulary(eos_token_id, reduced_vocab))
+        tokenizer._outlines_vocabulary = vocabulary  # type: ignore
+
+        return vocabulary
+    except AttributeError as e:
+        raise ValueError(f"Cannot get the vocabulary of the tokenizer "
+                         f"({type(tokenizer)}). The tokenizer should have a "
+                         "get_vocab method.") from e
 
 
 def grammar_is_likely_lark(grammar_str: str) -> bool:
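
The new cache helpers resolve their location in a fixed order: OUTLINES_CACHE_DIR first, then XDG_CACHE_HOME, then ~/.cache/outlines when the home directory exists and is not "/", and finally a temporary directory. The on-disk cache is only used when envs.VLLM_V1_USE_OUTLINES_CACHE is enabled; otherwise an in-memory LRU cache is returned. A small sketch of that behaviour (not part of this diff), assuming it is acceptable to set the environment variable in-process and that /tmp/outlines-cache is only an illustrative path:

# Sketch only: how the cache location and cache type are selected.
import os

from vllm.v1.structured_output.utils import (get_outlines_cache,
                                             get_outlines_cache_path)

os.environ["OUTLINES_CACHE_DIR"] = "/tmp/outlines-cache"  # hypothetical path, takes precedence
print(get_outlines_cache_path())                          # -> /tmp/outlines-cache

cache = get_outlines_cache()
# With envs.VLLM_V1_USE_OUTLINES_CACHE disabled this is an in-memory
# cachetools.LRUCache(maxsize=128); enabling it switches to an unbounded
# diskcache.Cache rooted at the path above.
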
@@ -77,7 +275,7 @@ def check_quotes(text: str, rule_name: str, line_num: int) -> None:
             raise ValueError(
                 f"Mismatched quotes in {rule_name} on line {line_num}")
 
-    def extract_references(text: str) -> set:
+    def extract_references(text: str) -> set[str]:
         """Extract rule references from text."""
         # Remove quoted strings and special characters
         text = re.sub(r'"[^"]*"', '', text)
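
_reduced_vocabulary recovers the raw bytes of each token through two paths: Llama-style byte tokens of the form <0xNN> are decoded from their hex suffix, while GPT-2-style tokens are mapped back through the inverse of transformers' bytes_to_unicode table. A standalone sketch of those two conversions (not part of this diff), assuming transformers is installed:

# Sketch only: the two byte-recovery paths used when building the vocabulary.
import transformers.models.gpt2.tokenization_gpt2 as tokenization_gpt2

# Llama-like tokenizers encode a single byte as "<0xNN>".
token = "<0x0A>"
assert bytes([int(token[3:5], 16)]) == b"\n"

# GPT-2-like tokenizers render bytes through a reversible unicode mapping;
# inverting bytes_to_unicode() recovers the original bytes of a token string.
unicode_to_bytes = {v: k for k, v in tokenization_gpt2.bytes_to_unicode().items()}
assert bytes(unicode_to_bytes[c] for c in "Ġhello") == b" hello"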

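get_outlines_vocabulary also memoizes its result on the tokenizer object via a _outlines_vocabulary attribute, so repeated lookups for the same tokenizer reuse the wrapper instead of rebuilding the reduced vocabulary. A sketch (not part of this diff), again assuming transformers is installed and "gpt2" resolves to a tokenizer:

# Sketch only: repeated calls for the same tokenizer return the cached wrapper.
from transformers import AutoTokenizer

from vllm.v1.structured_output.utils import get_outlines_vocabulary

tok = AutoTokenizer.from_pretrained("gpt2")  # assumed model name
first = get_outlines_vocabulary(tok)
second = get_outlines_vocabulary(tok)
assert first is second  # served from tok._outlines_vocabulary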