 
 from __future__ import annotations
 
+import hashlib
+import importlib.metadata
+import os
+from typing import TYPE_CHECKING
+
 import regex as re
+from cachetools import LRUCache
+from diskcache import Cache
+
+import vllm.envs as envs
+from vllm.logger import init_logger
+from vllm.utils import LazyLoader
+
+if TYPE_CHECKING:
+    import outlines_core as oc
+    import transformers.file_utils as file_utils
+    import transformers.models.gpt2.tokenization_gpt2 as tokenization_gpt2
+
+    from vllm.transformers_utils.tokenizer import AnyTokenizer
+else:
+    oc = LazyLoader("oc", globals(), "outlines_core")
+    file_utils = LazyLoader("file_utils", globals(), "transformers.file_utils")
+    tokenization_gpt2 = LazyLoader(
+        "tokenization_gpt2",
+        globals(),
+        "transformers.models.gpt2.tokenization_gpt2",
+    )
+
+logger = init_logger(__name__)
+
+CACHE = None
+
+
+class OutlinesVocabulary:
+    """
+    Wrapper class for `outlines_core.Vocabulary`,
+    which allows us to store a hash with the vocabulary
+    """
+
+    def __init__(self, vocabulary: oc.Vocabulary) -> None:
+        # Actual vocabulary object
+        self.inner = vocabulary
+        # Use a SHA-256 digest of the vocabulary's repr as the cache key:
+        # unlike Python's built-in hash(), it is always non-negative and
+        # stable across processes.
+        hex_str = hashlib.sha256(
+            vocabulary.__repr__().encode('utf-8')).hexdigest()
+        hash_int = int(hex_str, 16)
+        self._hash = hash_int
+
+
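As a side note on the hashing choice above, here is a minimal standalone sketch (not part of the diff) of why a SHA-256 digest of `repr()` works as a cache key: it is deterministic for equal reprs and never negative, unlike the built-in `hash()`. The `digest` helper is purely illustrative.

import hashlib

def digest(obj) -> int:
    # Same recipe as OutlinesVocabulary above: hash the repr, keep it as an int.
    return int(hashlib.sha256(repr(obj).encode("utf-8")).hexdigest(), 16)

assert digest({"a": [1, 2]}) == digest({"a": [1, 2]})
assert digest({"a": [1, 2]}) >= 0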
+def get_outlines_cache_path() -> str:
+    """Get the directory path used for the outlines cache."""
+    outlines_cache_dir = os.getenv("OUTLINES_CACHE_DIR")
+    xdg_cache_home = os.getenv("XDG_CACHE_HOME")
+    home_dir = os.path.expanduser("~")
+
+    if outlines_cache_dir:
+        # OUTLINES_CACHE_DIR takes precedence
+        return outlines_cache_dir
+    elif xdg_cache_home:
+        return os.path.join(xdg_cache_home, ".cache", "outlines")
+    # If homedir is "/", we may be inside a container, and thus writing to
+    # root would be problematic, so we fall back to using a tempfile.
+    # Also validate the path exists, since os.path.expanduser does
+    # not guarantee existence.
+    elif os.path.isdir(home_dir) and home_dir != "/":
+        # Default Unix fallback: ~/.cache/outlines
+        return os.path.join(home_dir, ".cache", "outlines")
+    else:
+        import tempfile
+
+        # home_dir may be / inside a docker container without existing user
+        tempdir = tempfile.gettempdir()
+        return os.path.join(tempdir, ".cache", "outlines")
+
+
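A quick illustrative check of the precedence implemented above (assuming the function is importable in the current environment; the path is an arbitrary example):

import os

os.environ["OUTLINES_CACHE_DIR"] = "/tmp/outlines-cache"  # example path
assert get_outlines_cache_path() == "/tmp/outlines-cache"

# Without the override, XDG_CACHE_HOME or ~/.cache/outlines is used instead.
del os.environ["OUTLINES_CACHE_DIR"]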
+def get_outlines_cache():
+    """Get the Cache instance to be used for index caching"""
+
+    cache_dir = get_outlines_cache_path()
+    if envs.VLLM_V1_USE_OUTLINES_CACHE:
+        logger.warning("Enabling outlines cache. This is an unbounded on-disk "
+                       "cache. It may consume a lot of disk space and should "
+                       "not be used with untrusted clients.")
+        cache = Cache(cache_dir, eviction_policy="none", cull_limit=0)
+        outlines_version = importlib.metadata.version("outlines_core")
+
+        cached_version = cache.get('__version__', None)
+        if cached_version != outlines_version:
+            cache.clear()
+        cache.set('__version__', outlines_version)
+        return cache
+    else:
+        return LRUCache(maxsize=128)
+
+
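Both branches return a mapping-style cache, so callers can treat them interchangeably. A rough usage sketch (illustrative only; which flavour you get depends on the VLLM_V1_USE_OUTLINES_CACHE environment variable):

cache = get_outlines_cache()
# Disk-backed diskcache.Cache when VLLM_V1_USE_OUTLINES_CACHE is enabled,
# otherwise an in-memory cachetools.LRUCache capped at 128 entries.
cache["index-key"] = "compiled-index"
assert cache["index-key"] == "compiled-index"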
+re_llama_byte_token = re.compile(r"^<0x[0-9A-F]{2}>$")
+re_replacement_seq = re.compile(r"^.{0,6}�+.{0,6}$")
+
+
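To make the two patterns above concrete, a few illustrative checks of what they accept (standalone; uses the same `regex` module imported as `re` above):

assert re_llama_byte_token.match("<0x0A>")                 # Llama-style byte token
assert not re_llama_byte_token.match("<0x0a>")             # lowercase hex is rejected
assert re_replacement_seq.match("ab\ufffd\ufffdcd")        # short run around U+FFFD
assert not re_replacement_seq.match("a" * 10 + "\ufffd")   # surrounding text too long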
+def _reduced_vocabulary(
+    tokenizer: AnyTokenizer,
+    eos_token_id: int,
+) -> dict[bytes, list[int]]:
+    """Create a map from vocabulary tokens to lists of equivalent token ids.
+
+    Returns:
+        A dict mapping token bytes to lists of equivalent token ids.
+    """
+
+    unicode_to_bytes = {
+        v: k
+        for k, v in tokenization_gpt2.bytes_to_unicode().items()
+    }
+
+    def convert_token_to_string(token: str) -> str:
+
+        string = tokenizer.convert_tokens_to_string([token])
+
+        # A hack to handle missing spaces in HF's Llama tokenizers
+        if (type(token) is str
+                and token.startswith(file_utils.SPIECE_UNDERLINE)
+                or token == "<0x20>"):
+            return " " + string
+
+        return string
+
+    vocabulary: dict[bytes, list[int]] = {}
+    empty_token_ids: list[int] = []
+    for token, token_idx in tokenizer.get_vocab().items():
+        if token in tokenizer.all_special_tokens:  # type: ignore
+            continue
+
+        token_str = convert_token_to_string(token)
+        if token_str:
+            if isinstance(token, (bytes, bytearray)):
+                # For BPE tokenizers where tokens are stored as bytes.
+
+                # safe to ignore since token_str is of type (bytearray, bytes)
+                # by this point.
+                token_bytes = bytes(token_str)  # type: ignore[arg-type]
+
+            elif "\ufffd" in token_str and not re_replacement_seq.match(
+                    token_str):
+                # Handle tokens with invalid UTF-8 sequences.
+                if re_llama_byte_token.match(token):
+                    # Llama-like tokenizers use <0xXX> for incomplete sequences.
+                    token_bytes = bytes([int(token[3:5], 16)])
+                else:
+                    # GPT2 tokenizers: map each byte back using unicode_to_bytes
+                    byte_vals = [unicode_to_bytes.get(c) for c in token]
+                    if None in byte_vals:
+                        raise RuntimeError(
+                            f"Cannot convert token `{token}`"
+                            f" ({token_idx}) to bytes: {token_str}")
+                    # safe to ignore, since if None in byte_vals,
+                    # an error is thrown.
+                    token_bytes = bytes(byte_vals)  # type: ignore[arg-type]
+            else:
+                token_bytes = token_str.encode('utf-8')
+
+            if token_idx != eos_token_id:
+                vocabulary.setdefault(token_bytes, []).append(token_idx)
+        else:
+            empty_token_ids.append(token_idx)
+
+    return vocabulary
+
+
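The GPT-2 branch above relies on inverting `bytes_to_unicode()`. A small standalone sketch of that round trip (assumes `transformers` is installed; the token value is just an example):

from transformers.models.gpt2.tokenization_gpt2 import bytes_to_unicode

unicode_to_bytes = {v: k for k, v in bytes_to_unicode().items()}

token = "\u0120hello"  # "Ġhello", GPT-2's byte-level spelling of " hello"
raw = bytes(unicode_to_bytes[c] for c in token)
assert raw == b" hello"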
+def get_outlines_vocabulary(tokenizer: AnyTokenizer) -> oc.Vocabulary:
+    """Get the `Vocabulary` object for a given tokenizer."""
+    if hasattr(tokenizer, "_outlines_vocabulary"):
+        return tokenizer._outlines_vocabulary  # type: ignore
+
+    try:
+        if hasattr(
+                tokenizer,
+                "eos_token_id",
+        ) and tokenizer.eos_token_id is not None:
+            eos_token_id = tokenizer.eos_token_id
+        else:
+            raise ValueError(
+                f"Error during structured outputs setup for outlines: Tokenizer ({type(tokenizer)}) has no `eos_token_id` property, but `eos_token_id` is required for structured outputs to work properly."  # noqa: E501
+            )
+
+        reduced_vocab = _reduced_vocabulary(
+            tokenizer,
+            eos_token_id  # type: ignore
+        )
+        vocabulary = OutlinesVocabulary(
+            oc.Vocabulary(eos_token_id, reduced_vocab))
+        tokenizer._outlines_vocabulary = vocabulary  # type: ignore
+
+        return vocabulary
+    except AttributeError as e:
+        raise ValueError(f"Cannot get the vocabulary of the tokenizer "
+                         f"({type(tokenizer)}). The tokenizer should have a "
+                         "get_vocab method.") from e
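A hypothetical end-to-end usage sketch (the model name is only an example, and it assumes `transformers` is available): the result is memoized on the tokenizer, so repeated calls return the same object.

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # example model
vocab = get_outlines_vocabulary(tokenizer)
assert get_outlines_vocabulary(tokenizer) is vocab  # cached on the tokenizer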
 
 
 def grammar_is_likely_lark(grammar_str: str) -> bool:
@@ -77,7 +275,7 @@ def check_quotes(text: str, rule_name: str, line_num: int) -> None:
             raise ValueError(
                 f"Mismatched quotes in {rule_name} on line {line_num}")
 
-    def extract_references(text: str) -> set:
+    def extract_references(text: str) -> set[str]:
         """Extract rule references from text."""
         # Remove quoted strings and special characters
         text = re.sub(r'"[^"]*"', '', text)