Skip to content

Commit ebcbc45

Browse files
authored
Add inverse chat template metadata
1 parent 1e43630 commit ebcbc45

File tree

3 files changed

+15
-0
lines changed

3 files changed

+15
-0
lines changed

gguf-py/gguf/constants.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -166,6 +166,7 @@ class Tokenizer:
166166
CHAT_TEMPLATE = "tokenizer.chat_template"
167167
CHAT_TEMPLATE_N = "tokenizer.chat_template.{name}"
168168
CHAT_TEMPLATES = "tokenizer.chat_templates"
169+
INVERSE_TEMPLATE = "tokenizer.inverse_template"
169170
# FIM/Infill special tokens constants
170171
PREFIX_ID = "tokenizer.ggml.prefix_token_id"
171172
SUFFIX_ID = "tokenizer.ggml.suffix_token_id"

gguf-py/gguf/gguf_writer.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -840,6 +840,9 @@ def add_chat_template(self, value: str | Sequence[Mapping[str, str]]) -> None:
840840

841841
self.add_string(Keys.Tokenizer.CHAT_TEMPLATE, value)
842842

843+
def add_inverse_template(self, value: str) -> None:
844+
self.add_string(Keys.Tokenizer.INVERSE_TEMPLATE, value)
845+
843846
def add_prefix_token_id(self, id: int) -> None:
844847
self.add_uint32(Keys.Tokenizer.PREFIX_ID, id)
845848

gguf-py/gguf/vocab.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ class SpecialVocab:
2121
add_special_token: dict[str, bool]
2222
special_token_ids: dict[str, int]
2323
chat_template: str | Sequence[Mapping[str, str]] | None
24+
inverse_template: str | None
2425

2526
def __init__(
2627
self, path: str | os.PathLike[str], load_merges: bool = False,
@@ -33,6 +34,7 @@ def __init__(
3334
self.load_merges = load_merges
3435
self.merges = []
3536
self.chat_template = None
37+
self.inverse_template = None
3638
if special_token_types is not None:
3739
self.special_token_types = special_token_types
3840
else:
@@ -71,6 +73,10 @@ def add_to_gguf(self, gw: GGUFWriter, quiet: bool = False) -> None:
7173
if not quiet:
7274
logger.info(f'Setting chat_template to {self.chat_template}')
7375
gw.add_chat_template(self.chat_template)
76+
if self.inverse_template is not None:
77+
if not quiet:
78+
logger.info(f'Setting inverse_template to {self.inverse_template}')
79+
gw.add_inverse_template(self.inverse_template)
7480

7581
def _load(self, path: Path) -> None:
7682
self._try_load_from_tokenizer_json(path)
@@ -137,6 +143,11 @@ def _try_load_from_tokenizer_json(self, path: Path) -> bool:
137143
self.chat_template = chat_template
138144
else:
139145
logger.warning(f'Bad type for chat_template field in {tokenizer_config_file!r} - ignoring')
146+
inverse_template = tokenizer_config.get('inverse_template')
147+
if inverse_template is None or isinstance(inverse_template, str):
148+
self.inverse_template = inverse_template
149+
else:
150+
logger.warning(f'Bad type for inverse_template field in {tokenizer_config_file!r} - ignoring')
140151
for typ in self.special_token_types:
141152
add_entry = tokenizer_config.get(f'add_{typ}_token')
142153
if isinstance(add_entry, bool):

0 commit comments

Comments
 (0)