Skip to content

Commit 8559e8c

Browse files
andreabakabetlen
and authored
feat: Add Llama-3 chat format (#1371)
* feat: Add Llama-3 chat format * feat: Auto-detect Llama-3 chat format from gguf template * feat: Update llama.cpp to b2715 Includes proper Llama-3 <|eot_id|> token handling. --------- Co-authored-by: Andrei Betlen <abetlen@gmail.com>
1 parent 617d536 commit 8559e8c

File tree

1 file changed

+26
-0
lines changed

1 file changed

+26
-0
lines changed

llama_cpp/llama_chat_format.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@
3535
# Source: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1/blob/main/tokenizer_config.json
3636
MIXTRAL_INSTRUCT_CHAT_TEMPLATE = "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}"
3737

38+
# Source: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/blob/main/tokenizer_config.json
# Reference Jinja2 chat template for Llama-3 Instruct; used to auto-detect the
# "llama-3" chat format from gguf metadata by exact string comparison.
LLAMA3_INSTRUCT_CHAT_TEMPLATE = "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}"
40+
3841
### Chat Completion Handler ###
3942

4043

@@ -729,6 +732,9 @@ def guess_chat_format_from_gguf_metadata(metadata: Dict[str, str]) -> Optional[s
729732
metadata["tokenizer.chat_template"] == MIXTRAL_INSTRUCT_CHAT_TEMPLATE):
730733
return "mistral-instruct"
731734

735+
if metadata["tokenizer.chat_template"] == LLAMA3_INSTRUCT_CHAT_TEMPLATE:
736+
return "llama-3"
737+
732738
return None
733739

734740

@@ -920,6 +926,26 @@ def format_llama2(
920926
return ChatFormatterResponse(prompt=_prompt)
921927

922928

929+
# Chat format for Llama-3 models, see more details at:
# https://github.com/meta-llama/llama3/blob/main/llama/tokenizer.py#L202-L229
@register_chat_format("llama-3")
def format_llama3(
    messages: List[llama_types.ChatCompletionRequestMessage],
    **kwargs: Any,
) -> ChatFormatterResponse:
    """Render *messages* into the Llama-3 instruct prompt layout.

    Each turn is wrapped in ``<|start_header_id|>role<|end_header_id|>`` and
    terminated with ``<|eot_id|>``; the prompt opens with the
    ``<|begin_of_text|>`` token and ends with an empty assistant header so the
    model generates the reply. ``<|eot_id|>`` is returned as the stop token.
    """
    bos = "<|begin_of_text|>"
    eot = "<|eot_id|>"
    # Same header string for every supported role, so build the map in one pass.
    headers = {
        role: "<|start_header_id|>" + role + "<|end_header_id|>\n\n"
        for role in ("system", "user", "assistant")
    }
    turns = _map_roles(messages, headers)
    # Trailing (assistant-header, None) pair cues the model to answer.
    turns.append((headers["assistant"], None))
    prompt = _format_no_colon_single(bos, turns, eot)
    return ChatFormatterResponse(prompt=prompt, stop=eot)
947+
948+
923949
@register_chat_format("alpaca")
924950
def format_alpaca(
925951
messages: List[llama_types.ChatCompletionRequestMessage],

0 commit comments

Comments (0)