@@ -35,6 +35,9 @@
 # Source: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1/blob/main/tokenizer_config.json
 MIXTRAL_INSTRUCT_CHAT_TEMPLATE = "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}"
 
+# Source: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/blob/main/tokenizer_config.json
+LLAMA3_INSTRUCT_CHAT_TEMPLATE = "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}"
+
 ### Chat Completion Handler ###
 
 
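For reference, the new template can be rendered directly with jinja2 to see the prompt it produces. A minimal sketch, not part of the commit, using an invented two-message conversation:

from jinja2 import Environment

# Render the LLAMA3_INSTRUCT_CHAT_TEMPLATE string above with sample messages.
template = Environment().from_string(LLAMA3_INSTRUCT_CHAT_TEMPLATE)
prompt = template.render(
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ],
    bos_token="<|begin_of_text|>",
    add_generation_prompt=True,
)
print(prompt)
# <|begin_of_text|><|start_header_id|>system<|end_header_id|>
#
# You are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>
#
# Hello!<|eot_id|><|start_header_id|>assistant<|end_header_id|>
#

This is the canonical Llama 3 instruct layout, which the handwritten format_llama3 handler added later in this commit must reproduce.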
@@ -729,6 +732,9 @@ def guess_chat_format_from_gguf_metadata(metadata: Dict[str, str]) -> Optional[str]:
         metadata["tokenizer.chat_template"] == MIXTRAL_INSTRUCT_CHAT_TEMPLATE):
         return "mistral-instruct"
 
+    if metadata["tokenizer.chat_template"] == LLAMA3_INSTRUCT_CHAT_TEMPLATE:
+        return "llama-3"
+
     return None
 
 
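Worth noting: this auto-detection is an exact string comparison against the template shipped in the model's tokenizer_config.json, so it only fires for GGUFs whose embedded template matches verbatim. A sketch of the expected behavior (the metadata dicts are hand-built here; real ones are read from the GGUF file):

# Exact match on the known template selects the registered "llama-3" handler.
assert guess_chat_format_from_gguf_metadata(
    {"tokenizer.chat_template": LLAMA3_INSTRUCT_CHAT_TEMPLATE}
) == "llama-3"

# Any other template falls through to None, leaving the caller to render the
# embedded Jinja template (or fall back to the default format) instead.
assert guess_chat_format_from_gguf_metadata(
    {"tokenizer.chat_template": "{{ messages }}"}
) is None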
@@ -920,6 +926,26 @@ def format_llama2(
     return ChatFormatterResponse(prompt=_prompt)
 
 
+# Chat format for Llama-3 models, see more details at:
+# https://github.com/meta-llama/llama3/blob/main/llama/tokenizer.py#L202-L229
+@register_chat_format("llama-3")
+def format_llama3(
+    messages: List[llama_types.ChatCompletionRequestMessage],
+    **kwargs: Any,
+) -> ChatFormatterResponse:
+    _roles = dict(
+        system="<|start_header_id|>system<|end_header_id|>\n\n",
+        user="<|start_header_id|>user<|end_header_id|>\n\n",
+        assistant="<|start_header_id|>assistant<|end_header_id|>\n\n",
+    )
+    _begin_token = "<|begin_of_text|>"
+    _sep = "<|eot_id|>"
+    _messages = _map_roles(messages, _roles)
+    _messages.append((_roles["assistant"], None))
+    _prompt = _format_no_colon_single(_begin_token, _messages, _sep)
+    return ChatFormatterResponse(prompt=_prompt, stop=_sep)
+
+
 @register_chat_format("alpaca")
 def format_alpaca(
     messages: List[llama_types.ChatCompletionRequestMessage],
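Once registered, the format can be selected by name when constructing a model. A usage sketch (the model path is a placeholder):

from llama_cpp import Llama

llm = Llama(
    model_path="./Meta-Llama-3-8B-Instruct.Q4_K_M.gguf",  # placeholder path
    chat_format="llama-3",  # or omit and rely on the metadata check above
)
response = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ],
)
print(response["choices"][0]["message"]["content"])

Returning stop=_sep from the formatter makes generation halt at <|eot_id|>, Llama 3's end-of-turn marker, which is distinct from the model's EOS token; without it the model would keep generating past the end of the assistant turn.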