
Commit d390717

Sync llama : Support llama 4 text-only

1 parent 46d700f

File tree

2 files changed (+26, -0 lines)


llama_cpp/llama_chat_format.py (+24)
@@ -53,6 +53,11 @@
 # Source: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/blob/main/tokenizer_config.json
 LLAMA3_INSTRUCT_CHAT_TEMPLATE = "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}"
 
+# Source: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct/blob/main/tokenizer_config.json
+LLAMA4_INSTRUCT_CHAT_TEMPLATE = "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %} \n {%- if messages[0]['content'] is string %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- else %}\n {#- FIXME: The processor requires an array, always. #}\n {%- set system_message = messages[0]['content'][0]['text']|trim %}\n {%- endif %}\n {%- set messages = messages[1:] %}\n {%- set user_supplied_system_message = true %}\n{%- else %}\n {%- set system_message = \"\" %}\n {%- set user_supplied_system_message = false %}\n{%- endif %}\n\n{#- System message if the user supplied one #}\n{%- if user_supplied_system_message %}\n {{- \"<|header_start|>system<|header_end|>\\n\\n\" }}\n {%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n {%- endif %}\n {%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {%- endif %}\n {{- system_message }}\n {{- \"<|eot|>\" }}\n{%- endif %}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|header_start|>user<|header_end|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|header_start|>' + message['role'] + '<|header_end|>\\n\\n' }}\n {%- if message['content'] is string %}\n {{- message['content'] }}\n {%- else %}\n {%- for content in message['content'] %}\n {%- if content['type'] == 'image' %}\n {{- '<|image|>' }}\n {%- elif content['type'] == 'text' %}\n {{- content['text'] }}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n {{- \"<|eot|>\" }}\n {%- elif 'tool_calls' in message and message.tool_calls|length > 0 %}\n {{- '<|header_start|>assistant<|header_end|>\\n\\n' -}}\n {{- '<|python_start|>' }}\n {%- if message['content'] is string %}\n {{- message['content'] }}\n {%- else %}\n {%- for content in message['content'] %}\n {%- if content['type'] == 'image' %}\n {{- '<|image|>' }}\n {%- elif content['type'] == 'text' %}\n {{- content['text'] }}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n {{- '<|python_end|>' }}\n {%- for tool_call in message.tool_calls %}\n {{- '{\"name\": \"' + tool_call.function.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.function.arguments | tojson }}\n {{- \"}\" }}\n {%- endfor %}\n {{- \"<|eot|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|header_start|>ipython<|header_end|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|header_start|>assistant<|header_end|>\\n\\n' }}\n{%- endif %}\n"
+LLAMA4_INSTRUCT_BOS_TOKEN = "<|begin_of_text|>"
+LLAMA4_INSTRUCT_EOS_TOKEN = "<|eot|>"
+
 ### Chat Completion Handler ###
 
 
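As a sanity check, the Jinja template above can be rendered directly. A minimal sketch, not part of this commit, assuming jinja2 is installed and the three constants above are in scope:

import jinja2

# Render the prompt for a simple two-message conversation; with no tools
# supplied, the template's tool-calling branches are skipped entirely.
env = jinja2.Environment(trim_blocks=True, lstrip_blocks=True)
template = env.from_string(LLAMA4_INSTRUCT_CHAT_TEMPLATE)
prompt = template.render(
    bos_token=LLAMA4_INSTRUCT_BOS_TOKEN,
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ],
    add_generation_prompt=True,
)
print(prompt)
# Expected shape (roughly):
# <|begin_of_text|><|header_start|>system<|header_end|>
#
# You are a helpful assistant.<|eot|><|header_start|>user<|header_end|>
#
# Hello!<|eot|><|header_start|>assistant<|header_end|>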
@@ -1035,6 +1040,25 @@ def format_llama3(
     return ChatFormatterResponse(prompt=_prompt, stop=_sep)
 
 
+# Chat format for text-only Llama 4 models; see more details at:
+# https://github.com/meta-llama/llama-models/blob/main/models/llama4/chat_format.py#L61-L316
+@register_chat_format("llama-4")
+def format_llama4(
+    messages: List[llama_types.ChatCompletionRequestMessage],
+    **kwargs: Any,
+) -> ChatFormatterResponse:
+    _roles = dict(
+        system="<|header_start|>system<|header_end|>\n\n",
+        user="<|header_start|>user<|header_end|>\n\n",
+        assistant="<|header_start|>assistant<|header_end|>\n\n",
+    )
+    _sep = "<|eot|>"
+    _messages = _map_roles(messages, _roles)
+    _messages.append((_roles["assistant"], None))
+    _prompt = _format_no_colon_single("", _messages, _sep)
+    return ChatFormatterResponse(prompt=_prompt, stop=_sep)
+
+
 @register_chat_format("alpaca")
 def format_alpaca(
     messages: List[llama_types.ChatCompletionRequestMessage],

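Once the handler is registered under the name "llama-4", it can be selected through the package's high-level API. A usage sketch (the model path below is a placeholder, not from this commit):

from llama_cpp import Llama

llm = Llama(
    model_path="./llama-4-scout-instruct.Q4_K_M.gguf",  # placeholder path
    chat_format="llama-4",  # the format registered by this commit
)
response = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ],
)
print(response["choices"][0]["message"]["content"])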
llama_cpp/llama_cpp.py (+2)
@@ -241,6 +241,7 @@
 # LLAMA_VOCAB_PRE_TYPE_SUPERBPE = 30,
 # LLAMA_VOCAB_PRE_TYPE_TRILLION = 31,
 # LLAMA_VOCAB_PRE_TYPE_BAILINGMOE = 32,
+# LLAMA_VOCAB_PRE_TYPE_LLAMA4 = 33,
 # };
 LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0
 LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1
@@ -275,6 +276,7 @@
 LLAMA_VOCAB_PRE_TYPE_SUPERBPE = 30
 LLAMA_VOCAB_PRE_TYPE_TRILLION = 31
 LLAMA_VOCAB_PRE_TYPE_BAILINGMOE = 32
+LLAMA_VOCAB_PRE_TYPE_LLAMA4 = 33
 
 
 # // note: these values should be synchronized with ggml_rope
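These bindings-side constants are plain Python ints mirroring the llama_vocab_pre_type enum in the vendored llama.h, so each value must match the C header exactly. A trivial sanity check, not part of the commit:

from llama_cpp import llama_cpp

# The new pre-tokenizer type follows BAILINGMOE, matching the C enum order.
assert llama_cpp.LLAMA_VOCAB_PRE_TYPE_BAILINGMOE == 32
assert llama_cpp.LLAMA_VOCAB_PRE_TYPE_LLAMA4 == 33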
