@@ -448,6 +448,7 @@ std::string common_chat_format_name(common_chat_format format) {
448
448
case COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING: return " Hermes 2 Pro (extract reasoning)" ;
449
449
case COMMON_CHAT_FORMAT_COMMAND_R7B: return " Command R7B" ;
450
450
case COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING: return " Command R7B (extract reasoning)" ;
451
+ case COMMON_CHAT_FORMAT_PHI_4: return " Phi-4" ;
451
452
default :
452
453
throw std::runtime_error (" Unknown chat format" );
453
454
}
@@ -1356,6 +1357,184 @@ static common_chat_msg common_chat_parse_functionary_v3_1_llama_3_1(const std::s
1356
1357
return parse_json_tool_calls (input, std::nullopt, function_regex, close_regex);
1357
1358
}
1358
1359
1360
+ static common_chat_params common_chat_params_init_phi_4 (const common_chat_template & tmpl, const struct templates_params & inputs) {
1361
+ // Phi-4 has a unique format that expects tools in the system message with <|tool|> tags
1362
+ // and returns function calls as a JSON object after <|tool_call|> tag
1363
+ common_chat_params data;
1364
+
1365
+ data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1366
+ data.grammar = build_grammar ([&](const common_grammar_builder & builder) {
1367
+ std::vector<std::string> tool_rules;
1368
+ std::vector<std::string> tool_call_alts;
1369
+ foreach_function (inputs.tools , [&](const json & tool) {
1370
+ const auto & function = tool.at (" function" );
1371
+ std::string name = function.at (" name" );
1372
+ auto parameters = function.at (" parameters" );
1373
+ builder.resolve_refs (parameters);
1374
+ tool_rules.push_back (builder.add_schema (name + " -call" , {
1375
+ {" type" , " object" },
1376
+ {" properties" , {
1377
+ {" name" , {{" const" , name}}},
1378
+ {" arguments" , parameters},
1379
+ }},
1380
+ {" required" , json::array ({" name" , " arguments" })},
1381
+ }));
1382
+ });
1383
+ auto any_tool_call = builder.add_rule (" any_tool_call" , " ( " + string_join (tool_rules, " | " ) + " ) space" );
1384
+ std::vector<std::string> alt_tags {
1385
+ any_tool_call,
1386
+ };
1387
+ tool_call_alts.push_back (any_tool_call);
1388
+ auto tool_call = builder.add_rule (" tool_call" , string_join (tool_call_alts, " | " ));
1389
+ builder.add_rule (" root" , inputs.parallel_tool_calls ? " (" + tool_call + " )+" : tool_call);
1390
+ data.grammar_triggers .push_back ({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, " <|tool_call|>" });
1391
+ data.preserved_tokens = {
1392
+ " <|tool_call|>" ,
1393
+ " </|tool_call|>" ,
1394
+ };
1395
+ });
1396
+
1397
+ // For Phi-4, we need to inject tools into the system message
1398
+ // because the template expects tools in the system message with <|tool|> tags
1399
+ if (inputs.tools .empty ()) {
1400
+ // No tools, use normal approach
1401
+ data.prompt = apply (tmpl, inputs.messages , json::array (), inputs.add_generation_prompt );
1402
+ } else {
1403
+ // Make a copy of messages that we can modify
1404
+ json adjusted_messages = inputs.messages ;
1405
+
1406
+ // Extract just the function part of the OpenAI-formatted tools
1407
+ json phi4_tools = json::array ();
1408
+ foreach_function (inputs.tools , [&](const json & tool) {
1409
+ phi4_tools.push_back (tool.at (" function" ));
1410
+ });
1411
+
1412
+ // Phi-4 template expects tools in the system message with <|tool|> tags.
1413
+ // Find the system message, or add one if it doesn't exist
1414
+ bool found_system_msg = false ;
1415
+ for (auto & message : adjusted_messages) {
1416
+ if (message.contains (" role" ) && message[" role" ] == " system" ) {
1417
+ // Add tools to the existing system message and update content to mention tools
1418
+ message[" tools" ] = phi4_tools;
1419
+
1420
+ // If the system message doesn't mention tools, append that information
1421
+ std::string content = message[" content" ];
1422
+ if (content.find (" tool" ) == std::string::npos &&
1423
+ content.find (" function" ) == std::string::npos) {
1424
+ message[" content" ] = content + " You have access to some tools." ;
1425
+ }
1426
+
1427
+ found_system_msg = true ;
1428
+ break ;
1429
+ }
1430
+ }
1431
+
1432
+ // If no system message, add one with tools
1433
+ if (!found_system_msg && !adjusted_messages.empty ()) {
1434
+ json system_msg = {
1435
+ {" role" , " system" },
1436
+ {" content" , " You are a helpful assistant with access to tools.\n To use a tool, respond in this format: <|tool_call|>{\" name\" : \" foo\" , \" arguments\" : {\" a\" : 1}}<|/tool_call|>" },
1437
+ {" tools" , phi4_tools}
1438
+ };
1439
+ // Insert system message at the beginning
1440
+ adjusted_messages.insert (adjusted_messages.begin (), system_msg);
1441
+ }
1442
+
1443
+ // Apply template with tools embedded in system message, passing empty tools separately
1444
+ data.prompt = apply (tmpl, adjusted_messages, json (), inputs.add_generation_prompt );
1445
+ }
1446
+
1447
+ data.format = COMMON_CHAT_FORMAT_PHI_4;
1448
+ return data;
1449
+ }
1450
+
1451
+ static common_chat_msg common_chat_parse_phi_4 (const std::string & input) {
1452
+ common_chat_msg result;
1453
+ result.role = " assistant" ;
1454
+
1455
+ std::string final_content = " " ;
1456
+
1457
+ const std::string opening_tag = " <|tool_call|>" ;
1458
+ const std::string closing_tag = " </|tool_call|>" ;
1459
+
1460
+ size_t start_pos = 0 ;
1461
+ while (true ) {
1462
+ // Find next tool call
1463
+ size_t tool_start = input.find (opening_tag, start_pos);
1464
+ if (tool_start == std::string::npos) {
1465
+ // No more tool calls.
1466
+
1467
+ // Is start_pos within string bounds?
1468
+ if (start_pos < input.length ()) {
1469
+ // Add the rest of the string to final_content
1470
+ final_content += input.substr (start_pos);
1471
+ }
1472
+ break ;
1473
+ }
1474
+
1475
+ // Add content before the tool call to final_content
1476
+ final_content += input.substr (start_pos, tool_start - start_pos);
1477
+
1478
+ // Find closing tag
1479
+ size_t content_start = tool_start + opening_tag.length ();
1480
+ size_t tool_end = input.find (closing_tag, content_start);
1481
+
1482
+ if (tool_end == std::string::npos) {
1483
+ // No closing tag found, so just include the rest of the string as tool.
1484
+ tool_end = input.length ();
1485
+ }
1486
+
1487
+ // Extract tool call content
1488
+ std::string tool_content = input.substr (
1489
+ content_start,
1490
+ tool_end - content_start
1491
+ );
1492
+
1493
+ // Try to parse the tool call
1494
+ try {
1495
+ auto tool_call = json::parse (tool_content);
1496
+
1497
+ // Verify the required fields exist
1498
+ if (!tool_call.contains (" name" )) {
1499
+ throw std::runtime_error (" Missing 'name' field in tool call" );
1500
+ }
1501
+
1502
+ if (!tool_call.contains (" arguments" )) {
1503
+ throw std::runtime_error (" Missing 'arguments' field in tool call" );
1504
+ }
1505
+
1506
+ std::string name = tool_call[" name" ].get <std::string>();
1507
+
1508
+ std::string arguments;
1509
+ try {
1510
+ arguments = tool_call[" arguments" ].dump ();
1511
+ } catch (const std::exception & e) {
1512
+ LOG_ERR (" Failed to serialize arguments: %s\n " , e.what ());
1513
+ arguments = " {}" ;
1514
+ }
1515
+
1516
+ result.tool_calls .push_back ({
1517
+ name,
1518
+ arguments,
1519
+ /* id= */ " " ,
1520
+ });
1521
+ } catch (const std::exception & e) {
1522
+ // If parsing fails, include the entire tool call in the content
1523
+ final_content += input.substr (
1524
+ tool_start,
1525
+ tool_end + closing_tag.length () - tool_start
1526
+ );
1527
+ }
1528
+
1529
+ // Move past this tool call for next iteration
1530
+ start_pos = tool_end + closing_tag.length ();
1531
+ }
1532
+
1533
+ result.content = final_content;
1534
+ return result;
1535
+ }
1536
+
1537
+
1359
1538
static common_chat_params common_chat_params_init_hermes_2_pro (const common_chat_template & tmpl, const struct templates_params & inputs) {
1360
1539
common_chat_params data;
1361
1540
// (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)*
@@ -1642,6 +1821,11 @@ static common_chat_params common_chat_templates_apply_jinja(
1642
1821
return common_chat_params_init_firefunction_v2 (tmpl, params);
1643
1822
}
1644
1823
1824
+ // Phi-4 mini.
1825
+ if (src.find (" <|tool|>" ) != std::string::npos) {
1826
+ return common_chat_params_init_phi_4 (tmpl, params);
1827
+ }
1828
+
1645
1829
// Plain handler (no tools)
1646
1830
if (params.tools .is_null () || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
1647
1831
return common_chat_params_init_without_tools (tmpl, params);
@@ -1773,6 +1957,8 @@ common_chat_msg common_chat_parse(const std::string & input, common_chat_format
1773
1957
return common_chat_parse_command_r7b (input, /* extract_reasoning= */ false );
1774
1958
case COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING:
1775
1959
return common_chat_parse_command_r7b (input, /* extract_reasoning= */ true );
1960
+ case COMMON_CHAT_FORMAT_PHI_4:
1961
+ return common_chat_parse_phi_4 (input);
1776
1962
default :
1777
1963
throw std::runtime_error (" Unsupported format: " + common_chat_format_name (format));
1778
1964
}
0 commit comments