
Commit 1d11838

server : fix infill when prompt is empty (ggml-org#4833)
1 parent 7edefbd commit 1d11838

File tree

1 file changed: +8 -7 lines


examples/server/server.cpp

Lines changed: 8 additions & 7 deletions
@@ -1406,7 +1406,7 @@ struct llama_server_context
         task.multitask_id = multitask_id;
 
         // when a completion task's prompt array is not a singleton, we split it into multiple requests
-        if (task.data.at("prompt").size() > 1)
+        if (task.data.count("prompt") && task.data.at("prompt").size() > 1)
         {
             lock.unlock(); // entering new func scope
             return split_multiprompt_task(task);
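
The guard matters because server.cpp parses request bodies with nlohmann::json, whose at() throws json::out_of_range when the key is absent; an /infill request may carry only input_prefix/input_suffix and no "prompt" at all. Since && short-circuits, at() is now only reached after count() has confirmed the key exists. A minimal standalone sketch of the two conditions (the request fields below are made up for illustration):

```cpp
#include <iostream>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

int main() {
    // an infill-style request body: no "prompt" key at all
    json task_data = {
        {"input_prefix", "def add(a, b):\n    "},
        {"input_suffix", "\n    return result\n"}
    };

    // pre-fix condition: at() throws json::out_of_range on the missing key
    try {
        bool multi = task_data.at("prompt").size() > 1;
        std::cout << "multi-prompt: " << multi << "\n";
    } catch (const json::out_of_range &e) {
        std::cout << "unguarded at() threw: " << e.what() << "\n";
    }

    // post-fix condition: count() returns 0, the && short-circuits, no exception
    if (task_data.count("prompt") && task_data.at("prompt").size() > 1) {
        std::cout << "split into multiple requests\n";
    } else {
        std::cout << "no split needed\n";
    }
    return 0;
}
```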
@@ -1577,9 +1577,9 @@ struct llama_server_context
 
         slot->reset();
 
-        slot->infill = task.infill_mode;
-        slot->embedding = task.embedding_mode;
-        slot->task_id = task.id;
+        slot->infill       = task.infill_mode;
+        slot->embedding    = task.embedding_mode;
+        slot->task_id      = task.id;
         slot->multitask_id = task.multitask_id;
 
         if (!launch_slot_with_data(slot, task.data))
@@ -1731,7 +1731,8 @@ struct llama_server_context
         const bool has_prompt = slot.prompt.is_array() || (slot.prompt.is_string() && !slot.prompt.get<std::string>().empty()) || !slot.images.empty();
 
         // empty prompt passed -> release the slot and send empty response
-        if (slot.state == IDLE && slot.command == LOAD_PROMPT && !has_prompt)
+        // note: infill mode allows empty prompt
+        if (slot.state == IDLE && slot.command == LOAD_PROMPT && !has_prompt && !slot.infill)
         {
             slot.release();
             slot.print_timings();
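
With the guard above, an infill task whose prompt is empty now reaches this point, so the release path needs the extra !slot.infill term: in infill mode the model completes between input_prefix and input_suffix, and an empty prompt is a legitimate request rather than a no-op. An illustrative reduction of the before/after condition (Slot and the should_release_* helpers are hypothetical names for this sketch, not server.cpp code):

```cpp
#include <iostream>

// Stand-ins for the slot fields used by the condition; the real server
// tracks these as state/command enums (IDLE, LOAD_PROMPT) on the slot.
struct Slot {
    bool is_idle     = true;   // slot.state == IDLE
    bool load_prompt = true;   // slot.command == LOAD_PROMPT
    bool infill      = false;  // set for /infill requests
};

// before the fix: any empty prompt released the slot -> empty response
static bool should_release_before(const Slot &s, bool has_prompt) {
    return s.is_idle && s.load_prompt && !has_prompt;
}

// after the fix: infill slots proceed even without a prompt
static bool should_release_after(const Slot &s, bool has_prompt) {
    return s.is_idle && s.load_prompt && !has_prompt && !s.infill;
}

int main() {
    Slot infill_slot;
    infill_slot.infill = true;

    std::cout << std::boolalpha
              << "before: " << should_release_before(infill_slot, false) << "\n"  // true  (bug: empty response)
              << "after:  " << should_release_after(infill_slot, false)  << "\n"; // false (infill proceeds)
    return 0;
}
```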
@@ -2609,8 +2610,8 @@ static json format_final_response_oaicompat(const json &request, const task_result &response, bool streaming = false)
         {"object", streaming ? "chat.completion.chunk" : "chat.completion"},
         {"usage",
             json{{"completion_tokens", num_tokens_predicted},
-                {"prompt_tokens", num_prompt_tokens},
-                {"total_tokens", num_tokens_predicted + num_prompt_tokens}}},
+                 {"prompt_tokens", num_prompt_tokens},
+                 {"total_tokens", num_tokens_predicted + num_prompt_tokens}}},
         {"id", gen_chatcmplid()}};
 
     if (server_verbose) {

0 commit comments