@@ -1180,8 +1180,9 @@ struct llama_server_context
11801180 return slot.images .size () > 0 ;
11811181 }
11821182
1183- void send_error (task_server& task, std::string error)
1183+ void send_error (task_server& task, const std::string & error)
11841184 {
1185+ LOG_TEE (" task %i - error: %s\n " , task.id , error.c_str ());
11851186 std::unique_lock<std::mutex> lock (mutex_results);
11861187 task_result res;
11871188 res.id = task.id ;
@@ -1570,12 +1571,22 @@ struct llama_server_context
15701571 LOG_TEE (" slot unavailable\n " );
15711572 // send error result
15721573 send_error (task, " slot unavailable" );
1573- return ;
1574+ break ;
15741575 }
15751576
15761577 if (task.data .contains (" system_prompt" ))
15771578 {
1579+ if (!all_slots_are_idle) {
1580+ send_error (task, " system prompt can only be updated when all slots are idle" );
1581+ break ;
1582+ }
15781583 process_system_prompt_data (task.data [" system_prompt" ]);
1584+
1585+ // reset cache_tokens for all slots
1586+ for (llama_client_slot &slot : slots)
1587+ {
1588+ slot.cache_tokens .clear ();
1589+ }
15791590 }
15801591
15811592 slot->reset ();
@@ -1652,8 +1663,7 @@ struct llama_server_context
16521663 // attend tasks
16531664 process_tasks ();
16541665
1655- // update the system prompt wait until all slots are idle state
1656- if (system_need_update && all_slots_are_idle)
1666+ if (system_need_update)
16571667 {
16581668 LOG_TEE (" updating system prompt\n " );
16591669 update_system_prompt ();
0 commit comments