@@ -725,12 +725,12 @@ struct server_context {
725
725
return nullptr ;
726
726
}
727
727
728
- server_slot * get_available_slot (const std::string & prompt ) {
728
+ server_slot * get_available_slot (const server_task & task ) {
729
729
server_slot * ret = nullptr ;
730
730
731
731
// find the slot that has at least n% prompt similarity
732
- if (ret == nullptr && slot_prompt_similarity != 0 .0f && !prompt. empty () ) {
733
- int max_lcp_len = 0 ;
732
+ if (ret == nullptr && slot_prompt_similarity != 0 .0f ) {
733
+ int max_lcs_len = 0 ;
734
734
float similarity = 0 ;
735
735
736
736
for (server_slot & slot : slots) {
@@ -740,25 +740,25 @@ struct server_context {
740
740
}
741
741
742
742
// skip the slot if it does not contain cached tokens
743
- if (slot.prompt_tokens .empty ()) {
743
+ if (slot.cache_tokens .empty ()) {
744
744
continue ;
745
745
}
746
746
747
- // length of the Longest Common Prefix between the current slot's prompt and the input prompt
748
- int lcp_len = longest_common_prefix (slot.cache_tokens , slot .prompt_tokens );
747
+ // length of the Longest Common Subsequence between the current slot's prompt and the input prompt
748
+ int lcs_len = longest_common_subsequence (slot.cache_tokens , task .prompt_tokens );
749
749
750
- // fraction of the common substring length compared to the current slot's prompt length
751
- similarity = static_cast <float >(lcp_len ) / static_cast <int >(slot.prompt_tokens .size ());
750
+ // fraction of the common subsequence length compared to the current slot's prompt length
751
+ similarity = static_cast <float >(lcs_len ) / static_cast <int >(slot.cache_tokens .size ());
752
752
753
753
// select the current slot if the criteria match
754
- if (lcp_len > max_lcp_len && similarity > slot_prompt_similarity) {
755
- max_lcp_len = lcp_len ;
754
+ if (lcs_len > max_lcs_len && similarity > slot_prompt_similarity) {
755
+ max_lcs_len = lcs_len ;
756
756
ret = &slot;
757
757
}
758
758
}
759
759
760
760
if (ret != nullptr ) {
761
- SLT_DBG (*ret, " selected slot by lcp similarity, max_lcp_len = %d, similarity = %f\n " , max_lcp_len , similarity);
761
+ SLT_DBG (*ret, " selected slot by lcs similarity, max_lcs_len = %d, similarity = %f\n " , max_lcs_len , similarity);
762
762
}
763
763
}
764
764
@@ -1514,18 +1514,7 @@ struct server_context {
1514
1514
{
1515
1515
const int id_slot = json_value (task.data , " id_slot" , -1 );
1516
1516
1517
- server_slot * slot;
1518
-
1519
- if (id_slot != -1 ) {
1520
- slot = get_slot_by_id (id_slot);
1521
- } else {
1522
- std::string prompt;
1523
- if (task.data .contains (" prompt" ) && task.data .at (" prompt" ).is_string ()) {
1524
- prompt = json_value (task.data , " prompt" , std::string ());
1525
- }
1526
-
1527
- slot = get_available_slot (prompt);
1528
- }
1517
+ server_slot * slot = id_slot != -1 ? get_slot_by_id (id_slot) : get_available_slot (task);
1529
1518
1530
1519
if (slot == nullptr ) {
1531
1520
// if no slot is available, we defer this task for processing later
0 commit comments