@@ -99,6 +99,7 @@ struct slot_params {
99
99
100
100
uint32_t seed = -1 ; // RNG seed
101
101
int32_t n_keep = 0 ; // number of tokens to keep from initial prompt
102
+ int32_t n_discard = 0 ; // number of tokens after n_keep that may be discarded when shifting context, 0 defaults to half
102
103
int32_t n_predict = -1 ; // new tokens to predict
103
104
104
105
std::vector<std::string> antiprompt;
@@ -846,6 +847,7 @@ struct server_context {
846
847
slot.sparams .mirostat_eta = json_value (data, " mirostat_eta" , default_sparams.mirostat_eta );
847
848
slot.sparams .penalize_nl = json_value (data, " penalize_nl" , default_sparams.penalize_nl );
848
849
slot.params .n_keep = json_value (data, " n_keep" , slot.params .n_keep );
850
+ slot.params .n_discard = json_value (data, " n_discard" , default_params.n_discard );
849
851
slot.params .seed = json_value (data, " seed" , default_params.seed );
850
852
slot.sparams .n_probs = json_value (data, " n_probs" , default_sparams.n_probs );
851
853
slot.sparams .min_keep = json_value (data, " min_keep" , default_sparams.min_keep );
@@ -1253,6 +1255,7 @@ struct server_context {
1253
1255
{" stop" , slot.params .antiprompt },
1254
1256
{" n_predict" , slot.params .n_predict }, // TODO: fix duplicate key n_predict
1255
1257
{" n_keep" , slot.params .n_keep },
1258
+ {" n_discard" , slot.params .n_discard },
1256
1259
{" ignore_eos" , ignore_eos},
1257
1260
{" stream" , slot.params .stream },
1258
1261
{" logit_bias" , slot.sparams .logit_bias },
@@ -1696,7 +1699,7 @@ struct server_context {
1696
1699
// Shift context
1697
1700
const int n_keep = slot.params .n_keep + add_bos_token;
1698
1701
const int n_left = (int ) system_tokens.size () + slot.n_past - n_keep;
1699
- const int n_discard = n_left / 2 ;
1702
+ const int n_discard = slot. params . n_discard ? slot. params . n_discard : ( n_left / 2 ) ;
1700
1703
1701
1704
LOG_INFO (" slot context shift" , {
1702
1705
{" id_slot" , slot.id },
0 commit comments