@@ -275,14 +275,23 @@ def __init__(self, params: GptParams) -> None:
275
275
presence_penalty = { self .params .presence_penalty } ,\
276
276
frequency_penalty = { self .params .frequency_penalty } ,\
277
277
top_k = { self .params .top_k } ,\
278
- tfs_z = { self .params .tfs_z } ,\
278
+ top_n_sigma = { self .params .top_n_sigma } ,\
279
279
top_p = { self .params .top_p } ,\
280
280
typical_p = { self .params .typical_p } ,\
281
281
temp = { self .params .temp } ,\
282
282
mirostat = { self .params .mirostat } ,\
283
283
mirostat_lr = { self .params .mirostat_eta } ,\
284
284
mirostat_ent = { self .params .mirostat_tau } ,\
285
285
286
+ xtc_threshold = { self .params .xtc_threshold } ,\
287
+ xtc_probability = { self .params .xtc_probability } ,\
288
+
289
+ dry_multiplier = { self .params .dry_multiplier } ,\
290
+ dry_base = { self .params .dry_base } ,\
291
+ dry_allowed_length = { self .params .dry_allowed_length } ,\
292
+ dry_penalty_last_n = { self .params .dry_penalty_last_n } ,\
293
+ dry_seq_breakers = { self .params .dry_seq_breakers } ,\
294
+
286
295
generate: n_ctx = { self .n_ctx } ,\
287
296
n_batch = { self .params .n_batch } ,\
288
297
n_predict = { self .params .n_predict } ,\
@@ -454,7 +463,7 @@ def generate(self):
454
463
_arr = (llama_cpp .llama_token * last_n_repeat )(
455
464
* self .last_n_tokens [len (self .last_n_tokens ) - last_n_repeat :]
456
465
)
457
- llama_cpp .llama_sample_repetition_penalties (
466
+ llama_cpp .llama_sampler_init_penalties (
458
467
ctx = self .ctx ,
459
468
candidates = candidates_p ,
460
469
last_tokens_data = _arr ,
@@ -474,15 +483,15 @@ def generate(self):
474
483
475
484
if self .params .temp <= 0 :
476
485
# Greedy sampling
477
- id = llama_cpp .llama_sample_token_greedy (self .ctx , candidates_p )
486
+ id = llama_cpp .llama_sampler_init_greedy (self .ctx , candidates_p )
478
487
else :
479
488
if self .params .mirostat == 1 :
480
489
mirostat_mu = 2.0 * self .params .mirostat_tau
481
490
mirostat_m = 100
482
- llama_cpp .llama_sample_temperature (
491
+ llama_cpp .llama_sampler_init_temp (
483
492
self .ctx , candidates_p , llama_cpp .c_float (self .params .temp )
484
493
)
485
- id = llama_cpp .llama_sample_token_mirostat (
494
+ id = llama_cpp .llama_sampler_init_mirostat (
486
495
self .ctx ,
487
496
candidates_p ,
488
497
llama_cpp .c_float (self .params .mirostat_tau ),
@@ -495,7 +504,7 @@ def generate(self):
495
504
llama_cpp .llama_sample_temperature (
496
505
self .ctx , candidates_p , llama_cpp .c_float (self .params .temp )
497
506
)
498
- id = llama_cpp .llama_sample_token_mirostat_v2 (
507
+ id = llama_cpp .llama_sampler_init_mirostat_v2 (
499
508
self .ctx ,
500
509
candidates_p ,
501
510
llama_cpp .c_float (self .params .mirostat_tau ),
@@ -504,31 +513,31 @@ def generate(self):
504
513
)
505
514
else :
506
515
# Temperature sampling
507
- llama_cpp .llama_sample_top_k (
516
+ llama_cpp .llama_sampler_init_top_k (
508
517
self .ctx ,
509
518
candidates_p ,
510
519
top_k ,
511
520
min_keep = llama_cpp .c_size_t (1 ),
512
521
)
513
- llama_cpp .llama_sample_tail_free (
522
+ llama_cpp .llama_sampler_init_top_n_sigma (
514
523
self .ctx ,
515
524
candidates_p ,
516
- llama_cpp .c_float (self .params .tfs_z ),
525
+ llama_cpp .c_float (self .params .top_n_sigma ),
517
526
min_keep = llama_cpp .c_size_t (1 ),
518
527
)
519
- llama_cpp .llama_sample_typical (
528
+ llama_cpp .llama_sampler_init_typical (
520
529
self .ctx ,
521
530
candidates_p ,
522
531
llama_cpp .c_float (self .params .typical_p ),
523
532
min_keep = llama_cpp .c_size_t (1 ),
524
533
)
525
- llama_cpp .llama_sample_top_p (
534
+ llama_cpp .llama_sampler_init_top_p (
526
535
self .ctx ,
527
536
candidates_p ,
528
537
llama_cpp .c_float (self .params .top_p ),
529
538
min_keep = llama_cpp .c_size_t (1 ),
530
539
)
531
- llama_cpp .llama_sample_temperature (
540
+ llama_cpp .llama_sampler_init_temp (
532
541
self .ctx , candidates_p , llama_cpp .c_float (self .params .temp )
533
542
)
534
543
id = llama_cpp .llama_sample_token (self .ctx , candidates_p )
0 commit comments