
Commit eda526d

llmixer committed
Added DRY and XTC samplers
1 parent 7403e00 commit eda526d

File tree

- llama_cpp/_internals.py
- llama_cpp/llama.py
- llama_cpp/llama_cpp.py

3 files changed: +148 -0 lines changed


Diff for: llama_cpp/_internals.py

+16 lines

```diff
@@ -800,6 +800,22 @@ def add_mirostat_v2(self, seed: int, tau: float, eta: float):
         sampler = llama_cpp.llama_sampler_init_mirostat_v2(seed, tau, eta)
         self._add_sampler(sampler)
 
+    def add_xtc(self, probability: float, threshold: float, min_keep: int, seed: int):
+        sampler = llama_cpp.llama_sampler_init_xtc(probability, threshold, min_keep, seed)
+        self._add_sampler(sampler)
+
+    def add_dry(self, model: LlamaModel, multiplier: float, base: float,
+                allowed_length: int, penalty_last_n: int, seq_breakers: list[str] = []):
+
+        # Convert Python strings to bytes
+        seq_breakers_bytes = [s.encode('utf-8') for s in seq_breakers]
+        # Create array of char*
+        arr = (ctypes.c_char_p * len(seq_breakers_bytes))(*seq_breakers_bytes)
+        sampler = llama_cpp.llama_sampler_init_dry(model.model, multiplier, base,
+                                                   allowed_length, penalty_last_n,
+                                                   arr, len(seq_breakers))
+        self._add_sampler(sampler)
+
     def add_grammar(self, model: LlamaModel, grammar: LlamaGrammar):
         sampler = llama_cpp.llama_sampler_init_grammar(
             model.model, grammar._grammar.encode("utf-8"), grammar._root.encode("utf-8")
```

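The two new chain methods wrap llama.cpp's `llama_sampler_init_xtc` (exclude-top-choices) and `llama_sampler_init_dry` (sequence-repetition penalty). A minimal usage sketch, not part of the commit: it assumes the enclosing sampler-chain wrapper in `_internals.py` (referred to here as `LlamaSampler`) and an already-loaded internal `LlamaModel` handle, and all numeric values are illustrative.

```python
# Sketch only: `model` is an internal LlamaModel handle and `LlamaSampler` is
# assumed to be the sampler-chain wrapper that these methods were added to.
sampler = LlamaSampler()

# DRY: penalize exact repetition of recent token sequences.
sampler.add_dry(
    model,
    multiplier=0.8,        # overall penalty strength; 0.0 effectively disables DRY
    base=1.75,             # exponential base of the penalty ramp
    allowed_length=2,      # repeats up to this length go unpenalized
    penalty_last_n=1024,   # how many recent tokens are scanned for repeats
    seq_breakers=["\n", ":", '"', "*"],  # strings that interrupt sequence matching
)

# XTC: with the given probability, drop top candidates above the threshold,
# keeping at least `min_keep` tokens.
sampler.add_xtc(probability=0.5, threshold=0.1, min_keep=1, seed=1337)
```
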
Diff for: llama_cpp/llama.py

+100 lines

```diff
@@ -677,6 +677,13 @@ def _init_sampler(
         mirostat_mode: int = 0,
         mirostat_eta: float = 0.1,
         mirostat_tau: float = 5.0,
+        xtc_probability: float = 0.0,
+        xtc_threshold: float = 0.1,
+        dry_multiplier: float = 0.0,
+        dry_allowed_length: int = 2,
+        dry_base: float = 1.75,
+        dry_range: int = 0,
+        dry_seq_breakers: list[str] = [],
         penalize_nl: bool = True,
         logits_processor: Optional[LogitsProcessorList] = None,
         grammar: Optional[LlamaGrammar] = None,
@@ -744,13 +751,15 @@ def apply_func(token_data_array: llama_cpp.llama_token_data_array_p):
         else:
             n_probs = 0
         min_keep = max(1, n_probs)
+        sampler.add_dry(self._model, dry_multiplier, dry_base, dry_allowed_length, dry_range, dry_seq_breakers)
         sampler.add_top_k(top_k)
         sampler.add_tail_free(tfs_z, min_keep)
         sampler.add_typical(typical_p, min_keep)
         sampler.add_top_p(top_p, min_keep)
         sampler.add_min_p(min_p, min_keep)
         sampler.add_temp(temp)
         sampler.add_dist(self._seed)
+        sampler.add_xtc(xtc_probability, xtc_threshold, min_keep, self._seed)
         return sampler
 
     def sample(
@@ -767,6 +776,13 @@ def sample(
         mirostat_mode: int = 0,
         mirostat_eta: float = 0.1,
         mirostat_tau: float = 5.0,
+        xtc_probability: float = 0.0,
+        xtc_threshold: float = 0.1,
+        dry_multiplier: float = 0.0,
+        dry_allowed_length: int = 2,
+        dry_base: float = 1.75,
+        dry_range: int = 0,
+        dry_seq_breakers: list[str] = [],
         penalize_nl: bool = True,
         logits_processor: Optional[LogitsProcessorList] = None,
         grammar: Optional[LlamaGrammar] = None,
@@ -802,6 +818,13 @@ def sample(
             mirostat_mode=mirostat_mode,
             mirostat_tau=mirostat_tau,
             mirostat_eta=mirostat_eta,
+            xtc_probability=xtc_probability,
+            xtc_threshold=xtc_threshold,
+            dry_multiplier=dry_multiplier,
+            dry_allowed_length=dry_allowed_length,
+            dry_base=dry_base,
+            dry_range=dry_range,
+            dry_seq_breakers=dry_seq_breakers,
             penalize_nl=penalize_nl,
             logits_processor=logits_processor,
             grammar=grammar,
@@ -831,6 +854,13 @@ def generate(
         mirostat_mode: int = 0,
         mirostat_tau: float = 5.0,
         mirostat_eta: float = 0.1,
+        xtc_probability: float = 0.0,
+        xtc_threshold: float = 0.1,
+        dry_multiplier: float = 0.0,
+        dry_allowed_length: int = 2,
+        dry_base: float = 1.75,
+        dry_range: int = 0,
+        dry_seq_breakers: list[str] = [],
         penalize_nl: bool = True,
         logits_processor: Optional[LogitsProcessorList] = None,
         stopping_criteria: Optional[StoppingCriteriaList] = None,
@@ -870,6 +900,13 @@ def generate(
             mirostat_mode=mirostat_mode,
             mirostat_tau=mirostat_tau,
             mirostat_eta=mirostat_eta,
+            xtc_probability=xtc_probability,
+            xtc_threshold=xtc_threshold,
+            dry_multiplier=dry_multiplier,
+            dry_allowed_length=dry_allowed_length,
+            dry_base=dry_base,
+            dry_range=dry_range,
+            dry_seq_breakers=dry_seq_breakers,
             penalize_nl=penalize_nl,
             logits_processor=logits_processor,
             grammar=grammar,
@@ -922,6 +959,13 @@ def generate(
             mirostat_mode=mirostat_mode,
             mirostat_tau=mirostat_tau,
             mirostat_eta=mirostat_eta,
+            xtc_probability=xtc_probability,
+            xtc_threshold=xtc_threshold,
+            dry_multiplier=dry_multiplier,
+            dry_allowed_length=dry_allowed_length,
+            dry_base=dry_base,
+            dry_range=dry_range,
+            dry_seq_breakers=dry_seq_breakers,
             logits_processor=logits_processor,
             grammar=grammar,
             penalize_nl=penalize_nl,
@@ -1138,6 +1182,13 @@ def _create_completion(
         mirostat_mode: int = 0,
         mirostat_tau: float = 5.0,
         mirostat_eta: float = 0.1,
+        xtc_probability: float = 0.0,
+        xtc_threshold: float = 0.1,
+        dry_multiplier: float = 0.0,
+        dry_allowed_length: int = 2,
+        dry_base: float = 1.75,
+        dry_range: int = 0,
+        dry_seq_breakers: list[str] = [],
         model: Optional[str] = None,
         stopping_criteria: Optional[StoppingCriteriaList] = None,
         logits_processor: Optional[LogitsProcessorList] = None,
@@ -1326,6 +1377,13 @@ def logit_bias_processor(
             mirostat_mode=mirostat_mode,
             mirostat_tau=mirostat_tau,
             mirostat_eta=mirostat_eta,
+            xtc_probability=xtc_probability,
+            xtc_threshold=xtc_threshold,
+            dry_multiplier=dry_multiplier,
+            dry_allowed_length=dry_allowed_length,
+            dry_base=dry_base,
+            dry_range=dry_range,
+            dry_seq_breakers=dry_seq_breakers,
             frequency_penalty=frequency_penalty,
             presence_penalty=presence_penalty,
             repeat_penalty=repeat_penalty,
@@ -1758,6 +1816,13 @@ def create_completion(
         mirostat_mode: int = 0,
         mirostat_tau: float = 5.0,
         mirostat_eta: float = 0.1,
+        xtc_probability: float = 0.0,
+        xtc_threshold: float = 0.1,
+        dry_multiplier: float = 0.0,
+        dry_allowed_length: int = 2,
+        dry_base: float = 1.75,
+        dry_range: int = 0,
+        dry_seq_breakers: list[str] = [],
         model: Optional[str] = None,
         stopping_criteria: Optional[StoppingCriteriaList] = None,
         logits_processor: Optional[LogitsProcessorList] = None,
@@ -1821,6 +1886,13 @@ def create_completion(
            mirostat_mode=mirostat_mode,
            mirostat_tau=mirostat_tau,
            mirostat_eta=mirostat_eta,
+           xtc_probability=xtc_probability,
+           xtc_threshold=xtc_threshold,
+           dry_multiplier=dry_multiplier,
+           dry_allowed_length=dry_allowed_length,
+           dry_base=dry_base,
+           dry_range=dry_range,
+           dry_seq_breakers=dry_seq_breakers,
            model=model,
            stopping_criteria=stopping_criteria,
            logits_processor=logits_processor,
@@ -1855,6 +1927,13 @@ def __call__(
         mirostat_mode: int = 0,
         mirostat_tau: float = 5.0,
         mirostat_eta: float = 0.1,
+        xtc_probability: float = 0.0,
+        xtc_threshold: float = 0.1,
+        dry_multiplier: float = 0.0,
+        dry_allowed_length: int = 2,
+        dry_base: float = 1.75,
+        dry_range: int = 0,
+        dry_seq_breakers: list[str] = [],
         model: Optional[str] = None,
         stopping_criteria: Optional[StoppingCriteriaList] = None,
         logits_processor: Optional[LogitsProcessorList] = None,
@@ -1918,6 +1997,13 @@ def __call__(
            mirostat_mode=mirostat_mode,
            mirostat_tau=mirostat_tau,
            mirostat_eta=mirostat_eta,
+           xtc_probability=xtc_probability,
+           xtc_threshold=xtc_threshold,
+           dry_multiplier=dry_multiplier,
+           dry_allowed_length=dry_allowed_length,
+           dry_base=dry_base,
+           dry_range=dry_range,
+           dry_seq_breakers=dry_seq_breakers,
            model=model,
            stopping_criteria=stopping_criteria,
            logits_processor=logits_processor,
@@ -1949,6 +2035,13 @@ def create_chat_completion(
         mirostat_mode: int = 0,
         mirostat_tau: float = 5.0,
         mirostat_eta: float = 0.1,
+        xtc_probability: float = 0.0,
+        xtc_threshold: float = 0.1,
+        dry_multiplier: float = 0.0,
+        dry_allowed_length: int = 2,
+        dry_base: float = 1.75,
+        dry_range: int = 0,
+        dry_seq_breakers: list[str] = [],
         model: Optional[str] = None,
         logits_processor: Optional[LogitsProcessorList] = None,
         grammar: Optional[LlamaGrammar] = None,
@@ -2022,6 +2115,13 @@ def create_chat_completion(
             mirostat_mode=mirostat_mode,
             mirostat_tau=mirostat_tau,
             mirostat_eta=mirostat_eta,
+            xtc_probability=xtc_probability,
+            xtc_threshold=xtc_threshold,
+            dry_multiplier=dry_multiplier,
+            dry_allowed_length=dry_allowed_length,
+            dry_base=dry_base,
+            dry_range=dry_range,
+            dry_seq_breakers=dry_seq_breakers,
             model=model,
             logits_processor=logits_processor,
             grammar=grammar,
```

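With `llama.py` updated, the new knobs are threaded through `sample`, `generate`, `create_completion`, `__call__`, and `create_chat_completion`. A hedged usage sketch through the high-level API follows; only the parameter names and defaults come from this diff, while the model path and values are illustrative. Note that `dry_range` is passed through to the binding's `dry_penalty_last_n` argument.

```python
from llama_cpp import Llama

# Illustrative only: path and values are placeholders, not part of the commit.
llm = Llama(model_path="./model.gguf")

out = llm.create_completion(
    "Write a short story about a lighthouse keeper.",
    max_tokens=256,
    temperature=0.8,
    # XTC: leaving xtc_probability at its 0.0 default keeps XTC disabled.
    xtc_probability=0.5,
    xtc_threshold=0.1,
    # DRY: leaving dry_multiplier at its 0.0 default keeps DRY disabled.
    dry_multiplier=0.8,
    dry_base=1.75,
    dry_allowed_length=2,
    dry_range=1024,          # forwarded as dry_penalty_last_n in the binding
    dry_seq_breakers=["\n", ":", '"', "*"],
)
print(out["choices"][0]["text"])
```
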
Diff for: llama_cpp/llama_cpp.py

+32 lines

```diff
@@ -3244,6 +3244,38 @@ def llama_sampler_init_xtc(
 ) -> llama_sampler_p:
     ...
 
+# LLAMA_API struct llama_sampler * llama_sampler_init_dry(
+#         const struct llama_model * model,
+#                            float   dry_multiplier,
+#                            float   dry_base,
+#                          int32_t   dry_allowed_length,
+#                          int32_t   dry_penalty_last_n,
+#                      const char ** seq_breakers,
+#                           size_t   num_breakers);
+@ctypes_function(
+    "llama_sampler_init_dry",
+    [
+        llama_model_p_ctypes,
+        ctypes.c_float,
+        ctypes.c_float,
+        ctypes.c_int32,
+        ctypes.c_int32,
+        ctypes.POINTER(ctypes.c_char_p),
+        ctypes.c_size_t
+    ],
+    llama_sampler_p_ctypes,
+)
+def llama_sampler_init_dry(
+    model: llama_model_p,
+    dry_multiplier: float,
+    dry_base: float,
+    dry_allowed_length: int,
+    dry_penalty_last_n: int,
+    seq_breakers: list[str],
+    num_breakers: int,
+) -> llama_sampler_p:
+    ...
+
 
 # /// @details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
 # /// @param candidates A vector of `llama_token_data` containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text.
```

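For completeness, the raw binding can also be called directly. The sketch below mirrors the `char **` marshalling used in `_internals.py` above; the model-loading calls are the library's existing low-level API, the path is a placeholder, and the numeric values are illustrative.

```python
import ctypes
import llama_cpp

# Load a model with the existing low-level API (placeholder path).
params = llama_cpp.llama_model_default_params()
model = llama_cpp.llama_load_model_from_file(b"./model.gguf", params)

# Marshal Python strings into the `const char ** seq_breakers` argument,
# exactly as the new add_dry() wrapper does.
seq_breakers = ["\n", ":", '"', "*"]
seq_breakers_bytes = [s.encode("utf-8") for s in seq_breakers]
arr = (ctypes.c_char_p * len(seq_breakers_bytes))(*seq_breakers_bytes)

sampler = llama_cpp.llama_sampler_init_dry(
    model,              # const struct llama_model *
    0.8,                # dry_multiplier
    1.75,               # dry_base
    2,                  # dry_allowed_length
    1024,               # dry_penalty_last_n
    arr,                # const char ** seq_breakers
    len(seq_breakers),  # num_breakers
)
```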