# SPDX-License-Identifier: Apache-2.0

import torch
-from typing import List, Set, Optional, Dict, Any
-from vllm.v1.sample.metadata import SamplingMetadata
+from typing import List, Optional, Dict, Any
+from vllm.v1.sample.metadata import SamplingMetadata

from vllm.model_executor.layers.utils import apply_penalties
from vllm.utils import is_pin_memory_available, make_tensor_with_pad
@@ -69,8 +69,8 @@ def _convert_to_tensors(output_token_ids: list[list[int]], vocab_size: int,
_DRY_DEFAULT_MULTIPLIER = 0.0
_DRY_DEFAULT_BASE = 1.0
_DRY_DEFAULT_ALLOWED_LEN = 3
-_DRY_DEFAULT_RANGE = 1500
-_DRY_DEFAULT_BREAKERS: Set[int] = set()
+_DRY_DEFAULT_RANGE = 1500
+_DRY_DEFAULT_BREAKERS: set[int] = set()


def apply_dry(
@@ -113,8 +113,9 @@ def apply_dry(
    for irow in range(batch_size):
        # Ensure sampling_metadata has data for this row index
        if irow >= len(sampling_metadata.extra_data):
-            # If metadata doesn't cover this row (shouldn't happen in normal flow), skip
-            continue
+            # If metadata doesn't cover this row (shouldn't happen in
+            # normal flow), skip
+            continue
        extra_data = sampling_metadata.extra_data[irow]
        if not extra_data:
            continue
@@ -135,7 +136,9 @@ def apply_dry(
        # Assuming prompt_token_ids is available and correctly indexed
        # Need prompt_lens if prompt_token_ids is padded
        prompt_len_attr = getattr(sampling_metadata, 'prompt_lens', None)
-        current_prompt_len = prompt_len_attr[irow] if prompt_len_attr and irow < len(prompt_len_attr) else None
+        current_prompt_len = None
+        if prompt_len_attr and irow < len(prompt_len_attr):
+            current_prompt_len = prompt_len_attr[irow]

        # Ensure prompt_token_ids covers this row
        if irow >= sampling_metadata.prompt_token_ids.shape[0]:
@@ -186,8 +189,10 @@ def apply_dry(
        max_ngram = 0
        for offset in range(1, min(seq_len, _DRY_MAX_NGRAM + 1)):
            check_idx = -offset - 1
-            if check_idx < -seq_len: break
-            if break_mask[check_idx]: break
+            if check_idx < -seq_len:
+                break
+            if break_mask[check_idx]:
+                break
            max_ngram = offset

        if max_ngram < allowed_length:
@@ -203,43 +208,43 @@ def apply_dry(
        if len(endpoint_indices) > _DRY_MAX_OCCURRENCES:
            endpoint_indices = endpoint_indices[-_DRY_MAX_OCCURRENCES:]

-        # 6. Calculate match lengths for potential next tokens
-        ngram_lens = torch.zeros(vocab_size, dtype=torch.int32, device=device)
-        found_early_exit_match = False
-
-        for idx_tensor in reversed(endpoint_indices):
-            idx = idx_tensor.item()
+        # 6. Iterate through found endpoints to compare n-grams
+        ngram_occurrence_count = 0
+        for end_idx in endpoint_indices:
+            # Compare n-grams backwards from endpoints
            match_len = 0
-            for unwind in range(1, max_ngram + 1):
-                current_idx = idx - unwind
-                history_idx = seq_len - 1 - unwind
-                if current_idx < 0: break
-                # Check breaks using the precomputed mask
-                if break_mask[current_idx] or break_mask[history_idx]: break
-                if token_seq_tensor[current_idx] != token_seq_tensor[history_idx]: break
-                match_len = unwind
-
-            if match_len >= allowed_length:  # Match length must meet minimum
-                next_tok_idx = idx + 1
-                if next_tok_idx < seq_len:
-                    next_tok = token_seq_tensor[next_tok_idx].item()
-                    # Use match_len as the length of the *matched* sequence
-                    new_len = match_len
-                    current_max = ngram_lens[next_tok].item()
-                    ngram_lens[next_tok] = max(current_max, new_len)
-                    if new_len >= _DRY_EARLY_EXIT_MATCH_LEN:
-                        found_early_exit_match = True
-
-            if found_early_exit_match: break
-
-        # 7. Apply penalty to logits for this row
-        penalty_mask = ngram_lens > 0
-        if penalty_mask.any():
-            match_lengths_for_penalty = ngram_lens[penalty_mask]
-            # Clamp exponent >= 0
-            exponents = (match_lengths_for_penalty.float() - allowed_length).clamp_(min=0.0)
-            scales = base ** exponents
-            logits[irow, penalty_mask] -= multiplier * scales
-        # --- End of DRY logic for row ---
+            for ngram_offset in range(max_ngram):
+                p_idx = end_idx - ngram_offset
+                q_idx = seq_len - 1 - ngram_offset
+                if p_idx < 0 or q_idx < 0:
+                    break  # Should not happen with logic checks
+
+                # Early exit inner loop if mismatch
+                if token_seq_tensor[p_idx] != token_seq_tensor[q_idx]:
+                    break
+
+                # Do not count matches across breaker tokens
+                if break_mask[p_idx - 1] or break_mask[q_idx - 1]:
+                    break
+
+                match_len = ngram_offset + 1
+
+            # Penalize the token that would continue the n-gram
+            next_token_idx = end_idx + 1
+            if 0 <= next_token_idx < seq_len:
+                next_token = token_seq_tensor[next_token_idx]
+                if not break_mask[next_token_idx - 1]:  # Check break mask before penalizing
+                    if 0 <= next_token < vocab_size:  # Ensure token is within vocab bounds
+                        penalty = (multiplier ** (ngram_occurrence_count + 1)) * (base ** match_len)
+                        logits[irow, next_token] /= penalty
+                        ngram_occurrence_count += 1
+
+            # Stop checking this endpoint if max occurrences reached
+            if ngram_occurrence_count >= _DRY_MAX_OCCURRENCES:
+                break
+
+            # Early exit outer loop if a long match is found
+            if match_len >= _DRY_EARLY_EXIT_MATCH_LEN:
+                break

    return logits
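For readers comparing the two approaches, here is a minimal standalone sketch (not part of this diff; the toy vocab, multiplier, base, and match values are illustrative assumptions, not values from the PR) contrasting the removed subtractive penalty with the new divisive, per-occurrence penalty:

```python
import torch

# Illustrative toy values (assumptions, not from the PR)
multiplier, base, allowed_length = 1.05, 1.75, 3
logits = torch.ones(8)   # toy vocab of 8 tokens, all logits 1.0
match_len = 4            # suffix matched 4 tokens at an earlier occurrence
next_token = 5           # token that would extend the repeated n-gram

# Removed rule: subtract multiplier * base^(match_len - allowed_length),
# with the exponent clamped at zero
old_penalty = multiplier * base ** max(match_len - allowed_length, 0.0)
print(logits[next_token] - old_penalty)   # logit the old code would leave

# New rule: divide by multiplier^(occurrence + 1) * base^match_len,
# compounding as more matched occurrences are found
occurrence = 0
new_penalty = (multiplier ** (occurrence + 1)) * (base ** match_len)
print(logits[next_token] / new_penalty)   # logit the new code would leave
```

One consequence of the divisive form worth noting in review: dividing by a penalty greater than one shrinks positive logits toward zero, but it also moves negative logits toward zero, which raises the probability of already-unlikely continuation tokens, whereas the removed subtractive form lowers the logit unconditionally.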