vllm-project
diff --git a/tests/v1/core/test_kv_cache_utils.py b/tests/v1/core/test_kv_cache_utils.py
Lines changed: 4 additions & 11 deletions
@@ -496,8 +496,7 @@ def test_allocate_with_lookahead():
 
     # Test case 1: Requires additional lookahead tokens
     kv_cache_manager = KVCacheManager(kv_cache_config=config,
-                                      max_model_len=100,
-                                      num_preallocate_tokens=0)
+                                      max_model_len=100)
     blocks = kv_cache_manager.allocate_slots(
         request,
         num_tokens=3,
@@ -507,25 +506,19 @@ def test_allocate_with_lookahead():
 
     # Test case 2: With precomputed blocks
     kv_cache_manager = KVCacheManager(kv_cache_config=config,
-                                      max_model_len=100,
-                                      num_preallocate_tokens=4)
-    # num_preallocate_blocks = 4 // 4 - 2 // 4 = 1
+                                      max_model_len=100)
     # required_blocks = ceil((3 + 2) /4) = 2
-    # total_blocks = 1 + 2 = 3
     blocks = kv_cache_manager.allocate_slots(
         request,
         num_tokens=3,
         num_lookahead_tokens=2,
     )
-    assert len(blocks) == 3
+    assert len(blocks) == 2
 
     # Test case 3: With precomputed blocks
-    # num_preallocate_blocks = 4 // 4 - 4 // 4 = 0
     # required_blocks = ceil((3 + 4) / 4) = 2
-    # total_blocks = 0 + 2 = 2
     kv_cache_manager = KVCacheManager(kv_cache_config=config,
-                                      max_model_len=100,
-                                      num_preallocate_tokens=4)
+                                      max_model_len=100)
     blocks = kv_cache_manager.allocate_slots(
         request,
         num_tokens=3,