@@ -496,8 +496,7 @@ def test_allocate_with_lookahead():
496
496
497
497
# Test case 1: Requires additional lookahead tokens
498
498
kv_cache_manager = KVCacheManager (kv_cache_config = config ,
499
- max_model_len = 100 ,
500
- num_preallocate_tokens = 0 )
499
+ max_model_len = 100 )
501
500
blocks = kv_cache_manager .allocate_slots (
502
501
request ,
503
502
num_tokens = 3 ,
@@ -507,25 +506,19 @@ def test_allocate_with_lookahead():
507
506
508
507
# Test case 2: With precomputed blocks
509
508
kv_cache_manager = KVCacheManager (kv_cache_config = config ,
510
- max_model_len = 100 ,
511
- num_preallocate_tokens = 4 )
512
- # num_preallocate_blocks = 4 // 4 - 2 // 4 = 1
509
+ max_model_len = 100 )
513
510
# required_blocks = ceil((3 + 2) /4) = 2
514
- # total_blocks = 1 + 2 = 3
515
511
blocks = kv_cache_manager .allocate_slots (
516
512
request ,
517
513
num_tokens = 3 ,
518
514
num_lookahead_tokens = 2 ,
519
515
)
520
- assert len (blocks ) == 3
516
+ assert len (blocks ) == 2
521
517
522
518
# Test case 3: With precomputed blocks
523
- # num_preallocate_blocks = 4 // 4 - 4 // 4 = 0
524
519
# required_blocks = ceil((3 + 4) / 4) = 2
525
- # total_blocks = 0 + 2 = 2
526
520
kv_cache_manager = KVCacheManager (kv_cache_config = config ,
527
- max_model_len = 100 ,
528
- num_preallocate_tokens = 4 )
521
+ max_model_len = 100 )
529
522
blocks = kv_cache_manager .allocate_slots (
530
523
request ,
531
524
num_tokens = 3 ,
0 commit comments