Skip to content

Commit c4ab9f3

Browse files
authored
[V1] Remove pre-allocation for KV cache (#16941)
Signed-off-by: Woosuk Kwon <[email protected]>
1 parent 2689d5c commit c4ab9f3

File tree

5 files changed

+59
-139
lines changed

5 files changed

+59
-139
lines changed

tests/v1/core/test_kv_cache_utils.py

Lines changed: 4 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -496,8 +496,7 @@ def test_allocate_with_lookahead():
496496

497497
# Test case 1: Requires additional lookahead tokens
498498
kv_cache_manager = KVCacheManager(kv_cache_config=config,
499-
max_model_len=100,
500-
num_preallocate_tokens=0)
499+
max_model_len=100)
501500
blocks = kv_cache_manager.allocate_slots(
502501
request,
503502
num_tokens=3,
@@ -507,25 +506,19 @@ def test_allocate_with_lookahead():
507506

508507
# Test case 2: With precomputed blocks
509508
kv_cache_manager = KVCacheManager(kv_cache_config=config,
510-
max_model_len=100,
511-
num_preallocate_tokens=4)
512-
# num_preallocate_blocks = 4 // 4 - 2 // 4 = 1
509+
max_model_len=100)
513510
# required_blocks = ceil((3 + 2) /4) = 2
514-
# total_blocks = 1 + 2 = 3
515511
blocks = kv_cache_manager.allocate_slots(
516512
request,
517513
num_tokens=3,
518514
num_lookahead_tokens=2,
519515
)
520-
assert len(blocks) == 3
516+
assert len(blocks) == 2
521517

522518
# Test case 3: With precomputed blocks
523-
# num_preallocate_blocks = 4 // 4 - 4 // 4 = 0
524519
# required_blocks = ceil((3 + 4) / 4) = 2
525-
# total_blocks = 0 + 2 = 2
526520
kv_cache_manager = KVCacheManager(kv_cache_config=config,
527-
max_model_len=100,
528-
num_preallocate_tokens=4)
521+
max_model_len=100)
529522
blocks = kv_cache_manager.allocate_slots(
530523
request,
531524
num_tokens=3,

0 commit comments

Comments
 (0)