@@ -51,7 +51,7 @@ def test_prefill():
51
51
all_token_ids = common_token_ids + unique_token_ids
52
52
req0 = make_request ("0" , all_token_ids )
53
53
computed_blocks , num_computed_tokens = manager .get_computed_blocks (req0 )
54
- assert len (req0 .kv_block_hashes ) == 3
54
+ assert len (manager . req_to_block_hashes [ req0 .request_id ] ) == 3
55
55
assert not computed_blocks
56
56
assert num_computed_tokens == 0
57
57
blocks = manager .allocate_slots (req0 , 55 , computed_blocks )
@@ -76,7 +76,7 @@ def test_prefill():
76
76
unique_token_ids = [3 ] * 5
77
77
req1 = make_request ("1" , common_token_ids + unique_token_ids )
78
78
computed_blocks , num_computed_tokens = manager .get_computed_blocks (req1 )
79
- assert len (req1 .kv_block_hashes ) == 3
79
+ assert len (manager . req_to_block_hashes [ req1 .request_id ] ) == 3
80
80
assert [b .block_id for b in computed_blocks ] == [0 , 1 , 2 ]
81
81
assert num_computed_tokens == 3 * 16
82
82
num_new_tokens = 53 - 3 * 16
@@ -107,7 +107,7 @@ def test_prefill():
107
107
unique_token_ids = [3 ] * 6
108
108
req2 = make_request ("2" , common_token_ids + unique_token_ids )
109
109
computed_blocks , num_computed_tokens = manager .get_computed_blocks (req2 )
110
- assert len (req2 .kv_block_hashes ) == 3
110
+ assert len (manager . req_to_block_hashes [ req2 .request_id ] ) == 3
111
111
assert [b .block_id for b in computed_blocks ] == [0 , 1 , 2 ]
112
112
assert num_computed_tokens == 3 * 16
113
113
num_new_tokens = 53 - 3 * 16
@@ -494,10 +494,11 @@ def test_mm_prefix_caching():
494
494
# Completed block should have hashes with extra keys.
495
495
assert not computed_blocks
496
496
assert num_computed_tokens == 0
497
- assert len (req0 .kv_block_hashes ) == 3
498
- assert req0 .kv_block_hashes [0 ].extra_keys == ("aaa" , )
499
- assert req0 .kv_block_hashes [1 ].extra_keys == ("aaa" , "bbb" )
500
- assert req0 .kv_block_hashes [2 ].extra_keys == ("bbb" , )
497
+ block_hashes = manager .req_to_block_hashes [req0 .request_id ]
498
+ assert len (block_hashes ) == 3
499
+ assert block_hashes [0 ].extra_keys == ("aaa" , )
500
+ assert block_hashes [1 ].extra_keys == ("aaa" , "bbb" )
501
+ assert block_hashes [2 ].extra_keys == ("bbb" , )
501
502
502
503
blocks = manager .allocate_slots (req0 , 59 , computed_blocks )
503
504
assert [b .block_id for b in blocks ] == [0 , 1 , 2 , 3 , 4 ]
@@ -510,8 +511,8 @@ def test_mm_prefix_caching():
510
511
assert new_blocks is not None and len (new_blocks ) == 0
511
512
512
513
# The just completed block should have hashes with extra keys.
513
- assert len (req0 . kv_block_hashes ) == 4
514
- assert req0 . kv_block_hashes [3 ].extra_keys == ("ccc" , )
514
+ assert len (block_hashes ) == 4
515
+ assert block_hashes [3 ].extra_keys == ("ccc" , )
515
516
516
517
# Cache hit.
517
518
unique_token_ids = [- 1 ] * 7 + [200 ] * 5
@@ -613,7 +614,7 @@ def test_reset_prefix_cache():
613
614
all_token_ids = full_block_token_ids + unique_token_ids
614
615
req1 = make_request ("1" , all_token_ids )
615
616
computed_blocks , _ = manager .get_computed_blocks (req1 )
616
- assert len (req1 .kv_block_hashes ) == 3
617
+ assert len (manager . req_to_block_hashes [ req1 .request_id ] ) == 3
617
618
assert len (computed_blocks ) == 3
618
619
blocks = manager .allocate_slots (req1 , 7 , computed_blocks )
619
620
assert [b .block_id for b in blocks ] == [4 ]
0 commit comments