Skip to content

Commit cc38d84

Browse files
WoosukKwon authored and rasmith committed
[V1][Minor] Minor optimizations for update_from_output (vllm-project#12454)
Signed-off-by: Woosuk Kwon <[email protected]>
1 parent b409e64 commit cc38d84

File tree

1 file changed

+13
-7
lines changed

1 file changed

+13
-7
lines changed

vllm/v1/core/scheduler.py

Lines changed: 13 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -411,6 +411,10 @@ def update_from_output(
411411
num_scheduled_tokens = scheduler_output.num_scheduled_tokens
412412
new_running: List[Request] = []
413413
outputs: List[EngineCoreOutput] = []
414+
415+
# NOTE(woosuk): As len(self.running) can be up to 1K or more, the below
416+
# loop can be a performance bottleneck. We should do our best to avoid
417+
# expensive operations inside the loop.
414418
for request in self.running:
415419
req_id = request.request_id
416420
request.num_computed_tokens += num_scheduled_tokens[req_id]
@@ -421,13 +425,15 @@ def update_from_output(
421425

422426
cached_encoder_input_ids = (
423427
self.encoder_cache_manager.get_cached_input_ids(request))
424-
for input_id in list(cached_encoder_input_ids):
425-
start_pos = request.mm_positions[input_id]["offset"]
426-
num_tokens = request.mm_positions[input_id]["length"]
427-
if start_pos + num_tokens <= request.num_computed_tokens:
428-
# The encoder output is already processed and stored
429-
# in the decoder's KV cache.
430-
self.encoder_cache_manager.free(request, input_id)
428+
# OPTIMIZATION: Avoid list(set) if the set is empty.
429+
if cached_encoder_input_ids:
430+
for input_id in list(cached_encoder_input_ids):
431+
start_pos = request.mm_positions[input_id]["offset"]
432+
num_tokens = request.mm_positions[input_id]["length"]
433+
if start_pos + num_tokens <= request.num_computed_tokens:
434+
# The encoder output is already processed and stored
435+
# in the decoder's KV cache.
436+
self.encoder_cache_manager.free(request, input_id)
431437

432438
if request.num_computed_tokens == request.num_tokens:
433439
req_index = model_runner_output.req_id_to_index[req_id]

0 commit comments

Comments
 (0)