Skip to content

Commit 0a289b7

Browse files
committed
Lint fixes
1 parent 3725309 commit 0a289b7

File tree

1 file changed: +7 additions, -6 deletions

vllm/v1/worker/gpu_model_runner.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -241,17 +241,18 @@ def __init__(
241241
device=self.device)
242242

243243
# OPTIMIZATION: Cache the tensors rather than creating them every step.
244-
245-
# For long context, we may need to store using int64 so max token idx doesn't overflow
246-
# token_indices is calculated by adding (req_idx * max_model_len) to per-request token indices
247-
# e.g. [0, 1, 0, 1, 2, 3, 4, 0, 1, 2]
244+
# For long context, may need to store int64 so max idx doesn't overflow
245+
# token_indices calculated by adding (req_idx * max_model_len)
246+
# to per-request indices e.g. [0, 1, 0, 1, 2, 3, 4, 0, 1, 2]
248247
# -> [0, 1, M, M + 1, M + 2, M + 3, M + 4, 2 * M, 2 * M + 1, 2 * M + 2]
249248
# where M is the max_model_len.
250-
max_token_idx = self.max_num_tokens + self.max_num_reqs * self.max_model_len
249+
max_token_idx = self.max_num_tokens + self.max_num_reqs * \
250+
self.max_model_len
251251
self.arange_np = np.arange(max(self.max_num_reqs + 1,
252252
self.max_model_len,
253253
self.max_num_tokens),
254-
dtype=np.int32 if max_token_idx <= np.iinfo(np.int32).max else np.int64)
254+
dtype=np.int32 if max_token_idx <= np.iinfo(
255+
np.int32).max else np.int64)
255256

256257
# NOTE(woosuk): These tensors are "stateless", i.e., they are literally
257258
# a faster version of creating a new tensor every time. Thus, we should

Comments (0)