
Commit 04149cc

[BugFix] fix some typos found by typos. (#16314)
Signed-off-by: yihong0618 <[email protected]>
1 parent 24834f4 commit 04149cc

21 files changed, +33 -33 lines changed

Diff for: benchmarks/benchmark_serving.py (+2 -2)

@@ -921,15 +921,15 @@ def main(args: argparse.Namespace):
         "--percentile-metrics",
         type=str,
         default="ttft,tpot,itl",
-        help="Comma-seperated list of selected metrics to report percentils. "
+        help="Comma-separated list of selected metrics to report percentils. "
         "This argument specifies the metrics to report percentiles. "
         "Allowed metric names are \"ttft\", \"tpot\", \"itl\", \"e2el\". "
         "Default value is \"ttft,tpot,itl\".")
     parser.add_argument(
         "--metric-percentiles",
         type=str,
         default="99",
-        help="Comma-seperated list of percentiles for selected metrics. "
+        help="Comma-separated list of percentiles for selected metrics. "
         "To report 25-th, 50-th, and 75-th percentiles, use \"25,50,75\". "
         "Default value is \"99\". "
         "Use \"--percentile-metrics\" to select metrics.",

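The two flags above are worth a concrete illustration. Below is a minimal sketch, with made-up numbers and a hypothetical helper name, of how such comma-separated flag values are typically split and fed to a percentile computation; it is not vLLM's reporting code:

    import numpy as np

    def report_percentiles(metric_values, percentile_metrics, metric_percentiles):
        # Split the comma-separated flags, e.g. "ttft,tpot,itl" and "50,99".
        selected = percentile_metrics.split(",")
        percentiles = [float(p) for p in metric_percentiles.split(",")]
        for metric in selected:
            for p in percentiles:
                value = np.percentile(metric_values[metric], p)
                print(f"P{p:g} {metric}: {value:.2f} ms")

    report_percentiles(
        {"ttft": [12.0, 15.5, 20.1], "tpot": [3.2, 3.4, 3.9]},
        percentile_metrics="ttft,tpot",
        metric_percentiles="50,99",
    )

The same help text, including the "percentils" typo that this commit leaves in place, appears again in benchmark_serving_structured_output.py and vllm/benchmarks/serve.py below.
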
Diff for: benchmarks/benchmark_serving_structured_output.py (+2 -2)

@@ -963,15 +963,15 @@ def main(args: argparse.Namespace):
         "--percentile-metrics",
         type=str,
         default="ttft,tpot,itl",
-        help="Comma-seperated list of selected metrics to report percentils. "
+        help="Comma-separated list of selected metrics to report percentils. "
         "This argument specifies the metrics to report percentiles. "
        "Allowed metric names are \"ttft\", \"tpot\", \"itl\", \"e2el\". "
         "Default value is \"ttft,tpot,itl\".")
     parser.add_argument(
         "--metric-percentiles",
         type=str,
         default="99",
-        help="Comma-seperated list of percentiles for selected metrics. "
+        help="Comma-separated list of percentiles for selected metrics. "
         "To report 25-th, 50-th, and 75-th percentiles, use \"25,50,75\". "
         "Default value is \"99\". "
         "Use \"--percentile-metrics\" to select metrics.",

Diff for: csrc/mamba/causal_conv1d/causal_conv1d.cu (+1 -1)

@@ -422,7 +422,7 @@ void causal_conv1d_fwd_kernel(ConvParamsBase params) {
     int final_state_position = ((seqlen - (kWidth - 1)) - (n_chunks - 1) * kChunkSize);
     // in case the final state is separated between the last "smem_exchange" and
     // and the one before it (chunk = n_chunks - 1 and chunk = n_chunks - 2),
-    // (which occurs when `final_state_position` is a non-positivie index)
+    // (which occurs when `final_state_position` is a non-positive index)
     // we load the correct data from smem_exchange from both chunks, the last chunk iteration and the one before it
     if (conv_states != nullptr && final_state_position < 0 && seqlen > kWidth){
         input_t vals_load[kNElts] = {0};
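To make the non-positive case concrete (illustrative numbers, not from the source): with seqlen = 130, kWidth = 4 and kChunkSize = 128, n_chunks = 2, so final_state_position = (130 - 3) - 1 * 128 = -1. The kWidth - 1 trailing elements needed for the final state then straddle the last two chunks, which is exactly when both smem_exchange buffers must be read.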

Diff for: vllm/attention/backends/flash_attn.py (+1 -1)

@@ -326,7 +326,7 @@ def advance_step(self,
         assert self.use_cuda_graph

         if turn_prefills_into_decodes:
-            # When Mutli-Step is enabled with Chunked-Prefill, prefills and
+            # When Multi-Step is enabled with Chunked-Prefill, prefills and
             # decodes are scheduled together. In the first step, all the
             # prefills turn into decodes. This update reflects that
             # conversion.

Diff for: vllm/attention/backends/hpu_attn.py (+3 -3)

@@ -152,11 +152,11 @@ def __init__(
             logger.warning("Could not import HPU FusedSDPA kernel. "
                            "vLLM will use native implementation.")

-        suppored_head_sizes = HPUPagedAttention.get_supported_head_sizes()
-        if head_size not in suppored_head_sizes:
+        supported_head_sizes = HPUPagedAttention.get_supported_head_sizes()
+        if head_size not in supported_head_sizes:
             raise ValueError(
                 f"Head size {head_size} is not supported by PagedAttention. "
-                f"Supported head sizes are: {suppored_head_sizes}.")
+                f"Supported head sizes are: {supported_head_sizes}.")

         if attn_type != AttentionType.DECODER:
             raise NotImplementedError("Encoder self-attention and "

Diff for: vllm/attention/backends/mla/common.py (+3 -3)

@@ -83,8 +83,8 @@
     return spda_o @ W_O

 NOTE: in the actual code,
-    `kv_b_proj` is [W_UK; W_UV] concatnated per head
-    `q_b_proj` is [W_UQ; W_QR] concatnated per head
+    `kv_b_proj` is [W_UK; W_UV] concatenated per head
+    `q_b_proj` is [W_UQ; W_QR] concatenated per head
     `out_proj` is W_O

@@ -667,7 +667,7 @@ def advance_step(self,
         assert num_seqs > num_queries

         if turn_prefills_into_decodes:
-            # When Mutli-Step is enabled with Chunked-Prefill, prefills and
+            # When Multi-Step is enabled with Chunked-Prefill, prefills and
             # decodes are scheduled together. In the first step, all the
             # prefills turn into decodes. This update reflects that
             # conversion.
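A minimal sketch of what "concatenated per head" means for `kv_b_proj`, assuming DeepSeek-style dimension names (kv_lora_rank, qk_nope_head_dim, v_head_dim); the shapes are illustrative and this is not vLLM's actual layer code:

    import torch

    num_heads, kv_lora_rank = 8, 512
    qk_nope_head_dim, v_head_dim = 128, 128

    # A single weight stores W_UK and W_UV for every head, stacked along
    # the output dimension.
    kv_b_proj = torch.randn(num_heads * (qk_nope_head_dim + v_head_dim),
                            kv_lora_rank)

    # Recover the per-head blocks, then split off the two projections.
    w = kv_b_proj.view(num_heads, qk_nope_head_dim + v_head_dim, kv_lora_rank)
    W_UK, W_UV = w.split([qk_nope_head_dim, v_head_dim], dim=1)
    print(W_UK.shape, W_UV.shape)  # (8, 128, 512) and (8, 128, 512)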

Diff for: vllm/attention/backends/xformers.py (+3 -3)

@@ -414,11 +414,11 @@ def __init__(
         assert self.num_heads % self.num_kv_heads == 0
         self.num_queries_per_kv = self.num_heads // self.num_kv_heads

-        suppored_head_sizes = PagedAttention.get_supported_head_sizes()
-        if head_size not in suppored_head_sizes:
+        supported_head_sizes = PagedAttention.get_supported_head_sizes()
+        if head_size not in supported_head_sizes:
             raise ValueError(
                 f"Head size {head_size} is not supported by PagedAttention. "
-                f"Supported head sizes are: {suppored_head_sizes}.")
+                f"Supported head sizes are: {supported_head_sizes}.")

         self.attn_type = attn_type

Diff for: vllm/attention/ops/nki_flash_attn.py (+1 -1)

@@ -446,7 +446,7 @@ def flash_paged_attention(
     IO tensor dtypes:
       - This kernel assumes all IO tensors have the same dtype except for
         block_tables (int32) and mask (int32)
-      - If mixed_percision is True, then all Tensor Engine operation will be
+      - If mixed_precision is True, then all Tensor Engine operation will be
        performed in bfloat16 and accumulation will be performed in float32.
        Otherwise the intermediates will be in the same type as the inputs.

Diff for: vllm/benchmarks/serve.py (+2 -2)

@@ -724,14 +724,14 @@ def add_cli_args(parser: argparse.ArgumentParser):
         "--percentile-metrics",
         type=str,
         default="ttft,tpot,itl",
-        help="Comma-seperated list of selected metrics to report percentils. "
+        help="Comma-separated list of selected metrics to report percentils. "
         "This argument specifies the metrics to report percentiles. "
         "Allowed metric names are \"ttft\", \"tpot\", \"itl\", \"e2el\". ")
     parser.add_argument(
         "--metric-percentiles",
         type=str,
         default="99",
-        help="Comma-seperated list of percentiles for selected metrics. "
+        help="Comma-separated list of percentiles for selected metrics. "
         "To report 25-th, 50-th, and 75-th percentiles, use \"25,50,75\". "
         "Use \"--percentile-metrics\" to select metrics.",
     )

Diff for: vllm/engine/output_processor/multi_step.py (+1 -1)

@@ -93,7 +93,7 @@ def process_outputs(self,
            externally (before the next schedule() call)
        """
        # Sequences can be in RUNNING or FINISHED_ABORTED state
-       # once scheduled, as a sequence is moved to FINSIHED_ABORTED
+       # once scheduled, as a sequence is moved to FINISHED_ABORTED
        # if a client disconnects from the api server.
        seqs = sequence_group.get_seqs(status=SequenceStatus.RUNNING)
        if seqs is None:

Diff for: vllm/entrypoints/openai/tool_parsers/utils.py (+1 -1)

@@ -98,7 +98,7 @@ def find_all_indices(string: str, substring: str) -> list[int]:


 # partial_json_parser doesn't support extra data and
-# JSONDecorder.raw_decode doesn't support partial JSON
+# JSONDecoder.raw_decode doesn't support partial JSON
 def partial_json_loads(input_str: str, flags: Allow) -> tuple[Any, int]:
     try:
         return (partial_json_parser.loads(input_str, flags), len(input_str))
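That two-line comment is the whole rationale for the function, so a hedged sketch of the distinction it draws may help (default-style Allow flags assumed; the exact exceptions raised on the failing inputs are the libraries' own):

    import json
    import partial_json_parser
    from partial_json_parser.core.options import Allow

    decoder = json.JSONDecoder()
    # raw_decode tolerates trailing data: it returns the parsed value plus
    # the index where parsing stopped...
    obj, end = decoder.raw_decode('{"a": 1} trailing')
    print(obj, end)  # {'a': 1} 8
    # ...but it raises JSONDecodeError on truncated input like '{"a": 1'.

    # partial_json_parser is the mirror image: truncated input parses fine,
    # while trailing data after the value is rejected.
    print(partial_json_parser.loads('{"a": 1', Allow.ALL))  # {'a': 1}

partial_json_loads above presumably combines the two, trying the partial parser first and falling back to raw_decode when extra trailing data is the problem.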

Diff for: vllm/model_executor/layers/quantization/kernels/scaled_mm/__init__.py (+1 -1)

@@ -29,7 +29,7 @@ def choose_scaled_mm_linear_kernel(
         compute_capability: Optional[int] = None
 ) -> Type[ScaledMMLinearKernel]:
     """
-    Choose an ScalledMMLinearKernel that can implement the given config for the
+    Choose an ScaledMMLinearKernel that can implement the given config for the
     given compute capability. Attempts to choose the best kernel in terms of
     performance.

Diff for: vllm/platforms/cpu.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -69,12 +69,12 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
6969

7070
cache_config = vllm_config.cache_config
7171

72-
ipex_avaliable = find_spec("intel_extension_for_pytorch") is not None
72+
ipex_available = find_spec("intel_extension_for_pytorch") is not None
7373

7474
if cache_config and cache_config.block_size is None:
75-
cache_config.block_size = 128 if ipex_avaliable else 16
75+
cache_config.block_size = 128 if ipex_available else 16
7676

77-
if not ipex_avaliable and cache_config.block_size != 16:
77+
if not ipex_available and cache_config.block_size != 16:
7878
raise RuntimeError(
7979
f"--block-size={cache_config.block_size} requires"
8080
" intel_extension_for_pytorch")

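The find_spec check in that hunk is a useful pattern on its own: it detects whether an optional dependency is installed without importing it (importing can be slow or have side effects at startup). A self-contained sketch:

    from importlib.util import find_spec

    # find_spec returns a ModuleSpec when the package is importable and
    # None otherwise; the package itself is never imported.
    ipex_available = find_spec("intel_extension_for_pytorch") is not None
    block_size = 128 if ipex_available else 16
    print(ipex_available, block_size)
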
Diff for: vllm/platforms/interface.py (+1 -1)

@@ -231,7 +231,7 @@ def pre_register_and_update(cls,
                                 parser: Optional[FlexibleArgumentParser] = None
                                 ) -> None:
         """
-        Do some pre-registeration or update action for the current platform.
+        Do some pre-registration or update action for the current platform.

         This function is called before global VllmConfig is initialized or cli
         arguments are parsed. It's used for out-of-tree platforms to register or

Diff for: vllm/reasoning/granite_reasoning_parser.py (+1 -1)

@@ -60,7 +60,7 @@ def extract_reasoning_content(

         Args:
             model_output (str): Output of the model to be parsed.
-            request (ChatCompletionReqest): Request being processed.
+            request (ChatCompletionRequest): Request being processed.

         Returns:
             tuple[Optional[str], Optional[str]]: Tuple pair containing the

Diff for: vllm/sampling_params.py (+1 -1)

@@ -101,7 +101,7 @@ class RequestOutputKind(Enum):
     CUMULATIVE = 0
     # Return only deltas in each RequestOutput
     DELTA = 1
-    # Do not return intermediate RequestOuputs
+    # Do not return intermediate RequestOutput
     FINAL_ONLY = 2

Diff for: vllm/third_party/pynvml.py (+1 -1)

@@ -1119,7 +1119,7 @@ class _PrintableStructure(Structure):
     e.g. class that has _field_ 'hex_value', c_uint could be formatted with
         _fmt_ = {"hex_value" : "%08X"}
     to produce nicer output.
-    Default fomratting string for all fields can be set with key "<default>" like:
+    Default formatting string for all fields can be set with key "<default>" like:
         _fmt_ = {"<default>" : "%d MHz"} # e.g all values are numbers in MHz.
     If not set it's assumed to be just "%s"
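The docstring above describes the _fmt_ convention; here is a minimal self-contained sketch of how such a map can drive formatting on a ctypes Structure (illustrative only, pynvml's real _PrintableStructure differs in detail):

    from ctypes import Structure, c_uint

    class ClockInfo(Structure):
        _fields_ = [("hex_value", c_uint), ("graphics_clock", c_uint)]
        _fmt_ = {"hex_value": "%08X", "<default>": "%d MHz"}

        def __str__(self):
            # Per-field format first, then "<default>", then plain "%s".
            parts = []
            for name, _ctype in self._fields_:
                fmt = self._fmt_.get(name, self._fmt_.get("<default>", "%s"))
                parts.append(f"{name}: {fmt % getattr(self, name)}")
            return ", ".join(parts)

    print(ClockInfo(hex_value=0xDEADBEEF, graphics_clock=1410))
    # hex_value: DEADBEEF, graphics_clock: 1410 MHz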

Diff for: vllm/v1/attention/backends/mla/common.py (+2 -2)

@@ -83,8 +83,8 @@
     return spda_o @ W_O

 NOTE: in the actual code,
-    `kv_b_proj` is [W_UK; W_UV] concatnated per head
-    `q_b_proj` is [W_UQ; W_QR] concatnated per head
+    `kv_b_proj` is [W_UK; W_UV] concatenated per head
+    `q_b_proj` is [W_UQ; W_QR] concatenated per head
     `out_proj` is W_O


Diff for: vllm/v1/executor/multiproc_executor.py (+1 -1)

@@ -326,7 +326,7 @@ def signal_handler(signum, frame):
             logger.debug("Worker interrupted.")

         except Exception:
-            # worker_busy_loop sends exceptions exceptons to Executor
+            # worker_busy_loop sends exceptions to Executor
             # for shutdown, but if there is an error in startup or an
             # error with IPC itself, we need to alert the parent.
             psutil.Process().parent().send_signal(signal.SIGUSR1)
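The final line of that hunk is the pattern the comment describes: a worker that cannot report failure over its normal IPC channel signals the parent process directly. A standalone sketch (POSIX-only, since it uses SIGUSR1; psutil calls as in the hunk):

    import signal
    import psutil

    def alert_parent() -> None:
        # psutil.Process() with no pid is the current process; parent() can
        # return None if the parent has already exited.
        parent = psutil.Process().parent()
        if parent is not None:
            parent.send_signal(signal.SIGUSR1)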

Diff for: vllm/v1/worker/gpu_model_runner.py (+1 -1)

@@ -998,7 +998,7 @@ def execute_model(
     ) -> Union[ModelRunnerOutput, torch.Tensor]:
         self._update_states(scheduler_output)
         if not scheduler_output.total_num_scheduled_tokens:
-            # Return empty ModelRunnerOuptut if there's no work to do.
+            # Return empty ModelRunnerOutput if there's no work to do.
             return EMPTY_MODEL_RUNNER_OUTPUT

         if self.is_multimodal_model:

Diff for: vllm/v1/worker/tpu_model_runner.py (+1 -1)

@@ -652,7 +652,7 @@ def execute_model(
         # Update cached state
         self._update_states(scheduler_output)
         if not scheduler_output.total_num_scheduled_tokens:
-            # Return empty ModelRunnerOuptut if there's no work to do.
+            # Return empty ModelRunnerOutput if there's no work to do.
             return EMPTY_MODEL_RUNNER_OUTPUT

         if self.is_multimodal_model:
