
Commit 72e58b2

./format.sh
Signed-off-by: Harry Mellor <[email protected]>
1 parent 98394a0 commit 72e58b2

File tree

5 files changed: +17 −15 lines changed

  docs/source/generate_examples.py
  tools/profiler/visualize_layerwise_profile.py
  vllm/model_executor/model_loader/loader.py
  vllm/model_executor/model_loader/tensorizer.py
  vllm/model_executor/model_loader/weight_utils.py

docs/source/generate_examples.py

Lines changed: 2 additions & 1 deletion
@@ -132,7 +132,8 @@ def generate(self) -> str:
                                 ROOT_DIR)
 
         content = f"Source <gh-file:{self.path.relative_to(ROOT_DIR)}>.\n\n"
-        include = "include" if self.main_file.suffix == ".md" else "literalinclude"
+        include = "include" if self.main_file.suffix == ".md" else \
+            "literalinclude"
         if include == "literalinclude":
             content += f"# {self.title}\n\n"
         content += f":::{{{include}}} {make_relative(self.main_file)}\n"

tools/profiler/visualize_layerwise_profile.py

Lines changed: 5 additions & 5 deletions
@@ -534,11 +534,11 @@ def make_plot_title_suffix(profile_json: dict) -> str:
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
 
-    parser.add_argument(
-        "--json-trace",
-        type=str,
-        required=True,
-        help="json trace file output by examples/offline_inference/offline_profile.py")
+    parser.add_argument("--json-trace",
+                        type=str,
+                        required=True,
+                        help="json trace file output by \
+                examples/offline_inference/offline_profile.py")
     parser.add_argument("--output-directory",
                         type=str,
                         required=False,
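
This hunk is pure formatter reflow, though the backslash continuation inside the help string embeds a newline and leading spaces into the string itself. That is usually harmless here, since argparse's default HelpFormatter collapses runs of whitespace when it wraps help text for display. A self-contained sketch, with a hypothetical trace filename:

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--json-trace",
                        type=str,
                        required=True,
                        help="json trace file output by \
                examples/offline_inference/offline_profile.py")

    # Hypothetical invocation; the embedded whitespace survives in the raw
    # string but is collapsed by argparse when --help is rendered.
    args = parser.parse_args(["--json-trace", "trace.json"])
    print(args.json_trace)  # -> trace.json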

vllm/model_executor/model_loader/loader.py

Lines changed: 2 additions & 1 deletion
@@ -529,7 +529,8 @@ class ShardedStateLoader(BaseModelLoader):
     Model loader that directly loads each worker's model state dict, which
     enables a fast load path for large tensor-parallel models where each worker
     only needs to read its own shard rather than the entire checkpoint. See
-    `examples/offline_inference/save_sharded_state.py` for creating a sharded checkpoint.
+    `examples/offline_inference/save_sharded_state.py` for creating a sharded
+    checkpoint.
     """
 
     DEFAULT_PATTERN = "model-rank-{rank}-part-{part}.safetensors"
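
For context on the docstring this hunk rewraps: DEFAULT_PATTERN is the per-shard filename template, so each tensor-parallel worker can locate its own files. A minimal sketch of how the template expands, assuming a hypothetical 4-way tensor-parallel model with two parts per rank (both counts are assumptions, not from the source):

    pattern = "model-rank-{rank}-part-{part}.safetensors"  # DEFAULT_PATTERN above

    for rank in range(4):      # one shard set per TP rank (assumed count)
        for part in range(2):  # parts per rank is an assumption
            print(pattern.format(rank=rank, part=part))
    # model-rank-0-part-0.safetensors
    # model-rank-0-part-1.safetensors
    # model-rank-1-part-0.safetensors
    # ... each worker reads only its own rank's files.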

vllm/model_executor/model_loader/tensorizer.py

Lines changed: 6 additions & 7 deletions
@@ -363,13 +363,12 @@ def deserialize(self):
 def tensorizer_weights_iterator(
     tensorizer_args: "TensorizerArgs"
 ) -> Generator[Tuple[str, torch.Tensor], None, None]:
-    logger.warning(
-        "Deserializing HuggingFace models is not optimized for "
-        "loading on vLLM, as tensorizer is forced to load to CPU. "
-        "Consider deserializing a vLLM model instead for faster "
-        "load times. See the "
-        "examples/other/tensorize_vllm_model.py example script "
-        "for serializing vLLM models.")
+    logger.warning("Deserializing HuggingFace models is not optimized for "
+                   "loading on vLLM, as tensorizer is forced to load to CPU. "
+                   "Consider deserializing a vLLM model instead for faster "
+                   "load times. See the "
+                   "examples/other/tensorize_vllm_model.py example script "
+                   "for serializing vLLM models.")
 
     deserializer_args = tensorizer_args.deserializer_params
     stream_params = tensorizer_args.stream_params
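
The signature in this hunk declares the contract the rest of the loader relies on: the iterator yields (parameter name, tensor) pairs. A toy stand-in satisfying the same Generator type; the parameter names and shapes are invented for illustration:

    from typing import Generator, Tuple

    import torch

    def fake_weights_iterator() -> Generator[Tuple[str, torch.Tensor], None, None]:
        # Hypothetical stand-in for tensorizer_weights_iterator: yields
        # (parameter name, tensor) pairs, matching the declared signature.
        yield "embed_tokens.weight", torch.zeros(4, 8)
        yield "lm_head.weight", torch.zeros(8, 4)

    for name, tensor in fake_weights_iterator():
        print(name, tuple(tensor.shape))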

vllm/model_executor/model_loader/weight_utils.py

Lines changed: 2 additions & 1 deletion
@@ -503,7 +503,8 @@ def kv_cache_scales_loader(
     KV cache scaling factors. The serialization should represent a dictionary
     whose keys are the TP ranks and values are another dictionary mapping layers
     to their KV cache scaling factors.
-    Keep this function in sync with the output of examples/other/fp8/extract_scales.py
+    Keep this function in sync with the output of
+    examples/other/fp8/extract_scales.py
     """
     try:
         with open(filename) as f:
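
The docstring this hunk rewraps describes the on-disk shape: a dictionary keyed by TP rank whose values map layers to scaling factors. A hedged example of what such a JSON file might contain; the exact schema emitted by examples/other/fp8/extract_scales.py may carry extra metadata, and the key types and values below are assumptions:

    import json

    example = {
        "0": {"0": 0.021, "1": 0.017},  # TP rank 0: layer index -> KV scale
        "1": {"0": 0.020, "1": 0.018},  # TP rank 1
    }
    print(json.dumps(example, indent=2))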
