
Commit 273cb3b

[Doc] Fix top-level API links/docs (#18621)
Signed-off-by: DarkLight1337 <[email protected]>
1 parent 8ddd1cf commit 273cb3b

9 files changed (+88, -103 lines)


vllm/benchmarks/datasets.py

Lines changed: 12 additions & 10 deletions
@@ -129,16 +129,17 @@ def get_random_lora_request(

     Args:
         tokenizer (PreTrainedTokenizerBase): The base tokenizer to use if no
-            LoRA is selected. max_loras (Optional[int]): The maximum number of
-            LoRAs available. If None, LoRA is not used. lora_path
-            (Optional[str]): Path to the LoRA parameters on disk. If None, LoRA
-            is not used.
+            LoRA is selected.
+        max_loras (Optional[int]): The maximum number of LoRAs available.
+            If `None`, LoRA is not used.
+        lora_path (Optional[str]): Path to the LoRA parameters on disk.
+            If `None`, LoRA is not used.

     Returns:
-        tuple[Optional[LoRARequest], AnyTokenizer]: A tuple where the first
-            element is a LoRARequest (or None if not applicable) and the second
-            element is the tokenizer associated with the LoRA request (or the
-            base tokenizer).
+        A tuple with the following elements:
+        - A new [LoRARequest][] (or `None` if not applicable).
+        - The tokenizer associated with the LoRA request
+          (or the base tokenizer).
     """
     if max_loras is None or lora_path is None:
         return None, tokenizer
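For context, this is how the documented helper behaves; a minimal sketch assuming the argument order shown in the docstring (the full signature is not part of this hunk):

```python
from transformers import AutoTokenizer

from vllm.benchmarks.datasets import get_random_lora_request

base_tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")

# With max_loras/lora_path left as None, no LoRA is selected and the
# base tokenizer comes back unchanged (the early return shown above).
lora_request, tokenizer = get_random_lora_request(
    base_tokenizer, max_loras=None, lora_path=None)
assert lora_request is None
assert tokenizer is base_tokenizer
```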
@@ -167,7 +168,7 @@ def sample(self, tokenizer: PreTrainedTokenizerBase,

         Args:
             tokenizer (PreTrainedTokenizerBase): The tokenizer to be used
-                 for processing the dataset's text.
+                for processing the dataset's text.
             num_requests (int): The number of sample requests to generate.

         Returns:
@@ -184,7 +185,8 @@ def maybe_oversample_requests(self, requests: list[SampleRequest],

         Args:
             requests (List[SampleRequest]): The current list of sampled
-                requests. num_requests (int): The target number of requests.
+                requests.
+            num_requests (int): The target number of requests.
         """
         if len(requests) < num_requests:
             random.seed(self.random_seed)
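The seeded `random.seed` call above is the start of the oversampling path; a standalone sketch of the idea, assuming the padding draw (not shown in this hunk) samples with replacement:

```python
import random

def maybe_oversample_requests_sketch(requests: list, num_requests: int,
                                     random_seed: int = 0) -> list:
    """Pad `requests` up to `num_requests` by re-drawing from it at random."""
    if len(requests) < num_requests:
        random.seed(random_seed)  # reproducible padding, as in the diff
        requests = requests + random.choices(
            requests, k=num_requests - len(requests))
    return requests
```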

vllm/config.py

Lines changed: 1 addition & 1 deletion
@@ -4552,7 +4552,7 @@ def contains_object_print(text):
        text (str): The text to check

    Returns:
-        bool: True if a match is found, False otherwise
+        result (bool): `True` if a match is found, `False` otherwise.
    """
    pattern = r'at 0x[a-fA-F0-9]{2,16}>'
    match = re.search(pattern, text)
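Since the hunk cuts off before the final return, here is a runnable sketch of the whole function; the closing `return match is not None` is an assumption consistent with the documented boolean result:

```python
import re

def contains_object_print(text: str) -> bool:
    # "at 0x..." with 2-16 hex digits followed by ">" is the tail of a
    # default object repr such as "<Foo object at 0x7f0d1c2b3a40>".
    pattern = r'at 0x[a-fA-F0-9]{2,16}>'
    match = re.search(pattern, text)
    return match is not None

assert contains_object_print("<Foo object at 0x7f0d1c2b3a40>")
assert not contains_object_print("temperature=0.8, top_p=0.95")
```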

vllm/distributed/kv_transfer/kv_connector/v1/base.py

Lines changed: 5 additions & 4 deletions
@@ -210,10 +210,11 @@ def get_num_new_matched_tokens(
            computed tokens for this request

        Returns:
-            * the number of tokens that can be loaded from the
-              external KV cache beyond what is already computed.
-            * true if external KV cache tokens will be loaded
-              asynchronously (between scheduler steps).
+            A tuple with the following elements:
+            - The number of tokens that can be loaded from the
+              external KV cache beyond what is already computed.
+            - `True` if external KV cache tokens will be loaded
+              asynchronously (between scheduler steps).
        """
        pass

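To make the contract concrete, a hedged sketch of what an implementation in a connector subclass might return; the `request` fields and the `self._lookup` helper are hypothetical stand-ins, and only the return shape follows the docstring:

```python
def get_num_new_matched_tokens(self, request,
                               num_computed_tokens: int) -> tuple[int, bool]:
    # Hypothetical: ask the external KV cache how many prompt tokens it holds.
    num_external = self._lookup.num_cached_tokens(request.prompt_token_ids)
    # Only tokens beyond the locally computed prefix count as new.
    num_new = max(0, num_external - num_computed_tokens)
    # In this sketch, any external tokens are loaded between scheduler steps.
    return num_new, num_new > 0
```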

vllm/distributed/kv_transfer/kv_pipe/pynccl_pipe.py

Lines changed: 20 additions & 20 deletions
@@ -118,11 +118,11 @@ def _make_metadata(self, tensor: Optional[torch.Tensor]) -> Metadata:
        """
        Create the metadata as a dictionary based on the input tensor.

-        Parameters:
-            - tensor: The input tensor or None if no tensor is provided.
+        Args:
+            tensor: The input tensor or None if no tensor is provided.

        Returns:
-            - metadata: A dictionary with the following keys:
+            metadata: A dictionary with the following keys:
                - "dtype": The data type of the tensor or None.
                - "shape": The shape of the tensor or None.
        """
@@ -135,13 +135,13 @@ def _prepare_recv_buffer(self, metadata: Metadata) -> torch.Tensor:
        """
        Create a buffer to receive the tensor based on the provided metadata.

-        Parameters:
-            - metadata: A dictionary with keys "dtype" and "shape", describing
-              the tensor's data type and shape.
+        Args:
+            metadata: A dictionary with keys "dtype" and "shape",
+                describing the tensor's data type and shape.

        Returns:
-            - buffer: A tensor of the specified type and shape, allocated on
-              self.device.
+            buffer: A tensor of the specified type and shape,
+                allocated on `self.device`.
        """
        return torch.empty(metadata["shape"],
                           dtype=metadata["dtype"],
@@ -151,8 +151,8 @@ def _send_metadata(self, metadata: Metadata):
        """
        Send the metadata dictionary to the target rank.

-        Parameters:
-            - metadata: A dictionary with keys "dtype" and "shape".
+        Args:
+            metadata: A dictionary with keys "dtype" and "shape".
        """
        self.group.send_obj(metadata, self.target_rank_for_send)


@@ -161,8 +161,8 @@ def _recv_metadata(self) -> Metadata:
        Receive the metadata dictionary from the target rank.

        Returns:
-            - metadata: A dictionary with keys "dtype" and "shape" describing
-              the tensor.
+            metadata: A dictionary with keys "dtype" and "shape"
+                describing the tensor.
        """
        return self.group.recv_obj(self.target_rank_for_recv)


@@ -171,9 +171,9 @@ def _send_impl(self, tensor: Optional[torch.Tensor]) -> None:
        The actual implementation of sending the tensor and its metadata to the
        target rank.

-        Parameters:
-            - tensor: The input tensor to be sent, or None if no tensor is
-              being sent.
+        Args:
+            tensor: The input tensor to be sent, or `None` if no tensor is
+                being sent.
        """
        metadata = self._make_metadata(tensor)
        self._send_metadata(metadata)
@@ -187,7 +187,7 @@ def _recv_impl(self) -> Optional[torch.Tensor]:
        the target rank.

        Returns:
-            - buffer: The received tensor, or None if no tensor is received.
+            buffer: The received tensor, or `None` if no tensor is received.
        """
        metadata = self._recv_metadata()
        if metadata["dtype"] is None:
@@ -227,8 +227,8 @@ def send_tensor(self, tensor: Optional[torch.Tensor]) -> None:
        Sends a tensor and its metadata to the destination rank in a
        non-blocking way.

-        Parameters:
-            - tensor: The tensor to send, or None if no tensor is being sent.
+        Args:
+            tensor: The tensor to send, or `None` if no tensor is being sent.
        """
        if self.transport_thread is None:
            self.transport_thread = ThreadPoolExecutor(max_workers=1)
@@ -250,8 +250,8 @@ def recv_tensor(self) -> Optional[torch.Tensor]:
        """
        Receives a tensor and its metadata from the source rank. Blocking call.

        Returns:
-            - tensor: The received tensor, or None if no tensor is received.
+            tensor: The received tensor, or `None` if no tensor is received.
        """
        if self.transport_thread is None:
            self.transport_thread = ThreadPoolExecutor(max_workers=1)
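Taken together, the pipe's wire protocol is metadata first, then payload. A self-contained sketch of both directions; `send_obj`/`recv_obj` mirror the calls shown above, while the tensor-level `send`/`recv` methods are assumed stand-ins for the communicator's point-to-point ops:

```python
from typing import Optional

import torch

def send_tensor_sketch(group, dst: int,
                       tensor: Optional[torch.Tensor]) -> None:
    if tensor is None:
        # "No tensor" is encoded as all-None metadata.
        group.send_obj({"dtype": None, "shape": None}, dst)
        return
    group.send_obj({"dtype": tensor.dtype, "shape": tensor.shape}, dst)
    group.send(tensor, dst)  # payload follows its metadata

def recv_tensor_sketch(group, src: int,
                       device: str = "cpu") -> Optional[torch.Tensor]:
    metadata = group.recv_obj(src)
    if metadata["dtype"] is None:
        return None
    buffer = torch.empty(metadata["shape"], dtype=metadata["dtype"],
                         device=device)
    group.recv(buffer, src)  # fill the pre-allocated buffer in place
    return buffer
```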

vllm/engine/llm_engine.py

Lines changed: 15 additions & 25 deletions
@@ -130,26 +130,16 @@ class LLMEngine:
    iteration-level scheduling and efficient memory management to maximize the
    serving throughput.

-    The {class}`~vllm.LLM` class wraps this class for offline batched inference
-    and the {class}`AsyncLLMEngine` class wraps this class for online serving.
+    The [LLM][vllm.LLM] class wraps this class for offline batched inference
+    and the [AsyncLLMEngine][] class wraps this class for online serving.

-    The config arguments are derived from {class}`~vllm.EngineArgs`. (See
-    {ref}`engine-args`)
+    The config arguments are derived from [EngineArgs][vllm.EngineArgs]. (See
+    [engine-args][])

    Args:
-        model_config: The configuration related to the LLM model.
-        cache_config: The configuration related to the KV cache memory
-            management.
-        parallel_config: The configuration related to distributed execution.
-        scheduler_config: The configuration related to the request scheduler.
-        device_config: The configuration related to the device.
-        lora_config (Optional): The configuration related to serving multi-LoRA.
-        speculative_config (Optional): The configuration related to speculative
-            decoding.
+        vllm_config: The configuration for initializing and running vLLM.
        executor_class: The model executor class for managing distributed
            execution.
-        prompt_adapter_config (Optional): The configuration related to serving
-            prompt adapters.
        log_stats: Whether to log statistics.
        usage_context: Specified entry point, used for usage info collection.
    """
@@ -695,11 +685,12 @@ def add_request(

        Args:
            request_id: The unique ID of the request.
-            prompt: The prompt to the LLM. See {class}`~vllm.inputs.PromptType`
+            prompt: The prompt to the LLM. See
+                [PromptType][vllm.inputs.PromptType]
                for more details about the format of each input.
            params: Parameters for sampling or pooling.
-                {class}`~vllm.SamplingParams` for text generation.
-                {class}`~vllm.PoolingParams` for pooling.
+                [SamplingParams][vllm.SamplingParams] for text generation.
+                [PoolingParams][vllm.PoolingParams] for pooling.
            arrival_time: The arrival time of the request. If None, we use
                the current monotonic time.
            lora_request: The LoRA request to add.
@@ -711,10 +702,11 @@ def add_request(
        Details:
            - Set arrival_time to the current time if it is None.
            - Set prompt_token_ids to the encoded prompt if it is None.
-            - Create `n` number of {class}`~vllm.Sequence` objects.
-            - Create a {class}`~vllm.SequenceGroup` object
-              from the list of {class}`~vllm.Sequence`.
-            - Add the {class}`~vllm.SequenceGroup` object to the scheduler.
+            - Create `n` number of [Sequence][vllm.Sequence] objects.
+            - Create a [SequenceGroup][vllm.SequenceGroup] object
+              from the list of [Sequence][vllm.Sequence].
+            - Add the [SequenceGroup][vllm.SequenceGroup] object to the
+              scheduler.

        Example:
            >>> # initialize engine
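A compact, hedged version of the docstring's own example, showing a request flowing through the scheduler:

```python
from vllm import EngineArgs, LLMEngine, SamplingParams

engine = LLMEngine.from_engine_args(EngineArgs(model="facebook/opt-125m"))
engine.add_request("0", "What is an LLM?", SamplingParams(temperature=0.0))

# Each step() performs one scheduling iteration; drain until done.
while engine.has_unfinished_requests():
    for output in engine.step():
        if output.finished:
            print(output.outputs[0].text)
```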
@@ -861,9 +853,7 @@ def abort_request(self, request_id: Union[str, Iterable[str]]) -> None:
            request_id: The ID(s) of the request to abort.

        Details:
-            - Refer to the
-              {meth}`~vllm.core.scheduler.Scheduler.abort_seq_group`
-              from class {class}`~vllm.core.scheduler.Scheduler`.
+            - Refer to [vllm.core.scheduler.Scheduler.abort_seq_group][].

        Example:
            >>> # initialize engine and add a request with request_id
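Continuing the engine sketch above: aborting takes a single ID or an iterable of IDs, matching the `Union[str, Iterable[str]]` signature:

```python
# Queue a request, then cancel it before it completes.
engine.add_request("1", "Tell me a very long story", SamplingParams())
engine.abort_request("1")
assert not engine.has_unfinished_requests()
```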
