Commit 5a6a5a8

Your Name authored and committed
boundary condition
1 parent c4d7ffe commit 5a6a5a8

File tree: 2 files changed (+8, -28 lines)

csrc/quick_all_reduce.cu (0 additions, 2 deletions)

@@ -159,8 +159,6 @@ void DeviceComms::allreduce(int profile, hipStream_t stream, T const* A, T* B,
       break;
   }
 
-#endif
-
   // -------------------------------------------------
   // Rotate the flag color.
   flag_color++;
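
The context lines above hint at the barrier scheme this kernel relies on: instead of resetting its synchronization flags between invocations, each allreduce tags the flags with the current flag_color and increments the color afterwards, so a flag left over from a previous call can never satisfy the current call's wait. A minimal plain-Python analogy (assumed for illustration only; the real code is HIP device code):

```python
import threading

# Sketch of flag-color rotation: a peer publishes the current color, the
# waiter spins until it observes that color, and the color is incremented
# afterwards so stale flags never unblock the next invocation.
flag = [0]

def peer_arrive(color: int) -> None:
    flag[0] = color  # peer publishes "I reached the barrier" for this call

def wait_for_peer(color: int) -> None:
    while flag[0] != color:
        pass  # spin: a stale color from an earlier call never matches

flag_color = 1
t = threading.Thread(target=peer_arrive, args=(flag_color,))
t.start()
wait_for_peer(flag_color)  # returns once the peer posts the current color
t.join()
flag_color += 1            # rotate the color for the next invocation
```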

vllm/distributed/device_communicators/quick_all_reduce.py (8 additions, 26 deletions)

@@ -52,13 +52,15 @@ def __init__(self,
                  group: ProcessGroup,
                  device: Union[int, str, torch.device],
                  max_size=512 * 1024 * 1024,
-                 min_size=32 * 1024) -> None:
+                 min_size=128 * 1024) -> None:
         """
         Args:
             group: the process group to work on. If None, it will use the
                 default process group.
             device: the device to bind the QuickAllreduce to. If None,
                 it will be bound to f"cuda:{local_rank}".
+            max_size: the maximum supported input size.
+            min_size: below this size, custom_allreduce performs better.
         It is the caller's responsibility to make sure each communicator
         is bound to a unique device, and all communicators in this group
         are in the same node.
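
To make the new default concrete, here is a hypothetical sketch of the size-based dispatch these two parameters describe; the constants come from this diff, while the function and backend names are illustrative only. Note that, as of this commit, the min_size comparison in should_quick_ar (next hunk) is commented out:

```python
# Illustrative dispatch on message size; only the constants are from the diff.
MIN_SIZE = 128 * 1024         # below this, custom_allreduce is preferable
MAX_SIZE = 512 * 1024 * 1024  # at or above this, quick allreduce declines

def pick_allreduce(nbytes: int) -> str:
    if nbytes < MIN_SIZE:
        return "custom_allreduce"  # small tensors: lowest-latency path
    if nbytes < MAX_SIZE:
        return "quick_allreduce"   # mid-sized tensors: quick allreduce wins
    return "nccl_fallback"         # too large for the registered buffers

assert pick_allreduce(64 * 1024) == "custom_allreduce"
assert pick_allreduce(1024 * 1024) == "quick_allreduce"
```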
@@ -168,40 +170,20 @@ def should_quick_ar(self, inp: torch.Tensor):
             return inp_size < self.max_size  # and inp_size > self.min_size
         return False
 
-    def all_reduce(self,
-                   inp: torch.Tensor,
-                   *,
-                   out: torch.Tensor = None,
-                   registered: bool = False):
-        """Performs an out-of-place all reduce.
-
-        If registered is True, this assumes inp's pointer is already
-        IPC-registered. Otherwise, inp is first copied into a pre-registered
-        buffer.
-        """
+    def all_reduce(self, inp: torch.Tensor, *, out: torch.Tensor = None):
+        """Performs an out-of-place all reduce."""
         if out is None:
             out = torch.empty_like(inp)
-        if registered:
-            ops.all_reduce(self._ptr, inp, out, 0, 0)
-        else:
-            # print("qr")
-            ops.qr_all_reduce(self._ptr, envs.VLLM_QUICK_ALLREDUCE, inp, out)
+        ops.qr_all_reduce(self._ptr, envs.VLLM_QUICK_ALLREDUCE, inp, out)
         return out
 
     def quick_all_reduce(self, input: torch.Tensor) -> Optional[torch.Tensor]:
         """The main allreduce API that provides support for cuda graph."""
         # When quick allreduce is disabled, this will be None.
         if self.disabled or not self.should_quick_ar(input):
             return None
-        if self._IS_CAPTURING:
-            if torch.cuda.is_current_stream_capturing():
-                return self.all_reduce(input, registered=True)
-            else:
-                # If warm up, mimic the allocation pattern since quick
-                # allreduce is out-of-place.
-                return torch.empty_like(input)
-        else:
-            return self.all_reduce(input, registered=false)
+
+        return self.all_reduce(input)
 
     def close(self):
         '''del self._ptr and del buffer'''
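
With the registered flag and the capture-time branches removed, every accepted call now routes through ops.qr_all_reduce, and the caller-facing contract is simple: quick_all_reduce is out-of-place and returns None whenever it declines. A hypothetical caller-side sketch under that contract (the communicator argument and the fallback callable are assumptions for illustration):

```python
from typing import Callable, Optional

import torch

def all_reduce_with_fallback(
        qr,
        x: torch.Tensor,
        fallback: Callable[[torch.Tensor], torch.Tensor]) -> torch.Tensor:
    # quick_all_reduce returns None when disabled or x fails should_quick_ar.
    out: Optional[torch.Tensor] = qr.quick_all_reduce(x)
    if out is None:
        # Declined: run whatever allreduce the caller uses otherwise.
        return fallback(x)
    return out
```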
