
Commit 803b106

kzawora-intel, jikunshang, scsudhak-intel, madamczyk-intel, and mfylcek authored and committed
[Hardware][Intel-Gaudi] Add Intel Gaudi (HPU) inference backend (vllm-project#6143)
Signed-off-by: yuwenzho <[email protected]>
Signed-off-by: Chendi.Xue <[email protected]>
Signed-off-by: Bob Zhu <[email protected]>
Signed-off-by: zehao-intel <[email protected]>
Signed-off-by: Konrad Zawora <[email protected]>
Co-authored-by: Kunshang Ji <[email protected]>
Co-authored-by: Sanju C Sudhakaran <[email protected]>
Co-authored-by: Michal Adamczyk <[email protected]>
Co-authored-by: Marceli Fylcek <[email protected]>
Co-authored-by: Himangshu Lahkar <[email protected]>
Co-authored-by: Vivek Goel <[email protected]>
Co-authored-by: yuwenzho <[email protected]>
Co-authored-by: Dominika Olszewska <[email protected]>
Co-authored-by: barak goldberg <[email protected]>
Co-authored-by: Michal Szutenberg <[email protected]>
Co-authored-by: Jan Kaniecki <[email protected]>
Co-authored-by: Agata Dobrzyniewicz <[email protected]>
Co-authored-by: Krzysztof Wisniewski <[email protected]>
Co-authored-by: Dudi Lester <[email protected]>
Co-authored-by: Ilia Taraban <[email protected]>
Co-authored-by: Chendi.Xue <[email protected]>
Co-authored-by: Michał Kuligowski <[email protected]>
Co-authored-by: Jakub Maksymczuk <[email protected]>
Co-authored-by: Tomasz Zielinski <[email protected]>
Co-authored-by: Sun Choi <[email protected]>
Co-authored-by: Iryna Boiko <[email protected]>
Co-authored-by: Bob Zhu <[email protected]>
Co-authored-by: hlin99 <[email protected]>
Co-authored-by: Zehao Huang <[email protected]>
Co-authored-by: Andrzej Kotłowski <[email protected]>
Co-authored-by: Yan Tomsinsky <[email protected]>
Co-authored-by: Nir David <[email protected]>
Co-authored-by: Yu-Zhou <[email protected]>
Co-authored-by: Ruheena Suhani Shaik <[email protected]>
Co-authored-by: Karol Damaszke <[email protected]>
Co-authored-by: Marcin Swiniarski <[email protected]>
Co-authored-by: Woosuk Kwon <[email protected]>
Co-authored-by: Jacek Czaja <[email protected]>
Co-authored-by: Jacek Czaja <[email protected]>
Co-authored-by: Yuan <[email protected]>
Signed-off-by: Loc Huynh <[email protected]>
1 parent 4eefb4f commit 803b106

31 files changed: 4,279 additions and 20 deletions

Dockerfile.hpu

Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
+FROM vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest
+
+COPY ./ /workspace/vllm
+
+WORKDIR /workspace/vllm
+
+RUN pip install -v -r requirements-hpu.txt
+
+ENV no_proxy=localhost,127.0.0.1
+ENV PT_HPU_ENABLE_LAZY_COLLECTIVES=true
+
+RUN VLLM_TARGET_DEVICE=hpu python3 setup.py install
+
+WORKDIR /workspace/
+
+ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
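
The image's entrypoint starts the OpenAI-compatible API server, so a running container can be exercised with the standard openai Python client. A minimal sketch, assuming the container is already up with port 8000 published and serving a model; the model name below is only a placeholder:

# Minimal sketch: query a vLLM OpenAI-compatible server started by the HPU
# container above. Assumes port 8000 is published and the server was launched
# with a model; "meta-llama/Llama-3.1-8B-Instruct" is an illustrative name.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

response = client.chat.completions.create(
    model="meta-llama/Llama-3.1-8B-Instruct",  # placeholder model name
    messages=[{"role": "user", "content": "Say hello from Gaudi."}],
    max_tokens=32,
)
print(response.choices[0].message.content)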

docs/source/getting_started/gaudi-installation.rst

Lines changed: 402 additions & 0 deletions
Large diffs are not rendered by default.

docs/source/index.rst

Lines changed: 2 additions & 1 deletion
@@ -43,7 +43,7 @@ vLLM is flexible and easy to use with:
 * Tensor parallelism and pipeline parallelism support for distributed inference
 * Streaming outputs
 * OpenAI-compatible API server
-* Support NVIDIA GPUs, AMD CPUs and GPUs, Intel CPUs and GPUs, PowerPC CPUs, TPU, and AWS Trainium and Inferentia Accelerators.
+* Support NVIDIA GPUs, AMD CPUs and GPUs, Intel CPUs, Gaudi® accelerators and GPUs, PowerPC CPUs, TPU, and AWS Trainium and Inferentia Accelerators.
 * Prefix caching support
 * Multi-lora support

@@ -66,6 +66,7 @@ Documentation
    getting_started/amd-installation
    getting_started/openvino-installation
    getting_started/cpu-installation
+   getting_started/gaudi-installation
    getting_started/neuron-installation
    getting_started/tpu-installation
    getting_started/xpu-installation

requirements-hpu.txt

Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
+# Common dependencies
+-r requirements-common.txt
+
+# Dependencies for HPU code
+ray
+triton
+pandas
+tabulate
+setuptools>=61
+setuptools-scm>=8
+vllm-hpu-extension @ git+https://github.com/HabanaAI/vllm-hpu-extension.git@fd7f2e6
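
A quick way to sanity-check an environment after installing these requirements is to try importing each package. A hedged sketch; the vllm_hpu_extension module name is inferred from the distribution name above and may differ:

# Hedged sanity check: try to import the packages pulled in by
# requirements-hpu.txt. The vllm_hpu_extension module name is an assumption
# based on the distribution name in the requirements file.
import importlib

for name in ("ray", "triton", "pandas", "tabulate", "vllm_hpu_extension"):
    try:
        importlib.import_module(name)
        print(f"{name}: OK")
    except ImportError as exc:
        print(f"{name}: not importable ({exc})")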

setup.py

Lines changed: 45 additions & 2 deletions
@@ -253,14 +253,32 @@ def run(self):
         self.copy_file(file, dst_file)


+def _is_hpu() -> bool:
+    is_hpu_available = True
+    try:
+        subprocess.run(["hl-smi"], capture_output=True, check=True)
+    except (FileNotFoundError, PermissionError, subprocess.CalledProcessError):
+        if not os.path.exists('/dev/accel/accel0') and not os.path.exists(
+                '/dev/accel/accel_controlD0'):
+            # last resort...
+            try:
+                output = subprocess.check_output(
+                    'lsmod | grep habanalabs | wc -l', shell=True)
+                is_hpu_available = int(output) > 0
+            except (ValueError, FileNotFoundError, PermissionError,
+                    subprocess.CalledProcessError):
+                is_hpu_available = False
+    return is_hpu_available or VLLM_TARGET_DEVICE == "hpu"
+
+
 def _no_device() -> bool:
     return VLLM_TARGET_DEVICE == "empty"


 def _is_cuda() -> bool:
     has_cuda = torch.version.cuda is not None
     return (VLLM_TARGET_DEVICE == "cuda" and has_cuda
-            and not (_is_neuron() or _is_tpu()))
+            and not (_is_neuron() or _is_tpu() or _is_hpu()))


 def _is_hip() -> bool:
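
The new _is_hpu() helper probes for Gaudi hardware in three steps: the hl-smi tool, the /dev/accel device nodes, and finally the habanalabs kernel module via lsmod. The standalone sketch below mirrors that probe order for illustration; it is not part of the commit:

# Standalone sketch of the probe order used by _is_hpu() above
# (hl-smi -> /dev/accel device nodes -> habanalabs kernel module).
import os
import subprocess


def detect_hpu() -> bool:
    # 1. Ask the Habana management tool directly.
    try:
        subprocess.run(["hl-smi"], capture_output=True, check=True)
        return True
    except (FileNotFoundError, PermissionError, subprocess.CalledProcessError):
        pass
    # 2. Look for the accelerator device nodes.
    if os.path.exists("/dev/accel/accel0") or os.path.exists(
            "/dev/accel/accel_controlD0"):
        return True
    # 3. Last resort: check whether the habanalabs kernel module is loaded.
    try:
        output = subprocess.check_output("lsmod | grep habanalabs | wc -l",
                                         shell=True)
        return int(output) > 0
    except (ValueError, FileNotFoundError, PermissionError,
            subprocess.CalledProcessError):
        return False


if __name__ == "__main__":
    print("HPU detected:", detect_hpu())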
@@ -356,6 +374,23 @@ def get_path(*filepath) -> str:
     return os.path.join(ROOT_DIR, *filepath)


+def get_gaudi_sw_version():
+    """
+    Returns the driver version.
+    """
+    # Enable console printing for `hl-smi` check
+    output = subprocess.run("hl-smi",
+                            shell=True,
+                            text=True,
+                            stdout=subprocess.PIPE,
+                            stderr=subprocess.PIPE,
+                            env={"ENABLE_CONSOLE": "true"})
+    if output.returncode == 0 and output.stdout:
+        return output.stdout.split("\n")[2].replace(
+            " ", "").split(":")[1][:-1].split("-")[0]
+    return "0.0.0"  # when hl-smi is not available
+
+
 def get_vllm_version() -> str:
     version = get_version(
         write_to="vllm/_version.py",  # TODO: move this to pyproject.toml

@@ -385,6 +420,12 @@ def get_vllm_version() -> str:
         if neuron_version != MAIN_CUDA_VERSION:
             neuron_version_str = neuron_version.replace(".", "")[:3]
             version += f"{sep}neuron{neuron_version_str}"
+    elif _is_hpu():
+        # Get the Intel Gaudi Software Suite version
+        gaudi_sw_version = str(get_gaudi_sw_version())
+        if gaudi_sw_version != MAIN_CUDA_VERSION:
+            gaudi_sw_version = gaudi_sw_version.replace(".", "")[:3]
+            version += f"{sep}gaudi{gaudi_sw_version}"
     elif _is_openvino():
         version += f"{sep}openvino"
     elif _is_tpu():
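
The HPU branch mirrors the existing Neuron branch: the Gaudi software version is stripped of dots and truncated to three characters before being appended as a local version suffix. A small sketch of just that string manipulation, using a hypothetical version string (real values come from hl-smi via get_gaudi_sw_version()):

# Sketch of the suffix logic in get_vllm_version() above, with a hypothetical
# Gaudi software version; the separator default here is an assumption.
def gaudi_version_suffix(base_version: str, gaudi_sw_version: str,
                         sep: str = "+") -> str:
    short = gaudi_sw_version.replace(".", "")[:3]  # "1.18.0" -> "118"
    return f"{base_version}{sep}gaudi{short}"


print(gaudi_version_suffix("0.6.3", "1.18.0"))  # -> 0.6.3+gaudi118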
@@ -443,6 +484,8 @@ def _read_requirements(filename: str) -> List[str]:
         requirements = _read_requirements("requirements-rocm.txt")
     elif _is_neuron():
         requirements = _read_requirements("requirements-neuron.txt")
+    elif _is_hpu():
+        requirements = _read_requirements("requirements-hpu.txt")
     elif _is_openvino():
         requirements = _read_requirements("requirements-openvino.txt")
     elif _is_tpu():

@@ -453,7 +496,7 @@ def _read_requirements(filename: str) -> List[str]:
         requirements = _read_requirements("requirements-xpu.txt")
     else:
         raise ValueError(
-            "Unsupported platform, please use CUDA, ROCm, Neuron, "
+            "Unsupported platform, please use CUDA, ROCm, Neuron, HPU, "
             "OpenVINO, or CPU.")
     return requirements

vllm/_custom_ops.py

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@

 logger = init_logger(__name__)

-if not current_platform.is_tpu():
+if not current_platform.is_tpu() and not current_platform.is_hpu():
     try:
         import vllm._C
     except ImportError as e:
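
This guard keeps vLLM from importing the compiled vllm._C extension on platforms that do not build it. A hedged sketch of the same check in isolation, using the current_platform helpers referenced in the diff:

# Hedged sketch: gate the compiled-extension import on the detected platform,
# mirroring the guard in vllm/_custom_ops.py above.
from vllm.platforms import current_platform

if current_platform.is_tpu() or current_platform.is_hpu():
    print("TPU/HPU platform detected; skipping vllm._C custom ops.")
else:
    try:
        import vllm._C  # noqa: F401  # compiled CUDA/ROCm/CPU kernels
    except ImportError as exc:
        print(f"Failed to load vllm._C: {exc}")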
