@@ -1,9 +1,12 @@
 # SPDX-License-Identifier: Apache-2.0
 
-from functools import lru_cache
+import os
+from functools import lru_cache, wraps
 from typing import TYPE_CHECKING, Dict, List, Optional
 
 import torch
+from amdsmi import (amdsmi_get_gpu_asic_info, amdsmi_get_processor_handles,
+                    amdsmi_init, amdsmi_shut_down)
 
 import vllm.envs as envs
 from vllm.logger import init_logger
53 | 56 | "by setting `VLLM_USE_TRITON_FLASH_ATTN=0`")
|
54 | 57 | }
|
55 | 58 |
|
| 59 | +# Prevent use of clashing `{CUDA/HIP}_VISIBLE_DEVICES`` |
| 60 | +if "HIP_VISIBLE_DEVICES" in os.environ: |
| 61 | + val = os.environ["HIP_VISIBLE_DEVICES"] |
| 62 | + if cuda_val := os.environ.get("CUDA_VISIBLE_DEVICES", None): |
| 63 | + assert val == cuda_val |
| 64 | + else: |
| 65 | + os.environ["CUDA_VISIBLE_DEVICES"] = val |
| 66 | + |
+# AMDSMI utils
+# Note that AMDSMI is not affected by `{CUDA/HIP}_VISIBLE_DEVICES`;
+# all the related functions work on real physical device ids.
+# The major benefit of using AMDSMI is that it will not initialize CUDA.
+
+
+def with_amdsmi_context(fn):
+
+    @wraps(fn)
+    def wrapper(*args, **kwargs):
+        amdsmi_init()
+        try:
+            return fn(*args, **kwargs)
+        finally:
+            amdsmi_shut_down()
+
+    return wrapper
+
+
+def device_id_to_physical_device_id(device_id: int) -> int:
+    if "CUDA_VISIBLE_DEVICES" in os.environ:
+        device_ids = os.environ["CUDA_VISIBLE_DEVICES"].split(",")
+        physical_device_id = device_ids[device_id]
+        return int(physical_device_id)
+    else:
+        return device_id
+
 
 class RocmPlatform(Platform):
     _enum = PlatformEnum.ROCM
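
The module-level block in the hunk above mirrors `HIP_VISIBLE_DEVICES` into `CUDA_VISIBLE_DEVICES`, and asserts when both are set but disagree, so the rest of the file only ever needs to consult `CUDA_VISIBLE_DEVICES`. A minimal sketch of the three cases it covers, using a plain dict and a hypothetical `_sync_visible_devices` helper rather than mutating `os.environ`:

def _sync_visible_devices(env: dict) -> dict:
    # Same logic as the module-level block above, over a plain dict.
    if "HIP_VISIBLE_DEVICES" in env:
        val = env["HIP_VISIBLE_DEVICES"]
        if cuda_val := env.get("CUDA_VISIBLE_DEVICES", None):
            assert val == cuda_val, "clashing {CUDA/HIP}_VISIBLE_DEVICES"
        else:
            env["CUDA_VISIBLE_DEVICES"] = val
    return env

# Only the HIP variable set: it is mirrored into the CUDA one.
print(_sync_visible_devices({"HIP_VISIBLE_DEVICES": "2,3"}))
# {'HIP_VISIBLE_DEVICES': '2,3', 'CUDA_VISIBLE_DEVICES': '2,3'}

# Both set and equal: the assert passes and nothing changes.
# Both set but different: the assert fires at import time instead of
# silently preferring one of the two values.
try:
    _sync_visible_devices({"HIP_VISIBLE_DEVICES": "0",
                           "CUDA_VISIBLE_DEVICES": "1"})
except AssertionError as e:
    print(e)  # clashing {CUDA/HIP}_VISIBLE_DEVICES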
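
`with_amdsmi_context` brackets the wrapped call with `amdsmi_init()`/`amdsmi_shut_down()` even when the body raises, and `device_id_to_physical_device_id` translates vLLM's logical device ids through `CUDA_VISIBLE_DEVICES`, since amdsmi always enumerates physical devices. A sketch of how the two helpers compose, using a hypothetical `physical_device_count` written as if it lived in this same module (so the imports and helpers above are in scope):

@with_amdsmi_context
def physical_device_count() -> int:
    # Runs inside an initialized amdsmi context; the handle list covers
    # every physical GPU, regardless of {CUDA/HIP}_VISIBLE_DEVICES.
    return len(amdsmi_get_processor_handles())

# With CUDA_VISIBLE_DEVICES="2,3" (hypothetical values):
#   device_id_to_physical_device_id(0) == 2
#   device_id_to_physical_device_id(1) == 3
# With the variable unset, logical and physical ids coincide.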
@@ -96,13 +134,12 @@ def get_device_capability(cls, device_id: int = 0) -> DeviceCapability:
         return DeviceCapability(major=major, minor=minor)
 
     @classmethod
+    @with_amdsmi_context
     @lru_cache(maxsize=8)
     def get_device_name(cls, device_id: int = 0) -> str:
-        # NOTE: When using V1 this function is called when overriding the
-        # engine args. Calling torch.cuda.get_device_name(device_id) here
-        # will result in the ROCm context being initialized before other
-        # processes can be created.
-        return "AMD"
+        physical_device_id = device_id_to_physical_device_id(device_id)
+        handle = amdsmi_get_processor_handles()[physical_device_id]
+        return amdsmi_get_gpu_asic_info(handle)["market_name"]
 
     @classmethod
     def get_device_total_memory(cls, device_id: int = 0) -> int:
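
With this change, `get_device_name` reports the GPU's actual marketing name via amdsmi instead of the hard-coded `"AMD"` placeholder, while still avoiding the ROCm-context initialization the removed comment warned about. A usage sketch, assuming the module path `vllm.platforms.rocm`, a ROCm machine with the `amdsmi` package installed, and an illustrative device name:

from vllm.platforms.rocm import RocmPlatform

# Logical id 0 is mapped through CUDA_VISIBLE_DEVICES to a physical id,
# then resolved to an amdsmi handle and its ASIC info.
print(RocmPlatform.get_device_name(0))  # e.g. "AMD Instinct MI300X"

Note the decorator order: `with_amdsmi_context` wraps the `lru_cache`d function, so every call still opens and closes an amdsmi context; only the handle lookup and ASIC query themselves are cached.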