|
27 | 27 |
|
28 | 28 | _GLOBAL_RUNTIME_DATA: Dict[str, Union[str, int, bool]] = {}
|
29 | 29 |
|
| 30 | +_USAGE_ENV_VARS_TO_COLLECT = [ |
| 31 | + "VLLM_USE_MODELSCOPE", |
| 32 | + "VLLM_USE_TRITON_FLASH_ATTN", |
| 33 | + "VLLM_ATTENTION_BACKEND", |
| 34 | + "VLLM_USE_FLASHINFER_SAMPLER", |
| 35 | + "VLLM_PP_LAYER_PARTITION", |
| 36 | + "VLLM_USE_TRITON_AWQ", |
| 37 | + "VLLM_USE_V1", |
| 38 | + "VLLM_ENABLE_V1_MULTIPROCESSING", |
| 39 | +] |
| 40 | + |
30 | 41 |
|
31 | 42 | def set_runtime_usage_data(key: str, value: Union[str, int, bool]) -> None:
|
32 | 43 | """Set global usage data that will be sent with every usage heartbeat."""
|
@@ -122,6 +133,7 @@ def __init__(self) -> None:
|
122 | 133 | self.gpu_count: Optional[int] = None
|
123 | 134 | self.gpu_type: Optional[str] = None
|
124 | 135 | self.gpu_memory_per_device: Optional[int] = None
|
| 136 | + self.env_var_json: Optional[str] = None |
125 | 137 |
|
126 | 138 | # vLLM Information
|
127 | 139 | self.model_architecture: Optional[str] = None
|
@@ -176,6 +188,12 @@ def _report_usage_once(self, model_architecture: str,
|
176 | 188 | self.vllm_version = VLLM_VERSION
|
177 | 189 | self.model_architecture = model_architecture
|
178 | 190 |
|
| 191 | + # Environment variables |
| 192 | + self.env_var_json = json.dumps({ |
| 193 | + env_var: getattr(envs, env_var) |
| 194 | + for env_var in _USAGE_ENV_VARS_TO_COLLECT |
| 195 | + }) |
| 196 | + |
179 | 197 | # Metadata
|
180 | 198 | self.log_time = _get_current_timestamp_ns()
|
181 | 199 | self.source = envs.VLLM_USAGE_SOURCE
|
|
0 commit comments