Commit 64bef7d

Solved an issue where lighteval vllm would hang indefinitely in multi-node settings
1 parent: 515bd01

1 file changed (+10, -2)

src/lighteval/models/vllm/vllm_model.py

Lines changed: 10 additions & 2 deletions
@@ -93,6 +93,7 @@ class VLLMModelConfig:
     )
     pairwise_tokenization: bool = False  # whether to tokenize the context and continuation separately or together.
     generation_parameters: GenerationParameters = None  # sampling parameters to use for generation
+    enforce_eager: bool = False  # whether or not to disable cuda graphs with vllm
 
     subfolder: Optional[str] = None
 
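For context, here is roughly how the new flag could be switched on when building the config. This is a hedged sketch only: apart from enforce_eager, the field names and values (the pretrained field and the model id in particular) are assumptions, not something shown in this diff.

    # Hypothetical usage sketch: only enforce_eager is introduced by this commit.
    from lighteval.models.vllm.vllm_model import VLLMModelConfig

    config = VLLMModelConfig(
        pretrained="meta-llama/Meta-Llama-3-8B-Instruct",  # assumed field name and model id
        enforce_eager=True,  # run vLLM in eager mode, i.e. skip cuda graph capture
    )
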
@@ -136,13 +137,19 @@ def tokenizer(self):
         return self._tokenizer
 
     def cleanup(self):
-        destroy_model_parallel()
+        if ray is not None:
+            ray.get(ray.remote(destroy_model_parallel).remote())
+        else:
+            destroy_model_parallel()
         if self.model is not None:
             del self.model.llm_engine.model_executor.driver_worker
         self.model = None
         gc.collect()
         ray.shutdown()
-        destroy_distributed_environment()
+        if ray is not None:
+            ray.get(ray.remote(destroy_distributed_environment).remote())
+        else:
+            destroy_distributed_environment()
         torch.cuda.empty_cache()
 
     @property
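
Stitched together, cleanup() after this change reads roughly as follows. Only lines visible in the diff are reproduced; the imports of ray, gc, torch and of the destroy_* helpers are assumed to exist elsewhere in the module. Dispatching the two destroy_* calls through Ray remote tasks when ray is available is what addresses the multi-node hang described in the commit message.

    def cleanup(self):
        # Run the distributed teardown through a Ray remote task when ray is
        # available; fall back to the old direct calls when ray is None
        # (presumably when Ray could not be imported).
        if ray is not None:
            ray.get(ray.remote(destroy_model_parallel).remote())
        else:
            destroy_model_parallel()
        if self.model is not None:
            del self.model.llm_engine.model_executor.driver_worker
        self.model = None
        gc.collect()
        ray.shutdown()
        if ray is not None:
            ray.get(ray.remote(destroy_distributed_environment).remote())
        else:
            destroy_distributed_environment()
        torch.cuda.empty_cache()
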
@@ -182,6 +189,7 @@ def _create_auto_model(self, config: VLLMModelConfig, env_config: EnvConfig) ->
             "max_model_len": self._max_length,
             "swap_space": 4,
             "seed": 1234,
+            "enforce_eager": config.enforce_eager,
         }
         if int(config.data_parallel_size) > 1:
             self.model_args["worker_use_ray"] = True
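
For reference, enforce_eager is an existing vLLM engine argument that disables CUDA graph capture and keeps execution in eager mode. The diff does not show where self.model_args is consumed, but the presumed effect can be sketched standalone as below; the model id and the literal values other than enforce_eager are placeholders, not values taken from lighteval.

    from vllm import LLM

    # Hedged sketch: the actual call site in lighteval is not part of this diff.
    model_args = {
        "max_model_len": 4096,
        "swap_space": 4,
        "seed": 1234,
        "enforce_eager": True,  # skip cuda graph capture, run the engine eagerly
    }
    llm = LLM(model="meta-llama/Meta-Llama-3-8B-Instruct", **model_args)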
