From c33a0cbf472ce208f3d34111c275959bb133b6b8 Mon Sep 17 00:00:00 2001
From: Max de Bayser
Date: Thu, 6 Feb 2025 12:55:06 -0300
Subject: [PATCH 1/6] Prevent unnecessary requests to Hugging Face Hub

When repository files are already present in the local cache or in a
model directory, we shouldn't open HTTP connections to the Hugging Face
Hub just to verify that the files exist.

Signed-off-by: Max de Bayser
---
 vllm/transformers_utils/config.py | 116 ++++++++++++++++++------------
 1 file changed, 72 insertions(+), 44 deletions(-)

diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index 85056158bab..05b3056ed22 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -265,49 +265,64 @@ def get_config(
     return config
 
 
-def get_hf_file_to_dict(file_name: str,
-                        model: Union[str, Path],
-                        revision: Optional[str] = 'main'):
+def try_get_local_file(model: Union[str, Path],
+                       file_name: str,
+                       revision: Optional[str] = 'main') -> Optional[Path]:
+    file_path = Path(model) / file_name
+    if file_path.is_file():
+        return file_path
+    else:
+        cached_filepath = try_to_load_from_cache(repo_id=model,
+                                                 filename=file_name,
+                                                 revision=revision)
+        if isinstance(cached_filepath, str):
+            return Path(cached_filepath)
+    return None
+
+
+def get_hf_file_to_dict(
+    file_name: str,
+    model: Union[str, Path],
+    revision: Optional[str] = 'main') -> Optional[Dict[str, Any]]:
     """
-    Downloads a file from the Hugging Face Hub and returns 
+    Downloads a file from the Hugging Face Hub and returns
     its contents as a dictionary.
 
     Parameters:
     - file_name (str): The name of the file to download.
     - model (str): The name of the model on the Hugging Face Hub.
-    - revision (str): The specific version of the model. 
+    - revision (str): The specific version of the model.
 
     Returns:
-    - config_dict (dict): A dictionary containing 
+    - config_dict (dict): A dictionary containing
       the contents of the downloaded file.
     """
-    file_path = Path(model) / file_name
 
-    if file_or_path_exists(model=model,
-                           config_name=file_name,
-                           revision=revision):
+    file_path = try_get_local_file(model=model,
+                                   file_name=file_name,
+                                   revision=revision)
 
-        if not file_path.is_file():
-            try:
-                hf_hub_file = hf_hub_download(model,
-                                              file_name,
-                                              revision=revision)
-            except (RepositoryNotFoundError, RevisionNotFoundError,
-                    EntryNotFoundError, LocalEntryNotFoundError) as e:
-                logger.debug("File or repository not found in hf_hub_download",
-                             e)
-                return None
-            except HfHubHTTPError as e:
-                logger.warning(
-                    "Cannot connect to Hugging Face Hub. Skipping file "
-                    "download for '%s':",
-                    file_name,
-                    exc_info=e)
-                return None
-            file_path = Path(hf_hub_file)
+    if file_path is None and file_or_path_exists(
+            model=model, config_name=file_name, revision=revision):
+        try:
+            hf_hub_file = hf_hub_download(model, file_name, revision=revision)
+        except (RepositoryNotFoundError, RevisionNotFoundError,
+                EntryNotFoundError, LocalEntryNotFoundError) as e:
+            logger.debug("File or repository not found in hf_hub_download", e)
+            return None
+        except HfHubHTTPError as e:
+            logger.warning(
+                "Cannot connect to Hugging Face Hub. Skipping file "
+                "download for '%s':",
+                file_name,
+                exc_info=e)
+            return None
+        file_path = Path(hf_hub_file)
 
+    if file_path is not None and file_path.is_file():
         with open(file_path) as file:
             return json.load(file)
 
+    return None
+
 
@@ -378,21 +393,21 @@ def get_pooling_config_name(pooling_name: str) -> Union[str, None]:
     return None
 
 
-def get_sentence_transformer_tokenizer_config(model: str,
-                                              revision: Optional[str] = 'main'
-                                              ):
+def get_sentence_transformer_tokenizer_config(
+    model: str,
+    revision: Optional[str] = 'main') -> Optional[Dict[str, Any]]:
     """
-    Returns the tokenization configuration dictionary for a 
+    Returns the tokenization configuration dictionary for a
     given Sentence Transformer BERT model.
 
     Parameters:
-    - model (str): The name of the Sentence Transformer 
+    - model (str): The name of the Sentence Transformer
       BERT model.
     - revision (str, optional): The revision of the model to use.
       Defaults to 'main'.
 
     Returns:
-    - dict: A dictionary containing the configuration parameters 
+    - dict: A dictionary containing the configuration parameters
       for the Sentence Transformer BERT model.
     """
     sentence_transformer_config_files = [
@@ -404,20 +419,33 @@ def get_sentence_transformer_tokenizer_config(model: str,
         "sentence_xlm-roberta_config.json",
         "sentence_xlnet_config.json",
     ]
-    try:
-        # If model is on HuggingfaceHub, get the repo files
-        repo_files = list_repo_files(model, revision=revision, token=HF_TOKEN)
-    except Exception as e:
-        logger.debug("Error getting repo files", e)
-        repo_files = []
-
     encoder_dict = None
-    for config_name in sentence_transformer_config_files:
-        if config_name in repo_files or Path(model).exists():
-            encoder_dict = get_hf_file_to_dict(config_name, model, revision)
+
+    for config_file in sentence_transformer_config_files:
+        if try_get_local_file(model=model,
+                              file_name=config_file,
+                              revision=revision) is not None:
+            encoder_dict = get_hf_file_to_dict(config_file, model, revision)
             if encoder_dict:
                 break
 
+    if not encoder_dict:
+        try:
+            # If model is on HuggingfaceHub, get the repo files
+            repo_files = list_repo_files(model,
+                                         revision=revision,
+                                         token=HF_TOKEN)
+        except Exception as e:
+            logger.debug("Error getting repo files", e)
+            repo_files = []
+
+        for config_name in sentence_transformer_config_files:
+            if config_name in repo_files:
+                encoder_dict = get_hf_file_to_dict(config_name, model,
+                                                   revision)
+                if encoder_dict:
+                    break
+
     if not encoder_dict:
         return None
 
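For readers following the series: condensed into a standalone sketch, this is
the lookup order that PATCH 1/6 introduces via try_get_local_file. None of the
code below is part of the patch itself; the model name is illustrative, and
huggingface_hub's try_to_load_from_cache is a purely local check that never
opens a connection.

    from pathlib import Path
    from typing import Optional, Union

    from huggingface_hub import try_to_load_from_cache


    def resolve_locally(model: Union[str, Path],
                        file_name: str,
                        revision: Optional[str] = "main") -> Optional[Path]:
        # 1) "model" may be a plain local directory that already holds the file.
        candidate = Path(model) / file_name
        if candidate.is_file():
            return candidate
        # 2) Otherwise consult the local huggingface_hub cache. This call only
        #    inspects the disk: it returns a str path on a cache hit and a
        #    non-str sentinel (or None) otherwise -- no HTTP request is made.
        cached = try_to_load_from_cache(repo_id=str(model),
                                        filename=file_name,
                                        revision=revision)
        return Path(cached) if isinstance(cached, str) else None


    # Illustrative usage: prints a Path on a cache hit, otherwise None.
    print(resolve_locally("sentence-transformers/all-MiniLM-L12-v2",
                          "config.json"))
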
From 9c004c7eb5b09c6b68dc0a1b3be782b9699ce275 Mon Sep 17 00:00:00 2001
From: Max de Bayser
Date: Thu, 6 Feb 2025 13:16:36 -0300
Subject: [PATCH 2/6] undo type annotation

Signed-off-by: Max de Bayser
---
 vllm/transformers_utils/config.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index 05b3056ed22..c68770fb0e6 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -280,10 +280,9 @@ def try_get_local_file(model: Union[str, Path],
     return None
 
 
-def get_hf_file_to_dict(
-    file_name: str,
-    model: Union[str, Path],
-    revision: Optional[str] = 'main') -> Optional[Dict[str, Any]]:
+def get_hf_file_to_dict(file_name: str,
+                        model: Union[str, Path],
+                        revision: Optional[str] = 'main'):
     """
     Downloads a file from the Hugging Face Hub and returns
     its contents as a dictionary.
@@ -393,9 +392,9 @@ def get_pooling_config_name(pooling_name: str) -> Union[str, None]:
     return None
 
 
-def get_sentence_transformer_tokenizer_config(
-    model: str,
-    revision: Optional[str] = 'main') -> Optional[Dict[str, Any]]:
+def get_sentence_transformer_tokenizer_config(model: str,
+                                              revision: Optional[str] = 'main'
+                                              ):
     """
     Returns the tokenization configuration dictionary for a
     given Sentence Transformer BERT model.

From 779e2b519d6f8e43f8325fcd50ce691d936e9248 Mon Sep 17 00:00:00 2001
From: Max de Bayser
Date: Thu, 6 Feb 2025 14:05:43 -0300
Subject: [PATCH 3/6] fix model name validation error

Signed-off-by: Max de Bayser
---
 vllm/transformers_utils/config.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index c68770fb0e6..04ce7185e3b 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -10,7 +10,7 @@
 from huggingface_hub import (file_exists, hf_hub_download, list_repo_files,
                              try_to_load_from_cache)
 from huggingface_hub.utils import (EntryNotFoundError, HfHubHTTPError,
-                                   LocalEntryNotFoundError,
+                                   HFValidationError, LocalEntryNotFoundError,
                                    RepositoryNotFoundError,
                                    RevisionNotFoundError)
 from torch import nn
@@ -272,11 +272,14 @@ def try_get_local_file(model: Union[str, Path],
     if file_path.is_file():
         return file_path
     else:
-        cached_filepath = try_to_load_from_cache(repo_id=model,
-                                                 filename=file_name,
-                                                 revision=revision)
-        if isinstance(cached_filepath, str):
-            return Path(cached_filepath)
+        try:
+            cached_filepath = try_to_load_from_cache(repo_id=model,
+                                                     filename=file_name,
+                                                     revision=revision)
+            if isinstance(cached_filepath, str):
+                return Path(cached_filepath)
+        except HFValidationError:
+            ...
     return None

From 5f467ee00c09ba0fb2c844c58e3347da589d7075 Mon Sep 17 00:00:00 2001
From: Max de Bayser
Date: Thu, 6 Feb 2025 16:19:20 -0300
Subject: [PATCH 4/6] make get_pooling_config work in offline mode without
 internet connection

Signed-off-by: Max de Bayser
---
 tests/entrypoints/offline_mode/test_offline_mode.py | 9 +++++++++
 vllm/transformers_utils/config.py                   | 7 ++++++-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/tests/entrypoints/offline_mode/test_offline_mode.py b/tests/entrypoints/offline_mode/test_offline_mode.py
index eac76f2ba0f..d11c6125d2f 100644
--- a/tests/entrypoints/offline_mode/test_offline_mode.py
+++ b/tests/entrypoints/offline_mode/test_offline_mode.py
@@ -28,6 +28,15 @@
         "tensor_parallel_size": 1,
         "tokenizer_mode": "mistral",
     },
+    {
+        "model": "sentence-transformers/all-MiniLM-L12-v2",
+        "enforce_eager": True,
+        "gpu_memory_utilization": 0.20,
+        "max_model_len": 64,
+        "max_num_batched_tokens": 64,
+        "max_num_seqs": 64,
+        "tensor_parallel_size": 1,
+    },
 ]
 
diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index 04ce7185e3b..fb5cc3ec072 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -345,7 +345,12 @@ def get_pooling_config(model: str, revision: Optional[str] = 'main'):
     """
 
     modules_file_name = "modules.json"
-    modules_dict = get_hf_file_to_dict(modules_file_name, model, revision)
+
+    modules_dict = None
+    if file_or_path_exists(model=model,
+                           config_name=modules_file_name,
+                           revision=revision):
+        modules_dict = get_hf_file_to_dict(modules_file_name, model, revision)
 
     if modules_dict is None:
         return None
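Before the next patch, a note on the technique it uses: the offline test is
only trustworthy if an accidental network call fails loudly rather than
quietly succeeding. The patch achieves that by monkeypatching urllib3's
connection classes. The standalone sketch below shows the same mechanism
outside pytest; the URL is illustrative, and nothing here is part of the
patch itself.

    import urllib3


    def disable_connect(*args, **kwargs):
        raise RuntimeError("No http calls allowed")


    # Any code path that tries to open a socket through urllib3 (which
    # huggingface_hub reaches via requests) now raises immediately.
    urllib3.connection.HTTPConnection.connect = disable_connect
    urllib3.connection.HTTPSConnection.connect = disable_connect

    try:
        urllib3.PoolManager().request("GET", "https://huggingface.co")
    except RuntimeError as e:
        print(f"connection attempt blocked: {e}")
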
From 357ef3fd5fa34825986fd588ff68ecaaaa519026 Mon Sep 17 00:00:00 2001
From: Max de Bayser
Date: Thu, 6 Feb 2025 19:44:55 -0300
Subject: [PATCH 5/6] make sure that offline mode works without internet
 connection

Signed-off-by: Max de Bayser
---
 tests/entrypoints/offline_mode/test_offline_mode.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tests/entrypoints/offline_mode/test_offline_mode.py b/tests/entrypoints/offline_mode/test_offline_mode.py
index d11c6125d2f..85156d6931c 100644
--- a/tests/entrypoints/offline_mode/test_offline_mode.py
+++ b/tests/entrypoints/offline_mode/test_offline_mode.py
@@ -4,6 +4,7 @@
 import sys
 
 import pytest
+import urllib3
 
 from vllm import LLM
 from vllm.distributed import cleanup_dist_env_and_memory
@@ -56,6 +57,16 @@ def test_offline_mode(monkeypatch):
     # Set HF to offline mode and ensure we can still construct an LLM
     try:
         monkeypatch.setenv("HF_HUB_OFFLINE", "1")
+        monkeypatch.setenv("VLLM_NO_USAGE_STATS", "1")
+
+        def disable_connect(*args, **kwargs):
+            raise RuntimeError("No http calls allowed")
+
+        monkeypatch.setattr(urllib3.connection.HTTPConnection, "connect",
+                            disable_connect)
+        monkeypatch.setattr(urllib3.connection.HTTPSConnection, "connect",
+                            disable_connect)
+
         # Need to re-import huggingface_hub and friends to setup offline mode
         _re_import_modules()
         # Cached model files should be used in offline mode
@@ -65,6 +76,7 @@ def test_offline_mode(monkeypatch):
         # Reset the environment after the test
         # NB: Assuming tests are run in online mode
         monkeypatch.delenv("HF_HUB_OFFLINE")
+        monkeypatch.delenv("VLLM_NO_USAGE_STATS")
         _re_import_modules()
 
     pass

From 4df99aa7bd61052847148b6a5788ffd4f691499b Mon Sep 17 00:00:00 2001
From: Max de Bayser
Date: Thu, 6 Feb 2025 19:48:19 -0300
Subject: [PATCH 6/6] trigger ci

Signed-off-by: Max de Bayser
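Taken together, the series supports a warm-the-cache-once, then fully offline
workflow. A sketch of that flow follows, with assumptions flagged: the model
name simply mirrors the one added to the offline test above, and the LLM
keyword arguments are illustrative rather than required.

    # In practice the two steps run as separate processes, since
    # HF_HUB_OFFLINE must be set before the libraries are first imported.

    # --- Process 1, online: warm the local Hugging Face cache. ---
    from huggingface_hub import snapshot_download

    snapshot_download("sentence-transformers/all-MiniLM-L12-v2")

    # --- Process 2, air-gapped: serve entirely from that cache. ---
    import os

    os.environ["HF_HUB_OFFLINE"] = "1"  # set before importing vllm

    from vllm import LLM

    # With this series applied, construction resolves every config file
    # locally instead of probing the Hub over HTTP.
    llm = LLM(model="sentence-transformers/all-MiniLM-L12-v2",
              enforce_eager=True,
              max_model_len=64)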