Skip to content

Prevent unnecessary requests to huggingface hub #12837

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Feb 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions tests/entrypoints/offline_mode/test_offline_mode.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import sys

import pytest
import urllib3

from vllm import LLM
from vllm.distributed import cleanup_dist_env_and_memory
Expand All @@ -28,6 +29,15 @@
"tensor_parallel_size": 1,
"tokenizer_mode": "mistral",
},
{
"model": "sentence-transformers/all-MiniLM-L12-v2",
"enforce_eager": True,
"gpu_memory_utilization": 0.20,
"max_model_len": 64,
"max_num_batched_tokens": 64,
"max_num_seqs": 64,
"tensor_parallel_size": 1,
},
]


Expand All @@ -47,6 +57,16 @@ def test_offline_mode(monkeypatch):
# Set HF to offline mode and ensure we can still construct an LLM
try:
monkeypatch.setenv("HF_HUB_OFFLINE", "1")
monkeypatch.setenv("VLLM_NO_USAGE_STATS", "1")

def disable_connect(*args, **kwargs):
raise RuntimeError("No http calls allowed")

monkeypatch.setattr(urllib3.connection.HTTPConnection, "connect",
disable_connect)
monkeypatch.setattr(urllib3.connection.HTTPSConnection, "connect",
disable_connect)

# Need to re-import huggingface_hub and friends to setup offline mode
_re_import_modules()
# Cached model files should be used in offline mode
Expand All @@ -56,6 +76,7 @@ def test_offline_mode(monkeypatch):
# Reset the environment after the test
# NB: Assuming tests are run in online mode
monkeypatch.delenv("HF_HUB_OFFLINE")
monkeypatch.delenv("VLLM_NO_USAGE_STATS")
_re_import_modules()
pass

Expand Down
115 changes: 75 additions & 40 deletions vllm/transformers_utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from huggingface_hub import (file_exists, hf_hub_download, list_repo_files,
try_to_load_from_cache)
from huggingface_hub.utils import (EntryNotFoundError, HfHubHTTPError,
LocalEntryNotFoundError,
HFValidationError, LocalEntryNotFoundError,
RepositoryNotFoundError,
RevisionNotFoundError)
from torch import nn
Expand Down Expand Up @@ -265,49 +265,66 @@ def get_config(
return config


def try_get_local_file(model: Union[str, Path],
                       file_name: str,
                       revision: Optional[str] = 'main') -> Optional[Path]:
    """Resolve *file_name* without touching the network.

    First looks for the file directly under the *model* path on disk, then
    falls back to the local Hugging Face cache. Returns the resolved path,
    or None when the file is not available locally.
    """
    candidate = Path(model) / file_name
    if candidate.is_file():
        return candidate
    try:
        cached = try_to_load_from_cache(repo_id=model,
                                        filename=file_name,
                                        revision=revision)
    except HFValidationError:
        # Not a valid repo id / path for the cache lookup -- treat as absent.
        return None
    # try_to_load_from_cache may return a sentinel object instead of a str
    # when the file is known to be missing; only a str is a usable path.
    return Path(cached) if isinstance(cached, str) else None


def get_hf_file_to_dict(file_name: str,
                        model: Union[str, Path],
                        revision: Optional[str] = 'main'):
    """
    Load a JSON file for *model* and return its contents as a dictionary.

    The file is resolved locally first (model directory or Hugging Face
    cache via ``try_get_local_file``); only when it is not found locally is
    a download from the Hugging Face Hub attempted.

    Parameters:
    - file_name (str): The name of the file to load.
    - model (str): The name of the model on the Hugging Face Hub.
    - revision (str): The specific version of the model.

    Returns:
    - config_dict (dict): A dictionary containing the contents of the
      file, or None if the file could not be found or downloaded.
    """
    file_path = try_get_local_file(model=model,
                                   file_name=file_name,
                                   revision=revision)

    if file_path is None and file_or_path_exists(
            model=model, config_name=file_name, revision=revision):
        try:
            hf_hub_file = hf_hub_download(model, file_name, revision=revision)
        except (RepositoryNotFoundError, RevisionNotFoundError,
                EntryNotFoundError, LocalEntryNotFoundError) as e:
            # Attach the exception via exc_info: the message has no
            # %-placeholder, so passing `e` as a lazy format argument would
            # make the logging module raise a formatting error at emit time.
            logger.debug("File or repository not found in hf_hub_download",
                         exc_info=e)
            return None
        except HfHubHTTPError as e:
            logger.warning(
                "Cannot connect to Hugging Face Hub. Skipping file "
                "download for '%s':",
                file_name,
                exc_info=e)
            return None
        file_path = Path(hf_hub_file)

    if file_path is not None and file_path.is_file():
        with open(file_path) as file:
            return json.load(file)

    return None


Expand All @@ -328,7 +345,12 @@ def get_pooling_config(model: str, revision: Optional[str] = 'main'):
"""

modules_file_name = "modules.json"
modules_dict = get_hf_file_to_dict(modules_file_name, model, revision)

modules_dict = None
if file_or_path_exists(model=model,
config_name=modules_file_name,
revision=revision):
modules_dict = get_hf_file_to_dict(modules_file_name, model, revision)

if modules_dict is None:
return None
Expand Down Expand Up @@ -382,17 +404,17 @@ def get_sentence_transformer_tokenizer_config(model: str,
revision: Optional[str] = 'main'
):
"""
Returns the tokenization configuration dictionary for a
Returns the tokenization configuration dictionary for a
given Sentence Transformer BERT model.

Parameters:
- model (str): The name of the Sentence Transformer
- model (str): The name of the Sentence Transformer
BERT model.
- revision (str, optional): The revision of the m
odel to use. Defaults to 'main'.

Returns:
- dict: A dictionary containing the configuration parameters
- dict: A dictionary containing the configuration parameters
for the Sentence Transformer BERT model.
"""
sentence_transformer_config_files = [
Expand All @@ -404,20 +426,33 @@ def get_sentence_transformer_tokenizer_config(model: str,
"sentence_xlm-roberta_config.json",
"sentence_xlnet_config.json",
]
try:
# If model is on HuggingfaceHub, get the repo files
repo_files = list_repo_files(model, revision=revision, token=HF_TOKEN)
except Exception as e:
logger.debug("Error getting repo files", e)
repo_files = []

encoder_dict = None
for config_name in sentence_transformer_config_files:
if config_name in repo_files or Path(model).exists():
encoder_dict = get_hf_file_to_dict(config_name, model, revision)

for config_file in sentence_transformer_config_files:
if try_get_local_file(model=model,
file_name=config_file,
revision=revision) is not None:
encoder_dict = get_hf_file_to_dict(config_file, model, revision)
if encoder_dict:
break

if not encoder_dict:
try:
# If model is on HuggingfaceHub, get the repo files
repo_files = list_repo_files(model,
revision=revision,
token=HF_TOKEN)
except Exception as e:
logger.debug("Error getting repo files", e)
repo_files = []

for config_name in sentence_transformer_config_files:
if config_name in repo_files:
encoder_dict = get_hf_file_to_dict(config_name, model,
revision)
if encoder_dict:
break

if not encoder_dict:
return None

Expand Down