From 506c3b16eb8c40d08ed7dc8355d8d9f02256254e Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Thu, 6 Feb 2025 00:47:21 +0000 Subject: [PATCH 1/4] Check repo file list before fetching sentence-transformer configs Signed-off-by: EC2 Default User --- vllm/transformers_utils/config.py | 32 ++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py index 1c0f20a6e04..37a492b2c94 100644 --- a/vllm/transformers_utils/config.py +++ b/vllm/transformers_utils/config.py @@ -8,7 +8,7 @@ import huggingface_hub from huggingface_hub import (file_exists, hf_hub_download, - try_to_load_from_cache) + try_to_load_from_cache, list_repo_files) from huggingface_hub.utils import (EntryNotFoundError, HfHubHTTPError, LocalEntryNotFoundError, RepositoryNotFoundError, @@ -395,18 +395,24 @@ def get_sentence_transformer_tokenizer_config(model: str, - dict: A dictionary containing the configuration parameters for the Sentence Transformer BERT model. """ - for config_name in [ - "sentence_bert_config.json", - "sentence_roberta_config.json", - "sentence_distilbert_config.json", - "sentence_camembert_config.json", - "sentence_albert_config.json", - "sentence_xlm-roberta_config.json", - "sentence_xlnet_config.json", - ]: - encoder_dict = get_hf_file_to_dict(config_name, model, revision) - if encoder_dict: - break + sentence_transformer_config_files = [ + "sentence_bert_config.json", + "sentence_roberta_config.json", + "sentence_distilbert_config.json", + "sentence_camembert_config.json", + "sentence_albert_config.json", + "sentence_xlm-roberta_config.json", + "sentence_xlnet_config.json", + ] + repo_files = list_repo_files(model, revision=revision, token=HF_TOKEN) + if not any(config_name in repo_files for config_name in sentence_transformer_config_files): + return None + + for config_name in sentence_transformer_config_files: + if config_name in repo_files: + encoder_dict = get_hf_file_to_dict(config_name, model, revision) + if encoder_dict: + break 
if not encoder_dict: return None From d414008e7b4a7b351f2c96716b87ab3271376836 Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Thu, 6 Feb 2025 00:49:34 +0000 Subject: [PATCH 2/4] Drop early return from sentence-transformer config lookup Signed-off-by: EC2 Default User --- vllm/transformers_utils/config.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py index 37a492b2c94..23381dcb48e 100644 --- a/vllm/transformers_utils/config.py +++ b/vllm/transformers_utils/config.py @@ -405,8 +405,6 @@ def get_sentence_transformer_tokenizer_config(model: str, "sentence_xlnet_config.json", ] repo_files = list_repo_files(model, revision=revision, token=HF_TOKEN) - if not any(config_name in repo_files for config_name in sentence_transformer_config_files): - return None for config_name in sentence_transformer_config_files: if config_name in repo_files: From 692d1b606c52b63420fcc7bbdf14b292b7634be9 Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Thu, 6 Feb 2025 06:17:26 +0000 Subject: [PATCH 3/4] Sort huggingface_hub imports and initialize encoder_dict Signed-off-by: EC2 Default User Signed-off-by: <> --- vllm/transformers_utils/config.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py index 23381dcb48e..9769f2f6fbb 100644 --- a/vllm/transformers_utils/config.py +++ b/vllm/transformers_utils/config.py @@ -7,8 +7,8 @@ from typing import Any, Dict, Optional, Type, Union import huggingface_hub -from huggingface_hub import (file_exists, hf_hub_download, - try_to_load_from_cache, list_repo_files) +from huggingface_hub import (file_exists, hf_hub_download, list_repo_files, + try_to_load_from_cache) from huggingface_hub.utils import (EntryNotFoundError, HfHubHTTPError, LocalEntryNotFoundError, RepositoryNotFoundError, @@ -406,6 +406,7 @@ def get_sentence_transformer_tokenizer_config(model: str, ] repo_files = list_repo_files(model, revision=revision, token=HF_TOKEN) + encoder_dict = None for config_name in 
sentence_transformer_config_files: if config_name in repo_files: encoder_dict = get_hf_file_to_dict(config_name, model, revision) From 5d851aa12345aeaf7f4b32848e7e50387886fd00 Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Thu, 6 Feb 2025 07:45:55 +0000 Subject: [PATCH 4/4] Handle local model paths in sentence-transformer config lookup Signed-off-by: EC2 Default User Signed-off-by: <> --- vllm/transformers_utils/config.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py index 9769f2f6fbb..85056158bab 100644 --- a/vllm/transformers_utils/config.py +++ b/vllm/transformers_utils/config.py @@ -404,11 +404,16 @@ def get_sentence_transformer_tokenizer_config(model: str, "sentence_xlm-roberta_config.json", "sentence_xlnet_config.json", ] - repo_files = list_repo_files(model, revision=revision, token=HF_TOKEN) + try: + # If model is on HuggingfaceHub, get the repo files + repo_files = list_repo_files(model, revision=revision, token=HF_TOKEN) + except Exception as e: + logger.debug("Error getting repo files: %s", e) + repo_files = [] encoder_dict = None for config_name in sentence_transformer_config_files: - if config_name in repo_files: + if config_name in repo_files or Path(model).exists(): encoder_dict = get_hf_file_to_dict(config_name, model, revision) if encoder_dict: break