Skip to content

Commit 561e121

Browse files
DarkLight1337 and Isotr0py
authored and committed
[CI/Build][Doc] Update gte-Qwen2-1.5B-instruct usage (vllm-project#18683)
Signed-off-by: DarkLight1337 <[email protected]> Signed-off-by: Isotr0py <[email protected]> Co-authored-by: Isotr0py <[email protected]> Signed-off-by: amit <[email protected]>
1 parent 92dbf40 commit 561e121

File tree

3 files changed

+2
-18
lines changed

3 files changed

+2
-18
lines changed

docs/models/supported_models.md

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -404,10 +404,7 @@ Specified using `--task embed`.
404404
You should manually set mean pooling by passing `--override-pooler-config '{"pooling_type": "MEAN"}'`.
405405

406406
!!! note
407-
The HF implementation of `Alibaba-NLP/gte-Qwen2-1.5B-instruct` is hardcoded to use causal attention despite what is shown in `config.json`. To compare vLLM vs HF results,
408-
you should set `--hf-overrides '{"is_causal": true}'` in vLLM so that the two implementations are consistent with each other.
409-
410-
For both the 1.5B and 7B variants, you also need to enable `--trust-remote-code` for the correct tokenizer to be loaded.
407+
For `Alibaba-NLP/gte-Qwen2-*`, you need to enable `--trust-remote-code` for the correct tokenizer to be loaded.
411408
See [relevant issue on HF Transformers](https://github.com/huggingface/transformers/issues/34882).
412409

413410
!!! note

tests/models/language/pooling/test_embedding.py

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -15,13 +15,12 @@
1515
marks=[pytest.mark.core_model, pytest.mark.cpu_model]),
1616
pytest.param("sentence-transformers/all-MiniLM-L12-v2"),
1717
pytest.param("intfloat/multilingual-e5-small"),
18-
pytest.param("Alibaba-NLP/gte-Qwen2-7B-instruct"),
18+
pytest.param("Alibaba-NLP/gte-Qwen2-1.5B-instruct"),
1919
# [Decoder-only]
2020
pytest.param("BAAI/bge-multilingual-gemma2",
2121
marks=[pytest.mark.core_model]),
2222
pytest.param("intfloat/e5-mistral-7b-instruct",
2323
marks=[pytest.mark.core_model, pytest.mark.cpu_model]),
24-
pytest.param("Alibaba-NLP/gte-Qwen2-1.5B-instruct"),
2524
pytest.param("ssmits/Qwen2-7B-Instruct-embed-base"),
2625
# [Cross-Encoder]
2726
pytest.param("sentence-transformers/stsb-roberta-base-v2"),
@@ -47,9 +46,6 @@ def test_models(
4746
vllm_extra_kwargs["override_pooler_config"] = \
4847
PoolerConfig(pooling_type="MEAN")
4948

50-
if model == "Alibaba-NLP/gte-Qwen2-1.5B-instruct":
51-
vllm_extra_kwargs["hf_overrides"] = {"is_causal": True}
52-
5349
# The example_prompts has ending "\n", for example:
5450
# "Write a short story about a robot that dreams for the first time.\n"
5551
# sentence_transformers will strip the input texts, see:

tests/models/language/pooling/test_gte.py

Lines changed: 0 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -45,9 +45,6 @@
4545
EmbedModelInfo("Alibaba-NLP/gte-Qwen2-1.5B-instruct",
4646
architecture="Qwen2ForCausalLM",
4747
enable_test=True),
48-
EmbedModelInfo("Alibaba-NLP/gte-Qwen2-7B-instruct",
49-
architecture="Qwen2ForCausalLM",
50-
enable_test=False),
5148
########## ModernBertModel
5249
EmbedModelInfo("Alibaba-NLP/gte-modernbert-base",
5350
architecture="ModernBertModel",
@@ -61,9 +58,6 @@ def test_models_mteb(hf_runner, vllm_runner,
6158
from .mteb_utils import mteb_test_embed_models
6259

6360
vllm_extra_kwargs: dict[str, Any] = {}
64-
if model_info.name == "Alibaba-NLP/gte-Qwen2-1.5B-instruct":
65-
vllm_extra_kwargs["hf_overrides"] = {"is_causal": True}
66-
6761
if model_info.architecture == "GteNewModel":
6862
vllm_extra_kwargs["hf_overrides"] = {"architectures": ["GteNewModel"]}
6963

@@ -81,9 +75,6 @@ def test_models_correctness(hf_runner, vllm_runner, model_info: EmbedModelInfo,
8175
example_prompts = [str(s).strip() for s in example_prompts]
8276

8377
vllm_extra_kwargs: dict[str, Any] = {}
84-
if model_info.name == "Alibaba-NLP/gte-Qwen2-1.5B-instruct":
85-
vllm_extra_kwargs["hf_overrides"] = {"is_causal": True}
86-
8778
if model_info.architecture == "GteNewModel":
8879
vllm_extra_kwargs["hf_overrides"] = {"architectures": ["GteNewModel"]}
8980

0 commit comments

Comments (0)