Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions docs/models/supported_models.md
Original file line number Diff line number Diff line change
Expand Up @@ -404,10 +404,7 @@ Specified using `--task embed`.
You should manually set mean pooling by passing `--override-pooler-config '{"pooling_type": "MEAN"}'`.

!!! note
The HF implementation of `Alibaba-NLP/gte-Qwen2-1.5B-instruct` is hardcoded to use causal attention despite what is shown in `config.json`. To compare vLLM vs HF results,
you should set `--hf-overrides '{"is_causal": true}'` in vLLM so that the two implementations are consistent with each other.

For both the 1.5B and 7B variants, you also need to enable `--trust-remote-code` for the correct tokenizer to be loaded.
For `Alibaba-NLP/gte-Qwen2-*`, you need to enable `--trust-remote-code` for the correct tokenizer to be loaded.
See [relevant issue on HF Transformers](https://github.com/huggingface/transformers/issues/34882).

!!! note
Expand Down
6 changes: 1 addition & 5 deletions tests/models/language/pooling/test_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,12 @@
marks=[pytest.mark.core_model, pytest.mark.cpu_model]),
pytest.param("sentence-transformers/all-MiniLM-L12-v2"),
pytest.param("intfloat/multilingual-e5-small"),
pytest.param("Alibaba-NLP/gte-Qwen2-7B-instruct"),
pytest.param("Alibaba-NLP/gte-Qwen2-1.5B-instruct"),
# [Decoder-only]
pytest.param("BAAI/bge-multilingual-gemma2",
marks=[pytest.mark.core_model]),
pytest.param("intfloat/e5-mistral-7b-instruct",
marks=[pytest.mark.core_model, pytest.mark.cpu_model]),
pytest.param("Alibaba-NLP/gte-Qwen2-1.5B-instruct"),
pytest.param("ssmits/Qwen2-7B-Instruct-embed-base"),
# [Cross-Encoder]
pytest.param("sentence-transformers/stsb-roberta-base-v2"),
Expand All @@ -47,9 +46,6 @@ def test_models(
vllm_extra_kwargs["override_pooler_config"] = \
PoolerConfig(pooling_type="MEAN")

if model == "Alibaba-NLP/gte-Qwen2-1.5B-instruct":
vllm_extra_kwargs["hf_overrides"] = {"is_causal": True}

# The example_prompts has ending "\n", for example:
# "Write a short story about a robot that dreams for the first time.\n"
# sentence_transformers will strip the input texts, see:
Expand Down
9 changes: 0 additions & 9 deletions tests/models/language/pooling/test_gte.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,6 @@
EmbedModelInfo("Alibaba-NLP/gte-Qwen2-1.5B-instruct",
architecture="Qwen2ForCausalLM",
enable_test=True),
EmbedModelInfo("Alibaba-NLP/gte-Qwen2-7B-instruct",
architecture="Qwen2ForCausalLM",
enable_test=False),
########## ModernBertModel
EmbedModelInfo("Alibaba-NLP/gte-modernbert-base",
architecture="ModernBertModel",
Expand All @@ -61,9 +58,6 @@ def test_models_mteb(hf_runner, vllm_runner,
from .mteb_utils import mteb_test_embed_models

vllm_extra_kwargs: dict[str, Any] = {}
if model_info.name == "Alibaba-NLP/gte-Qwen2-1.5B-instruct":
vllm_extra_kwargs["hf_overrides"] = {"is_causal": True}

if model_info.architecture == "GteNewModel":
vllm_extra_kwargs["hf_overrides"] = {"architectures": ["GteNewModel"]}

Expand All @@ -81,9 +75,6 @@ def test_models_correctness(hf_runner, vllm_runner, model_info: EmbedModelInfo,
example_prompts = [str(s).strip() for s in example_prompts]

vllm_extra_kwargs: dict[str, Any] = {}
if model_info.name == "Alibaba-NLP/gte-Qwen2-1.5B-instruct":
vllm_extra_kwargs["hf_overrides"] = {"is_causal": True}

if model_info.architecture == "GteNewModel":
vllm_extra_kwargs["hf_overrides"] = {"architectures": ["GteNewModel"]}

Expand Down