5 changes: 0 additions & 5 deletions tests/lora/test_phi.py
@@ -1,8 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-import pytest
-
 import vllm
 from vllm.lora.request import LoRARequest
 
@@ -49,9 +47,6 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]:
     return generated_texts
 
 
-# Skipping for V1 for now as we are hitting,
-# "Head size 80 is not supported by FlashAttention." error.
-@pytest.mark.skip(reason="Head size 80 is not supported by FlashAttention")
 def test_phi2_lora(phi2_lora_files):
Contributor (review comment, severity: medium):
Consider adding a comment explaining why this test was previously skipped and the reason for enabling enforce_eager=True. This provides valuable context for future maintainers.

Suggested change:
-def test_phi2_lora(phi2_lora_files):
+# The phi-2 LoRA test runs in eager mode to avoid a FlashAttention
+# incompatibility with head size 80.
+def test_phi2_lora(phi2_lora_files):

 # We enable enforce_eager=True here to reduce VRAM usage for lora-test CI,
 # Otherwise, the lora-test will fail due to CUDA OOM.
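For readers unfamiliar with the test being re-enabled, here is a minimal sketch of what a phi-2 LoRA test like this typically does when the engine is built with enforce_eager=True. The model name, LoRA rank, adapter name, prompt, and helper function are illustrative assumptions, not taken from this diff:

# Minimal sketch (assumed model name, rank, and prompt). Shows how a vLLM
# LoRA test can construct the engine with enforce_eager=True, which skips
# CUDA graph capture and reduces VRAM usage on the CI machine.
import vllm
from vllm.lora.request import LoRARequest


def run_phi2_lora_smoke(lora_path: str) -> list[str]:
    llm = vllm.LLM(
        model="microsoft/phi-2",  # phi-2 uses head size 80
        enable_lora=True,
        max_lora_rank=64,         # assumed; must cover the adapter's rank
        enforce_eager=True,       # eager mode: lower VRAM, no CUDA graphs
    )
    params = vllm.SamplingParams(temperature=0, max_tokens=64)
    outputs = llm.generate(
        ["Who wrote the book Innovator's Dilemma?"],  # illustrative prompt
        params,
        lora_request=LoRARequest("phi2-lora", 1, lora_path),
    )
    return [out.outputs[0].text for out in outputs]

Eager mode trades some inference speed for a smaller memory footprint, which is why the in-file comment cites CUDA OOM on the lora-test CI runner as the reason for enabling it.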