
Commit c88cee5

waive tests for some bugs already fixed in 1.0 until the massive merge is done
Signed-off-by: Xin He (SW-GPU) <[email protected]>
1 parent e3378b9 commit c88cee5


4 files changed: +15 additions, -44 deletions


tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 1 addition & 36 deletions
@@ -2010,42 +2010,7 @@ def test_nvfp4_multi_gpus_chunked_prefill(self, tp_size, pp_size, ep_size,
         assert llm.args.moe_config.backend == moe_backend
         assert llm.args.quant_config.quant_algo == QuantAlgo.NVFP4
 
-    def test_nvfp4_multi_gpus_corner_case(self):
-        """
-        This test is used to test the corner case of the NVFP4 model.
-        When using the same value for max_seq_len and max_num_tokens, there will be no
-        enough kv block for the dummy requests in CUDA graph warmup when creating
-        the py_executor before estimating kv cache. Then CUDA graph capture will be
-        triggered when estimating kv cache. This may cause some errors.
-        More info in https://nvbugs/5485325.
-        """
-        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.80,
-                                        dtype="fp8",
-                                        enable_block_reuse=False)
-        pytorch_config = dict(disable_overlap_scheduler=False,
-                              cuda_graph_config=CudaGraphConfig(
-                                  enable_padding=True, max_batch_size=1024),
-                              moe_config=MoeConfig(backend="TRTLLM"))
-
-        mtp_config = MTPDecodingConfig(num_nextn_predict_layers=1)
-        with LLM(f"{llm_models_root()}/DeepSeek-R1/DeepSeek-R1-FP4",
-                 tensor_parallel_size=8,
-                 pipeline_parallel_size=1,
-                 moe_expert_parallel_size=8,
-                 kv_cache_config=kv_cache_config,
-                 **pytorch_config,
-                 enable_attention_dp=False,
-                 speculative_config=mtp_config,
-                 max_seq_len=5120,
-                 max_num_tokens=5120) as llm:
-
-            assert llm.args.quant_config.quant_algo == QuantAlgo.NVFP4
-
-            task = MMLU(self.MODEL_NAME)
-            task.evaluate(llm)
-            task = GSM8K(self.MODEL_NAME)
-            task.evaluate(llm)
-
+    @skip_pre_blackwell
     def test_nvfp4_multi_gpus_corner_case(self):
         """
         This test is used to test the corner case of the NVFP4 model.
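
This change drops a duplicated copy of test_nvfp4_multi_gpus_corner_case and gates the remaining copy with @skip_pre_blackwell, so the NVFP4 corner case only runs on Blackwell-class GPUs. As a rough, hedged sketch of how such a capability-gated skip marker can be built with pytest: the decorator body below, the (10, 0) threshold for Blackwell, and the use of torch are illustrative assumptions, not the actual utils.util implementation.

# Illustrative sketch only; the real skip_pre_blackwell helper lives in
# utils.util and may be implemented differently.
import pytest
import torch

def _compute_capability() -> tuple:
    # Return (major, minor) of the current CUDA device, e.g. (9, 0) for Hopper.
    if not torch.cuda.is_available():
        return (0, 0)
    return torch.cuda.get_device_capability()

# Assumption: Blackwell-class GPUs report compute capability 10.x.
skip_pre_blackwell = pytest.mark.skipif(
    _compute_capability() < (10, 0),
    reason="Requires a Blackwell (SM100) or newer GPU")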

tests/integration/defs/test_e2e.py

Lines changed: 4 additions & 2 deletions
@@ -1735,8 +1735,10 @@ def test_openai_multinodes_chat_tp8pp2(llm_root, llm_venv):
 
 
 @pytest.mark.skip_less_device_memory(80000)
-@pytest.mark.parametrize(
-    "model_name", ["llama-3.1-model/Meta-Llama-3.1-8B", "gpt_oss/gpt-oss-20b"])
+@pytest.mark.parametrize("model_name", [
+    "llama-3.1-model/Meta-Llama-3.1-8B",
+    pytest.param("gpt_oss/gpt-oss-20b", marks=skip_pre_hopper)
+])
 def test_trtllm_benchmark_serving(llm_venv, model_name):
     test_root = unittest_path() / "llmapi" / "apps"
     llm_venv.run_cmd([
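
The parametrization change applies skip_pre_hopper to the gpt-oss-20b case only, instead of skipping or waiving the whole test. Below is a small, self-contained sketch of the pytest.param(..., marks=...) pattern used here; the unconditional skip mark stands in for the repository's skip_pre_hopper helper and is only a placeholder.

# Standalone illustration of per-case marks in a parametrized test.
import pytest

# Placeholder for the real skip_pre_hopper condition from utils.util.
requires_hopper = pytest.mark.skip(reason="placeholder for skip_pre_hopper")

@pytest.mark.parametrize("model_name", [
    "llama-3.1-model/Meta-Llama-3.1-8B",
    pytest.param("gpt_oss/gpt-oss-20b", marks=requires_hopper),
])
def test_benchmark_serving(model_name):
    # Only the gpt-oss case carries the mark; the Llama case still runs.
    assert isinstance(model_name, str)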

tests/integration/test_lists/waives.txt

Lines changed: 4 additions & 2 deletions
@@ -346,5 +346,7 @@ test_e2e.py::test_ptp_quickstart_multimodal[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B
 test_e2e.py::test_ptp_quickstart_multimodal[qwen2.5-vl-7b-instruct-Qwen2.5-VL-7B-Instruct-video-True] SKIP (https://nvbugs/5509024)
 test_e2e.py::test_trtllm_multimodal_benchmark_serving SKIP (https://nvbugs/5523315)
 accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen_tp_asymmetric[MMLU-gen_tp=2-ctx_pp=4] SKIP (https://nvbugs/5434320)
-test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-mixture_text_image-True] SKIP (https://nvbugs/5509024)
-test_e2e.py::test_trtllm_benchmark_serving[gpt_oss/gpt-oss-20b] SKIP (https://nvbugs/5526591)
+test_e2e.py/test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-mixture_text_image-True] SKIP (https://nvbugs/5509024)
+accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput] SKIP (https://nvbugs/5481198)
+accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale_chunked_prefill[latency] SKIP (https://nvbugs/5481198)
+accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale_chunked_prefill[throughput] SKIP (https://nvbugs/5481198)

tests/unittest/llmapi/apps/_test_trtllm_serve_benchmark.py

Lines changed: 6 additions & 4 deletions
@@ -3,7 +3,7 @@
 import sys
 
 import pytest
-from utils.util import skip_gpu_memory_less_than_80gb
+from utils.util import skip_gpu_memory_less_than_80gb, skip_pre_hopper
 
 from .openai_server import RemoteOpenAIServer
 
@@ -45,9 +45,11 @@ def dataset_path(dataset_name: str):
 
 
 @skip_gpu_memory_less_than_80gb
-@pytest.mark.parametrize(
-    "model_name", ["llama-3.1-model/Meta-Llama-3.1-8B", "gpt_oss/gpt-oss-20b"],
-    indirect=True)
+@pytest.mark.parametrize("model_name", [
+    "llama-3.1-model/Meta-Llama-3.1-8B",
+    pytest.param("gpt_oss/gpt-oss-20b", marks=skip_pre_hopper)
+],
+                         indirect=True)
 def test_trtllm_serve_benchmark(server: RemoteOpenAIServer, benchmark_root: str,
                                 model_path: str):
     model_name = model_path.split("/")[-1]
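
This file applies the same per-case mark, but with indirect=True, so each parametrized value is routed through the model_name fixture before the test body sees it. A minimal sketch of indirect parametrization follows; the fixture body and path prefix are placeholders and do not reproduce the repository's actual server setup or skip_pre_hopper helper.

# Minimal sketch of indirect parametrization: with indirect=True the value
# from parametrize is delivered to the fixture as request.param, and the
# test receives whatever the fixture returns.
import pytest

@pytest.fixture
def model_name(request):
    # Placeholder path resolution; the real fixture does more work.
    return f"/models/{request.param}"

@pytest.mark.parametrize("model_name", [
    "llama-3.1-model/Meta-Llama-3.1-8B",
    pytest.param("gpt_oss/gpt-oss-20b",
                 marks=pytest.mark.skip(reason="placeholder for skip_pre_hopper")),
],
                         indirect=True)
def test_serve_benchmark(model_name):
    # The test sees the fixture's return value, not the raw parameter.
    assert model_name.startswith("/models/")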
