
Commit ac36633

crazydemo authored and dominicshanshan committed
[None][fix] update skip config (NVIDIA#6891)
Signed-off-by: Ivy Zhang <[email protected]>
Signed-off-by: Wangshanshan <[email protected]>
1 parent f64603e · commit ac36633

File tree

5 files changed: +28 −26 lines

tests/integration/defs/accuracy/test_disaggregated_serving.py
tests/integration/defs/accuracy/test_llm_api_pytorch.py
tests/integration/test_lists/qa/llm_function_full.txt
tests/integration/test_lists/qa/llm_function_sanity.txt
tests/integration/test_lists/test-db/l0_h100.yml

tests/integration/defs/accuracy/test_disaggregated_serving.py

Lines changed: 7 additions & 7 deletions

```diff
@@ -376,6 +376,7 @@ def test_auto_dtype(self, disable_overlap_scheduler):
         task.evaluate(llm)

     @pytest.mark.skip_less_device(2)
+    @skip_pre_hopper
     def test_ngram(self):
         speculative_decoding_config = {
             "decoding_type": "NGram",
@@ -582,7 +583,6 @@ def test_tp_pp_symmetric(self, tp, pp, testset):
         return run_parallel_test(self.MODEL_NAME, self.MODEL_PATH, pp, tp, pp,
                                  tp, 1, 1, [get_accuracy_task(testset)])

-    @pytest.mark.skip_less_device(4)
     @parametrize_with_ids("ctx_pp", [2, 4])
     @parametrize_with_ids("gen_tp", [1, 2])
     @pytest.mark.parametrize("testset", ["GSM8K", "MMLU"])
@@ -593,20 +593,18 @@ def test_ctx_pp_gen_tp_asymmetric(self, ctx_pp, gen_tp, testset):
         return run_parallel_test(self.MODEL_NAME, self.MODEL_PATH, ctx_pp, 1, 1,
                                  gen_tp, 1, 1, [get_accuracy_task(testset)])

-    @pytest.mark.skip_less_device(4)
     @pytest.mark.parametrize("testset", ["GSM8K", "MMLU"])
     def test_multi_instance(self, testset):
         return run_parallel_test(self.MODEL_NAME, self.MODEL_PATH, 1, 1, 1, 1,
                                  2, 2, [get_accuracy_task(testset)])


-@pytest.mark.skip_less_device_memory(140000)
-@pytest.mark.timeout(3600)
-@pytest.mark.skip_less_device(4)
 class TestLlama4ScoutInstruct(LlmapiAccuracyTestHarness):
     MODEL_NAME = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
     MODEL_PATH = f"{llm_models_root()}/llama4-models/Llama-4-Scout-17B-16E-Instruct"

+    @pytest.mark.skip_less_device_memory(140000)
+    @pytest.mark.timeout(3600)
     @pytest.mark.skip_less_device(8)
     @pytest.mark.parametrize("overlap_scheduler", [False, True])
     def test_auto_dtype(self, overlap_scheduler):
@@ -687,7 +685,7 @@ def test_nixl_backend(self):
     @parametrize_with_ids("overlap_scheduler", [True, False])
     @parametrize_with_ids("mtp_nextn",
                           [0, pytest.param(2, marks=skip_pre_hopper)])
-    @pytest.mark.skip_less_device(4)
+    @pytest.mark.skip_less_device(8)
     def test_auto_dtype(self, overlap_scheduler, mtp_nextn):
         ctx_server_config = {"disable_overlap_scheduler": True}
         gen_server_config = {"disable_overlap_scheduler": not overlap_scheduler}
@@ -731,6 +729,7 @@ class TestGemma3_1BInstruct(LlmapiAccuracyTestHarness):
     MODEL_NAME = "google/gemma-3-1b-it"
     MODEL_PATH = f"{llm_models_root()}/gemma/gemma-3-1b-it/"

+    @pytest.mark.skip_less_device(2)
     @pytest.mark.parametrize("overlap_scheduler", [False, True])
     def test_auto_dtype(self, overlap_scheduler):
         pytest.skip(
@@ -820,8 +819,9 @@ def test_nixl_backend(self):
         task = GSM8K(self.MODEL_NAME)
         task.evaluate(llm)

-    @pytest.mark.parametrize("overlap_scheduler", [False, True])
     @skip_pre_hopper
+    @pytest.mark.skip_less_device(2)
+    @pytest.mark.parametrize("overlap_scheduler", [False, True])
     def test_auto_dtype(self, overlap_scheduler):
         ctx_server_config = {
             "disable_overlap_scheduler": True,
```

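An aside for readers outside the repo: `skip_less_device(n)` and `skip_less_device_memory(m)` are custom pytest marks that gate tests on GPU count and GPU memory. The diff only moves and retunes them; their implementation is not shown here. A minimal sketch of how such a mark can be enforced from a `conftest.py`, assuming a hypothetical `visible_device_count()` helper in place of the repo's real GPU probe:

```python
# Sketch only: enforce a custom skip_less_device(n) marker from conftest.py.
# visible_device_count() is a hypothetical stand-in for the repo's GPU probe.
import pytest
import torch


def visible_device_count() -> int:
    # Number of CUDA devices visible to this process (0 without CUDA).
    return torch.cuda.device_count() if torch.cuda.is_available() else 0


def pytest_runtest_setup(item):
    # Runs before each test; skip when fewer GPUs are present than required.
    marker = item.get_closest_marker("skip_less_device")
    if marker is not None:
        required = marker.args[0]
        found = visible_device_count()
        if found < required:
            pytest.skip(f"needs >= {required} GPUs, found {found}")
```

With a hook like this, moving the marker from the class to a method (as the TestLlama4ScoutInstruct hunk does) narrows the gate from every test in the class to just the decorated test.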
tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 16 additions & 15 deletions

```diff
@@ -886,25 +886,26 @@ def test_auto_dtype(self):

 class TestMistralSmall24B(LlmapiAccuracyTestHarness):
     MODEL_NAME = "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
+    MODEL_PATH = f"{llm_models_root()}/Mistral-Small-3.1-24B-Instruct-2503"

     @pytest.mark.skip_less_device_memory(80000)
-    @pytest.mark.parametrize(
-        "model_path, expected_quant_algo",
-        [
-            # Original bfloat16 model.
-            (f"{llm_models_root()}/Mistral-Small-3.1-24B-Instruct-2503", None),
-            # FP8 model.
-            pytest.param(
-                f"{llm_models_root()}/Mistral-Small-3.1-24B-Instruct-2503-fp8",
-                QuantAlgo.FP8,
-                marks=skip_pre_ada,
-            ),
-        ],
-    )
-    def test_auto_dtype(self, model_path, expected_quant_algo):
+    def test_auto_dtype(self):
         kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.75)
+        with LLM(self.MODEL_PATH, kv_cache_config=kv_cache_config) as llm:
+            task = CnnDailymail(self.MODEL_NAME)
+            task.evaluate(llm)
+            task = MMLU(self.MODEL_NAME)
+            task.evaluate(llm)
+            task = GSM8K(self.MODEL_NAME)
+            task.evaluate(llm)
+
+    @skip_pre_ada
+    @pytest.mark.skip_less_device_memory(80000)
+    def test_fp8(self):
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.75)
+        model_path = f"{llm_models_root()}/Mistral-Small-3.1-24B-Instruct-2503-fp8"
         with LLM(model_path, kv_cache_config=kv_cache_config) as llm:
-            assert llm.args.quant_config.quant_algo == expected_quant_algo
+            assert llm.args.quant_config.quant_algo == QuantAlgo.FP8
             task = CnnDailymail(self.MODEL_NAME)
             task.evaluate(llm)
             task = MMLU(self.MODEL_NAME)
```
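The removed parametrization relied on `pytest.param(..., marks=...)`, which attaches a mark to a single case, so only the FP8 path was gated on Ada. A generic, self-contained illustration of that mechanism (the values and skip condition are invented for the example):

```python
# Generic illustration of per-case marks in pytest.mark.parametrize:
# only the second case carries the extra skip condition.
import sys

import pytest


@pytest.mark.parametrize(
    "value",
    [
        1,  # always runs
        pytest.param(
            2,
            marks=pytest.mark.skipif(sys.platform == "win32",
                                     reason="POSIX only"),
        ),
    ],
)
def test_value(value):
    assert value in (1, 2)
```

Splitting the single parametrized test into explicit `test_auto_dtype` and `test_fp8` methods trades that compactness for stable, path-free node IDs, which is what the test-list updates below pick up.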

tests/integration/test_lists/qa/llm_function_full.txt

Lines changed: 2 additions & 2 deletions

```diff
@@ -475,8 +475,8 @@
 accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_eagle3_tp8[eagle3_one_model=False]
 accuracy/test_llm_api_pytorch.py::TestMistral7B::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestGemma3_1BInstruct::test_auto_dtype
-accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype[/scratch.trt_llm_data/llm-models/Mistral-Small-3.1-24B-Instruct-2503-None]
-accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype[/scratch.trt_llm_data/llm-models/Mistral-Small-3.1-24B-Instruct-2503-fp8-FP8]
+accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype
+accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_fp8
 accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_auto_dtype[tp8-cuda_graph=False]
 accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_auto_dtype[tp8ep4-cuda_graph=True]
 accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_auto_dtype[tp8ep8-cuda_graph=True]
```
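Each entry in these lists is a pytest node ID. The bracketed suffix on the removed lines was the parametrize ID, derived from the model path, which leaked the `/scratch.trt_llm_data` storage layout into the QA lists; the split tests have stable, path-free IDs. Any of the new entries can be run directly, e.g. `pytest tests/integration/defs/accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_fp8`.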

tests/integration/test_lists/qa/llm_function_sanity.txt

Lines changed: 1 addition & 0 deletions

```diff
@@ -58,6 +58,7 @@
 accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-triton]
 accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-trtllm]
 accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4a16[dp4]
+accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestKanana_Instruct::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestKimiK2::test_fp8_blockscale[latency]
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4
```

tests/integration/test_lists/test-db/l0_h100.yml

Lines changed: 2 additions & 2 deletions

```diff
@@ -225,8 +225,8 @@ l0_h100:
 - accuracy/test_llm_api_pytorch.py::TestGemma3_1BInstruct::test_fp8_prequantized
 - accuracy/test_llm_api_pytorch.py::TestGemma3_27BInstruct::test_fp8_prequantized
 - accuracy/test_llm_api_pytorch.py::TestGemma3_27BInstruct::test_auto_dtype
-- accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype[/scratch.trt_llm_data/llm-models/Mistral-Small-3.1-24B-Instruct-2503-None]
-- accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype[/scratch.trt_llm_data/llm-models/Mistral-Small-3.1-24B-Instruct-2503-fp8-FP8]
+- accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype
+- accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_fp8
 - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_fp8_block_scales[latency-torch_compile=False]
 - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_fp8_block_scales[latency-torch_compile=True]
 - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding[llguidance]
```
