
Commit 590fb23

update test config

Signed-off-by: Ivy Zhang <[email protected]>

1 parent 51b9f5a

4 files changed: +22 −21 lines

tests/integration/defs/accuracy/references/gsm8k.yaml

Lines changed: 2 additions & 2 deletions

@@ -21,10 +21,10 @@ meta-llama/Llama-4-Scout-17B-16E-Instruct:
   - accuracy: 89.70
   - quant_algo: FP8
     kv_cache_quant_algo: FP8
-    accuracy: 0.00
+    accuracy: 89.61
   - quant_algo: NVFP4
     kv_cache_quant_algo: FP8
-    accuracy: 0.00
+    accuracy: 89.00
 deepseek-ai/DeepSeek-V3-Lite:
   - accuracy: 64.74
   - quant_algo: NVFP4
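For readers unfamiliar with these reference files: each model name maps to a list of accuracy records, optionally tagged with quantization settings. Below is a minimal sketch of how a harness could look up the record for a given configuration, assuming only PyYAML and this file layout; the helper name is ours, not the repo's.

```python
# Hedged sketch, not TensorRT-LLM's actual harness code: select the
# reference accuracy matching a (quant_algo, kv_cache_quant_algo) pair.
import yaml

def lookup_reference(path, model, quant_algo=None, kv_cache_quant_algo=None):
    with open(path) as f:
        refs = yaml.safe_load(f)
    for entry in refs[model]:
        if (entry.get("quant_algo") == quant_algo
                and entry.get("kv_cache_quant_algo") == kv_cache_quant_algo):
            return entry["accuracy"]
    raise KeyError(f"no reference for {model} ({quant_algo})")

# e.g. returns 89.61 after this commit:
# lookup_reference("gsm8k.yaml", "meta-llama/Llama-4-Scout-17B-16E-Instruct",
#                  quant_algo="FP8", kv_cache_quant_algo="FP8")
```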

tests/integration/defs/accuracy/references/mmlu.yaml

Lines changed: 2 additions & 2 deletions

@@ -73,10 +73,10 @@ meta-llama/Llama-4-Scout-17B-16E-Instruct:
   - accuracy: 80.00
   - quant_algo: FP8
     kv_cache_quant_algo: FP8
-    accuracy: 0.00
+    accuracy: 80.00
   - quant_algo: NVFP4
     kv_cache_quant_algo: FP8
-    accuracy: 0.00
+    accuracy: 80.00
 mistralai/Mistral-7B-v0.1:
   - accuracy: 66
 mistralai/Mistral-7B-Instruct-v0.3:
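The commit replaces 0.00 placeholders with measured baselines; how a run is gated against them is up to the accuracy harness. A purely illustrative check follows, with an assumed 1-point tolerance that is not taken from this repo.

```python
# Hypothetical check, not the repo's actual acceptance rule: fail a run
# whose measured accuracy drops more than `tolerance` points below the
# recorded reference.
def assert_accuracy(measured: float, reference: float,
                    tolerance: float = 1.0) -> None:
    assert measured >= reference - tolerance, (
        f"measured {measured:.2f} is more than {tolerance} below "
        f"reference {reference:.2f}")

assert_accuracy(79.85, 80.00)  # passes under the assumed 1-point tolerance
```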

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 14 additions & 13 deletions

@@ -486,10 +486,10 @@ def test_auto_dtype(self, cuda_graph, tp_size, pp_size, ep_size):
         task.evaluate(llm)

     @skip_pre_hopper
-    @pytest.mark.skip_less_mpi_world_size(8)
+    @pytest.mark.skip_less_mpi_world_size(4)
     @parametrize_with_ids("cuda_graph", [True])
-    @pytest.mark.parametrize("tp_size,pp_size,ep_size", [(8, 1, 8), (4, 1, 1)],
-                             ids=["tp8ep8", "tp4"])
+    @pytest.mark.parametrize("tp_size,pp_size,ep_size", [(4, 1, 4), (4, 1, 1)],
+                             ids=["tp4ep4", "tp4"])
     def test_fp8_prequantized(self, cuda_graph, tp_size, pp_size, ep_size):
         model_path = f"{llm_models_root()}/llama4-models/Llama-4-Scout-17B-16E-Instruct-FP8"
         with LLM(
@@ -499,8 +499,7 @@ def test_fp8_prequantized(self, cuda_graph, tp_size, pp_size, ep_size):
             max_seq_len=8192,
             pipeline_parallel_size=pp_size,
             moe_expert_parallel_size=ep_size,
-            cuda_graph_config=CudaGraphConfig()
-            if cuda_graph else None) as llm:
+            use_cuda_graph=cuda_graph) as llm:
             assert llm.args.quant_config.quant_algo == QuantAlgo.FP8
             assert llm.args.quant_config.kv_cache_quant_algo == QuantAlgo.FP8
             task = MMLU(self.MODEL_NAME)
@@ -509,14 +508,15 @@ def test_fp8_prequantized(self, cuda_graph, tp_size, pp_size, ep_size):
         task.evaluate(llm)

     @skip_pre_hopper
-    @pytest.mark.skip_less_mpi_world_size(8)
+    @pytest.mark.skip_less_mpi_world_size(4)
     @parametrize_with_ids("cuda_graph", [True])
-    @pytest.mark.parametrize("tp_size,pp_size,ep_size", [(8, 1, 8)],
-                             ids=["tp8ep8"])
+    @pytest.mark.parametrize("tp_size,pp_size,ep_size", [(4, 1, 4)],
+                             ids=["tp4ep4"])
     def test_fp8_chunked_prefill(self, cuda_graph, tp_size, pp_size, ep_size):
         with LLM(
             f"{llm_models_root()}/llama4-models/Llama-4-Scout-17B-16E-Instruct-FP8",
             tensor_parallel_size=tp_size,
+            max_seq_len=22000,
             pipeline_parallel_size=pp_size,
             moe_expert_parallel_size=ep_size,
             enable_chunked_prefill=True,
@@ -530,10 +530,10 @@ def test_fp8_chunked_prefill(self, cuda_graph, tp_size, pp_size, ep_size):
         task.evaluate(llm)

     @skip_pre_blackwell
-    @pytest.mark.skip_less_mpi_world_size(8)
+    @pytest.mark.skip_less_mpi_world_size(4)
     @parametrize_with_ids("cuda_graph", [True])
-    @pytest.mark.parametrize("tp_size,pp_size,ep_size", [(8, 1, 8), (4, 1, 1)],
-                             ids=["tp8ep8", "tp4"])
+    @pytest.mark.parametrize("tp_size,pp_size,ep_size", [(4, 1, 4), (4, 1, 1)],
+                             ids=["tp4ep4", "tp4"])
     def test_fp4_prequantized(self, cuda_graph, tp_size, pp_size, ep_size):
         model_path = f"{llm_models_root()}/llama4-models/Llama-4-Scout-17B-16E-Instruct-FP4"
         with LLM(
@@ -554,14 +554,15 @@ def test_fp4_prequantized(self, cuda_graph, tp_size, pp_size, ep_size):
     @skip_pre_blackwell
     @pytest.mark.skip_less_mpi_world_size(8)
     @parametrize_with_ids("cuda_graph", [True])
-    @pytest.mark.parametrize("tp_size,pp_size,ep_size", [(8, 1, 8)],
-                             ids=["tp8ep8"])
+    @pytest.mark.parametrize("tp_size,pp_size,ep_size", [(4, 1, 4)],
+                             ids=["tp4ep4"])
     def test_fp4_chunked_prefill(self, cuda_graph, tp_size, pp_size, ep_size):
         with LLM(
             f"{llm_models_root()}/llama4-models/Llama-4-Scout-17B-16E-Instruct-FP4",
             tensor_parallel_size=tp_size,
             pipeline_parallel_size=pp_size,
             moe_expert_parallel_size=ep_size,
+            max_seq_len=22000,
             enable_chunked_prefill=True,
             max_num_tokens=256,
             use_cuda_graph=cuda_graph) as llm:
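The new ids=["tp4ep4", "tp4"] labels become the bracketed suffixes of the pytest node IDs listed in the QA file below. Here is a standalone sketch, using plain pytest without the repo's fixtures or helpers, of how stacked parametrize decorators compose those IDs.

```python
# Standalone sketch of the id composition used above (plain pytest, no
# repo fixtures). Matching the ordering seen in the QA list, the
# decorator nearest the function contributes the first bracket
# component, so this collects as e.g.
#   test_parallel_config[tp4ep4-cuda_graph=True]
import pytest

@pytest.mark.parametrize("cuda_graph", [True], ids=lambda v: f"cuda_graph={v}")
@pytest.mark.parametrize("tp_size,pp_size,ep_size", [(4, 1, 4), (4, 1, 1)],
                         ids=["tp4ep4", "tp4"])
def test_parallel_config(cuda_graph, tp_size, pp_size, ep_size):
    # expert parallelism cannot shard over more ranks than exist
    assert ep_size <= tp_size * pp_size
```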

tests/integration/test_lists/qa/examples_test_list.txt

Lines changed: 4 additions & 4 deletions

@@ -462,12 +462,12 @@ accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8_chunked_p
 accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_auto_dtype[tp8-cuda_graph=False]
 accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_auto_dtype[tp8ep4-cuda_graph=True]
 accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_auto_dtype[tp8ep8-cuda_graph=True]
-accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_fp8_prequantized[tp8ep8-cuda_graph=True]
+accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_fp8_prequantized[tp4ep4-cuda_graph=True]
 accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_fp8_prequantized[tp4-cuda_graph=True]
-accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_fp8_chunked_prefill[tp8ep8-cuda_graph=False]
-accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_fp8_chunked_prefill[tp8ep8-cuda_graph=True]
-accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_fp4_prequantized[tp8ep8-cuda_graph=True]
+accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_fp8_chunked_prefill[tp4ep4-cuda_graph=True]
+accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_fp4_prequantized[tp4ep4-cuda_graph=True]
 accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_fp4_prequantized[tp4-cuda_graph=True]
+accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_fp4_chunked_prefill[tp4ep4-cuda_graph=True]
 accuracy/test_llm_api_pytorch.py::TestMixtral8x7B::test_fp8_tp2
 accuracy/test_llm_api_pytorch.py::TestMixtral8x7B::test_nvfp4_tp2
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=0-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]
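Each line in this QA list is a full pytest node ID, so any renamed entry can be exercised on its own. A minimal sketch using pytest's documented pytest.main entry point (running from the tests/integration/defs directory is an assumption on our part):

```python
# Minimal sketch: run one updated QA-list entry programmatically via
# pytest.main, which accepts the same node IDs as the command line.
import pytest

pytest.main([
    "accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct"
    "::test_fp8_prequantized[tp4ep4-cuda_graph=True]",
])
```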
