diff --git a/jenkins/L0_Test.groovy b/jenkins/L0_Test.groovy index d1e2cc2c492..4565193c349 100644 --- a/jenkins/L0_Test.groovy +++ b/jenkins/L0_Test.groovy @@ -1967,6 +1967,7 @@ def launchTestJobs(pipeline, testFilter) x86TestConfigs = [ "DGX_H100-4_GPUs-PyTorch-DeepSeek-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 2, 4], "DGX_H100-4_GPUs-PyTorch-DeepSeek-2": ["dgx-h100-x4", "l0_dgx_h100", 2, 2, 4], + "DGX_H100-2_GPUs-PyTorch-Others-1": ["dgx-h100-x2", "l0_dgx_h100", 1, 1, 2], "DGX_H100-4_GPUs-PyTorch-Others-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4], "DGX_H100-4_GPUs-CPP-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4], "A10-PyTorch-1": ["a10", "l0_a10", 1, 1], @@ -2353,9 +2354,9 @@ def launchTestJobs(pipeline, testFilter) }, {}, true) }]} - multiGpuJobs = parallelJobs.findAll{(it.key.contains("4_GPUs") || it.key.contains("8_GPUs")) && !it.key.contains("Post-Merge")} + multiGpuJobs = parallelJobs.findAll{(it.key.contains("2_GPUs") || it.key.contains("4_GPUs") || it.key.contains("8_GPUs")) && !it.key.contains("Post-Merge")} println multiGpuJobs.keySet() - multiGpuJobsPostMerge = parallelJobs.findAll{(it.key.contains("4_GPUs") || it.key.contains("8_GPUs")) && it.key.contains("Post-Merge")} + multiGpuJobsPostMerge = parallelJobs.findAll{(it.key.contains("2_GPUs") || it.key.contains("4_GPUs") || it.key.contains("8_GPUs")) && it.key.contains("Post-Merge")} parallelJobs += docBuildJobs parallelJobs += sanityCheckJobs diff --git a/tests/integration/test_lists/test-db/l0_dgx_h100.yml b/tests/integration/test_lists/test-db/l0_dgx_h100.yml index 3de8724a06b..eb9aba3fa0a 100644 --- a/tests/integration/test_lists/test-db/l0_dgx_h100.yml +++ b/tests/integration/test_lists/test-db/l0_dgx_h100.yml @@ -1,5 +1,40 @@ version: 0.0.1 l0_dgx_h100: +- condition: + ranges: + system_gpu_count: + gte: 2 + lte: 2 + wildcards: + gpu: + - '*h100*' + linux_distribution_name: ubuntu* + terms: + stage: pre_merge + backend: pytorch + auto_trigger: others + tests: + - unittest/llmapi/test_llm_multi_gpu_pytorch.py -m "gpu2" + - unittest/_torch/multi_gpu -m "not post_merge" TIMEOUT (90) + - unittest/_torch/auto_deploy/unit/multigpu + - unittest/_torch/modeling/test_modeling_pixtral.py::test_tensor_parallelism + - accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_eagle3[eagle3_one_model=False-overlap_scheduler=False] + - accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_eagle3[eagle3_one_model=True-overlap_scheduler=True] + - accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_guided_decoding[xgrammar] + - accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_guided_decoding_with_eagle3[xgrammar-eagle3_one_model=True] + - accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_guided_decoding_with_eagle3[xgrammar-eagle3_one_model=False] + - accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_auto_dtype[False] + - accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_auto_dtype[True] + - accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_chunked_prefill + - accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_nixl_backend + - accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_nixl_backend + - accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ngram + - accuracy/test_disaggregated_serving.py::TestGemma3_1BInstruct::test_auto_dtype[False] + - accuracy/test_disaggregated_serving.py::TestGemma3_1BInstruct::test_auto_dtype[True] + - accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[False] + - accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[True] + # ------------- AutoDeploy tests --------------- + - accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype - condition: ranges: system_gpu_count: @@ -15,9 +50,7 @@ l0_dgx_h100: auto_trigger: others tests: # ------------- PyTorch tests --------------- - - unittest/_torch/multi_gpu -m "not post_merge" TIMEOUT (90) - - unittest/_torch/auto_deploy/unit/multigpu - - unittest/llmapi/test_llm_multi_gpu_pytorch.py -m "gpu4 or gpu2" + - unittest/llmapi/test_llm_multi_gpu_pytorch.py -m "gpu4" - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp4-attn_backend=TRTLLM-torch_compile=False] - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp2pp2-attn_backend=TRTLLM-torch_compile=False] - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp2pp2-attn_backend=TRTLLM-torch_compile=True] @@ -35,19 +68,6 @@ l0_dgx_h100: - disaggregated/test_disaggregated.py::test_disaggregated_ctxpp2_gentp2[TinyLlama-1.1B-Chat-v1.0] - disaggregated/test_disaggregated.py::test_disaggregated_ctxpp4_gentp4[TinyLlama-1.1B-Chat-v1.0] - disaggregated/test_disaggregated.py::test_disaggregated_genbs1[TinyLlama-1.1B-Chat-v1.0] - - accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[False] - - accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[True] - - accuracy/test_disaggregated_serving.py::TestGemma3_1BInstruct::test_auto_dtype[False] - - accuracy/test_disaggregated_serving.py::TestGemma3_1BInstruct::test_auto_dtype[True] - - accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ngram - - accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_auto_dtype[False] - - accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_auto_dtype[True] - - accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_chunked_prefill - - accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_eagle3[eagle3_one_model=False-overlap_scheduler=False] - - accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_eagle3[eagle3_one_model=True-overlap_scheduler=True] - - accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_guided_decoding[xgrammar] - - accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_guided_decoding_with_eagle3[xgrammar-eagle3_one_model=True] - - accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_guided_decoding_with_eagle3[xgrammar-eagle3_one_model=False] - accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_tp_pp_symmetric[GSM8K-tp1pp2] - accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_tp_pp_symmetric[MMLU-tp1pp2] - accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_tp_pp_symmetric[GSM8K-tp2pp1] @@ -58,13 +78,8 @@ l0_dgx_h100: - accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen_tp_asymmetric[MMLU-gen_tp=2-ctx_pp=2] - accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_multi_instance[GSM8K] - accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_multi_instance[MMLU] - - accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_nixl_backend - - accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_nixl_backend - test_e2e.py::test_ptp_quickstart_advanced_bs1 - test_e2e.py::test_ptp_quickstart_advanced_deepseek_v3_lite_4gpus_adp_balance[DeepSeek-V3-Lite-FP8-DeepSeek-V3-Lite/fp8] - - unittest/_torch/modeling/test_modeling_pixtral.py::test_tensor_parallelism - # ------------- AutoDeploy tests --------------- - - accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype - condition: ranges: system_gpu_count: diff --git a/tests/unittest/_torch/multi_gpu/test_lowprecision_allreduce.py b/tests/unittest/_torch/multi_gpu/test_lowprecision_allreduce.py index 331e250b349..3b149fc86d4 100644 --- a/tests/unittest/_torch/multi_gpu/test_lowprecision_allreduce.py +++ b/tests/unittest/_torch/multi_gpu/test_lowprecision_allreduce.py @@ -249,9 +249,9 @@ def test(self, mode="acc"): ids=lambda x: f"size{x}") @pytest.mark.parametrize( "mpi_pool_executor", - [2, 4], # 8 - ids=["tp_size_2", "tp_size_4"], - indirect=True) # "tp_size_8" + [2], # 4, 8 + ids=["tp_size_2"], + indirect=True) # "tp_size_4", "tp_size_8" def test_lowprecision_allreduce_acc(dtype, strategy, message_size, mpi_pool_executor): """