@@ -418,15 +418,16 @@ steps:
418418  - pytest -v -s compile/test_basic_correctness.py 
419419  - pytest -v -s compile/piecewise/ 
420420
421- - label : PyTorch Fullgraph Test  #  20min 
422-   timeout_in_minutes : 30 
421+ - label : PyTorch Fullgraph Test  #  22min 
422+   timeout_in_minutes : 35 
423423  mirror_hardwares : [amdexperimental] 
424424  torch_nightly : true 
425425  source_file_dependencies :
426426  - vllm/ 
427427  - tests/compile 
428428  commands :
429429  - pytest -v -s compile/test_full_graph.py 
430+   - pytest -v -s compile/test_fusions_e2e.py 
430431
431432- label : Kernels Core Operation Test  #  48min
432433  timeout_in_minutes : 75 
@@ -808,8 +809,8 @@ steps:
808809    #  Whisper needs spawn method to avoid deadlock
809810    - VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper 
810811
811- - label : Blackwell Test  #  38  min
812-   timeout_in_minutes : 60 
812+ - label : Blackwell Test  #  TODO  min
813+   timeout_in_minutes : 70 
813814  working_dir : " /vllm-workspace/" 
814815  gpu : b200 
815816  #  optional: true
@@ -822,8 +823,6 @@ steps:
822823  - vllm/model_executor/layers/fused_moe/flashinfer_cutlass_prepare_finalize.py 
823824  - vllm/model_executor/layers/quantization/utils/flashinfer_utils.py 
824825  - vllm/v1/attention/backends/flashinfer.py 
825-   - vllm/compilation/fusion.py 
826-   - vllm/compilation/fusion_attn.py 
827826  commands :
828827    - nvidia-smi 
829828    - python3 examples/offline_inference/basic/chat.py 
@@ -840,15 +839,32 @@ steps:
840839    - pytest -v -s tests/kernels/quantization/test_nvfp4_scaled_mm.py 
841840    - pytest -v -s tests/kernels/quantization/test_flashinfer_scaled_mm.py 
842841    - pytest -v -s tests/kernels/quantization/test_flashinfer_nvfp4_scaled_mm.py 
842+     - pytest -v -s tests/kernels/quantization/test_nvfp4_qutlass.py 
843+     - pytest -v -s tests/kernels/quantization/test_mxfp4_qutlass.py 
843844    - pytest -v -s tests/kernels/moe/test_nvfp4_moe.py 
844845    - pytest -v -s tests/kernels/moe/test_ocp_mx_moe.py 
845-     #  Fusion
846-     - pytest -v -s tests/compile/test_fusion_all_reduce.py 
847-     - pytest -v -s tests/compile/test_fusion_attn.py::test_attention_quant_pattern 
848846    - pytest -v -s tests/kernels/moe/test_flashinfer.py 
847+ 
848+ - label : Blackwell Fusion Tests  #  TODO min
849+   timeout_in_minutes : 70 
850+   working_dir : " /vllm-workspace/" 
851+   gpu : b200 
852+   source_file_dependencies :
853+   - csrc/quantization/fp4/ 
854+   - vllm/model_executor/layers/quantization/utils/flashinfer_utils.py 
855+   - vllm/v1/attention/backends/flashinfer.py 
856+   - vllm/compilation/ 
857+   #  can affect pattern matching
858+   - vllm/model_executor/layers/layernorm.py 
859+   - vllm/model_executor/layers/activation.py 
860+   - vllm/model_executor/layers/quantization/input_quant_fp8.py 
861+   commands :
862+     - nvidia-smi 
863+     - pytest -v -s tests/compile/test_fusion_attn.py 
849864    - pytest -v -s tests/compile/test_silu_mul_quant_fusion.py 
850-     - pytest -v -s tests/kernels/quantization/test_nvfp4_qutlass.py 
851-     - pytest -v -s tests/kernels/quantization/test_mxfp4_qutlass.py 
865+     #  this runner exposes 2 GPUs even without num_gpus: 2 being set on this step
866+     - pytest -v -s tests/compile/test_fusion_all_reduce.py 
867+     - pytest -v -s tests/compile/test_fusions_e2e.py 
852868
853869- label : Blackwell GPT-OSS Eval 
854870  timeout_in_minutes : 60 
@@ -1103,14 +1119,16 @@ steps:
11031119  - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large.txt --tp-size=4 
11041120
11051121# #### H200 test #####
1106- - label : Distrubted  Tests (H200) #  optional
1122+ - label : Distributed  Tests (H200) #  optional
11071123  gpu : h200 
11081124  optional : true 
11091125  working_dir : " /vllm-workspace/" 
11101126  num_gpus : 2 
11111127  commands :
11121128    - pytest -v -s tests/compile/test_async_tp.py 
11131129    - pytest -v -s tests/compile/test_sequence_parallelism.py 
1130+     - pytest -v -s tests/compile/test_fusion_all_reduce.py 
1131+     - pytest -v -s tests/compile/test_fusions_e2e.py::test_tp2_attn_quant_allreduce_rmsnorm 
11141132    - pytest -v -s tests/distributed/test_context_parallel.py 
11151133    - CUDA_VISIBLE_DEVICES=1,2 VLLM_ALL2ALL_BACKEND=deepep_high_throughput VLLM_USE_DEEP_GEMM=1 VLLM_LOGGING_LEVEL=DEBUG python3 examples/offline_inference/data_parallel.py --model Qwen/Qwen1.5-MoE-A2.7B --tp-size=1  --dp-size=2 --max-model-len 2048 
11161134
0 commit comments