diff --git a/tests/integration/defs/examples/test_gemma.py b/tests/integration/defs/examples/test_gemma.py
index 9e747799e3a..1759d7631bc 100644
--- a/tests/integration/defs/examples/test_gemma.py
+++ b/tests/integration/defs/examples/test_gemma.py
@@ -208,6 +208,7 @@ def test_llm_gemma_1gpu_summary_vswa(batch_size, data_type, gemma_model_root,
                                      max_attention_window)
 
 
+@pytest.mark.timeout(5400)
 @pytest.mark.parametrize("batch_size", [8])
 @pytest.mark.parametrize("data_type", ['float16', 'bfloat16'])
 @pytest.mark.parametrize("test_case", [
diff --git a/tests/integration/test_lists/qa/examples_test_list.txt b/tests/integration/test_lists/qa/examples_test_list.txt
index 4d0d0da24e0..d6488038e53 100644
--- a/tests/integration/test_lists/qa/examples_test_list.txt
+++ b/tests/integration/test_lists/qa/examples_test_list.txt
@@ -402,7 +402,7 @@ accuracy/test_cli_flow.py::TestMixtral8x7B::test_plugin_tp8[renormalize-expert_p
 accuracy/test_cli_flow.py::TestMixtral8x7B::test_plugin_tp8[renormalize-mixed_parallel]
 accuracy/test_cli_flow.py::TestMixtral8x7B::test_plugin_tp8[renormalize-tensor_parallel]
 accuracy/test_cli_flow.py::TestMixtral8x7B::test_nvfp4_prequantized
-accuracy/test_cli_flow.py::TestMixtral8x22B::test_fp8_tp2pp2 TIMEOUT (90)
+accuracy/test_cli_flow.py::TestMixtral8x22B::test_fp8_tp2pp2 TIMEOUT (120)
 accuracy/test_cli_flow.py::TestMixtral8x22B::test_int8_plugin_tp8[renormalize-tensor_parallel] TIMEOUT (90)
 accuracy/test_cli_flow.py::TestGemma2_9BIt::test_auto_dtype
 accuracy/test_cli_flow.py::TestGemma2_9BIt::test_weight_only[int8]
diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt
index b8e29e56281..962ff607f77 100644
--- a/tests/integration/test_lists/waives.txt
+++ b/tests/integration/test_lists/waives.txt
@@ -438,3 +438,13 @@ accuracy/test_llm_api.py::TestLlama3_1_8BInstruct::test_guided_decoding_4gpus[xg
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[mtp=vanilla-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] SKIP (https://nvbugs/5354946)
 examples/test_multimodal.py::test_llm_multimodal_general[kosmos-2-pp:1-tp:1-float16-bs:1-cpp_e2e:True-nb:1] SKIP (https://nvbugs/5354936)
 examples/test_multimodal.py::test_llm_multimodal_general[fuyu-8b-pp:1-tp:1-float16-bs:1-cpp_e2e:True-nb:1] SKIP (https://nvbugs/5354936)
+accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales_4gpus_static_eplb SKIP (https://nvbugs/5354925)
+accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_llm_sampler SKIP (https://nvbugs/5354884)
+examples/test_llama.py::test_llm_llama_v3_1_2nodes_8gpus[llama-3.1-8b-disable_fp8-tp16pp1-build] SKIP (https://nvbugs/5247243)
+examples/test_llama.py::test_llm_llama_v3_1_2nodes_8gpus[llama-3.1-8b-disable_fp8-tp16pp1-infer] SKIP (https://nvbugs/5247243)
+test_e2e.py::test_openai_multinodes_chat_tp16pp1 SKIP (https://nvbugs/5112075)
+examples/test_qwen.py::test_llm_hf_qwen_quantization_1gpu[qwen2_vl_7b_instruct-fp8-bfloat16] SKIP (https://nvbugs/5322488)
+accuracy/test_cli_flow.py::TestSantacoder::test_auto_dtype SKIP (https://nvbugs/5234043)
+full:B200/disaggregated/test_workers.py::test_workers_kv_cache_aware_router_eviction[TinyLlama-1.1B-Chat-v1.0] SKIP (https://nvbugs/5300551)
+full:B200/accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_cutlass] SKIP (https://nvbugs/5355219)
+full:B200/accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_trtllm] SKIP (https://nvbugs/5355219)