diff --git a/tests/integration/defs/accuracy/test_llm_api_pytorch.py b/tests/integration/defs/accuracy/test_llm_api_pytorch.py
index eec9c8219ce..43a8ca27793 100644
--- a/tests/integration/defs/accuracy/test_llm_api_pytorch.py
+++ b/tests/integration/defs/accuracy/test_llm_api_pytorch.py
@@ -187,10 +187,11 @@ def test_fp8_4gpus(self, tp_size, pp_size, fp8kv, attn_backend,
         task = GSM8K(self.MODEL_NAME)
         task.evaluate(llm)
 
+    @pytest.mark.skip(reason="https://nvbugspro.nvidia.com/bug/5292517")
     @skip_pre_hopper
-    def test_fp8_llm_decoder(self):
+    def test_fp8_llm_sampler(self):
         model_path = f"{llm_models_root()}/llama-3.1-model/Llama-3.1-8B-Instruct-FP8"
-        pytorch_config = PyTorchConfig(enable_trtllm_decoder=True)
+        pytorch_config = PyTorchConfig(enable_trtllm_sampler=True)
         llm = LLM(model_path, pytorch_backend_config=pytorch_config)
         assert llm.args.quant_config.quant_algo == QuantAlgo.FP8
 
diff --git a/tests/integration/test_lists/qa/examples_test_list.txt b/tests/integration/test_lists/qa/examples_test_list.txt
index 018204c868d..6dd0b6c834f 100644
--- a/tests/integration/test_lists/qa/examples_test_list.txt
+++ b/tests/integration/test_lists/qa/examples_test_list.txt
@@ -425,7 +425,7 @@ accuracy/test_llm_api.py::TestMixtral8x7B::test_tp2
 accuracy/test_llm_api.py::TestMixtral8x7B::test_smooth_quant_tp2pp2
 accuracy/test_llm_api.py::TestMixtral8x7BInstruct::test_awq_tp2
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4
-accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_llm_decoder
+accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_llm_sampler
 accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_tp4
 accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_nvfp4_tp4
 accuracy/test_cli_flow.py::TestLlama3_3_70BInstruct::test_fp8_prequantized_tp4
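
Reviewer note: a minimal standalone sketch of the renamed flag, reusing only the calls visible in the hunk above. The import paths and the model path are assumptions (they do not appear in this diff) and may differ in the tree:

    # Sketch, not the test itself: exercises enable_trtllm_sampler
    # (formerly enable_trtllm_decoder) outside of pytest.
    from tensorrt_llm._torch import LLM            # assumed import path
    from tensorrt_llm._torch.pyexecutor.config import PyTorchConfig  # assumed import path

    # Same calls as in test_fp8_llm_sampler; only the flag name changed.
    pytorch_config = PyTorchConfig(enable_trtllm_sampler=True)
    llm = LLM("/path/to/Llama-3.1-8B-Instruct-FP8",  # placeholder for llm_models_root()-based path
              pytorch_backend_config=pytorch_config)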