diff --git a/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/aarch64-linux-gnu/tensorrt_llm_internal_cutlass_kernels_static.tar.xz b/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/aarch64-linux-gnu/tensorrt_llm_internal_cutlass_kernels_static.tar.xz index 4ce5dc1d4b1..aa53b594f4a 100644 --- a/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/aarch64-linux-gnu/tensorrt_llm_internal_cutlass_kernels_static.tar.xz +++ b/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/aarch64-linux-gnu/tensorrt_llm_internal_cutlass_kernels_static.tar.xz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:02d0983056f925c59313e03964bc04e4dcddd9f9fc46c7917db14296af4abdb5 -size 63562780 +oid sha256:090be002758e4fb864e16ec25c0db3f8eb562a0033e60a156bbbfd6bce67a5a1 +size 63577888 diff --git a/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/aarch64-linux-gnu/version.txt b/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/aarch64-linux-gnu/version.txt index f15ea3c403c..ed1277dce44 100644 --- a/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/aarch64-linux-gnu/version.txt +++ b/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/aarch64-linux-gnu/version.txt @@ -1,2 +1,2 @@ -557bea7fa3fcd0b9230edecf3a8dbb10eeb1b8d78928861b1c6da1043f973c35 libtensorrt_llm_internal_cutlass_kernels_static.a -commit 2e68c0113ad486c0026619711e5974f5ae0dc626 +aff0f8e617f6ca2f95d121ab9cf0ab17c4e8077cf9e8896bf153d3942a4a50df libtensorrt_llm_internal_cutlass_kernels_static.a +commit d61e7684bc095c8ff5ec540363949bd1f491c960 diff --git a/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/x86_64-linux-gnu/tensorrt_llm_internal_cutlass_kernels_static.tar.xz b/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/x86_64-linux-gnu/tensorrt_llm_internal_cutlass_kernels_static.tar.xz index deef5aafc39..6143671a5c8 100644 --- a/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/x86_64-linux-gnu/tensorrt_llm_internal_cutlass_kernels_static.tar.xz +++ b/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/x86_64-linux-gnu/tensorrt_llm_internal_cutlass_kernels_static.tar.xz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:04f4daeb5e4f0762be994b583c50573fda5f994e95a5e737b0352ce2f6376d0b -size 63062972 +oid sha256:16608c6e8ca9f1c3f26408c0ec97049ac05b1ee8b647480d969d488d9cef91f0 +size 63066816 diff --git a/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/x86_64-linux-gnu/version.txt b/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/x86_64-linux-gnu/version.txt index 021d4f2b621..3e93949bcee 100644 --- a/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/x86_64-linux-gnu/version.txt +++ b/cpp/tensorrt_llm/kernels/internal_cutlass_kernels/x86_64-linux-gnu/version.txt @@ -1,2 +1,2 @@ -a956018ada7a223b2f9e4aa3331d7bdc4be8f69bdef9e3828f5f1c65beb11699 libtensorrt_llm_internal_cutlass_kernels_static.a -commit 2e68c0113ad486c0026619711e5974f5ae0dc626 +9f0a29070b95a7db62f70cc45ef151e27c2a58697a2d50cbb002ff339035fb8e libtensorrt_llm_internal_cutlass_kernels_static.a +commit d61e7684bc095c8ff5ec540363949bd1f491c960 diff --git a/tests/integration/defs/test_e2e.py b/tests/integration/defs/test_e2e.py index cdd909cbb4f..5fc66d443a9 100644 --- a/tests/integration/defs/test_e2e.py +++ b/tests/integration/defs/test_e2e.py @@ -1275,6 +1275,9 @@ def test_ptp_quickstart(llm_root, llm_venv): pytest.param('Mixtral-8x7B-NVFP4', 'nvfp4-quantized/Mixtral-8x7B-Instruct-v0.1', marks=skip_pre_blackwell), + pytest.param('Mixtral-8x7B-FP8', + 'Mixtral-8x7B-Instruct-v0.1-fp8', + marks=skip_pre_blackwell), ]) def test_ptp_quickstart_advanced(llm_root, llm_venv, model_name, model_path): print(f"Testing {model_name}.") diff --git a/tests/integration/test_lists/qa/llm_release_rtx_pro_6000.txt b/tests/integration/test_lists/qa/llm_release_rtx_pro_6000.txt index 3066fa35c6a..803e6f82d09 100644 --- a/tests/integration/test_lists/qa/llm_release_rtx_pro_6000.txt +++ b/tests/integration/test_lists/qa/llm_release_rtx_pro_6000.txt @@ -26,6 +26,7 @@ test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-70B-NVFP4-nvfp4-quantized/Met test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-70B-FP8-llama-3.1-model/Llama-3.1-70B-Instruct-FP8] test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Super-49B-v1-FP8-nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1-FP8] test_e2e.py::test_ptp_quickstart_advanced[Mixtral-8x7B-NVFP4-nvfp4-quantized/Mixtral-8x7B-Instruct-v0.1] +test_e2e.py::test_ptp_quickstart_advanced[Mixtral-8x7B-FP8-Mixtral-8x7B-Instruct-v0.1-fp8] test_e2e.py::test_ptp_quickstart_advanced_2gpus_sm120[Llama3.1-70B-BF16-llama-3.1-model/Meta-Llama-3.1-70B] test_e2e.py::test_ptp_quickstart_advanced_2gpus_sm120[Nemotron-Super-49B-v1-BF16-nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1] test_e2e.py::test_ptp_quickstart_advanced_2gpus_sm120[Mixtral-8x7B-BF16-Mixtral-8x7B-Instruct-v0.1] diff --git a/tests/integration/test_lists/test-db/l0_rtx_pro_6000.yml b/tests/integration/test_lists/test-db/l0_rtx_pro_6000.yml index 2fa43adf435..2d87ba753bb 100644 --- a/tests/integration/test_lists/test-db/l0_rtx_pro_6000.yml +++ b/tests/integration/test_lists/test-db/l0_rtx_pro_6000.yml @@ -27,3 +27,4 @@ l0_rtx_pro_6000: - test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-70B-FP8-llama-3.1-model/Llama-3.1-70B-Instruct-FP8] - test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Super-49B-v1-FP8-nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1-FP8] - test_e2e.py::test_ptp_quickstart_advanced[Mixtral-8x7B-NVFP4-nvfp4-quantized/Mixtral-8x7B-Instruct-v0.1] + - test_e2e.py::test_ptp_quickstart_advanced[Mixtral-8x7B-FP8-Mixtral-8x7B-Instruct-v0.1-fp8]