From 6d2e95f9d18f5c184e524660c56a6466770c9259 Mon Sep 17 00:00:00 2001 From: Yi Zhang <187001205+yizhang-nv@users.noreply.github.com> Date: Mon, 7 Jul 2025 01:19:36 +0000 Subject: [PATCH] fix: Adjust free GPU memory fraction in KvCacheConfig for DeepSeek R1 tests Signed-off-by: Yi Zhang <187001205+yizhang-nv@users.noreply.github.com> --- tests/integration/defs/accuracy/test_llm_api_pytorch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/defs/accuracy/test_llm_api_pytorch.py b/tests/integration/defs/accuracy/test_llm_api_pytorch.py index 769196c1734..0db89d5d5b5 100644 --- a/tests/integration/defs/accuracy/test_llm_api_pytorch.py +++ b/tests/integration/defs/accuracy/test_llm_api_pytorch.py @@ -1326,7 +1326,7 @@ def test_nvfp4_multi_gpus(self, tp_size, pp_size, ep_size, mtp_nextn, fp8kv, attention_dp, cuda_graph, overlap_scheduler, max_batch_size, moe_backend): - kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.9) + kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.85) pytorch_config = dict( disable_overlap_scheduler=not overlap_scheduler, cuda_graph_config=CudaGraphConfig() if cuda_graph else None,