From 6d2e95f9d18f5c184e524660c56a6466770c9259 Mon Sep 17 00:00:00 2001
From: Yi Zhang <187001205+yizhang-nv@users.noreply.github.com>
Date: Mon, 7 Jul 2025 01:19:36 +0000
Subject: [PATCH] fix: Adjust free GPU memory fraction in KvCacheConfig for
 DeepSeek R1 tests

Signed-off-by: Yi Zhang <187001205+yizhang-nv@users.noreply.github.com>
---
 tests/integration/defs/accuracy/test_llm_api_pytorch.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/defs/accuracy/test_llm_api_pytorch.py b/tests/integration/defs/accuracy/test_llm_api_pytorch.py
index 769196c1734..0db89d5d5b5 100644
--- a/tests/integration/defs/accuracy/test_llm_api_pytorch.py
+++ b/tests/integration/defs/accuracy/test_llm_api_pytorch.py
@@ -1326,7 +1326,7 @@ def test_nvfp4_multi_gpus(self, tp_size, pp_size, ep_size, mtp_nextn, fp8kv,
                               attention_dp, cuda_graph, overlap_scheduler,
                               max_batch_size, moe_backend):
 
-        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.9)
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.85)
         pytorch_config = dict(
             disable_overlap_scheduler=not overlap_scheduler,
             cuda_graph_config=CudaGraphConfig() if cuda_graph else None,