diff --git a/tests/unittest/llmapi/test_executor.py b/tests/unittest/llmapi/test_executor.py index 2d9509c78aa..d10859e7320 100644 --- a/tests/unittest/llmapi/test_executor.py +++ b/tests/unittest/llmapi/test_executor.py @@ -399,7 +399,7 @@ def test_ZeroMqQueue_serialization_complicated_dataclass(): TokenRangeRetentionConfig = tllm.KvCacheRetentionConfig.TokenRangeRetentionConfig kvcache_config = tllm.KvCacheRetentionConfig( [TokenRangeRetentionConfig(0, 2, 30, datetime.timedelta(seconds=30))], - 80) + 80, None, tllm.KvCacheTransferMode.DRAM, "test_dir") sampling_params = SamplingParams(max_tokens=4, embedding_bias=torch.randn(2, 2)) diff --git a/tests/unittest/llmapi/test_llm.py b/tests/unittest/llmapi/test_llm.py index aa39e9acfa3..0095cfcb0d2 100644 --- a/tests/unittest/llmapi/test_llm.py +++ b/tests/unittest/llmapi/test_llm.py @@ -357,7 +357,7 @@ def test_llm_with_kv_cache_retention_config(): kv_cache_retention_config = KvCacheRetentionConfig([ KvCacheRetentionConfig.TokenRangeRetentionConfig( 0, 2, 30, datetime.timedelta(seconds=30)) - ], 80) + ], 80, None, tllm.KvCacheTransferMode.DRAM, "test_dir") llm = LLM(model=llama_model_path, kv_cache_config=global_kvcache_config,