
Commit 97d4b18

modified the test params
Signed-off-by: Eran Geva <[email protected]>
1 parent 4daae98 commit 97d4b18

File tree

tensorrt_llm/llmapi/llm.py
tests/unittest/_torch/auto_deploy/unit/singlegpu/test_ad_trtllm_bench.py

2 files changed: +4 -4 lines changed

tensorrt_llm/llmapi/llm.py

Lines changed: 2 additions & 2 deletions
@@ -932,8 +932,8 @@ def _build_model(self):
             max_batch_size=max_batch_size,
             max_num_tokens=max_num_tokens,
             gather_generation_logits=self.args.gather_generation_logits,
-            fail_fast_on_attention_window_too_large=getattr(
-                self.args, 'fail_fast_on_attention_window_too_large', False),
+            # fail_fast_on_attention_window_too_large=getattr(
+            #     self.args, 'fail_fast_on_attention_window_too_large', False),
             **kwargs)

         if self.args.kv_cache_config is not None:
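The llm.py change disables the fail-fast kwarg rather than deleting it, so _build_model now falls back to whatever default the underlying executor declares for fail_fast_on_attention_window_too_large. A minimal sketch of the getattr pattern that was commented out (the SimpleNamespace stand-in for self.args is an assumption for illustration, not TensorRT-LLM code):

from types import SimpleNamespace

# An args object that may or may not carry the flag.
args = SimpleNamespace()

# getattr with a default returns False instead of raising AttributeError
# when the attribute is missing, so the kwarg was always passed explicitly.
flag = getattr(args, "fail_fast_on_attention_window_too_large", False)
print(flag)  # False

# With the lines commented out, the kwarg is omitted entirely and the
# callee's own default takes effect instead.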

tests/unittest/_torch/auto_deploy/unit/singlegpu/test_ad_trtllm_bench.py

Lines changed: 2 additions & 2 deletions
@@ -448,7 +448,7 @@ def print_kv_cache_metrics(kv_cache_metrics):
 def trtllm_bench_unified_comparison(
     llm_root,  # noqa: F811
     comparison_mode="backend",
-    free_mem_ratio=0.5,
+    free_mem_ratio=0.1,
     num_hidden_layers=2,
     max_batch_size=32,  # below this value the kv cache resizing is skipped
     golden_tokens_per_sec=1400,
@@ -483,7 +483,7 @@ def trtllm_bench_unified_comparison(
         yaml.dump(
             {
                 "model_kwargs": {"num_hidden_layers": num_hidden_layers},
-                # "cuda_graph_batch_sizes": [1, 2],
+                "cuda_graph_batch_sizes": [1, 2, 4, 8, 16, 32],
                 "compile_backend": "torch-opt",
                 "free_mem_ratio": free_mem_ratio,
                 "runtime": "trtllm",
