diff --git a/tests/integration/defs/perf/pytorch_model_config.py b/tests/integration/defs/perf/pytorch_model_config.py
index 23ccd0f1841..8f6520885d6 100644
--- a/tests/integration/defs/perf/pytorch_model_config.py
+++ b/tests/integration/defs/perf/pytorch_model_config.py
@@ -186,6 +186,17 @@ def get_model_yaml_config(model_label: str,
                 'max_lora_rank': 64
             }
         }
+        if 'phi_4_multimodal_instruct' in model_label:
+            lora_config['lora_config']['lora_target_modules'] = [
+                "attn_qkv", "attn_dense", "mlp_h_to_4h", "mlp_4h_to_h"
+            ]
+            lora_config['lora_config']['trtllm_modules_to_hf_modules'] = {
+                "attn_qkv": "qkv_proj",
+                "attn_dense": "o_proj",
+                "mlp_h_to_4h": "gate_up_proj",
+                "mlp_4h_to_h": "down_proj"
+            }
+            lora_config['lora_config']['max_lora_rank'] = 64
         base_config.update(lora_config)
 
     kv_cache_config = base_config.get('kv_cache_config', KvCacheConfig())
diff --git a/tests/integration/defs/perf/test_perf.py b/tests/integration/defs/perf/test_perf.py
index 759ff9273f8..1303f078138 100644
--- a/tests/integration/defs/perf/test_perf.py
+++ b/tests/integration/defs/perf/test_perf.py
@@ -114,6 +114,11 @@
     "phi_3_mini_4k_instruct": "Phi-3/Phi-3-mini-4k-instruct",
     "phi_3_mini_128k_instruct": "Phi-3/Phi-3-mini-128k-instruct",
     "phi_4_mini_instruct": "Phi-4-mini-instruct",
+    "phi_4_multimodal_instruct": "multimodals/Phi-4-multimodal-instruct",
+    "phi_4_multimodal_instruct_image": "multimodals/Phi-4-multimodal-instruct",
+    "phi_4_multimodal_instruct_audio": "multimodals/Phi-4-multimodal-instruct",
+    "bielik_11b_v2.2_instruct": "Bielik-11B-v2.2-Instruct",
+    "bielik_11b_v2.2_instruct_fp8": "Bielik-11B-v2.2-Instruct-FP8",
 }
 # Model PATH of HuggingFace
 HF_MODEL_PATH = {
@@ -145,11 +150,18 @@
     "phi_4_mini_instruct_hf": "microsoft/Phi-4-mini-instruct",
 }
 LORA_MODEL_PATH = {
-    "llama_v2_13b": "llama-models-v2/chinese-llama-2-lora-13b",
-    "mixtral_8x7b_0.1": "chinese-mixtral-lora",
-    "llama_v3.1_8b_instruct_fp8": "lora/llama-3-chinese-8b-instruct-v2-lora/",
+    "llama_v2_13b":
+    "llama-models-v2/chinese-llama-2-lora-13b",
+    "mixtral_8x7b_0.1":
+    "chinese-mixtral-lora",
+    "llama_v3.1_8b_instruct_fp8":
+    "lora/llama-3-chinese-8b-instruct-v2-lora/",
     "ministral_8b":
     "lora/ministral/Ministral-8B-Instruct-2410-Loras-Dummy",  # Dummy LoRA for Ministral
+    "phi_4_multimodal_instruct_image":
+    "multimodals/Phi-4-multimodal-instruct/vision-lora",
+    "phi_4_multimodal_instruct_audio":
+    "multimodals/Phi-4-multimodal-instruct/speech-lora",
 }
 
 TIMING_CACHE_DIR = os.environ.get("TIMING_CACHE_DIR", "")
@@ -1245,7 +1257,8 @@ def get_trtllm_bench_command(self, engine_dir):
         #use default yaml config
         if self._config.backend == "pytorch":
             import yaml
-            config = get_model_yaml_config(self._config.to_string())
+            config = get_model_yaml_config(self._config.to_string(),
+                                           lora_dirs=self.lora_dirs)
             print_info(f"pytorch model config: {config}")
             with open('extra-llm-api-config.yml', 'w') as f:
                 yaml.dump(config, f, default_flow_style=False)
diff --git a/tests/integration/test_lists/qa/trt_llm_release_perf_test.yml b/tests/integration/test_lists/qa/trt_llm_release_perf_test.yml
index 1b3b539fd3e..a9120e41f18 100644
--- a/tests/integration/test_lists/qa/trt_llm_release_perf_test.yml
+++ b/tests/integration/test_lists/qa/trt_llm_release_perf_test.yml
@@ -72,6 +72,16 @@ trt_llm_release_perf_test:
   # reduced 'reqs' to fit timeout limit
   - perf/test_perf.py::test_perf[phi_4_mini_instruct-bench-bfloat16-maxbs:32-input_output_len:500,2000-reqs:8-con:1]
   - perf/test_perf.py::test_perf[phi_4_mini_instruct-bench-bfloat16-maxbs:32-input_output_len:500,2000-quant:fp8-reqs:8-con:1]
+  # Phi-4-multimodal-instruct
+  - perf/test_perf.py::test_perf[phi_4_multimodal_instruct-bench-pytorch-bfloat16-input_output_len:500,2000-con:250]
+  - perf/test_perf.py::test_perf[phi_4_multimodal_instruct-bench-pytorch-bfloat16-input_output_len:1000,1000-con:250]
+  - perf/test_perf.py::test_perf[phi_4_multimodal_instruct-bench-pytorch-bfloat16-input_output_len:128,128]
+  - perf/test_perf.py::test_perf[phi_4_multimodal_instruct-bench-pytorch-bfloat16-input_output_len:512,32]
+  # Bielik-11B-v2.2-Instruct
+  - perf/test_perf.py::test_perf[bielik_11b_v2.2_instruct-bench-pytorch-bfloat16-input_output_len:128,128]
+  - perf/test_perf.py::test_perf[bielik_11b_v2.2_instruct-bench-pytorch-bfloat16-input_output_len:512,32]
+  - perf/test_perf.py::test_perf[bielik_11b_v2.2_instruct-bench-pytorch-bfloat16-input_output_len:1000,1000-con:250]
+  - perf/test_perf.py::test_perf[bielik_11b_v2.2_instruct-bench-pytorch-bfloat16-input_output_len:2000,2000-con:250]
 
   # Test list validation
   - test_list_validation.py::test_list_validation
@@ -89,7 +99,9 @@ trt_llm_release_perf_test:
   - perf/test_perf.py::test_perf[llama_v3_8b_instruct-cppmanager-exe-plugin_ifb-float16-mp-input_output_len:128,128+512,32] #oom for l40s, l20(cuda_runtime_error)#44, mpi abort on a100 36
   - perf/test_perf.py::test_perf[llama_v3_8b_instruct-cppmanager-exe-plugin_ifb-bfloat16-gwp:0.0-input_output_len:128,128+512,32] #oom for l40s, l20, mpi abort on a100 35
   - perf/test_perf.py::test_perf[llama_v3_8b_instruct-cppmanager-exe-plugin_ifb-bfloat16-gwp:0.5-input_output_len:128,128+512,32] #oom for l40s, l20
-  - perf/test_perf.py::test_perf[phi_4_mini_instruct-bench-bfloat16-maxbs:32-maxnt:5000-input_output_len:5000,500-reqs:10-con:1] # timeout for l20, l40s
+  - perf/test_perf.py::test_perf[phi_4_mini_instruct-bench-bfloat16-maxbs:32-input_output_len:5000,500-reqs:10-con:1] # timeout for l20, l40s
+  - perf/test_perf.py::test_perf[phi_4_multimodal_instruct_image-bench-pytorch-bfloat16-input_output_len:1000,1000-loras:1-con:250]
+  - perf/test_perf.py::test_perf[phi_4_multimodal_instruct_audio-bench-pytorch-bfloat16-input_output_len:1000,1000-loras:1-con:250]
 
   # Llama-3.1-Nemotron-Nano-8B-v1
   # cpp backend
@@ -158,6 +170,8 @@ trt_llm_release_perf_test:
   - perf/test_perf.py::test_perf[mistral_7b_v0.1-bench-float16-maxbs:256-input_output_len:500,2000-quant:fp8]
   - perf/test_perf.py::test_perf[phi_3_mini_4k_instruct-bench-float16-maxbs:128-input_output_len:1000,1000-quant:fp8]
  - perf/test_perf.py::test_perf[phi_3_mini_4k_instruct-bench-float16-maxbs:64-input_output_len:500,2000-quant:fp8]
+  - perf/test_perf.py::test_perf[bielik_11b_v2.2_instruct_fp8-bench-pytorch-float8-input_output_len:1000,1000-con:250]
+  - perf/test_perf.py::test_perf[bielik_11b_v2.2_instruct_fp8-bench-pytorch-float8-input_output_len:2000,2000-con:250]
 
 - condition:
     terms:
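For reference, a minimal sketch of the lora_config block that the patched harness would dump into extra-llm-api-config.yml for a phi_4_multimodal_instruct LoRA run, based only on the dict built above; key order assumes yaml.dump defaults, and the LoRA directory list populated from lora_dirs plus unrelated sections such as kv_cache_config are intentionally omitted:

# Sketch only: not part of the patch; other keys (lora_dir from lora_dirs,
# kv_cache_config, ...) are left out.
lora_config:
  lora_target_modules:
  - attn_qkv
  - attn_dense
  - mlp_h_to_4h
  - mlp_4h_to_h
  max_lora_rank: 64
  trtllm_modules_to_hf_modules:
    attn_qkv: qkv_proj
    attn_dense: o_proj
    mlp_h_to_4h: gate_up_proj
    mlp_4h_to_h: down_proj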