
Commit bce06ad

Fix test_ptp_quickstart_multimodal_phi4mm: for stability, set LoRA cache sizes; fix incorrect LoRA request creation
Signed-off-by: Amit Zuker <[email protected]>
1 parent 391d0f9 commit bce06ad

File tree

2 files changed: +7 additions, -4 deletions


examples/llm-api/quickstart_multimodal.py

Lines changed: 3 additions & 0 deletions
@@ -148,6 +148,9 @@ def main():
         models_module = importlib.import_module('tensorrt_llm._torch.models')
         model_class = getattr(models_module, args.auto_model_name)
         lora_config = model_class.lora_config(args.model_dir)
+        # For stability - explicitly set the LoRA GPU cache & CPU cache to have space for 2 adapters
+        lora_config.max_loras = 2
+        lora_config.max_cpu_loras = 2
 
     llm, sampling_params = setup_llm(args, lora_config=lora_config)
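
A minimal sketch of what these two settings control, assuming the tensorrt_llm LLM API accepts lora_config as the quickstart's setup_llm does; the LoraConfig import location and all paths are assumptions, not part of this commit:

    from tensorrt_llm import LLM
    from tensorrt_llm.lora_manager import LoraConfig  # assumed import path

    # The Phi-4-MM quickstart serves two adapters (vision and speech), so both
    # the GPU and CPU LoRA caches need room for 2 adapters; sizing them
    # explicitly presumably avoids evicting one adapter between requests.
    lora_config = LoraConfig(lora_dir=["/path/to/vision-lora",   # placeholder paths
                                       "/path/to/speech-lora"])
    lora_config.max_loras = 2      # GPU LoRA cache capacity, in adapters
    lora_config.max_cpu_loras = 2  # host (CPU) LoRA cache capacity, in adapters

    llm = LLM(model="/path/to/Phi-4-multimodal-instruct",  # placeholder model dir
              lora_config=lora_config)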

tensorrt_llm/_torch/models/modeling_phi4mm.py

Lines changed: 4 additions & 4 deletions
@@ -271,16 +271,16 @@ def lora_request(num_requests: int, modality: str, base_model_dir: str):
     if modality == "image" or modality == "image_audio":
         lora_request = [
             LoRARequest(
-                lora_name=f"vision-lora-{i}",
-                lora_int_id=i,
+                lora_name="vision-lora",
+                lora_int_id=0,
                 lora_path=f"{base_model_dir}/vision-lora",
             ) for i in range(num_requests)
         ]
     elif modality == "audio":
         lora_request = [
             LoRARequest(
-                lora_name=f"speech-lora-{i}",
-                lora_int_id=i,
+                lora_name="speech-lora",
+                lora_int_id=1,
                 lora_path=f"{base_model_dir}/speech-lora",
             ) for i in range(num_requests)
         ]
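
Why this fixes the request creation: before, every request registered a fresh adapter name and ID (vision-lora-0, vision-lora-1, ...) that all pointed at the same weights, so num_requests distinct cache entries competed for the 2-slot caches configured above; now all requests of a modality share one adapter. A sketch of the corrected pattern, where the LoRARequest import path and placeholder values are assumptions:

    from tensorrt_llm.executor import LoRARequest  # assumed import path

    base_model_dir = "/path/to/Phi-4-multimodal-instruct"  # placeholder
    num_requests = 4

    # All image requests reference the SAME adapter: one cache entry total.
    image_requests = [
        LoRARequest(
            lora_name="vision-lora",  # stable name instead of f"vision-lora-{i}"
            lora_int_id=0,            # stable ID shared by every image request
            lora_path=f"{base_model_dir}/vision-lora",
        ) for _ in range(num_requests)
    ]

    # With the speech adapter pinned to lora_int_id=1, at most 2 distinct
    # adapters are ever live, matching max_loras = max_cpu_loras = 2 above.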
