nvbugs-5331031; nvbugs-5344203 - address intermittent issues with Mistral Small multimodal for BS=8

brb-nv · brb-nv · commit 456d5cd566aa · 2025-06-24T23:36:52.000Z
Signed-off-by: Balaram Buddharaju <169953907+brb-nv@users.noreply.github.com>
diff --git a/tensorrt_llm/runtime/multimodal_model_runner.py b/tensorrt_llm/runtime/multimodal_model_runner.py
@@ -15,7 +15,7 @@
 import torch.nn.functional as F
 from cuda import cudart
 from huggingface_hub import hf_hub_download
-from PIL import Image
+from PIL import Image, UnidentifiedImageError
 from safetensors import safe_open
 from torch import nn
 from transformers import (AutoConfig, AutoModelForCausalLM, AutoProcessor,
@@ -2173,8 +2173,23 @@ def load_images(image_paths):
                 if image_path.startswith("http") or image_path.startswith(
                         "https"):
                     logger.info(f"downloading image from url {image_path}")
-                    response = requests.get(image_path, timeout=5)
-                    image = Image.open(BytesIO(response.content)).convert("RGB")
+                    try:
+                        response = requests.get(image_path, timeout=5)
+                        response.raise_for_status()
+                        if 'image' not in response.headers.get(
+                                'Content-Type', ''):
+                            raise Exception(
+                                f"URL does not point to an image: {image_path}."
+                            )
+                        image = Image.open(BytesIO(
+                            response.content)).convert("RGB")
+                    except (UnidentifiedImageError, IOError):
+                        raise Exception(
+                            f"Cannot identify image file at URL: {image_path}.")
+                    except Exception as e:
+                        raise Exception(
+                            f"Failed to download image from url {image_path}: {e}"
+                        )
                 else:
                     image = Image.open(image_path).convert("RGB")
                 images.append(image)
diff --git a/tests/integration/defs/examples/test_multimodal.py b/tests/integration/defs/examples/test_multimodal.py
@@ -16,6 +16,7 @@
 import os
 
 import pytest
+import torch
 from defs.common import convert_weights, venv_check_call, venv_mpi_check_call
 from defs.conftest import get_device_memory, skip_post_blackwell, skip_pre_ada
 from defs.trt_test_alternative import check_call
@@ -75,6 +76,10 @@ def _test_llm_multimodal_general(llm_venv,
                                  cpp_e2e=False,
                                  num_beams=1):
 
+    # Empty the torch CUDA cache before each multimodal test to reduce risk of OOM errors.
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+
     world_size = tp_size * pp_size
     print("Locate model checkpoints in test storage...")
     tllm_model_name, model_ckpt_path = multimodal_model_root
diff --git a/tests/integration/test_lists/test-db/l0_h100.yml b/tests/integration/test_lists/test-db/l0_h100.yml
@@ -222,7 +222,7 @@ l0_h100:
   - examples/test_multimodal.py::test_llm_multimodal_general[Phi-3-vision-128k-instruct-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1]
   - examples/test_multimodal.py::test_llm_multimodal_general[Phi-3.5-vision-instruct-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1]
   - examples/test_multimodal.py::test_llm_multimodal_general[Phi-4-multimodal-instruct-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1]
-  - examples/test_multimodal.py::test_llm_multimodal_general[Mistral-Small-3.1-24B-Instruct-2503-pp:1-tp:1-bfloat16-bs:8-cpp_e2e:False-nb:1]
+  - examples/test_multimodal.py::test_llm_multimodal_general[Mistral-Small-3.1-24B-Instruct-2503-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False-nb:1]
   - examples/test_multimodal.py::test_llm_multimodal_general[VILA1.5-3b-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1] # 10 mins
   - examples/test_enc_dec.py::test_llm_enc_dec_mmlu[flan-t5-small-float32-tp:1-pp:1-nb:1-enable_fp8] # 7 mins
   - examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-bart-large-cnn-float16-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:1-enable_fp8] # 13 mins
diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt
@@ -415,7 +415,6 @@ accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales_4gpu
 test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-NVFP4-nvfp4-quantized/Meta-Llama-3.1-8B] SKIP (https://nvbugs/5333659)
 test_e2e.py::test_ptp_quickstart_advanced[Mixtral-8x7B-NVFP4-nvfp4-quantized/Mixtral-8x7B-Instruct-v0.1] SKIP (https://nvbugs/5333659)
 test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Super-49B-v1-NVFP4-nvfp4-quantized/Llama-3_3-Nemotron-Super-49B-v1_nvfp4_hf] SKIP (https://nvbugs/5333659)
-examples/test_multimodal.py::test_llm_multimodal_general[Mistral-Small-3.1-24B-Instruct-2503-pp:1-tp:1-bfloat16-bs:8-cpp_e2e:False-nb:1] SKIP (https://nvbugs/5331031)
 accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_auto_dtype[mtp_nextn=0-overlap_scheduler=True] SKIP (https://nvbugs/5322354)
 accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_auto_dtype[mtp_nextn=2-overlap_scheduler=True] SKIP (https://nvbugs/5322354)
 accuracy/test_disaggregated_serving.py::TestLlama4ScoutInstruct::test_auto_dtype[True] SKIP (https://nvbugs/5336321)