diff --git a/tensorrt_llm/runtime/multimodal_model_runner.py b/tensorrt_llm/runtime/multimodal_model_runner.py index 479c333b454..bf333556d78 100644 --- a/tensorrt_llm/runtime/multimodal_model_runner.py +++ b/tensorrt_llm/runtime/multimodal_model_runner.py @@ -15,7 +15,7 @@ import torch.nn.functional as F from cuda import cudart from huggingface_hub import hf_hub_download -from PIL import Image +from PIL import Image, UnidentifiedImageError from safetensors import safe_open from torch import nn from transformers import (AutoConfig, AutoModelForCausalLM, AutoProcessor, @@ -2173,8 +2173,23 @@ def load_images(image_paths): if image_path.startswith("http") or image_path.startswith( "https"): logger.info(f"downloading image from url {image_path}") - response = requests.get(image_path, timeout=5) - image = Image.open(BytesIO(response.content)).convert("RGB") + try: + response = requests.get(image_path, timeout=5) + response.raise_for_status() + if 'image' not in response.headers.get( + 'Content-Type', ''): + raise Exception( + f"URL does not point to an image: {image_path}." + ) + image = Image.open(BytesIO( + response.content)).convert("RGB") + except (UnidentifiedImageError, IOError): + raise Exception( + f"Cannot identify image file at URL: {image_path}.") + except Exception as e: + raise Exception( + f"Failed to download image from url {image_path}: {e}" + ) else: image = Image.open(image_path).convert("RGB") images.append(image) diff --git a/tests/integration/defs/examples/test_multimodal.py b/tests/integration/defs/examples/test_multimodal.py index bf6ac91d45b..25b2d45d539 100644 --- a/tests/integration/defs/examples/test_multimodal.py +++ b/tests/integration/defs/examples/test_multimodal.py @@ -16,6 +16,7 @@ import os import pytest +import torch from defs.common import convert_weights, venv_check_call, venv_mpi_check_call from defs.conftest import get_device_memory, skip_post_blackwell, skip_pre_ada from defs.trt_test_alternative import check_call @@ -75,6 +76,10 @@ def _test_llm_multimodal_general(llm_venv, cpp_e2e=False, num_beams=1): + # Empty the torch CUDA cache before each multimodal test to reduce risk of OOM errors. + if torch.cuda.is_available(): + torch.cuda.empty_cache() + world_size = tp_size * pp_size print("Locate model checkpoints in test storage...") tllm_model_name, model_ckpt_path = multimodal_model_root diff --git a/tests/integration/test_lists/test-db/l0_h100.yml b/tests/integration/test_lists/test-db/l0_h100.yml index ec3fac1fc51..a78cc60ea8a 100644 --- a/tests/integration/test_lists/test-db/l0_h100.yml +++ b/tests/integration/test_lists/test-db/l0_h100.yml @@ -222,7 +222,7 @@ l0_h100: - examples/test_multimodal.py::test_llm_multimodal_general[Phi-3-vision-128k-instruct-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1] - examples/test_multimodal.py::test_llm_multimodal_general[Phi-3.5-vision-instruct-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1] - examples/test_multimodal.py::test_llm_multimodal_general[Phi-4-multimodal-instruct-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1] - - examples/test_multimodal.py::test_llm_multimodal_general[Mistral-Small-3.1-24B-Instruct-2503-pp:1-tp:1-bfloat16-bs:8-cpp_e2e:False-nb:1] + - examples/test_multimodal.py::test_llm_multimodal_general[Mistral-Small-3.1-24B-Instruct-2503-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False-nb:1] - examples/test_multimodal.py::test_llm_multimodal_general[VILA1.5-3b-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1] # 10 mins - examples/test_enc_dec.py::test_llm_enc_dec_mmlu[flan-t5-small-float32-tp:1-pp:1-nb:1-enable_fp8] # 7 mins - examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-bart-large-cnn-float16-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:1-enable_fp8] # 13 mins diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index fa565a2bc23..40e70604254 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -415,7 +415,6 @@ accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales_4gpu test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-NVFP4-nvfp4-quantized/Meta-Llama-3.1-8B] SKIP (https://nvbugs/5333659) test_e2e.py::test_ptp_quickstart_advanced[Mixtral-8x7B-NVFP4-nvfp4-quantized/Mixtral-8x7B-Instruct-v0.1] SKIP (https://nvbugs/5333659) test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Super-49B-v1-NVFP4-nvfp4-quantized/Llama-3_3-Nemotron-Super-49B-v1_nvfp4_hf] SKIP (https://nvbugs/5333659) -examples/test_multimodal.py::test_llm_multimodal_general[Mistral-Small-3.1-24B-Instruct-2503-pp:1-tp:1-bfloat16-bs:8-cpp_e2e:False-nb:1] SKIP (https://nvbugs/5331031) accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_auto_dtype[mtp_nextn=0-overlap_scheduler=True] SKIP (https://nvbugs/5322354) accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_auto_dtype[mtp_nextn=2-overlap_scheduler=True] SKIP (https://nvbugs/5322354) accuracy/test_disaggregated_serving.py::TestLlama4ScoutInstruct::test_auto_dtype[True] SKIP (https://nvbugs/5336321)