vllm-project · yeqcharlotte · Oct 28, 2025 · Oct 27, 2025 · Oct 27, 2025 · yeqcharlotte
diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml
@@ -553,7 +553,7 @@ steps:
 
 - label: Model Executor Test # 23min
   timeout_in_minutes: 35
-  mirror_hardwares: [amdexperimental]
+  mirror_hardwares: [amdexperimental, amdproduction]
   agent_pool: mi325_1
   # grade: Blocking
   source_file_dependencies:

diff --git a/tests/model_executor/model_loader/fastsafetensors_loader/test_fastsafetensors_loader.py b/tests/model_executor/model_loader/fastsafetensors_loader/test_fastsafetensors_loader.py
@@ -1,7 +1,10 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import pytest
+
 from vllm import SamplingParams
+from vllm.platforms import current_platform
 
 test_model = "openai-community/gpt2"
 
@@ -15,6 +18,9 @@
 sampling_params = SamplingParams(temperature=0.8, top_p=0.95, seed=0)
 
 
+@pytest.mark.skipif(
+    not current_platform.is_cuda(), reason="fastsafetensors requires CUDA/NVIDIA GPUs"
+)
 def test_model_loader_download_files(vllm_runner):
     with vllm_runner(test_model, load_format="fastsafetensors") as llm:
         deserialized_outputs = llm.generate(prompts, sampling_params)

diff --git a/tests/model_executor/model_loader/fastsafetensors_loader/test_weight_utils.py b/tests/model_executor/model_loader/fastsafetensors_loader/test_weight_utils.py
@@ -5,15 +5,20 @@
 import tempfile
 
 import huggingface_hub.constants
+import pytest
 import torch
 
 from vllm.model_executor.model_loader.weight_utils import (
     download_weights_from_hf,
     fastsafetensors_weights_iterator,
     safetensors_weights_iterator,
 )
+from vllm.platforms import current_platform
 
 
+@pytest.mark.skipif(
+    not current_platform.is_cuda(), reason="fastsafetensors requires CUDA/NVIDIA GPUs"
+)
 def test_fastsafetensors_model_loader():
     with tempfile.TemporaryDirectory() as tmpdir:
         huggingface_hub.constants.HF_HUB_OFFLINE = False