From 0c4e59bd6c9095bca358bcc808ae7dc3de517bc8 Mon Sep 17 00:00:00 2001 From: statelesshz Date: Mon, 30 Oct 2023 20:14:05 +0800 Subject: [PATCH 1/5] device agnostic models testing --- tests/generation/test_utils.py | 10 ++++++++-- tests/models/bark/test_modeling_bark.py | 20 ++++++++++++------- tests/models/bert/test_modeling_bert.py | 4 ++-- tests/models/blip/test_modeling_blip.py | 12 +++++++++-- tests/models/blip_2/test_modeling_blip_2.py | 10 +++++----- .../models/convbert/test_modeling_convbert.py | 4 ++-- .../test_modeling_deformable_detr.py | 8 ++++---- tests/models/deit/test_modeling_deit.py | 6 ++++-- .../distilbert/test_modeling_distilbert.py | 4 ++-- tests/models/ernie/test_modeling_ernie.py | 4 ++-- .../models/flaubert/test_modeling_flaubert.py | 4 ++-- tests/models/fuyu/test_modeling_fuyu.py | 5 ++--- tests/models/jukebox/test_modeling_jukebox.py | 14 ++++++++++--- tests/models/llama/test_modeling_llama.py | 11 ++++++++-- .../mask2former/test_modeling_mask2former.py | 6 ++++-- .../maskformer/test_modeling_maskformer.py | 6 ++++-- .../oneformer/test_modeling_oneformer.py | 6 ++++-- tests/models/opt/test_modeling_opt.py | 5 +++-- tests/models/owlv2/test_modeling_owlv2.py | 12 +++++++++-- tests/models/owlvit/test_modeling_owlvit.py | 12 +++++++++-- .../persimmon/test_modeling_persimmon.py | 11 ++++++++-- tests/models/pvt/test_modeling_pvt.py | 6 ++++-- .../test_modeling_switch_transformers.py | 5 +++-- tests/models/vit/test_modeling_vit.py | 6 ++++-- .../test_modeling_wav2vec2_conformer.py | 8 +++++--- tests/models/xglm/test_modeling_xglm.py | 11 ++++++++-- tests/test_modeling_utils.py | 10 +++++----- 27 files changed, 150 insertions(+), 70 deletions(-) diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py index 86a3d5efd90b..76775a9d3f8f 100644 --- a/tests/generation/test_utils.py +++ b/tests/generation/test_utils.py @@ -22,7 +22,13 @@ import numpy as np from transformers import is_torch_available, pipeline -from transformers.testing_utils import require_accelerate, require_torch, require_torch_multi_gpu, slow, torch_device +from transformers.testing_utils import ( + require_accelerate, + require_torch, + require_torch_multi_accelerator, + slow, + torch_device, +) from ..test_modeling_common import floats_tensor, ids_tensor from .test_framework_agnostic import GenerationIntegrationTestsMixin @@ -1019,7 +1025,7 @@ def test_beam_search_generate_dict_outputs_use_cache(self): ) @require_accelerate - @require_torch_multi_gpu + @require_torch_multi_accelerator def test_model_parallel_beam_search(self): for model_class in self.all_generative_model_classes: if model_class._no_split_modules is None: diff --git a/tests/models/bark/test_modeling_bark.py b/tests/models/bark/test_modeling_bark.py index d80ee24a1610..42184bb85e38 100644 --- a/tests/models/bark/test_modeling_bark.py +++ b/tests/models/bark/test_modeling_bark.py @@ -32,7 +32,13 @@ BarkFineGenerationConfig, BarkSemanticGenerationConfig, ) -from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device +from transformers.testing_utils import ( + require_torch, + require_torch_fp16, + require_torch_gpu, + slow, + torch_device, +) from transformers.utils import cached_property from ...generation.test_utils import GenerationTesterMixin @@ -570,13 +576,13 @@ def test_inputs_embeds(self): with torch.no_grad(): model(**inputs)[0] + @require_torch_fp16 def test_generate_fp16(self): config, input_dict = self.model_tester.prepare_config_and_inputs() input_ids = input_dict["input_ids"] 
attention_mask = input_ids.ne(1).to(torch_device) model = self.all_generative_model_classes[0](config).eval().to(torch_device) - if torch_device == "cuda": - model.half() + model.half() model.generate(input_ids, attention_mask=attention_mask) model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3) @@ -636,13 +642,13 @@ def test_inputs_embeds(self): with torch.no_grad(): model(**inputs)[0] + @require_torch_fp16 def test_generate_fp16(self): config, input_dict = self.model_tester.prepare_config_and_inputs() input_ids = input_dict["input_ids"] attention_mask = input_ids.ne(1).to(torch_device) model = self.all_generative_model_classes[0](config).eval().to(torch_device) - if torch_device == "cuda": - model.half() + model.half() model.generate(input_ids, attention_mask=attention_mask) model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3) @@ -700,14 +706,14 @@ def test_inputs_embeds(self): with torch.no_grad(): model(**inputs)[0] + @require_torch_fp16 def test_generate_fp16(self): config, input_dict = self.model_tester.prepare_config_and_inputs() input_ids = input_dict["input_ids"] # take first codebook channel model = self.all_model_classes[0](config).eval().to(torch_device) - if torch_device == "cuda": - model.half() + model.half() # toy generation_configs semantic_generation_config = BarkSemanticGenerationConfig(semantic_vocab_size=0) diff --git a/tests/models/bert/test_modeling_bert.py b/tests/models/bert/test_modeling_bert.py index 9aec91367d8d..2601c92cfb76 100644 --- a/tests/models/bert/test_modeling_bert.py +++ b/tests/models/bert/test_modeling_bert.py @@ -18,7 +18,7 @@ from transformers import BertConfig, is_torch_available from transformers.models.auto import get_values -from transformers.testing_utils import CaptureLogger, require_torch, require_torch_gpu, slow, torch_device +from transformers.testing_utils import CaptureLogger, require_torch, require_torch_accelerator, slow, torch_device from ...generation.test_utils import GenerationTesterMixin from ...test_configuration_common import ConfigTester @@ -601,7 +601,7 @@ def test_model_from_pretrained(self): self.assertIsNotNone(model) @slow - @require_torch_gpu + @require_torch_accelerator def test_torchscript_device_change(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: diff --git a/tests/models/blip/test_modeling_blip.py b/tests/models/blip/test_modeling_blip.py index e5a5652f6048..1faa28fe00f3 100644 --- a/tests/models/blip/test_modeling_blip.py +++ b/tests/models/blip/test_modeling_blip.py @@ -24,7 +24,14 @@ import requests from transformers import BlipConfig, BlipTextConfig, BlipVisionConfig -from transformers.testing_utils import require_torch, require_torch_gpu, require_vision, slow, torch_device +from transformers.testing_utils import ( + require_torch, + require_torch_accelerator, + require_torch_fp16, + require_vision, + slow, + torch_device, +) from transformers.utils import is_torch_available, is_vision_available from ...test_configuration_common import ConfigTester @@ -1280,7 +1287,8 @@ def test_inference_image_captioning(self): [30522, 1037, 3861, 1997, 1037, 2450, 1998, 2014, 3899, 2006, 1996, 3509, 102], ) - @require_torch_gpu + @require_torch_accelerator + @require_torch_fp16 def test_inference_image_captioning_fp16(self): model = BlipForConditionalGeneration.from_pretrained( "Salesforce/blip-image-captioning-base", torch_dtype=torch.float16 diff --git 
a/tests/models/blip_2/test_modeling_blip_2.py b/tests/models/blip_2/test_modeling_blip_2.py index 9138061ee1bf..910166c4a228 100644 --- a/tests/models/blip_2/test_modeling_blip_2.py +++ b/tests/models/blip_2/test_modeling_blip_2.py @@ -23,7 +23,7 @@ import requests from transformers import CONFIG_MAPPING, Blip2Config, Blip2QFormerConfig, Blip2VisionConfig -from transformers.testing_utils import require_torch, require_torch_multi_gpu, require_vision, slow, torch_device +from transformers.testing_utils import require_torch, require_torch_multi_accelerator, require_vision, slow, torch_device from transformers.utils import is_torch_available, is_vision_available from ...test_configuration_common import ConfigTester @@ -934,8 +934,8 @@ def test_inference_t5_batched_beam_search(self): self.assertEqual(predictions[0].tolist(), [0, 2335, 1556, 28, 1782, 30, 8, 2608, 1]) self.assertEqual(predictions[1].tolist(), [0, 2335, 1556, 28, 1782, 30, 8, 2608, 1]) - @require_torch_multi_gpu - def test_inference_opt_multi_gpu(self): + @require_torch_multi_accelerator + def test_inference_opt_multi_accelerator(self): processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b") model = Blip2ForConditionalGeneration.from_pretrained( "Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16, device_map="balanced" @@ -966,8 +966,8 @@ def test_inference_opt_multi_gpu(self): ) self.assertEqual(generated_text, "it's not a city, it's a beach") - @require_torch_multi_gpu - def test_inference_t5_multi_gpu(self): + @require_torch_multi_accelerator + def test_inference_t5_multi_accelerator(self): processor = Blip2Processor.from_pretrained("Salesforce/blip2-flan-t5-xl") device_map = device_map = { "query_tokens": 0, diff --git a/tests/models/convbert/test_modeling_convbert.py b/tests/models/convbert/test_modeling_convbert.py index 754967ce0039..281a8e477b0b 100644 --- a/tests/models/convbert/test_modeling_convbert.py +++ b/tests/models/convbert/test_modeling_convbert.py @@ -19,7 +19,7 @@ from transformers import ConvBertConfig, is_torch_available from transformers.models.auto import get_values -from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device +from transformers.testing_utils import require_torch, require_torch_accelerator, slow, torch_device from ...test_configuration_common import ConfigTester from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask @@ -427,7 +427,7 @@ def test_attention_outputs(self): ) @slow - @require_torch_gpu + @require_torch_accelerator def test_torchscript_device_change(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: diff --git a/tests/models/deformable_detr/test_modeling_deformable_detr.py b/tests/models/deformable_detr/test_modeling_deformable_detr.py index b44564f69193..8cfe6ca451d9 100644 --- a/tests/models/deformable_detr/test_modeling_deformable_detr.py +++ b/tests/models/deformable_detr/test_modeling_deformable_detr.py @@ -25,7 +25,7 @@ from transformers.testing_utils import ( require_timm, require_torch, - require_torch_gpu, + require_torch_accelerator, require_vision, slow, torch_device, @@ -648,7 +648,7 @@ def test_inference_object_detection_head_with_box_refine_two_stage(self): self.assertEqual(outputs.pred_boxes.shape, expected_shape_boxes) self.assertTrue(torch.allclose(outputs.pred_boxes[0, :3, :3], expected_boxes, atol=1e-4)) - @require_torch_gpu + @require_torch_accelerator def 
test_inference_object_detection_head_equivalence_cpu_gpu(self): image_processor = self.default_image_processor image = prepare_img() @@ -663,10 +663,10 @@ def test_inference_object_detection_head_equivalence_cpu_gpu(self): cpu_outputs = model(pixel_values, pixel_mask) # 2. run model on GPU - model.to("cuda") + model.to(torch_device) with torch.no_grad(): - gpu_outputs = model(pixel_values.to("cuda"), pixel_mask.to("cuda")) + gpu_outputs = model(pixel_values.to(torch_device), pixel_mask.to(torch_device)) # 3. assert equivalence for key in cpu_outputs.keys(): diff --git a/tests/models/deit/test_modeling_deit.py b/tests/models/deit/test_modeling_deit.py index 7b7de3f320b3..7071def61210 100644 --- a/tests/models/deit/test_modeling_deit.py +++ b/tests/models/deit/test_modeling_deit.py @@ -24,7 +24,8 @@ from transformers.testing_utils import ( require_accelerate, require_torch, - require_torch_gpu, + require_torch_fp16, + require_torch_accelerator, require_vision, slow, torch_device, @@ -424,7 +425,8 @@ def test_inference_image_classification_head(self): @slow @require_accelerate - @require_torch_gpu + @require_torch_accelerator + @require_torch_fp16 def test_inference_fp16(self): r""" A small test to make sure that inference work in half precision without any problem. diff --git a/tests/models/distilbert/test_modeling_distilbert.py b/tests/models/distilbert/test_modeling_distilbert.py index ff56afd0a981..22e976535369 100644 --- a/tests/models/distilbert/test_modeling_distilbert.py +++ b/tests/models/distilbert/test_modeling_distilbert.py @@ -17,7 +17,7 @@ import unittest from transformers import DistilBertConfig, is_torch_available -from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device +from transformers.testing_utils import require_torch, require_torch_accelerator, slow, torch_device from ...test_configuration_common import ConfigTester from ...test_modeling_common import ModelTesterMixin, ids_tensor, random_attention_mask @@ -264,7 +264,7 @@ def test_model_from_pretrained(self): self.assertIsNotNone(model) @slow - @require_torch_gpu + @require_torch_accelerator def test_torchscript_device_change(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: diff --git a/tests/models/ernie/test_modeling_ernie.py b/tests/models/ernie/test_modeling_ernie.py index f0bdec3efb91..6fc557219c85 100644 --- a/tests/models/ernie/test_modeling_ernie.py +++ b/tests/models/ernie/test_modeling_ernie.py @@ -18,7 +18,7 @@ from transformers import ErnieConfig, is_torch_available from transformers.models.auto import get_values -from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device +from transformers.testing_utils import require_torch, require_torch_accelerator, slow, torch_device from ...generation.test_utils import GenerationTesterMixin from ...test_configuration_common import ConfigTester @@ -574,7 +574,7 @@ def test_model_from_pretrained(self): self.assertIsNotNone(model) @slow - @require_torch_gpu + @require_torch_accelerator def test_torchscript_device_change(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: diff --git a/tests/models/flaubert/test_modeling_flaubert.py b/tests/models/flaubert/test_modeling_flaubert.py index 61806182bb7f..f21695e39c56 100644 --- a/tests/models/flaubert/test_modeling_flaubert.py +++ b/tests/models/flaubert/test_modeling_flaubert.py @@ -17,7 +17,7 @@ import 
unittest from transformers import FlaubertConfig, is_torch_available -from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device +from transformers.testing_utils import require_torch, require_torch_accelerator, slow, torch_device from ...test_configuration_common import ConfigTester from ...test_modeling_common import ModelTesterMixin, ids_tensor, random_attention_mask @@ -463,7 +463,7 @@ def test_model_from_pretrained(self): self.assertIsNotNone(model) @slow - @require_torch_gpu + @require_torch_accelerator def test_torchscript_device_change(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: diff --git a/tests/models/fuyu/test_modeling_fuyu.py b/tests/models/fuyu/test_modeling_fuyu.py index 58d671bd5701..b9c061e7a004 100644 --- a/tests/models/fuyu/test_modeling_fuyu.py +++ b/tests/models/fuyu/test_modeling_fuyu.py @@ -4,7 +4,7 @@ import requests from transformers import AutoTokenizer, FuyuConfig, is_torch_available, is_vision_available -from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device +from transformers.testing_utils import require_torch, require_torch_accelerator, slow, torch_device from ...test_modeling_common import ids_tensor, random_attention_mask @@ -257,7 +257,7 @@ def prepare_config_and_inputs_for_common(self): @require_torch -@require_torch_gpu +@require_torch_accelerator @slow class FuyuIntegrationTest(unittest.TestCase): # , ModelTesterMixin) """ @@ -279,7 +279,6 @@ def setUp(self): self.bus_image_pil = Image.open(io.BytesIO(requests.get(self.bus_image_url).content)) @slow - @require_torch_gpu def test_model_8b_chat_greedy_generation_bus_captioning(self): EXPECTED_TEXT_COMPLETION = """A bus parked on the side of a road.|ENDOFTEXT|""" text_prompt_coco_captioning = "Generate a coco-style caption.\n" diff --git a/tests/models/jukebox/test_modeling_jukebox.py b/tests/models/jukebox/test_modeling_jukebox.py index 8de0696c04b1..718bf908e07b 100644 --- a/tests/models/jukebox/test_modeling_jukebox.py +++ b/tests/models/jukebox/test_modeling_jukebox.py @@ -16,7 +16,13 @@ from unittest import skip from transformers import is_torch_available -from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device +from transformers.testing_utils import ( + require_torch, + require_torch_accelerator, + require_torch_fp16, + slow, + torch_device, +) from transformers.trainer_utils import set_seed @@ -363,7 +369,8 @@ def test_sampling(self): self.assertIn(zs[2][0].detach().cpu().tolist(), [self.EXPECTED_OUTPUT_0, self.EXPECTED_OUTPUT_0_PT_2]) @slow - @require_torch_gpu + @require_torch_accelerator + @require_torch_fp16 @skip("Not enough GPU memory on CI runners") def test_slow_sampling(self): model = JukeboxModel.from_pretrained(self.model_id, min_duration=0).eval() @@ -388,7 +395,8 @@ def test_slow_sampling(self): torch.testing.assert_allclose(zs[2][0].cpu(), torch.tensor(self.EXPECTED_GPU_OUTPUTS_0)) @slow - @require_torch_gpu + @require_torch_accelerator + @require_torch_fp16 def test_fp16_slow_sampling(self): prior_id = "ArthurZ/jukebox_prior_0" model = JukeboxPrior.from_pretrained(prior_id, min_duration=0).eval().half().to(torch_device) diff --git a/tests/models/llama/test_modeling_llama.py b/tests/models/llama/test_modeling_llama.py index 2402986900fd..4d6b363e4a75 100644 --- a/tests/models/llama/test_modeling_llama.py +++ b/tests/models/llama/test_modeling_llama.py @@ -21,7 +21,14 @@ from pytest import mark from 
transformers import LlamaConfig, is_torch_available, set_seed -from transformers.testing_utils import require_flash_attn, require_torch, require_torch_gpu, slow, torch_device +from transformers.testing_utils import ( + require_flash_attn, + require_torch, + require_torch_accelerator, + require_torch_gpu, + slow, + torch_device, +) from ...generation.test_utils import GenerationTesterMixin from ...test_configuration_common import ConfigTester @@ -534,7 +541,7 @@ def main(): """, ] - @require_torch_gpu + @require_torch_accelerator @slow def test_model_7b_logits(self): model = LlamaForCausalLM.from_pretrained("codellama/CodeLlama-7b-hf").to(torch_device) diff --git a/tests/models/mask2former/test_modeling_mask2former.py b/tests/models/mask2former/test_modeling_mask2former.py index c05901a9dccd..b2fc84e7d324 100644 --- a/tests/models/mask2former/test_modeling_mask2former.py +++ b/tests/models/mask2former/test_modeling_mask2former.py @@ -23,7 +23,8 @@ from transformers import Mask2FormerConfig, is_torch_available, is_vision_available from transformers.testing_utils import ( require_torch, - require_torch_gpu, + require_torch_accelerator, + require_torch_fp16, require_torch_multi_gpu, require_vision, slow, @@ -427,7 +428,8 @@ def test_inference_universal_segmentation_head(self): ).to(torch_device) self.assertTrue(torch.allclose(outputs.class_queries_logits[0, :3, :3], expected_slice, atol=TOLERANCE)) - @require_torch_gpu + @require_torch_accelerator + @require_torch_fp16 def test_inference_fp16(self): model = ( Mask2FormerForUniversalSegmentation.from_pretrained(self.model_checkpoints) diff --git a/tests/models/maskformer/test_modeling_maskformer.py b/tests/models/maskformer/test_modeling_maskformer.py index a2e3ee306a82..fe1cc3423e0f 100644 --- a/tests/models/maskformer/test_modeling_maskformer.py +++ b/tests/models/maskformer/test_modeling_maskformer.py @@ -24,7 +24,8 @@ from transformers import DetrConfig, MaskFormerConfig, SwinConfig, is_torch_available, is_vision_available from transformers.testing_utils import ( require_torch, - require_torch_gpu, + require_torch_accelerator, + require_torch_fp16, require_torch_multi_gpu, require_vision, slow, @@ -516,7 +517,8 @@ def test_inference_instance_segmentation_head_resnet_backbone(self): ).to(torch_device) self.assertTrue(torch.allclose(outputs.class_queries_logits[0, :3, :3], expected_slice, atol=TOLERANCE)) - @require_torch_gpu + @require_torch_accelerator + @require_torch_fp16 def test_inference_fp16(self): model = ( MaskFormerForInstanceSegmentation.from_pretrained("facebook/maskformer-resnet101-coco-stuff") diff --git a/tests/models/oneformer/test_modeling_oneformer.py b/tests/models/oneformer/test_modeling_oneformer.py index 1fa400a22855..cb00170799f9 100644 --- a/tests/models/oneformer/test_modeling_oneformer.py +++ b/tests/models/oneformer/test_modeling_oneformer.py @@ -24,7 +24,8 @@ from transformers import OneFormerConfig, is_torch_available, is_vision_available from transformers.testing_utils import ( require_torch, - require_torch_gpu, + require_torch_accelerator, + require_torch_fp16, require_torch_multi_gpu, require_vision, slow, @@ -540,7 +541,8 @@ def test_inference_universal_segmentation_head(self): ).to(torch_device) self.assertTrue(torch.allclose(class_queries_logits[0, :3, :3], expected_slice, atol=TOLERANCE)) - @require_torch_gpu + @require_torch_accelerator + @require_torch_fp16 def test_inference_fp16(self): model = ( OneFormerForUniversalSegmentation.from_pretrained(self.model_checkpoints) diff --git 
a/tests/models/opt/test_modeling_opt.py b/tests/models/opt/test_modeling_opt.py index 18c0c9a7efee..1c252ab97d52 100644 --- a/tests/models/opt/test_modeling_opt.py +++ b/tests/models/opt/test_modeling_opt.py @@ -22,7 +22,7 @@ import timeout_decorator # noqa from transformers import OPTConfig, is_torch_available -from transformers.testing_utils import require_torch, require_torch_fp16, require_torch_gpu, slow, torch_device +from transformers.testing_utils import require_torch, require_torch_fp16, require_torch_accelerator, slow, torch_device from ...generation.test_utils import GenerationTesterMixin from ...test_configuration_common import ConfigTester @@ -514,7 +514,8 @@ def test_generation_post_attn_layer_norm(self): self.assertListEqual(predicted_outputs, EXPECTED_OUTPUTS) - @require_torch_gpu + @require_torch_accelerator + @require_torch_fp16 def test_batched_nan_fp16(self): # a bug manifested starting at models facebook/opt-1.3 and larger when running batched generations, # therefore not using a tiny model, but the smallest model the problem was seen with which is opt-1.3b. diff --git a/tests/models/owlv2/test_modeling_owlv2.py b/tests/models/owlv2/test_modeling_owlv2.py index a45df63c39d3..6c76736631a6 100644 --- a/tests/models/owlv2/test_modeling_owlv2.py +++ b/tests/models/owlv2/test_modeling_owlv2.py @@ -24,7 +24,14 @@ import requests from transformers import Owlv2Config, Owlv2TextConfig, Owlv2VisionConfig -from transformers.testing_utils import require_torch, require_torch_gpu, require_vision, slow, torch_device +from transformers.testing_utils import ( + require_torch, + require_torch_accelerator, + require_torch_fp16, + require_vision, + slow, + torch_device, +) from transformers.utils import is_torch_available, is_vision_available from ...test_configuration_common import ConfigTester @@ -869,7 +876,8 @@ def test_inference_one_shot_object_detection(self): self.assertTrue(torch.allclose(outputs.target_pred_boxes[0, :3, :3], expected_slice_boxes, atol=1e-4)) @slow - @require_torch_gpu + @require_torch_accelerator + @require_torch_fp16 def test_inference_one_shot_object_detection_fp16(self): model_name = "google/owlv2-base-patch16" model = Owlv2ForObjectDetection.from_pretrained(model_name, torch_dtype=torch.float16).to(torch_device) diff --git a/tests/models/owlvit/test_modeling_owlvit.py b/tests/models/owlvit/test_modeling_owlvit.py index ae419c69f0fe..a107c7281dc0 100644 --- a/tests/models/owlvit/test_modeling_owlvit.py +++ b/tests/models/owlvit/test_modeling_owlvit.py @@ -24,7 +24,14 @@ import requests from transformers import OwlViTConfig, OwlViTTextConfig, OwlViTVisionConfig -from transformers.testing_utils import require_torch, require_torch_gpu, require_vision, slow, torch_device +from transformers.testing_utils import ( + require_torch, + require_torch_accelerator, + require_torch_fp16, + require_vision, + slow, + torch_device, +) from transformers.utils import is_torch_available, is_vision_available from ...test_configuration_common import ConfigTester @@ -860,7 +867,8 @@ def test_inference_one_shot_object_detection(self): self.assertTrue(torch.allclose(outputs.target_pred_boxes[0, :3, :3], expected_slice_boxes, atol=1e-4)) @slow - @require_torch_gpu + @require_torch_accelerator + @require_torch_fp16 def test_inference_one_shot_object_detection_fp16(self): model_name = "google/owlvit-base-patch32" model = OwlViTForObjectDetection.from_pretrained(model_name, torch_dtype=torch.float16).to(torch_device) diff --git a/tests/models/persimmon/test_modeling_persimmon.py 
b/tests/models/persimmon/test_modeling_persimmon.py index aa092f3870d2..b91f40ca7ed2 100644 --- a/tests/models/persimmon/test_modeling_persimmon.py +++ b/tests/models/persimmon/test_modeling_persimmon.py @@ -21,7 +21,13 @@ from parameterized import parameterized from transformers import PersimmonConfig, is_torch_available, set_seed -from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device +from transformers.testing_utils import ( + require_torch, + require_torch_accelerator, + require_torch_fp16, + slow, + torch_device +) from ...generation.test_utils import GenerationTesterMixin from ...test_configuration_common import ConfigTester @@ -418,7 +424,8 @@ def test_model_8b_chat_logits(self): gc.collect() @slow - @require_torch_gpu + @require_torch_accelerator + @require_torch_fp16 def test_model_8b_chat_greedy_generation(self): EXPECTED_TEXT_COMPLETION = """human: Simply put, the theory of relativity states that?\n\nadept: The theory of relativity states that the laws of physics are the same for all observers, regardless of their relative motion.""" prompt = "human: Simply put, the theory of relativity states that?\n\nadept:" diff --git a/tests/models/pvt/test_modeling_pvt.py b/tests/models/pvt/test_modeling_pvt.py index eb1370d0bc29..04ce21530531 100644 --- a/tests/models/pvt/test_modeling_pvt.py +++ b/tests/models/pvt/test_modeling_pvt.py @@ -23,7 +23,8 @@ from transformers.testing_utils import ( require_accelerate, require_torch, - require_torch_gpu, + require_torch_accelerator, + require_torch_fp16, slow, torch_device, ) @@ -318,7 +319,8 @@ def test_inference_model(self): @slow @require_accelerate - @require_torch_gpu + @require_torch_accelerator + @require_torch_fp16 def test_inference_fp16(self): r""" A small test to make sure that inference work in half precision without any problem. 
diff --git a/tests/models/switch_transformers/test_modeling_switch_transformers.py b/tests/models/switch_transformers/test_modeling_switch_transformers.py index 54e17b91b7b2..194e43d4703f 100644 --- a/tests/models/switch_transformers/test_modeling_switch_transformers.py +++ b/tests/models/switch_transformers/test_modeling_switch_transformers.py @@ -19,7 +19,7 @@ import unittest from transformers import SwitchTransformersConfig, is_torch_available -from transformers.testing_utils import require_tokenizers, require_torch, require_torch_gpu, slow, torch_device +from transformers.testing_utils import require_tokenizers, require_torch, require_torch_accelerator, require_torch_bf16, slow, torch_device from ...generation.test_utils import GenerationTesterMixin from ...test_configuration_common import ConfigTester @@ -1017,7 +1017,8 @@ def test_max_routing_capacity(self): @require_torch @require_tokenizers class SwitchTransformerModelIntegrationTests(unittest.TestCase): - @require_torch_gpu + @require_torch_accelerator + @require_torch_bf16 def test_small_logits(self): r""" Logits testing to check implementation consistency between `t5x` implementation diff --git a/tests/models/vit/test_modeling_vit.py b/tests/models/vit/test_modeling_vit.py index 82ba910ec869..d1e887183329 100644 --- a/tests/models/vit/test_modeling_vit.py +++ b/tests/models/vit/test_modeling_vit.py @@ -22,7 +22,8 @@ from transformers.testing_utils import ( require_accelerate, require_torch, - require_torch_gpu, + require_torch_accelerator, + require_torch_fp16, require_vision, slow, torch_device, @@ -316,7 +317,8 @@ def test_inference_interpolate_pos_encoding(self): @slow @require_accelerate - @require_torch_gpu + @require_torch_accelerator + @require_torch_fp16 def test_inference_fp16(self): r""" A small test to make sure that inference work in half precision without any problem. 
diff --git a/tests/models/wav2vec2_conformer/test_modeling_wav2vec2_conformer.py b/tests/models/wav2vec2_conformer/test_modeling_wav2vec2_conformer.py index 33d37a073be9..3bf71bb1c814 100644 --- a/tests/models/wav2vec2_conformer/test_modeling_wav2vec2_conformer.py +++ b/tests/models/wav2vec2_conformer/test_modeling_wav2vec2_conformer.py @@ -21,7 +21,7 @@ from datasets import load_dataset from transformers import Wav2Vec2ConformerConfig, is_torch_available -from transformers.testing_utils import is_pt_flax_cross_test, require_torch, require_torch_gpu, slow, torch_device +from transformers.testing_utils import is_pt_flax_cross_test, require_torch, require_torch_accelerator, require_torch_fp16, slow, torch_device from ...test_configuration_common import ConfigTester from ...test_modeling_common import ( @@ -468,12 +468,14 @@ def test_model_with_adapter_proj_dim(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model_with_adapter_proj_dim(*config_and_inputs) - @require_torch_gpu + @require_torch_accelerator + @require_torch_fp16 def test_model_float16_with_relative(self): config_and_inputs = self.model_tester.prepare_config_and_inputs(position_embeddings_type="relative") self.model_tester.create_and_check_model_float16(*config_and_inputs) - @require_torch_gpu + @require_torch_accelerator + @require_torch_fp16 def test_model_float16_with_rotary(self): config_and_inputs = self.model_tester.prepare_config_and_inputs(position_embeddings_type="rotary") self.model_tester.create_and_check_model_float16(*config_and_inputs) diff --git a/tests/models/xglm/test_modeling_xglm.py b/tests/models/xglm/test_modeling_xglm.py index 105ad5c44e99..cdfcb757a5d2 100644 --- a/tests/models/xglm/test_modeling_xglm.py +++ b/tests/models/xglm/test_modeling_xglm.py @@ -19,7 +19,13 @@ import unittest from transformers import XGLMConfig, is_torch_available -from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device +from transformers.testing_utils import ( + require_torch, + require_torch_accelerator, + require_torch_fp16, + slow, + torch_device, +) from ...generation.test_utils import GenerationTesterMixin from ...test_configuration_common import ConfigTester @@ -492,7 +498,8 @@ def test_xglm_sample_max_time(self): duration = datetime.datetime.now() - start self.assertGreater(duration, datetime.timedelta(seconds=1.25 * MAX_TIME)) - @require_torch_gpu + @require_torch_accelerator + @require_torch_fp16 def test_batched_nan_fp16(self): model_name = "facebook/xglm-564M" tokenizer = XGLMTokenizer.from_pretrained(model_name, use_fast=False, padding_side="left") diff --git a/tests/test_modeling_utils.py b/tests/test_modeling_utils.py index ffdb2ae7d0e6..9e824e8efa03 100755 --- a/tests/test_modeling_utils.py +++ b/tests/test_modeling_utils.py @@ -44,8 +44,8 @@ require_accelerate, require_safetensors, require_torch, - require_torch_gpu, - require_torch_multi_gpu, + require_torch_accelerator, + require_torch_multi_accelerator, require_usr_bin_time, slow, torch_device, @@ -681,7 +681,7 @@ def test_from_pretrained_low_cpu_mem_usage_measured(self): @require_accelerate @mark.accelerate_tests - @require_torch_multi_gpu + @require_torch_multi_accelerator @slow def test_model_parallelism_gpt2(self): device_map = {"transformer.wte": 0, "transformer.wpe": 0, "lm_head": 0, "transformer.ln_f": 1} @@ -699,7 +699,7 @@ def test_model_parallelism_gpt2(self): @require_accelerate @mark.accelerate_tests - @require_torch_gpu + @require_torch_accelerator def 
test_from_pretrained_disk_offload_task_model(self): model = AutoModel.from_pretrained("hf-internal-testing/tiny-random-gpt2") device_map = { @@ -1036,7 +1036,7 @@ def f(input_ids): opt_fn(input_ids) self.assertEqual(compile_counter.frame_count, 0) - @require_torch_gpu + @require_torch_accelerator @slow def test_pretrained_low_mem_new_config(self): # Checking for 1 model(the same one which was described in the issue) . From 443c50c146f24fabe9a554647405de416f4a907f Mon Sep 17 00:00:00 2001 From: statelesshz Date: Mon, 30 Oct 2023 21:11:55 +0800 Subject: [PATCH 2/5] add decorator `require_torch_fp16` --- tests/models/bart/test_modeling_bart.py | 17 ++++++++++++----- .../test_modeling_bigbird_pegasus.py | 13 ++++++++++--- .../blenderbot/test_modeling_blenderbot.py | 16 ++++++++++++---- .../test_modeling_blenderbot_small.py | 11 ++++++++--- tests/models/ctrl/test_modeling_ctrl.py | 6 +++--- tests/models/fsmt/test_modeling_fsmt.py | 16 +++++++++++----- tests/models/gpt2/test_modeling_gpt2.py | 6 +++--- tests/models/gptj/test_modeling_gptj.py | 2 +- tests/models/led/test_modeling_led.py | 13 ++++++++++--- tests/models/m2m_100/test_modeling_m2m_100.py | 13 ++++++++++--- tests/models/marian/test_modeling_marian.py | 17 ++++++++++++----- tests/models/mbart/test_modeling_mbart.py | 13 ++++++++++--- tests/models/mega/test_modeling_mega.py | 12 +++++++++--- tests/models/mistral/test_modeling_mistral.py | 13 ++++++++++--- tests/models/musicgen/test_modeling_musicgen.py | 12 +++++++++--- tests/models/mvp/test_modeling_mvp.py | 17 ++++++++++++----- tests/models/nllb_moe/test_modeling_nllb_moe.py | 5 +++-- tests/models/pegasus/test_modeling_pegasus.py | 17 +++++++++++------ .../models/pegasus_x/test_modeling_pegasus_x.py | 13 ++++++++++--- .../models/persimmon/test_modeling_persimmon.py | 7 ++++--- tests/models/plbart/test_modeling_plbart.py | 13 ++++++++++--- tests/models/sam/test_modeling_sam.py | 8 +++----- .../test_modeling_speech_to_text.py | 10 +++++++--- tests/models/wav2vec2/test_modeling_wav2vec2.py | 3 ++- tests/models/whisper/test_modeling_whisper.py | 15 +++++++++++---- 25 files changed, 203 insertions(+), 85 deletions(-) diff --git a/tests/models/bart/test_modeling_bart.py b/tests/models/bart/test_modeling_bart.py index d91ecf4cf5d4..8526c8e7797a 100644 --- a/tests/models/bart/test_modeling_bart.py +++ b/tests/models/bart/test_modeling_bart.py @@ -22,7 +22,14 @@ import timeout_decorator # noqa from transformers import BartConfig, is_torch_available -from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device +from transformers.testing_utils import ( + require_sentencepiece, + require_tokenizers, + require_torch, + require_torch_fp16, + slow, + torch_device, +) from transformers.utils import cached_property from ...generation.test_utils import GenerationTesterMixin @@ -383,12 +390,12 @@ def test_tokenization(self): bart_toks = tokenizer.encode(ex, return_tensors="pt").squeeze() assert_tensors_close(desired_result.long(), bart_toks, prefix=ex) + @require_torch_fp16 def test_generate_fp16(self): config, input_ids, batch_size = self._get_config_and_data() attention_mask = input_ids.ne(1).to(torch_device) model = BartForConditionalGeneration(config).eval().to(torch_device) - if torch_device == "cuda": - model.half() + model.half() model.generate(input_ids, attention_mask=attention_mask) model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3) @@ -497,13 +504,13 @@ def test_inputs_embeds(self): with 
torch.no_grad(): model(**inputs)[0] + @require_torch_fp16 def test_generate_fp16(self): config, input_dict = self.model_tester.prepare_config_and_inputs() input_ids = input_dict["input_ids"] attention_mask = input_ids.ne(1).to(torch_device) model = BartForConditionalGeneration(config).eval().to(torch_device) - if torch_device == "cuda": - model.half() + model.half() model.generate(input_ids, attention_mask=attention_mask) model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3) diff --git a/tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py b/tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py index aedbbb46341e..c0a03b3f6737 100644 --- a/tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py +++ b/tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py @@ -20,7 +20,14 @@ import unittest from transformers import BigBirdPegasusConfig, is_torch_available -from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device +from transformers.testing_utils import ( + require_sentencepiece, + require_tokenizers, + require_torch, + require_torch_fp16, + slow, + torch_device, +) from ...generation.test_utils import GenerationTesterMixin from ...test_configuration_common import ConfigTester @@ -376,13 +383,13 @@ def test_inputs_embeds(self): with torch.no_grad(): model(**inputs)[0] + @require_torch_fp16 def test_generate_fp16(self): config, input_dict = self.model_tester.prepare_config_and_inputs() input_dict.pop("decoder_attention_mask") input_dict.pop("decoder_input_ids") model = BigBirdPegasusForConditionalGeneration(config).eval().to(torch_device) - if torch_device == "cuda": - model.half() + model.half() model.generate(**input_dict) model.generate(**input_dict, do_sample=True, early_stopping=False, num_return_sequences=3) diff --git a/tests/models/blenderbot/test_modeling_blenderbot.py b/tests/models/blenderbot/test_modeling_blenderbot.py index ca1630b3cfd3..19b88dcef8ea 100644 --- a/tests/models/blenderbot/test_modeling_blenderbot.py +++ b/tests/models/blenderbot/test_modeling_blenderbot.py @@ -18,7 +18,15 @@ import unittest from transformers import BlenderbotConfig, is_torch_available -from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device +from transformers.testing_utils import ( + backend_empty_cache, + require_sentencepiece, + require_tokenizers, + require_torch, + require_torch_fp16, + slow, + torch_device, +) from transformers.utils import cached_property from ...generation.test_utils import GenerationTesterMixin @@ -269,13 +277,13 @@ def test_encoder_decoder_model_standalone(self): config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common() self.model_tester.check_encoder_decoder_model_standalone(*config_and_inputs) + @require_torch_fp16 def test_generate_fp16(self): config, input_dict = self.model_tester.prepare_config_and_inputs() input_ids = input_dict["input_ids"] attention_mask = input_ids.ne(1).to(torch_device) model = BlenderbotForConditionalGeneration(config).eval().to(torch_device) - if torch_device == "cuda": - model.half() + model.half() model.generate(input_ids, attention_mask=attention_mask) model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3) @@ -315,7 +323,7 @@ def test_generation_from_short_input_same_as_parlai_3B(self): FASTER_GEN_KWARGS = {"num_beams": 1, "early_stopping": True, "min_length": 15, "max_length": 25} TOK_DECODE_KW = 
{"skip_special_tokens": True, "clean_up_tokenization_spaces": True} - torch.cuda.empty_cache() + backend_empty_cache(torch_device) model = BlenderbotForConditionalGeneration.from_pretrained(self.ckpt).half().to(torch_device) src_text = ["Sam"] diff --git a/tests/models/blenderbot_small/test_modeling_blenderbot_small.py b/tests/models/blenderbot_small/test_modeling_blenderbot_small.py index 2397b6fee972..b8078312c83a 100644 --- a/tests/models/blenderbot_small/test_modeling_blenderbot_small.py +++ b/tests/models/blenderbot_small/test_modeling_blenderbot_small.py @@ -18,7 +18,12 @@ import unittest from transformers import BlenderbotSmallConfig, is_torch_available -from transformers.testing_utils import require_torch, slow, torch_device +from transformers.testing_utils import ( + require_torch, + require_torch_fp16, + slow, + torch_device, +) from transformers.utils import cached_property from ...generation.test_utils import GenerationTesterMixin @@ -274,13 +279,13 @@ def test_encoder_decoder_model_standalone(self): config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common() self.model_tester.check_encoder_decoder_model_standalone(*config_and_inputs) + @require_torch_fp16 def test_generate_fp16(self): config, input_dict = self.model_tester.prepare_config_and_inputs() input_ids = input_dict["input_ids"] attention_mask = input_ids.ne(1).to(torch_device) model = BlenderbotSmallForConditionalGeneration(config).eval().to(torch_device) - if torch_device == "cuda": - model.half() + model.half() model.generate(input_ids, attention_mask=attention_mask) model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3) diff --git a/tests/models/ctrl/test_modeling_ctrl.py b/tests/models/ctrl/test_modeling_ctrl.py index 65d3cbebc4f1..13b35926117d 100644 --- a/tests/models/ctrl/test_modeling_ctrl.py +++ b/tests/models/ctrl/test_modeling_ctrl.py @@ -17,7 +17,7 @@ import unittest from transformers import CTRLConfig, is_torch_available -from transformers.testing_utils import require_torch, slow, torch_device +from transformers.testing_utils import backend_empty_cache, require_torch, slow, torch_device from ...generation.test_utils import GenerationTesterMixin from ...test_configuration_common import ConfigTester @@ -230,7 +230,7 @@ def tearDown(self): super().tearDown() # clean-up as much as possible GPU memory occupied by PyTorch gc.collect() - torch.cuda.empty_cache() + backend_empty_cache(torch_device) def test_config(self): self.config_tester.run_common_tests() @@ -260,7 +260,7 @@ def tearDown(self): super().tearDown() # clean-up as much as possible GPU memory occupied by PyTorch gc.collect() - torch.cuda.empty_cache() + backend_empty_cache(torch_device) @slow def test_lm_generate_ctrl(self): diff --git a/tests/models/fsmt/test_modeling_fsmt.py b/tests/models/fsmt/test_modeling_fsmt.py index f533da772783..6ebc9833a35b 100644 --- a/tests/models/fsmt/test_modeling_fsmt.py +++ b/tests/models/fsmt/test_modeling_fsmt.py @@ -20,7 +20,14 @@ from parameterized import parameterized from transformers import FSMTConfig, is_torch_available -from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device +from transformers.testing_utils import ( + require_sentencepiece, + require_tokenizers, + require_torch, + require_torch_fp16, + slow, + torch_device, +) from transformers.utils import cached_property from ...generation.test_utils import GenerationTesterMixin @@ -398,12 +405,12 @@ def test_shift_tokens_right(self): 
self.assertEqual(n_pad_after, n_pad_before - 1) self.assertTrue(torch.eq(shifted[:, 0], 2).all()) + @require_torch_fp16 def test_generate_fp16(self): config, input_ids, batch_size = self._get_config_and_data() attention_mask = input_ids.ne(1).to(torch_device) model = FSMTForConditionalGeneration(config).eval().to(torch_device) - if torch_device == "cuda": - model.half() + model.half() model.generate(input_ids, attention_mask=attention_mask) model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3) @@ -538,8 +545,7 @@ def test_translation_direct(self, pair): @slow def test_translation_pipeline(self, pair): tokenizer, model, src_text, tgt_text = self.translation_setup(pair) - device = 0 if torch_device == "cuda" else -1 - pipeline = TranslationPipeline(model, tokenizer, framework="pt", device=device) + pipeline = TranslationPipeline(model, tokenizer, framework="pt", device=torch_device) output = pipeline([src_text]) self.assertEqual([tgt_text], [x["translation_text"] for x in output]) diff --git a/tests/models/gpt2/test_modeling_gpt2.py b/tests/models/gpt2/test_modeling_gpt2.py index 17b32a22fb1f..4c49c0b746dc 100644 --- a/tests/models/gpt2/test_modeling_gpt2.py +++ b/tests/models/gpt2/test_modeling_gpt2.py @@ -20,7 +20,7 @@ import unittest from transformers import GPT2Config, is_torch_available -from transformers.testing_utils import require_torch, slow, torch_device +from transformers.testing_utils import backend_empty_cache, require_torch, slow, torch_device from ...generation.test_utils import GenerationTesterMixin from ...test_configuration_common import ConfigTester @@ -505,7 +505,7 @@ def tearDown(self): super().tearDown() # clean-up as much as possible GPU memory occupied by PyTorch gc.collect() - torch.cuda.empty_cache() + backend_empty_cache(torch_device) def test_config(self): self.config_tester.run_common_tests() @@ -712,7 +712,7 @@ def tearDown(self): super().tearDown() # clean-up as much as possible GPU memory occupied by PyTorch gc.collect() - torch.cuda.empty_cache() + backend_empty_cache(torch_device) def _test_lm_generate_gpt2_helper( self, diff --git a/tests/models/gptj/test_modeling_gptj.py b/tests/models/gptj/test_modeling_gptj.py index f0e02700700c..37d57bbc937b 100644 --- a/tests/models/gptj/test_modeling_gptj.py +++ b/tests/models/gptj/test_modeling_gptj.py @@ -562,7 +562,7 @@ def test_gptj_sample(self): output_seq_strs = tokenizer.batch_decode(output_seq, skip_special_tokens=True) output_seq_tt_strs = tokenizer.batch_decode(output_seq_tt, skip_special_tokens=True) - if torch_device == "cuda": + if torch_device != "cpu": EXPECTED_OUTPUT_STR = ( "Today is a nice day and I've already been enjoying it. 
I walked to work with my wife" ) diff --git a/tests/models/led/test_modeling_led.py b/tests/models/led/test_modeling_led.py index b6dfc3256b05..a549fce6ec5e 100644 --- a/tests/models/led/test_modeling_led.py +++ b/tests/models/led/test_modeling_led.py @@ -21,7 +21,14 @@ from transformers import LEDConfig, is_torch_available from transformers.models.auto import get_values -from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device +from transformers.testing_utils import ( + require_sentencepiece, + require_tokenizers, + require_torch, + require_torch_fp16, + slow, + torch_device, +) from transformers.utils import cached_property from ...generation.test_utils import GenerationTesterMixin @@ -363,13 +370,13 @@ def test_inputs_embeds(self): with torch.no_grad(): model(**inputs)[0] + @require_torch_fp16 def test_generate_fp16(self): config, input_dict = self.model_tester.prepare_config_and_inputs() input_ids = input_dict["input_ids"] attention_mask = input_ids.ne(1).to(torch_device) model = LEDForConditionalGeneration(config).eval().to(torch_device) - if torch_device == "cuda": - model.half() + model.half() model.generate(input_ids, attention_mask=attention_mask) model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3) diff --git a/tests/models/m2m_100/test_modeling_m2m_100.py b/tests/models/m2m_100/test_modeling_m2m_100.py index d081041978c0..5f928b3a9b7b 100644 --- a/tests/models/m2m_100/test_modeling_m2m_100.py +++ b/tests/models/m2m_100/test_modeling_m2m_100.py @@ -20,7 +20,14 @@ import unittest from transformers import M2M100Config, is_torch_available -from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device +from transformers.testing_utils import ( + require_sentencepiece, + require_tokenizers, + require_torch, + require_torch_fp16, + slow, + torch_device, +) from transformers.utils import cached_property from ...generation.test_utils import GenerationTesterMixin @@ -312,13 +319,13 @@ def test_inputs_embeds(self): with torch.no_grad(): model(**inputs)[0] + @require_torch_fp16 def test_generate_fp16(self): config, input_dict = self.model_tester.prepare_config_and_inputs() input_ids = input_dict["input_ids"] attention_mask = input_ids.ne(1).to(torch_device) model = M2M100ForConditionalGeneration(config).eval().to(torch_device) - if torch_device == "cuda": - model.half() + model.half() model.generate(input_ids, attention_mask=attention_mask) model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3) diff --git a/tests/models/marian/test_modeling_marian.py b/tests/models/marian/test_modeling_marian.py index c590b216cc47..ea0966519f37 100644 --- a/tests/models/marian/test_modeling_marian.py +++ b/tests/models/marian/test_modeling_marian.py @@ -20,7 +20,14 @@ from huggingface_hub.hf_api import list_models from transformers import MarianConfig, is_torch_available -from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device +from transformers.testing_utils import ( + require_sentencepiece, + require_tokenizers, + require_torch, + require_torch_fp16, + slow, + torch_device, +) from transformers.utils import cached_property from ...generation.test_utils import GenerationTesterMixin @@ -281,13 +288,13 @@ def test_encoder_decoder_model_standalone(self): config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common() 
self.model_tester.check_encoder_decoder_model_standalone(*config_and_inputs) + @require_torch_fp16 def test_generate_fp16(self): config, input_dict = self.model_tester.prepare_config_and_inputs() input_ids = input_dict["input_ids"] attention_mask = input_ids.ne(1).to(torch_device) model = MarianMTModel(config).eval().to(torch_device) - if torch_device == "cuda": - model.half() + model.half() model.generate(input_ids, attention_mask=attention_mask) model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3) @@ -620,9 +627,9 @@ def test_batch_generation_en_ROMANCE_multi(self): self._assert_generated_batch_equal_expected() @slow + @require_torch def test_pipeline(self): - device = 0 if torch_device == "cuda" else -1 - pipeline = TranslationPipeline(self.model, self.tokenizer, framework="pt", device=device) + pipeline = TranslationPipeline(self.model, self.tokenizer, framework="pt", device=torch_device) output = pipeline(self.src_text) self.assertEqual(self.expected_text, [x["translation_text"] for x in output]) diff --git a/tests/models/mbart/test_modeling_mbart.py b/tests/models/mbart/test_modeling_mbart.py index deaa8b5dafe6..3a324f8c6d5d 100644 --- a/tests/models/mbart/test_modeling_mbart.py +++ b/tests/models/mbart/test_modeling_mbart.py @@ -20,7 +20,14 @@ import unittest from transformers import MBartConfig, is_torch_available -from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device +from transformers.testing_utils import ( + require_sentencepiece, + require_tokenizers, + require_torch, + require_torch_fp16, + slow, + torch_device, +) from transformers.utils import cached_property from ...generation.test_utils import GenerationTesterMixin @@ -317,13 +324,13 @@ def test_inputs_embeds(self): with torch.no_grad(): model(**inputs)[0] + @require_torch_fp16 def test_generate_fp16(self): config, input_dict = self.model_tester.prepare_config_and_inputs() input_ids = input_dict["input_ids"] attention_mask = input_ids.ne(1).to(torch_device) model = MBartForConditionalGeneration(config).eval().to(torch_device) - if torch_device == "cuda": - model.half() + model.half() model.generate(input_ids, attention_mask=attention_mask) model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3) diff --git a/tests/models/mega/test_modeling_mega.py b/tests/models/mega/test_modeling_mega.py index 10df7a555e5d..e63d155fbc71 100644 --- a/tests/models/mega/test_modeling_mega.py +++ b/tests/models/mega/test_modeling_mega.py @@ -17,7 +17,13 @@ import unittest from transformers import MegaConfig, is_torch_available -from transformers.testing_utils import TestCasePlus, require_torch, slow, torch_device +from transformers.testing_utils import ( + TestCasePlus, + require_torch, + require_torch_fp16, + slow, + torch_device, +) from ...generation.test_utils import GenerationTesterMixin from ...test_configuration_common import ConfigTester @@ -619,12 +625,12 @@ def test_for_sequence_length_beyond_max_positions(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.check_sequence_length_beyond_max_positions(*config_and_inputs) + @require_torch_fp16 def test_generate_fp16(self): config, input_ids, _, attention_mask, *_ = self.model_tester.prepare_config_and_inputs_for_decoder() # attention_mask = torch.LongTensor(input_ids.ne(1)).to(torch_device) model = MegaForCausalLM(config).eval().to(torch_device) - if torch_device == "cuda": - model.half() + model.half() 
model.generate(input_ids, attention_mask=attention_mask) model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3) diff --git a/tests/models/mistral/test_modeling_mistral.py b/tests/models/mistral/test_modeling_mistral.py index a795ebceef46..843f9ebceb60 100644 --- a/tests/models/mistral/test_modeling_mistral.py +++ b/tests/models/mistral/test_modeling_mistral.py @@ -22,7 +22,14 @@ from pytest import mark from transformers import AutoTokenizer, MistralConfig, is_torch_available -from transformers.testing_utils import require_flash_attn, require_torch, require_torch_gpu, slow, torch_device +from transformers.testing_utils import ( + backend_empty_cache, + require_flash_attn, + require_torch, + require_torch_gpu, + slow, + torch_device, +) from ...generation.test_utils import GenerationTesterMixin from ...test_configuration_common import ConfigTester @@ -450,7 +457,7 @@ def test_model_7b_logits(self): torch.testing.assert_close(out[0, 0, :30], EXPECTED_SLICE, atol=1e-4, rtol=1e-4) del model - torch.cuda.empty_cache() + backend_empty_cache(torch_device) gc.collect() @slow @@ -467,5 +474,5 @@ def test_model_7b_generation(self): self.assertEqual(EXPECTED_TEXT_COMPLETION, text) del model - torch.cuda.empty_cache() + backend_empty_cache(torch_device) gc.collect() diff --git a/tests/models/musicgen/test_modeling_musicgen.py b/tests/models/musicgen/test_modeling_musicgen.py index 02ab3b538c26..5bae58ed7215 100644 --- a/tests/models/musicgen/test_modeling_musicgen.py +++ b/tests/models/musicgen/test_modeling_musicgen.py @@ -28,7 +28,13 @@ PretrainedConfig, T5Config, ) -from transformers.testing_utils import is_torch_available, require_torch, slow, torch_device +from transformers.testing_utils import ( + is_torch_available, + require_torch, + require_torch_fp16, + slow, + torch_device, +) from transformers.utils import cached_property from ...generation.test_utils import GenerationTesterMixin @@ -1082,13 +1088,13 @@ def test_generate_without_input_ids(self): output_ids_generate = model.generate(do_sample=False, max_length=max_length, remove_invalid_values=True) self.assertIsNotNone(output_ids_generate) + @require_torch_fp16 def test_generate_fp16(self): config, input_dict = self.model_tester.prepare_config_and_inputs() for model_class in self.greedy_sample_model_classes: model = model_class(config).eval().to(torch_device) - if torch_device == "cuda": - model.half() + model.half() # greedy model.generate(input_dict["input_ids"], attention_mask=input_dict["attention_mask"], max_new_tokens=10) # sampling diff --git a/tests/models/mvp/test_modeling_mvp.py b/tests/models/mvp/test_modeling_mvp.py index 8e6143529a80..ce6e46e500ab 100644 --- a/tests/models/mvp/test_modeling_mvp.py +++ b/tests/models/mvp/test_modeling_mvp.py @@ -22,7 +22,14 @@ import timeout_decorator # noqa from transformers import MvpConfig, is_torch_available -from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device +from transformers.testing_utils import ( + require_sentencepiece, + require_tokenizers, + require_torch, + require_torch_fp16, + slow, + torch_device, +) from transformers.utils import cached_property from ...generation.test_utils import GenerationTesterMixin @@ -374,12 +381,12 @@ def test_tokenization(self): mvp_toks = tokenizer.encode(ex, return_tensors="pt").squeeze() assert_tensors_close(desired_result.long(), mvp_toks, prefix=ex) + @require_torch_fp16 def test_generate_fp16(self): config, input_ids, batch_size = 
self._get_config_and_data() attention_mask = input_ids.ne(1).to(torch_device) model = MvpForConditionalGeneration(config).eval().to(torch_device) - if torch_device == "cuda": - model.half() + model.half() model.generate(input_ids, attention_mask=attention_mask) model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3) @@ -505,13 +512,13 @@ def test_inputs_embeds(self): with torch.no_grad(): model(**inputs)[0] + @require_torch_fp16 def test_generate_fp16(self): config, input_dict = self.model_tester.prepare_config_and_inputs() input_ids = input_dict["input_ids"] attention_mask = input_ids.ne(1).to(torch_device) model = MvpForConditionalGeneration(config).eval().to(torch_device) - if torch_device == "cuda": - model.half() + model.half() model.generate(input_ids, attention_mask=attention_mask) model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3) diff --git a/tests/models/nllb_moe/test_modeling_nllb_moe.py b/tests/models/nllb_moe/test_modeling_nllb_moe.py index 409db2207e20..0ba66ff6b338 100644 --- a/tests/models/nllb_moe/test_modeling_nllb_moe.py +++ b/tests/models/nllb_moe/test_modeling_nllb_moe.py @@ -24,6 +24,7 @@ require_sentencepiece, require_tokenizers, require_torch, + require_torch_fp16, slow, torch_device, ) @@ -327,13 +328,13 @@ def test_inputs_embeds(self): with torch.no_grad(): model(**inputs)[0] + @require_torch_fp16 def test_generate_fp16(self): config, input_dict = self.model_tester.prepare_config_and_inputs() input_ids = input_dict["input_ids"] attention_mask = input_ids.ne(1).to(torch_device) model = NllbMoeForConditionalGeneration(config).eval().to(torch_device) - if torch_device == "cuda": - model.half() + model.half() model.generate(input_ids, attention_mask=attention_mask) model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3) diff --git a/tests/models/pegasus/test_modeling_pegasus.py b/tests/models/pegasus/test_modeling_pegasus.py index bae10d18ff8c..231f2af70592 100644 --- a/tests/models/pegasus/test_modeling_pegasus.py +++ b/tests/models/pegasus/test_modeling_pegasus.py @@ -18,7 +18,14 @@ import unittest from transformers import PegasusConfig, is_torch_available -from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device +from transformers.testing_utils import ( + require_sentencepiece, + require_tokenizers, + require_torch, + require_torch_fp16, + slow, + torch_device, +) from transformers.utils import cached_property from ...generation.test_utils import GenerationTesterMixin @@ -280,13 +287,13 @@ def test_encoder_decoder_model_standalone(self): config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common() self.model_tester.check_encoder_decoder_model_standalone(*config_and_inputs) + @require_torch_fp16 def test_generate_fp16(self): config, input_dict = self.model_tester.prepare_config_and_inputs() input_ids = input_dict["input_ids"] attention_mask = input_ids.ne(1).to(torch_device) model = PegasusForConditionalGeneration(config).eval().to(torch_device) - if torch_device == "cuda": - model.half() + model.half() model.generate(input_ids, attention_mask=attention_mask) model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3) @@ -352,6 +359,7 @@ def model(self): return AutoModelForSeq2SeqLM.from_pretrained(self.checkpoint_name).to(torch_device) @slow + @require_torch_fp16 def test_pegasus_xsum_summary(self): assert self.tokenizer.model_max_length == 512 inputs = 
self.tokenizer(self.src_text, return_tensors="pt", truncation=True, max_length=512, padding=True).to( @@ -362,9 +370,6 @@ def test_pegasus_xsum_summary(self): decoded = self.tokenizer.batch_decode(translated_tokens, skip_special_tokens=True) assert self.tgt_text == decoded - if "cuda" not in torch_device: - return - # Demonstrate fp16 issue, Contributions welcome! self.model.half() translated_tokens_fp16 = self.model.generate(**inputs, max_length=10) decoded_fp16 = self.tokenizer.batch_decode(translated_tokens_fp16, skip_special_tokens=True) diff --git a/tests/models/pegasus_x/test_modeling_pegasus_x.py b/tests/models/pegasus_x/test_modeling_pegasus_x.py index 22d7b0c8634a..106a8b39e84f 100644 --- a/tests/models/pegasus_x/test_modeling_pegasus_x.py +++ b/tests/models/pegasus_x/test_modeling_pegasus_x.py @@ -21,7 +21,14 @@ import unittest from transformers import is_torch_available -from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device +from transformers.testing_utils import ( + require_sentencepiece, + require_tokenizers, + require_torch, + require_torch_fp16, + slow, + torch_device, +) from transformers.utils import cached_property from ...generation.test_utils import GenerationTesterMixin @@ -274,13 +281,13 @@ def test_inputs_embeds(self): with torch.no_grad(): model(**inputs)[0] + @require_torch_fp16 def test_generate_fp16(self): config, input_dict = self.model_tester.prepare_config_and_inputs() input_ids = input_dict["input_ids"] attention_mask = input_ids.ne(1).to(torch_device) model = PegasusXForConditionalGeneration(config).eval().to(torch_device) - if torch_device == "cuda": - model.half() + model.half() model.generate(input_ids, attention_mask=attention_mask) model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3) diff --git a/tests/models/persimmon/test_modeling_persimmon.py b/tests/models/persimmon/test_modeling_persimmon.py index b91f40ca7ed2..b2587f90028a 100644 --- a/tests/models/persimmon/test_modeling_persimmon.py +++ b/tests/models/persimmon/test_modeling_persimmon.py @@ -22,11 +22,12 @@ from transformers import PersimmonConfig, is_torch_available, set_seed from transformers.testing_utils import ( + backend_empty_cache, require_torch, require_torch_accelerator, require_torch_fp16, slow, - torch_device + torch_device, ) from ...generation.test_utils import GenerationTesterMixin @@ -419,7 +420,7 @@ def test_model_8b_chat_logits(self): # fmt: on torch.testing.assert_close(out.cpu()[0, 0, :30], EXPECTED_SLICE, atol=1e-5, rtol=1e-5) - torch.cuda.empty_cache() + backend_empty_cache(torch_device) del model gc.collect() @@ -440,6 +441,6 @@ def test_model_8b_chat_greedy_generation(self): text = tokenizer.decode(generated_ids[0], skip_special_tokens=True) self.assertEqual(EXPECTED_TEXT_COMPLETION, text) - torch.cuda.empty_cache() + backend_empty_cache(torch_device) del model gc.collect() diff --git a/tests/models/plbart/test_modeling_plbart.py b/tests/models/plbart/test_modeling_plbart.py index 4fbbb6835bfa..57a8a2d22aea 100644 --- a/tests/models/plbart/test_modeling_plbart.py +++ b/tests/models/plbart/test_modeling_plbart.py @@ -20,7 +20,14 @@ import unittest from transformers import PLBartConfig, is_torch_available -from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device +from transformers.testing_utils import ( + require_sentencepiece, + require_tokenizers, + require_torch, + require_torch_fp16, + slow, + torch_device, +) from 
transformers.utils import cached_property from ...generation.test_utils import GenerationTesterMixin @@ -304,13 +311,13 @@ def test_inputs_embeds(self): with torch.no_grad(): model(**inputs)[0] + @require_torch_fp16 def test_generate_fp16(self): config, input_dict = self.model_tester.prepare_config_and_inputs() input_ids = input_dict["input_ids"] attention_mask = input_ids.ne(1).to(torch_device) model = PLBartForConditionalGeneration(config).eval().to(torch_device) - if torch_device == "cuda": - model.half() + model.half() model.generate(input_ids, attention_mask=attention_mask) model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3) diff --git a/tests/models/sam/test_modeling_sam.py b/tests/models/sam/test_modeling_sam.py index 3d5ec22c035e..eb8f265c2946 100644 --- a/tests/models/sam/test_modeling_sam.py +++ b/tests/models/sam/test_modeling_sam.py @@ -22,7 +22,7 @@ import requests from transformers import SamConfig, SamMaskDecoderConfig, SamPromptEncoderConfig, SamVisionConfig, pipeline -from transformers.testing_utils import require_torch, slow, torch_device +from transformers.testing_utils import backend_empty_cache, require_torch, slow, torch_device from transformers.utils import is_torch_available, is_vision_available from ...test_configuration_common import ConfigTester @@ -478,7 +478,7 @@ def tearDown(self): super().tearDown() # clean-up as much as possible GPU memory occupied by PyTorch gc.collect() - torch.cuda.empty_cache() + backend_empty_cache(torch_device) def test_inference_mask_generation_no_point(self): model = SamModel.from_pretrained("facebook/sam-vit-base") @@ -772,9 +772,7 @@ def test_inference_mask_generation_three_boxes_point_batch(self): torch.testing.assert_allclose(iou_scores, EXPECTED_IOU, atol=1e-4, rtol=1e-4) def test_dummy_pipeline_generation(self): - generator = pipeline( - "mask-generation", model="facebook/sam-vit-base", device=0 if torch.cuda.is_available() else -1 - ) + generator = pipeline("mask-generation", model="facebook/sam-vit-base", device=torch_device) raw_image = prepare_image() _ = generator(raw_image, points_per_batch=64) diff --git a/tests/models/speech_to_text/test_modeling_speech_to_text.py b/tests/models/speech_to_text/test_modeling_speech_to_text.py index 061e17c3e7f5..31b338565a4d 100644 --- a/tests/models/speech_to_text/test_modeling_speech_to_text.py +++ b/tests/models/speech_to_text/test_modeling_speech_to_text.py @@ -26,6 +26,7 @@ require_sentencepiece, require_tokenizers, require_torch, + require_torch_fp16, require_torchaudio, slow, torch_device, @@ -324,6 +325,7 @@ def test_training(self): def test_training_gradient_checkpointing(self): pass +<<<<<<< HEAD @unittest.skip( reason="This architecure seem to not compute gradients properly when using GC, check: https://github.com/huggingface/transformers/pull/27124" ) @@ -336,14 +338,16 @@ def test_training_gradient_checkpointing_use_reentrant(self): def test_training_gradient_checkpointing_use_reentrant_false(self): pass +======= + @require_torch_fp16 +>>>>>>> add decorator `require_torch_fp16` def test_generate_fp16(self): config, input_dict = self.model_tester.prepare_config_and_inputs() input_features = input_dict["input_features"] attention_mask = input_dict["attention_mask"] model = Speech2TextForConditionalGeneration(config).eval().to(torch_device) - if torch_device == "cuda": - input_features = input_features.half() - model.half() + input_features = input_features.half() + model.half() model.generate(input_features, 
attention_mask=attention_mask) model.generate(input_features, num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3) diff --git a/tests/models/wav2vec2/test_modeling_wav2vec2.py b/tests/models/wav2vec2/test_modeling_wav2vec2.py index 096246fe62b1..9aa72f1c4a6e 100644 --- a/tests/models/wav2vec2/test_modeling_wav2vec2.py +++ b/tests/models/wav2vec2/test_modeling_wav2vec2.py @@ -28,6 +28,7 @@ from transformers import Wav2Vec2Config, is_torch_available from transformers.testing_utils import ( + backend_empty_cache, CaptureLogger, is_pt_flax_cross_test, is_pyctcdecode_available, @@ -1455,7 +1456,7 @@ def tearDown(self): super().tearDown() # clean-up as much as possible GPU memory occupied by PyTorch gc.collect() - torch.cuda.empty_cache() + backend_empty_cache(torch_device) def _load_datasamples(self, num_samples): ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation") diff --git a/tests/models/whisper/test_modeling_whisper.py b/tests/models/whisper/test_modeling_whisper.py index 6bb73468ad07..d1d27303a1e5 100644 --- a/tests/models/whisper/test_modeling_whisper.py +++ b/tests/models/whisper/test_modeling_whisper.py @@ -24,7 +24,14 @@ import transformers from transformers import WhisperConfig -from transformers.testing_utils import is_pt_flax_cross_test, require_torch, require_torchaudio, slow, torch_device +from transformers.testing_utils import ( + is_pt_flax_cross_test, + require_torch, + require_torch_fp16, + require_torchaudio, + slow, + torch_device, +) from transformers.utils import cached_property, is_flax_available, is_torch_available from transformers.utils.import_utils import is_datasets_available @@ -429,14 +436,14 @@ def test_training_gradient_checkpointing_use_reentrant_false(self): def test_generate_with_head_masking(self): pass + @require_torch_fp16 def test_generate_fp16(self): config, input_dict = self.model_tester.prepare_config_and_inputs() config.max_target_positions = 400 input_features = input_dict["input_features"] model = WhisperForConditionalGeneration(config).eval().to(torch_device) - if torch_device == "cuda": - input_features = input_features.half() - model.half() + input_features = input_features.half() + model.half() model.generate(input_features) model.generate(input_features, num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3) From e94cc7678317629087264d518d4f78738dbc70cc Mon Sep 17 00:00:00 2001 From: statelesshz Date: Mon, 30 Oct 2023 21:28:23 +0800 Subject: [PATCH 3/5] make style --- tests/generation/test_utils.py | 8 ++++---- tests/models/bark/test_modeling_bark.py | 6 +++--- tests/models/bart/test_modeling_bart.py | 8 ++++---- .../bigbird_pegasus/test_modeling_bigbird_pegasus.py | 8 ++++---- tests/models/blenderbot/test_modeling_blenderbot.py | 10 +++++----- .../blenderbot_small/test_modeling_blenderbot_small.py | 4 ++-- tests/models/blip/test_modeling_blip.py | 4 ++-- tests/models/blip_2/test_modeling_blip_2.py | 8 +++++++- tests/models/deit/test_modeling_deit.py | 2 +- tests/models/fsmt/test_modeling_fsmt.py | 8 ++++---- tests/models/jukebox/test_modeling_jukebox.py | 6 +++--- tests/models/led/test_modeling_led.py | 8 ++++---- tests/models/m2m_100/test_modeling_m2m_100.py | 8 ++++---- tests/models/marian/test_modeling_marian.py | 8 ++++---- tests/models/mbart/test_modeling_mbart.py | 2 +- tests/models/mega/test_modeling_mega.py | 4 ++-- tests/models/mistral/test_modeling_mistral.py | 8 ++++---- tests/models/musicgen/test_modeling_musicgen.py | 6 +++--- 
tests/models/mvp/test_modeling_mvp.py | 8 ++++---- tests/models/opt/test_modeling_opt.py | 2 +- tests/models/owlv2/test_modeling_owlv2.py | 8 ++++---- tests/models/owlvit/test_modeling_owlvit.py | 8 ++++---- tests/models/pegasus/test_modeling_pegasus.py | 2 +- tests/models/persimmon/test_modeling_persimmon.py | 8 ++++---- tests/models/plbart/test_modeling_plbart.py | 2 +- .../speech_to_text/test_modeling_speech_to_text.py | 3 --- .../test_modeling_switch_transformers.py | 9 ++++++++- tests/models/wav2vec2/test_modeling_wav2vec2.py | 2 +- .../test_modeling_wav2vec2_conformer.py | 9 ++++++++- tests/models/whisper/test_modeling_whisper.py | 4 ++-- tests/models/xglm/test_modeling_xglm.py | 6 +++--- 31 files changed, 102 insertions(+), 85 deletions(-) diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py index 76775a9d3f8f..42b67be91a1c 100644 --- a/tests/generation/test_utils.py +++ b/tests/generation/test_utils.py @@ -23,10 +23,10 @@ from transformers import is_torch_available, pipeline from transformers.testing_utils import ( - require_accelerate, - require_torch, - require_torch_multi_accelerator, - slow, + require_accelerate, + require_torch, + require_torch_multi_accelerator, + slow, torch_device, ) diff --git a/tests/models/bark/test_modeling_bark.py b/tests/models/bark/test_modeling_bark.py index 42184bb85e38..4186a72628e3 100644 --- a/tests/models/bark/test_modeling_bark.py +++ b/tests/models/bark/test_modeling_bark.py @@ -34,9 +34,9 @@ ) from transformers.testing_utils import ( require_torch, - require_torch_fp16, - require_torch_gpu, - slow, + require_torch_fp16, + require_torch_gpu, + slow, torch_device, ) from transformers.utils import cached_property diff --git a/tests/models/bart/test_modeling_bart.py b/tests/models/bart/test_modeling_bart.py index 8526c8e7797a..5e79de87c4c0 100644 --- a/tests/models/bart/test_modeling_bart.py +++ b/tests/models/bart/test_modeling_bart.py @@ -23,11 +23,11 @@ from transformers import BartConfig, is_torch_available from transformers.testing_utils import ( - require_sentencepiece, - require_tokenizers, - require_torch, + require_sentencepiece, + require_tokenizers, + require_torch, require_torch_fp16, - slow, + slow, torch_device, ) from transformers.utils import cached_property diff --git a/tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py b/tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py index c0a03b3f6737..90b71a7b8292 100644 --- a/tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py +++ b/tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py @@ -21,11 +21,11 @@ from transformers import BigBirdPegasusConfig, is_torch_available from transformers.testing_utils import ( - require_sentencepiece, - require_tokenizers, - require_torch, + require_sentencepiece, + require_tokenizers, + require_torch, require_torch_fp16, - slow, + slow, torch_device, ) diff --git a/tests/models/blenderbot/test_modeling_blenderbot.py b/tests/models/blenderbot/test_modeling_blenderbot.py index 19b88dcef8ea..5b0018ba2a4a 100644 --- a/tests/models/blenderbot/test_modeling_blenderbot.py +++ b/tests/models/blenderbot/test_modeling_blenderbot.py @@ -20,11 +20,11 @@ from transformers import BlenderbotConfig, is_torch_available from transformers.testing_utils import ( backend_empty_cache, - require_sentencepiece, - require_tokenizers, - require_torch, - require_torch_fp16, - slow, + require_sentencepiece, + require_tokenizers, + require_torch, + require_torch_fp16, + slow, torch_device, ) from transformers.utils 
import cached_property diff --git a/tests/models/blenderbot_small/test_modeling_blenderbot_small.py b/tests/models/blenderbot_small/test_modeling_blenderbot_small.py index b8078312c83a..5e63dd67edd3 100644 --- a/tests/models/blenderbot_small/test_modeling_blenderbot_small.py +++ b/tests/models/blenderbot_small/test_modeling_blenderbot_small.py @@ -20,8 +20,8 @@ from transformers import BlenderbotSmallConfig, is_torch_available from transformers.testing_utils import ( require_torch, - require_torch_fp16, - slow, + require_torch_fp16, + slow, torch_device, ) from transformers.utils import cached_property diff --git a/tests/models/blip/test_modeling_blip.py b/tests/models/blip/test_modeling_blip.py index 1faa28fe00f3..4792757f9118 100644 --- a/tests/models/blip/test_modeling_blip.py +++ b/tests/models/blip/test_modeling_blip.py @@ -25,9 +25,9 @@ from transformers import BlipConfig, BlipTextConfig, BlipVisionConfig from transformers.testing_utils import ( - require_torch, + require_torch, require_torch_accelerator, - require_torch_fp16, + require_torch_fp16, require_vision, slow, torch_device, diff --git a/tests/models/blip_2/test_modeling_blip_2.py b/tests/models/blip_2/test_modeling_blip_2.py index 910166c4a228..dd87961372d2 100644 --- a/tests/models/blip_2/test_modeling_blip_2.py +++ b/tests/models/blip_2/test_modeling_blip_2.py @@ -23,7 +23,13 @@ import requests from transformers import CONFIG_MAPPING, Blip2Config, Blip2QFormerConfig, Blip2VisionConfig -from transformers.testing_utils import require_torch, require_torch_multi_accelerator, require_vision, slow, torch_device +from transformers.testing_utils import ( + require_torch, + require_torch_multi_accelerator, + require_vision, + slow, + torch_device, +) from transformers.utils import is_torch_available, is_vision_available from ...test_configuration_common import ConfigTester diff --git a/tests/models/deit/test_modeling_deit.py b/tests/models/deit/test_modeling_deit.py index 7071def61210..4a9945a731fd 100644 --- a/tests/models/deit/test_modeling_deit.py +++ b/tests/models/deit/test_modeling_deit.py @@ -24,8 +24,8 @@ from transformers.testing_utils import ( require_accelerate, require_torch, - require_torch_fp16, require_torch_accelerator, + require_torch_fp16, require_vision, slow, torch_device, diff --git a/tests/models/fsmt/test_modeling_fsmt.py b/tests/models/fsmt/test_modeling_fsmt.py index 6ebc9833a35b..da73b8d41d99 100644 --- a/tests/models/fsmt/test_modeling_fsmt.py +++ b/tests/models/fsmt/test_modeling_fsmt.py @@ -21,10 +21,10 @@ from transformers import FSMTConfig, is_torch_available from transformers.testing_utils import ( - require_sentencepiece, - require_tokenizers, - require_torch, - require_torch_fp16, + require_sentencepiece, + require_tokenizers, + require_torch, + require_torch_fp16, slow, torch_device, ) diff --git a/tests/models/jukebox/test_modeling_jukebox.py b/tests/models/jukebox/test_modeling_jukebox.py index 718bf908e07b..e47377e776bb 100644 --- a/tests/models/jukebox/test_modeling_jukebox.py +++ b/tests/models/jukebox/test_modeling_jukebox.py @@ -17,10 +17,10 @@ from transformers import is_torch_available from transformers.testing_utils import ( - require_torch, - require_torch_accelerator, + require_torch, + require_torch_accelerator, require_torch_fp16, - slow, + slow, torch_device, ) from transformers.trainer_utils import set_seed diff --git a/tests/models/led/test_modeling_led.py b/tests/models/led/test_modeling_led.py index a549fce6ec5e..120308db90d8 100644 --- a/tests/models/led/test_modeling_led.py 
+++ b/tests/models/led/test_modeling_led.py @@ -22,11 +22,11 @@ from transformers import LEDConfig, is_torch_available from transformers.models.auto import get_values from transformers.testing_utils import ( - require_sentencepiece, - require_tokenizers, - require_torch, + require_sentencepiece, + require_tokenizers, + require_torch, require_torch_fp16, - slow, + slow, torch_device, ) from transformers.utils import cached_property diff --git a/tests/models/m2m_100/test_modeling_m2m_100.py b/tests/models/m2m_100/test_modeling_m2m_100.py index 5f928b3a9b7b..39790917488d 100644 --- a/tests/models/m2m_100/test_modeling_m2m_100.py +++ b/tests/models/m2m_100/test_modeling_m2m_100.py @@ -21,11 +21,11 @@ from transformers import M2M100Config, is_torch_available from transformers.testing_utils import ( - require_sentencepiece, - require_tokenizers, - require_torch, + require_sentencepiece, + require_tokenizers, + require_torch, require_torch_fp16, - slow, + slow, torch_device, ) from transformers.utils import cached_property diff --git a/tests/models/marian/test_modeling_marian.py b/tests/models/marian/test_modeling_marian.py index ea0966519f37..53a67c20459f 100644 --- a/tests/models/marian/test_modeling_marian.py +++ b/tests/models/marian/test_modeling_marian.py @@ -21,11 +21,11 @@ from transformers import MarianConfig, is_torch_available from transformers.testing_utils import ( - require_sentencepiece, - require_tokenizers, - require_torch, + require_sentencepiece, + require_tokenizers, + require_torch, require_torch_fp16, - slow, + slow, torch_device, ) from transformers.utils import cached_property diff --git a/tests/models/mbart/test_modeling_mbart.py b/tests/models/mbart/test_modeling_mbart.py index 3a324f8c6d5d..3cabf7d999aa 100644 --- a/tests/models/mbart/test_modeling_mbart.py +++ b/tests/models/mbart/test_modeling_mbart.py @@ -25,7 +25,7 @@ require_tokenizers, require_torch, require_torch_fp16, - slow, + slow, torch_device, ) from transformers.utils import cached_property diff --git a/tests/models/mega/test_modeling_mega.py b/tests/models/mega/test_modeling_mega.py index e63d155fbc71..a67ee0d00328 100644 --- a/tests/models/mega/test_modeling_mega.py +++ b/tests/models/mega/test_modeling_mega.py @@ -19,9 +19,9 @@ from transformers import MegaConfig, is_torch_available from transformers.testing_utils import ( TestCasePlus, - require_torch, + require_torch, require_torch_fp16, - slow, + slow, torch_device, ) diff --git a/tests/models/mistral/test_modeling_mistral.py b/tests/models/mistral/test_modeling_mistral.py index 843f9ebceb60..4bcb722c144e 100644 --- a/tests/models/mistral/test_modeling_mistral.py +++ b/tests/models/mistral/test_modeling_mistral.py @@ -24,10 +24,10 @@ from transformers import AutoTokenizer, MistralConfig, is_torch_available from transformers.testing_utils import ( backend_empty_cache, - require_flash_attn, - require_torch, - require_torch_gpu, - slow, + require_flash_attn, + require_torch, + require_torch_gpu, + slow, torch_device, ) diff --git a/tests/models/musicgen/test_modeling_musicgen.py b/tests/models/musicgen/test_modeling_musicgen.py index 5bae58ed7215..2cd662bfe576 100644 --- a/tests/models/musicgen/test_modeling_musicgen.py +++ b/tests/models/musicgen/test_modeling_musicgen.py @@ -29,10 +29,10 @@ T5Config, ) from transformers.testing_utils import ( - is_torch_available, - require_torch, + is_torch_available, + require_torch, require_torch_fp16, - slow, + slow, torch_device, ) from transformers.utils import cached_property diff --git 
a/tests/models/mvp/test_modeling_mvp.py b/tests/models/mvp/test_modeling_mvp.py index ce6e46e500ab..ab9dbd95f751 100644 --- a/tests/models/mvp/test_modeling_mvp.py +++ b/tests/models/mvp/test_modeling_mvp.py @@ -23,11 +23,11 @@ from transformers import MvpConfig, is_torch_available from transformers.testing_utils import ( - require_sentencepiece, - require_tokenizers, - require_torch, + require_sentencepiece, + require_tokenizers, + require_torch, require_torch_fp16, - slow, + slow, torch_device, ) from transformers.utils import cached_property diff --git a/tests/models/opt/test_modeling_opt.py b/tests/models/opt/test_modeling_opt.py index 1c252ab97d52..9845fcd4695f 100644 --- a/tests/models/opt/test_modeling_opt.py +++ b/tests/models/opt/test_modeling_opt.py @@ -22,7 +22,7 @@ import timeout_decorator # noqa from transformers import OPTConfig, is_torch_available -from transformers.testing_utils import require_torch, require_torch_fp16, require_torch_accelerator, slow, torch_device +from transformers.testing_utils import require_torch, require_torch_accelerator, require_torch_fp16, slow, torch_device from ...generation.test_utils import GenerationTesterMixin from ...test_configuration_common import ConfigTester diff --git a/tests/models/owlv2/test_modeling_owlv2.py b/tests/models/owlv2/test_modeling_owlv2.py index 6c76736631a6..d9913596ac5c 100644 --- a/tests/models/owlv2/test_modeling_owlv2.py +++ b/tests/models/owlv2/test_modeling_owlv2.py @@ -25,11 +25,11 @@ from transformers import Owlv2Config, Owlv2TextConfig, Owlv2VisionConfig from transformers.testing_utils import ( - require_torch, - require_torch_accelerator, + require_torch, + require_torch_accelerator, require_torch_fp16, - require_vision, - slow, + require_vision, + slow, torch_device, ) from transformers.utils import is_torch_available, is_vision_available diff --git a/tests/models/owlvit/test_modeling_owlvit.py b/tests/models/owlvit/test_modeling_owlvit.py index a107c7281dc0..8edbf411f7b9 100644 --- a/tests/models/owlvit/test_modeling_owlvit.py +++ b/tests/models/owlvit/test_modeling_owlvit.py @@ -25,11 +25,11 @@ from transformers import OwlViTConfig, OwlViTTextConfig, OwlViTVisionConfig from transformers.testing_utils import ( - require_torch, - require_torch_accelerator, + require_torch, + require_torch_accelerator, require_torch_fp16, - require_vision, - slow, + require_vision, + slow, torch_device, ) from transformers.utils import is_torch_available, is_vision_available diff --git a/tests/models/pegasus/test_modeling_pegasus.py b/tests/models/pegasus/test_modeling_pegasus.py index 231f2af70592..fbf79650f45e 100644 --- a/tests/models/pegasus/test_modeling_pegasus.py +++ b/tests/models/pegasus/test_modeling_pegasus.py @@ -23,7 +23,7 @@ require_tokenizers, require_torch, require_torch_fp16, - slow, + slow, torch_device, ) from transformers.utils import cached_property diff --git a/tests/models/persimmon/test_modeling_persimmon.py b/tests/models/persimmon/test_modeling_persimmon.py index b2587f90028a..0ffb999145be 100644 --- a/tests/models/persimmon/test_modeling_persimmon.py +++ b/tests/models/persimmon/test_modeling_persimmon.py @@ -23,10 +23,10 @@ from transformers import PersimmonConfig, is_torch_available, set_seed from transformers.testing_utils import ( backend_empty_cache, - require_torch, - require_torch_accelerator, - require_torch_fp16, - slow, + require_torch, + require_torch_accelerator, + require_torch_fp16, + slow, torch_device, ) diff --git a/tests/models/plbart/test_modeling_plbart.py 
b/tests/models/plbart/test_modeling_plbart.py index 57a8a2d22aea..0d5274b01819 100644 --- a/tests/models/plbart/test_modeling_plbart.py +++ b/tests/models/plbart/test_modeling_plbart.py @@ -25,7 +25,7 @@ require_tokenizers, require_torch, require_torch_fp16, - slow, + slow, torch_device, ) from transformers.utils import cached_property diff --git a/tests/models/speech_to_text/test_modeling_speech_to_text.py b/tests/models/speech_to_text/test_modeling_speech_to_text.py index 31b338565a4d..2c4fc268e8d9 100644 --- a/tests/models/speech_to_text/test_modeling_speech_to_text.py +++ b/tests/models/speech_to_text/test_modeling_speech_to_text.py @@ -325,7 +325,6 @@ def test_training(self): def test_training_gradient_checkpointing(self): pass -<<<<<<< HEAD @unittest.skip( reason="This architecure seem to not compute gradients properly when using GC, check: https://github.com/huggingface/transformers/pull/27124" ) @@ -338,9 +337,7 @@ def test_training_gradient_checkpointing_use_reentrant(self): def test_training_gradient_checkpointing_use_reentrant_false(self): pass -======= @require_torch_fp16 ->>>>>>> add decorator `require_torch_fp16` def test_generate_fp16(self): config, input_dict = self.model_tester.prepare_config_and_inputs() input_features = input_dict["input_features"] diff --git a/tests/models/switch_transformers/test_modeling_switch_transformers.py b/tests/models/switch_transformers/test_modeling_switch_transformers.py index 194e43d4703f..7c2fb88acda4 100644 --- a/tests/models/switch_transformers/test_modeling_switch_transformers.py +++ b/tests/models/switch_transformers/test_modeling_switch_transformers.py @@ -19,7 +19,14 @@ import unittest from transformers import SwitchTransformersConfig, is_torch_available -from transformers.testing_utils import require_tokenizers, require_torch, require_torch_accelerator, require_torch_bf16, slow, torch_device +from transformers.testing_utils import ( + require_tokenizers, + require_torch, + require_torch_accelerator, + require_torch_bf16, + slow, + torch_device, +) from ...generation.test_utils import GenerationTesterMixin from ...test_configuration_common import ConfigTester diff --git a/tests/models/wav2vec2/test_modeling_wav2vec2.py b/tests/models/wav2vec2/test_modeling_wav2vec2.py index 9aa72f1c4a6e..cb943520db68 100644 --- a/tests/models/wav2vec2/test_modeling_wav2vec2.py +++ b/tests/models/wav2vec2/test_modeling_wav2vec2.py @@ -28,8 +28,8 @@ from transformers import Wav2Vec2Config, is_torch_available from transformers.testing_utils import ( - backend_empty_cache, CaptureLogger, + backend_empty_cache, is_pt_flax_cross_test, is_pyctcdecode_available, is_torchaudio_available, diff --git a/tests/models/wav2vec2_conformer/test_modeling_wav2vec2_conformer.py b/tests/models/wav2vec2_conformer/test_modeling_wav2vec2_conformer.py index 3bf71bb1c814..5c7bfd0a9bee 100644 --- a/tests/models/wav2vec2_conformer/test_modeling_wav2vec2_conformer.py +++ b/tests/models/wav2vec2_conformer/test_modeling_wav2vec2_conformer.py @@ -21,7 +21,14 @@ from datasets import load_dataset from transformers import Wav2Vec2ConformerConfig, is_torch_available -from transformers.testing_utils import is_pt_flax_cross_test, require_torch, require_torch_accelerator, require_torch_fp16, slow, torch_device +from transformers.testing_utils import ( + is_pt_flax_cross_test, + require_torch, + require_torch_accelerator, + require_torch_fp16, + slow, + torch_device, +) from ...test_configuration_common import ConfigTester from ...test_modeling_common import ( diff --git 
a/tests/models/whisper/test_modeling_whisper.py b/tests/models/whisper/test_modeling_whisper.py index d1d27303a1e5..9bb835360887 100644 --- a/tests/models/whisper/test_modeling_whisper.py +++ b/tests/models/whisper/test_modeling_whisper.py @@ -25,11 +25,11 @@ import transformers from transformers import WhisperConfig from transformers.testing_utils import ( - is_pt_flax_cross_test, + is_pt_flax_cross_test, require_torch, require_torch_fp16, require_torchaudio, - slow, + slow, torch_device, ) from transformers.utils import cached_property, is_flax_available, is_torch_available diff --git a/tests/models/xglm/test_modeling_xglm.py b/tests/models/xglm/test_modeling_xglm.py index cdfcb757a5d2..235d9e19001a 100644 --- a/tests/models/xglm/test_modeling_xglm.py +++ b/tests/models/xglm/test_modeling_xglm.py @@ -20,10 +20,10 @@ from transformers import XGLMConfig, is_torch_available from transformers.testing_utils import ( - require_torch, - require_torch_accelerator, + require_torch, + require_torch_accelerator, require_torch_fp16, - slow, + slow, torch_device, ) From 14c32ad9550b5a7421ecc5f277f45df2b49d119e Mon Sep 17 00:00:00 2001 From: "Hz, Ji" Date: Wed, 1 Nov 2023 00:42:45 +0800 Subject: [PATCH 4/5] apply review suggestion --- tests/models/gptj/test_modeling_gptj.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/models/gptj/test_modeling_gptj.py b/tests/models/gptj/test_modeling_gptj.py index 37d57bbc937b..4fc41ba6d310 100644 --- a/tests/models/gptj/test_modeling_gptj.py +++ b/tests/models/gptj/test_modeling_gptj.py @@ -563,6 +563,7 @@ def test_gptj_sample(self): output_seq_tt_strs = tokenizer.batch_decode(output_seq_tt, skip_special_tokens=True) if torch_device != "cpu": + # currently this expect value is only for `cuda` EXPECTED_OUTPUT_STR = ( "Today is a nice day and I've already been enjoying it. I walked to work with my wife" ) From f0762cc61b52d66420968f2c29d93b330bb6577d Mon Sep 17 00:00:00 2001 From: "Hz, Ji" Date: Wed, 1 Nov 2023 00:48:30 +0800 Subject: [PATCH 5/5] Oops, the fp16 decorator was misused --- tests/models/jukebox/test_modeling_jukebox.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/models/jukebox/test_modeling_jukebox.py b/tests/models/jukebox/test_modeling_jukebox.py index e47377e776bb..ea0ee1397773 100644 --- a/tests/models/jukebox/test_modeling_jukebox.py +++ b/tests/models/jukebox/test_modeling_jukebox.py @@ -370,7 +370,6 @@ def test_sampling(self): @slow @require_torch_accelerator - @require_torch_fp16 @skip("Not enough GPU memory on CI runners") def test_slow_sampling(self): model = JukeboxModel.from_pretrained(self.model_id, min_duration=0).eval()
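
Editor's note (not part of the patches above): the series leans on device-agnostic helpers from transformers.testing_utils — require_torch_accelerator, require_torch_multi_accelerator, require_torch_fp16 and backend_empty_cache — whose definitions are not shown here. The snippet below is only a minimal sketch, assuming a torch install with an optional CUDA/XPU backend, of how such helpers could look; the real implementations in transformers.testing_utils may differ, and the simplified torch_device resolution here is an assumption for illustration.

import unittest

import torch

# Simplified device resolution for the sketch; the library resolves this more carefully.
torch_device = "cuda" if torch.cuda.is_available() else "cpu"


def require_torch_accelerator(test_case):
    # Skip unless some non-CPU torch backend is available.
    return unittest.skipUnless(torch_device != "cpu", "test requires an accelerator")(test_case)


def require_torch_fp16(test_case):
    # Skip unless the current device is expected to run float16 kernels.
    # Assumption: CPU fp16 coverage is too limited for these generation tests.
    fp16_ok = torch_device != "cpu"
    return unittest.skipUnless(fp16_ok, "test requires a device with fp16 support")(test_case)


def backend_empty_cache(device: str) -> None:
    # Dispatch cache clearing to whichever backend owns `device`; CPU is a no-op.
    if device == "cuda":
        torch.cuda.empty_cache()
    elif device == "xpu" and hasattr(torch, "xpu"):
        torch.xpu.empty_cache()
    # Other accelerators (npu, mps, ...) would add their own branch here.

With helpers along these lines, a test body can call model.half() unconditionally under @require_torch_fp16 and call backend_empty_cache(torch_device) instead of torch.cuda.empty_cache(), which is exactly the transformation the hunks above apply to make the tests runnable on non-CUDA accelerators.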