From 626e21bf271307cab2146d1476c55392df537748 Mon Sep 17 00:00:00 2001
From: arvinder004
Date: Fri, 3 Oct 2025 15:54:55 +0530
Subject: [PATCH 1/3] rewritten test_offline_mode

---
 tests/utils/test_offline.py | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/tests/utils/test_offline.py b/tests/utils/test_offline.py
index 357005eb575b..ff11c26dfb98 100644
--- a/tests/utils/test_offline.py
+++ b/tests/utils/test_offline.py
@@ -22,7 +22,6 @@ class OfflineTests(TestCasePlus):
     @require_torch
-    @unittest.skip("This test is failing on main")  # TODO matt/ydshieh, this test needs to be fixed
     def test_offline_mode(self):
         # this test is a bit tricky since TRANSFORMERS_OFFLINE can only be changed before
         # `transformers` is loaded, and it's too late for inside pytest - so we are changing it
         # while running an external program
@@ -49,17 +48,12 @@ def test_offline_mode(self):
 def offline_socket(*args, **kwargs): raise RuntimeError("Offline mode is enabled, we shouldn't access internet")
 socket.socket = offline_socket
 """
+        # First subprocess run to warm the cache (online, no mocking)
+        stdout, _ = self._execute_with_env(load, run)
+        self.assertIn("success", stdout)
 
-        # Force fetching the files so that we can use the cache
-        mname = "hf-internal-testing/tiny-random-bert"
-        BertConfig.from_pretrained(mname)
-        BertModel.from_pretrained(mname)
-        BertTokenizer.from_pretrained(mname)
-        pipeline(task="fill-mask", model=mname)
-
-        # baseline - just load from_pretrained with normal network
-        # should succeed as TRANSFORMERS_OFFLINE=1 tells it to use local files
-        stdout, _ = self._execute_with_env(load, run, mock, TRANSFORMERS_OFFLINE="1")
+        # Second subprocess run in offline mode: ensure no network and use local cache only
+        stdout, _ = self._execute_with_env(load, mock, run, HF_HUB_OFFLINE="1")
         self.assertIn("success", stdout)
 
     @require_torch

From c190625b48d737180873f3caba8eaaa3da93269c Mon Sep 17 00:00:00 2001
From: arvinder004
Date: Fri, 3 Oct 2025 16:12:19 +0530
Subject: [PATCH 2/3] removed unused imports

---
 tests/models/gpt2/test_modeling_gpt2.py | 29 +++++++++++++++++++++++++
 tests/utils/test_offline.py             |  1 -
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/tests/models/gpt2/test_modeling_gpt2.py b/tests/models/gpt2/test_modeling_gpt2.py
index 89a4bf545310..1dec76759078 100644
--- a/tests/models/gpt2/test_modeling_gpt2.py
+++ b/tests/models/gpt2/test_modeling_gpt2.py
@@ -281,6 +281,35 @@ def test_training_gradient_checkpointing_use_reentrant_false(self):
         super().test_training_gradient_checkpointing_use_reentrant_false()
         self.all_model_classes = self.original_all_model_classes
 
+    @require_torch_gpu
+    def test_dtype_device_parity_logits_fp32_cpu_vs_fp16_cuda(self):
+        # minimal parity check: logits should be close across dtype/device for the same weights
+        torch.manual_seed(0)
+        tester = self.model_tester
+        config, inputs_dict = tester.prepare_config_and_inputs_for_common()
+
+        # Create a single base model on CPU in fp32
+        base_model = GPT2LMHeadModel(config).eval()
+
+        with torch.no_grad():
+            # Run on CPU fp32
+            cpu_inputs = {k: v.to("cpu") for k, v in inputs_dict.items()}
+            cpu_logits = base_model(**cpu_inputs).logits
+
+            # Clone weights to a CUDA fp16 copy
+            cuda_model = GPT2LMHeadModel(config).eval()
+            cuda_model.load_state_dict(base_model.state_dict())
+            cuda_model = cuda_model.to("cuda", dtype=torch.float16)
+
+            cuda_inputs = {k: v.to("cuda") for k, v in inputs_dict.items()}
+            cuda_logits = cuda_model(**cuda_inputs).logits.to(dtype=torch.float32, device="cpu")
+
+        # Compare with relaxed tolerances to accommodate dtype differences
+        self.assertEqual(cpu_logits.shape, cuda_logits.shape)
+        max_abs_diff = (cpu_logits - cuda_logits).abs().max().item()
+        # fp16 numerical noise tolerance
+        self.assertLessEqual(max_abs_diff, 5e-3)
+
 
 @require_torch
 class GPT2ModelLanguageGenerationTest(unittest.TestCase):
diff --git a/tests/utils/test_offline.py b/tests/utils/test_offline.py
index ff11c26dfb98..9671cc804a39 100644
--- a/tests/utils/test_offline.py
+++ b/tests/utils/test_offline.py
@@ -14,7 +14,6 @@
 
 import subprocess
 import sys
-import unittest
 
 from transformers import BertConfig, BertModel, BertTokenizer, pipeline
 from transformers.testing_utils import TestCasePlus, require_torch

From f3503a21ae888951455b41f549100d9a55f3d2a1 Mon Sep 17 00:00:00 2001
From: arvinder004
Date: Fri, 3 Oct 2025 16:32:39 +0530
Subject: [PATCH 3/3] removed unrelated changes

---
 tests/models/gpt2/test_modeling_gpt2.py | 29 -------------------------
 1 file changed, 29 deletions(-)

diff --git a/tests/models/gpt2/test_modeling_gpt2.py b/tests/models/gpt2/test_modeling_gpt2.py
index 1dec76759078..89a4bf545310 100644
--- a/tests/models/gpt2/test_modeling_gpt2.py
+++ b/tests/models/gpt2/test_modeling_gpt2.py
@@ -281,35 +281,6 @@ def test_training_gradient_checkpointing_use_reentrant_false(self):
         super().test_training_gradient_checkpointing_use_reentrant_false()
         self.all_model_classes = self.original_all_model_classes
 
-    @require_torch_gpu
-    def test_dtype_device_parity_logits_fp32_cpu_vs_fp16_cuda(self):
-        # minimal parity check: logits should be close across dtype/device for the same weights
-        torch.manual_seed(0)
-        tester = self.model_tester
-        config, inputs_dict = tester.prepare_config_and_inputs_for_common()
-
-        # Create a single base model on CPU in fp32
-        base_model = GPT2LMHeadModel(config).eval()
-
-        with torch.no_grad():
-            # Run on CPU fp32
-            cpu_inputs = {k: v.to("cpu") for k, v in inputs_dict.items()}
-            cpu_logits = base_model(**cpu_inputs).logits
-
-            # Clone weights to a CUDA fp16 copy
-            cuda_model = GPT2LMHeadModel(config).eval()
-            cuda_model.load_state_dict(base_model.state_dict())
-            cuda_model = cuda_model.to("cuda", dtype=torch.float16)
-
-            cuda_inputs = {k: v.to("cuda") for k, v in inputs_dict.items()}
-            cuda_logits = cuda_model(**cuda_inputs).logits.to(dtype=torch.float32, device="cpu")
-
-        # Compare with relaxed tolerances to accommodate dtype differences
-        self.assertEqual(cpu_logits.shape, cuda_logits.shape)
-        max_abs_diff = (cpu_logits - cuda_logits).abs().max().item()
-        # fp16 numerical noise tolerance
-        self.assertLessEqual(max_abs_diff, 5e-3)
-
 
 @require_torch
 class GPT2ModelLanguageGenerationTest(unittest.TestCase):
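
The pattern patch 1/3 relies on is worth spelling out: as the test's own comment notes, TRANSFORMERS_OFFLINE (and likewise HF_HUB_OFFLINE) can only be changed before `transformers` is imported, so the test cannot toggle the flag in-process and instead runs the loading code in a fresh interpreter with a modified environment. Below is a minimal stdlib-only sketch of that pattern, assuming the `_execute_with_env` helper referenced in the diff wraps a similar subprocess call; the model name, the loading code, and HF_HUB_OFFLINE="1" come from the patch itself, while `run_in_subprocess` and `LOAD_SNIPPET` are illustrative names, not transformers API.

    import os
    import subprocess
    import sys

    # The code under test, executed in a child interpreter so that the
    # environment variables are in place before `transformers` is imported.
    LOAD_SNIPPET = """
    from transformers import BertConfig, BertModel, BertTokenizer, pipeline

    mname = "hf-internal-testing/tiny-random-bert"
    BertConfig.from_pretrained(mname)
    BertModel.from_pretrained(mname)
    BertTokenizer.from_pretrained(mname)
    pipeline(task="fill-mask", model=mname)
    print("success")
    """

    def run_in_subprocess(extra_env):
        # Merge the extra variables into a copy of the current environment;
        # check=True makes a non-zero exit raise CalledProcessError.
        env = {**os.environ, **extra_env}
        result = subprocess.run(
            [sys.executable, "-c", LOAD_SNIPPET],
            env=env,
            capture_output=True,
            text=True,
            check=True,
        )
        return result.stdout

    # First run online to warm the local cache, then rerun fully offline.
    assert "success" in run_in_subprocess({})
    assert "success" in run_in_subprocess({"HF_HUB_OFFLINE": "1"})

In the actual offline run, the patch additionally injects the `mock` snippet, which replaces `socket.socket` with a function that raises RuntimeError, so any attempted network access fails loudly instead of silently succeeding; that guard is omitted from the sketch above for brevity.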