From 6090db5ddf3bc06bfcf167b0dcb2f71582c8eb98 Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Mon, 13 Dec 2021 11:23:33 +0100
Subject: [PATCH 01/11] Adding some slow tests to check Perceiver at least
 from a high level.

---
 .../perceiver/feature_extraction_perceiver.py |  1 -
 .../models/perceiver/modeling_perceiver.py    | 10 ++++
 src/transformers/pipelines/__init__.py        |  2 +-
 tests/test_pipelines_image_classification.py  | 46 +++++++++++++++++++
 4 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/src/transformers/models/perceiver/feature_extraction_perceiver.py b/src/transformers/models/perceiver/feature_extraction_perceiver.py
index a15c7df204bd..fb5fb91af9e9 100644
--- a/src/transformers/models/perceiver/feature_extraction_perceiver.py
+++ b/src/transformers/models/perceiver/feature_extraction_perceiver.py
@@ -185,5 +185,4 @@ def __call__(
         # return as BatchFeature
         data = {"pixel_values": images}
         encoded_inputs = BatchFeature(data=data, tensor_type=return_tensors)
-
         return encoded_inputs
diff --git a/src/transformers/models/perceiver/modeling_perceiver.py b/src/transformers/models/perceiver/modeling_perceiver.py
index c0d3a69b35dd..68752e7b14b5 100755
--- a/src/transformers/models/perceiver/modeling_perceiver.py
+++ b/src/transformers/models/perceiver/modeling_perceiver.py
@@ -1268,6 +1268,7 @@ def forward(
         output_hidden_states=None,
         labels=None,
         return_dict=None,
+        pixel_values=None,
     ):
         r"""
         labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
@@ -1296,6 +1297,10 @@ def forward(
             >>> predicted_class_idx = logits.argmax(-1).item()
             >>> print("Predicted class:", model.config.id2label[predicted_class_idx])
         """
+        if inputs is not None and pixel_values is not None:
+            raise ValueError("You cannot use both `inputs` and `pixel_values`")
+        elif inputs is None and pixel_values is not None:
+            inputs = pixel_values
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
 
         outputs = self.perceiver(
@@ -1399,6 +1404,7 @@ def forward(
         output_hidden_states=None,
         labels=None,
         return_dict=None,
+        pixel_values=None,
     ):
         r"""
         labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
@@ -1427,6 +1433,10 @@ def forward(
             >>> predicted_class_idx = logits.argmax(-1).item()
             >>> print("Predicted class:", model.config.id2label[predicted_class_idx])
         """
+        if inputs is not None and pixel_values is not None:
+            raise ValueError("You cannot use both `inputs` and `pixel_values`")
+        elif inputs is None and pixel_values is not None:
+            inputs = pixel_values
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
 
         outputs = self.perceiver(
diff --git a/src/transformers/pipelines/__init__.py b/src/transformers/pipelines/__init__.py
index 2c4cc1688ead..d7a54e0e72ed 100755
--- a/src/transformers/pipelines/__init__.py
+++ b/src/transformers/pipelines/__init__.py
@@ -528,7 +528,7 @@ def pipeline(
     load_tokenizer = type(model_config) in TOKENIZER_MAPPING or model_config.tokenizer_class is not None
     load_feature_extractor = type(model_config) in FEATURE_EXTRACTOR_MAPPING or feature_extractor is not None
 
-    if task in {"audio-classification"}:
+    if task in {"audio-classification", "image-classification"}:
         # Audio classification will never require a tokenizer.
         # the model on the other hand might have a tokenizer, but
         # the files could be missing from the hub, instead of failing
diff --git a/tests/test_pipelines_image_classification.py b/tests/test_pipelines_image_classification.py
index f61ffea2df55..bf4404c96ba0 100644
--- a/tests/test_pipelines_image_classification.py
+++ b/tests/test_pipelines_image_classification.py
@@ -28,6 +28,7 @@
     require_tf,
     require_torch,
     require_vision,
+    slow,
 )
 
 from .test_pipelines_common import ANY, PipelineTestCaseMeta
@@ -167,3 +168,48 @@ def test_custom_tokenizer(self):
         image_classifier = pipeline("image-classification", model="lysandre/tiny-vit-random", tokenizer=tokenizer)
 
         self.assertIs(image_classifier.tokenizer, tokenizer)
+
+    @slow
+    @require_torch
+    def test_perceiver(self):
+        # Perceiver is not tested by `run_pipeline_test` properly.
+        # That is because the type of feature_extractor and model preprocessor need to be kept
+        # in sync, which is not the case in the current design
+        image_classifier = pipeline("image-classification", model="deepmind/vision-perceiver-conv")
+        outputs = image_classifier("http://images.cocodataset.org/val2017/000000039769.jpg")
+        self.assertEqual(
+            nested_simplify(outputs, decimals=4),
+            [
+                {"score": 0.4385, "label": "tabby, tabby cat"},
+                {"score": 0.321, "label": "tiger cat"},
+                {"score": 0.0502, "label": "Egyptian cat"},
+                {"score": 0.0137, "label": "crib, cot"},
+                {"score": 0.007, "label": "radiator"},
+            ],
+        )
+
+        image_classifier = pipeline("image-classification", model="deepmind/vision-perceiver-fourier")
+        outputs = image_classifier("http://images.cocodataset.org/val2017/000000039769.jpg")
+        self.assertEqual(
+            nested_simplify(outputs, decimals=4),
+            [
+                {"score": 0.5658, "label": "tabby, tabby cat"},
+                {"score": 0.1309, "label": "tiger cat"},
+                {"score": 0.0722, "label": "Egyptian cat"},
+                {"score": 0.0707, "label": "remote control, remote"},
+                {"score": 0.0082, "label": "computer keyboard, keypad"},
+            ],
+        )
+
+        image_classifier = pipeline("image-classification", model="deepmind/vision-perceiver-learned")
+        outputs = image_classifier("http://images.cocodataset.org/val2017/000000039769.jpg")
+        self.assertEqual(
+            nested_simplify(outputs, decimals=4),
+            [
+                {"score": 0.3022, "label": "tabby, tabby cat"},
+                {"score": 0.2362, "label": "Egyptian cat"},
+                {"score": 0.1856, "label": "tiger cat"},
+                {"score": 0.0324, "label": "remote control, remote"},
+                {"score": 0.0096, "label": "quilt, comforter, comfort, puff"},
+            ],
+        )

From 7c8b18e59b78c8ee970082e8e21d089c4b2614fa Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Mon, 13 Dec 2021 12:09:02 +0100
Subject: [PATCH 02/11] Re-enabling fast tests for Perceiver
 ImageClassification.

---
 tests/test_modeling_perceiver.py             | 20 ++++++++++++++++
 tests/test_pipelines_common.py               | 24 ++++++++++++++------
 tests/test_pipelines_image_classification.py | 14 ++----------
 3 files changed, 39 insertions(+), 19 deletions(-)

diff --git a/tests/test_modeling_perceiver.py b/tests/test_modeling_perceiver.py
index d6fba44c5818..853b7547b55c 100644
--- a/tests/test_modeling_perceiver.py
+++ b/tests/test_modeling_perceiver.py
@@ -130,6 +130,8 @@ def __init__(
 
     def prepare_config_and_inputs(self, model_class=None):
         config = self.get_config()
+        if model_class is not None:
+            config = self.update_config_with_model_class(model_class)
 
         input_mask = None
         sequence_labels = None
@@ -191,6 +193,24 @@ def get_config(self):
             num_labels=self.num_labels,
         )
 
+    def update_config_with_model_class(self, config, model_class):
+        # Perceiver is a bit specific since `d_model` needs to be defined
+        # ahead of time in the config, but the actual desired `d_model` might
+        # depend on the model class we want to use so this is an escape hatch
+        # to enable adjusting `d_model` after we know which head we intend to
+        # use
+        if model_class.__name__ == "PerceiverForImageClassificationLearned":
+            config.d_model = 512
+        elif model_class.__name__ == "PerceiverForImageClassificationFourier":
+            config.d_model = 261
+        elif model_class.__name__ == "PerceiverForImageClassificationConvProcessing":
+            config.d_model = 322
+        elif model_class.__name__ == "PerceiverForOpticalFlow":
+            config.d_model = 322
+        elif model_class.__name__ == "PerceiverForMultimodalAutoencoding":
+            config.d_model = 409
+        return config
+
     def get_pipeline_config(self):
         config = self.get_config()
         # Byte level vocab
diff --git a/tests/test_pipelines_common.py b/tests/test_pipelines_common.py
index eaa67579c706..9a53a4d8f6a8 100644
--- a/tests/test_pipelines_common.py
+++ b/tests/test_pipelines_common.py
@@ -54,7 +54,7 @@ def get_checkpoint_from_architecture(architecture):
         logger.warning(f"Can't retrieve checkpoint from {architecture.__name__}")
 
 
-def get_tiny_config_from_class(configuration_class):
+def get_tiny_config_from_class(configuration_class, model_architecture):
     if "OpenAIGPT" in configuration_class.__name__:
         # This is the only file that is inconsistent with the naming scheme.
         # Will rename this file if we decide this is the way to go
@@ -77,12 +77,17 @@ def get_tiny_config_from_class(configuration_class):
     model_tester = model_tester_class(parent=None)
 
     if hasattr(model_tester, "get_pipeline_config"):
-        return model_tester.get_pipeline_config()
+        config = model_tester.get_pipeline_config()
     elif hasattr(model_tester, "get_config"):
-        return model_tester.get_config()
+        config = model_tester.get_config()
     else:
+        config = None
         logger.warning(f"Model tester {model_tester_class.__name__} has no `get_config()`.")
 
+    if hasattr(model_tester, "update_config_with_model_class"):
+        config = model_tester.update_config_with_model_class(config, model_architecture)
+    return config
+
 
 @lru_cache(maxsize=100)
 def get_tiny_tokenizer_from_checkpoint(checkpoint):
@@ -100,11 +105,14 @@ def get_tiny_tokenizer_from_checkpoint(checkpoint):
     return tokenizer
 
 
-def get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config):
+def get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config, feature_extractor_class):
     try:
         feature_extractor = AutoFeatureExtractor.from_pretrained(checkpoint)
     except Exception:
-        feature_extractor = None
+        try:
+            feature_extractor = feature_extractor_class()
+        except Exception:
+            feature_extractor = None
     if hasattr(tiny_config, "image_size") and feature_extractor:
         feature_extractor = feature_extractor.__class__(size=tiny_config.image_size, crop_size=tiny_config.image_size)
 
@@ -168,7 +176,9 @@ def test(self):
                         self.skipTest(f"Ignoring {ModelClass}, cannot create a simple tokenizer")
                 else:
                     tokenizer = None
-                feature_extractor = get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config)
+                feature_extractor = get_tiny_feature_extractor_from_checkpoint(
+                    checkpoint, tiny_config, feature_extractor_class
+                )
 
                 if tokenizer is None and feature_extractor is None:
                     self.skipTest(
@@ -209,7 +219,7 @@ def data(n):
 
                     for model_architecture in model_architectures:
                         checkpoint = get_checkpoint_from_architecture(model_architecture)
-                        tiny_config = get_tiny_config_from_class(configuration)
+                        tiny_config = get_tiny_config_from_class(configuration, model_architecture)
                         tokenizer_classes = TOKENIZER_MAPPING.get(configuration, [])
                         feature_extractor_class = FEATURE_EXTRACTOR_MAPPING.get(configuration, None)
                         feature_extractor_name = (
diff --git a/tests/test_pipelines_image_classification.py b/tests/test_pipelines_image_classification.py
index bf4404c96ba0..4a85a97e31a4 100644
--- a/tests/test_pipelines_image_classification.py
+++ b/tests/test_pipelines_image_classification.py
@@ -14,12 +14,7 @@
 
 import unittest
 
-from transformers import (
-    MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
-    PerceiverConfig,
-    PreTrainedTokenizer,
-    is_vision_available,
-)
+from transformers import MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING, PreTrainedTokenizer, is_vision_available
 from transformers.pipelines import ImageClassificationPipeline, pipeline
 from transformers.testing_utils import (
     is_pipeline_test,
@@ -51,12 +46,7 @@ class ImageClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest
     model_mapping = MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING
 
     def get_test_pipeline(self, model, tokenizer, feature_extractor):
-        if isinstance(model.config, PerceiverConfig):
-            self.skipTest(
-                "Perceiver model tester is defined with a language one, which has no feature_extractor, so the automated test cannot work here"
-            )
-
-        image_classifier = ImageClassificationPipeline(model=model, feature_extractor=feature_extractor)
+        image_classifier = ImageClassificationPipeline(model=model, feature_extractor=feature_extractor, top_k=2)
         examples = [
             Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
             "http://images.cocodataset.org/val2017/000000039769.jpg",

From 780d8c0b45464a9abee030d6334f129f322bb871 Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Mon, 13 Dec 2021 12:16:37 +0100
Subject: [PATCH 03/11] Perceiver might try to run some text-only pipelines
 without a Tokenizer (Fast doesn't exist) and with a FeatureExtractor.

---
 tests/test_pipelines_text_classification.py | 2 ++
 tests/test_pipelines_zero_shot.py           | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/tests/test_pipelines_text_classification.py b/tests/test_pipelines_text_classification.py
index 39deed9bee55..43f3adc7609c 100644
--- a/tests/test_pipelines_text_classification.py
+++ b/tests/test_pipelines_text_classification.py
@@ -73,6 +73,8 @@ def test_tf_bert(self):
         self.assertEqual(nested_simplify(outputs), [{"label": "POSITIVE", "score": 0.988}])
 
     def get_test_pipeline(self, model, tokenizer, feature_extractor):
+        if tokenizer is None:
+            self.skipTest("This test cannot work without a tokenizer, Perceiver ?")
         text_classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
         return text_classifier, ["HuggingFace is in", "This is another test"]
 
diff --git a/tests/test_pipelines_zero_shot.py b/tests/test_pipelines_zero_shot.py
index ed564581e526..a283bb12f327 100644
--- a/tests/test_pipelines_zero_shot.py
+++ b/tests/test_pipelines_zero_shot.py
@@ -32,6 +32,8 @@ class ZeroShotClassificationPipelineTests(unittest.TestCase, metaclass=PipelineT
     tf_model_mapping = TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
 
     def get_test_pipeline(self, model, tokenizer, feature_extractor):
+        if tokenizer is None:
+            self.skipTest("This test cannot work without a tokenizer, Perceiver ?")
         classifier = ZeroShotClassificationPipeline(
             model=model, tokenizer=tokenizer, candidate_labels=["polics", "health"]
         )

From ff7b5c5eb7fb2b7a9c7a701a8274982f36c9f429 Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Mon, 13 Dec 2021 12:52:25 +0100
Subject: [PATCH 04/11] Oops.

---
 tests/test_modeling_perceiver.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_modeling_perceiver.py b/tests/test_modeling_perceiver.py
index 853b7547b55c..01f1084ca57c 100644
--- a/tests/test_modeling_perceiver.py
+++ b/tests/test_modeling_perceiver.py
@@ -131,7 +131,7 @@ def __init__(
     def prepare_config_and_inputs(self, model_class=None):
         config = self.get_config()
         if model_class is not None:
-            config = self.update_config_with_model_class(model_class)
+            config = self.update_config_with_model_class(config, model_class)
 
         input_mask = None
         sequence_labels = None

From 55d00619fb501976b93f83eb4c30094b14180bbe Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Mon, 13 Dec 2021 14:39:20 +0100
Subject: [PATCH 05/11] Adding a comment for `update_config_with_model_class`.

---
 tests/test_pipelines_common.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/test_pipelines_common.py b/tests/test_pipelines_common.py
index 9a53a4d8f6a8..9edef64561a8 100644
--- a/tests/test_pipelines_common.py
+++ b/tests/test_pipelines_common.py
@@ -85,6 +85,8 @@ def get_tiny_config_from_class(configuration_class, model_architecture):
         logger.warning(f"Model tester {model_tester_class.__name__} has no `get_config()`.")
 
     if hasattr(model_tester, "update_config_with_model_class"):
+        # Some models are very specific in testing mode since the config might depend
+        # on `model_architecture`. This is rare: PerceiverConfig only for now.
         config = model_tester.update_config_with_model_class(config, model_architecture)
     return config
 

From d56ad2a32924868773c28d1719dd1fcae5b81135 Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Mon, 13 Dec 2021 17:15:27 +0100
Subject: [PATCH 06/11] Remove `model_architecture` to get `tiny_config`.

---
 tests/test_pipelines_common.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_pipelines_common.py b/tests/test_pipelines_common.py
index 9edef64561a8..56b218ade54a 100644
--- a/tests/test_pipelines_common.py
+++ b/tests/test_pipelines_common.py
@@ -54,7 +54,7 @@ def get_checkpoint_from_architecture(architecture):
         logger.warning(f"Can't retrieve checkpoint from {architecture.__name__}")
 
 
-def get_tiny_config_from_class(configuration_class, model_architecture):
+def get_tiny_config_from_class(configuration_class):
     if "OpenAIGPT" in configuration_class.__name__:
         # This is the only file that is inconsistent with the naming scheme.
         # Will rename this file if we decide this is the way to go
@@ -221,7 +221,7 @@ def data(n):
 
                     for model_architecture in model_architectures:
                         checkpoint = get_checkpoint_from_architecture(model_architecture)
-                        tiny_config = get_tiny_config_from_class(configuration, model_architecture)
+                        tiny_config = get_tiny_config_from_class(configuration)
                         tokenizer_classes = TOKENIZER_MAPPING.get(configuration, [])
                         feature_extractor_class = FEATURE_EXTRACTOR_MAPPING.get(configuration, None)
                         feature_extractor_name = (

From e81db3a64eb1feccdf496c0d58c1f9344e50d851 Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Mon, 13 Dec 2021 18:57:32 +0100
Subject: [PATCH 07/11] Finalize rebase.

---
 tests/test_pipelines_common.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tests/test_pipelines_common.py b/tests/test_pipelines_common.py
index 56b218ade54a..0b67479d3405 100644
--- a/tests/test_pipelines_common.py
+++ b/tests/test_pipelines_common.py
@@ -84,10 +84,6 @@ def get_tiny_config_from_class(configuration_class):
         config = None
         logger.warning(f"Model tester {model_tester_class.__name__} has no `get_config()`.")
 
-    if hasattr(model_tester, "update_config_with_model_class"):
-        # Some models are very specific in testing mode since the config might depend
-        # on `model_architecture`. This is rare: PerceiverConfig only for now.
-        config = model_tester.update_config_with_model_class(config, model_architecture)
     return config
 
 

From d07a4cbf5d1faeefb0b0b4cfcdfc1c2d6287cb94 Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Mon, 13 Dec 2021 19:03:35 +0100
Subject: [PATCH 08/11] Smarter way to handle undefined FastTokenizer.

---
 tests/test_pipelines_common.py              | 7 +++++++
 tests/test_pipelines_text_classification.py | 2 --
 tests/test_pipelines_zero_shot.py           | 2 --
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/tests/test_pipelines_common.py b/tests/test_pipelines_common.py
index 0b67479d3405..9c7b1c6d03a3 100644
--- a/tests/test_pipelines_common.py
+++ b/tests/test_pipelines_common.py
@@ -226,6 +226,13 @@ def data(n):
                         if not tokenizer_classes:
                             # We need to test even if there are no tokenizers.
                             tokenizer_classes = [None]
+                        else:
+                            # Remove the undefined (`None`) tokenizer entries.
+                            # ByT5 and Perceiver are byte-level and don't define a
+                            # FastTokenizer, so we can just ignore those.
+                            tokenizer_classes = [
+                                tokenizer_class for tokenizer_class in tokenizer_classes if tokenizer_class is not None
+                            ]
 
                         for tokenizer_class in tokenizer_classes:
                             if tokenizer_class is not None:
diff --git a/tests/test_pipelines_text_classification.py b/tests/test_pipelines_text_classification.py
index 43f3adc7609c..39deed9bee55 100644
--- a/tests/test_pipelines_text_classification.py
+++ b/tests/test_pipelines_text_classification.py
@@ -73,8 +73,6 @@ def test_tf_bert(self):
         self.assertEqual(nested_simplify(outputs), [{"label": "POSITIVE", "score": 0.988}])
 
     def get_test_pipeline(self, model, tokenizer, feature_extractor):
-        if tokenizer is None:
-            self.skipTest("This test cannot work without a tokenizer, Perceiver ?")
         text_classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
         return text_classifier, ["HuggingFace is in", "This is another test"]
 
diff --git a/tests/test_pipelines_zero_shot.py b/tests/test_pipelines_zero_shot.py
index a283bb12f327..ed564581e526 100644
--- a/tests/test_pipelines_zero_shot.py
+++ b/tests/test_pipelines_zero_shot.py
@@ -32,8 +32,6 @@ class ZeroShotClassificationPipelineTests(unittest.TestCase, metaclass=PipelineT
     tf_model_mapping = TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
 
     def get_test_pipeline(self, model, tokenizer, feature_extractor):
-        if tokenizer is None:
-            self.skipTest("This test cannot work without a tokenizer, Perceiver ?")
         classifier = ZeroShotClassificationPipeline(
             model=model, tokenizer=tokenizer, candidate_labels=["polics", "health"]
         )

From d87f4b519a090279921bab495421671b921c66bf Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Mon, 13 Dec 2021 19:09:03 +0100
Subject: [PATCH 09/11] Remove old code.

---
 tests/test_modeling_perceiver.py | 20 --------------------
 1 file changed, 20 deletions(-)

diff --git a/tests/test_modeling_perceiver.py b/tests/test_modeling_perceiver.py
index 01f1084ca57c..d6fba44c5818 100644
--- a/tests/test_modeling_perceiver.py
+++ b/tests/test_modeling_perceiver.py
@@ -130,8 +130,6 @@ def __init__(
 
     def prepare_config_and_inputs(self, model_class=None):
         config = self.get_config()
-        if model_class is not None:
-            config = self.update_config_with_model_class(config, model_class)
 
         input_mask = None
         sequence_labels = None
@@ -193,24 +191,6 @@ def get_config(self):
             num_labels=self.num_labels,
         )
 
-    def update_config_with_model_class(self, config, model_class):
-        # Perceiver is a bit specific since `d_model` needs to be defined
-        # ahead of time in the config, but the actual desired `d_model` might
-        # depend on the model class we want to use so this is an escape hatch
-        # to enable adjusting `d_model` after we know which head we intend to
-        # use
-        if model_class.__name__ == "PerceiverForImageClassificationLearned":
-            config.d_model = 512
-        elif model_class.__name__ == "PerceiverForImageClassificationFourier":
-            config.d_model = 261
-        elif model_class.__name__ == "PerceiverForImageClassificationConvProcessing":
-            config.d_model = 322
-        elif model_class.__name__ == "PerceiverForOpticalFlow":
-            config.d_model = 322
-        elif model_class.__name__ == "PerceiverForMultimodalAutoencoding":
-            config.d_model = 409
-        return config
-
     def get_pipeline_config(self):
         config = self.get_config()
         # Byte level vocab

From 3d2c3c3df0da3667b2cd8c228dd1904a76fd71fc Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Mon, 13 Dec 2021 19:55:06 +0100
Subject: [PATCH 10/11] Addressing some nits.

---
 .../models/perceiver/feature_extraction_perceiver.py            | 1 +
 src/transformers/pipelines/__init__.py                          | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/transformers/models/perceiver/feature_extraction_perceiver.py b/src/transformers/models/perceiver/feature_extraction_perceiver.py
index fb5fb91af9e9..a15c7df204bd 100644
--- a/src/transformers/models/perceiver/feature_extraction_perceiver.py
+++ b/src/transformers/models/perceiver/feature_extraction_perceiver.py
@@ -185,4 +185,5 @@ def __call__(
         # return as BatchFeature
         data = {"pixel_values": images}
         encoded_inputs = BatchFeature(data=data, tensor_type=return_tensors)
+
         return encoded_inputs
diff --git a/src/transformers/pipelines/__init__.py b/src/transformers/pipelines/__init__.py
index d7a54e0e72ed..c27bb4aef8e7 100755
--- a/src/transformers/pipelines/__init__.py
+++ b/src/transformers/pipelines/__init__.py
@@ -529,7 +529,7 @@ def pipeline(
     load_feature_extractor = type(model_config) in FEATURE_EXTRACTOR_MAPPING or feature_extractor is not None
 
     if task in {"audio-classification", "image-classification"}:
-        # Audio classification will never require a tokenizer.
+        # These will never require a tokenizer.
         # the model on the other hand might have a tokenizer, but
         # the files could be missing from the hub, instead of failing
         # on such repos, we just force to not load it.

From 51a102dfceee2f0cdf4a4f5aa9470dccc4963afb Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Mon, 13 Dec 2021 19:57:14 +0100
Subject: [PATCH 11/11] Don't instantiate `None`.

---
 tests/test_pipelines_common.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/test_pipelines_common.py b/tests/test_pipelines_common.py
index 9c7b1c6d03a3..ded85875c82f 100644
--- a/tests/test_pipelines_common.py
+++ b/tests/test_pipelines_common.py
@@ -108,7 +108,10 @@ def get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config, feature_
         feature_extractor = AutoFeatureExtractor.from_pretrained(checkpoint)
     except Exception:
         try:
-            feature_extractor = feature_extractor_class()
+            if feature_extractor_class is not None:
+                feature_extractor = feature_extractor_class()
+            else:
+                feature_extractor = None
         except Exception:
             feature_extractor = None
     if hasattr(tiny_config, "image_size") and feature_extractor: