huggingface · patrickvonplaten · Mar 10, 2022 · Mar 8, 2022 · Mar 8, 2022 · Mar 8, 2022
diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml
@@ -114,6 +114,8 @@
       title: Logging
     - local: main_classes/model
       title: Models
+    - local: main_classes/text_generation
+      title: Text Generation
     - local: main_classes/onnx
       title: ONNX
     - local: main_classes/optimizer_schedules

diff --git a/docs/source/main_classes/model.mdx b/docs/source/main_classes/model.mdx
@@ -86,14 +86,6 @@ Due to Pytorch design, this functionality is only available for floating dtypes.
     - push_to_hub
     - all
 
-## Generation
-
-[[autodoc]] generation_utils.GenerationMixin
-
-[[autodoc]] generation_tf_utils.TFGenerationMixin
-
-[[autodoc]] generation_flax_utils.FlaxGenerationMixin
-
 ## Pushing to the Hub
 
 [[autodoc]] file_utils.PushToHubMixin
diff --git a/docs/source/main_classes/text_generation.mdx b/docs/source/main_classes/text_generation.mdx
@@ -0,0 +1,39 @@
+<!--Copyright 2022 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+-->
+
+# Generation
+
+The methods for auto-regressive text generation, namely [`~generation_utils.GenerationMixin.generate`] (for the PyTorch models), [`~generation_tf_utils.TFGenerationMixin.generate`] (for the TensorFlow models) and [`~generation_flax_utils.FlaxGenerationMixin.generate`] (for the Flax/JAX models), are implemented in [`~generation_utils.GenerationMixin`], [`~generation_tf_utils.TFGenerationMixin`] and [`~generation_flax_utils.FlaxGenerationMixin`] respectively.
+
+The `GenerationMixin` classes are inherited by the corresponding base model classes, *i.e.* [`PreTrainedModel`], [`TFPreTrainedModel`], and [`FlaxPreTrainedModel`] respectively, therefore exposing all
+methods for auto-regressive text generation to every model class.
+
+## GenerationMixin
+
+[[autodoc]] generation_utils.GenerationMixin
+	- generate
+	- greedy_search
+	- sample
+	- beam_search
+	- beam_sample
+	- group_beam_search
+	- constrained_beam_search
+
+## TFGenerationMixin
+
+[[autodoc]] generation_tf_utils.TFGenerationMixin
+	- generate
+
+## FlaxGenerationMixin
+
+[[autodoc]] generation_flax_utils.FlaxGenerationMixin
+	- generate
diff --git a/src/transformers/generation_flax_utils.py b/src/transformers/generation_flax_utils.py
@@ -118,7 +118,16 @@ class BeamSearchState:
 
 class FlaxGenerationMixin:
     """
-    A class containing all of the functions supporting generation, to be used as a mixin in [`FlaxPreTrainedModel`].
+    A class containing all functions for auto-regressive text generation, to be used as a mixin in
+    [`FlaxPreTrainedModel`].
+
+    The class exposes [`~generation_flax_utils.FlaxGenerationMixin.generate`], which can be used for:
+            - *greedy decoding* by calling [`~generation_flax_utils.FlaxGenerationMixin._greedy_search`] if
+              `num_beams=1` and `do_sample=False`.
+            - *multinomial sampling* by calling [`~generation_flax_utils.FlaxGenerationMixin._sample`] if `num_beams=1`
+              and `do_sample=True`.
+            - *beam-search decoding* by calling [`~generation_flax_utils.FlaxGenerationMixin._beam_search`] if
+              `num_beams>1` and `do_sample=False`.
     """
 
     @staticmethod
@@ -176,12 +185,23 @@ def generate(
         **model_kwargs,
     ):
         r"""
-        Generates sequences for models with a language modeling head. The method currently supports greedy decoding,
-        and, multinomial sampling.
+        Generates sequences of token ids for models with a language modeling head. The method supports the following
+        generation methods for text-decoder, text-to-text, speech-to-text, and vision-to-text models:
 
-        Apart from `input_ids`, all the arguments below will default to the value of the attribute of the same name
-        inside the [`PretrainedConfig`] of the model. The default values indicated are the default values of those
-        config.
+            - *greedy decoding* by calling [`~generation_flax_utils.FlaxGenerationMixin._greedy_search`] if
+              `num_beams=1` and `do_sample=False`.
+            - *multinomial sampling* by calling [`~generation_flax_utils.FlaxGenerationMixin._sample`] if `num_beams=1`
+              and `do_sample=True`.
+            - *beam-search decoding* by calling [`~generation_flax_utils.FlaxGenerationMixin._beam_search`] if
+              `num_beams>1` and `do_sample=False`.
+
+        <Tip warning={true}>
+
+        Apart from `input_ids`, all the arguments below will default to the value of the attribute of the same name as
+        defined in the model's config (`config.json`) which in turn defaults to the
+        [`~configuration_utils.PretrainedConfig`] of the model.
+
+        </Tip>
 
         Most of these parameters are explained in more detail in [this blog
         post](https://huggingface.co/blog/how-to-generate).
@@ -236,7 +256,7 @@ def generate(
         >>> input_ids = tokenizer(input_context, return_tensors="np").input_ids
         >>> # generate candidates using sampling
         >>> outputs = model.generate(input_ids=input_ids, max_length=20, top_k=30, do_sample=True)
-        >>> print("Generated:", tokenizer.batch_decode(outputs, skip_special_tokens=True))
+        >>> tokenizer.batch_decode(outputs, skip_special_tokens=True)
         ```"""
         # set init values
         max_length = max_length if max_length is not None else self.config.max_length