PaddlePaddle · Ligoml · Jun 17, 2022 · Apr 28, 2022 · Apr 29, 2022 · Apr 29, 2022
diff --git a/python/paddle/vision/models/alexnet.py b/python/paddle/vision/models/alexnet.py
@@ -1,4 +1,4 @@
-# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -175,20 +175,33 @@ def _alexnet(arch, pretrained, **kwargs):
 
 
 def alexnet(pretrained=False, **kwargs):
-    """AlexNet model
+    """
+    AlexNet model
 
     Args:
-        pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False.
-
+        pretrained (bool, optional): If True, returns a model pre-trained on ImageNet. Default: False.
+        **kwargs: Additional keyword arguments. For details, please refer to :ref:`AlexNet <api_paddle_vision_models_AlexNet>`.
+
+    Returns:
+        The AlexNet model.
+
     Examples:
         .. code-block:: python
-
+          :name: code-example
+
+            import paddle
             from paddle.vision.models import alexnet
 
             # build model
             model = alexnet()
 
             # build model and load imagenet pretrained weight
             # model = alexnet(pretrained=True)
+
+            x = paddle.rand([1, 3, 224, 224])
+            out = model(x)
+
+            print(out.shape)
+            # [1, 1000]
     """
     return _alexnet('alexnet', pretrained, **kwargs)
diff --git a/python/paddle/vision/models/mobilenetv1.py b/python/paddle/vision/models/mobilenetv1.py
@@ -58,14 +58,15 @@ class MobileNetV1(nn.Layer):
     `"MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications" <https://arxiv.org/abs/1704.04861>`_.
 
     Args:
-        scale (float): scale of channels in each layer. Default: 1.0.
-        num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer 
+        scale (float, optional): scale of channels in each layer. Default: 1.0.
+        num_classes (int, optional): output dim of last fc layer. If num_classes <=0, last fc layer 
                             will not be defined. Default: 1000.
-        with_pool (bool): use pool before the last fc layer or not. Default: True.
+        with_pool (bool, optional): use pool before the last fc layer or not. Default: True.
 
     Examples:
         .. code-block:: python
-
+          :name: code-example1
+
             import paddle
             from paddle.vision.models import MobileNetV1
 
@@ -75,6 +75,7 @@ class MobileNetV1(nn.Layer):
             out = model(x)
 
             print(out.shape)
+            # [1, 1000]
     """
 
     def __init__(self, scale=1.0, num_classes=1000, with_pool=True):

diff --git a/python/paddle/vision/models/mobilenetv2.py b/python/paddle/vision/models/mobilenetv2.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -75,14 +75,15 @@ class MobileNetV2(nn.Layer):
     `"MobileNetV2: Inverted Residuals and Linear Bottlenecks" <https://arxiv.org/abs/1801.04381>`_.
 
     Args:
-        scale (float): scale of channels in each layer. Default: 1.0.
-        num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer 
+        scale (float, optional): scale of channels in each layer. Default: 1.0.
+        num_classes (int, optional): output dim of last fc layer. If num_classes <=0, last fc layer 
                             will not be defined. Default: 1000.
-        with_pool (bool): use pool before the last fc layer or not. Default: True.
+        with_pool (bool, optional): use pool before the last fc layer or not. Default: True.
 
     Examples:
         .. code-block:: python
-
+          :name: code-example1
+
             import paddle
             from paddle.vision.models import MobileNetV2
 
@@ -92,6 +92,7 @@ class MobileNetV2(nn.Layer):
             out = model(x)
 
             print(out.shape)
+            # [1, 1000]
     """
 
     def __init__(self, scale=1.0, num_classes=1000, with_pool=True):

diff --git a/python/paddle/vision/models/vgg.py b/python/paddle/vision/models/vgg.py
@@ -33,13 +33,15 @@ class VGG(nn.Layer):
 
     Args:
         features (nn.Layer): Vgg features create by function make_layers.
-        num_classes (int): Output dim of last fc layer. If num_classes <=0, last fc layer 
+        num_classes (int, optional): Output dim of last fc layer. If num_classes <=0, last fc layer 
                             will not be defined. Default: 1000.
-        with_pool (bool): Use pool before the last three fc layer or not. Default: True.
+        with_pool (bool, optional): Use pool before the last three fc layer or not. Default: True.
 
     Examples:
         .. code-block:: python
+          :name: code-example
 
+            import paddle
             from paddle.vision.models import VGG
             from paddle.vision.models.vgg import make_layers
 
@@ -49,6 +51,12 @@ class VGG(nn.Layer):
 
             vgg11 = VGG(features)
 
+            x = paddle.rand([1, 3, 224, 224])
+            out = vgg11(x)
+
+            print(out.shape)
+            # [1, 1000]
+
     """
 
     def __init__(self, features, num_classes=1000, with_pool=True):

diff --git a/python/paddle/vision/ops.py b/python/paddle/vision/ops.py
@@ -951,7 +951,7 @@ def psroi_pool(x, boxes, boxes_num, output_size, spatial_scale=1.0, name=None):
         boxes_num (Tensor): The number of boxes contained in each picture in the batch.
         output_size (int|Tuple(int, int))  The pooled output size(H, W), data type 
                                is int32. If int, H and W are both equal to output_size.
-        spatial_scale (float): Multiplicative spatial scale factor to translate ROI coords from their 
+        spatial_scale (float, optional): Multiplicative spatial scale factor to translate ROI coords from their 
                                input scale to the scale used when pooling. Default: 1.0
         name(str, optional): The default value is None.
                              Normally there is no need for user to set this property.
@@ -963,12 +963,15 @@ def psroi_pool(x, boxes, boxes_num, output_size, spatial_scale=1.0, name=None):
 
     Examples:
         .. code-block:: python
-
+          :name: code-example1
+
             import paddle
             x = paddle.uniform([2, 490, 28, 28], dtype='float32')
             boxes = paddle.to_tensor([[1, 5, 8, 10], [4, 2, 6, 7], [12, 12, 19, 21]], dtype='float32')
             boxes_num = paddle.to_tensor([1, 2], dtype='int32')
             pool_out = paddle.vision.ops.psroi_pool(x, boxes, boxes_num, 7, 1.0)
+            print(pool_out.shape)
+            # [3, 10, 7, 7]
     """
 
     check_type(output_size, 'output_size', (int, tuple, list), 'psroi_pool')
@@ -1014,7 +1017,7 @@ class PSRoIPool(Layer):
     Args:
         output_size (int|Tuple(int, int))  The pooled output size(H, W), data type 
                                is int32. If int, H and W are both equal to output_size.
-        spatial_scale (float): Multiplicative spatial scale factor to translate ROI coords from their 
+        spatial_scale (float, optional): Multiplicative spatial scale factor to translate ROI coords from their 
                                input scale to the scale used when pooling. Default: 1.0.
 
     Shape:
@@ -1025,19 +1028,20 @@ class PSRoIPool(Layer):
               The output_channels equal to C / (pooled_h * pooled_w), where C is the channels of input.
 
     Returns:
-        None
+        None.
 
     Examples:
         .. code-block:: python
-
+          :name: code-example1
+
             import paddle
 
             psroi_module = paddle.vision.ops.PSRoIPool(7, 1.0)
             x = paddle.uniform([2, 490, 28, 28], dtype='float32')
             boxes = paddle.to_tensor([[1, 5, 8, 10], [4, 2, 6, 7], [12, 12, 19, 21]], dtype='float32')
             boxes_num = paddle.to_tensor([1, 2], dtype='int32')
             pool_out = psroi_module(x, boxes, boxes_num)
-
+            print(pool_out.shape)  # [3, 10, 7, 7]
     """
 
     def __init__(self, output_size, spatial_scale=1.0):
@@ -1187,7 +1190,7 @@ def roi_align(x,
               aligned=True,
               name=None):
     """
-    This operator implements the roi_align layer.
+    Implement the roi_align layer.
     Region of Interest (RoI) Align operator (also known as RoI Align) is to
     perform bilinear interpolation on inputs of nonuniform sizes to obtain
     fixed-size feature maps (e.g. 7*7), as described in Mask R-CNN.
@@ -1211,31 +1214,32 @@ def roi_align(x,
             the batch, the data type is int32.
         output_size (int or Tuple[int, int]): The pooled output size(h, w), data
             type is int32. If int, h and w are both equal to output_size.
-        spatial_scale (float32): Multiplicative spatial scale factor to translate
+        spatial_scale (float32, optional): Multiplicative spatial scale factor to translate
             ROI coords from their input scale to the scale used when pooling.
-            Default: 1.0
-        sampling_ratio (int32): number of sampling points in the interpolation
+            Default: 1.0.
+        sampling_ratio (int32, optional): number of sampling points in the interpolation
             grid used to compute the output value of each pooled output bin.
             If > 0, then exactly ``sampling_ratio x sampling_ratio`` sampling
             points per bin are used.
             If <= 0, then an adaptive number of grid points are used (computed
             as ``ceil(roi_width / output_width)``, and likewise for height).
-            Default: -1
-        aligned (bool): If False, use the legacy implementation. If True, pixel
+            Default: -1.
+        aligned (bool, optional): If False, use the legacy implementation. If True, pixel
             shift the box coordinates it by -0.5 for a better alignment with the
             two neighboring pixel indices. This version is used in Detectron2.
-            Default: True
+            Default: True.
         name(str, optional): For detailed information, please refer to :
             ref:`api_guide_Name`. Usually name is no need to set and None by
             default.
 
     Returns:
-        Tensor: The output of ROIAlignOp is a 4-D tensor with shape (num_boxes,
+        The output of ROIAlignOp is a 4-D tensor with shape (num_boxes,
             channels, pooled_h, pooled_w). The data type is float32 or float64.
 
     Examples:
         .. code-block:: python
-
+          :name: code-example1
+
             import paddle
             from paddle.vision.ops import roi_align
 
@@ -1306,12 +1309,13 @@ class RoIAlign(Layer):
             when pooling. Default: 1.0
 
     Returns:
-        align_out (Tensor): The output of ROIAlign operator is a 4-D tensor with
+        The output of ROIAlign operator is a 4-D tensor with
             shape (num_boxes, channels, pooled_h, pooled_w).
 
     Examples:
         ..  code-block:: python
-
+          :name: code-example1
+
             import paddle
             from paddle.vision.ops import RoIAlign
 

diff --git a/python/paddle/vision/transforms/transforms.py b/python/paddle/vision/transforms/transforms.py
@@ -666,8 +666,8 @@ class Normalize(BaseTransform):
     ``output[channel] = (input[channel] - mean[channel]) / std[channel]``
 
     Args:
-        mean (int|float|list|tuple): Sequence of means for each channel.
-        std (int|float|list|tuple): Sequence of standard deviations for each channel.
+        mean (int|float|list|tuple, optional): Sequence of means for each channel.
+        std (int|float|list|tuple, optional): Sequence of standard deviations for each channel.
         data_format (str, optional): Data format of img, should be 'HWC' or 
             'CHW'. Default: 'CHW'.
         to_rgb (bool, optional): Whether to convert to rgb. Default: False.
@@ -683,20 +683,22 @@ class Normalize(BaseTransform):
     Examples:
 
         .. code-block:: python
-
-            import numpy as np
-            from PIL import Image
+          :name: code-example
+
+            import paddle
             from paddle.vision.transforms import Normalize
 
-            normalize = Normalize(mean=[127.5, 127.5, 127.5], 
+            normalize = Normalize(mean=[127.5, 127.5, 127.5],
                                   std=[127.5, 127.5, 127.5],
                                   data_format='HWC')
 
-            fake_img = Image.fromarray((np.random.rand(300, 320, 3) * 255.).astype(np.uint8))
+            fake_img = paddle.rand([300, 320, 3]).numpy() * 255.
 
             fake_img = normalize(fake_img)
             print(fake_img.shape)
-            print(fake_img.max, fake_img.max)
+            # (300, 320, 3)
+            print(fake_img.max(), fake_img.min())
+            # 0.99999905 -0.999974
 
     """