Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 17 additions & 5 deletions python/paddle/vision/models/alexnet.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -175,20 +175,32 @@ def _alexnet(arch, pretrained, **kwargs):


def alexnet(pretrained=False, **kwargs):
"""AlexNet model
"""
AlexNet model

Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False.

pretrained (bool, optional): If True, returns a model pre-trained on ImageNet. Default: False.
**kwargs: Additional keyword arguments. For details, please refer to :ref:`AlexNet <api_paddle_vision_models_AlexNet>`.

Returns:
the model of alexnet.

Examples:
.. code-block:: python

:name: code-example
import paddle
from paddle.vision.models import alexnet

# build model
model = alexnet()

# build model and load imagenet pretrained weight
# model = alexnet(pretrained=True)

x = paddle.rand([1, 3, 224, 224])
out = model(x)

print(out.shape)
# [1, 1000]
"""
return _alexnet('alexnet', pretrained, **kwargs)
9 changes: 5 additions & 4 deletions python/paddle/vision/models/mobilenetv1.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,14 @@ class MobileNetV1(nn.Layer):
`"MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications" <https://arxiv.org/abs/1704.04861>`_.

Args:
scale (float): scale of channels in each layer. Default: 1.0.
num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer
scale (float, optional): scale of channels in each layer. Default: 1.0.
num_classes (int, optional): output dim of last fc layer. If num_classes <=0, last fc layer
will not be defined. Default: 1000.
with_pool (bool): use pool before the last fc layer or not. Default: True.
with_pool (bool, optional): use pool before the last fc layer or not. Default: True.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这三个参数都是optional哦

Examples:
.. code-block:: python

:name: code-example1
import paddle
from paddle.vision.models import MobileNetV1

Expand All @@ -75,6 +75,7 @@ class MobileNetV1(nn.Layer):
out = model(x)

print(out.shape)
# [1, 1000]
"""

def __init__(self, scale=1.0, num_classes=1000, with_pool=True):
Expand Down
11 changes: 6 additions & 5 deletions python/paddle/vision/models/mobilenetv2.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -75,14 +75,14 @@ class MobileNetV2(nn.Layer):
`"MobileNetV2: Inverted Residuals and Linear Bottlenecks" <https://arxiv.org/abs/1801.04381>`_.

Args:
scale (float): scale of channels in each layer. Default: 1.0.
num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer
scale (float, optional): scale of channels in each layer. Default: 1.0.
num_classes (int, optional): output dim of last fc layer. If num_classes <=0, last fc layer
will not be defined. Default: 1000.
with_pool (bool): use pool before the last fc layer or not. Default: True.
with_pool (bool, optional): use pool before the last fc layer or not. Default: True.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这三个参数也都是optional哦

Examples:
.. code-block:: python

:name: code-example1
import paddle
from paddle.vision.models import MobileNetV2

Expand All @@ -92,6 +92,7 @@ class MobileNetV2(nn.Layer):
out = model(x)

print(out.shape)
# [1, 1000]
"""

def __init__(self, scale=1.0, num_classes=1000, with_pool=True):
Expand Down
12 changes: 10 additions & 2 deletions python/paddle/vision/models/vgg.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,15 @@ class VGG(nn.Layer):

Args:
features (nn.Layer): VGG features created by the function make_layers.
num_classes (int): Output dim of last fc layer. If num_classes <=0, last fc layer
num_classes (int, optional): Output dim of last fc layer. If num_classes <=0, last fc layer
will not be defined. Default: 1000.
with_pool (bool): Use pool before the last three fc layer or not. Default: True.
with_pool (bool, optional): Use pool before the last three fc layers or not. Default: True.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

后两个参数是optional哦

Examples:
.. code-block:: python
:name: code-example

import paddle
from paddle.vision.models import VGG
from paddle.vision.models.vgg import make_layers

Expand All @@ -49,6 +51,12 @@ class VGG(nn.Layer):

vgg11 = VGG(features)

x = paddle.rand([1, 3, 224, 224])
out = vgg11(x)

print(out.shape)
# [1, 1000]

"""

def __init__(self, features, num_classes=1000, with_pool=True):
Expand Down
37 changes: 20 additions & 17 deletions python/paddle/vision/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -951,7 +951,7 @@ def psroi_pool(x, boxes, boxes_num, output_size, spatial_scale=1.0, name=None):
boxes_num (Tensor): The number of boxes contained in each picture in the batch.
output_size (int|Tuple(int, int)): The pooled output size (H, W), data type
is int32. If int, H and W are both equal to output_size.
spatial_scale (float): Multiplicative spatial scale factor to translate ROI coords from their
spatial_scale (float, optional): Multiplicative spatial scale factor to translate ROI coords from their
input scale to the scale used when pooling. Default: 1.0
name(str, optional): The default value is None.
Normally there is no need for user to set this property.
Expand All @@ -963,12 +963,15 @@ def psroi_pool(x, boxes, boxes_num, output_size, spatial_scale=1.0, name=None):

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

spatial_scale是可选参数

Examples:
.. code-block:: python

:name: code-example1

import paddle
x = paddle.uniform([2, 490, 28, 28], dtype='float32')
boxes = paddle.to_tensor([[1, 5, 8, 10], [4, 2, 6, 7], [12, 12, 19, 21]], dtype='float32')
boxes_num = paddle.to_tensor([1, 2], dtype='int32')
pool_out = paddle.vision.ops.psroi_pool(x, boxes, boxes_num, 7, 1.0)
print(pool_out.shape)
# [3, 10, 7, 7]
"""

check_type(output_size, 'output_size', (int, tuple, list), 'psroi_pool')
Expand Down Expand Up @@ -1014,7 +1017,7 @@ class PSRoIPool(Layer):
Args:
output_size (int|Tuple(int, int)): The pooled output size (H, W), data type
is int32. If int, H and W are both equal to output_size.
spatial_scale (float): Multiplicative spatial scale factor to translate ROI coords from their
spatial_scale (float, optional): Multiplicative spatial scale factor to translate ROI coords from their
input scale to the scale used when pooling. Default: 1.0.

Shape:
Expand All @@ -1025,19 +1028,19 @@ class PSRoIPool(Layer):
The output_channels equal to C / (pooled_h * pooled_w), where C is the channels of input.

Returns:
None
None.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • spatial_scale是optional哦
  • Returns:
    None. 加个.

Examples:
.. code-block:: python

:name: code-example1
import paddle
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

示例代码增加一些输出数据展示?


psroi_module = paddle.vision.ops.PSRoIPool(7, 1.0)
x = paddle.uniform([2, 490, 28, 28], dtype='float32')
boxes = paddle.to_tensor([[1, 5, 8, 10], [4, 2, 6, 7], [12, 12, 19, 21]], dtype='float32')
boxes_num = paddle.to_tensor([1, 2], dtype='int32')
pool_out = psroi_module(x, boxes, boxes_num)

print(pool_out.shape) # [3, 10, 7, 7]
"""

def __init__(self, output_size, spatial_scale=1.0):
Expand Down Expand Up @@ -1187,7 +1190,7 @@ def roi_align(x,
aligned=True,
name=None):
"""
This operator implements the roi_align layer.
Implements the roi_align layer.
Region of Interest (RoI) Align operator (also known as RoI Align) is to
perform bilinear interpolation on inputs of nonuniform sizes to obtain
fixed-size feature maps (e.g. 7*7), as described in Mask R-CNN.
Expand All @@ -1211,31 +1214,31 @@ def roi_align(x,
the batch, the data type is int32.
output_size (int or Tuple[int, int]): The pooled output size(h, w), data
type is int32. If int, h and w are both equal to output_size.
spatial_scale (float32): Multiplicative spatial scale factor to translate
spatial_scale (float32, optional): Multiplicative spatial scale factor to translate
ROI coords from their input scale to the scale used when pooling.
Default: 1.0
sampling_ratio (int32): number of sampling points in the interpolation
Default: 1.0.
sampling_ratio (int32, optional): number of sampling points in the interpolation
grid used to compute the output value of each pooled output bin.
If > 0, then exactly ``sampling_ratio x sampling_ratio`` sampling
points per bin are used.
If <= 0, then an adaptive number of grid points are used (computed
as ``ceil(roi_width / output_width)``, and likewise for height).
Default: -1
aligned (bool): If False, use the legacy implementation. If True, pixel
Default: -1.
aligned (bool, optional): If False, use the legacy implementation. If True, pixel
shift the box coordinates by -0.5 for a better alignment with the
two neighboring pixel indices. This version is used in Detectron2.
Default: True
Default: True.
name(str, optional): For detailed information, please refer to
:ref:`api_guide_Name`. Usually name does not need to be set and is None by
default.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • 描述中去掉「This operator」
  • 注意参数是否optional
  • 句尾需要加.

Returns:
Tensor: The output of ROIAlignOp is a 4-D tensor with shape (num_boxes,
The output of ROIAlignOp is a 4-D tensor with shape (num_boxes,
channels, pooled_h, pooled_w). The data type is float32 or float64.

Examples:
.. code-block:: python

:name: code-example1
import paddle
from paddle.vision.ops import roi_align

Expand Down Expand Up @@ -1306,12 +1309,12 @@ class RoIAlign(Layer):
when pooling. Default: 1.0

Returns:
align_out (Tensor): The output of ROIAlign operator is a 4-D tensor with
The output of ROIAlign operator is a 4-D tensor with
shape (num_boxes, channels, pooled_h, pooled_w).

Examples:
.. code-block:: python

:name: code-example1
import paddle
from paddle.vision.ops import RoIAlign

Expand Down
17 changes: 9 additions & 8 deletions python/paddle/vision/transforms/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -666,8 +666,8 @@ class Normalize(BaseTransform):
``output[channel] = (input[channel] - mean[channel]) / std[channel]``

Args:
mean (int|float|list|tuple): Sequence of means for each channel.
std (int|float|list|tuple): Sequence of standard deviations for each channel.
mean (int|float|list|tuple, optional): Sequence of means for each channel.
std (int|float|list|tuple, optional): Sequence of standard deviations for each channel.
data_format (str, optional): Data format of img, should be 'HWC' or
'CHW'. Default: 'CHW'.
to_rgb (bool, optional): Whether to convert to rgb. Default: False.
Expand All @@ -683,20 +683,21 @@ class Normalize(BaseTransform):
Examples:

.. code-block:: python

import numpy as np
from PIL import Image
:name: code-example
import paddle
from paddle.vision.transforms import Normalize

normalize = Normalize(mean=[127.5, 127.5, 127.5],
normalize = Normalize(mean=[127.5, 127.5, 127.5],
std=[127.5, 127.5, 127.5],
data_format='HWC')

fake_img = Image.fromarray((np.random.rand(300, 320, 3) * 255.).astype(np.uint8))
fake_img = paddle.rand([300,320,3]).numpy() * 255.

fake_img = normalize(fake_img)
print(fake_img.shape)
print(fake_img.max, fake_img.max)
# (300, 320, 3)
print(fake_img.max(), fake_img.min())
# 0.99999905 -0.999974

"""

Expand Down