add dummy_v2 dataset (#84)

ClarkChin08 · web-flow · commit 5557751c03da · 2021-05-31T02:02:04.000+08:00
diff --git a/docs/dataset.md b/docs/dataset.md
diff --git a/examples/helloworld/tf_example4/README.md b/examples/helloworld/tf_example4/README.md
@@ -12,7 +12,8 @@ This example is used to demonstrate how to quantize a TensorFlow checkpoint and
 We will create a dummy dataloader and only need to add the following lines for quantization to create an int8 model.
     ```python
     quantizer = Quantization('./conf.yaml')
-    dataset = quantizer.dataset('dummy', shape=(100, 100, 100, 3), label=True)
+    dataset = quantizer.dataset('dummy_v2', \
+        input_shape=(100, 100, 3), label_shape=(1, ))
     quantizer.model = common.Model('./model/public/rfcn-resnet101-coco-tf/rfcn_resnet101_coco_2018_01_28/')
     quantizer.calib_dataloader = common.DataLoader(dataset)
     quantized_model = quantizer()
diff --git a/examples/helloworld/tf_example4/test.py b/examples/helloworld/tf_example4/test.py
@@ -9,7 +9,8 @@
 def main():
 
     quantizer = Quantization('./conf.yaml')
-    dataset = quantizer.dataset('dummy', shape=(100, 100, 100, 3), label=True)
+    dataset = quantizer.dataset('dummy_v2', \
+        input_shape=(100, 100, 3), label_shape=(1, ))
     quantizer.model = common.Model('./model/public/rfcn-resnet101-coco-tf/rfcn_resnet101_coco_2018_01_28/')
     quantizer.calib_dataloader = common.DataLoader(dataset)
     quantized_model = quantizer()
diff --git a/examples/tensorflow/style_transfer/style_tune.py b/examples/tensorflow/style_transfer/style_tune.py
@@ -139,8 +139,8 @@ def main(args=None):
               crop_ratio=0.2,
               resize_shape=(256, 256))
       else: 
-          dataset = DATASETS('tensorflow')['dummy']( \
-              shape=[(200, 256, 256, 3), (200, 256, 256, 3)], label=True) 
+          dataset = DATASETS('tensorflow')['dummy_v2'](\
+              input_shape=[(256, 256, 3), (256, 256, 3)], label_shape=(1, )) 
       dataloader = DATALOADERS['tensorflow'](dataset=dataset, batch_size=FLAGS.batch_size)
       tf.import_graph_def(frozen_graph, name='')
       style_transfer(sess, dataloader)
@@ -164,7 +164,7 @@ def style_transfer(sess, dataloader):
 
       stylized_images = sess.graph.get_tensor_by_name(output_name)
       
-      for (content_img_np, style_img_np), _ in dataloader:
+      for idx, ((content_img_np, style_img_np), _) in enumerate(dataloader):
           start_time = time.time()
           stylized_image_res = sess.run(
               stylized_images,
@@ -173,6 +173,8 @@ def style_transfer(sess, dataloader):
                   content_name: content_img_np})
           duration = time.time() - start_time
           time_list.append(duration)
+          if idx + 1 == 20:
+              break
       warm_up = 1
       throughput = (len(time_list) - warm_up)/ np.array(time_list[warm_up:]).sum()
       print('Batch size = {}'.format(FLAGS.batch_size)) 
diff --git a/lpot/conf/config.py b/lpot/conf/config.py
@@ -144,7 +144,7 @@ def input_to_list(data):
 
 def list_to_tuple(data):
     if isinstance(data, str):
-        return tuple([s.strip() for s in data.split(',')])
+        return tuple([int(s.strip()) for s in data.split(',')])
 
     elif isinstance(data, list):
         if isinstance(data[0], list):
@@ -380,6 +380,21 @@ def percent_to_float(data):
     Optional('ImageRecord'): {
         'root': str,
     },
+    Optional('dummy_v2'): {
+        'input_shape': And(Or(str, list), Use(list_to_tuple)), 
+        Optional('label_shape'): And(Or(str, list), Use(list_to_tuple)), 
+        Optional('low'): Or(
+            float,
+            And(int, Use(input_int_to_float)),
+            And(list, Use(input_int_to_float)),
+            And(str, Use(input_int_to_float))),
+        Optional('high'): Or(
+            float,
+            And(int, Use(input_int_to_float)),
+            And(list, Use(input_int_to_float)),
+            And(str, Use(input_int_to_float))),
+        Optional('dtype'): And(Or(str, list), Use(input_to_list)),
+    },
     Optional('dummy'): {
         'shape': And(Or(str, list), Use(list_to_tuple)), 
         Optional('low'): Or(
@@ -655,6 +670,7 @@ def percent_to_float(data):
                 },
             },
             Optional('configs'): configs_schema,
+            Optional('iteration', default=-1): int,
             Optional('dataloader'): dataloader_schema,
             Optional('postprocess'): {
                 Optional('transform'): postprocess_schema
diff --git a/lpot/experimental/benchmark.py b/lpot/experimental/benchmark.py
@@ -174,8 +174,14 @@ def run_instance(self, mode):
 
         adaptor = FRAMEWORKS[framework](framework_specific_info)
 
+        if deep_get(cfg, 'evaluation.{}.iteration'.format(mode)) == -1 and 'dummy_v2' in \
+            deep_get(cfg, 'evaluation.{}.dataloader.dataset'.format(mode), {}):
+            deep_set(cfg, 'evaluation.{}.iteration'.format(mode), 10)
+
         iteration = -1 if deep_get(cfg, 'evaluation.{}.iteration'.format(mode)) is None \
             else deep_get(cfg, 'evaluation.{}.iteration'.format(mode))
+
+
         metric =  deep_get(cfg, 'evaluation.{}.metric'.format(mode))
         b_postprocess_cfg = deep_get(cfg, 'evaluation.{}.postprocess'.format(mode))
 
diff --git a/lpot/experimental/data/datasets/dummy_dataset.py b/lpot/experimental/data/datasets/dummy_dataset.py
@@ -24,7 +24,7 @@
 torch = LazyImport('torch')
 
 @dataset_registry(dataset_type="dummy", framework="tensorflow, onnxrt_qlinearops, \
-                        onnxrt_integerops", dataset_format='')
+                  onnxrt_integerops, pytorch, pytorch_ipex, mxnet", dataset_format='')
 class DummyDataset(Dataset):
     """Dataset used for dummy data generation.
        This Dataset is to construct a dataset from a specific shape.
@@ -134,28 +134,3 @@ def __getitem__(self, index):
             return sample, 0
         else:
             return sample
-
-@dataset_registry(dataset_type="dummy", framework="mxnet", dataset_format='')
-class MXNetDummyDataset(DummyDataset):
-    def __getitem__(self, index):
-        sample = self.dataset[index]
-        if self.transform is not None:
-            self.logger.info('Dummy dataset does not need transform!')
-        if self.label:
-            return sample, 0
-        else:
-            return sample
-
-@dataset_registry(dataset_type="dummy", framework="pytorch, pytorch_ipex", 
-                    dataset_format='')
-class PyTorchDummyDataset(DummyDataset):
-    def __getitem__(self, index):
-        sample = self.dataset[index]
-        if self.transform is not None:
-            self.logger.info('Dummy dataset does not need transform!')
-        if self.label:
-            return sample, 0
-        else:
-            return sample
-
-
diff --git a/lpot/experimental/data/datasets/dummy_dataset_v2.py b/lpot/experimental/data/datasets/dummy_dataset_v2.py
@@ -0,0 +1,142 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+from .dataset import dataset_registry, IterableDataset
+import numpy as np
+from lpot.utils.utility import LazyImport
+from lpot.utils import logger
+
+mx = LazyImport('mxnet')
+torch = LazyImport('torch')
+
+@dataset_registry(dataset_type="dummy_v2", framework="tensorflow, onnxrt_qlinearops, \
+                  onnxrt_integerops, pytorch, pytorch_ipex, mxnet", dataset_format='')
+class DummyDataset(IterableDataset):
+    """Endless iterable dataset of random tensors for the 'dummy_v2' dataset type.
+
+    Every tensor is drawn with np.random.uniform(low, high), i.e. from the half-open
+    range [low, high), and is then cast to the requested dtype.
+
+    The dataset never ends (__len__ returns sys.maxsize), so the consumer has to
+    stop iterating on its own.
+
+    Args:
+        input_shape (list or tuple): shape of one sample without the batch dimension,
+            e.g. (224, 224, 3) for an image. A tuple creates a single input tensor, a
+            list of tuples creates one input tensor per tuple.
+        label_shape (list or tuple, default=None): shape of one label, e.g. (1, ). A
+            tuple creates a single label tensor, a list of tuples creates several, and
+            None makes the dataset yield inputs only.
+        low (list or float, default=-128.): lower bound of the generated values. A
+            float applies to every tensor; a list must hold one float per tensor,
+            ordered as all inputs first, then all labels.
+        high (list or float, default=127.): exclusive upper bound of the generated
+            values. Same float/list convention as low.
+        dtype (list or str, default='float32'): dtype of the generated tensors, one of
+            'float32', 'float16', 'uint8', 'int8', 'int32', 'int64', 'bool'. A str
+            applies to every tensor; a list must hold one entry per tensor.
+        transform (transform object, default=None): stored but never applied, because
+            dummy data does not need a transform.
+        filter (Filter objects, default=None): accepted but not used by this dataset.
+
+    Raises:
+        AssertionError: if low, high or dtype is a list whose length differs from the
+            number of input + label tensors, or which holds an unsupported element.
+
+    """
+    def __init__(self, input_shape, label_shape=None, low=-128., high=127., \
+                 dtype='float32', transform=None, filter=None):
+
+        # np.bool_ / np.float64 are used because the bare np.bool and np.float aliases
+        # were removed in NumPy 1.24 and raise AttributeError there.
+        self.dtype_map = {'float32':np.float32, 'float16':np.float16, 'uint8':np.uint8, \
+                     'int8':np.int8, 'int32':np.int32, 'int64':np.int64, 'bool':np.bool_}
+
+        # Seed NumPy's global generator so the generated data is reproducible.
+        np.random.seed(9527)
+        self.transform = transform
+        self.low = low
+        self.high = high
+        self.dtype = dtype
+
+        # A bare tuple describes a single tensor; normalize it to a one-element list.
+        self.input_shape = [input_shape] if isinstance(input_shape, tuple) else input_shape
+        self.label_shape = [label_shape] if isinstance(label_shape, tuple) else label_shape
+        self.input_dim = len(self.input_shape)
+        self.label_dim = 0 if label_shape is None else len(self.label_shape)
+        # low/high/dtype hold one slot per tensor: inputs first, then labels.
+        self.total_dim = self.input_dim + self.label_dim
+
+        # Lists are validated as given; scalars are broadcast to one value per tensor.
+        if isinstance(high, list):
+            assert len(high) == self.total_dim and \
+                all(isinstance(elem, float) for elem in high),\
+                'high value list length should same with label dim + input_dim'
+        else:
+            self.high = (high * np.ones(self.total_dim)).astype(np.float64)
+
+        if isinstance(low, list):
+            assert len(low) == self.total_dim and \
+                all(isinstance(elem, float) for elem in low), \
+                'low value list length should same with label dim + input_dim'
+        else:
+            self.low = (low * np.ones(self.total_dim)).astype(np.float64)
+
+        if isinstance(dtype, list):
+            assert len(dtype) == self.total_dim and \
+                all(elem in self.dtype_map.keys() for elem in dtype), \
+                'dtype list length should same with label dim + input_dim'
+        else:
+            self.dtype = [dtype] * self.total_dim
+
+    def _random_tensor(self, idx, shape):
+        """Draw one tensor of the given shape with the low/high/dtype of slot idx."""
+        tensor = np.random.uniform(low=self.low[idx], high=self.high[idx], size=shape)
+        return tensor.astype(self.dtype_map[self.dtype[idx]])
+
+    def __iter__(self):
+        """Yield random samples forever.
+
+        Yields (input_data, label) when label_shape was given, otherwise input_data
+        alone. A single input or label tensor is yielded bare, several as a list.
+        """
+        # Decided once from the configuration rather than from len(label) after the
+        # single-label unwrapping below, which would measure the label array itself.
+        has_label = self.label_dim > 0
+
+        while True:
+            # Inputs are generated before labels, each in slot order.
+            input_data = [self._random_tensor(idx, shape)
+                          for idx, shape in enumerate(self.input_shape)]
+            label = [self._random_tensor(self.input_dim + idx, shape)
+                     for idx, shape in enumerate(self.label_shape or [])]
+
+            if len(input_data) == 1:
+                input_data = input_data[0]
+
+            if len(label) == 1:
+                label = label[0]
+
+            if has_label:
+                yield input_data, label
+            else:
+                yield input_data
+
+    def __len__(self):
+        """Return sys.maxsize, since the iterator never ends."""
+        return sys.maxsize
diff --git a/lpot/experimental/quantization.py b/lpot/experimental/quantization.py
@@ -113,8 +113,12 @@ def __call__(self):
                 if eval_dataloader_cfg is None:
                     self._eval_func = self._fake_eval_func
                 else:
+                    if deep_get(cfg, 'evaluation.accuracy.iteration') == -1 and 'dummy_v2' \
+                        in deep_get(cfg, 'evaluation.accuracy.dataloader.dataset', {}):
+                        deep_set(cfg, 'evaluation.accuracy.iteration', 10) 
+                    
                     self._eval_dataloader = create_dataloader(self.framework, \
-                                                             eval_dataloader_cfg)
+                                                              eval_dataloader_cfg)
 
         approach_cfg = deep_get(cfg, 'quantization.approach')
         if self._calib_func:
diff --git a/lpot/strategy/strategy.py b/lpot/strategy/strategy.py
@@ -418,6 +418,7 @@ def _evaluate(self, model):
                                          self.adaptor, \
                                          self.cfg.evaluation.accuracy.metric, \
                                          postprocess_cfg, \
+                                         self.cfg.evaluation.accuracy.iteration, \
                                          tensorboard = self.cfg.tuning.tensorboard, \
                                          fp32_baseline = self.baseline == None)
 
diff --git a/test/test_dataloader.py b/test/test_dataloader.py
@@ -798,6 +798,45 @@ def test_tensorflow_dummy(self):
             dataset = datasets['dummy'](shape=(4, 256, 256, 3), high=[128., 127.])
         with self.assertRaises(AssertionError):
             dataset = datasets['dummy'](shape=(4, 256, 256, 3), dtype=['float32', 'int8'])
+
+    def test_tensorflow_dummy_v2(self):  # dummy_v2 dataset through the tensorflow dataloader
+        datasets = DATASETS('tensorflow')
+        # with label_shape: a batch is an (input, label) pair, one sample per batch here
+        dataset = datasets['dummy_v2'](\
+            input_shape=(256, 256, 3), label_shape=(1,))
+        data_loader = DATALOADERS['tensorflow'](dataset)
+        iterator = iter(data_loader)
+        data = next(iterator)
+        self.assertEqual(data[0].shape, (1, 256, 256, 3))
+        self.assertEqual(data[1].shape, (1, 1))
+        # dynamic batching: re-batching the same loader to 2 must give a leading dim of 2
+        data_loader.batch(batch_size=2, last_batch='rollover')
+        iterator = iter(data_loader)
+        data = next(iterator)
+        self.assertEqual(data[0].shape, (2, 256, 256, 3))
+        self.assertEqual(data[1].shape, (2, 1))
+
+        # without label_shape: a batch is the bare input array, not an (input, label) pair
+        dataset = datasets['dummy_v2'](input_shape=(256, 256, 3))
+        data_loader = DATALOADERS['tensorflow'](dataset)
+        iterator = iter(data_loader)
+        data = next(iterator)
+        self.assertEqual(data.shape, (1, 256, 256, 3))
+        # dynamic batching: same re-batching check for the label-free dataset
+        data_loader.batch(batch_size=2, last_batch='rollover')
+        iterator = iter(data_loader)
+        data = next(iterator)
+        self.assertEqual(data.shape, (2, 256, 256, 3))
+
+        with self.assertRaises(AssertionError):  # one tensor in total, but two 'low' values
+            dataset = datasets['dummy_v2'](\
+                input_shape=(256, 256, 3), low=[1., 0.])
+        with self.assertRaises(AssertionError):  # one tensor in total, but two 'high' values
+            dataset = datasets['dummy_v2'](\
+                input_shape=(256, 256, 3), high=[128., 127.])
+        with self.assertRaises(AssertionError):  # one tensor in total, but two dtypes
+            dataset = datasets['dummy_v2'](\
+                input_shape=(256, 256, 3), dtype=['float32', 'int8'])
  
     def test_style_transfer_dataset(self):
         random_array = np.random.random_sample([100,100,3]) * 255