Skip to content

Commit 5557751

Browse files
authored
add dummy_v2 dataset (#84)
1 parent bf1915f commit 5557751

File tree

11 files changed

+228
-37
lines changed

11 files changed

+228
-37
lines changed

docs/dataset.md

Lines changed: 8 additions & 4 deletions
Large diffs are not rendered by default.

examples/helloworld/tf_example4/README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@ This example is used to demonstrate how to quantize a TensorFlow checkpoint and
1212
We will create a dummy dataloader and only need to add the following lines for quantization to create an int8 model.
1313
```python
1414
quantizer = Quantization('./conf.yaml')
15-
dataset = quantizer.dataset('dummy', shape=(100, 100, 100, 3), label=True)
15+
dataset = quantizer.dataset('dummy_v2', \
16+
input_shape=(100, 100, 3), label_shape=(1, ))
1617
quantizer.model = common.Model('./model/public/rfcn-resnet101-coco-tf/rfcn_resnet101_coco_2018_01_28/')
1718
quantizer.calib_dataloader = common.DataLoader(dataset)
1819
quantized_model = quantizer()

examples/helloworld/tf_example4/test.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99
def main():
1010

1111
quantizer = Quantization('./conf.yaml')
12-
dataset = quantizer.dataset('dummy', shape=(100, 100, 100, 3), label=True)
12+
dataset = quantizer.dataset('dummy_v2', \
13+
input_shape=(100, 100, 3), label_shape=(1, ))
1314
quantizer.model = common.Model('./model/public/rfcn-resnet101-coco-tf/rfcn_resnet101_coco_2018_01_28/')
1415
quantizer.calib_dataloader = common.DataLoader(dataset)
1516
quantized_model = quantizer()

examples/tensorflow/style_transfer/style_tune.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -139,8 +139,8 @@ def main(args=None):
139139
crop_ratio=0.2,
140140
resize_shape=(256, 256))
141141
else:
142-
dataset = DATASETS('tensorflow')['dummy']( \
143-
shape=[(200, 256, 256, 3), (200, 256, 256, 3)], label=True)
142+
dataset = DATASETS('tensorflow')['dummy_v2'](\
143+
input_shape=[(256, 256, 3), (256, 256, 3)], label_shape=(1, ))
144144
dataloader = DATALOADERS['tensorflow'](dataset=dataset, batch_size=FLAGS.batch_size)
145145
tf.import_graph_def(frozen_graph, name='')
146146
style_transfer(sess, dataloader)
@@ -164,7 +164,7 @@ def style_transfer(sess, dataloader):
164164

165165
stylized_images = sess.graph.get_tensor_by_name(output_name)
166166

167-
for (content_img_np, style_img_np), _ in dataloader:
167+
for idx, ((content_img_np, style_img_np), _) in enumerate(dataloader):
168168
start_time = time.time()
169169
stylized_image_res = sess.run(
170170
stylized_images,
@@ -173,6 +173,8 @@ def style_transfer(sess, dataloader):
173173
content_name: content_img_np})
174174
duration = time.time() - start_time
175175
time_list.append(duration)
176+
if idx + 1 == 20:
177+
break
176178
warm_up = 1
177179
throughput = (len(time_list) - warm_up)/ np.array(time_list[warm_up:]).sum()
178180
print('Batch size = {}'.format(FLAGS.batch_size))

lpot/conf/config.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ def input_to_list(data):
144144

145145
def list_to_tuple(data):
146146
if isinstance(data, str):
147-
return tuple([s.strip() for s in data.split(',')])
147+
return tuple([int(s.strip()) for s in data.split(',')])
148148

149149
elif isinstance(data, list):
150150
if isinstance(data[0], list):
@@ -380,6 +380,21 @@ def percent_to_float(data):
380380
Optional('ImageRecord'): {
381381
'root': str,
382382
},
383+
Optional('dummy_v2'): {
384+
'input_shape': And(Or(str, list), Use(list_to_tuple)),
385+
Optional('label_shape'): And(Or(str, list), Use(list_to_tuple)),
386+
Optional('low'): Or(
387+
float,
388+
And(int, Use(input_int_to_float)),
389+
And(list, Use(input_int_to_float)),
390+
And(str, Use(input_int_to_float))),
391+
Optional('high'): Or(
392+
float,
393+
And(int, Use(input_int_to_float)),
394+
And(list, Use(input_int_to_float)),
395+
And(str, Use(input_int_to_float))),
396+
Optional('dtype'): And(Or(str, list), Use(input_to_list)),
397+
},
383398
Optional('dummy'): {
384399
'shape': And(Or(str, list), Use(list_to_tuple)),
385400
Optional('low'): Or(
@@ -655,6 +670,7 @@ def percent_to_float(data):
655670
},
656671
},
657672
Optional('configs'): configs_schema,
673+
Optional('iteration', default=-1): int,
658674
Optional('dataloader'): dataloader_schema,
659675
Optional('postprocess'): {
660676
Optional('transform'): postprocess_schema

lpot/experimental/benchmark.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,8 +174,14 @@ def run_instance(self, mode):
174174

175175
adaptor = FRAMEWORKS[framework](framework_specific_info)
176176

177+
if deep_get(cfg, 'evaluation.{}.iteration'.format(mode)) == -1 and 'dummy_v2' in \
178+
deep_get(cfg, 'evaluation.{}.dataloader.dataset'.format(mode), {}):
179+
deep_set(cfg, 'evaluation.{}.iteration'.format(mode), 10)
180+
177181
iteration = -1 if deep_get(cfg, 'evaluation.{}.iteration'.format(mode)) is None \
178182
else deep_get(cfg, 'evaluation.{}.iteration'.format(mode))
183+
184+
179185
metric = deep_get(cfg, 'evaluation.{}.metric'.format(mode))
180186
b_postprocess_cfg = deep_get(cfg, 'evaluation.{}.postprocess'.format(mode))
181187

lpot/experimental/data/datasets/dummy_dataset.py

Lines changed: 1 addition & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
torch = LazyImport('torch')
2525

2626
@dataset_registry(dataset_type="dummy", framework="tensorflow, onnxrt_qlinearops, \
27-
onnxrt_integerops", dataset_format='')
27+
onnxrt_integerops, pytorch, pytorch_ipex, mxnet", dataset_format='')
2828
class DummyDataset(Dataset):
2929
"""Dataset used for dummy data generation.
3030
This Dataset is to construct a dataset from a specific shape.
@@ -134,28 +134,3 @@ def __getitem__(self, index):
134134
return sample, 0
135135
else:
136136
return sample
137-
138-
@dataset_registry(dataset_type="dummy", framework="mxnet", dataset_format='')
139-
class MXNetDummyDataset(DummyDataset):
140-
def __getitem__(self, index):
141-
sample = self.dataset[index]
142-
if self.transform is not None:
143-
self.logger.info('Dummy dataset does not need transform!')
144-
if self.label:
145-
return sample, 0
146-
else:
147-
return sample
148-
149-
@dataset_registry(dataset_type="dummy", framework="pytorch, pytorch_ipex",
150-
dataset_format='')
151-
class PyTorchDummyDataset(DummyDataset):
152-
def __getitem__(self, index):
153-
sample = self.dataset[index]
154-
if self.transform is not None:
155-
self.logger.info('Dummy dataset does not need transform!')
156-
if self.label:
157-
return sample, 0
158-
else:
159-
return sample
160-
161-
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
#
4+
# Copyright (c) 2021 Intel Corporation
5+
#
6+
# Licensed under the Apache License, Version 2.0 (the "License");
7+
# you may not use this file except in compliance with the License.
8+
# You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
18+
import sys
19+
from .dataset import dataset_registry, IterableDataset
20+
import numpy as np
21+
from lpot.utils.utility import LazyImport
22+
from lpot.utils import logger
23+
24+
mx = LazyImport('mxnet')
25+
torch = LazyImport('torch')
26+
27+
@dataset_registry(dataset_type="dummy_v2", framework="tensorflow, onnxrt_qlinearops, \
28+
onnxrt_integerops, pytorch, pytorch_ipex, mxnet", dataset_format='')
29+
class DummyDataset(IterableDataset):
    """Dataset used for dummy_v2 data generation.

    This dataset builds an endless stream of random samples from an input
    shape and an optional label shape. Every tensor is drawn with
    ``np.random.uniform(low, high, size=shape)`` and then cast to the
    requested dtype.

    Each iteration yields ``(input_data, label)`` when ``label_shape`` is
    given and ``input_data`` alone otherwise. A single input (or label)
    tensor is yielded directly; multiple tensors are yielded as a list.

    Args:
        input_shape (list or tuple): create single or multi input tensors.
            A tuple represents the sample shape of the dataset, e.g. an
            image should be represented as (224, 224, 3); a list contains
            multiple tuples and represents multiple input tensors.
        label_shape (list or tuple, default=None): create single or multi
            label tensors. A tuple represents the label shape of the
            dataset, e.g. a label should be represented as (1, ); a list
            contains multiple tuples and represents multiple label tensors.
            If None, no label is generated.
        low (list or float, default=-128.): lower bound of the uniform
            range. A float is applied to all tensors; a list must contain
            one float per tensor, input tensors first, then label tensors.
        high (list or float, default=127.): upper bound of the uniform
            range. A float is applied to all tensors; a list must contain
            one float per tensor, input tensors first, then label tensors.
        dtype (list or str, default='float32'): dtype of the generated
            tensors. A str is applied to all tensors; a list must contain
            one entry per tensor, input tensors first, then label tensors.
            Supported values are 'float32', 'float16', 'uint8', 'int8',
            'int32', 'int64', 'bool'.
        transform (transform object, default=None): dummy_v2 dataset does
            not need transform. It is stored but never applied.
        filter (Filter objects, default=None): currently not used by this
            class.

    """
    def __init__(self, input_shape, label_shape=None, low=-128., high=127., \
                 dtype='float32', transform=None, filter=None):

        # np.bool_ replaces the np.bool alias, which was deprecated in
        # NumPy 1.20 and removed in NumPy 1.24.
        self.dtype_map = {'float32': np.float32, 'float16': np.float16,
                          'uint8': np.uint8, 'int8': np.int8,
                          'int32': np.int32, 'int64': np.int64,
                          'bool': np.bool_}

        # Fixed seed on NumPy's global RNG (presumably so that the generated
        # data is reproducible between runs).
        np.random.seed(9527)
        self.transform = transform
        self.input_shape = input_shape
        self.label_shape = label_shape
        self.low = low
        self.high = high
        self.dtype = dtype

        # Number of label tensors: none, one (a single tuple) or many (a list).
        if label_shape is None:
            self.label_dim = 0
        elif isinstance(label_shape, tuple):
            self.label_dim = 1
        else:
            self.label_dim = len(label_shape)

        # Number of input tensors and total number of generated tensors.
        self.input_dim = 1 if isinstance(input_shape, tuple) else len(input_shape)
        self.total_dim = self.input_dim + self.label_dim

        if isinstance(high, list):
            assert len(high) == self.total_dim and \
                all(isinstance(elem, float) for elem in high),\
                'high value list length should same with label dim + input_dim'
        else:
            # Broadcast the scalar bound to one value per tensor.
            # np.float64 replaces the removed np.float alias (same dtype).
            self.high = (high * np.ones(self.total_dim)).astype(np.float64)

        if isinstance(low, list):
            assert len(low) == self.total_dim and \
                all(isinstance(elem, float) for elem in low), \
                'low value list length should same with label dim + input_dim'
        else:
            self.low = (low * np.ones(self.total_dim)).astype(np.float64)

        if isinstance(dtype, list):
            assert len(dtype) == self.total_dim and \
                all(elem in self.dtype_map for elem in dtype), \
                'dtype list length should same with label dim + input_dim'
        else:
            self.dtype = [self.dtype for _ in range(self.total_dim)]

        # Normalise single shapes to one-element lists so that __iter__ can
        # always index shapes by tensor position.
        if isinstance(input_shape, tuple):
            self.input_shape = [input_shape]

        if isinstance(label_shape, tuple):
            self.label_shape = [label_shape]

    def _random_tensor(self, idx, shape):
        """Draw one uniform tensor of `shape` using the idx-th low/high/dtype."""
        tensor = np.random.uniform(low=self.low[idx], high=self.high[idx], size=shape)
        return tensor.astype(self.dtype_map[self.dtype[idx]])

    def __iter__(self):
        # Never terminates on its own; the consumer decides when to stop.
        while True:
            input_data = [self._random_tensor(idx, self.input_shape[idx])
                          for idx in range(self.input_dim)]

            # Label settings follow the input settings in low/high/dtype,
            # hence the offset by input_dim.
            label = [self._random_tensor(self.input_dim + idx, self.label_shape[idx])
                     for idx in range(self.label_dim)]

            # A single tensor is yielded directly rather than as a list.
            if len(input_data) == 1:
                input_data = input_data[0]

            if len(label) == 1:
                label = label[0]

            if len(label) > 0:
                yield input_data, label
            else:
                yield input_data

    def __len__(self):
        # The stream is endless, so the largest representable size is reported.
        return sys.maxsize

lpot/experimental/quantization.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,8 +113,12 @@ def __call__(self):
113113
if eval_dataloader_cfg is None:
114114
self._eval_func = self._fake_eval_func
115115
else:
116+
if deep_get(cfg, 'evaluation.accuracy.iteration') == -1 and 'dummy_v2' \
117+
in deep_get(cfg, 'evaluation.accuracy.dataloader.dataset', {}):
118+
deep_set(cfg, 'evaluation.accuracy.iteration', 10)
119+
116120
self._eval_dataloader = create_dataloader(self.framework, \
117-
eval_dataloader_cfg)
121+
eval_dataloader_cfg)
118122

119123
approach_cfg = deep_get(cfg, 'quantization.approach')
120124
if self._calib_func:

lpot/strategy/strategy.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,7 @@ def _evaluate(self, model):
418418
self.adaptor, \
419419
self.cfg.evaluation.accuracy.metric, \
420420
postprocess_cfg, \
421+
self.cfg.evaluation.accuracy.iteration, \
421422
tensorboard = self.cfg.tuning.tensorboard, \
422423
fp32_baseline = self.baseline == None)
423424

0 commit comments

Comments
 (0)