intel
diff --git a/‎examples/tensorflow/object_detection/yolo_v3/README.md‎
Lines changed: 79 additions & 0 deletions b/‎examples/tensorflow/object_detection/yolo_v3/README.md‎
Lines changed: 79 additions & 0 deletions
diff --git a/‎examples/tensorflow/object_detection/yolo_v3/coco_constants.py‎
Lines changed: 6 additions & 0 deletions b/‎examples/tensorflow/object_detection/yolo_v3/coco_constants.py‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎examples/tensorflow/object_detection/yolo_v3/infer_detections.py‎
Lines changed: 152 additions & 0 deletions b/‎examples/tensorflow/object_detection/yolo_v3/infer_detections.py‎
Lines changed: 152 additions & 0 deletions
@@ -0,0 +1,79 @@
+This document describes the step-by-step instructions to reproduce the Yolo-v3 tuning result with LPOT.
+
+## Prerequisite
+
+
+### 1. Installation
+Python 3.6 or higher is recommended.
+
+```shell
+# Install Intel® Low Precision Optimization Tool
+pip install lpot
+```
+### 2. Install Intel Tensorflow
+```shell
+pip install intel-tensorflow==1.15.0up3
+```
+> Note: For the supported TensorFlow versions, please refer to the LPOT README file.
+
+### 3. Install Dependency Packages
+```shell
+cd examples/tensorflow/object_detection
+pip install -r requirements.txt
+```
+
+### 4. Download Yolo-v3 Model
+```shell
+git clone https://github.com/mystic123/tensorflow-yolo-v3.git
+cd tensorflow-yolo-v3
+```
+
+### 5. Download COCO Class Names File
+```shell
+wget https://raw.githubusercontent.com/pjreddie/darknet/master/data/coco.names
+```
+
+### 6. Download Model Weights (Full):
+```shell
+wget https://pjreddie.com/media/files/yolov3.weights
+```
+
+### 7. Generate PB:
+```shell
+python convert_weights_pb.py --class_names coco.names --weights_file yolov3.weights --data_format NHWC --size 416 --output_graph yolov3.pb
+```
+
+### 8. Prepare Dataset
+
+#### Automatic dataset download
+
+> **_Note: `prepare_dataset.sh` script works with TF version 1.x._**
+
+Run the `prepare_dataset.sh` script located in `examples/tensorflow/object_detection`.
+
+Usage:
+```shell
+cd examples/tensorflow/object_detection
+. prepare_dataset.sh
+```
+
+This script downloads the *train*, *validation* and *test* COCO datasets and then converts them to
+TensorFlow records using the dedicated script from `https://github.com/tensorflow/models.git`.
+
+#### Manual dataset download
+Download the COCO dataset from the [Official Website](https://cocodataset.org/#download).
+
+## Get Quantized Yolo-v3 model with LPOT
+
+### 1. Configure `yolo_v3.yaml` with a valid cocoraw data path.
+
+### 2. Run the commands below one by one.
+Usage:
+```shell
+cd examples/tensorflow/object_detection/yolo_v3
+```
+```shell
+python infer_detections.py --input_graph /path/to/yolov3_fp32.pb --config ./yolo_v3.yaml --output_graph /path/to/save/yolov3_tuned3.pb
+```
+
+Finally, LPOT will generate the quantized Yolo-v3 model with a relative loss of 1%.
@@ -0,0 +1,6 @@
+# NOTE(review): presumably the number of COCO validation images used for
+# evaluation - confirm. It is not referenced in the code visible here.
+COCO_NUM_VAL_IMAGES = 4952
+# Lookup table from a contiguous class index to a label id. It has 81 entries and
+# the id sequence has gaps (e.g. 12, 26, 29 and 30 are absent).
+# infer_detections.py reads it as LABEL_MAP[cls + 1], so entry 0 is never used there.
+# NOTE(review): presumably the values are the official COCO category ids and
+# entry 0 stands for the background class - confirm.
+LABEL_MAP = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20,
+            21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
+            42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
+            61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82,
+            84, 85, 86, 87, 88, 89, 90]
@@ -0,0 +1,152 @@
+import time
+import numpy as np
+import tensorflow as tf
+
+from absl import app, flags
+
+from tensorflow.python.client import timeline
+from coco_constants import LABEL_MAP
+from utils import read_graph, non_max_suppression
+
+flags.DEFINE_integer('batch_size', 1, "batch size")
+
+flags.DEFINE_string("ground_truth", None, "ground truth file")
+
+flags.DEFINE_string("input_graph", None, "input graph")
+
+flags.DEFINE_string("output_graph", None, "input graph")
+
+flags.DEFINE_string("config", None, "LPOT config file")
+
+flags.DEFINE_float("conf_threshold", 0.5, "confidence threshold")
+
+flags.DEFINE_float("iou_threshold", 0.4, "IoU threshold")
+
+flags.DEFINE_integer("num_intra_threads", 0, "number of intra threads")
+
+flags.DEFINE_integer("num_inter_threads", 1, "number of inter threads")
+
+flags.DEFINE_boolean("benchmark", False, "benchmark mode")
+
+flags.DEFINE_boolean("profiling", False, "Signal of profiling")
+
+FLAGS = flags.FLAGS
+
+
+class NMS():
+    def __init__(self, conf_threshold, iou_threshold):
+        self.conf_threshold = conf_threshold
+        self.iou_threshold = iou_threshold
+
+    def __call__(self, sample):
+        preds, labels = sample
+        if not isinstance(preds, np.ndarray):
+            preds = np.array(preds)
+        filtered_boxes = non_max_suppression(preds,
+                                             self.conf_threshold,
+                                             self.iou_threshold)
+
+        det_boxes = []
+        det_scores = []
+        det_classes = []
+        for cls, bboxs in filtered_boxes.items():
+            det_classes.extend([LABEL_MAP[cls + 1]] * len(bboxs))
+            for box, score in bboxs:
+                rect_pos = box.tolist()
+                y_min, x_min = rect_pos[1], rect_pos[0]
+                y_max, x_max = rect_pos[3], rect_pos[2]
+                height, width = 416, 416
+                det_boxes.append(
+                    [y_min / height, x_min / width, y_max / height, x_max / width])
+                det_scores.append(score)
+
+        if len(det_boxes) == 0:
+            det_boxes = np.zeros((0, 4))
+            det_scores = np.zeros((0, ))
+            det_classes = np.zeros((0, ))
+
+        return [np.array([det_boxes]), np.array([det_scores]), np.array([det_classes])], labels
+
+
+def create_tf_config():
+    config = tf.compat.v1.ConfigProto()
+    config.intra_op_parallelism_threads = FLAGS.num_intra_threads
+    config.inter_op_parallelism_threads = FLAGS.num_inter_threads
+    return config
+
+
+def run_benchmark():
+    config = create_tf_config()
+
+    graph_def = read_graph(FLAGS.input_graph)
+
+    tf.import_graph_def(graph_def, name='')
+
+    input_tensor = tf.compat.v1.get_default_graph().get_tensor_by_name('inputs:0')
+    output_tensor = tf.compat.v1.get_default_graph().get_tensor_by_name('output_boxes:0')
+
+    dummy_data_shape = list(input_tensor.shape)
+    dummy_data_shape[0] = FLAGS.batch_size
+    dummy_data = np.random.random(dummy_data_shape).astype(np.float32)
+
+    if FLAGS.profiling != True:
+        num_warmup = 200
+        total_iter = 1000
+    else:
+        num_warmup = 20
+        total_iter = 100
+
+    total_time = 0.0
+
+    with tf.compat.v1.Session(config=config) as sess:
+        print("Running warm-up")
+        for i in range(num_warmup):
+            sess.run(output_tensor, {input_tensor: dummy_data})
+        print("Warm-up complete")
+
+        for i in range(1, total_iter + 1):
+            start_time = time.time()
+            sess.run(output_tensor, {input_tensor: dummy_data})
+            end_time = time.time()
+
+            if i % 10 == 0:
+                print(
+                    "Steps = {0}, {1:10.6f} samples/sec".format(i, FLAGS.batch_size / duration))
+
+            duration = end_time - start_time
+            total_time += duration
+
+        if FLAGS.profiling:
+            options = tf.compat.v1.RunOptions(
+                trace_level=tf.compat.v1.RunOptions.FULL_TRACE)
+            run_metadata = tf.compat.v1.RunMetadata()
+
+            sess.run(output_tensor, {input_tensor: dummy_data},
+                     options=options, run_metadata=run_metadata)
+
+            fetched_timeline = timeline.Timeline(run_metadata.step_stats)
+            chrome_trace = fetched_timeline.generate_chrome_trace_format()
+            with open("timeline_%s.json" % (time.time()), 'w') as f:
+                f.write(chrome_trace)
+
+    print("Average Thoughput: %f samples/sec" %
+          (total_iter * FLAGS.batch_size / total_time))
+
+
+def main(_):
+    if FLAGS.benchmark:
+        run_benchmark()
+    else:
+        FLAGS.batch_size = 1
+        from lpot.experimental import Quantization, common
+        quantizer = Quantization(FLAGS.config)
+        quantizer.model = common.Model(FLAGS.input_graph)
+        kwargs = {'conf_threshold': FLAGS.conf_threshold,
+                  'iou_threshold': FLAGS.iou_threshold}
+        quantizer.postprocess = common.Postprocess(NMS, 'NMS', **kwargs)
+        q_model = quantizer()
+        q_model.save(FLAGS.output_graph)
+
+
+# Script entry point: absl parses the command-line flags and then calls main().
+if __name__ == '__main__':
+    app.run(main)