intel
diff --git a/‎examples/fp8_sample/README.md‎
Lines changed: 96 additions & 0 deletions b/‎examples/fp8_sample/README.md‎
Lines changed: 96 additions & 0 deletions
diff --git a/‎examples/fp8_sample/maxabs_measure.json‎
Lines changed: 7 additions & 0 deletions b/‎examples/fp8_sample/maxabs_measure.json‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎examples/fp8_sample/maxabs_quant.json‎
Lines changed: 8 additions & 0 deletions b/‎examples/fp8_sample/maxabs_quant.json‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎examples/fp8_sample/quant_config.json‎
Lines changed: 8 additions & 0 deletions b/‎examples/fp8_sample/quant_config.json‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎examples/fp8_sample/sample_one_step.py‎
Lines changed: 56 additions & 0 deletions b/‎examples/fp8_sample/sample_one_step.py‎
Lines changed: 56 additions & 0 deletions
diff --git a/‎examples/fp8_sample/sample_two_steps.py‎
Lines changed: 50 additions & 0 deletions b/‎examples/fp8_sample/sample_two_steps.py‎
Lines changed: 50 additions & 0 deletions
diff --git a/‎neural_compressor/evaluation/hf_eval/hf_datasets/__init__.py‎
Lines changed: 9 additions & 2 deletions b/‎neural_compressor/evaluation/hf_eval/hf_datasets/__init__.py‎
Lines changed: 9 additions & 2 deletions
@@ -0,0 +1,96 @@
+### Usage demo:
+
+#### Two steps to get a quantized model
+
+```diff
+import torch
++ from neural_compressor.torch.quantization import FP8Config, convert, prepare, finalize_calibration
+import habana_frameworks.torch.core as htcore
+
+class M(torch.nn.Module):
+    def __init__(self) -> None:
+        super().__init__()
+        self.fc1 = torch.nn.Linear(10, 5)
+        self.fc2 = torch.nn.Linear(5, 10)
+
+    def forward(self, inp):
+        x1 = self.fc1(inp)
+        x2 = self.fc2(x1)
+        return x2
+
+model = M().eval()
+
++ config = FP8Config.from_json_file(args.quant_config) # args.quant_config is the path of json file
+
++ if config.measure:
++     model = prepare(model, config)
+
++ if config.quantize:
++     htcore.hpu_initialize()
++     model = convert(model, config)
+
+# user code run
+with torch.no_grad():
+    model.to("hpu")
+    output = model(torch.randn(1, 10).to("hpu"))
+    print(output)
+
++ if config.measure:
++     finalize_calibration(model)
+```
+
+
+For the complete script and configs, see [sample_two_steps.py](./sample_two_steps.py), [maxabs_measure.json](./maxabs_measure.json) and [maxabs_quant.json](./maxabs_quant.json).
+
+First, measure the tensor quantization statistics:
+```shell
+python sample_two_steps.py --quant_config=maxabs_measure.json
+```
+
+Then quantize the model based on previous measurements:
+```shell
+python sample_two_steps.py --quant_config=maxabs_quant.json
+```
+
+#### One step to get a quantized model
+
+```diff
+import torch
++ from neural_compressor.torch.quantization import FP8Config, convert, prepare, finalize_calibration
+import habana_frameworks.torch.core as htcore
+
+class M(torch.nn.Module):
+    def __init__(self) -> None:
+        super().__init__()
+        self.fc1 = torch.nn.Linear(10, 5)
+        self.fc2 = torch.nn.Linear(5, 10)
+
+    def forward(self, inp):
+        x1 = self.fc1(inp)
+        x2 = self.fc2(x1)
+        return x2
+
+model = M().to("hpu")
+
++ config = FP8Config.from_json_file(args.quant_config) # args.quant_config is the path of json file
++ model = prepare(model, config)
+
+# user code run to do calibration
+with torch.no_grad():
+    output = model(torch.randn(1, 10).to("hpu"))
+    print(output)
+
++ finalize_calibration(model)
++ model = convert(model)
+
+# user code to run benchmark for quantized model
+with torch.no_grad():
+    output = model(torch.randn(1, 10).to("hpu"))
+    print(output)
+```
+
+For the complete script and config, see [sample_one_step.py](./sample_one_step.py) and [quant_config.json](./quant_config.json).
+
+```shell
+python sample_one_step.py --quant_config=quant_config.json
+```
@@ -0,0 +1,7 @@
+{
+    "mode": "MEASURE",
+    "observer": "maxabs",
+    "allowlist": {"types": [], "names":  []},
+    "blocklist": {"types": [], "names":  []},
+    "dump_stats_path": "./hqt_output/measure"
+}
@@ -0,0 +1,8 @@
+{
+    "mode": "QUANTIZE",
+    "observer": "maxabs",
+    "scale_method": "maxabs_hw",
+    "allowlist": {"types": [], "names":  []},
+    "blocklist": {"types": [], "names":  []},
+    "dump_stats_path": "./hqt_output/measure"
+}
@@ -0,0 +1,8 @@
+{
+    "mode": "AUTO",
+    "observer": "maxabs",
+    "scale_method": "maxabs_hw",
+    "allowlist": {"types": [], "names":  []},
+    "blocklist": {"types": [], "names":  []},
+    "dump_stats_path": "./hqt_output/measure"
+}
@@ -0,0 +1,56 @@
+import argparse
+import torch
+import habana_frameworks.torch.core as htcore
+htcore.hpu_set_env()
+
+from neural_compressor.torch.quantization import FP8Config, convert, finalize_calibration, prepare
+
+torch.manual_seed(1)
+
+
+# 1. python sample_one_step.py --quant_config=quant_config.json
+
+
+class M(torch.nn.Module):
+    def __init__(self) -> None:
+        super().__init__()
+        self.fc1 = torch.nn.Linear(10, 5)
+        self.fc2 = torch.nn.Linear(5, 10)
+
+    def forward(self, inp):
+        x1 = self.fc1(inp)
+        x2 = self.fc2(x1)
+        return x2
+
+
+def eval_func(model):
+    # user's eval func
+    input = torch.randn(1, 10)
+    model(input.to("hpu"))
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Habana FP8 sample code.", formatter_class=argparse.ArgumentDefaultsHelpFormatter
+    )
+    parser.add_argument("--quant_config", type=str, help="json file of quantization config")
+    args = parser.parse_args()
+
+    model = M().eval().to("hpu")
+    htcore.hpu_initialize()
+
+    config = FP8Config.from_json_file(args.quant_config)
+    model = prepare(model, config)
+
+    # for calibration
+    with torch.no_grad():
+        # model.to("hpu")
+        output = model(torch.randn(1, 10).to("hpu"))
+
+    model = convert(model)
+    print(model)
+
+    # for benchmark
+    with torch.no_grad():
+        output = model(torch.randn(1, 10).to("hpu"))
+        print(output)
@@ -0,0 +1,50 @@
+import argparse
+import torch
+import habana_frameworks.torch.core as htcore
+htcore.hpu_set_env()
+
+from neural_compressor.torch.quantization import FP8Config, convert, finalize_calibration, prepare
+
+torch.manual_seed(1)
+
+# 1. python sample_two_steps.py --quant_config=maxabs_measure.json
+# 2. python sample_two_steps.py --quant_config=maxabs_quant.json
+
+
+class M(torch.nn.Module):
+    def __init__(self) -> None:
+        super().__init__()
+        self.fc1 = torch.nn.Linear(10, 5)
+        self.fc2 = torch.nn.Linear(5, 10)
+
+    def forward(self, inp):
+        x1 = self.fc1(inp)
+        x2 = self.fc2(x1)
+        return x2
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Habana FP8 sample code.", formatter_class=argparse.ArgumentDefaultsHelpFormatter
+    )
+    parser.add_argument("--quant_config", type=str, help="json file of quantization config")
+    args = parser.parse_args()
+
+    model = M().eval()
+    config = FP8Config.from_json_file(args.quant_config)
+
+    if config.measure:
+        model = prepare(model, config)
+
+    if config.quantize:
+        htcore.hpu_initialize()
+        model = convert(model, config)
+        print(model)
+
+    with torch.no_grad():
+        model.to("hpu")
+        output = model(torch.randn(1, 10).to("hpu"))
+        print(output)
+
+    if config.measure:
+        finalize_calibration(model)
@@ -1,16 +1,23 @@
-#!/usr/bin/env python
+#
 # -*- coding: utf-8 -*-
 #
 # Copyright (c) 2022 Intel Corporation
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#   http://www.apache.org/licenses/LICENSE-2.0
+#    http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+#
+
+#