Skip to content

Commit f11c51b

Browse files
authored
Neural Coder PTQ Support on Intel GPU with IPEX Backend (#1461)
Signed-off-by: zehao-intel <zehao.huang@intel.com>
1 parent 047b657 commit f11c51b

File tree

4 files changed

+49
-1
lines changed
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+transformation:
+  location:
+    - ["insert_below_dataloader_definition_line", "insert_below_model_definition_line"]
+  content:
+    - |-
+      [+] from neural_compressor.config import PostTrainingQuantConfig
+      [+] from neural_compressor.quantization import fit
+      [+] MODEL_NAME = MODEL_NAME.to("xpu")
+      [+] conf = PostTrainingQuantConfig(backend='ipex', quant_level=1, device="xpu")
+      [+] MODEL_NAME = fit(model=MODEL_NAME, conf=conf, calib_dataloader=DATALOADER_NAME)
+      [+] MODEL_NAME.save("./quantized_model")
+      [+] MODEL_NAME.eval()
+  order:
+    - below:
+      above:
+        - pytorch_jit_script
+        - pytorch_jit_script_ofi
+        - pytorch_jit_trace
+        - pytorch_jit_trace_ofi
+        - pytorch_channels_last

neural_coder/docs/SupportMatrix.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ Supported Optimization Features
 | PyTorch | [JIT (Just-In-Time) Script/Trace](https://pytorch.org/docs/stable/jit.html) & [optimize_for_inference](https://pytorch.org/docs/stable/generated/torch.jit.optimize_for_inference.html) | `pytorch_jit_script`, `pytorch_jit_trace`, `pytorch_jit_script_ofi`, `pytorch_jit_trace_ofi` |
 | PyTorch | JIT with [TorchDynamo](https://github.com/pytorch/torchdynamo) | `pytorch_torchdynamo_jit_script`, `pytorch_torchdynamo_jit_trace`, `pytorch_torchdynamo_jit_script_ofi`, `pytorch_torchdynamo_jit_trace_ofi` |
 | PyTorch | [Intel Neural Compressor (INC) Mixed Precision](https://github.com/intel/neural-compressor/blob/master/docs/source/mixed_precision.md) | `pytorch_inc_bf16` |
-| PyTorch | [INC INT8 Static Quantization (FX/IPEX)](https://github.com/intel/neural-compressor/blob/master/docs/source/quantization.md#supported-feature-matrix) | `pytorch_inc_static_quant_fx`, `pytorch_inc_static_quant_ipex` |
+| PyTorch | [INC INT8 Static Quantization (FX/IPEX)](https://github.com/intel/neural-compressor/blob/master/docs/source/quantization.md#supported-feature-matrix) | `pytorch_inc_static_quant_fx`, `pytorch_inc_static_quant_ipex`, `pytorch_inc_static_quant_ipex_xpu` |
 | PyTorch | [INC INT8 Dynamic Quantization](https://github.com/intel/neural-compressor/blob/master/docs/source/quantization.md#supported-feature-matrix) | `pytorch_inc_dynamic_quant` |
 | PyTorch | [Intel Extension for PyTorch (FP32, BF16, INT8 Static/Dynamic Quantization)](https://github.com/intel/intel-extension-for-pytorch) | `pytorch_ipex_fp32`, `pytorch_ipex_bf16`, `pytorch_ipex_int8_static_quant`, `pytorch_ipex_int8_dynamic_quant` |
 | PyTorch | [Alibaba Blade-DISC](https://github.com/alibaba/BladeDISC) | `pytorch_aliblade` |

neural_coder/interface.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ def enable(
         "pytorch_inc_dynamic_quant",
         "pytorch_inc_static_quant_fx",
         "pytorch_inc_static_quant_ipex",
+        "pytorch_inc_static_quant_ipex_xpu",
         "pytorch_inc_bf16",
         "pytorch_inc_huggingface_optimum_static",
         "pytorch_inc_huggingface_optimum_dynamic",
@@ -210,6 +211,7 @@ def enable(
         or "pytorch_jit_trace_ofi" in features
         or "pytorch_inc_static_quant_fx" in features
         or "pytorch_inc_static_quant_ipex" in features
+        or "pytorch_inc_static_quant_ipex_xpu" in features
     ):
         features = ["pytorch_reclaim_inputs"] + features

@@ -312,6 +314,7 @@ def enable(
         "pytorch_inc_dynamic_quant",
         "pytorch_inc_static_quant_fx",
         "pytorch_inc_static_quant_ipex",
+        "pytorch_inc_static_quant_ipex_xpu",
         "pytorch_inc_huggingface_optimum_static",
         "pytorch_inc_huggingface_optimum_dynamic",
         "onnx_inc_static_quant_qlinear",
@@ -839,6 +842,7 @@ def superbench(
             ["pytorch_inc_dynamic_quant"],
             ["pytorch_inc_static_quant_fx"],
             ["pytorch_inc_static_quant_ipex"],
+            ["pytorch_inc_static_quant_ipex_xpu"],
             ["pytorch_inc_bf16"],
         ]
         standalones_pool = []
@@ -857,12 +861,14 @@ def superbench(
             "pytorch_ipex_bf16",
             "pytorch_inc_static_quant_fx",
             "pytorch_inc_static_quant_ipex",
+            "pytorch_inc_static_quant_ipex_xpu",
            "pytorch_inc_dynamic_quant",
            "pytorch_ipex_int8_static_quant",
            "pytorch_ipex_int8_dynamic_quant",
        ]
        # features that can be standalone (either use alone or use with "backend"):
        standalones_pool = [
+           "pytorch_ipex_xpu",
            "pytorch_mixed_precision_cpu",
            "pytorch_channels_last",
        ]
@@ -906,6 +912,8 @@ def superbench(
                 continue
             if "pytorch_inc_static_quant_ipex" in features and "pytorch_mixed_precision_cpu" in features:
                 continue
+            if "pytorch_inc_static_quant_ipex_xpu" in features and "pytorch_mixed_precision_cpu" in features:
+                continue
             if "pytorch_inc_dynamic_quant" in features and "pytorch_mixed_precision_cpu" in features:
                 continue

@@ -960,6 +968,8 @@ def remove_if_have(list, element):
                 features_display = "Intel INT8 (Static)"
             elif features == ["pytorch_inc_static_quant_ipex"]:
                 features_display = "Intel INT8 (IPEX)"
+            elif features == ["pytorch_inc_static_quant_ipex_xpu"]:
+                features_display = "Intel INT8 (IPEX XPU)"
             elif features == ["pytorch_inc_bf16"]:
                 features_display = "Intel BF16"
             elif features == []:
@@ -1047,6 +1057,8 @@ def remove_if_have(list, element):
             best_optimization_display = "Intel INT8 (Static)"
         elif list_optimization_set_top3[0] == ["pytorch_inc_static_quant_ipex"]:
             best_optimization_display = "Intel INT8 (IPEX)"
+        elif list_optimization_set_top3[0] == ["pytorch_inc_static_quant_ipex_xpu"]:
+            best_optimization_display = "Intel INT8 (IPEX XPU)"
         elif list_optimization_set_top3[0] == ["pytorch_inc_bf16"]:
             best_optimization_display = "Intel BF16"
         elif list_optimization_set_top3[0] == []:

(NOTE: indentation within these reconstructed hunks is inferred — the page scrape stripped leading whitespace; verify against the repository before applying.)

neural_coder/launcher.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ def execute(
             args.opt = "pytorch_inc_static_quant_fx"
         if args.approach == "static_ipex":
             args.opt = "pytorch_inc_static_quant_ipex"
+        if args.approach == "static_ipex_xpu":
+            args.opt = "pytorch_inc_static_quant_ipex_xpu"
         if args.approach == "dynamic":
             args.opt = "pytorch_inc_dynamic_quant"
         if args.approach == "auto":

0 commit comments

Comments
 (0)