
Commit df3d64f

improve 3x ut on bf16 supported machine (#2163)
Signed-off-by: changwangss <chang1.wang@intel.com>
1 parent: b8d8ac0

File tree: 5 files changed, +15 -5 lines


neural_compressor/torch/algorithms/smooth_quant/smooth_quant.py (3 additions, 1 deletion)

@@ -393,7 +393,9 @@ def _ipex_post_quant_process(model, example_inputs, use_bf16, inplace=False):
     Returns:
         A converted jit model.
     """
-    if use_bf16 and (CpuInfo().bf16 or os.getenv("FORCE_BF16") == "1"):  # pragma: no cover
+    if (
+        use_bf16 and (not os.getenv("FORCE_FP32") == "1") and (CpuInfo().bf16 or os.getenv("FORCE_BF16") == "1")
+    ):  # pragma: no cover
         with torch.no_grad():
             with torch.cpu.amp.autocast():
                 model = ipex.quantization.convert(model, inplace=inplace)
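The new guard has three gates: the caller must request BF16, FORCE_FP32 must not be "1", and either the CPU reports BF16 support or FORCE_BF16 is "1". A minimal sketch of the same precedence, with a stubbed CpuInfo and a hypothetical helper name (should_convert_in_bf16 is not from the repo):

import os


class _CpuInfoStub:
    """Stand-in for neural_compressor's CpuInfo; .bf16 reports hardware BF16 support."""

    bf16 = True  # assume a BF16-capable CPU for this sketch


def should_convert_in_bf16(use_bf16: bool, cpu_info=_CpuInfoStub()) -> bool:
    """Mirrors the patched guard: FORCE_FP32=1 overrides everything else."""
    if not use_bf16:
        return False
    if os.getenv("FORCE_FP32") == "1":  # explicit opt-out added by this commit
        return False
    return cpu_info.bf16 or os.getenv("FORCE_BF16") == "1"


# FORCE_FP32 wins over both hardware support and FORCE_BF16:
os.environ["FORCE_FP32"] = "1"
os.environ["FORCE_BF16"] = "1"
assert should_convert_in_bf16(True) is False

del os.environ["FORCE_FP32"]
assert should_convert_in_bf16(True) is True

FORCE_FP32 therefore vetoes BF16 conversion even on machines where CpuInfo().bf16 is true, which is what the test changes below rely on.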

neural_compressor/torch/algorithms/static_quant/static_quant.py (3 additions, 1 deletion)

@@ -185,7 +185,9 @@ def _ipex_post_quant_process(model, example_inputs, use_bf16, inplace=False):
     Returns:
         A converted jit model.
    """
-    if use_bf16 and (CpuInfo().bf16 or os.getenv("FORCE_BF16") == "1"):  # pragma: no cover
+    if (
+        use_bf16 and (not os.getenv("FORCE_FP32") == "1") and (CpuInfo().bf16 or os.getenv("FORCE_BF16") == "1")
+    ):  # pragma: no cover
         with torch.no_grad():
             with torch.cpu.amp.autocast():
                 model = ipex.quantization.convert(model, inplace=inplace)

test/3x/torch/quantization/test_smooth_quant.py (2 additions, 1 deletion)

@@ -1,3 +1,4 @@
+import os
 import copy
 import shutil

@@ -9,7 +10,7 @@
 if is_ipex_available():
     import intel_extension_for_pytorch as ipex
-
+os.environ["FORCE_FP32"] = "1"

 class Model(torch.nn.Module):
     device = torch.device("cpu")

test/3x/torch/quantization/test_static_quant.py (2 additions, 1 deletion)

@@ -1,3 +1,4 @@
+import os
 import copy
 import shutil

@@ -22,7 +23,7 @@
 from neural_compressor.torch.utils.auto_accelerator import auto_detect_accelerator

 device = auto_detect_accelerator().current_device()
-
+os.environ["FORCE_FP32"] = "1"

 def build_simple_torch_model():
     class Model(torch.nn.Module):
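Both test modules above set the override once at import time. That is sufficient because the guard calls os.getenv on every invocation, reading the live process environment rather than a snapshot taken at startup. A small self-contained illustration (guard_allows_bf16 is a hypothetical stand-in for the real guard):

import os

os.environ["FORCE_FP32"] = "1"  # set once, at module import time


def guard_allows_bf16() -> bool:
    # Stand-in for the patched check: re-reads the environment on each call.
    return not os.getenv("FORCE_FP32") == "1"


# Every later call in the same process observes the override.
assert guard_allows_bf16() is False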

test/3x/torch/quantization/weight_only/test_transformers.py (5 additions, 1 deletion)

@@ -8,6 +8,7 @@
 from transformers import AutoTokenizer

 from neural_compressor.torch.utils import get_ipex_version
+from neural_compressor.utils.utility import CpuInfo
 from neural_compressor.transformers import (
     AutoModelForCausalLM,
     Qwen2VLForConditionalGeneration,

@@ -107,7 +108,10 @@ def test_quantization_for_llm(self):
         woq_model = AutoModelForCausalLM.from_pretrained(model_name_or_path, quantization_config=woq_config)
         woq_model.eval()
         output = woq_model(dummy_input)
-        assert isclose(float(output[0][0][0][0]), 0.18400897085666656, rel_tol=1e-04)
+        if CpuInfo().bf16:
+            assert isclose(float(output[0][0][0][0]), 0.19140625, rel_tol=1e-04)
+        else:
+            assert isclose(float(output[0][0][0][0]), 0.18400897085666656, rel_tol=1e-04)

     def test_save_load(self):
         model_name_or_path = self.model_name_or_path
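Unlike the smooth/static quant tests, this WOQ test branches on CpuInfo().bf16 instead of forcing FP32, because the BF16 code path legitimately produces a different logit: BF16 keeps only 8 mantissa bits, so every intermediate value rounds more coarsely than in FP32 and the final output lands on a different number. A small illustration of that granularity (the rounding shown is of the FP32 reference itself, just to show the step size; the test's 0.19140625 comes from an actual BF16 forward pass):

from math import isclose

import torch

fp32_ref = 0.18400897085666656

# Casting the FP32 reference to bfloat16 already moves it by far more than
# the test's rel_tol, which is why one reference value cannot serve both paths.
bf16_rounded = torch.tensor(fp32_ref, dtype=torch.bfloat16).item()
print(bf16_rounded)  # 0.18359375 on any platform; bf16 has ~3 decimal digits

# rel_tol=1e-04 tolerates FP32 run-to-run noise ...
assert isclose(fp32_ref, fp32_ref * (1 + 5e-5), rel_tol=1e-4)
# ... but not the gap between the FP32 and BF16 expected outputs.
assert not isclose(fp32_ref, 0.19140625, rel_tol=1e-4)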
