
Commit 098401d

Update example scripts for ONNXRT DML EP (#1455)
Signed-off-by: chensuyue <suyue.chen@intel.com>
Co-authored-by: Wang, Mengni <mengni.wang@intel.com>
1 parent: f11c51b

File tree (8 files changed, +96 −35 lines):
  • examples/onnxrt
    • image_recognition
      • onnx_model_zoo
        • shufflenet/quantization/ptq_static
        • vgg16/quantization/ptq_static
      • resnet50_torchvision/quantization/ptq_static
    • nlp
      • bert/quantization/ptq_static
      • huggingface_model/text_classification/quantization/ptq_dynamic
    • object_detection/ssd_mobilenet_v1/quantization/ptq_static
  • neural_compressor


examples/onnxrt/image_recognition/onnx_model_zoo/shufflenet/quantization/ptq_static/main.py

Lines changed: 2 additions & 4 deletions
@@ -264,7 +264,7 @@ def eval_func(model, dataloader, metric, backend):
     model = onnx.load(args.model_path)
     dataloader = Dataloader(args.dataset_location, args.label_path, args.batch_size)
     top1 = TopK()
-    backend = 'default' if args.device == 'cpu' else 'onnxrt_dml_ep'
+    backend = 'onnxrt_dml_ep' if args.device == 'npu' else 'default'
     def eval(onnx_model):
         return eval_func(onnx_model, dataloader, top1, backend)

@@ -289,7 +289,5 @@ def eval(onnx_model):
                                          device=args.device,
                                          backend=backend)

-        q_model = quantization.fit(model, config, calib_dataloader=dataloader,
-                                   eval_func=eval)
-
+        q_model = quantization.fit(model, config, calib_dataloader=dataloader, eval_func=eval)
         q_model.save(args.output_model)
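Note: every example in this commit derives the backend string the same way and then maps it to exactly one ONNX Runtime execution provider. A minimal sketch of that mapping, assuming an onnxruntime build that ships DmlExecutionProvider (the helper name pick_provider and the model path are illustrative, not part of the examples):

    import onnx
    import onnxruntime as ort

    def pick_provider(device):
        # 'npu' selects the DirectML EP; anything else stays on CPU.
        backend = 'onnxrt_dml_ep' if device == 'npu' else 'default'
        return 'DmlExecutionProvider' if backend == 'onnxrt_dml_ep' else 'CPUExecutionProvider'

    model = onnx.load("model.onnx")  # hypothetical path
    sess = ort.InferenceSession(model.SerializeToString(),
                                providers=[pick_provider('npu')])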

examples/onnxrt/image_recognition/onnx_model_zoo/vgg16/quantization/ptq_static/main.py

Lines changed: 21 additions & 7 deletions
@@ -191,7 +191,8 @@ def fetcher(ids):
 
 def eval_func(model, dataloader, metric):
     metric.reset()
-    sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers())
+    provider = 'DmlExecutionProvider' if backend == 'onnxrt_dml_ep' else 'CPUExecutionProvider'
+    sess = ort.InferenceSession(model.SerializeToString(), providers=[provider])
     input_names = [i.name for i in sess.get_inputs()]
     for input_data, label in dataloader:
         output = sess.run(None, dict(zip(input_names, [input_data])))

@@ -252,29 +253,42 @@ def eval_func(model, dataloader, metric):
         default=1,
         type=int,
     )
+    parser.add_argument(
+        '--device',
+        type=str,
+        default='cpu',
+        choices=['cpu', 'npu'],
+    )
     args = parser.parse_args()
+    backend = 'onnxrt_dml_ep' if args.device == 'npu' else 'default'
 
     model = onnx.load(args.model_path)
     dataloader = Dataloader(args.dataset_location, args.label_path, args.batch_size)
     top1 = TopK()
+
     def eval(onnx_model):
         return eval_func(onnx_model, dataloader, top1)
 
     if args.benchmark:
         if args.mode == 'performance':
             from neural_compressor.benchmark import fit
             from neural_compressor.config import BenchmarkConfig
-            conf = BenchmarkConfig(warmup=10, iteration=1000, cores_per_instance=4, num_of_instance=1)
+            conf = BenchmarkConfig(warmup=10,
+                                   iteration=1000,
+                                   cores_per_instance=4,
+                                   num_of_instance=1,
+                                   device=args.device,
+                                   backend=backend)
             fit(model, conf, b_dataloader=dataloader)
         elif args.mode == 'accuracy':
             acc_result = eval(model)
             print("Batch size = %d" % dataloader.batch_size)
             print("Accuracy: %.5f" % acc_result)
     if args.tune:
         from neural_compressor import quantization, PostTrainingQuantConfig
-        config = PostTrainingQuantConfig(quant_format=args.quant_format)
+        config = PostTrainingQuantConfig(quant_format=args.quant_format,
+                                         device=args.device,
+                                         backend=backend)
 
-        q_model = quantization.fit(model, config, calib_dataloader=dataloader,
-                                   eval_func=eval)
-
-        q_model.save(args.output_model)
+        q_model = quantization.fit(model, config, calib_dataloader=dataloader, eval_func=eval)
+        q_model.save(args.output_model)
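The motivation for dropping ort.get_available_providers() in eval_func: that call returns every provider compiled into the installed wheel, in priority order, so on a DirectML-enabled build even the default CPU path would get scheduled onto DML. Pinning a single provider keeps --device authoritative. A quick check, assuming the onnxruntime-directml package is installed:

    import onnxruntime as ort

    # On a DirectML build this typically prints
    # ['DmlExecutionProvider', 'CPUExecutionProvider'], and passing that
    # whole list lets ONNX Runtime prefer DML regardless of the device flag.
    print(ort.get_available_providers())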

examples/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/main.py

Lines changed: 17 additions & 4 deletions
@@ -115,6 +115,7 @@ def result(self):
             return 0
         return self.num_correct / self.num_sample
 
+
 class Dataloader:
     def __init__(self, dataset_location, image_list, batch_size):
         self.batch_size = batch_size

@@ -206,15 +207,18 @@ def fetcher(ids):
         except StopIteration:
             return
 
+
 def eval_func(model, dataloader, metric):
     metric.reset()
-    sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers())
+    provider = 'DmlExecutionProvider' if backend == 'onnxrt_dml_ep' else 'CPUExecutionProvider'
+    sess = ort.InferenceSession(model.SerializeToString(), providers=[provider])
     input_names = [i.name for i in sess.get_inputs()]
     for input_data, label in dataloader:
         output = sess.run(None, dict(zip(input_names, [input_data])))
         metric.update(output, label)
     return metric.result()
 
+
 if __name__ == "__main__":
     logger.info("Evaluating ONNXRuntime full precision accuracy and performance:")
     parser = argparse.ArgumentParser(

@@ -275,7 +279,14 @@ def eval_func(model, dataloader, metric):
         default=1,
         type=int,
     )
+    parser.add_argument(
+        '--device',
+        type=str,
+        default='cpu',
+        choices=['cpu', 'npu'],
+    )
     args = parser.parse_args()
+    backend = 'onnxrt_dml_ep' if args.device == 'npu' else 'default'
 
     model = onnx.load(args.model_path)
     dataloader = Dataloader(args.dataset_location, args.label_path, args.batch_size)

@@ -297,6 +308,8 @@ def eval(onnx_model):
                 cores_per_instance=4,
                 num_of_instance=1,
                 diagnosis=args.diagnose,
+                device=args.device,
+                backend=backend,
             )
             fit(model, conf, b_dataloader=dataloader)
         elif args.mode == 'accuracy':

@@ -308,9 +321,9 @@ def eval(onnx_model):
         config = PostTrainingQuantConfig(
             quant_format=args.quant_format,
             diagnosis=args.diagnose,
+            device=args.device,
+            backend=backend
         )
 
-        q_model = quantization.fit(model, config, calib_dataloader=dataloader,
-                                   eval_func=eval)
-
+        q_model = quantization.fit(model, config, calib_dataloader=dataloader, eval_func=eval)
         q_model.save(args.output_model)
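Both config objects in these examples now carry the device/backend pair explicitly. A condensed sketch of that wiring, using the same argument names and default values as the diff:

    import argparse
    from neural_compressor import PostTrainingQuantConfig
    from neural_compressor.config import BenchmarkConfig

    parser = argparse.ArgumentParser()
    parser.add_argument('--device', type=str, default='cpu', choices=['cpu', 'npu'])
    args = parser.parse_args()
    backend = 'onnxrt_dml_ep' if args.device == 'npu' else 'default'

    # Benchmark and quantization configs receive the same pair.
    bench_conf = BenchmarkConfig(warmup=10, iteration=1000,
                                 cores_per_instance=4, num_of_instance=1,
                                 device=args.device, backend=backend)
    quant_conf = PostTrainingQuantConfig(device=args.device, backend=backend)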

examples/onnxrt/nlp/bert/quantization/ptq_static/main.py

Lines changed: 15 additions & 2 deletions
@@ -350,8 +350,17 @@ def result(self):
         choices=["distilbert", "bert", "mobilebert", "roberta"],
         help="model type"
     )
+    parser.add_argument(
+        '--device',
+        type=str,
+        default='cpu',
+        choices=['cpu', 'npu'],
+    )
     args = parser.parse_args()
 
+    # set config for npu test
+    backend = 'onnxrt_dml_ep' if args.device == 'npu' else 'default'
+
     dataset = ONNXRTBertDataset(args.model_path,
                                 data_dir=args.data_path,
                                 model_name_or_path=args.model_name_or_path,

@@ -364,8 +373,8 @@ def result(self):
 
     def eval_func(model):
         metric.reset()
-        session = onnxruntime.InferenceSession(model.SerializeToString(),
-                                               providers=onnxruntime.get_available_providers())
+        provider = 'DmlExecutionProvider' if backend == 'onnxrt_dml_ep' else 'CPUExecutionProvider'
+        session = onnxruntime.InferenceSession(model.SerializeToString(), providers=[provider])
         ort_inputs = {}
         len_inputs = len(session.get_inputs())
         inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)]

@@ -388,6 +397,8 @@ def eval_func(model):
                 iteration=100,
                 cores_per_instance=4,
                 num_of_instance=1,
+                device=args.device,
+                backend=backend
             )
             fit(model, conf, b_dataloader=dataloader)
         elif args.mode == "accuracy":

@@ -425,6 +436,8 @@ def eval_func(model):
             quant_format=args.quant_format,
             calibration_sampling_size=[8, 16, 32],
             recipes={"optypes_to_exclude_output_quant": ["MatMul", "Gemm", "Attention", "FusedGemm"]},
+            device=args.device,
+            backend=backend
         )
         q_model = quantization.fit(model,
                                    config,
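The bert example's recipes entry is unchanged by this commit but worth reading alongside the new arguments: it keeps the output tensors of MatMul/Gemm/Attention/FusedGemm ops in float while the rest of the model is quantized. A hedged sketch of the resulting config (the quant_format and device values are illustrative; the script reads them from --quant_format and --device):

    from neural_compressor import PostTrainingQuantConfig

    config = PostTrainingQuantConfig(
        quant_format="QOperator",  # assumed value for illustration
        calibration_sampling_size=[8, 16, 32],
        recipes={"optypes_to_exclude_output_quant": ["MatMul", "Gemm", "Attention", "FusedGemm"]},
        device="npu",              # as passed via --device
        backend="onnxrt_dml_ep",
    )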

examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/main.py

Lines changed: 14 additions & 3 deletions
@@ -340,8 +340,15 @@ def result(self):
         default=768,
         type=int,
     )
+    parser.add_argument(
+        '--device',
+        type=str,
+        default='cpu',
+        choices=['cpu', 'npu'],
+    )
 
     args = parser.parse_args()
+    backend = 'onnxrt_dml_ep' if args.device == 'npu' else 'default'
 
     dataset = ONNXRTBertDataset(args.model_path,
                                 data_dir=args.data_path,

@@ -352,8 +359,8 @@ def result(self):
 
     def eval_func(model, *args):
         metric.reset()
-        session = ort.InferenceSession(model.SerializeToString(),
-                                       providers=ort.get_available_providers())
+        provider = 'DmlExecutionProvider' if backend == 'onnxrt_dml_ep' else 'CPUExecutionProvider'
+        session = ort.InferenceSession(model.SerializeToString(), providers=[provider])
         ort_inputs = {}
         len_inputs = len(session.get_inputs())
         inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)]

@@ -374,7 +381,9 @@ def eval_func(model, *args):
             from neural_compressor.config import BenchmarkConfig
             conf = BenchmarkConfig(iteration=100,
                                    cores_per_instance=28,
-                                   num_of_instance=1)
+                                   num_of_instance=1,
+                                   device=args.device,
+                                   backend=backend)
             fit(model, conf, b_dataloader=dataloader)
         elif args.mode == 'accuracy':
             acc_result = eval_func(model)

@@ -413,6 +422,8 @@ def eval_func(model, *args):
         if args.model_name_or_path == 'Alireza1044/albert-base-v2-sst2':
             specific_quant_config['recipes'] = {'first_conv_or_matmul_quantization': False}
         config = PostTrainingQuantConfig(approach='dynamic',
+                                         device=args.device,
+                                         backend=backend,
                                          **specific_quant_config)
         q_model = quantization.fit(model,
                                    config,
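For the dynamic-quantization example, the new arguments slot in next to the model-specific options the script already collects in a dict. A sketch of the pattern, with the model name check taken from the diff (the stand-in variables replace the script's parsed arguments):

    from neural_compressor import PostTrainingQuantConfig

    model_name_or_path = 'Alireza1044/albert-base-v2-sst2'  # stand-in for args.model_name_or_path
    device, backend = 'cpu', 'default'                      # stand-ins for the flag and derived backend

    specific_quant_config = {}
    if model_name_or_path == 'Alireza1044/albert-base-v2-sst2':
        # This model skips quantizing the first Conv/MatMul, as in the example.
        specific_quant_config['recipes'] = {'first_conv_or_matmul_quantization': False}
    config = PostTrainingQuantConfig(approach='dynamic',
                                     device=device,
                                     backend=backend,
                                     **specific_quant_config)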

examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/main.py

Lines changed: 17 additions & 6 deletions
@@ -27,9 +27,9 @@
 from data_utils import ComposeTransform, ResizeTransform, LabelBalanceCOCORawFilter
 
 logger = logging.getLogger(__name__)
-logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s',
-                    datefmt = '%m/%d/%Y %H:%M:%S',
-                    level = logging.WARN)
+logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
+                    datefmt='%m/%d/%Y %H:%M:%S',
+                    level=logging.WARN)
 logger.info("Evaluating ONNXRuntime full precision accuracy and performance:")
 parser = argparse.ArgumentParser(
     formatter_class=argparse.ArgumentDefaultsHelpFormatter

@@ -89,7 +89,14 @@
     default=16,
     help="quantization format"
 )
+parser.add_argument(
+    '--device',
+    type=str,
+    default='cpu',
+    choices=['cpu', 'npu'],
+)
 args = parser.parse_args()
+backend = 'onnxrt_dml_ep' if args.device == 'npu' else 'default'
 
 if __name__ == "__main__":
     model = onnx.load(args.model_path)

@@ -106,8 +113,8 @@
 
     def eval_func(model):
         metric.reset()
-        session = ort.InferenceSession(model.SerializeToString(),
-                                       providers=ort.get_available_providers())
+        provider = 'DmlExecutionProvider' if backend == 'onnxrt_dml_ep' else 'CPUExecutionProvider'
+        session = ort.InferenceSession(model.SerializeToString(), providers=[provider])
         ort_inputs = {}
         len_inputs = len(session.get_inputs())
         inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)]

@@ -143,6 +150,8 @@ def eval_func(model):
                 cores_per_instance=4,
                 num_of_instance=1,
                 diagnosis=args.diagnose,
+                device=args.device,
+                backend=backend,
             )
             fit(model, conf, b_dataloader=eval_dataloader)
         elif args.mode == 'accuracy':

@@ -161,6 +170,8 @@ def eval_func(model):
             quant_format=args.quant_format,
             calibration_sampling_size=[50],
             diagnosis=args.diagnose,
+            device=args.device,
+            backend=backend,
         )
         q_model = quantization.fit(model, config, calib_dataloader=calib_dataloader, eval_func=eval_func)
-        q_model.save(args.output_model)
+        q_model.save(args.output_model)

neural_compressor/adaptor/onnxrt.py

Lines changed: 9 additions & 8 deletions
@@ -1315,14 +1315,15 @@ def query_fw_capability(self, model):
         attention_matmul = []
         for _, node in enumerate(self.pre_optimized_model.nodes()):
             if node.op_type in ["Conv", "MatMul", "Attention"]:
-                # get first Conv or MatMul node
-                if len(first_quantizable_node) == 0:
-                    first_quantizable_node.append(node)
-
-                # get last Conv or MatMul node
-                if len(last_quantizable_node) != 0:
-                    last_quantizable_node.pop()
-                last_quantizable_node.append(node)
+                if node.op_type in optype_wise:
+                    # get first Conv or MatMul node
+                    if len(first_quantizable_node) == 0:
+                        first_quantizable_node.append(node)
+
+                    # get last Conv or MatMul node
+                    if len(last_quantizable_node) != 0:
+                        last_quantizable_node.pop()
+                    last_quantizable_node.append(node)
 
                 all_conv_matmul.append(node)
                 if node.op_type != "Conv":
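The adaptor change is a behavioral fix, not a reformat: previously the first/last bookkeeping ran for every Conv/MatMul/Attention node even when that op type was absent from optype_wise (the quantizable-op capability map), so first_quantizable_node or last_quantizable_node could end up pointing at a node that would never be quantized. A standalone restatement of the corrected loop (simplified; node objects are assumed to expose op_type):

    def first_and_last_quantizable(nodes, optype_wise):
        first, last = [], []
        for node in nodes:
            if node.op_type in ("Conv", "MatMul", "Attention") and node.op_type in optype_wise:
                if len(first) == 0:   # remember the first eligible node
                    first.append(node)
                if len(last) != 0:    # keep a single slot for the last one
                    last.pop()
                last.append(node)
        return first, last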

neural_compressor/benchmark.py

Lines changed: 1 addition & 1 deletion
@@ -513,7 +513,7 @@ def fit(model, conf, b_dataloader=None, b_func=None):
     assert sys.platform in ["linux", "win32", "darwin"], "platform not supported..."
     # disable multi-instance for running benchmark on GPU device
     set_all_env_var(conf)
-    if conf.device == "gpu" or sys.platform == "darwin":
+    if conf.device == "gpu" or conf.device == "npu" or sys.platform == "darwin":
         set_env_var("NC_ENV_CONF", True, overwrite_existing=True)
 
     if conf.diagnosis and os.environ.get("NC_ENV_CONF", None) in [None, "False"]:
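The benchmark change extends the existing single-instance fallback to the NPU device. A condensed restatement of the updated guard (the helper name is ours; the real code sets the NC_ENV_CONF environment variable inline):

    import sys

    def needs_single_instance(device):
        # GPU, NPU, and macOS all skip multi-instance benchmarking.
        return device in ("gpu", "npu") or sys.platform == "darwin"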
