Skip to content

Commit fc0609b

Browse files
committed
Add --model-dtype (pure bfloat16/float16) support to inference.py
1 parent 8ce197e commit fc0609b

File tree

1 file changed

+11
-2
lines changed

1 file changed

+11
-2
lines changed

inference.py

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -105,6 +105,8 @@
105105
help='use Native AMP for mixed precision training')
106106
parser.add_argument('--amp-dtype', default='float16', type=str,
107107
help='lower precision AMP dtype (default: float16)')
108+
parser.add_argument('--model-dtype', default=None, type=str,
109+
help='Model dtype override (non-AMP) (default: float32)')
108110
parser.add_argument('--fuser', default='', type=str,
109111
help="Select jit fuser. One of ('', 'te', 'old', 'nvfuser')")
110112
parser.add_argument('--model-kwargs', nargs='*', default={}, action=ParseKwargs)
@@ -161,9 +163,15 @@ def main():
161163

162164
device = torch.device(args.device)
163165

166+
model_dtype = None
167+
if args.model_dtype:
168+
assert args.model_dtype in ('float32', 'float16', 'bfloat16')
169+
model_dtype = getattr(torch, args.model_dtype)
170+
164171
# resolve AMP arguments based on PyTorch / Apex availability
165172
amp_autocast = suppress
166173
if args.amp:
174+
assert model_dtype is None or model_dtype == torch.float32, 'float32 model dtype must be used with AMP'
167175
assert args.amp_dtype in ('float16', 'bfloat16')
168176
amp_dtype = torch.bfloat16 if args.amp_dtype == 'bfloat16' else torch.float16
169177
amp_autocast = partial(torch.autocast, device_type=device.type, dtype=amp_dtype)
@@ -201,7 +209,7 @@ def main():
201209
if args.test_pool:
202210
model, test_time_pool = apply_test_time_pool(model, data_config)
203211

204-
model = model.to(device)
212+
model = model.to(device=device, dtype=model_dtype)
205213
model.eval()
206214
if args.channels_last:
207215
model = model.to(memory_format=torch.channels_last)
@@ -237,6 +245,7 @@ def main():
237245
use_prefetcher=True,
238246
num_workers=workers,
239247
device=device,
248+
img_dtype=model_dtype or torch.float32,
240249
**data_config,
241250
)
242251

@@ -280,7 +289,7 @@ def main():
280289
np_labels = to_label(np_indices)
281290
all_labels.append(np_labels)
282291

283-
all_outputs.append(output.cpu().numpy())
292+
all_outputs.append(output.float().cpu().numpy())
284293

285294
# measure elapsed time
286295
batch_time.update(time.time() - end)

0 commit comments

Comments (0)