When I test the FPS of edgenext_x_small and edgenext_small on an RTX 2060 (notebook) GPU, I find that edgenext_small is actually faster than edgenext_x_small. Why would the larger model be faster?
The results are below. Each row is the average fps, time_mean, and time_std (in ms) over 100 inferences, and the last row ("result_average") is the average of the 10 rows above it.
# the command: python get_fps.py --model edgenext_small --finetune weights/edgenext_small.pth
{'fps': 112.7, 'time_mean': 8.9, 'time_std': 0.4}
{'fps': 115.1, 'time_mean': 8.7, 'time_std': 0.4}
{'fps': 115.6, 'time_mean': 8.6, 'time_std': 0.3}
{'fps': 115.2, 'time_mean': 8.7, 'time_std': 0.3}
{'fps': 114.9, 'time_mean': 8.7, 'time_std': 0.3}
{'fps': 113.1, 'time_mean': 8.8, 'time_std': 0.5}
{'fps': 111.7, 'time_mean': 9.0, 'time_std': 0.4}
{'fps': 114.9, 'time_mean': 8.7, 'time_std': 0.4}
{'fps': 114.4, 'time_mean': 8.7, 'time_std': 0.5}
{'fps': 117.4, 'time_mean': 8.5, 'time_std': 0.3}
result_average:
{'fps': 114.5, 'time_mean': 8.7, 'time_std': 0.4}
# the command: python get_fps.py --model edgenext_x_small --finetune weights/edgenext_x_small.pth
{'fps': 108.8, 'time_mean': 9.2, 'time_std': 0.5}
{'fps': 112.7, 'time_mean': 8.9, 'time_std': 0.5}
{'fps': 114.6, 'time_mean': 8.7, 'time_std': 0.4}
{'fps': 114.3, 'time_mean': 8.7, 'time_std': 0.4}
{'fps': 111.8, 'time_mean': 8.9, 'time_std': 0.6}
{'fps': 110.9, 'time_mean': 9.0, 'time_std': 0.5}
{'fps': 111.2, 'time_mean': 9.0, 'time_std': 0.5}
{'fps': 109.8, 'time_mean': 9.1, 'time_std': 0.5}
{'fps': 113.9, 'time_mean': 8.8, 'time_std': 0.5}
{'fps': 96.0, 'time_mean': 10.4, 'time_std': 2.9}
result_average:
{'fps': 110.4, 'time_mean': 9.1, 'time_std': 0.7}
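For what it's worth, edgenext_x_small should have clearly fewer parameters and FLOPs than edgenext_small, so the numbers above are not necessarily surprising: at batch size 1 a notebook GPU is often bound by kernel-launch and memory overheads rather than compute, so per-image latency does not have to track model size. A quick way to double-check the sizes of the two variants is to count parameters. This is only a sketch, and it assumes the edgenext_* variants are registered with timm's create_model; the `models` import that does the registration is an assumed path in this repo:

```python
# Sketch: compare parameter counts of the two variants.
# Assumes the edgenext_* models are registered with timm's create_model,
# e.g. by importing the repo's model definitions first (import path assumed).
from timm import create_model
import models  # noqa: F401  # assumed module that registers edgenext_*

for name in ('edgenext_x_small', 'edgenext_small'):
    model = create_model(name, pretrained=False)
    n_params = sum(p.numel() for p in model.parameters())
    print(f'{name}: {n_params / 1e6:.2f} M parameters')
```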
Here is the code I used (I just added a get_fps function to main.py):
```python
def get_fps(args, repetitions=120, num_warmup=20, infer_epoch=10):
    utils.init_distributed_mode(args)
    print(args)
    device = torch.device(args.device)

    # Eval/USI_eval configurations
    if args.eval:
        if args.usi_eval:
            args.crop_pct = 0.95
            model_state_dict_name = 'state_dict'
        else:
            model_state_dict_name = 'model_ema'
    else:
        model_state_dict_name = 'model'

    # load model
    model = create_model(
        args.model,
        pretrained=False,
        num_classes=args.nb_classes,
        drop_path_rate=args.drop_path,
        layer_scale_init_value=args.layer_scale_init_value,
        head_init_scale=1.0,
        input_res=args.input_size,
        classifier_dropout=args.classifier_dropout,
    )
    if args.finetune:
        checkpoint = torch.load(args.finetune, map_location="cpu")
        state_dict = checkpoint[model_state_dict_name]
        utils.load_state_dict(model, state_dict)

    # fuse Conv+BN layers for inference
    from mmcv.cnn import fuse_conv_bn
    model = fuse_conv_bn(model)
    model.to(device)
    model.eval()

    # enable cudnn autotuner speed-up
    torch.backends.cudnn.benchmark = True

    # init data
    data = torch.randn(1, 3, 256, 256, dtype=torch.float).to(device)

    # test fps
    result_average = {'fps': 0, 'time_mean': 0, 'time_std': 0}
    for _ in range(infer_epoch):
        result = {}
        infer_time = []
        for i in range(repetitions):
            torch.cuda.synchronize()
            start_time = time.perf_counter()
            # infer
            with torch.no_grad():
                model(data)
            torch.cuda.synchronize()
            elapsed = time.perf_counter() - start_time
            if i >= num_warmup:
                infer_time.append(elapsed)
        result['fps'] = (repetitions - num_warmup) / sum(infer_time)
        result['time_mean'] = np.mean(infer_time) * 1000
        result['time_std'] = np.std(infer_time) * 1000
        result_average['fps'] += result['fps']
        result_average['time_mean'] += result['time_mean']
        result_average['time_std'] += result['time_std']
        for key, value in result.items():
            result[key] = round(value, 1)
        print(result)
    for key, value in result_average.items():
        result_average[key] = round(value / infer_epoch, 1)
    print("result_average:")
    print(result_average)


if __name__ == '__main__':
    parser = argparse.ArgumentParser('EdgeNeXt training and evaluation script', parents=[get_args_parser()])
    args = parser.parse_args()
    if args.output_dir:
        Path(args.output_dir).mkdir(parents=True, exist_ok=True)
    # main(args)
    get_fps(args)
```
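With per-forward times this small (~9 ms at batch size 1), the measurement itself can also be a factor, since time.perf_counter() plus torch.cuda.synchronize() adds some host-side overhead on every iteration. As a cross-check, the same loop can be timed with CUDA events. This is only a sketch: `model` and `data` are assumed to have been prepared exactly as in get_fps() above (fused, moved to the GPU, and in eval mode).

```python
# Sketch: time the forward pass with CUDA events instead of time.perf_counter().
import numpy as np
import torch

def time_with_cuda_events(model, data, repetitions=120, num_warmup=20):
    starter = torch.cuda.Event(enable_timing=True)
    ender = torch.cuda.Event(enable_timing=True)
    timings_ms = []
    with torch.no_grad():
        for i in range(repetitions):
            starter.record()
            model(data)
            ender.record()
            torch.cuda.synchronize()   # wait until both recorded events have completed
            if i >= num_warmup:        # discard warm-up iterations
                timings_ms.append(starter.elapsed_time(ender))
    return float(np.mean(timings_ms)), float(np.std(timings_ms))

# usage: mean_ms, std_ms = time_with_cuda_events(model, data)
```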