File tree Expand file tree Collapse file tree 1 file changed +5
-5
lines changed
torchao/_models/mixtral-moe Expand file tree Collapse file tree 1 file changed +5
-5
lines changed Original file line number Diff line number Diff line change @@ -386,16 +386,16 @@ def callback(x):
386
386
387
387
if not interactive:
388
388
pass
389
- # print(tokenizer.decode(y[0].tolist()))
389
+ print(tokenizer.decode(y[0].tolist()))
390
390
else:
391
391
print()
392
392
tokens_generated = y.size(-1) - prompt_length
393
393
tokens_sec = tokens_generated / t
394
394
aggregate_metrics["tokens_per_sec"].append(tokens_sec)
395
- # print(
396
- # f"Time for inference {i + 1}: {t:.02f} sec total, {tokens_sec:.02f} tokens/sec"
397
- # )
398
- # print(f"Bandwidth achieved: {model_size * tokens_sec / 1e9:.02f} GB/s")
395
+ print(
396
+ f"Time for inference {i + 1}: {t:.02f} sec total, {tokens_sec:.02f} tokens/sec"
397
+ )
398
+ print(f"Bandwidth achieved: {model_size * tokens_sec / 1e9:.02f} GB/s")
399
399
400
400
if i == 0 and device == "cuda" and memory_profile is not None:
401
401
snapshot = torch.cuda.memory._snapshot()
You can’t perform that action at this time.
0 commit comments