Skip to content

Commit e18e520

Browse files
committed
removing test code
Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags:
1 parent 330f69e commit e18e520

File tree

1 file changed

+5
-5
lines changed

1 file changed

+5
-5
lines changed

torchao/_models/mixtral-moe/generate.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -386,16 +386,16 @@ def callback(x):
386386

387387
if not interactive:
388388
pass
389-
# print(tokenizer.decode(y[0].tolist()))
389+
print(tokenizer.decode(y[0].tolist()))
390390
else:
391391
print()
392392
tokens_generated = y.size(-1) - prompt_length
393393
tokens_sec = tokens_generated / t
394394
aggregate_metrics["tokens_per_sec"].append(tokens_sec)
395-
# print(
396-
# f"Time for inference {i + 1}: {t:.02f} sec total, {tokens_sec:.02f} tokens/sec"
397-
# )
398-
# print(f"Bandwidth achieved: {model_size * tokens_sec / 1e9:.02f} GB/s")
395+
print(
396+
f"Time for inference {i + 1}: {t:.02f} sec total, {tokens_sec:.02f} tokens/sec"
397+
)
398+
print(f"Bandwidth achieved: {model_size * tokens_sec / 1e9:.02f} GB/s")
399399

400400
if i == 0 and device == "cuda" and memory_profile is not None:
401401
snapshot = torch.cuda.memory._snapshot()

0 commit comments

Comments (0)