[ez] Use noquant to make dashboard logic easier (#1430)

jerryzh168 · web-flow · commit a03ca99a6dba · 2024-12-17T18:35:36.000-08:00
Summary:
A small fix to the JSON results written by the llama, sam, and sam2 benchmarks: the
non-quantized baseline is now always reported with dtype "noquant", which makes it
easier to calculate the geomean speedup of autoquant vs. noquant.

Test Plan:
Tested locally.

Reviewers:

Subscribers:

Tasks:

Tags:
diff --git a/examples/sam2_amg_server/server.py b/examples/sam2_amg_server/server.py
@@ -658,7 +658,7 @@ def main(checkpoint_path,
             headers = ["name", "dtype", "device", "arch", "metric", "actual", "target"]
             name = "sam2-" + model_type
             arch = get_arch_name()
-            dtype = "autoquant" if use_autoquant else ("compile" if fast else "base")
+            dtype = "autoquant" if use_autoquant else "noquant"
             avg_time_per_run, max_memory_allocated_bytes, max_memory_allocated_percentage = result
             memory_result = [name, dtype, device, arch, "memory(MiB)", max_memory_allocated_bytes, None]
             memory_percent_result = [name, dtype, device, arch, "memory(%)", max_memory_allocated_percentage, None]
diff --git a/torchao/_models/llama/generate.py b/torchao/_models/llama/generate.py
@@ -940,7 +940,7 @@ def callback(x):
         headers = ["name", "dtype", "device", "arch", "metric", "actual", "target"]
         name = checkpoint_path.parent.name
         arch = get_arch_name()
-        dtype = quantization or str(precision)
+        dtype = quantization or "noquant"
         memory_result = [name, dtype, device, arch, "mem/s", bandwidth, None]
         performance_result = [name, dtype, device, arch, "tok/s", tokpersec, None]
         write_json_result = write_json_result_local if output_json_local else write_json_result_ossci
diff --git a/torchao/_models/sam/eval_combo.py b/torchao/_models/sam/eval_combo.py
@@ -467,7 +467,7 @@ def mlp_only(mod, name):
         headers = ["name", "dtype", "device", "arch", "metric", "actual", "target"]
         name = sam_model_type
         arch = get_arch_name()
-        dtype = compress or str(use_half) or "torch.float32"
+        dtype = compress or "noquant"
         memory_result = [name, dtype, device, arch, "memory(MiB)", max_memory_allocated_bytes, None]
         performance_result = [name, dtype, device, arch, "img_s(avg)", img_s, None]
         write_json_result = write_json_result_local if output_json_local else write_json_result_ossci
diff --git a/torchao/_models/sam2/build_sam.py b/torchao/_models/sam2/build_sam.py
@@ -87,8 +87,6 @@ def build_sam2(
             "++model.sam_mask_decoder_extra_args.dynamic_multimask_stability_thresh=0.98",
         ]
     # Read config and init model
-    import os
-    print("cur path:", os.getcwd())
     cfg = compose(config_name=config_file, overrides=hydra_overrides_extra)
     OmegaConf.resolve(cfg)
     model = instantiate(cfg.model, _recursive_=True)

Original file line number	Diff line number	Diff line change
`@@ -87,8 +87,6 @@ def build_sam2(`
`87`	`87`	`"++model.sam_mask_decoder_extra_args.dynamic_multimask_stability_thresh=0.98",`
`88`	`88`	`]`
`89`	`89`	`# Read config and init model`
`90`		`- import os`
`91`		`- print("cur path:", os.getcwd())`
`92`	`90`	`cfg = compose(config_name=config_file, overrides=hydra_overrides_extra)`
`93`	`91`	`OmegaConf.resolve(cfg)`
`94`	`92`	`model = instantiate(cfg.model, _recursive_=True)`