diff --git a/experiments/README.md b/experiments/README.md index 981f2b4..bd52688 100644 --- a/experiments/README.md +++ b/experiments/README.md @@ -80,7 +80,7 @@ $ python run_experiments.py 16 vit_b For CPU platform, set SEGMENT_ANYTHING_FAST_USE_FLASH_4 as 0, since Custom flash attention kernels were written specifically for A100. ``` -$ SEGMENT_ANYTHING_FAST_USE_FLASH_4=0 python run_experiments.py 16 vit_b --run-experiments --num-workers 32 --device cpu +$ SEGMENT_ANYTHING_FAST_USE_FLASH_4=0 python run_experiments.py 16 vit_b --run-experiments --num-workers 32 --device cpu --num_iter 10 ``` If at any point you run into issue, please note that you can increase verbosity by adding `--capture_output False` to above command. Also, please don't hesitate to open an issue. diff --git a/experiments/eval_combo.py b/experiments/eval_combo.py index 0b32d8f..b942f21 100644 --- a/experiments/eval_combo.py +++ b/experiments/eval_combo.py @@ -185,7 +185,8 @@ def build_results(batched_data_iter, use_compile_decoder, use_nested_tensor, pad_input_image_batch, - use_fullgraph=False): + use_fullgraph=False, + num_iter=None): # TODO: Re-enable this for datapoints assert not use_compile_decoder @@ -229,6 +230,8 @@ def build_results(batched_data_iter, else: partial_batch = True batch_idx += 1 + if num_iter is not None and batch_idx > num_iter: + break avg_ms_per_img = None if num_images > 0: @@ -307,7 +310,8 @@ def run( memory_path=None, use_local_sam_fork=False, use_compiler_settings=False, - device="cuda" + device="cuda", + num_iter=None ): from torch._inductor import config as inductorconfig inductorconfig.triton.unique_kernel_names = True @@ -432,7 +436,8 @@ def run( use_compile, use_compile_decoder, use_nested_tensor, - pad_input_image_batch) + pad_input_image_batch, + num_iter=num_iter) if compress == "static_quant": from static_quant import get_x_absmax diff --git a/experiments/run_experiments.py b/experiments/run_experiments.py index b37e3fa..5a8bf27 100755 --- 
a/experiments/run_experiments.py +++ b/experiments/run_experiments.py @@ -46,7 +46,8 @@ def run_experiment(experiments_data, profile_path=None, profile_top=False, memory_path=None, - device="cuda"): + device="cuda", + num_iter=None): root_cmd = ["python", "eval_combo.py", "--coco_root_dir", f"{experiments_data}/datasets/coco2017", @@ -86,6 +87,8 @@ def run_experiment(experiments_data, if extra_args is None: extra_args = [] args = args + ["--device", device] + if num_iter is not None: + args = args + ["--num_iter", str(num_iter)] args = args + extra_args if print_header: args = args + ["--print_header", "True"] @@ -148,7 +151,8 @@ def run(batch_size, print_header=True, capture_output=True, local_fork_only=False, - device="cuda"): + device="cuda", + num_iter=None): assert model == "vit_b" or model == "vit_h" @@ -159,7 +163,8 @@ def run(batch_size, batch_size=batch_size, num_workers=num_workers, capture_output=capture_output, - device=device) + device=device, + num_iter=num_iter) print_header = True if run_traces: