|
6 | 6 | import math
|
7 | 7 | import segment_anything_fast
|
8 | 8 | import time
|
| 9 | +import resource |
9 | 10 |
|
10 | 11 | torch._dynamo.config.cache_size_limit = 50000
|
11 | 12 |
|
@@ -257,7 +258,10 @@ def profile_top_runner(fn, *args, **kwargs):
|
257 | 258 | torch.profiler.ProfilerActivity.CUDA],
|
258 | 259 | record_shapes=True) as prof:
|
259 | 260 | result = fn(*args, **kwargs)
|
260 |
| - print(prof.key_averages().table(sort_by="self_cuda_time_total", row_limit=-1)) |
| 261 | + if torch.cuda.is_available(): |
| 262 | + print(prof.key_averages().table(sort_by="self_cuda_time_total", row_limit=-1)) |
| 263 | + else: |
| 264 | + print(prof.key_averages().table(sort_by="self_cpu_time_total", row_limit=-1)) |
261 | 265 | return result
|
262 | 266 |
|
263 | 267 |
|
@@ -444,15 +448,22 @@ def run(
|
444 | 448 | batch_ms_batch_size = (avg_ms_per_img * num_images) / num_batches / batch_size
|
445 | 449 |
|
446 | 450 | mIoU = calculate_miou(results, mask_debug_out_dir, True, cat_id_to_cat)
|
447 |
| - max_memory_allocated_bytes = torch.cuda.max_memory_allocated() |
448 |
| - _, total_memory = torch.cuda.mem_get_info() |
449 |
| - max_memory_allocated_percentage = int(100 * (max_memory_allocated_bytes / total_memory)) |
450 |
| - max_memory_allocated_bytes = max_memory_allocated_bytes >> 20 |
| 451 | + if torch.cuda.is_available(): |
| 452 | + max_memory_allocated_bytes = torch.cuda.max_memory_allocated() |
| 453 | + _, total_memory = torch.cuda.mem_get_info() |
| 454 | + max_memory_allocated_percentage = int(100 * (max_memory_allocated_bytes / total_memory)) |
| 455 | + max_memory_allocated_bytes = max_memory_allocated_bytes >> 20 |
| 456 | + else: |
| 457 | + import psutil |
| 458 | + total_memory = psutil.virtual_memory().total |
| 459 | + max_memory_allocated_bytes = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss |
| 460 | + max_memory_allocated_percentage = int(100 * (max_memory_allocated_bytes / (total_memory >> 10))) |
| 461 | + max_memory_allocated_bytes = max_memory_allocated_bytes >> 10 |
451 | 462 |
|
452 | 463 | if print_header:
|
453 |
| - print(",".join(["sam_model_type", "batch_size", "memory(MiB)", "memory(%)", "img_s(avg)", "batch_ms(avg)/batch_size", "mIoU", "use_compile", |
| 464 | + print(",".join(["device", "sam_model_type", "batch_size", "memory(MiB)", "memory(%)", "img_s(avg)", "batch_ms(avg)/batch_size", "mIoU", "use_compile", |
454 | 465 | "use_half", "compress", "epilogue_fusion_first", "use_compile_decoder", "use_nested_tensor", "use_rel_pos", "pad_input_image_batch", "num_workers", "num_batches", "num_images", "profile_path", "memory_path"]))
|
455 |
| - print(",".join(map(str, [sam_model_type, batch_size, max_memory_allocated_bytes, max_memory_allocated_percentage, img_s, batch_ms_batch_size, mIoU, use_compile, |
| 466 | + print(",".join(map(str, [device, sam_model_type, batch_size, max_memory_allocated_bytes, max_memory_allocated_percentage, img_s, batch_ms_batch_size, mIoU, use_compile, |
456 | 467 | use_half, compress, epilogue_fusion_first, use_compile_decoder, use_nested_tensor, use_rel_pos, pad_input_image_batch, num_workers, num_batches, num_images, profile_path, memory_path])))
|
457 | 468 |
|
458 | 469 |
|
|
0 commit comments