From 658f82e19681666a47e518f9364dfe53699fa081 Mon Sep 17 00:00:00 2001
From: malmahrouqi3
Date: Tue, 8 Jul 2025 12:20:09 -0400
Subject: [PATCH] added nsight profiling to phoenix benchmark cases

Wrap the Phoenix benchmark invocation in `nsys profile` and add a workflow
step that prints the NVTX, CUDA API and GPU kernel summaries for the master
and PR checkouts.

The summary step skips any checkout that has no report, and skips entirely
when nsys is not on the runner's PATH, so the optional profiling output does
not fail the benchmark job. `--force-overwrite true` lets nsys replace a
report left over from an earlier run instead of keeping the stale one.

---
 .github/workflows/bench.yml        | 40 ++++++++++++++++++++++++++++++
 .github/workflows/phoenix/bench.sh |  4 +--
 2 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
index b5e89a2b5..58298dd05 100644
--- a/.github/workflows/bench.yml
+++ b/.github/workflows/bench.yml
@@ -92,6 +92,45 @@ jobs:
           (cd pr && . ./mfc.sh load -c ${{ matrix.flag }} -m g)
           (cd pr && ./mfc.sh bench_diff ../master/bench-${{ matrix.device }}.yaml ../pr/bench-${{ matrix.device }}.yaml)
 
+      - name: Process Nsight Profiling Report
+        run: |
+          # Run `nsys stats` on one checkout's report; $1 is the checkout
+          # directory, the remaining arguments are passed to `nsys stats`.
+          # A checkout without a report is skipped instead of failing the step.
+          nsys_stats() {
+            dir=$1
+            shift
+            if [ -f "$dir/report.nsys-rep" ]; then
+              (cd "$dir" && nsys stats "$@" report.nsys-rep)
+            else
+              echo "No Nsight report found in $dir, skipping"
+            fi
+          }
+
+          if [ ! -f "pr/report.nsys-rep" ]; then
+            echo "No Nsight report found, skipping profiling analysis"
+          elif ! command -v nsys > /dev/null 2>&1; then
+            echo "nsys not found on this runner, skipping profiling analysis"
+          else
+            echo "=== Nsight Profiling Summary ==="
+            echo "Master"
+            nsys_stats master --report nvtx_sum
+            echo "Pr"
+            nsys_stats pr --report nvtx_sum
+
+            echo "=== CUDA API CALLS ==="
+            echo "Master"
+            nsys_stats master --report cuda_api_sum --format table | head -100
+            echo "Pr"
+            nsys_stats pr --report cuda_api_sum --format table | head -100
+
+            echo "=== GPU KERNELS ==="
+            echo "Master"
+            nsys_stats master --report cuda_gpu_kern_sum --format table | head -100
+            echo "Pr"
+            nsys_stats pr --report cuda_gpu_kern_sum --format table | head -100
+          fi
+
       - name: Print Logs
         if: always()
         run: |
@@ -106,5 +145,6 @@ jobs:
           path: |
             pr/bench-${{ matrix.device }}.*
             pr/build/benchmarks/*
+            pr/report.nsys-rep
             master/bench-${{ matrix.device }}.*
             master/build/benchmarks/*
diff --git a/.github/workflows/phoenix/bench.sh b/.github/workflows/phoenix/bench.sh
index f58ef4472..512711c0b 100644
--- a/.github/workflows/phoenix/bench.sh
+++ b/.github/workflows/phoenix/bench.sh
@@ -16,9 +16,9 @@ mkdir -p $currentdir
 export TMPDIR=$currentdir
 
 if [ "$job_device" = "gpu" ]; then
-    ./mfc.sh bench --mem 12 -j $(nproc) -o "$job_slug.yaml" -- -c phoenix-bench $device_opts -n $n_ranks
+    nsys profile -o report --force-overwrite true ./mfc.sh bench --mem 12 -j $(nproc) -o "$job_slug.yaml" -- -c phoenix-bench $device_opts -n $n_ranks
 else
-    ./mfc.sh bench --mem 1 -j $(nproc) -o "$job_slug.yaml" -- -c phoenix-bench $device_opts -n $n_ranks
+    nsys profile -o report --force-overwrite true ./mfc.sh bench --mem 1 -j $(nproc) -o "$job_slug.yaml" -- -c phoenix-bench $device_opts -n $n_ranks
 fi
 
 sleep 10