diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
index 63c14ad7a..9a5323695 100644
--- a/.github/workflows/bench.yml
+++ b/.github/workflows/bench.yml
@@ -92,6 +92,42 @@ jobs:
           (cd pr && . ./mfc.sh load -c ${{ matrix.flag }} -m g)
           (cd pr && ./mfc.sh bench_diff ../master/bench-${{ matrix.device }}.yaml ../pr/bench-${{ matrix.device }}.yaml)
 
+      - name: Process Nsight Profiling Report
+        run: |
+          # Print "nsys stats" output for one checkout (master or pr). A
+          # checkout without a report gets a notice instead of failing the step.
+          nsys_stats() {
+            dir=$1
+            shift
+            if [ -f "$dir/report.nsys-rep" ]; then
+              (cd "$dir" && nsys stats "$@" report.nsys-rep)
+            else
+              echo "No Nsight report found in $dir, skipping"
+            fi
+          }
+
+          if [ -f "pr/report.nsys-rep" ]; then
+            echo "=== Nsight Profiling Summary ==="
+            echo "Master"
+            nsys_stats master --report nvtx_sum
+            echo "Pr"
+            nsys_stats pr --report nvtx_sum
+
+            echo "=== CUDA API CALLS ==="
+            echo "Master"
+            nsys_stats master --report cuda_api_sum --format table | head -100
+            echo "Pr"
+            nsys_stats pr --report cuda_api_sum --format table | head -100
+
+            echo "=== GPU KERNELS ==="
+            echo "Master"
+            nsys_stats master --report cuda_gpu_kern_sum --format table | head -100
+            echo "Pr"
+            nsys_stats pr --report cuda_gpu_kern_sum --format table | head -100
+          else
+            echo "No Nsight report found, skipping profiling analysis"
+          fi
+
       - name: Print Logs
         if: always()
         run: |
@@ -107,5 +143,7 @@ jobs:
           path: |
             pr/bench-${{ matrix.device }}.*
             pr/build/benchmarks/*
+            pr/report.nsys-rep
             master/bench-${{ matrix.device }}.*
             master/build/benchmarks/*
+            master/report.nsys-rep
diff --git a/.github/workflows/phoenix/bench.sh b/.github/workflows/phoenix/bench.sh
index a0e93f905..6e95d280b 100644
--- a/.github/workflows/phoenix/bench.sh
+++ b/.github/workflows/phoenix/bench.sh
@@ -17,9 +17,9 @@ mkdir -p $currentdir
 export TMPDIR=$currentdir
 
 if [ "$device" = "gpu" ]; then
-    ./mfc.sh bench --mem 12 -j $(nproc) -o "$job_slug.yaml" -- -c phoenix-bench $device_opts -n $n_ranks
+    nsys profile -o report --force-overwrite true ./mfc.sh bench --mem 12 -j $(nproc) -o "$job_slug.yaml" -- -c phoenix-bench $device_opts -n $n_ranks
 else
-    ./mfc.sh bench --mem 1 -j $(nproc) -o "$job_slug.yaml" -- -c phoenix-bench $device_opts -n $n_ranks
+    nsys profile -o report --force-overwrite true ./mfc.sh bench --mem 1 -j $(nproc) -o "$job_slug.yaml" -- -c phoenix-bench $device_opts -n $n_ranks
 fi
 
 sleep 10