Skip to content

Commit 7ad2626

Browse files
committed
nvhpc/25.3 + tiles
1 parent 107e62e commit 7ad2626

File tree

4 files changed

+28
-18
lines changed

4 files changed

+28
-18
lines changed

multi/Makefile_leonardo

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ EXTRA_DIR = /leonardo/prod/opt/compilers/cuda/12.3/none/compat
1414
export $(CUDECOMP_DIR)/lib
1515

1616
# Compiler and Linker Flags
17-
FFLAGS = -fast -acc -gpu=managed -Minfo=accel -Mfree -Mpreprocess -cpp -cuda -I$(CUDECOMP_DIR)/include/
17+
FFLAGS = -fast -acc -gpu=mem:managed -Minfo=accel -Mfree -Mpreprocess -cpp -cuda -I$(CUDECOMP_DIR)/include/
1818
LIBS = -L$(CUDECOMP_DIR)/lib/ -L$(CUDA_DIR)/lib64/ -lcudecomp_fort -lcudecomp -cudalib=cufft -lnvToolsExt
1919

2020
# Source and Object Files

multi/go_leo_prof.sh

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,12 @@
1010
#SBATCH --qos=boost_qos_dbg
1111
#SBATCH --error=test.err
1212

13-
module load nvhpc/24.3
14-
module load cuda/12.3
15-
module load openmpi/4.1.6--nvhpc--24.3
13+
#module load nvhpc/24.3
14+
#module load cuda/12.3
15+
#module load openmpi/4.1.6--nvhpc--24.3
16+
module load profile/candidate
17+
module load nvhpc/25.3
18+
module load hpcx-mpi/2.19
1619
#export LD_LIBRARY_PATH=/leonardo_scratch/large/userexternal/aroccon0/MHIT36_cuDecomp/cuDecomp/build/lib:$LD_LIBRARY_PATH
1720
#export LD_LIBRARY_PATH=/leonardo_scratch/large/userexternal/lenzenbe/RE95_256_cuDec/cuDecomp/build/lib:$LD_LIBRARY_PATH
1821
CURRENT_DIR="$(pwd)"
@@ -25,7 +28,10 @@ export LD_LIBRARY_PATH=$ROOT_DIR:$LD_LIBRARY_PATH
2528
#mpirun -n 4 nsys profile --trace=cuda,nvtx,mpi -o profile_output_%q{SLURM_PROCID} --stats=true ./mhit36
2629
#mpirun -n 4 nsys profile -t cuda,nvtx,mpi -o report.$SLURM_LOCALID ./mhit36
2730
#srun -n 4 nsys profile -t cuda,nvtx,mpi --output=nsys_report_rank%t ./mhit36nsys profile --multiprocess=true -t cuda,nvtx,mpi -o report $
28-
nsys profile -t cuda,nvtx,mpi,openacc mpirun -np 4 ./mhit36
31+
#nsys profile -t cuda,nvtx,mpi,openacc mpirun -np 4 ./mhit36
32+
33+
# Generate an Nsight Compute report: profile a single launch of kernel main_659, skipping its first 3 launches
34+
mpirun -n 4 ncu --kernel-name main_659 --set=full --import-source=yes -o profile -f --launch-skip 3 --launch-count 1 "./mhit36"
2935

3036

3137

multi/leo.sh

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
1-
module load nvhpc/24.3
2-
module load cuda/12.3
3-
module load openmpi/4.1.6--nvhpc--24.3
1+
# Old modules. NOTE: if you use these instead, change -gpu=mem:managed back to -gpu=managed in the Makefile.
2+
#module load nvhpc/24.3
3+
#module load cuda/12.3
4+
#module load openmpi/4.1.6--nvhpc--24.3
5+
module load profile/candidate
6+
module load nvhpc/25.3
7+
module load hpcx-mpi/2.19
48
cp Makefile_leonardo Makefile
59
make clean
610
make

multi/main.f90

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -136,13 +136,13 @@ program main
136136
CHECK_CUDECOMP_EXIT(cudecompGridDescCreate(handle, grid_desc, config, options))
137137

138138
! Print information on configuration
139-
if (rank == 0) then
140-
write(*,"(' Running on ', i0, ' x ', i0, ' process grid ...')") config%pdims(1), config%pdims(2)
141-
write(*,"(' Using ', a, ' transpose backend ...')") &
142-
cudecompTransposeCommBackendToString(config%transpose_comm_backend)
143-
write(*,"(' Using ', a, ' halo backend ...')") &
144-
cudecompHaloCommBackendToString(config%halo_comm_backend)
145-
endif
139+
!if (rank == 0) then
140+
! write(*,"(' Running on ', i0, ' x ', i0, ' process grid ...')") config%pdims(1), config%pdims(2)
141+
! write(*,"(' Using ', a, ' transpose backend ...')") &
142+
! cudecompTransposeCommBackendToString(config%transpose_comm_backend)
143+
! write(*,"(' Using ', a, ' halo backend ...')") &
144+
! cudecompHaloCommBackendToString(config%halo_comm_backend)
145+
!endif
146146

147147

148148
! Get pencil info for the grid descriptor in the physical space
@@ -584,7 +584,7 @@ program main
584584
! Projection step, convective terms
585585
! 5.1a Convective terms NS
586586
! Loop on inner nodes
587-
!$acc parallel loop collapse(3)
587+
!$acc parallel loop tile(16,4,2)
588588
do k=1+halo_ext, piX%shape(3)-halo_ext
589589
do j=1+halo_ext, piX%shape(2)-halo_ext
590590
do i=1,nx
@@ -626,7 +626,7 @@ program main
626626
enddo
627627

628628
! 5.1b Compute viscous terms
629-
!$acc parallel loop collapse(3)
629+
!$acc parallel loop tile(16,4,2)
630630
do k=1+halo_ext, piX%shape(3)-halo_ext
631631
do j=1+halo_ext, piX%shape(2)-halo_ext
632632
do i=1,nx
@@ -656,7 +656,7 @@ program main
656656

657657
! 5.1c NS forcing
658658

659-
!$acc parallel loop collapse(3)
659+
!$acc parallel loop tile(16,4,2)
660660
do k=1+halo_ext, piX%shape(3)-halo_ext
661661
do j=1+halo_ext, piX%shape(2)-halo_ext
662662
do i = 1, piX%shape(1)

0 commit comments

Comments
 (0)