@@ -14,6 +14,8 @@ program main
14
14
15
15
16
16
implicit none
17
+ ! timer for scaling test
18
+ real :: t_start, t_end, elapsed
17
19
! grid dimensions
18
20
integer :: comm_backend
19
21
integer :: pr, pc
@@ -230,6 +232,14 @@ program main
230
232
enddo
231
233
! allocate k_d on the device (later on remove and use OpenACC + managed memory?)
232
234
allocate (kx_d, source= kx)
235
+
236
+ allocate (mysin(nx), mycos(nx))
237
+ do i= 1 ,nx
238
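+ ! tabulate sin(k0*x(i)) once so the forcing terms can look it up instead of re-evaluating sin
+ ! NOTE(review): table has nx entries but is also indexed with jg/kg in the ABC forcing - assumes nx = ny = nz; confirm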
239
+ mysin(i)= sin (k0* x(i))
240
+ ! tabulate cos(k0*x(i)) once so the forcing terms can look it up instead of re-evaluating cos
241
+ mycos(i)= cos (k0* x(i))
242
+ enddo
233
243
! ########################################################################################################################################
234
244
! 1. INITIALIZATION AND cuDECOMP AUTOTUNING : END
235
245
! ########################################################################################################################################
@@ -370,12 +380,12 @@ program main
370
380
! Save initial fields (only if a fresh start)
371
381
if (restart .eq. 0 ) then
372
382
if (rank.eq. 0 ) write (* ,* ) " Save initial fields"
373
- call writefield(tstart,1 )
374
- call writefield(tstart,2 )
375
- call writefield(tstart,3 )
383
+ ! call writefield(tstart,1)
384
+ ! call writefield(tstart,2)
385
+ ! call writefield(tstart,3)
376
386
! call writefield(tstart,4)
377
387
#if phiflag == 1
378
- call writefield(tstart,5 )
388
+ ! call writefield(tstart,5)
379
389
#endif
380
390
endif
381
391
! ########################################################################################################################################
@@ -399,6 +409,8 @@ program main
399
409
gamma= 1.d0 * gumax
400
410
! $acc data copyin(piX)
401
411
! $acc data create(rhsu_o, rhsv_o, rhsw_o)
412
+ ! $acc data copyin(mysin, mycos)
413
+ call cpu_time(t_start)
402
414
! Start temporal loop
403
415
do t= tstart,tfin
404
416
! Create custom label for each marker
@@ -558,7 +570,7 @@ program main
558
570
! Projection step, convective terms
559
571
! 5.1a Convective terms NS
560
572
! Loop on inner nodes
561
- ! $acc parallel loop tile(16,4,2)
573
+ ! $acc parallel loop tile(16,4,2) present(mysin, mycos)
562
574
do k= 1 + halo_ext, piX% shape (3 )- halo_ext
563
575
do j= 1 + halo_ext, piX% shape (2 )- halo_ext
564
576
do i= 1 ,nx
@@ -612,9 +624,12 @@ program main
612
624
kg = piX% lo(3 ) + k - 1
613
625
jg = piX% lo(2 ) + j - 1
614
626
! ABC forcing
615
- rhsu(i,j,k)= rhsu(i,j,k) + f3* sin (k0* x(kg))+ f2* cos (k0* x(jg))
616
- rhsv(i,j,k)= rhsv(i,j,k) + f1* sin (k0* x(i))+ f3* cos (k0* x(kg))
617
- rhsw(i,j,k)= rhsw(i,j,k) + f2* sin (k0* x(jg))+ f1* cos (k0* x(i))
627
+ ! rhsu(i,j,k)= rhsu(i,j,k) + f3*sin(k0*x(kg))+f2*cos(k0*x(jg))
628
+ rhsu(i,j,k)= rhsu(i,j,k) + f3* mysin(kg)+ f2* mycos(jg)
629
+ ! rhsv(i,j,k)= rhsv(i,j,k) + f1*sin(k0*x(i))+f3*cos(k0*x(kg))
630
+ rhsv(i,j,k)= rhsv(i,j,k) + f1* mysin(i)+ f3* mycos(kg)
631
+ ! rhsw(i,j,k)= rhsw(i,j,k) + f2*sin(k0*x(jg))+f1*cos(k0*x(i))
632
+ rhsw(i,j,k)= rhsw(i,j,k) + f2* mysin(jg)+ f1* mycos(i)
618
633
! TG Forcing
619
634
! rhsu(i,j,k)= rhsu(i,j,k) + f1*sin(k0*x(i))*cos(k0*x(j))*cos(k0*x(k))
620
635
! rhsv(i,j,k)= rhsv(i,j,k) - f1*cos(k0*x(i))*sin(k0*x(j))*sin(k0*x(k))
@@ -1002,13 +1017,13 @@ program main
1002
1017
if (mod (t,dump) .eq. 0 ) then
1003
1018
if (rank .eq. 0 ) write (* ,* ) " Saving output files"
1004
1019
! write velocity and pressure fields (1-4)
1005
- call writefield(t,1 )
1006
- call writefield(t,2 )
1007
- call writefield(t,3 )
1020
+ ! call writefield(t,1)
1021
+ ! call writefield(t,2)
1022
+ ! call writefield(t,3)
1008
1023
! call writefield(t,4)
1009
1024
#if phiflag == 1
1010
1025
! write phase-field (5)
1011
- call writefield(t,5 )
1026
+ ! call writefield(t,5)
1012
1027
#endif
1013
1028
endif
1014
1029
! ########################################################################################################################################
@@ -1018,12 +1033,16 @@ program main
1018
1033
call nvtxEndRange
1019
1034
! call nvtxEndRange
1020
1035
enddo
1036
+ call cpu_time(t_end)
1037
+ elapsed = t_end- t_start
1038
+ if (rank .eq. 0 ) write (* ,* ) ' Elapsed time (seconds):' , elapsed
1039
+ ! $acc end data
1021
1040
! $acc end data
1022
1041
! $acc end data
1023
1042
1024
1043
! Remove allocated variables (add new)
1025
1044
deallocate (u,v,w)
1026
- deallocate (tanh_psi)
1045
+ deallocate (tanh_psi, mysin, mycos )
1027
1046
deallocate (rhsu,rhsv,rhsw)
1028
1047
deallocate (rhsu_o,rhsv_o,rhsw_o)
1029
1048
deallocate (phi,rhsphi,normx,normy,normz)
0 commit comments