@@ -414,58 +414,13 @@ program main
414
414
! START STEP 4: PHASE-FIELD SOLVER (EXPLICIT)
415
415
! ########################################################################################################################################
416
416
#if phiflag == 1
417
-
418
- ! !! big kernel failed experiment, it is super slow :(
419
- ! !$acc kernels
420
- ! do k=1, piX%shape(3)
421
- ! do j=1, piX%shape(2)
422
- ! do i=1,nx
423
-
424
- ! ! compute distance function psi (used to compute normals)
425
- ! val = min(phi(i,j,k),1.0d0) ! avoid machine precision overshoots in phi that leads to problem with log
426
- ! psidi(i,j,k) = eps*log((val+enum)/(1.d0-val+enum))
427
-
428
- ! if(k.ge.1+halo_ext .and. k.le.piX%shape(3)-halo_ext) then
429
- ! if(j.ge.1+halo_ext .and. j.le.piX%shape(2)-halo_ext) then
430
- ! ! 4.1 RHS computation, no need of halo update in theory
431
- ! ip=i+1
432
- ! jp=j+1
433
- ! kp=k+1
434
- ! im=i-1
435
- ! jm=j-1
436
- ! km=k-1
437
- ! if (ip .gt. nx) ip=1
438
- ! if (im .lt. 1) im=nx
439
- ! rhsphi(i,j,k) = &
440
- ! - (u(ip,j,k)*0.5d0*(phi(ip,j,k)+phi(i,j,k)) - u(i,j,k)*0.5d0*(phi(i,j,k)+phi(im,j,k)))*dxi & ! 4.1.1 Convective term
441
- ! - (v(i,jp,k)*0.5d0*(phi(i,jp,k)+phi(i,j,k)) - v(i,j,k)*0.5d0*(phi(i,j,k)+phi(i,jm,k)))*dxi & ! 4.1.1 Convective term
442
- ! - (w(i,j,kp)*0.5d0*(phi(i,j,kp)+phi(i,j,k)) - w(i,j,k)*0.5d0*(phi(i,j,k)+phi(i,j,km)))*dxi & ! 4.1.1 Convective term
443
- ! + gamma*(eps*(phi(ip,j,k)-2.d0*phi(i,j,k)+phi(im,j,k))*ddxi + & ! 4.1.2 Compute diffusive term
444
- ! eps*(phi(i,jp,k)-2.d0*phi(i,j,k)+phi(i,jm,k))*ddxi + & ! 4.1.2 Compute diffusive term
445
- ! eps*(phi(i,j,kp)-2.d0*phi(i,j,k)+phi(i,j,km))*ddxi) ! 4.1.2 Compute diffusive term
446
- ! ! end of 4.1 RHS computation
447
-
448
- ! ! 4.1.3. Compute Sharpening term (gradient)
449
- ! ! Substep 1 computer normals
450
- ! normx(i,j,k) = (psidi(ip,j,k) - psidi(im,j,k))
451
- ! normy(i,j,k) = (psidi(i,jp,k) - psidi(i,jm,k))
452
- ! normz(i,j,k) = (psidi(i,j,kp) - psidi(i,j,km))
453
-
454
- ! endif
455
- ! endif
456
- ! enddo
457
- ! enddo
458
- ! enddo
459
- ! !$acc end kernels
460
-
461
417
! $acc kernels
462
418
do k= 1 , piX% shape (3 )
463
419
do j= 1 , piX% shape (2 )
464
420
do i= 1 ,nx
465
421
! compute distance function psi (used to compute normals)
466
422
val = min (phi(i,j,k),1.0d0 ) ! avoid machine precision overshoots in phi that leads to problem with log
467
423
psidi(i,j,k) = eps* log ((val+ enum)/ (1.d0 - val+ enum))
468
-
469
424
! compute here the tanh of distance function psi (used in the sharpening term) to avoid multiple computations of tanh
470
425
tanh_psi(i,j,k) = tanh (0.5d0 * psidi(i,j,k)* epsi)
471
426
enddo
@@ -477,7 +432,7 @@ program main
477
432
do k= 1 + halo_ext, piX% shape (3 )- halo_ext
478
433
do j= 1 + halo_ext, piX% shape (2 )- halo_ext
479
434
do i= 1 ,nx
480
- ! 4.1 RHS computation, no need of halo update in theory
435
+ ! 4.1 RHS computation
481
436
ip= i+1
482
437
jp= j+1
483
438
kp= k+1
@@ -486,20 +441,18 @@ program main
486
441
km= k-1
487
442
if (ip .gt. nx) ip= 1
488
443
if (im .lt. 1 ) im= nx
444
+ ! convective (first three lines) and diffusive (last three lines)
489
445
rhsphi(i,j,k) = &
490
- - (u(ip,j,k)* 0.5d0 * (phi(ip,j,k)+ phi(i,j,k)) - u(i,j,k)* 0.5d0 * (phi(i,j,k)+ phi(im,j,k)))* dxi & ! 4.1.1 Convective term
491
- - (v(i,jp,k)* 0.5d0 * (phi(i,jp,k)+ phi(i,j,k)) - v(i,j,k)* 0.5d0 * (phi(i,j,k)+ phi(i,jm,k)))* dxi & ! 4.1.1 Convective term
492
- - (w(i,j,kp)* 0.5d0 * (phi(i,j,kp)+ phi(i,j,k)) - w(i,j,k)* 0.5d0 * (phi(i,j,k)+ phi(i,j,km)))* dxi & ! 4.1.1 Convective term
493
- + gamma* (eps* (phi(ip,j,k)- 2.d0 * phi(i,j,k)+ phi(im,j,k))* ddxi + & ! 4.1.2 Compute diffusive term
494
- eps* (phi(i,jp,k)- 2.d0 * phi(i,j,k)+ phi(i,jm,k))* ddxi + & ! 4.1.2 Compute diffusive term
495
- eps* (phi(i,j,kp)- 2.d0 * phi(i,j,k)+ phi(i,j,km))* ddxi) ! 4.1.2 Compute diffusive term
496
-
497
- ! 4.1.3. Compute Sharpening term (gradient)
498
- ! Substep 1 computer normals
446
+ - (u(ip,j,k)* 0.5d0 * (phi(ip,j,k)+ phi(i,j,k)) - u(i,j,k)* 0.5d0 * (phi(i,j,k)+ phi(im,j,k)))* dxi &
447
+ - (v(i,jp,k)* 0.5d0 * (phi(i,jp,k)+ phi(i,j,k)) - v(i,j,k)* 0.5d0 * (phi(i,j,k)+ phi(i,jm,k)))* dxi &
448
+ - (w(i,j,kp)* 0.5d0 * (phi(i,j,kp)+ phi(i,j,k)) - w(i,j,k)* 0.5d0 * (phi(i,j,k)+ phi(i,j,km)))* dxi &
449
+ + gamma* (eps* (phi(ip,j,k)- 2.d0 * phi(i,j,k)+ phi(im,j,k))* ddxi + &
450
+ eps* (phi(i,jp,k)- 2.d0 * phi(i,j,k)+ phi(i,jm,k))* ddxi + &
451
+ eps* (phi(i,j,kp)- 2.d0 * phi(i,j,k)+ phi(i,j,km))* ddxi)
452
+ ! 4.1.3. Compute normals for sharpening term (gradient)
499
453
normx(i,j,k) = (psidi(ip,j,k) - psidi(im,j,k))
500
454
normy(i,j,k) = (psidi(i,jp,k) - psidi(i,jm,k))
501
455
normz(i,j,k) = (psidi(i,j,kp) - psidi(i,j,km))
502
-
503
456
enddo
504
457
enddo
505
458
enddo
@@ -526,7 +479,6 @@ program main
526
479
do i= 1 ,nx
527
480
normod = 1.d0 / (sqrt (normx(i,j,k)* normx(i,j,k) + normy(i,j,k)* normy(i,j,k) + normz(i,j,k)* normz(i,j,k)) + 1.0E-16 )
528
481
! normod = 1.d0/(sqrt(normx(i,j,k)**2d0 + normy(i,j,k)**2d0 + normz(i,j,k)**2d0) + 1.0E-16)
529
-
530
482
normx(i,j,k) = normx(i,j,k)* normod
531
483
normy(i,j,k) = normy(i,j,k)* normod
532
484
normz(i,j,k) = normz(i,j,k)* normod
@@ -555,15 +507,8 @@ program main
555
507
! NEW ACDI
556
508
! rhsphi(i,j,k)=rhsphi(i,j,k)-gamma*((0.25d0*(1.d0-(tanh(0.5d0*psidi(ip,j,k)*epsi))**2)*normx(ip,j,k)- 0.25d0*(1.d0-(tanh(0.5d0*psidi(im,j,k)*epsi))**2)*normx(im,j,k))*0.5*dxi +&
557
509
! (0.25d0*(1.d0-(tanh(0.5d0*psidi(i,jp,k)*epsi))**2)*normy(i,jp,k)- 0.25d0*(1.d0-(tanh(0.5d0*psidi(i,jm,k)*epsi))**2)*normy(i,jm,k))*0.5*dxi +&
558
- ! (0.25d0*(1.d0-(tanh(0.5d0*psidi(i,j,kp)*epsi))**2)*normz(i,j,kp)- 0.25d0*(1.d0-(tanh(0.5d0*psidi(i,j,km)*epsi))**2)*normz(i,j,km))*0.5*dxi)
559
-
560
- ! rhsphi(i,j,k)=rhsphi(i,j,k)-gamma*((0.25d0*(1.d0-(tanh(0.5d0*psidi(ip,j,k)*epsi))*tanh(0.5d0*psidi(ip,j,k)*epsi))*normx(ip,j,k) - &
561
- ! 0.25d0*(1.d0-(tanh(0.5d0*psidi(im,j,k)*epsi))*tanh(0.5d0*psidi(im,j,k)*epsi))*normx(im,j,k))*0.5*dxi + &
562
- ! (0.25d0*(1.d0-(tanh(0.5d0*psidi(i,jp,k)*epsi))*tanh(0.5d0*psidi(i,jp,k)*epsi))*normy(i,jp,k) - &
563
- ! 0.25d0*(1.d0-(tanh(0.5d0*psidi(i,jm,k)*epsi))*tanh(0.5d0*psidi(i,jm,k)*epsi))*normy(i,jm,k))*0.5*dxi + &
564
- ! (0.25d0*(1.d0-(tanh(0.5d0*psidi(i,j,kp)*epsi))*tanh(0.5d0*psidi(i,j,kp)*epsi))*normz(i,j,kp) - &
565
- ! 0.25d0*(1.d0-(tanh(0.5d0*psidi(i,j,km)*epsi))*tanh(0.5d0*psidi(i,j,km)*epsi))*normz(i,j,km))*0.5*dxi)
566
-
510
+ !
511
+ ! ACDI improved version with pre-computed tanh
567
512
rhsphi(i,j,k)= rhsphi(i,j,k)- gamma* ((0.25d0 * (1.d0 - tanh_psi(ip,j,k)* tanh_psi(i,j,km))* normx(ip,j,k) - &
568
513
0.25d0 * (1.d0 - tanh_psi(im,j,k)* tanh_psi(i,j,km))* normx(im,j,k))* 0.5 * dxi + &
569
514
(0.25d0 * (1.d0 - tanh_psi(i,jp,k)* tanh_psi(i,j,km))* normy(i,jp,k) - &
0 commit comments