From d1ed405c5771e3cda5b00091bf9dd067cb9c197a Mon Sep 17 00:00:00 2001 From: Yoshikawa Hiroyuki Date: Mon, 22 Feb 2021 13:25:06 +0900 Subject: [PATCH 01/70] make NICAM-DC-MINI correspond to Fugaku --- NICAM-DC-MINI/sysdep/Makedef.Fugaku-xmp | 42 ++++++++++ NICAM-DC-MINI/sysdep/Mkjobshell.Fugaku-xmp.sh | 78 +++++++++++++++++++ 2 files changed, 120 insertions(+) create mode 100644 NICAM-DC-MINI/sysdep/Makedef.Fugaku-xmp create mode 100644 NICAM-DC-MINI/sysdep/Mkjobshell.Fugaku-xmp.sh diff --git a/NICAM-DC-MINI/sysdep/Makedef.Fugaku-xmp b/NICAM-DC-MINI/sysdep/Makedef.Fugaku-xmp new file mode 100644 index 0000000..fbbc6fa --- /dev/null +++ b/NICAM-DC-MINI/sysdep/Makedef.Fugaku-xmp @@ -0,0 +1,42 @@ +# +# ------ FOR K computer ----- +# + +##### for computation + +FFLAGS_FAST = -Kfast,parallel,auto,ocl,preex,array_private,noalias=s,mfunc=2 \ + -Kparallel_iteration=8,instance=8,dynamic_iteration \ + -Qi -Qt -X03 -Ncompdisp -Koptmsg=1 -Cpp \ + -Kprefetch_cache_level=all,prefetch_iteration_L2=50 -Ksimd + +FFLAGS_DEBUG = -O0 \ + -Qi -Qt -X03 -v03s -v03d -v03o -Ncompdisp -Koptmsg=1 -Cpp \ + -Ec -Eg -Ha -He -Hf -Ho -Hs -Hu -Hx -Ncheck_global +# -DDEBUG + +# Performance monitor +# disable +PERF_MONIT = -Ntl_notrt -U_FIPP_ -U_FAPP_ +# fipp +#PERF_MONIT = -Ntl_trt -D_FIPP_ -U_FAPP_ +# fapp +#PERF_MONIT = -Ntl_trt -U_FIPP_ -D_FAPP_ + +FC = xmpf90 +FFLAGS = -cpp $(FFLAGS_FAST) $(PERF_MONIT) $(RDMA) +#FFLAGS = $(FFLAGS_DEBUG) $(PERF_MONIT) $(RDMA) + +MODDIROPT ?= -M + +CC = mpifccpx +CFLAGS = -Kfast,parallel,ocl,preex,array_private,region_extension,restp=all -Qt -Ksimd $(PERF_MONIT) + +LD = $(FC) +LFLAGS = $(FFLAGS) + +##### for frontend +INSTALL = install +AR = ar +ARFLAGS = r +RANLIB = ranlib +JOBSUB = pjsub diff --git a/NICAM-DC-MINI/sysdep/Mkjobshell.Fugaku-xmp.sh b/NICAM-DC-MINI/sysdep/Mkjobshell.Fugaku-xmp.sh new file mode 100644 index 0000000..0c614f4 --- /dev/null +++ b/NICAM-DC-MINI/sysdep/Mkjobshell.Fugaku-xmp.sh @@ -0,0 +1,78 @@ +#! 
/bin/bash -x + +GLEV=${1} +RLEV=${2} +NMPI=${3} +ZL=${4} +VGRID=${5} +TOPDIR=${6} +BINNAME=${7} +RUNCONF=${8} + +# System specific +MPIEXEC="mpiexec" + +GL=`printf %02d ${GLEV}` +RL=`printf %02d ${RLEV}` +if [ ${NMPI} -ge 10000 ]; then + NP=`printf %05d ${NMPI}` +elif [ ${NMPI} -ge 1000 ]; then + NP=`printf %04d ${NMPI}` +elif [ ${NMPI} -ge 100 ]; then + NP=`printf %03d ${NMPI}` +else + NP=`printf %02d ${NMPI}` +fi + +dir2d=gl${GL}rl${RL}pe${NP} +dir3d=gl${GL}rl${RL}z${ZL}pe${NP} +res2d=GL${GL}RL${RL} +res3d=GL${GL}RL${RL}z${ZL} + +MNGINFO=rl${RL}-prc${NP}.info + +if [ ${NMPI} -gt 36864 ]; then + rscgrp="huge" +elif [ ${NMPI} -gt 384 ]; then + rscgrp="large" +else + rscgrp="small" +fi + +outdir=${dir3d} +cd ${outdir} +HERE=${PWD} + +ln -s ${TOPDIR}/bin/${BINNAME} . +ln -s ${TOPDIR}/data/mnginfo/${MNGINFO} . +ln -s ${TOPDIR}/data/grid/vgrid/${VGRID} . + +for f in $( ls ${TOPDIR}/data/grid/boundary/${dir2d} ) +do + ln -s ${TOPDIR}/data/grid/boundary/${dir2d}/${f} . +done + + +cat << EOF1 > run.sh +#! 
/bin/bash -x +################################################################################ +# +# for Fugaku computer +# +################################################################################ +#PJM --rsc-list "node=${NMPI}" +#PJM --rsc-list "elapse=02:00:00" +#PJM --mpi "use-rankdir" +#PJM -j +#PJM -s +# +export PARALLEL=8 +export OMP_NUM_THREADS=8 + +# run +${MPIEXEC} ./${BINNAME} || exit + +################################################################################ +EOF1 + +exit From 4fe5a77a59652df5a4829dc362c941207bcfa497 Mon Sep 17 00:00:00 2001 From: Yoshikawa Hiroyuki Date: Wed, 24 Feb 2021 17:29:21 +0900 Subject: [PATCH 02/70] make NTCHEM-MINI correspond to Fugaku --- .../linux64_xmp_omp_fugaku.config.sh.in | 14 +++++ .../linux64_xmp_omp_fugaku.makeconfig.in | 61 +++++++++++++++++++ NTCHEM-MINI/tests/c6h6/Fugaku.sh | 13 ++++ 3 files changed, 88 insertions(+) create mode 100755 NTCHEM-MINI/config/linux64_xmp_omp_fugaku.config.sh.in create mode 100755 NTCHEM-MINI/config/linux64_xmp_omp_fugaku.makeconfig.in create mode 100755 NTCHEM-MINI/tests/c6h6/Fugaku.sh diff --git a/NTCHEM-MINI/config/linux64_xmp_omp_fugaku.config.sh.in b/NTCHEM-MINI/config/linux64_xmp_omp_fugaku.config.sh.in new file mode 100755 index 0000000..88dfefa --- /dev/null +++ b/NTCHEM-MINI/config/linux64_xmp_omp_fugaku.config.sh.in @@ -0,0 +1,14 @@ +# +export TARGET=LINUX64 +unset USE_MPI + +# if you want to use MPICH, you can set the environmental variables as +# follos (see ./GA/README) +# +# export MPI_USE=yes +# export MPI_INCLUDE=/usr/include +# export MPI_LIB=/usr/lib +# export LIBMPI=-lmpi + +export LARGE_FILES=yes + diff --git a/NTCHEM-MINI/config/linux64_xmp_omp_fugaku.makeconfig.in b/NTCHEM-MINI/config/linux64_xmp_omp_fugaku.makeconfig.in new file mode 100755 index 0000000..37902dc --- /dev/null +++ b/NTCHEM-MINI/config/linux64_xmp_omp_fugaku.makeconfig.in @@ -0,0 +1,61 @@ + + TARGET = LINUX64 + + DMACRO = -UDEBUG +# DMACRO+=-DSUPPORT_R16 +# DMACRO+=-DHAVE_ERF + + 
INC = -I$(INCLUDE) -I$(LOCALINC) -I/opt/FJSVxtclanga/tcsds-1.2.30a/include/mpi/fujitsu +### MOD = -J$(LOCALINC) + MOD = + INCMOD = $(INC) $(MOD) + +# FCONVERT = + + F77C = xmpf90 +### F77FLAGS = $(DMACRO) $(INCMOD) -Kfast -Am -fw -X9 -Fixed +### F77FLAGSNOOPT = $(DMACRO) $(INCMOD) -Am -fw -X9 -Fixed + F77FLAGS = -cpp -omp $(DMACRO) $(INCMOD) -Kfast -X9 -Fixed + F77FLAGSNOOPT = -cpp -omp $(DMACRO) $(INCMOD) -X9 -Fixed + + F90C = xmpf90 +### F90FLAGS = $(DMACRO) $(INCMOD) -Kfast -Am -fw +### F90FLAGSNOOPT = $(DMACRO) $(INCMOD) -Am -fw + F90FLAGS = -cpp -omp $(DMACRO) $(INCMOD) -Kfast + F90FLAGSNOOPT = -cpp -omp $(DMACRO) $(INCMOD) + + MODSUFFIX = mod + + CC = fccpx + CFLAGS = $(INC) -Kfast + + CXX = FCCpx + CXXFLAGS = $(INC) -Kfast + CXXLIB = + + MPIFLAGS = +# MPIFLAGS = -DMPIINT8 + MPILDFLAGS = + + OMPFLAGS = -Kopenmp + OMPLDFLAGS = -Kopenmp + +# LD = mpifrtpx +# LDFLAGS = -L$(LIB) + LD = xmpf90 #mpiFCCpx + LDFLAGS = -L$(LIB) +# LDFLAGS = --linkfortran -L$(LIB) + + AR = ar + ARFLAGS = cr + RANLIB = ranlib + + MAKE = make + + SHELL = /bin/sh + MV = /bin/mv -f + RM = /bin/rm -f + CP = /bin/cp -f + MKDIR = /bin/mkdir + LN = /bin/ln + diff --git a/NTCHEM-MINI/tests/c6h6/Fugaku.sh b/NTCHEM-MINI/tests/c6h6/Fugaku.sh new file mode 100755 index 0000000..0870173 --- /dev/null +++ b/NTCHEM-MINI/tests/c6h6/Fugaku.sh @@ -0,0 +1,13 @@ +#!/bin/bash -x +# +#PJM -o "c6h6_rimp2.out" +#PJM -e "c6h6_rimp2.err" +#PJM --rsc-list "node=2" +#PJM --rsc-list "elapse=0:10:00" +#PJM -s +# + +export FLIB_FASTOMP=FALSE +export FLIB_CNTL_BARRIER_ERR=FALSE + +mpiexec ./rimp2.exe From 57b5af7c70ae4f486c4cbd8c0ed1e0bb27266188 Mon Sep 17 00:00:00 2001 From: n70272a Date: Tue, 2 Mar 2021 15:23:10 +0900 Subject: [PATCH 03/70] add configulation files and tests exec script for Intel PC with OpenBLAS/spack --- ...inux64_xmp_omp_gfortran_spack.config.sh.in | 14 +++++ ...nux64_xmp_omp_gfortran_spack.makeconfig.in | 53 +++++++++++++++++++ .../platforms/config_mine.xmp_gfortran_spack | 12 +++++ 
NTCHEM-MINI/tests/c6h6/GCC_PC.sh | 12 +++++ 4 files changed, 91 insertions(+) create mode 100755 NTCHEM-MINI/config/linux64_xmp_omp_gfortran_spack.config.sh.in create mode 100755 NTCHEM-MINI/config/linux64_xmp_omp_gfortran_spack.makeconfig.in create mode 100755 NTCHEM-MINI/platforms/config_mine.xmp_gfortran_spack create mode 100755 NTCHEM-MINI/tests/c6h6/GCC_PC.sh diff --git a/NTCHEM-MINI/config/linux64_xmp_omp_gfortran_spack.config.sh.in b/NTCHEM-MINI/config/linux64_xmp_omp_gfortran_spack.config.sh.in new file mode 100755 index 0000000..88dfefa --- /dev/null +++ b/NTCHEM-MINI/config/linux64_xmp_omp_gfortran_spack.config.sh.in @@ -0,0 +1,14 @@ +# +export TARGET=LINUX64 +unset USE_MPI + +# if you want to use MPICH, you can set the environmental variables as +# follos (see ./GA/README) +# +# export MPI_USE=yes +# export MPI_INCLUDE=/usr/include +# export MPI_LIB=/usr/lib +# export LIBMPI=-lmpi + +export LARGE_FILES=yes + diff --git a/NTCHEM-MINI/config/linux64_xmp_omp_gfortran_spack.makeconfig.in b/NTCHEM-MINI/config/linux64_xmp_omp_gfortran_spack.makeconfig.in new file mode 100755 index 0000000..7250a40 --- /dev/null +++ b/NTCHEM-MINI/config/linux64_xmp_omp_gfortran_spack.makeconfig.in @@ -0,0 +1,53 @@ + + TARGET = LINUX64 + + DMACRO = -UDEBUG +# DMACRO+=-DSUPPORT_R16 +# DMACRO+=-DHAVE_ERF + + INC = -I$(INCLUDE) -I$(LOCALINC) -I$(MPIHOME)/include + MOD = -J$(LOCALINC) + INCMOD = $(INC) $(MOD) + +# FCONVERT = + + F77C = xmpf90 + F77FLAGS = -cpp -omp $(DMACRO) $(INCMOD) -m64 -O2 -std=legacy -DNOQUAD + F77FLAGSNOOPT = $(DMACRO) $(INCMOD) -m64 -std=legacy -DNOQUAD -Wuninitialized -Wall -Wunderflow -fbounds-check + + F90C = xmpf90 + F90FLAGS = -cpp -omp $(DMACRO) $(INCMOD) -m64 -O2 -std=legacy -DNOQUAD + F90FLAGSNOOPT = $(DMACRO) $(INCMOD) -m64 -std=legacy -Wuninitialized -Wall -Wunderflow -fbounds-check -DNOQUAD + + MODSUFFIX = mod + + CC = gcc + CFLAGS = $(INC) -O3 + + CXX = g++ + CXXFLAGS = $(INC) -O3 + CXXLIB = -lstdc++ + + MPIFLAGS = -UMPIINT8 + MPILDFLAGS = + + 
OMPFLAGS = -fopenmp + OMPLDFLAGS = -fopenmp + +# LD = xmpf90 -fc=gfortran + LD = xmpf90 + LDFLAGS = -L$(LIB) $(MPILIB) + + AR = ar + ARFLAGS = cr + RANLIB = ranlib + + MAKE = make + + SHELL = /bin/sh + MV = /bin/mv -f + RM = /bin/rm -f + CP = /bin/cp -f + MKDIR = /bin/mkdir + LN = /bin/ln + diff --git a/NTCHEM-MINI/platforms/config_mine.xmp_gfortran_spack b/NTCHEM-MINI/platforms/config_mine.xmp_gfortran_spack new file mode 100755 index 0000000..2545f31 --- /dev/null +++ b/NTCHEM-MINI/platforms/config_mine.xmp_gfortran_spack @@ -0,0 +1,12 @@ +# general PC cluster with GNU (gfortran) based mpif90 + OpenBLAS library +# via spack + +./config/configure \ +--lapack="`pkg-config --libs openblas`" \ +--blas="`pkg-config --libs openblas`" \ +--atlas= \ +linux64_xmp_omp_gfortran_spack + +cd ./config +ln -sf makeconfig makeconfig.xmp + diff --git a/NTCHEM-MINI/tests/c6h6/GCC_PC.sh b/NTCHEM-MINI/tests/c6h6/GCC_PC.sh new file mode 100755 index 0000000..583702d --- /dev/null +++ b/NTCHEM-MINI/tests/c6h6/GCC_PC.sh @@ -0,0 +1,12 @@ +#!/bin/bash +date +hostname +MOLECULE=c6h6 +pwd +NPROCS=1 +#export OMP_NUM_THREADS=8 +export OMP_NUM_THREADS=1 +time mpirun -np $NPROCS ../../bin/rimp2.exe | tee ${MOLECULE}_rimp2.out +ls -go +#python ./check.py + From 13f1a256d9ce2ee9026cb8e614198a6caf96d136 Mon Sep 17 00:00:00 2001 From: Yoshikawa Hiroyuki Date: Tue, 2 Mar 2021 16:03:45 +0900 Subject: [PATCH 04/70] modify for Fugaku --- FFB-MINI/src/ma_prof/src/maprof_yaml.c | 3 ++- FFB-MINI/src/make_setting.xmp_fugaku | 29 ++++++++++++++++++++++++++ FFB-MINI/test/go.sh | 2 +- FFB-MINI/test/go_fugaku.sh | 14 +++++++++++++ 4 files changed, 46 insertions(+), 2 deletions(-) create mode 100755 FFB-MINI/src/make_setting.xmp_fugaku create mode 100755 FFB-MINI/test/go_fugaku.sh diff --git a/FFB-MINI/src/ma_prof/src/maprof_yaml.c b/FFB-MINI/src/ma_prof/src/maprof_yaml.c index 609d649..9e9f469 100755 --- a/FFB-MINI/src/ma_prof/src/maprof_yaml.c +++ b/FFB-MINI/src/ma_prof/src/maprof_yaml.c @@ -71,6 +71,7 @@ 
maprof_yaml_node maprof_yaml_str_node(const char *s) node_t *n = (node_t *)malloc(sizeof(node_t)); n->type = SCALAR; n->data.scalar.type = STRING; + if ( ! s ) { n->data.scalar.value.str = NULL; return n;} n->data.scalar.value.str = strdup(s); return n; } @@ -156,7 +157,7 @@ void maprof_yaml_add_map_item(maprof_yaml_node map, const char *name, maprof_yam map_end = map->data.mapping.end; map_new = (map_item_t *)malloc(sizeof(map_item_t)); - map_new->name = strdup(name); + map_new->name = (!name ? NULL : strdup(name)); map_new->node = node; map_new->next = NULL; if (map_end == NULL) { diff --git a/FFB-MINI/src/make_setting.xmp_fugaku b/FFB-MINI/src/make_setting.xmp_fugaku new file mode 100755 index 0000000..68abcf6 --- /dev/null +++ b/FFB-MINI/src/make_setting.xmp_fugaku @@ -0,0 +1,29 @@ +CC = mpifccpx +FC = export XMP_ONLYCAF=1; xmpf90 +#FC = xmpf90 + +DEFINE += -I/opt/FJSVxtclanga/tcsds-1.2.30a/include + +DEFINE += -DNO_METIS +DEFINE += -DNO_REFINER + +# timing option +DEFINE += -DPROF_MAPROF + +CFLAGS += $(DEFINE) -Kvisimpact,ocl +FFLAGS += $(DEFINE) --Wf"-ocl" -Kvisimpact,ocl -Nlst=t + + +ifeq (, $(findstring -DNO_METIS, $(FFLAGS))) +CFLAGS += -I/opt/klocal/include +LIBS += -L/opt/klocal/lib -lmetis +endif + +ifeq (, $(findstring -DNO_REFINER, $(FFLAGS))) +REFINER = $(HOME)/opt/REVOCAP_Refiner +FFLAGS += -I$(REFINER)/include +LIBS += -L$(REFINER)/lib -lRcapRefiner +LIBS += -lRcapRefiner +LD = mpiFCCpx +LDFLAGS = --linkfortran -Kvisimpact +endif diff --git a/FFB-MINI/test/go.sh b/FFB-MINI/test/go.sh index 19347df..73c1f9b 100755 --- a/FFB-MINI/test/go.sh +++ b/FFB-MINI/test/go.sh @@ -2,5 +2,5 @@ PROG=../bin/ffb_mini -mpiexec -np 8 $PROG 2 2 2 46 | tee les3x.log.P0001 \ +mpiexec -np 8 -x XMP_ONESIDED_HEAP_SIZE=8192M $PROG 2 2 2 46 | tee les3x.log.P0001 \ && ./check.py master/les3x.log les3x.log diff --git a/FFB-MINI/test/go_fugaku.sh b/FFB-MINI/test/go_fugaku.sh new file mode 100755 index 0000000..87f845a --- /dev/null +++ b/FFB-MINI/test/go_fugaku.sh @@ -0,0 +1,14 
@@ +#!/bin/sh +# +# pjsub --interact --sparam "wait-time=600" ./go_fugaku.sh +# +#PJM -L "node=8" +#PJM -L "elapse=00:05:00" +#PJM -S + +PROG=../bin/ffb_mini +export PARALLEL=8 +export XMP_ONESIDED_HEAP_SIZE=8192M + +mpiexec -np 8 ${PROG} 2 2 2 46 color_partsize=2000 reorder_ndiv=10 unroll=on \ + | tee les3x.log.P0001 From 5c4f33b1b7cdef9fd6f7ccabe743214779dd1d27 Mon Sep 17 00:00:00 2001 From: Yoshikawa Hiroyuki Date: Tue, 2 Mar 2021 16:41:52 +0900 Subject: [PATCH 05/70] Add configulation of MODYLAS-MINI for Fugaku --- MODYLAS-MINI/src/ma_prof/src/maprof_yaml.c | 4 ++-- MODYLAS-MINI/src/make_setting.xmp_fugaku | 11 +++++++++++ MODYLAS-MINI/test/go_fugaku.sh | 11 +++++++++++ 3 files changed, 24 insertions(+), 2 deletions(-) create mode 100755 MODYLAS-MINI/src/make_setting.xmp_fugaku create mode 100755 MODYLAS-MINI/test/go_fugaku.sh diff --git a/MODYLAS-MINI/src/ma_prof/src/maprof_yaml.c b/MODYLAS-MINI/src/ma_prof/src/maprof_yaml.c index 690065e..a3350c0 100755 --- a/MODYLAS-MINI/src/ma_prof/src/maprof_yaml.c +++ b/MODYLAS-MINI/src/ma_prof/src/maprof_yaml.c @@ -69,7 +69,7 @@ maprof_yaml_node maprof_yaml_str_node(const char *s) node_t *n = (node_t *)malloc(sizeof(node_t)); n->type = SCALAR; n->data.scalar.type = STRING; - n->data.scalar.value.str = strdup(s); + n->data.scalar.value.str = (!s ? NULL : strdup(s)); return n; } @@ -154,7 +154,7 @@ void maprof_yaml_add_map_item(maprof_yaml_node map, const char *name, maprof_yam map_end = map->data.mapping.end; map_new = (map_item_t *)malloc(sizeof(map_item_t)); - map_new->name = strdup(name); + map_new->name = (!name ? 
NULL : strdup(name)); map_new->node = node; map_new->next = NULL; if (map_end == NULL) { diff --git a/MODYLAS-MINI/src/make_setting.xmp_fugaku b/MODYLAS-MINI/src/make_setting.xmp_fugaku new file mode 100755 index 0000000..ea185c0 --- /dev/null +++ b/MODYLAS-MINI/src/make_setting.xmp_fugaku @@ -0,0 +1,11 @@ +# +# K, FX10 +# + +FC = xmpf90 +CC = mpifccpx +FFLAGS = -cpp -omp -Cpp -DENERGY_DIRECT_K -Kfast,openmp,parallel,array_private,auto,ilfunc,ocl,preex,simd=2,mfunc=2 +FFLAGS += -I/opt/FJSVxtclanga/tcsds-1.2.30a/include/mpi/fujitsu + +# diagonostic (output *.lst files) +#FFLAGS += -Qa,d,i,m,p,t,x diff --git a/MODYLAS-MINI/test/go_fugaku.sh b/MODYLAS-MINI/test/go_fugaku.sh new file mode 100755 index 0000000..e7c1b76 --- /dev/null +++ b/MODYLAS-MINI/test/go_fugaku.sh @@ -0,0 +1,11 @@ +#!/bin/sh +#PJM -L "node=8" +#PJM -L "elapse=00:15:00" +#PJM -S + +MODYLAS=../src/modylas_mini + +export OMP_NUM_THREADS=2 + +(mpiexec $MODYLAS ./wat111 \ + && python2 ./check.py wat111.mdmntr.8n_8t wat111.mdmntr) 2>&1 | tee log From e1adf48b9a6051ff2e34027958ce0774cbc7fb52 Mon Sep 17 00:00:00 2001 From: Yoshikawa Hiroyuki Date: Tue, 2 Mar 2021 18:15:08 +0900 Subject: [PATCH 06/70] Add configulation of CCS-QCD for Fugaku --- CCS-QCD/src/ma_prof/src/maprof_yaml.c | 4 ++-- CCS-QCD/src/make.xmp_Fugaku.inc | 27 +++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) create mode 100644 CCS-QCD/src/make.xmp_Fugaku.inc diff --git a/CCS-QCD/src/ma_prof/src/maprof_yaml.c b/CCS-QCD/src/ma_prof/src/maprof_yaml.c index 255762a..4176618 100755 --- a/CCS-QCD/src/ma_prof/src/maprof_yaml.c +++ b/CCS-QCD/src/ma_prof/src/maprof_yaml.c @@ -65,7 +65,7 @@ maprof_yaml_node maprof_yaml_str_node(const char *s) node_t *n = (node_t *)malloc(sizeof(node_t)); n->type = SCALAR; n->data.scalar.type = STRING; - n->data.scalar.value.str = strdup(s); + n->data.scalar.value.str = (!s ? 
NULL : strdup(s)); return n; } @@ -150,7 +150,7 @@ void maprof_yaml_add_map_item(maprof_yaml_node map, const char *name, maprof_yam map_end = map->data.mapping.end; map_new = (map_item_t *)malloc(sizeof(map_item_t)); - map_new->name = strdup(name); + map_new->name = (!name ? NULL : strdup(name)); map_new->node = node; map_new->next = NULL; if (map_end == NULL) { diff --git a/CCS-QCD/src/make.xmp_Fugaku.inc b/CCS-QCD/src/make.xmp_Fugaku.inc new file mode 100644 index 0000000..dead033 --- /dev/null +++ b/CCS-QCD/src/make.xmp_Fugaku.inc @@ -0,0 +1,27 @@ +#$Id: make.inc,v 1.1 2009/12/02 10:24:37 ishikawa Exp $ + +REVISION = -D_REVISION_='"$(shell cat .svnrevision)"' + +# +# specify Fortran90 MPI compiler FC, compiler options FFLAGS and linker options LDFLAGS +# + +FC = xmpf90 +FFLAGS = -cpp -omp -Kfast,openmp -KXFILL -Kprefetch_sequential=soft + +CC = mpifccpx + +LDFLAGS = $(FFLAGS) + +#LIBS = + +MPIINC = /opt/FJSVxtclanga/tcsds-1.2.30a/include/mpi/fujitsu +INCLUDE = -I./ -I$(MPIINC) + +#.F90.o : + +#%.mod : %.F90 +# $(FC) $(FFLAGS) $(REVISION) $(INCLUDE) -c $< + +#%.o : %.F90 +# $(FC) $(FFLAGS) $(REVISION) $(INCLUDE) -c $< From f1c254d8f157bff35606529df9067d436a0fa6af Mon Sep 17 00:00:00 2001 From: "Yoshikawa, Hiroyuki" Date: Wed, 3 Mar 2021 14:46:49 +0900 Subject: [PATCH 07/70] [WIP] XMP API conversion of CCS-QCD, add comlib.F90 as xmpAPI_comlib.F90 --- CCS-QCD/src/make.xmpAPI_gfortran.inc | 19 + CCS-QCD/src/xmpAPI_comlib.F90 | 831 +++++++++++++++++++++++++++ 2 files changed, 850 insertions(+) create mode 100644 CCS-QCD/src/make.xmpAPI_gfortran.inc create mode 100644 CCS-QCD/src/xmpAPI_comlib.F90 diff --git a/CCS-QCD/src/make.xmpAPI_gfortran.inc b/CCS-QCD/src/make.xmpAPI_gfortran.inc new file mode 100644 index 0000000..e217f7e --- /dev/null +++ b/CCS-QCD/src/make.xmpAPI_gfortran.inc @@ -0,0 +1,19 @@ +#$Id: make.inc,v 1.1 2009/12/02 10:24:37 ishikawa Exp $ + +REVISION = -D_REVISION_='"$(shell cat .svnrevision)"' + +# +# specify Fortran90 MPI compiler FC, compiler 
options FFLAGS and linker options LDFLAGS +# + +# example: GNU gfortran, OpenMP enabled. +OMNI_HOME = $(shell xmpcc --show-env | grep OMNI_HOME | sed -e 's/OMNI_HOME=//' -e 's/"//g') +OMNI_INC = -I$(OMNI_HOME)/include +OMNI_LIB = $(shell xmpcc --show-env | grep OMNI_LINKER_OPT | sed -e 's/OMNI_LINKER_OPT=//' -e 's/"//g') + +FC = mpif90 +CC = mpicc +FFLAGS = -cpp -omp -O2 -ffixed-line-length-132 -march=native -g -fopenmp -mcmodel=medium -funderscoring +LDFLAGS = $(FFLAGS) $(OMNI_LIB) + +INCLUDE = -I./ $(OMNI_INC) diff --git a/CCS-QCD/src/xmpAPI_comlib.F90 b/CCS-QCD/src/xmpAPI_comlib.F90 new file mode 100644 index 0000000..d0b4f91 --- /dev/null +++ b/CCS-QCD/src/xmpAPI_comlib.F90 @@ -0,0 +1,831 @@ + subroutine copy(buf1, buf2, n) + !--- 2020 Fujitsu + use xmp_api + !--- 2020 Fujitsu end + implicit none + integer :: n + complex(8) :: buf1(n), buf2(n) + + buf1(:) = buf2(:) + return + end subroutine copy + +#include "config.h" + +module comlib +!= Communication subroutines wrapping MPI. +! +!== Abstract +! +! MPI1 Isend-irecv-Wait is used. +! +! Uses +! MPI1 +! - 0 comlib_make2 : +! - 1 comlib_irecv : MPI_irecv +! - 2 comlib_isend : MPI_Isend +! - 3 comlib_check : MPI_Wait(recv)/MPI_Wait(isend) +! +! node | 3 0 1 2 3 0 +! ======================================================================================================= +! buff | recv | send recv | send recv | send recv | send recv | send recv | send +! ======================================================================================================= +! irecv | irecv | irecv | irecv | irecv | irecv | +! | isend => | isend => | isend => | isend => | isend => | irsend +! wait | wait wait | wait wait | wait wait | wait wait | wait wait | +! ======================================================================================================= +! +!== Version +! +! $Id: comlib.F90,v 1.1 2009/12/02 10:24:23 ishikawa Exp $ +! 
+ implicit none + private + public :: comlib_data_c16 + public :: comlib_data_c8 + public :: comlib_init + public :: comlib_finalize + public :: comlib_node + public :: comlib_make2 + public :: comlib_isend + public :: comlib_irecv + public :: comlib_sendrecv + public :: comlib_check + public :: comlib_barrier + public :: comlib_bcast + public :: comlib_sumcast + + integer, save :: myid,numprocs,ntag + + type comlib_data_c16 +! +! Holds node, data sender/receiver information +! + sequence + private + integer :: ssize,rsize,sdesc,rdesc,stag,rtag,sreq,rreq + complex(8), pointer :: sbuff,rbuff,dummy0,dummy1 + end type + + type comlib_data_c8 +! +! Holds node, data sender/receiver information +! + sequence + private + integer :: ssize,rsize,sdesc,rdesc,stag,rtag,sreq,rreq + complex(4), pointer :: sbuff,rbuff,dummy0,dummy1 + end type + + interface comlib_make2 + module procedure comlib_make_c16 + module procedure comlib_make_c8 + end interface + + interface comlib_isend + module procedure comlib_isend_c16 + module procedure comlib_isend_c8 + end interface + + interface comlib_irecv + module procedure comlib_irecv_c16 + module procedure comlib_irecv_c8 + end interface + + interface comlib_sendrecv + module procedure comlib_sendrecv_c16 + module procedure comlib_sendrecv_c8 + end interface + + interface comlib_check + module procedure comlib_check_c16 + module procedure comlib_check_c8 + end interface + + interface comlib_bcast +! +! General method to broadcast data +! + module procedure comlib_bcast_char + module procedure comlib_bcast_i4 + module procedure comlib_bcast_r8 + module procedure comlib_bcast_c16 + module procedure comlib_bcast_i4_array + module procedure comlib_bcast_r8_array + module procedure comlib_bcast_c16_array + end interface + + interface comlib_sumcast +! +! General method to take total sum and broadcast data +! 
+ module procedure comlib_sumcast_i4 + module procedure comlib_sumcast_r8 + module procedure comlib_sumcast_r4 + module procedure comlib_sumcast_c16 + module procedure comlib_sumcast_c8 + module procedure comlib_sumcast_i4_array + module procedure comlib_sumcast_r8_array + module procedure comlib_sumcast_r4_array + module procedure comlib_sumcast_c16_array + module procedure comlib_sumcast_c8_array + end interface + + contains + +subroutine comlib_bcast_char(arg,ids) +! +! Broadcast character +! +! - arg : character +! - ids : source destination +! +!coarray use mpi + implicit none + character(LEN=*), intent(inout) :: arg + integer, intent(in) :: ids + integer :: ilen,ierr + + ilen=LEN(arg) +!coarray call MPI_BCAST(arg,ilen,MPI_CHARACTER,ids,MPI_COMM_WORLD,ierr) + + return +end subroutine + + subroutine comlib_bcast_i4(arg,ids) +! +! Broadcast integer +! +! - arg : integer +! - ids : source destination +! +!coarray use mpi + implicit none + integer, intent(inout) :: arg + integer, intent(in) :: ids +!coarray integer :: ierr + +!coarray call MPI_BCAST(arg,1,MPI_INTEGER,ids,MPI_COMM_WORLD,ierr) + call co_broadcast( arg,ids+1 ) + + return + end subroutine + + subroutine comlib_bcast_r8(arg,ids) +! +! Broadcast real(8) +! +! - arg : real +! - ids : source destination +! +!coarray use mpi + implicit none + real(8), intent(inout) :: arg + integer, intent(in) :: ids +!coarray integer :: ierr + +!coarray call MPI_BCAST(arg,1,MPI_REAL8,ids,MPI_COMM_WORLD,ierr) + call co_broadcast( arg,ids+1 ) + + return + end subroutine + + subroutine comlib_bcast_c16(arg,ids) +! +! Broadcast complex(8) +! +! - arg : complex +! - ids : source destination +! +!coarray use mpi + implicit none + complex(8), intent(inout) :: arg + integer, intent(in) :: ids +!coarray integer :: ierr + +!coarray call MPI_BCAST(arg,1,MPI_COMPLEX16,ids,MPI_COMM_WORLD,ierr) + call co_broadcast( arg,ids+1 ) + + return + end subroutine + + subroutine comlib_bcast_i4_array(arg,ids) +! +! Broadcast integer(4) array +! 
+!coarray use mpi + implicit none + integer, intent(inout) :: arg(:) + integer, intent(in) :: ids +!coarray integer :: ilen,ierr + +!coarray ilen=SIZE(arg) +!coarray call MPI_BCAST(arg,ilen,MPI_INTEGER,ids,MPI_COMM_WORLD,ierr) + call co_broadcast( arg,ids+1 ) + + return + end subroutine + + subroutine comlib_bcast_r8_array(arg,ids) +! +! Broadcast real(8) array +! +!coarray use mpi + implicit none + real(8), intent(inout) :: arg(:) + integer, intent(in) :: ids +!coarray integer :: ilen,ierr + +!coarray ilen=SIZE(arg) +!coarray call MPI_BCAST(arg,ilen,MPI_REAL8,ids,MPI_COMM_WORLD,ierr) + call co_broadcast( arg,ids+1 ) + + return + end subroutine + + subroutine comlib_bcast_c16_array(arg,ids) +! +! Broadcast complex(8) array +! +!coarray use mpi + implicit none + complex(8), intent(inout) :: arg(:) + integer, intent(in) :: ids +!coarray integer :: ilen,ierr + +!coarray ilen=SIZE(arg) +!coarray call MPI_BCAST(arg,ilen,MPI_COMPLEX16,ids,MPI_COMM_WORLD,ierr) + call co_broadcast( arg,ids+1 ) + + return + end subroutine + + subroutine comlib_init +! +! Initialize this library +! +!coarray use mpi + implicit none +!coarray integer :: ierr,iprov + + + ntag=0 +!coarray call MPI_INIT(ierr) + +! call MPI_INIT_THREAD(MPI_THREAD_MULTIPLE,iprov,ierr) +! call MPI_INIT_THREAD(MPI_THREAD_SINGLE,iprov,ierr) +! select case(iprov) +! case (MPI_THREAD_SINGLE) +! write(*,'("MPI suport MPI_THREAD_SINGLE.")') +! call MPI_Finalize(ierr) +! stop +! case (MPI_THREAD_FUNNELED) +! write(*,'("MPI suport only MPI_THREAD_FUNNELED.")') +! call MPI_Finalize(ierr) +! stop +! case (MPI_THREAD_SERIALIZED) +! write(*,'("MPI suport only MPI_THREAD_SERIALIZED.")') +! call MPI_Finalize(ierr) +! stop +! case (MPI_THREAD_MULTIPLE) +! write(*,'("MPI suport MPI_THREAD_MULTIPLE.")') +! end select + +!coarray +! call MPI_COMM_RANK(MPI_COMM_WORLD,myid,ierr) +! 
call MPI_COMM_SIZE(MPI_COMM_WORLD,numprocs,ierr) + myid = this_image() - 1 + numprocs = num_images() + + return + end subroutine + + subroutine comlib_finalize +! +! Finalize this library +! +!coarray use mpi + implicit none +!coarray integer :: ierr + +!coarray call MPI_Finalize(ierr) + + return + end subroutine + + subroutine comlib_node(nodeid,npe) +! +! Get nodeid and total number of nodes +! +!coarray use mpi + implicit none + integer, intent(out) :: nodeid,npe + + nodeid=myid + npe=numprocs + + return + end subroutine + + subroutine comlib_make_c16(id,node,send,recv,isize) +! +! Make 1to1 (all nodes) communication information +! +! - id : comlib_data +! - node : nodeid of this process +! - send : 1st component of sender data array (complex(8)) +! - recv : 1st component of reciever data array (complex(8)) +! - isize : total data size to be send/recieved in unit of bytes +! +!coarray use mpi + implicit none + type(comlib_data_c16), intent(out):: id + integer, intent(in) :: node,isize + complex(8), target :: send,recv + type(comlib_data_c16) :: idall(0:numprocs-1) + integer :: ierr,i +!coarray + integer :: buf + + idall(myid)%sdesc=node ! send : myid -> node + +!**** make send receive table for all nodes +!coarray do i=0,numprocs-1 +!coarray call MPI_BCAST(idall(i)%sdesc,1,MPI_INTEGER,i,MPI_COMM_WORLD,ierr) + do i=1,numprocs + buf = idall(i-1)%sdesc + call co_broadcast(buf,i) + !--- 2020 Fujitsu + !sync all + call xmp_sync_all() + !--- 2020 Fujitsu end + idall(i-1)%sdesc = buf + end do + do i=0,numprocs-1 + idall(idall(i)%sdesc)%rdesc=i + enddo + do i=0,numprocs-1 + ntag=ntag+1 + idall(i)%stag=ntag + enddo + do i=0,numprocs-1 + idall(i)%rtag=idall(idall(i)%rdesc)%stag + enddo + + id%sdesc=idall(myid)%sdesc ! node number (send) + id%rdesc=idall(myid)%rdesc ! node number (recv) + id%stag =idall(myid)%stag ! tag (send) + id%rtag =idall(myid)%rtag ! tag (recv) + id%ssize=isize ! amount of data in byte (send) + id%rsize=isize ! 
amount of data in byte (recv) + id%sbuff=>send ! send buffer (pointer) + id%rbuff=>recv ! receive buffer (pointer) + + return + end subroutine + + subroutine comlib_make_c8(id,node,send,recv,isize) +! +! Make 1to1 (all nodes) communication information +! +! - id : comlib_data +! - node : nodeid of this process +! - send : 1st component of sender data array (complex(4)) +! - recv : 1st component of reciever data array (complex(4)) +! - isize : total data size to be send/recieved in unit of bytes +! +!coarray use mpi + implicit none + type(comlib_data_c8), intent(out):: id + integer, intent(in) :: node,isize + complex(4), target :: send,recv + type(comlib_data_c8) :: idall(0:numprocs-1) + integer :: ierr,i +!coarray + integer :: buf + + idall(myid)%sdesc=node ! send : myid -> node + +!**** make send receive table for all nodes +!coarray do i=0,numprocs-1 +!coarray call MPI_BCAST(idall(i)%sdesc,1,MPI_INTEGER,i,MPI_COMM_WORLD,ierr) + do i=1,numprocs + buf = idall(i-1)%sdesc + call co_broadcast(buf,i) + !--- 2020 Fujitsu + !sync all + call xmp_sync_all() + !--- 2020 Fujitsu end + idall(i-1)%sdesc = buf + enddo + do i=0,numprocs-1 + idall(idall(i)%sdesc)%rdesc=i + enddo + do i=0,numprocs-1 + ntag=ntag+1 + idall(i)%stag=ntag + enddo + do i=0,numprocs-1 + idall(i)%rtag=idall(idall(i)%rdesc)%stag + enddo + + id%sdesc=idall(myid)%sdesc ! node number (send) + id%rdesc=idall(myid)%rdesc ! node number (recv) + id%stag =idall(myid)%stag ! tag (send) + id%rtag =idall(myid)%rtag ! tag (recv) + id%ssize=isize ! amount of data in byte (send) + id%rsize=isize ! amount of data in byte (recv) + id%sbuff=>send ! send buffer (pointer) + id%rbuff=>recv ! receive buffer (pointer) + + return + end subroutine + + subroutine comlib_isend_c16(id) +! +! Send data indicated by id. +! 
+!coarray use mpi + implicit none + type(comlib_data_c16), intent(inout) :: id + integer :: ierr + +!coarray call MPI_Isend(id%sbuff,id%ssize,MPI_CHARACTER,id%sdesc, & +!coarray & id%stag,MPI_COMM_WORLD,id%sreq,ierr) + + return + end subroutine + + subroutine comlib_isend_c8(id) +! +! Send data indicated by id. +! +!coarray use mpi + implicit none + type(comlib_data_c8), intent(inout) :: id + integer :: ierr + +!coarray call MPI_Isend(id%sbuff,id%ssize,MPI_CHARACTER,id%sdesc, & +!coarray & id%stag,MPI_COMM_WORLD,id%sreq,ierr) + + return + end subroutine + + subroutine comlib_irecv_c16(id) +! +! Recieve data indicated by id. +! +!coarray use mpi + implicit none + type(comlib_data_c16), intent(inout) :: id + integer :: ierr + +!coarray call MPI_Irecv(id%rbuff,id%rsize,MPI_CHARACTER,id%rdesc, & +!coarray & id%rtag,MPI_COMM_WORLD,id%rreq,ierr) + + return + end subroutine + + subroutine comlib_irecv_c8(id) +! +! Recieve data indicated by id. +! +!coarray use mpi + implicit none + type(comlib_data_c8), intent(inout) :: id + integer :: ierr + +!coarray call MPI_Irecv(id%rbuff,id%rsize,MPI_CHARACTER,id%rdesc, & +!coarray & id%rtag,MPI_COMM_WORLD,id%rreq,ierr) + + return + end subroutine + + subroutine comlib_sendrecv_c16(id) +! +! Send and Recieve data indicated by id. +! +!coarray use mpi + implicit none + type(comlib_data_c16), intent(inout) :: id +!coarray +! integer :: ierr,istat(MPI_STATUS_SIZE) +! + !--- 2020 Fujitsu + !complex(8), allocatable :: sbuff(:)[:] + !complex(8), allocatable :: rbuff(:)[:] + ! + !allocate(sbuff(id%ssize/16)[*]) + !allocate(rbuff(id%rsize/16)[*]) + ! 
+ complex(8), pointer :: sbuff(:) => null() + complex(8), pointer :: rbuff(:) => null() + integer(8) :: s_desc, r_desc + integer(8), dimension(1) :: s_lb,s_ub, r_lb, r_ub + integer(4), dimension(1) :: img_dims + integer(8) :: s_sec, r_sec + integer(8) :: start1, end1, end2 + integer(4) :: stride1 + integer(4) :: status + + s_lb(1) = 1; s_ub(1) = id%ssize/16 + r_lb(1) = 1; r_ub(1) = id%rsize/16 + + call xmp_new_coarray(s_desc, 16, 1, s_lb, s_ub, 1, img_dims) + call xmp_new_coarray(r_desc, 16, 1, r_lb, r_ub, 1, img_dims) + + call xmp_coarray_bind(s_desc, sbuff) + call xmp_coarray_bind(r_desc, rbuff) + !--- 2020 Fujitsu end + +!coarray +! call MPI_SendRecv(id%sbuff,id%ssize,MPI_CHARACTER,id%sdesc,id%stag, & +! & id%rbuff,id%rsize,MPI_CHARACTER,id%rdesc,id%rtag, & +! & MPI_COMM_WORLD,istat,ierr) + call copy(sbuff, id%sbuff, id%ssize/16) + !--- 2020 Fujitsu + !rbuff(:)[id%sdesc+1] = sbuff(:) + !sync all + call xmp_new_array_section(s_sec, 1) + call xmp_new_array_section(r_sec, 1) + start1 = 1; stride1 = 1 + end1 = id%ssize/16; end2 = id%rsize/16 + call xmp_array_section_set_triplet(s_sec, 1, start1,end1,stride1, status) + call xmp_array_section_set_triplet(r_sec, 1, start1,end2,stride1, status) + img_dims(1) = id%sdesc+1 + call xmp_coarray_put(r_desc,r_sec, s_desc,s_sec, status); + call xmp_sync_all() + !--- 2020 Fujitsu end + call copy(id%rbuff, rbuff, id%rsize/16) + + !--- 2020 Fujitsu + !deallocate(rbuff) + !deallocate(sbuff) + call xmp_free_array_section(s_sec) + call xmp_free_array_section(r_sec) + call xmp_coarray_deallocate(s_desc, status) + call xmp_coarray_deallocate(r_desc, status) + !--- 2020 Fujitsu end + + return + end subroutine + + subroutine comlib_sendrecv_c8(id) +! +! Send and Recieve data indicated by id. +! +!coarray use mpi + implicit none + type(comlib_data_c8), intent(inout) :: id +!coarray +! integer :: ierr,istat(MPI_STATUS_SIZE) +! +! call MPI_SendRecv(id%sbuff,id%ssize,MPI_CHARACTER,id%sdesc,id%stag, & +! 
& id%rbuff,id%rsize,MPI_CHARACTER,id%rdesc,id%rtag, & +! & MPI_COMM_WORLD,istat,ierr) + + return + end subroutine + + subroutine comlib_check_c16(id) +! +! Check communication ends. +! +!coarray use mpi + implicit none + type(comlib_data_c16), intent(inout) :: id +!coarray +! integer :: sstat(MPI_STATUS_SIZE) +! integer :: rstat(MPI_STATUS_SIZE),ierr +! +! call MPI_Wait(id%rreq,rstat,ierr) +! call MPI_Wait(id%sreq,sstat,ierr) + !--- 2020 Fujitsu + !sync all + call xmp_sync_all() + !--- 2020 Fujitsu end +! write(*,'("RSTAT:",99I12,I8)')rstat,myid +! write(*,'("SSTAT:",99I12,I8)')sstat,myid + + return + end subroutine + + subroutine comlib_check_c8(id) +! +! Check communication ends. +! +!coarray use mpi + implicit none + type(comlib_data_c8), intent(inout) :: id +!coarray +! integer :: status(MPI_STATUS_SIZE),ierr +! +! call MPI_Wait(id%rreq,status,ierr) +! call MPI_Wait(id%sreq,status,ierr) + !--- 2020 Fujitsu + !sync all + call xmp_sync_all() + !--- 2020 Fujitsu end + + return + end subroutine + + subroutine comlib_barrier +! +! Barrier sync. +! +!coarray use mpi + implicit none +!coarray +! integer :: ierr +! +! call MPI_Barrier(MPI_COMM_WORLD,ierr) + !--- 2020 Fujitsu + !sync all + call xmp_sync_all() + !--- 2020 Fujitsu end + + return + end subroutine + + subroutine comlib_sumcast_i4_array(i4) +! +! sum and broadcast of integer(4) data array +! +!coarray use mpi + implicit none + integer, intent(inout) :: i4(:) + integer :: i4tmp(SIZE(i4)) + integer :: ierr,isize + + isize=SIZE(i4) +!coarray call MPI_Allreduce(i4,i4tmp,isize,MPI_INTEGER,MPI_SUM,MPI_COMM_WORLD,ierr) + i4=i4tmp + + return + end subroutine + + subroutine comlib_sumcast_r8_array(r8) +! +! sum and broadcast of real(8) data array +! 
+!coarray use mpi + implicit none + real(8), intent(inout) :: r8(:) + real(8) :: r8tmp(SIZE(r8)) + integer :: ierr,isize + + isize=SIZE(r8) +!coarray call MPI_Allreduce(r8,r8tmp,isize,MPI_REAL8,MPI_SUM,MPI_COMM_WORLD,ierr) + r8=r8tmp + + return + end subroutine + + subroutine comlib_sumcast_r4_array(r4) +! +! sum and broadcast of real(4) data array +! +!coarray use mpi + implicit none + real(4), intent(inout) :: r4(:) + real(4) :: r4tmp(SIZE(r4)) + integer :: ierr,isize + + isize=SIZE(r4) +!coarray call MPI_Allreduce(r4,r4tmp,isize,MPI_REAL4,MPI_SUM,MPI_COMM_WORLD,ierr) + r4=r4tmp + + return + end subroutine + + subroutine comlib_sumcast_c16_array(c16) +! +! sum and broadcast of complex(8) data array +! +!coarray use mpi + implicit none + complex(8), intent(inout) :: c16(:) + complex(8) :: c16tmp(SIZE(c16)) + integer :: ierr,isize + + isize=SIZE(c16) +!coarray call MPI_Allreduce(c16,c16tmp,isize,MPI_COMPLEX16,MPI_SUM,MPI_COMM_WORLD,ierr) + c16=c16tmp + + return + end subroutine + + subroutine comlib_sumcast_c8_array(c8) +! +! sum and broadcast of complex(4) data array +! +!coarray use mpi + implicit none + complex(4), intent(inout) :: c8(:) + complex(4) :: c8tmp(SIZE(c8)) + integer :: ierr,isize + + isize=SIZE(c8) +!coarray call MPI_Allreduce(c8,c8tmp,isize,MPI_COMPLEX8,MPI_SUM,MPI_COMM_WORLD,ierr) + c8=c8tmp + + return + end subroutine + + subroutine comlib_sumcast_i4(i4) +! +! sum and broadcast of integer(4) data +! +!coarray use mpi + implicit none + integer, intent(inout) :: i4 + integer :: i4tmp +!coarray +! integer :: ierr +! +! call MPI_Allreduce(i4,i4tmp,1,MPI_INTEGER,MPI_SUM,MPI_COMM_WORLD,ierr) + i4tmp = i4 + call co_sum(i4tmp) + i4 = i4tmp + + return + end subroutine + + subroutine comlib_sumcast_r8(r8) +! +! sum and broadcast of real(8) data +! +!coarray use mpi + implicit none + real(8), intent(inout) :: r8 + real(8) :: r8tmp +!coarray +! integer :: ierr +! +! 
call MPI_Allreduce(r8,r8tmp,1,MPI_REAL8,MPI_SUM,MPI_COMM_WORLD,ierr) + r8tmp = r8 + call co_sum(r8tmp) + r8 = r8tmp + + return + end subroutine + + subroutine comlib_sumcast_r4(r4) +! +! sum and broadcast of real(4) data +! +!coarray use mpi + implicit none + real(4), intent(inout) :: r4 + real(4) :: r4tmp +!coarray +! integer :: ierr +! +! call MPI_Allreduce(r4,r4tmp,1,MPI_REAL4,MPI_SUM,MPI_COMM_WORLD,ierr) + r4tmp = r4 + call co_sum(r4tmp) + r4 = r4tmp + + return + end subroutine + + subroutine comlib_sumcast_c16(c16) +! +! sum and broadcast of complex(8) data +! +!coarray use mpi + implicit none + complex(8), intent(inout) :: c16 + complex(8) :: c16tmp +!coarray +! integer :: ierr +! +! call MPI_Allreduce(c16,c16tmp,1,MPI_COMPLEX16,MPI_SUM,MPI_COMM_WORLD,ierr) + c16tmp = c16 + call co_sum(c16tmp) + c16 = c16tmp + + return + end subroutine + + subroutine comlib_sumcast_c8(c8) +! +! sum and broadcast of complex(4) data +! +!coarray use mpi + implicit none + complex(4), intent(inout) :: c8 + complex(4) :: c8tmp +!coarray +! integer :: ierr +! +! call MPI_Allreduce(c8,c8tmp,1,MPI_COMPLEX8,MPI_SUM,MPI_COMM_WORLD,ierr) + c8tmp = c8 + call co_sum(c8tmp) + c8 = c8tmp + + return + end subroutine + +end module From b05be8feebdb6064f685eabc233a60cd566c93d5 Mon Sep 17 00:00:00 2001 From: "Yoshikawa, Hiroyuki" Date: Wed, 3 Mar 2021 18:52:35 +0900 Subject: [PATCH 08/70] Modify to switch comlib.F90/xmpAPI_comlib.F90 according to USE_XMP_API macro. 
--- CCS-QCD/src/Makefile | 7 ++++++- CCS-QCD/src/make.xmpAPI_gfortran.inc | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CCS-QCD/src/Makefile b/CCS-QCD/src/Makefile index b0d7d13..b37d470 100644 --- a/CCS-QCD/src/Makefile +++ b/CCS-QCD/src/Makefile @@ -16,7 +16,12 @@ FFLAGS += -D_NDIMX=$(PX) -D_NDIMY=$(PY) -D_NDIMZ=$(PZ) endif export ####################################################################### -SOURCE = ccs_qcd_solver_bench.F90 ccs_qcd_solver_bench_class.F90 lattice_class.F90 comlib.F90 +SOURCE = ccs_qcd_solver_bench.F90 ccs_qcd_solver_bench_class.F90 lattice_class.F90 +ifeq ($(USE_XMP_API), yes) + SOURCE += xmpAPI_comlib.F90 +else + SOURCE += comlib.F90 +endif CONFIG = CLASS_$(CLASS)/config.h FFLAGS += -Ima_prof/src LDFLAGS += -Lma_prof/src diff --git a/CCS-QCD/src/make.xmpAPI_gfortran.inc b/CCS-QCD/src/make.xmpAPI_gfortran.inc index e217f7e..f33502d 100644 --- a/CCS-QCD/src/make.xmpAPI_gfortran.inc +++ b/CCS-QCD/src/make.xmpAPI_gfortran.inc @@ -7,6 +7,7 @@ REVISION = -D_REVISION_='"$(shell cat .svnrevision)"' # # example: GNU gfortran, OpenMP enabled. 
+USE_XMP_API = yes OMNI_HOME = $(shell xmpcc --show-env | grep OMNI_HOME | sed -e 's/OMNI_HOME=//' -e 's/"//g') OMNI_INC = -I$(OMNI_HOME)/include OMNI_LIB = $(shell xmpcc --show-env | grep OMNI_LINKER_OPT | sed -e 's/OMNI_LINKER_OPT=//' -e 's/"//g') From 8095348b481ef13b030a1260c4c5356040509f9e Mon Sep 17 00:00:00 2001 From: "Yoshikawa, Hiroyuki" Date: Thu, 4 Mar 2021 16:28:44 +0900 Subject: [PATCH 09/70] [WIP] Modify CCS-QCD xmp_api code --- CCS-QCD/src/Makefile | 14 ++++---- CCS-QCD/src/make.xmpAPI_gfortran.inc | 6 ++-- CCS-QCD/src/xmpAPI_comlib.F90 | 53 +++++++++++++++++++++------- 3 files changed, 52 insertions(+), 21 deletions(-) diff --git a/CCS-QCD/src/Makefile b/CCS-QCD/src/Makefile index b37d470..487782c 100644 --- a/CCS-QCD/src/Makefile +++ b/CCS-QCD/src/Makefile @@ -16,12 +16,12 @@ FFLAGS += -D_NDIMX=$(PX) -D_NDIMY=$(PY) -D_NDIMZ=$(PZ) endif export ####################################################################### -SOURCE = ccs_qcd_solver_bench.F90 ccs_qcd_solver_bench_class.F90 lattice_class.F90 ifeq ($(USE_XMP_API), yes) - SOURCE += xmpAPI_comlib.F90 + COMLIB = xmpAPI_comlib else - SOURCE += comlib.F90 + COMLIB = comlib endif +SOURCE = ccs_qcd_solver_bench.F90 ccs_qcd_solver_bench_class.F90 lattice_class.F90 $(COMLIB).F90 CONFIG = CLASS_$(CLASS)/config.h FFLAGS += -Ima_prof/src LDFLAGS += -Lma_prof/src @@ -51,14 +51,14 @@ clean : $(MAKE) -C ma_prof/src clean ####################################################################### -comlib.o : comlib.F90 config.h -lattice_class.o : lattice_class.F90 comlib.o config.h -ccs_qcd_solver_bench_class.o : ccs_qcd_solver_bench_class.F90 comlib.o lattice_class.o config.h +$(COMLIB).o : $(COMLIB).F90 config.h +lattice_class.o : lattice_class.F90 $(COMLIB).o config.h +ccs_qcd_solver_bench_class.o : ccs_qcd_solver_bench_class.F90 $(COMLIB).o lattice_class.o config.h ccs_qcd_solver_bench_class.o : bicgstab_hmc.h90 clover.h90 clover_f1f2.h90 clvinv.h90 ccs_qcd_solver_bench_class.o : clvinv_ldl.h90 copy_u.h90 
copy_y.h90 expp_u.h90 full2linear_clv.h90 gauss_y.h90 ccs_qcd_solver_bench_class.o : init_p.h90 init_u_and_y.h90 initset.h90 mult.h90 mult_eo_tzyx.h90 ccs_qcd_solver_bench_class.o : mult_fclinv.h90 mult_mb_pre.h90 output.h90 xrand.h90 -ccs_qcd_solver_bench.o : ccs_qcd_solver_bench.F90 comlib.o ccs_qcd_solver_bench_class.o lattice_class.o config.h +ccs_qcd_solver_bench.o : ccs_qcd_solver_bench.F90 $(COMLIB).o ccs_qcd_solver_bench_class.o lattice_class.o config.h .PHONY: CONFIG_GEN CONFIG_GEN: diff --git a/CCS-QCD/src/make.xmpAPI_gfortran.inc b/CCS-QCD/src/make.xmpAPI_gfortran.inc index f33502d..78be54a 100644 --- a/CCS-QCD/src/make.xmpAPI_gfortran.inc +++ b/CCS-QCD/src/make.xmpAPI_gfortran.inc @@ -14,7 +14,9 @@ OMNI_LIB = $(shell xmpcc --show-env | grep OMNI_LINKER_OPT | sed -e 's/OMNI_LINK FC = mpif90 CC = mpicc -FFLAGS = -cpp -omp -O2 -ffixed-line-length-132 -march=native -g -fopenmp -mcmodel=medium -funderscoring -LDFLAGS = $(FFLAGS) $(OMNI_LIB) +FFLAGS = -cpp -O2 -ffixed-line-length-132 -march=native -g -fopenmp -mcmodel=medium -funderscoring \ + $(OMNI_INC) +LDFLAGS = $(FFLAGS) +LIBS = $(OMNI_LIB) INCLUDE = -I./ $(OMNI_INC) diff --git a/CCS-QCD/src/xmpAPI_comlib.F90 b/CCS-QCD/src/xmpAPI_comlib.F90 index d0b4f91..ceccc9e 100644 --- a/CCS-QCD/src/xmpAPI_comlib.F90 +++ b/CCS-QCD/src/xmpAPI_comlib.F90 @@ -265,9 +265,14 @@ subroutine comlib_init ! Initialize this library ! !coarray use mpi - implicit none !coarray integer :: ierr,iprov +!--- 2020 Fujitsu +use xmp_api +use mpi +integer :: ierr +!--- 2020 Fujitsu end + ntag=0 !coarray call MPI_INIT(ierr) @@ -294,8 +299,14 @@ subroutine comlib_init !coarray ! call MPI_COMM_RANK(MPI_COMM_WORLD,myid,ierr) ! 
call MPI_COMM_SIZE(MPI_COMM_WORLD,numprocs,ierr) - myid = this_image() - 1 - numprocs = num_images() + !--- 2020 Fujitsu + !myid = this_image() - 1 + !numprocs = num_images() + call xmp_api_init + myid = xmp_this_image() - 1 + !numprocs = xmp_num_images() + call MPI_COMM_SIZE(MPI_COMM_WORLD,numprocs,ierr) + !--- 2020 Fujitsu end return end subroutine @@ -309,6 +320,9 @@ subroutine comlib_finalize !coarray integer :: ierr !coarray call MPI_Finalize(ierr) + !--- 2020 Fujitsu + call xmp_api_finalize + !--- 2020 Fujitsu end return end subroutine @@ -357,7 +371,7 @@ subroutine comlib_make_c16(id,node,send,recv,isize) call co_broadcast(buf,i) !--- 2020 Fujitsu !sync all - call xmp_sync_all() + call xmp_sync_all(ierr) !--- 2020 Fujitsu end idall(i-1)%sdesc = buf end do @@ -414,7 +428,7 @@ subroutine comlib_make_c8(id,node,send,recv,isize) call co_broadcast(buf,i) !--- 2020 Fujitsu !sync all - call xmp_sync_all() + call xmp_sync_all(ierr) !--- 2020 Fujitsu end idall(i-1)%sdesc = buf enddo @@ -506,6 +520,9 @@ subroutine comlib_sendrecv_c16(id) ! Send and Recieve data indicated by id. ! !coarray use mpi + !--- 2020 Fujitsu + use xmp_api + !--- 2020 Fujitsu end implicit none type(comlib_data_c16), intent(inout) :: id !coarray @@ -518,8 +535,8 @@ subroutine comlib_sendrecv_c16(id) !allocate(sbuff(id%ssize/16)[*]) !allocate(rbuff(id%rsize/16)[*]) ! 
- complex(8), pointer :: sbuff(:) => null() - complex(8), pointer :: rbuff(:) => null() + integer, pointer :: sbuff(:) => null() + integer, pointer :: rbuff(:) => null() integer(8) :: s_desc, r_desc integer(8), dimension(1) :: s_lb,s_ub, r_lb, r_ub integer(4), dimension(1) :: img_dims @@ -553,8 +570,8 @@ subroutine comlib_sendrecv_c16(id) call xmp_array_section_set_triplet(s_sec, 1, start1,end1,stride1, status) call xmp_array_section_set_triplet(r_sec, 1, start1,end2,stride1, status) img_dims(1) = id%sdesc+1 - call xmp_coarray_put(r_desc,r_sec, s_desc,s_sec, status); - call xmp_sync_all() + call xmp_coarray_put(img_dims, r_desc,r_sec, s_desc,s_sec, status); + call xmp_sync_all(status) !--- 2020 Fujitsu end call copy(id%rbuff, rbuff, id%rsize/16) @@ -592,6 +609,9 @@ subroutine comlib_check_c16(id) ! Check communication ends. ! !coarray use mpi + !--- 2020 Fujitsu + use xmp_api + !--- 2020 Fujitsu end implicit none type(comlib_data_c16), intent(inout) :: id !coarray @@ -602,7 +622,8 @@ subroutine comlib_check_c16(id) ! call MPI_Wait(id%sreq,sstat,ierr) !--- 2020 Fujitsu !sync all - call xmp_sync_all() + integer :: ierr + call xmp_sync_all(ierr) !--- 2020 Fujitsu end ! write(*,'("RSTAT:",99I12,I8)')rstat,myid ! write(*,'("SSTAT:",99I12,I8)')sstat,myid @@ -615,6 +636,9 @@ subroutine comlib_check_c8(id) ! Check communication ends. ! !coarray use mpi + !--- 2020 Fujitsu + use xmp_api + !--- 2020 Fujitsu end implicit none type(comlib_data_c8), intent(inout) :: id !coarray @@ -624,7 +648,8 @@ subroutine comlib_check_c8(id) ! call MPI_Wait(id%sreq,status,ierr) !--- 2020 Fujitsu !sync all - call xmp_sync_all() + integer :: ierr + call xmp_sync_all(ierr) !--- 2020 Fujitsu end return @@ -635,6 +660,9 @@ subroutine comlib_barrier ! Barrier sync. ! !coarray use mpi + !--- 2020 Fujitsu + use xmp_api + !--- 2020 Fujitsu end implicit none !coarray ! integer :: ierr @@ -642,7 +670,8 @@ subroutine comlib_barrier ! 
call MPI_Barrier(MPI_COMM_WORLD,ierr) !--- 2020 Fujitsu !sync all - call xmp_sync_all() + integer :: ierr + call xmp_sync_all(ierr) !--- 2020 Fujitsu end return From 3dd0ba83d60857ab4e28e49d234d61f3fbe75e81 Mon Sep 17 00:00:00 2001 From: Kazuma Tago Date: Thu, 4 Mar 2021 17:08:26 +0900 Subject: [PATCH 10/70] Add make_setting for XMP-API. --- MODYLAS-MINI/src/make_setting.xmp_api | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100755 MODYLAS-MINI/src/make_setting.xmp_api diff --git a/MODYLAS-MINI/src/make_setting.xmp_api b/MODYLAS-MINI/src/make_setting.xmp_api new file mode 100755 index 0000000..bb2e4d3 --- /dev/null +++ b/MODYLAS-MINI/src/make_setting.xmp_api @@ -0,0 +1,20 @@ +# +# Linux64-GCC XMP-API +# + +USE_XMP_API = yes +# FC = xmpf90 +FC = mpif90 +#FFLAGS = -cpp -omp -O2 -w -fopenmp +FFLAGS = -cpp -fopenmp -O2 -w -fopenmp +CC = mpicc + +OMNI_HOME = $(shell xmpcc --show-env | grep OMNI_HOME | sed -e 's/OMNI_HOME=//' -e 's/"//g') +OMNI_INC = -I$(OMNI_HOME)/include +OMNI_LIB = $(shell xmpcc --show-env | grep OMNI_LINKER_OPT | sed -e 's/OMNI_LINKER_OPT=//' -e 's/"//g') + + +FFLAGS += -I$(MPIHOME)/include $(OMNI_LIB) $(OMNI_INC) + +# TODO: temporal option +FFLAGS += -fcoarray=lib From 0b57782bc3ba363e4486c35d893eeb96ac101c8f Mon Sep 17 00:00:00 2001 From: Yoshikawa Hiroyuki Date: Thu, 4 Mar 2021 18:34:56 +0900 Subject: [PATCH 11/70] add some configs for Fugaku --- CCS-QCD/src/make.xmpAPI_Fugaku.inc | 30 ++++++++++++++++++++ NTCHEM-MINI/platforms/config_mine.xmp_Fugaku | 9 ++++++ 2 files changed, 39 insertions(+) create mode 100644 CCS-QCD/src/make.xmpAPI_Fugaku.inc create mode 100755 NTCHEM-MINI/platforms/config_mine.xmp_Fugaku diff --git a/CCS-QCD/src/make.xmpAPI_Fugaku.inc b/CCS-QCD/src/make.xmpAPI_Fugaku.inc new file mode 100644 index 0000000..b75faa1 --- /dev/null +++ b/CCS-QCD/src/make.xmpAPI_Fugaku.inc @@ -0,0 +1,30 @@ +#$Id: make.inc,v 1.1 2009/12/02 10:24:37 ishikawa Exp $ + +REVISION = -D_REVISION_='"$(shell cat .svnrevision)"' + +# 
+# specify Fortran90 MPI compiler FC, compiler options FFLAGS and linker options LDFLAGS +# +USE_XMP_API = yes +OMNI_HOME = $(shell xmpcc --show-env | grep OMNI_HOME | sed -e 's/OMNI_HOME=//' -e 's/"//g') +OMNI_INC = -I$(OMNI_HOME)/include +OMNI_LIB = $(shell xmpcc --show-env | grep OMNI_LINKER_OPT | sed -e 's/OMNI_LINKER_OPT=//' -e 's/"//g') + +FC = mpifrtpx +FFLAGS = -cpp -Kfast,openmp -KXFILL -Kprefetch_sequential=soft + +CC = mpifccpx + +LDFLAGS = $(FFLAGS) +LIBS = $(OMNI_LIB) + +MPIINC = /opt/FJSVxtclanga/tcsds-1.2.30a/include/mpi/fujitsu +INCLUDE = -I./ $(OMNI_INC) + +#.F90.o : + +#%.mod : %.F90 +# $(FC) $(FFLAGS) $(REVISION) $(INCLUDE) -c $< + +#%.o : %.F90 +# $(FC) $(FFLAGS) $(REVISION) $(INCLUDE) -c $< diff --git a/NTCHEM-MINI/platforms/config_mine.xmp_Fugaku b/NTCHEM-MINI/platforms/config_mine.xmp_Fugaku new file mode 100755 index 0000000..419f9f5 --- /dev/null +++ b/NTCHEM-MINI/platforms/config_mine.xmp_Fugaku @@ -0,0 +1,9 @@ +# For K computer & FX10 servers - mpifrtpx compiler command and SSL2 library +./config/configure \ +--lapack= \ +--blas= \ +--atlas=-SSL2BLAMP \ +linux64_xmp_omp_fugaku + +cd ./config +ln -sf makeconfig makeconfig.xmp From 5637132c683eee4ae117c4b979ffa41a1c849a67 Mon Sep 17 00:00:00 2001 From: Kazuma Tago Date: Thu, 4 Mar 2021 17:14:14 +0900 Subject: [PATCH 12/70] Add 10 new files for XMP-API and update Makefile. 
--- MODYLAS-MINI/src/Makefile | 163 +- MODYLAS-MINI/src/xmpAPI_app_f90.f | 481 ++++++ MODYLAS-MINI/src/xmpAPI_comm.f | 1879 +++++++++++++++++++++++ MODYLAS-MINI/src/xmpAPI_comm_3.f | 1225 +++++++++++++++ MODYLAS-MINI/src/xmpAPI_comm_fmm.f | 1227 +++++++++++++++ MODYLAS-MINI/src/xmpAPI_domain_div.f | 475 ++++++ MODYLAS-MINI/src/xmpAPI_fmodules.f | 386 +++++ MODYLAS-MINI/src/xmpAPI_k_energy.f | 96 ++ MODYLAS-MINI/src/xmpAPI_mpitool.f | 59 + MODYLAS-MINI/src/xmpAPI_nve_integrate.f | 205 +++ MODYLAS-MINI/src/xmpAPI_parse_input.f | 348 +++++ 11 files changed, 6474 insertions(+), 70 deletions(-) create mode 100755 MODYLAS-MINI/src/xmpAPI_app_f90.f create mode 100755 MODYLAS-MINI/src/xmpAPI_comm.f create mode 100755 MODYLAS-MINI/src/xmpAPI_comm_3.f create mode 100755 MODYLAS-MINI/src/xmpAPI_comm_fmm.f create mode 100755 MODYLAS-MINI/src/xmpAPI_domain_div.f create mode 100755 MODYLAS-MINI/src/xmpAPI_fmodules.f create mode 100755 MODYLAS-MINI/src/xmpAPI_k_energy.f create mode 100755 MODYLAS-MINI/src/xmpAPI_mpitool.f create mode 100755 MODYLAS-MINI/src/xmpAPI_nve_integrate.f create mode 100755 MODYLAS-MINI/src/xmpAPI_parse_input.f diff --git a/MODYLAS-MINI/src/Makefile b/MODYLAS-MINI/src/Makefile index a7cf1d5..55aede4 100755 --- a/MODYLAS-MINI/src/Makefile +++ b/MODYLAS-MINI/src/Makefile @@ -17,14 +17,37 @@ FFLAGS += -DMODYLAS_MINI_VERSION=\"$(VERSION)\" CFLAGS = -DUSE_MPI -OBJS = fmodules.o main_f90.o app_f90.o \ +ifeq ($(USE_XMP_API), yes) +FMODULES = xmpAPI_fmodules +APP_F90 = xmpAPI_app_f90 +PARSE_INPUT = xmpAPI_parse_input +COMM = xmpAPI_comm +COMM_3 = xmpAPI_comm_3 +COMM_FMM = xmpAPI_comm_fmm +NVE_INTEGRATE = xmpAPI_nve_integrate +K_ENERGY = xmpAPI_k_energy +DOMAIN_DIV = xmpAPI_domain_div +MPITOOL = xmpAPI_mpitool +else +FMODULES = fmodules +APP_F90 = app_f90 +PARSE_INPUT = parse_input +COMM = comm +COMM_3 = comm_3 +COMM_FMM = comm_fmm +NVE_INTEGRATE = nve_integrate +K_ENERGY = k_energy +DOMAIN_DIV = domain_div +MPITOOL = mpitool +endif + +OBJS = $(FMODULES).o 
main_f90.o $(APP_F90).o \ md_charmm_f90.o md_fmm_f90.o md_direct_f90.o \ - nve_integrate.o k_energy.o cell_edge.o \ - mt19937ar.o mpitool.o domain_div.o assign2subcell.o \ + $(NVE_INTEGRATE).o $(K_ENERGY).o cell_edge.o \ + mt19937ar.o $(MPITOOL).o $(DOMAIN_DIV).o assign2subcell.o \ shake_rattle_roll.o \ - parse_input.o \ - comm.o comm_3.o comm_fmm.o \ - opening.o closing.o ConfigReader.o ConfigRead.o + $(PARSE_INPUT).o $(COMM).o $(COMM_3).o $(COMM_FMM).o \ + opening.o closing.o ConfigReader.o ConfigRead.o all: $(PROGRAM) @@ -80,28 +103,34 @@ test: $(PROGRAM) # DO NOT DELETE THIS LINE - used by make depend -app_f90.o: atommass.mod g_main.mod md_condition.mod md_const.mod md_file.mod -app_f90.o: md_fmm_domdiv_flg.mod md_monitors.mod md_periodic.mod md_segment.mod -app_f90.o: mpivar.mod nhc.mod param.mod shakerattleroll.mod trj_mpi.mod -app_f90.o: trj_org.mod unitcell.mod cutoffradius.mod +$(APP_F90).o: atommass.mod g_main.mod md_condition.mod md_const.mod md_file.mod +$(APP_F90).o: md_fmm_domdiv_flg.mod md_monitors.mod md_periodic.mod md_segment.mod +$(APP_F90).o: mpivar.mod nhc.mod param.mod shakerattleroll.mod trj_mpi.mod +$(APP_F90).o: trj_org.mod unitcell.mod cutoffradius.mod assign2subcell.o: atommass.mod md_fmm.mod md_fmm_domdiv_flg.mod md_periodic.mod assign2subcell.o: md_segment.mod mpivar.mod param.mod trj_mpi.mod trj_org.mod assign2subcell.o: unitcell.mod cell_edge.o: atommass.mod md_periodic.mod md_segment.mod param.mod trj_org.mod cell_edge.o: unitcell.mod -comm.o: comm_base.mod comm_bd.mod md_fmm.mod md_fmm_domdiv_flg.mod -comm.o: md_forces.mod md_monitors.mod md_periodic.mod md_segment.mod mpivar.mod -comm.o: param.mod trj_mpi.mod trj_org.mod unitcell.mod ompvar.mod -comm_3.o: comm_base.mod comm_d3.mod md_fmm.mod md_fmm_domdiv_flg.mod -comm_3.o: md_forces.mod md_monitors.mod md_periodic.mod md_segment.mod -comm_3.o: mpivar.mod trj_mpi.mod trj_org.mod unitcell.mod -comm_fmm.o: comm_base.mod md_fmm.mod md_fmm_domdiv_flg.mod mpivar.mod -domain_div.o: 
cutoffradius.mod md_condition.mod md_fmm.mod md_fmm_domdiv_flg.mod -domain_div.o: md_forces.mod md_multiplestep.mod md_periodic.mod md_segment.mod -domain_div.o: mpivar.mod shakerattleroll.mod trj_mpi.mod trj_org.mod -domain_div.o: unitcell.mod ompvar.mod -k_energy.o: atommass.mod md_const.mod md_fmm.mod md_fmm_domdiv_flg.mod -k_energy.o: ompvar.mod mpivar.mod param.mod trj_mpi.mod trj_org.mod +$(COMM).o: comm_base.mod comm_bd.mod md_fmm.mod md_fmm_domdiv_flg.mod +$(COMM).o: md_forces.mod md_monitors.mod md_periodic.mod md_segment.mod mpivar.mod +$(COMM).o: param.mod trj_mpi.mod trj_org.mod unitcell.mod ompvar.mod +$(COMM_3).o: comm_base.mod comm_d3.mod md_fmm.mod md_fmm_domdiv_flg.mod +$(COMM_3).o: md_forces.mod md_monitors.mod md_periodic.mod md_segment.mod +$(COMM_3).o: mpivar.mod trj_mpi.mod trj_org.mod unitcell.mod +$(COMM_FMM).o: comm_base.mod md_fmm.mod md_fmm_domdiv_flg.mod mpivar.mod +$(PARSE_INPUT).o: atommass.mod md_charmm_lj.mod md_condition.mod md_const.mod +$(PARSE_INPUT).o: md_coulomb.mod md_file.mod md_fmm_domdiv_flg.mod +$(PARSE_INPUT).o: md_multiplestep.mod md_periodic.mod md_segment.mod mpivar.mod +$(PARSE_INPUT).o: nhc.mod param.mod shakerattleroll.mod +$(PARSE_INPUT).o: g_main.mod cutoffradius.mod md_fmm.mod md_ewald.mod +$(PARSE_INPUT).o: trj_org.mod trj_mpi.mod unitcell.mod +$(DOMAIN_DIV).o: cutoffradius.mod md_condition.mod md_fmm.mod md_fmm_domdiv_flg.mod +$(DOMAIN_DIV).o: md_forces.mod md_multiplestep.mod md_periodic.mod md_segment.mod +$(DOMAIN_DIV).o: mpivar.mod shakerattleroll.mod trj_mpi.mod trj_org.mod +$(DOMAIN_DIV).o: unitcell.mod ompvar.mod +$(K_ENERGY).o: atommass.mod md_const.mod md_fmm.mod md_fmm_domdiv_flg.mod +$(K_ENERGY).o: ompvar.mod mpivar.mod param.mod trj_mpi.mod trj_org.mod main_f90.o: md_condition.mod md_fmm_domdiv_flg.mod md_forces.mod main_f90.o: md_monitors.mod md_multiplestep.mod md_segment.mod mpivar.mod main_f90.o: shakerattleroll.mod trj_mpi.mod version.mod @@ -116,60 +145,54 @@ md_fmm_f90.o: comm_base.mod 
md_const.mod md_coulomb.mod md_fmm.mod md_fmm_f90.o: md_fmm_domdiv_flg.mod md_forces.mod md_monitors.mod md_fmm_f90.o: md_periodic.mod md_segment.mod mod_wk_fmmewald.mod mpivar.mod md_fmm_f90.o: param.mod trj_mpi.mod trj_org.mod unitcell.mod ompvar.mod -mpitool.o: mpivar.mod -nve_integrate.o: atommass.mod md_condition.mod md_const.mod -nve_integrate.o: md_fmm.mod md_fmm_domdiv_flg.mod md_forces.mod md_monitors.mod -nve_integrate.o: md_multiplestep.mod md_periodic.mod md_segment.mod mpivar.mod -nve_integrate.o: param.mod shakerattleroll.mod trj_mpi.mod unitcell.mod -parse_input.o: atommass.mod md_charmm_lj.mod md_condition.mod md_const.mod -parse_input.o: md_coulomb.mod md_file.mod md_fmm_domdiv_flg.mod -parse_input.o: md_multiplestep.mod md_periodic.mod md_segment.mod mpivar.mod -parse_input.o: nhc.mod param.mod shakerattleroll.mod -parse_input.o: g_main.mod cutoffradius.mod md_fmm.mod md_ewald.mod -parse_input.o: trj_org.mod trj_mpi.mod unitcell.mod +$(MPITOOL).o: mpivar.mod +$(NVE_INTEGRATE).o: atommass.mod md_condition.mod md_const.mod +$(NVE_INTEGRATE).o: md_fmm.mod md_fmm_domdiv_flg.mod md_forces.mod md_monitors.mod +$(NVE_INTEGRATE).o: md_multiplestep.mod md_periodic.mod md_segment.mod mpivar.mod +$(NVE_INTEGRATE).o: param.mod shakerattleroll.mod trj_mpi.mod unitcell.mod shake_rattle_roll.o: atommass.mod md_fmm.mod md_fmm_domdiv_flg.mod shake_rattle_roll.o: md_segment.mod mpivar.mod param.mod pshake.mod shake_rattle_roll.o: pshake_init.mod shakerattleroll.mod trj_mpi.mod trj_org.mod -atommass.mod: .//fmodules.o -comm_base.mod: .//fmodules.o -comm_bd.mod: .//fmodules.o -comm_d3.mod: .//fmodules.o -cutoffradius.mod: .//fmodules.o -g_main.mod: .//fmodules.o -get_wtime.mod: .//fmodules.o -md_charmm_lj.mod: .//fmodules.o -md_condition.mod: .//fmodules.o -md_const.mod: .//fmodules.o -md_coulomb.mod: .//fmodules.o -md_ewald.mod: .//fmodules.o -md_file.mod: .//fmodules.o -md_fmm.mod: .//fmodules.o -md_fmm_domdiv_flg.mod: .//fmodules.o -md_forces.mod: .//fmodules.o 
-md_monitors.mod: .//fmodules.o -md_multiplestep.mod: .//fmodules.o -md_periodic.mod: .//fmodules.o -md_segment.mod: .//fmodules.o -md_void.mod: .//fmodules.o -mod_wk_fmmewald.mod: .//fmodules.o -mod_wk_k_ene.mod: .//fmodules.o -mpivar.mod: .//fmodules.o -ompvar.mod: .//fmodules.o -nhc.mod: .//fmodules.o -param.mod: .//fmodules.o -pshake.mod: .//fmodules.o -pshake_init.mod: .//fmodules.o -shakerattleroll.mod: .//fmodules.o -trj_mpi.mod: .//fmodules.o -trj_org.mod: .//fmodules.o -unitcell.mod: .//fmodules.o -version.mod: .//fmodules.o +atommass.mod: .//$(FMODULES).o +comm_base.mod: .//$(FMODULES).o +comm_bd.mod: .//$(FMODULES).o +comm_d3.mod: .//$(FMODULES).o +cutoffradius.mod: .//$(FMODULES).o +g_main.mod: .//$(FMODULES).o +get_wtime.mod: .//$(FMODULES).o +md_charmm_lj.mod: .//$(FMODULES).o +md_condition.mod: .//$(FMODULES).o +md_const.mod: .//$(FMODULES).o +md_coulomb.mod: .//$(FMODULES).o +md_ewald.mod: .//$(FMODULES).o +md_file.mod: .//$(FMODULES).o +md_fmm.mod: .//$(FMODULES).o +md_fmm_domdiv_flg.mod: .//$(FMODULES).o +md_forces.mod: .//$(FMODULES).o +md_monitors.mod: .//$(FMODULES).o +md_multiplestep.mod: .//$(FMODULES).o +md_periodic.mod: .//$(FMODULES).o +md_segment.mod: .//$(FMODULES).o +md_void.mod: .//$(FMODULES).o +mod_wk_fmmewald.mod: .//$(FMODULES).o +mod_wk_k_ene.mod: .//$(FMODULES).o +mpivar.mod: .//$(FMODULES).o +ompvar.mod: .//$(FMODULES).o +nhc.mod: .//$(FMODULES).o +param.mod: .//$(FMODULES).o +pshake.mod: .//$(FMODULES).o +pshake_init.mod: .//$(FMODULES).o +shakerattleroll.mod: .//$(FMODULES).o +trj_mpi.mod: .//$(FMODULES).o +trj_org.mod: .//$(FMODULES).o +unitcell.mod: .//$(FMODULES).o +version.mod: .//$(FMODULES).o opening.o: version.mod md_file.mod mpivar.mod trj_org.mod ompvar.mod opening.o: md_fmm.mod md_fmm_domdiv_flg.mod cutoffradius.mod opening.o: md_condition.mod md_multiplestep.mod md_ewald.mod closing.o: mpivar.mod -main_f90.o opening.o nve_integrate.o md_fmm_f90.o: timing.h90 +main_f90.o opening.o $(NVE_INTEGRATE).o md_fmm_f90.o: 
timing.h90 ifneq (, $(findstring -DPROF_MAPROF, $(FFLAGS))) timing.h90: $(MAPROF_LIB) endif @@ -177,4 +200,4 @@ endif ConfigReader.o: ConfigReader.h ConfigReader.o: params.h configread.mod: ConfigRead.o -parse_input.o: configread.mod +$(PARSE_INPUT).o: configread.mod diff --git a/MODYLAS-MINI/src/xmpAPI_app_f90.f b/MODYLAS-MINI/src/xmpAPI_app_f90.f new file mode 100755 index 0000000..a70d0f4 --- /dev/null +++ b/MODYLAS-MINI/src/xmpAPI_app_f90.f @@ -0,0 +1,481 @@ +!---------------------------------------------------------------------- +! Copyright (C) 2003-2014 Kensuke Iwahashi, Noriyuki Yoshii, +! Atsushi Yamada, Yoshimichi Andoh, +! Kazushi Fujimoto, Hidekazu Kojima, +! Fumiyasu Mizutani, and Susumu Okazaki +! All Rights Reserved. +! +! Copyright (C) 20013-2014 RIKEN AICS +! All Rights Reserved. +! +! This MD program has been developed at Nagoya University, and +! Institute for Molecular Science, National Institutes of Natural +! Sciences. +! And this work was supported by +! Next-Generation Supercomputer Project, and +! NAREGI Nanoscience Project, +! Ministry of Education, Culture, Sports, Science and Technology, +! Japan. +! +! This program is NOT a free software and distributed under the +! license described in the LICENSE. +! All rights are reserved by the authors of this program. +! +! The authors do NOT warrant or assume any legal liability or +! responsibility for the accuracy or completeness. 
+!---------------------------------------------------------------------- +c---------------------------------------------------------------------- + subroutine initialize_application +c---------------------------------------------------------------------- + implicit none + call check_parallel_condition +c initialize MD objects + call init_g_main + call init_md_check + call init_md_condition + call init_md_velocity + call init_fmm_domain_div + call check_cutofflength + call calc_idcell + call fmod_set_maxsegments + call fmod_alloc_metadata + call fmod_alloc_multipole + call init_fmm + call init_comm_direct_3() + call init_comm_bound() + call init_shake_local() + call pshake_initialize1 + call pshake_initialize2 + call pshake_finish1 + + return + end +c---------------------------------------------------------------------- + subroutine open_trj +c---------------------------------------------------------------------- + use md_file + implicit none + integer(4) :: io + open(f_trj, file=trim(session_name)// '.mdtrj.bin', iostat=io, + & status='replace', access='sequential',form='unformatted') + if (io /= 0) then + call abort_with_message_a('Cannot create mdtrj file.') + endif + return + end +c----------------------------------------------------------------------- + subroutine record_current_trajectory +c---------------------------------------------------------------------- + use trj_org + use unitcell + use trj_org + use nhc + use md_condition + use md_file + use md_periodic + implicit none + + write(f_trj) mdstep, mdstep*dt + ! Write coordinates and velocities of atoms. + write(f_trj) n + write(f_trj) xyz(1:3,:), v(1:3,:) + ! Write positions and velocities of thermostats. + write(f_trj) nnhc + write(f_trj) rss, vss + ! Write positions and velocities of barostats. + write(f_trj) nnhc + write(f_trj) rssb,vssb + ! Write cell parameters (length and angles). 
+ write(f_trj) cellx,celly,cellz,alpha,beta,gamma,vboxg + call flush(f_trj) + + return + end +c----------------------------------------------------------------------- + subroutine open_mntr +c---------------------------------------------------------------------- + use md_file + implicit none + integer(4) :: io + open(f_mntr, file=trim(session_name)// '.mdmntr', iostat=io, + & status='replace', access='sequential',form='formatted') + if (io /= 0) then + call abort_with_message_a('Cannot create mdmntr file.') + endif + write(f_mntr,'(a,a,a)') + & '## ', trim(session_name), '.mdmntr' // + & ' -- monitor variables output from MD calculation by modylas' + write(f_mntr,'(a)') '#' + write(f_mntr,'(a)') '# datas below are formated as:' + write(f_mntr,'(13a)') + & '#',' step ' , + & ' time ', + & ' Hamiltonian ', + & ' potential-E ', + & ' kinetic-E ', + & ' total energy ', + & ' temperature ' + write(f_mntr,'(13a)') + & '#', ' ' , + & ' [sec] ', + & ' [J/cell] ', + & ' [J/cell] ', + & ' [J/cell] ', + & ' [J/cell] ', + & ' [K] ' + write(f_mntr,'(a)') '#' + call flush(f_mntr) + return + end +c----------------------------------------------------------------------- + subroutine record_current_monitors +c---------------------------------------------------------------------- + use unitcell + use md_condition + use md_file + use md_monitors + implicit none + + write(f_mntr,'(i10,6es20.12)') + & mdstep, mdstep*dt, hamiltonian, + & p_energy, k_energy, + & t_energy, temperature + call flush(f_mntr) + + return + end +c----------------------------------------------------------------------- + subroutine open_restart +c---------------------------------------------------------------------- + use md_file + implicit none + integer(4) :: io + open(f_restart_bin, file=trim(session_name)// '.restart.bin', + & iostat=io, status='replace', + & access='sequential',form='unformatted') + if (io /= 0) then + call abort_with_message_a('Cannot create restart.bin file.') + endif + return + end 
+c----------------------------------------------------------------------- + subroutine record_restart_binary +c---------------------------------------------------------------------- + use trj_org + use unitcell + use trj_org + use nhc + use md_condition + use md_file + use md_periodic + implicit none + + ! Write coordinates and velocities of atoms. + rewind(f_restart_bin) + write(f_restart_bin) n + write(f_restart_bin) xyz(1:3,:), v(1:3,:) + ! Write positions and velocities of thermostats. + write(f_restart_bin) nnhc + write(f_restart_bin) rss, vss + ! Write positions and velocities of barostats. + write(f_restart_bin) nnhc + write(f_restart_bin) rssb, vssb + ! Write cell parameters (length and angles). + write(f_restart_bin) cellx,celly,cellz, alpha,beta,gamma, vboxg + call flush(f_restart_bin) + + return + end +c----------------------------------------------------------------------- + subroutine record_current_state +c---------------------------------------------------------------------- + use md_condition + use g_main + use md_condition + use mpivar + implicit none + +!### record mdrun & mdmntr ### + if (mod((mdstep - mntr_start), mntr_interval) == 0) then + IF (myrank.eq.mpiout) THEN + call record_current_monitors + ENDIF + endif + + if (mod((mdstep-trj_start),trj_interval)==0.or. 
+ & mod((mdstep-restart_start),restart_interval)==0) then + call pre_record_data + endif + +!### record mdtrj.bin ### + if (mod((mdstep-trj_start),trj_interval)==0) then + IF (myrank.eq.mpiout) THEN + call record_current_trajectory + ENDIF + endif + +!### record restart.bin ### + if (mod((mdstep-restart_start),restart_interval)==0)then + IF (myrank.eq.mpiout) THEN + call record_restart_binary + ENDIF + endif + + return + end +c----------------------------------------------------------------------- + subroutine check_parallel_condition +c----------------------------------------------------------------------- + use mpivar + implicit none + include 'mpif.h' + integer(4) :: ierr + +!### checm nprocs ###! + if(mod(nprocs,2).ne.0)then + if(myrank==0)then + write(*,*) 'ERROR: nprocs is not equal to 2 powers.' + endif +!coarray call mpi_barrier(mpi_comm_world, ierr) + sync all +!! + call mpiend + stop 1 + endif + if(nprocs.lt.8)then + if(myrank==0)then + write(*,*) 'ERROR: nprocs less than 8 is not supported.' + endif +!coarray call mpi_barrier(mpi_comm_world, ierr) + sync all +!! + call mpiend + stop 1 + endif + + return + end +c----------------------------------------------------------------------- + subroutine init_g_main +c---------------------------------------------------------------------- + use mpivar + implicit none + if (myrank.eq.mpiout) then +c open *.mdtrj file to which trajectory of MD calculation +c will be written + call open_trj +c +c open *.mdmntr file to which monitor variables (Hamiltonian etc.) 
+c will be written + call open_mntr +c +c open *.restart.bin file to which force log will be written + call open_restart + endif + return + end +c----------------------------------------------------------------------- + subroutine generate_velocity +c---------------------------------------------------------------------- + use atommass + use trj_org + use trj_org + use md_condition + use md_const + use g_main + use param + implicit none + real(8),allocatable :: randbuf(:) + real(8) :: w1,w2,M,vG__x,vG__y,vG__z,coef,T + real(8) :: rand0, rand1, genrand_real2 + integer(4) :: i,nrand +c generate maxwell distribution of velocity of atom, +c in which posibility variable sqrt(m / kB / T) * v(x, y or z) +c obeys to normal distribution with +c expectation = 0 +c standard deviation = 1 +c +c after random generation of velocity, they are adjusted for +c total momentum of the sytem to be 0 and temperature to be +c md_generic__maxwell temperature +c +c handling special case: maxwell_temperature == 0 + if (maxwell_temperature < 1.0d-50) then + v = 0.0d0 + return + endif +c generate uniform random numbers + if (mod(n,2) == 0) then + nrand = 3*n + else + nrand = 3*n+1 + endif + allocate(randbuf(nrand)) + call init_genrand(randomseed) +c conver uniform random numbers to normal random numbers + do i=1,nrand-1,2 + rand0 = genrand_real2() + rand1 = genrand_real2() + w1=sqrt(-2.0d0*log(rand0))*cos(2.0d0*md_PI*rand1) + w2=sqrt(-2.0d0*log(rand1))*cos(2.0d0*md_PI*rand0) + randbuf(i+0) = w1 + randbuf(i+1) = w2 + enddo +c add velocities + do i=1,n + coef = sqrt(md_BOLTZMANN * maxwell_temperature * + & r_mass(paranum(i))) + v(1,i) = randbuf(i*3-2) * coef + v(2,i) = randbuf(i*3-1) * coef + v(3,i) = randbuf(i*3-0) * coef + enddo + deallocate(randbuf) +c velocity of center of mass + M = 0.0d0 + vG__x = 0.0d0; vG__y = 0.0d0; vG__z = 0.0d0 + do i=1,n + if (mass(paranum(i)) .lt. 
1.0e+10) then + M = M + mass(paranum(i)) + vG__x = vG__x + v(1,i) * mass(paranum(i)) + vG__y = vG__y + v(2,i) * mass(paranum(i)) + vG__z = vG__z + v(3,i) * mass(paranum(i)) + endif + enddo + vG__x = vG__x / M + vG__y = vG__y / M + vG__z = vG__z / M +c subtracting momentum of the system + do i=1,n + if (mass(paranum(i)) .lt. 1.0e+10) then + v(1,i) = v(1,i) - vG__x + v(2,i) = v(2,i) - vG__y + v(3,i) = v(3,i) - vG__z + endif + enddo +c velocity scaling + T = 0.0d0 + do i=1,n + if (mass(paranum(i)) .lt. 1.0e+10) then + T = T + mass(paranum(i)) + & *(v(1,i)*v(1,i)+v(2,i)*v(2,i)+v(3,i)*v(3,i)) + endif + enddo + T = T * degree_of_freedom_inverse * rvkbolz + v = v * sqrt(maxwell_temperature / T) + return + end +c----------------------------------------------------------------------- + subroutine remove_system_momentum +c---------------------------------------------------------------------- + use atommass + use trj_org + use trj_org + use param + implicit none + real(8) :: M, vG__x, vG__y, vG__z + integer(4) :: i +c velocity of center of mass + M = 0.0d0 + vG__x = 0.0d0 + vG__y = 0.0d0 + vG__z = 0.0d0 + do i=1, n + M = M + mass(paranum(i)) + vG__x = vG__x + v(1,i) * mass(paranum(i)) + vG__y = vG__y + v(2,i) * mass(paranum(i)) + vG__z = vG__z + v(3,i) * mass(paranum(i)) + enddo + vG__x = vG__x / M + vG__y = vG__y / M + vG__z = vG__z / M +c subtracting momentum of the system + do i=1, n + v(1,i) = v(1,i) - vG__x + v(2,i) = v(2,i) - vG__y + v(3,i) = v(3,i) - vG__z + enddo + totalmass=M + return + end +c----------------------------------------------------------------------- + subroutine init_md_velocity +c---------------------------------------------------------------------- + use md_segment + use g_main + implicit none +c generate velocity of atom obeying to Maxwell distribution + if (maxwell_temperature > 0.0d0 .or. 
reset_maxwell) then + call generate_velocity +c No need to exchange by jrearrange + else + call remove_system_momentum + endif + return + end +c----------------------------------------------------------------------- + subroutine init_md_check +c----------------------------------------------------------------------- + use trj_org + use cutoffradius + use md_condition + use md_file + use md_periodic + use md_segment + use g_main + use mpivar + use unitcell + implicit none + if(myrank.eq.mpiout) then +c +c check if the last trajectory will be saved + if (mod(md_condition__howmany_steps,restart_interval) /= 0) then + write(6,*) 'WARN: the last trajectory will not be saved' + endif +c +c check periodic boundary + +c check validity of cut-off length + if(cutrad > cellxh) then + write(6,*) + & 'cut-off length for force is greater than a half of cell' + call mpistop + endif + if(cutrad > cellyh) then + write(6,*) + & 'cut-off length for force is greater than a half of cell' + call mpistop + endif + if(cutrad > cellzh) then + write(6,*) + & 'cut-off length for force is greater than a half of cell' + call mpistop + endif + endif + return + end +c---------------------------------------------------------------------- + subroutine init_md_condition +c---------------------------------------------------------------------- + use trj_org + use md_condition + use shakerattleroll + implicit none +c if using SHAKE, degree_of_freedom -= howmany_constraints +c but this operation ought to be done by md_shake + degree_of_freedom = n * 3 - 3 + if(totnconst .ne. 
0) then + degree_of_freedom = degree_of_freedom - totnconst + endif + degree_of_freedom_inverse = 1.0d0 / degree_of_freedom + return + end +c----------------------------------------------------------------------- + subroutine cleanup +c---------------------------------------------------------------------- + use md_file + close(f_trj) + close(f_mntr) + close(f_restart_bin) + return + end +c----------------------------------------------------------------------- diff --git a/MODYLAS-MINI/src/xmpAPI_comm.f b/MODYLAS-MINI/src/xmpAPI_comm.f new file mode 100755 index 0000000..34aaf37 --- /dev/null +++ b/MODYLAS-MINI/src/xmpAPI_comm.f @@ -0,0 +1,1879 @@ +!---------------------------------------------------------------------- +! Copyright (C) 2003-2014 Kensuke Iwahashi, Noriyuki Yoshii, +! Atsushi Yamada, Yoshimichi Andoh, +! Kazushi Fujimoto, Hidekazu Kojima, +! Fumiyasu Mizutani, and Susumu Okazaki +! All Rights Reserved. +! +! Copyright (C) 20013-2014 RIKEN AICS +! All Rights Reserved. +! +! This MD program has been developed at Nagoya University, and +! Institute for Molecular Science, National Institutes of Natural +! Sciences. +! And this work was supported by +! Next-Generation Supercomputer Project, and +! NAREGI Nanoscience Project, +! Ministry of Education, Culture, Sports, Science and Technology, +! Japan. +! +! This program is NOT a free software and distributed under the +! license described in the LICENSE. +! All rights are reserved by the authors of this program. +! +! The authors do NOT warrant or assume any legal liability or +! responsibility for the accuracy or completeness. +!---------------------------------------------------------------------- +c---------------------------------------------------------------------- + subroutine init_comm_buffer() +c---------------------------------------------------------------------- + use trj_org ! n + use trj_mpi ! n + use md_forces ! f, wk_k + use md_monitors ! 
p_energy, wk_p_energy + use md_fmm + use md_fmm_domdiv_flg + use mpivar + use ompvar + implicit none + integer(4) :: ii,i0,iam + integer(4) :: icx0,icy0,icz0 +!$ include 'omp_lib.h' + + wk_p_energy = 0d0 + +!$omp parallel default(shared) +!$omp& private(iam,i0,ii) +!$omp& private(icx0,icy0,icz0) +!$omp do + do iam = 0,nomp-1 + do ii=1,lxdiv*lydiv*lzdiv + icz0=mod(ii-1,lzdiv) +3 + icy0=mod(ii-1,lzdiv*lydiv) + icy0=icy0/lzdiv +3 + icx0=(ii-1)/(lzdiv*lydiv)+3 + do i0=tag(icz0,icy0,icx0), + & tag(icz0,icy0,icx0)+na_per_cell(icz0,icy0,icx0)-1 + w3_f(1,i0,iam) = 0.0d0 + w3_f(2,i0,iam) = 0.0d0 + w3_f(3,i0,iam) = 0.0d0 + end do ! i0 + end do ! ii + end do ! iam +!$omp end do +!$omp end parallel + + return + end +c---------------------------------------------------------------------- + subroutine init_comm_bound() +c---------------------------------------------------------------------- + use comm_base + use comm_bd + use trj_mpi + use md_fmm_domdiv_flg + use mpivar + implicit none + integer(4) :: itmp + + npz = nzdiv + npy = nydiv + npx = nxdiv + ipz = izflg(myrank)-1 + ipy = iyflg(myrank)-1 + ipx = ixflg(myrank)-1 + nczdiv = lzdiv + ncydiv = lydiv + ncxdiv = lxdiv + + max_mvatom = na1cell*64 + max_mvseg = max_mvatom + + max_cellcbd = max(nczdiv*ncydiv, ncydiv*ncxdiv) + max_cellcbd = max(max_cellcbd, nczdiv*ncxdiv) + itmp = max(ncydiv+ncxdiv, ncxdiv+nczdiv) + itmp = max(itmp, nczdiv+ncydiv) + max_cellcbd = max_cellcbd + 2*itmp + 4 + + allocate(abucket(6, max_mvatom, nczdiv+2, ncydiv+2, ncxdiv+2)) + allocate( iabucket(max_mvatom, nczdiv+2, ncydiv+2, ncxdiv+2)) + allocate( isbucket(max_mvseg, nczdiv+2, ncydiv+2, ncxdiv+2)) + allocate( ncseg(nczdiv+2, ncydiv+2, ncxdiv+2)) + allocate( ncatom(nczdiv+2, ncydiv+2, ncxdiv+2)) + allocate(buffp (6,max_cellcbd*max_mvatom)) + allocate(buffm (6,max_cellcbd*max_mvatom)) + allocate(ibuffp ( max_cellcbd*max_mvatom)) + allocate(ibuffm ( max_cellcbd*max_mvatom)) + allocate(isbufp (2*max_cellcbd + 1 + max_cellcbd*max_mvseg)) + allocate(isbufm 
(2*max_cellcbd + 1 + max_cellcbd*max_mvseg)) + allocate(rbuff_p (6,max_cellcbd*max_mvatom)[*]) + allocate(rbuff_m (6,max_cellcbd*max_mvatom)[*]) + allocate(irbuff_p( max_cellcbd*max_mvatom)[*]) + allocate(irbuff_m( max_cellcbd*max_mvatom)[*]) + allocate(irsbuf_p(2*max_cellcbd + 1 + max_cellcbd*max_mvseg)[*]) + allocate(irsbuf_m(2*max_cellcbd + 1 + max_cellcbd*max_mvseg)[*]) + allocate( ncatmw(32, nczdiv+2, ncydiv+2, ncxdiv+2) ) + + return + end +c---------------------------------------------------------------------- + subroutine comm_bound() +c---------------------------------------------------------------------- + use trj_org + use trj_mpi + use md_periodic + use md_segment + use md_fmm + use md_fmm_domdiv_flg + use comm_base + use comm_bd + use param + use mpivar + use unitcell + implicit none + INCLUDE 'mpif.h' + integer nbase, nbase2 + real(8) rdcellz, rdcelly, rdcellx + integer iczg0pr, icyg0pr, icxg0pr + real(8) z0, y0, x0 + integer ipz_dest, ipy_dest, ipx_dest + integer ipz_src, ipy_src, ipx_src + integer nsa + integer lcse(2,3,9) + integer iczg, icyg, icxg + integer icz, icy, icx + integer icz0, icz1 + integer icy0, icy1 + integer icx0, icx1 + integer i,i0,k0,itmp + integer ncc, ncs, ncsr, ics + integer nca, ncar, ica, ncarp, ncarm + integer nccp, ncsp, ncap + integer nccm, ncsm, ncam + integer ncc_p, ncs_p, nca_p + integer ncc_m, ncs_m, nca_m + integer ldcell + integer loc_csbound +! +! ... effective only when OpenMP compile. +!$ integer ncthread(2, 2, 2), itz, ity, itx, iam, nth +!$ integer omp_get_thread_num +!$ external omp_get_thread_num +!$ integer omp_get_num_threads +!$ external omp_get_num_threads +! + integer istatus(mpi_status_size), ierr +! + rdcellx=dble(ncell)/cellx + rdcelly=dble(ncell)/celly + rdcellz=dble(ncell)/cellz + +! ---- 3D rank order rule. ---- +! ipx=mod(myrank,npx) +! ipy=mod((myrank-ipx)/npx,npy) +! ipz=mod((myrank-ipx-ipy*npx)/(npx*npy),npz) +! +!----- bucket preparation. ----- +! +! 
assumption ; system boundary condition is NOT applied to wseg_cx, +! wseg_cy, wseg_cz. +! +! + ncseg = 0 + ncatom = 0 + ncatmw = 0 + +! cell index of the end of previous rank. + iczg0pr = ncell * ipz / npz + icyg0pr = ncell * ipy / npy + icxg0pr = ncell * ipx / npx +! +!$omp parallel default(none) +!$omp& private(x0,y0,z0) +!$omp& private(icx,icy,icz) +!$omp& private(icxg,icyg,iczg) +!$omp& private(itx,ity,itz) +!$omp& private(ncs,nsa,nca) +!$omp& private(ics,ica) +!$omp& private(ncthread) +!$omp& private(iam) +!$omp& private(nth) +!$omp& shared(ncell) +!$omp& shared(nselfseg) +!$omp& shared(wseg_cx,wseg_cy,wseg_cz) +!$omp& shared(cellxh,cellyh,cellzh) +!$omp& shared(cellx,celly,cellz) +!$omp& shared(icxg0pr,icyg0pr,iczg0pr) +!$omp& shared(rdcellx,rdcelly,rdcellz) +!$omp& shared(ncxdiv,ncydiv,nczdiv) +!$omp& shared(isbucket) +!$omp& shared(iabucket) +!$omp& shared(abucket) +!$omp& shared(lsegtop,lseg_natoms) +!$omp& shared(ncseg) +!$omp& shared(wkxyz,wkv,m2i) +!$omp& shared(ncatmw) +!$omp& shared(myrank) +!$ iam = omp_get_thread_num() +!$ nth = omp_get_num_threads() +!$ ncthread = 0 +!$ if(nth == 2) then +!$ ncthread(1,1,1) = 0 +!$ ncthread(2,1,1) = 0 +!$ ncthread(1,2,1) = 0 +!$ ncthread(2,2,1) = 0 +!$ ncthread(1,1,2) = 1 +!$ ncthread(2,1,2) = 1 +!$ ncthread(1,2,2) = 1 +!$ ncthread(2,2,2) = 1 +!$ elseif(nth == 4) then +! 1 to the 4th fractional subspace boundary. +! thread id for each subspace/domain. for FX-1. +!$ ncthread(1,1,1) = 0 +!$ ncthread(2,1,1) = 0 +!$ ncthread(1,2,1) = 1 +!$ ncthread(2,2,1) = 1 +!$ ncthread(1,1,2) = 2 +!$ ncthread(2,1,2) = 2 +!$ ncthread(1,2,2) = 3 +!$ ncthread(2,2,2) = 3 +!$ elseif(nth >= 8) then +! 1 to the 8th fractional subspace boundary. +! thread id for each subspace/domain. for K computer. +!$ ncthread(1,1,1) = 0 +!$ ncthread(2,1,1) = 1 +!$ ncthread(1,2,1) = 2 +!$ ncthread(2,2,1) = 3 +!$ ncthread(1,1,2) = 4 +!$ ncthread(2,1,2) = 5 +!$ ncthread(1,2,2) = 6 +!$ ncthread(2,2,2) = 7 +!$ endif +! +!... segment loop. 
+ do ics = 1, nselfseg + x0 = wseg_cx(ics) + cellxh + y0 = wseg_cy(ics) + cellyh + z0 = wseg_cz(ics) + cellzh +! +! one extra cell outside process and beginning address 1 for cell index +! is to be considered. +! segment address is negative in the negative direction system boundary. +! since intrinsic function INT truncates to the 0-direction nearest +! whole number, addition of 1.0 before truncation is necessary. +! to avoid the addition intrinsic function FLOOR which truncate to the +! minus direction largest whole number is preferable. +! +! global address. + icxg = floor(x0*rdcellx) + 1 + icyg = floor(y0*rdcelly) + 1 + iczg = floor(z0*rdcellz) + 1 +! global address to local address. one cell for boundary. + icx = icxg - icxg0pr + 1 + icy = icyg - icyg0pr + 1 + icz = iczg - iczg0pr + 1 +! sort cell for thread. +! ... effective only when OpenMP compile. +!$ itx = 2 +!$ if(icx <= ncxdiv/2+1) itx = 1 +!$ ity = 2 +!$ if(icy <= ncydiv/2+1) ity = 1 +!$ itz = 2 +!$ if(icz <= nczdiv/2+1) itz = 1 + +! ... effective only when OpenMP compile. +!$ if(ncthread(itz,ity,itx) /= iam) cycle +! +! periodic bondary condition to coordinate wkxyz. +! + if(icxg <= 0) then + do ica = 1, lseg_natoms(ics) + nsa = lsegtop(ics) + ica - 1 + wkxyz(1,nsa) = wkxyz(1,nsa) + cellx + end do + endif + if(icxg > ncell) then + do ica = 1, lseg_natoms(ics) + nsa = lsegtop(ics) + ica - 1 + wkxyz(1,nsa) = wkxyz(1,nsa) - cellx + end do + endif + if(icyg <= 0) then + do ica = 1, lseg_natoms(ics) + nsa = lsegtop(ics) + ica - 1 + wkxyz(2,nsa) = wkxyz(2,nsa) + celly + end do + endif + if(icyg > ncell) then + do ica = 1, lseg_natoms(ics) + nsa = lsegtop(ics) + ica - 1 + wkxyz(2,nsa) = wkxyz(2,nsa) - celly + end do + endif + if(iczg <= 0) then + do ica = 1, lseg_natoms(ics) + nsa = lsegtop(ics) + ica - 1 + wkxyz(3,nsa) = wkxyz(3,nsa) + cellz + end do + endif + if(iczg > ncell) then + do ica = 1, lseg_natoms(ics) + nsa = lsegtop(ics) + ica - 1 + wkxyz(3,nsa) = wkxyz(3,nsa) - cellz + end do + endif +! +! 
segment data. + ncs = ncatmw(2,icz,icy,icx) + 1 + isbucket(ncs,icz,icy,icx) = lseg_natoms(ics) + ncatmw(2,icz,icy,icx) = ncs +! atom data. +! In the bounding process, all of the atom belonging to the same +! segment must be treated in the same way. + nca = ncatmw(1,icz,icy,icx) + do ica = 1, lseg_natoms(ics) + nsa = lsegtop(ics) + ica - 1 + nca = nca + 1 + abucket(1,nca,icz,icy,icx) = wkxyz(1,nsa) + abucket(2,nca,icz,icy,icx) = wkxyz(2,nsa) + abucket(3,nca,icz,icy,icx) = wkxyz(3,nsa) + abucket(4,nca,icz,icy,icx) = wkv(1,nsa) + abucket(5,nca,icz,icy,icx) = wkv(2,nsa) + abucket(6,nca,icz,icy,icx) = wkv(3,nsa) + iabucket(nca,icz,icy,icx) = m2i(nsa) + end do + ncatmw(1,icz,icy,icx) = nca + end do +!$OMP ENDPARALLEL + do icx = 1, ncxdiv + 2 + do icy = 1, ncydiv + 2 + do icz = 1, nczdiv + 2 + ncatom(icz,icy,icx) = ncatmw(1,icz,icy,icx) + ncseg(icz,icy,icx) = ncatmw(2,icz,icy,icx) + end do + end do + end do +! +!----- boundary communication code starts here. ------ +! +! ( bound +Z ) +! nearest neighbor. + icz = nczdiv + 2 + icy0 = 2 + icy1 = ncydiv + 1 + icx0 = 2 + icx1 = ncxdiv + 1 + ldcell = 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy0 + lcse(2,2,ldcell) = icy1 + lcse(1,3,ldcell) = icx0 + lcse(2,3,ldcell) = icx1 +! 2d-diagonal adjacent(1) + icz = nczdiv + 2 + icy = 1 + icx0 = 2 + icx1 = ncxdiv + 1 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx0 + lcse(2,3,ldcell) = icx1 +! 2d-diagonal adjacent(2) + icz = nczdiv + 2 + icy = ncydiv + 2 + icx0 = 2 + icx1 = ncxdiv + 1 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx0 + lcse(2,3,ldcell) = icx1 +! 
2d-diagonal adjacent(3) + icz = nczdiv + 2 + icy0 = 2 + icy1 = ncydiv + 1 + icx = 1 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy0 + lcse(2,2,ldcell) = icy1 + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! 2d-diagonal adjacent(4) + icz = nczdiv + 2 + icy0 = 2 + icy1 = ncydiv + 1 + icx = ncxdiv + 2 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy0 + lcse(2,2,ldcell) = icy1 + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! 3d-diagonal adjacent(5) + icz = nczdiv + 2 + icy = 1 + icx = 1 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! 3d-diagonal adjacent(6) + icz = nczdiv + 2 + icy = ncydiv + 2 + icx = 1 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! 3d-diagonal adjacent(7) + icz = nczdiv + 2 + icy = 1 + icx = ncxdiv + 2 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! 3d-diagonal adjacent(8) + icz = nczdiv + 2 + icy = ncydiv + 2 + icx = ncxdiv + 2 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! + loc_csbound = 2*(ncydiv*ncxdiv + 2*ncxdiv + 2*ncydiv + 4) + 1 + ncc = 0 + ncs = loc_csbound + nca = 0 +! + call add_buffer(abucket,iabucket,isbucket,buffp,ibuffp,isbufp, + & max_mvatom,max_mvseg,max_cellcbd, + & nczdiv,ncydiv,ncxdiv, + & lcse,ldcell,ncatom,ncseg,ncc,ncs,nca) +! + ipz_dest = mod(ipz+1-1/npz+npz,npz)*npx*npy + ipy*npx + ipx + ipz_src = mod(ipz-1+1/npz+npz,npz)*npx*npy + ipy*npx + ipx +! 
+ call mpi_sendrecv(ncs + 1, 1, MPI_INTEGER, + & ipz_dest, myrank, + & ncsr, 1, MPI_INTEGER, + & ipz_src, ipz_src, + & mpi_comm_world, istatus, ierr ) + + isbufp(ncs+1) = nca +!coarray call mpi_sendrecv(isbufp, ncs + 1, MPI_INTEGER, +!coarray & ipz_dest, myrank, +!coarray & irsbuf_p, ncsr, MPI_INTEGER, +!coarray & ipz_src, ipz_src, +!coarray & mpi_comm_world, istatus, ierr ) + irsbuf_p(1:ncs+1)[ipz_dest+1] = isbufp(1:ncs+1) ! Put + sync all +!! + + ncar = irsbuf_p(ncsr) +!coarray call mpi_sendrecv(buffp, 6*nca, MPI_DOUBLE_PRECISION, +!coarray & ipz_dest, myrank, +!coarray & rbuff_p, 6*ncar, MPI_DOUBLE_PRECISION, +!coarray & ipz_src, ipz_src, +!coarray & mpi_comm_world, istatus, ierr ) + rbuff_p(:,1:nca)[ipz_dest+1] = buffp(:,1:nca) ! Put + sync all +!! + +!coarray call mpi_sendrecv(ibuffp, nca, MPI_INTEGER, +!coarray & ipz_dest, myrank, +!coarray & irbuff_p, ncar, MPI_INTEGER, +!coarray & ipz_src, ipz_src, +!coarray & mpi_comm_world, istatus, ierr ) + irbuff_p(1:nca)[ipz_dest+1] = ibuffp(1:nca) ! Put + sync all +!! + +! +! ( bound -Z ) +! +! ( bound -Z ) +! nearest neighbor. + icz = 1 + icy0 = 2 + icy1 = ncydiv + 1 + icx0 = 2 + icx1 = ncxdiv + 1 + ldcell = 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy0 + lcse(2,2,ldcell) = icy1 + lcse(1,3,ldcell) = icx0 + lcse(2,3,ldcell) = icx1 +! 2d-diagonal adjacent(13) + icz = 1 + icy = 1 + icx0 = 2 + icx1 = ncxdiv + 1 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx0 + lcse(2,3,ldcell) = icx1 +! 2d-diagonal adjacent(14) + icz = 1 + icy = ncydiv + 2 + icx0 = 2 + icx1 = ncxdiv + 1 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx0 + lcse(2,3,ldcell) = icx1 +! 
2d-diagonal adjacent(15) + icz = 1 + icy0 = 2 + icy1 = ncydiv + 1 + icx = 1 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy0 + lcse(2,2,ldcell) = icy1 + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! 2d-diagonal adjacent(16) + icz = 1 + icy0 = 2 + icy1 = ncydiv + 1 + icx = ncxdiv + 2 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy0 + lcse(2,2,ldcell) = icy1 + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! 3d-diagonal adjacent(17) + icz = 1 + icy = 1 + icx = 1 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! 3d-diagonal adjacent(18) + icz = 1 + icy = ncydiv + 2 + icx = 1 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! 3d-diagonal adjacent(19) + icz = 1 + icy = 1 + icx = ncxdiv + 2 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! 3d-diagonal adjacent(20) + icz = 1 + icy = ncydiv + 2 + icx = ncxdiv + 2 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! + loc_csbound = 2*(ncydiv*ncxdiv + 2*ncxdiv + 2*ncydiv + 4) + 1 + ncc = 0 + ncs = loc_csbound + nca = 0 +! + call add_buffer(abucket,iabucket,isbucket,buffm,ibuffm,isbufm, + & max_mvatom,max_mvseg,max_cellcbd, + & nczdiv,ncydiv,ncxdiv, + & lcse,ldcell,ncatom,ncseg,ncc,ncs,nca) +! + ipz_dest = mod(ipz-1+1/npz+npz,npz)*npx*npy + ipy*npx + ipx + ipz_src = mod(ipz+1-1/npz+npz,npz)*npx*npy + ipy*npx + ipx +! 
+ call mpi_sendrecv(ncs + 1, 1, MPI_INTEGER, + & ipz_dest, myrank, + & ncsr, 1, MPI_INTEGER, + & ipz_src, ipz_src, + & mpi_comm_world, istatus, ierr ) + + isbufm(ncs+1) = nca +!coarray call mpi_sendrecv(isbufm, ncs + 1, MPI_INTEGER, +!coarray & ipz_dest, myrank, +!coarray & irsbuf_m, ncsr, MPI_INTEGER, +!coarray & ipz_src, ipz_src, +!coarray & mpi_comm_world, istatus, ierr ) + irsbuf_m(1:ncs+1)[ipz_dest+1] = isbufm(1:ncs+1) ! Put + sync all +!! + + ncar = irsbuf_m(ncsr) +!coarray call mpi_sendrecv(buffm, 6*nca, MPI_DOUBLE_PRECISION, +!coarray & ipz_dest, myrank, +!coarray & rbuff_m, 6*ncar, MPI_DOUBLE_PRECISION, +!coarray & ipz_src, ipz_src, +!coarray & mpi_comm_world, istatus, ierr ) + rbuff_m(1:6,1:nca)[ipz_dest+1] = buffm(1:6,1:nca) ! Put + sync all +!! + +!coarray call mpi_sendrecv(ibuffm, nca, MPI_INTEGER, +!coarray & ipz_dest, myrank, +!coarray & irbuff_m, ncar, MPI_INTEGER, +!coarray & ipz_src, ipz_src, +!coarray & mpi_comm_world, istatus, ierr ) + irbuff_m(1:nca)[ipz_dest+1] = ibuffm(1:nca) ! Put + sync all +!! + +! +! merge source nearest neighbors(+Z) on receive buffer to local bucket. + icx0 = 2 + icx1 = ncxdiv + 1 + icy0 = 2 + icy1 = ncydiv + 1 + icz = 2 + ldcell = 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy0 + lcse(2,2,ldcell) = icy1 + lcse(1,3,ldcell) = icx0 + lcse(2,3,ldcell) = icx1 +! +! Merge part of 2d-diagonal adjacent on z-direction receive buffer +! to cell bucket corresponding to y-directional nearest neighbor +! communication. +! source rank +Z 2d-diagonal adjacent(1) + icx0 = 2 + icx1 = ncxdiv + 1 + icy = 1 + icz = 2 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx0 + lcse(2,3,ldcell) = icx1 +! 
source rank +Z 2d-diagonal adjacent(2) + icx0 = 2 + icx1 = ncxdiv + 1 + icy = ncydiv + 2 + icz = 2 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx0 + lcse(2,3,ldcell) = icx1 +! source rank +Z 2d-diagonal adjacent(3) + icx = 1 + icy0 = 2 + icy1 = ncydiv + 1 + icz = 2 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy0 + lcse(2,2,ldcell) = icy1 + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! source rank +Z 2d-diagonal adjacent(4) + icx = ncxdiv+ 2 + icy0 = 2 + icy1 = ncydiv + 1 + icz = 2 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy0 + lcse(2,2,ldcell) = icy1 + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! + loc_csbound = 2*(ncydiv*ncxdiv + 2*ncxdiv + 2*ncydiv + 4) + 1 + ncc_p = 0 + ncs_p = loc_csbound + nca_p = 0 +! + call add_bucket(abucket,iabucket,isbucket, + & rbuff_p,irbuff_p,irsbuf_p, + & max_mvatom,max_mvseg,max_cellcbd, + & nczdiv,ncydiv,ncxdiv, + & lcse,ldcell,ncatom,ncseg, + & ncc_p,ncs_p,nca_p) +! +! merge source rank nearest neighbors(-Z) on receive buffer to local bucket. + icx0 = 2 + icx1 = ncxdiv + 1 + icy0 = 2 + icy1 = ncydiv + 1 + icz = nczdiv + 1 + ldcell = 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy0 + lcse(2,2,ldcell) = icy1 + lcse(1,3,ldcell) = icx0 + lcse(2,3,ldcell) = icx1 +! +! Merge part of 2d-diagonal adjacent on z-direction receive buffer +! to cell bucket corresponding to y-directional nearest neighbor +! communication. +! source rank -Z 2d-diagonal adjacent(13) + icx0 = 2 + icx1 = ncxdiv + 1 + icy = 1 + icz = nczdiv + 1 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx0 + lcse(2,3,ldcell) = icx1 +! 
source rank -Z 2d-diagonal adjacent(14) + icx0 = 2 + icx1 = ncxdiv + 1 + icy = ncydiv + 2 + icz = nczdiv + 1 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx0 + lcse(2,3,ldcell) = icx1 +! source rank -Z 2d-diagonal adjacent(15) + icx = 1 + icy0 = 2 + icy1 = ncydiv + 1 + icz = nczdiv + 1 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy0 + lcse(2,2,ldcell) = icy1 + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! source rank -Z 2d-diagonal adjacent(16) + icx = ncxdiv + 2 + icy0 = 2 + icy1 = ncydiv + 1 + icz = nczdiv + 1 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy0 + lcse(2,2,ldcell) = icy1 + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! + loc_csbound = 2*(ncydiv*ncxdiv + 2*ncxdiv + 2*ncydiv + 4) + 1 + ncc_m = 0 + ncs_m = loc_csbound + nca_m = 0 +! + call add_bucket(abucket,iabucket,isbucket, + & rbuff_m,irbuff_m,irsbuf_m, + & max_mvatom,max_mvseg,max_cellcbd, + & nczdiv,ncydiv,ncxdiv, + & lcse,ldcell,ncatom,ncseg, + & ncc_m,ncs_m,nca_m) + +! +! ( bound +Y & bound -Y ) +! ( +Y buffer ) +! nearest neighbor. + icz0 = 2 + icz1 = nczdiv + 1 + icy = ncydiv + 2 + icx0 = 2 + icx1 = ncxdiv + 1 + ldcell = 1 + lcse(1,1,ldcell) = icz0 + lcse(2,1,ldcell) = icz1 + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx0 + lcse(2,3,ldcell) = icx1 +! 2d-diagonal adjacent(10) + icz0 = 2 + icz1 = nczdiv + 1 + icy = ncydiv + 2 + icx = 1 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz0 + lcse(2,1,ldcell) = icz1 + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! 
2d-diagonal adjacent(12) + icz0 = 2 + icz1 = nczdiv + 1 + icy = ncydiv + 2 + icx = ncxdiv + 2 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz0 + lcse(2,1,ldcell) = icz1 + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! + loc_csbound = 2*(nczdiv*ncxdiv + 2*nczdiv + 4) + 1 + nccp = 0 + ncsp = loc_csbound + ncap = 0 +! + call add_buffer(abucket,iabucket,isbucket,buffp,ibuffp,isbufp, + & max_mvatom,max_mvseg,max_cellcbd, + & nczdiv,ncydiv,ncxdiv, + & lcse,ldcell,ncatom,ncseg, + & nccp,ncsp,ncap) +! +! ( -Y buffer ) +! nearest neighbor. + icz0 = 2 + icz1 = nczdiv + 1 + icy = 1 + icx0 = 2 + icx1 = ncxdiv + 1 + ldcell = 1 + lcse(1,1,ldcell) = icz0 + lcse(2,1,ldcell) = icz1 + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx0 + lcse(2,3,ldcell) = icx1 +! 2d-diagonal adjacent(9) + icz0 = 2 + icz1 = nczdiv + 1 + icy = 1 + icx = 1 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz0 + lcse(2,1,ldcell) = icz1 + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! 2d-diagonal adjacent(11) + icz0 = 2 + icz1 = nczdiv + 1 + icy = 1 + icx = ncxdiv + 2 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz0 + lcse(2,1,ldcell) = icz1 + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! + loc_csbound = 2*(nczdiv*ncxdiv + 2*nczdiv + 4) + 1 + nccm = 0 + ncsm = loc_csbound + ncam = 0 +! + call add_buffer(abucket,iabucket,isbucket,buffm,ibuffm,isbufm, + & max_mvatom,max_mvseg,max_cellcbd, + & nczdiv,ncydiv,ncxdiv, + & lcse,ldcell,ncatom,ncseg,nccm,ncsm,ncam) +! +! ( -Y buffer ) +Z source rank 3d-diagonal adjacent(5) +! + ldcell = 1 + call add_buffb(rbuff_p,irbuff_p,irsbuf_p,buffm,ibuffm,isbufm, + & max_mvatom,max_mvseg,max_cellcbd, + & ncc_p,ncs_p,nca_p,nccm,ncsm,ncam,ldcell) +! +! ( +Y buffer ) +Z source rank 3d-diagonal adjacent(6) +! 
+ ldcell = 1 + call add_buffb(rbuff_p,irbuff_p,irsbuf_p,buffp,ibuffp,isbufp, + & max_mvatom,max_mvseg,max_cellcbd, + & ncc_p,ncs_p,nca_p,nccp,ncsp,ncap,ldcell) +! +! ( -Y buffer ) +Z source rank 3d-diagonal adjacent(7) +! + ldcell = 1 + call add_buffb(rbuff_p,irbuff_p,irsbuf_p,buffm,ibuffm,isbufm, + & max_mvatom,max_mvseg,max_cellcbd, + & ncc_p,ncs_p,nca_p,nccm,ncsm,ncam,ldcell) +! +! ( +Y buffer ) +Z source rank 3d-diagonal adjacent(8) +! + ldcell = 1 + call add_buffb(rbuff_p,irbuff_p,irsbuf_p,buffp,ibuffp,isbufp, + & max_mvatom,max_mvseg,max_cellcbd, + & ncc_p,ncs_p,nca_p,nccp,ncsp,ncap,ldcell) +! +! ( -Y buffer ) -Z source rank 3d-diagonal adjacent(17) +! + ldcell = 1 + call add_buffb(rbuff_m,irbuff_m,irsbuf_m,buffm,ibuffm,isbufm, + & max_mvatom,max_mvseg,max_cellcbd, + & ncc_m,ncs_m,nca_m,nccm,ncsm,ncam,ldcell) +! +! ( +Y buffer ) -Z source rank 3d-diagonal adjacent(18) +! + ldcell = 1 + call add_buffb(rbuff_m,irbuff_m,irsbuf_m,buffp,ibuffp,isbufp, + & max_mvatom,max_mvseg,max_cellcbd, + & ncc_m,ncs_m,nca_m,nccp,ncsp,ncap,ldcell) +! +! ( -Y buffer ) -Z source rank 3d-diagonal adjacent(19) +! + ldcell = 1 + call add_buffb(rbuff_m,irbuff_m,irsbuf_m,buffm,ibuffm,isbufm, + & max_mvatom,max_mvseg,max_cellcbd, + & ncc_m,ncs_m,nca_m,nccm,ncsm,ncam,ldcell) +! +! ( +Y buffer ) -Z source rank 3d-diagonal adjacent(20) +! + ldcell = 1 + call add_buffb(rbuff_m,irbuff_m,irsbuf_m,buffp,ibuffp,isbufp, + & max_mvatom,max_mvseg,max_cellcbd, + & ncc_m,ncs_m,nca_m,nccp,ncsp,ncap,ldcell) +! +! +! +Y comm. + ipy_dest = ipz*npx*npy + mod(ipy+1-1/npy+npy,npy)*npx + ipx + ipy_src = ipz*npx*npy + mod(ipy-1+1/npy+npy,npy)*npx + ipx + +! 
+ call mpi_sendrecv(ncsp + 1, 1, MPI_INTEGER, + & ipy_dest, myrank, + & ncsr, 1, MPI_INTEGER, + & ipy_src, ipy_src, + & mpi_comm_world, istatus, ierr ) + + isbufp(ncsp+1) = ncap +!coarray call mpi_sendrecv(isbufp, ncsp + 1, MPI_INTEGER, +!coarray & ipy_dest, myrank, +!coarray & irsbuf_p, ncsr, MPI_INTEGER, +!coarray & ipy_src, ipy_src, +!coarray & mpi_comm_world, istatus, ierr ) + sync all ! do Not change + irsbuf_p(1:ncsp+1)[ipy_dest+1] = isbufp(1:ncsp+1) ! Put + sync all +!! + + ncarp = irsbuf_p(ncsr) +!coarray call mpi_sendrecv(buffp, 6*ncap, MPI_DOUBLE_PRECISION, +!coarray & ipy_dest, myrank, +!coarray & rbuff_p, 6*ncarp, MPI_DOUBLE_PRECISION, +!coarray & ipy_src, ipy_src, +!coarray & mpi_comm_world, istatus, ierr ) + rbuff_p(1:6,1:ncap)[ipy_dest+1] = buffp(1:6,1:ncap) ! Put +!! + +!coarray call mpi_sendrecv(ibuffp, ncap, MPI_INTEGER, +!coarray & ipy_dest, myrank, +!coarray & irbuff_p, ncarp, MPI_INTEGER, +!coarray & ipy_src, ipy_src, +!coarray & mpi_comm_world, istatus, ierr ) + irbuff_p(1:ncap)[ipy_dest+1] = ibuffp(1:ncap) ! Put + sync all +!! + +! +! -Y comm. +! + ipy_dest = ipz*npx*npy + mod(ipy-1+1/npy+npy,npy)*npx + ipx + ipy_src = ipz*npx*npy + mod(ipy+1-1/npy+npy,npy)*npx + ipx +! + call mpi_sendrecv(ncsm + 1, 1, MPI_INTEGER, + & ipy_dest, myrank, + & ncsr, 1, MPI_INTEGER, + & ipy_src, ipy_src, + & mpi_comm_world, istatus, ierr ) + + isbufm(ncsm+1) = ncam +!coarray call mpi_sendrecv(isbufm, ncsm + 1, MPI_INTEGER, +!coarray & ipy_dest, myrank, +!coarray & irsbuf_m, ncsr, MPI_INTEGER, +!coarray & ipy_src, ipy_src, +!coarray & mpi_comm_world, istatus, ierr ) + irsbuf_m(1:ncsm+1)[ipy_dest+1] = isbufm(1:ncsm+1) ! Put + sync all +!! + + ncarm = irsbuf_m(ncsr) +!coarray call mpi_sendrecv(buffm, 6*ncam, MPI_DOUBLE_PRECISION, +!coarray & ipy_dest, myrank, +!coarray & rbuff_m, 6*ncarm, MPI_DOUBLE_PRECISION, +!coarray & ipy_src, ipy_src, +!coarray & mpi_comm_world, istatus, ierr ) + rbuff_m(1:6,1:ncam)[ipy_dest+1] = buffm(1:6,1:ncam) ! Put +!! 
+ +!coarray call mpi_sendrecv(ibuffm, ncam, MPI_INTEGER, +!coarray & ipy_dest, myrank, +!coarray & irbuff_m, ncarm, MPI_INTEGER, +!coarray & ipy_src, ipy_src, +!coarray & mpi_comm_world, istatus, ierr ) + irbuff_m(1:ncam)[ipy_dest+1] = ibuffm(1:ncam) ! Put + sync all +!! + +! +! +Y receive buffer. +! merge source rank nearest neighbors(+Y) on receive buffer to local bucket. + icx0 = 2 + icx1 = ncxdiv + 1 + icy = 2 + icz0 = 2 + icz1 = nczdiv + 1 + ldcell = 1 + lcse(1,1,ldcell) = icz0 + lcse(2,1,ldcell) = icz1 + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx0 + lcse(2,3,ldcell) = icx1 +! source rank +Y 2d-diagonal adjacent(10) + icx = 1 + icy = 2 + icz0 = 2 + icz1 = nczdiv + 1 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz0 + lcse(2,1,ldcell) = icz1 + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! source rank +Y 2d-diagonal adjacent(12) + icx = ncxdiv+ 2 + icy = 2 + icz0 = 2 + icz1 = nczdiv + 1 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz0 + lcse(2,1,ldcell) = icz1 + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! source rank +Z 3d-diagonal adjacent(6) + icx = 1 + icy = 2 + icz = 2 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! source rank +Z 3d-diagonal adjacent(8) + icx = ncxdiv + 2 + icy = 2 + icz = 2 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! source rank -Z 3d-diagonal adjacent(18) + icx = 1 + icy = 2 + icz = nczdiv + 1 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! 
source rank -Z 3d-diagonal adjacent(20) + icx = ncxdiv + 2 + icy = 2 + icz = nczdiv + 1 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! + loc_csbound = 2*(nczdiv*ncxdiv + 2*nczdiv + 4) + 1 + ncc_p = 0 + ncs_p = loc_csbound + nca_p = 0 +! + call add_bucket(abucket,iabucket,isbucket, + & rbuff_p,irbuff_p,irsbuf_p, + & max_mvatom,max_mvseg,max_cellcbd, + & nczdiv,ncydiv,ncxdiv, + & lcse,ldcell,ncatom,ncseg, + & ncc_p,ncs_p,nca_p) +! +! -Y receive buffer. +! merge source rank nearest neighbors(-Y) on receive buffer to local bucket. + icx0 = 2 + icx1 = ncxdiv + 1 + icy = ncydiv + 1 + icz0 = 2 + icz1 = nczdiv + 1 + ldcell = 1 + lcse(1,1,ldcell) = icz0 + lcse(2,1,ldcell) = icz1 + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx0 + lcse(2,3,ldcell) = icx1 +! source rank -Y 2d-diagonal adjacent(9) + icx = 1 + icy = ncydiv + 1 + icz0 = 2 + icz1 = nczdiv + 1 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz0 + lcse(2,1,ldcell) = icz1 + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! source rank -Y 2d-diagonal adjacent(11) + icx = ncxdiv+ 2 + icy = ncydiv + 1 + icz0 = 2 + icz1 = nczdiv + 1 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz0 + lcse(2,1,ldcell) = icz1 + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! source rank +Z 3d-diagonal adjacent(5) + icx = 1 + icy = ncydiv + 1 + icz = 2 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! source rank +Z 3d-diagonal adjacent(7) + icx = ncxdiv + 2 + icy = ncydiv + 1 + icz = 2 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! 
source rank -Z 3d-diagonal adjacent(17) + icx = 1 + icy = ncydiv + 1 + icz = nczdiv + 1 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! source rank -Z 3d-diagonal adjacent(19) + icx = ncxdiv + 2 + icy = ncydiv + 1 + icz = nczdiv + 1 + ldcell = ldcell + 1 + lcse(1,1,ldcell) = icz + lcse(2,1,ldcell) = icz + lcse(1,2,ldcell) = icy + lcse(2,2,ldcell) = icy + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! + loc_csbound = 2*(nczdiv*ncxdiv + 2*nczdiv + 4) + 1 + ncc_m = 0 + ncs_m = loc_csbound + nca_m = 0 +! + call add_bucket(abucket,iabucket,isbucket, + & rbuff_m,irbuff_m,irsbuf_m, + & max_mvatom,max_mvseg,max_cellcbd, + & nczdiv,ncydiv,ncxdiv, + & lcse,ldcell,ncatom,ncseg, + & ncc_m,ncs_m,nca_m) + +! +! ( bound +X ) +! +! nearest neighbor. + icz0 = 2 + icz1 = nczdiv + 1 + icy0 = 2 + icy1 = ncydiv + 1 + icx = ncxdiv + 2 + ldcell = 1 + lcse(1,1,ldcell) = icz0 + lcse(2,1,ldcell) = icz1 + lcse(1,2,ldcell) = icy0 + lcse(2,2,ldcell) = icy1 + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! + loc_csbound = 2*nczdiv*ncydiv + 1 + ncc = 0 + ncs = loc_csbound + nca = 0 +! + call add_buffer(abucket,iabucket,isbucket,buffp,ibuffp,isbufp, + & max_mvatom,max_mvseg,max_cellcbd, + & nczdiv,ncydiv,ncxdiv, + & lcse,ldcell,ncatom,ncseg,ncc,ncs,nca) +! + ipx_dest = ipz*npx*npy + ipy*npx + mod(ipx+1-1/npx+npx,npx) + ipx_src = ipz*npx*npy + ipy*npx + mod(ipx-1+1/npx+npx,npx) +! + call mpi_sendrecv(ncs + 1, 1, MPI_INTEGER, + & ipx_dest, myrank, + & ncsr, 1, MPI_INTEGER, + & ipx_src, ipx_src, + & mpi_comm_world, istatus, ierr ) + + isbufp(ncs+1) = nca +!coarray call mpi_sendrecv(isbufp, ncs + 1, MPI_INTEGER, +!coarray & ipx_dest, myrank, +!coarray & irsbuf_p, ncsr, MPI_INTEGER, +!coarray & ipx_src, ipx_src, +!coarray & mpi_comm_world, istatus, ierr ) + sync all ! do Not change + irsbuf_p(1:ncs+1)[ipx_dest+1] = isbufp(1:ncs+1) ! Put + sync all +!! 
+ + ncar = irsbuf_p(ncsr) +!coarray call mpi_sendrecv(buffp, 6*nca, MPI_DOUBLE_PRECISION, +!coarray & ipx_dest, myrank, +!coarray & rbuff_p, 6*ncar, MPI_DOUBLE_PRECISION, +!coarray & ipx_src, ipx_src, +!coarray & mpi_comm_world, istatus, ierr ) + rbuff_p(1:6,1:nca)[ipx_dest+1] = buffp(1:6,1:nca) ! Put +!! + +!coarray call mpi_sendrecv(ibuffp, nca, MPI_INTEGER, +!coarray & ipx_dest, myrank, +!coarray & irbuff_p, ncar, MPI_INTEGER, +!coarray & ipx_src, ipx_src, +!coarray & mpi_comm_world, istatus, ierr ) + irbuff_p(1:nca)[ipx_dest+1] = ibuffp(1:nca) ! Put + sync all +!! + +! +! merge nearest neighbors(+X) on receive buffer to local bucket. + icx = 2 + icy0 = 2 + icy1 = ncydiv + 1 + icz0 = 2 + icz1 = nczdiv + 1 + ldcell = 1 + lcse(1,1,ldcell) = icz0 + lcse(2,1,ldcell) = icz1 + lcse(1,2,ldcell) = icy0 + lcse(2,2,ldcell) = icy1 + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! + loc_csbound = 2*nczdiv*ncydiv + 1 + ncc_p = 0 + ncs_p = loc_csbound + nca_p = 0 +! + call add_bucket(abucket,iabucket,isbucket, + & rbuff_p,irbuff_p,irsbuf_p, + & max_mvatom,max_mvseg,max_cellcbd, + & nczdiv,ncydiv,ncxdiv, + & lcse,ldcell,ncatom,ncseg, + & ncc_p,ncs_p,nca_p) + +! +! ( bound -X ) +! +! nearest neighbor. + icz0 = 2 + icz1 = nczdiv + 1 + icy0 = 2 + icy1 = ncydiv + 1 + icx = 1 + ldcell = 1 + lcse(1,1,ldcell) = icz0 + lcse(2,1,ldcell) = icz1 + lcse(1,2,ldcell) = icy0 + lcse(2,2,ldcell) = icy1 + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! + loc_csbound = 2*nczdiv*ncydiv + 1 + ncc = 0 + ncs = loc_csbound + nca = 0 +! + call add_buffer(abucket,iabucket,isbucket,buffm,ibuffm,isbufm, + & max_mvatom,max_mvseg,max_cellcbd, + & nczdiv,ncydiv,ncxdiv, + & lcse,ldcell,ncatom,ncseg,ncc,ncs,nca) +! + ipx_dest = ipz*npx*npy + ipy*npx + mod(ipx-1+1/npx+npx,npx) + ipx_src = ipz*npx*npy + ipy*npx + mod(ipx+1-1/npx+npx,npx) + +! 
+ call mpi_sendrecv(ncs + 1, 1, MPI_INTEGER, + & ipx_dest, myrank, + & ncsr, 1, MPI_INTEGER, + & ipx_src, ipx_src, + & mpi_comm_world, istatus, ierr ) + + isbufm(ncs+1) = nca +!coarray call mpi_sendrecv(isbufm, ncs + 1, MPI_INTEGER, +!coarray & ipx_dest, myrank, +!coarray & irsbuf_m, ncsr, MPI_INTEGER, +!coarray & ipx_src, ipx_src, +!coarray & mpi_comm_world, istatus, ierr ) + sync all + irsbuf_m(1:ncs+1)[ipx_dest+1] = isbufm(1:ncs+1) ! Put + sync all +!! + + ncar = irsbuf_m(ncsr) +!coarray call mpi_sendrecv(buffm, 6*nca, MPI_DOUBLE_PRECISION, +!coarray & ipx_dest, myrank, +!coarray & rbuff_m, 6*ncar, MPI_DOUBLE_PRECISION, +!coarray & ipx_src, ipx_src, +!coarray & mpi_comm_world, istatus, ierr ) + rbuff_m(1:6,1:nca)[ipx_dest+1] = buffm(1:6,1:nca) ! Put +!! + +!coarray call mpi_sendrecv(ibuffm, nca, MPI_INTEGER, +!coarray & ipx_dest, myrank, +!coarray & irbuff_m, ncar, MPI_INTEGER, +!coarray & ipx_src, ipx_src, +!coarray & mpi_comm_world, istatus, ierr ) + irbuff_m(1:nca)[ipx_dest+1] = ibuffm(1:nca) ! Put + sync all +!! + +! +! merge nearest neighbors(-X) on receive buffer to local bucket. + icx = ncxdiv + 1 + icy0 = 2 + icy1 = ncydiv + 1 + icz0 = 2 + icz1 = nczdiv + 1 + ldcell = 1 + lcse(1,1,ldcell) = icz0 + lcse(2,1,ldcell) = icz1 + lcse(1,2,ldcell) = icy0 + lcse(2,2,ldcell) = icy1 + lcse(1,3,ldcell) = icx + lcse(2,3,ldcell) = icx +! + loc_csbound = 2*nczdiv*ncydiv + 1 + ncc_m = 0 + ncs_m = loc_csbound + nca_m = 0 +! + call add_bucket(abucket,iabucket,isbucket, + & rbuff_m,irbuff_m,irsbuf_m, + & max_mvatom,max_mvseg,max_cellcbd, + & nczdiv,ncydiv,ncxdiv, + & lcse,ldcell,ncatom,ncseg, + & ncc_m,ncs_m,nca_m) + +! +! ------- create new cell meta-data and its entity --------------- +! create segment meta-data and its entity, +! ie. the number of atoms per segment. +! +! copy atom data from bucket to new meta-data structure. +! also, setup meta-data "tag" and "na_per_cell", +! and also, setup segment meta-data "lsegtop" and "lseg_natoms". +! 
+ narea = na1cell * (nczdiv + 4) * (ncydiv + 4) + naline = na1cell * (nczdiv + 4) +! +! atom data. + nbase = narea + do icx = 2, ncxdiv + 1 + nbase = nbase + narea + nbase2 = nbase + naline + do icy = 2, ncydiv + 1 + nbase2 = nbase2 + naline + nca = nbase2 + 2*na1cell + do icz = 2, nczdiv + 1 + do ica = 1, ncatom(icz,icy,icx) + nca = nca + 1 + wkxyz(1,nca) = abucket(1,ica,icz,icy,icx) + wkxyz(2,nca) = abucket(2,ica,icz,icy,icx) + wkxyz(3,nca) = abucket(3,ica,icz,icy,icx) + wkv(1,nca) = abucket(4,ica,icz,icy,icx) + wkv(2,nca) = abucket(5,ica,icz,icy,icx) + wkv(3,nca) = abucket(6,ica,icz,icy,icx) + m2i(nca) = iabucket(ica,icz,icy,icx) + end do + end do + end do + end do +! +! segment meta-data. + ncs = 0 + nbase = narea + do icx = 2, ncxdiv + 1 + nbase = nbase + narea + nbase2 = nbase + naline + do icy = 2, ncydiv + 1 + nbase2 = nbase2 + naline + nca = nbase2 + 2*na1cell + do icz = 2, nczdiv + 1 + do ics = 1, ncseg(icz,icy,icx) + ncs = ncs + 1 + lseg_natoms(ncs) = isbucket(ics,icz,icy,icx) + lsegtop(ncs) = nca + 1 + nca = nca + lseg_natoms(ncs) + end do + end do + end do + end do +! number of segment per process. + nselfseg = ncs +! cell meta-data. + nbase = narea + do icx = 2, ncxdiv + 1 + nbase = nbase + narea + nbase2 = nbase + naline + do icy = 2, ncydiv + 1 + nbase2 = nbase2 + naline + nca = nbase2 + 2*na1cell + do icz = 2, nczdiv + 1 + tag(icz + 1, icy + 1, icx + 1) = nca + 1 + nca = nca + ncatom(icz,icy,icx) + na_per_cell(icz + 1, icy + 1, icx + 1) + & = ncatom(icz,icy,icx) + end do + end do + end do +! +!update of i2m +! +!$omp parallel default(shared) +!$omp& private(i,i0,k0) +!$omp do + do i=1,n + i2m(i)=-1 !! not necesary for performance measurement + enddo +!$omp end do +!$omp do + do k0=1,nselfseg + do i0=lsegtop(k0),lsegtop(k0)+lseg_natoms(k0)-1 + i=m2i(i0) + i2m(i)=i0 + enddo + enddo +!$omp end do +!$omp end parallel +! 
+ return + end +c---------------------------------------------------------------------- + subroutine add_buffer(abucket,iabucket,isbucket,buff,ibuff,isbuf, + & max_mvatom,max_mvseg,max_cell, + & nczdiv,ncydiv,ncxdiv, + & lcse,ldcell,ncatom,ncseg,ncc,ncs,nca) +c---------------------------------------------------------------------- +! + implicit none + integer max_mvatom + integer max_mvseg + integer max_cell ! max number of cells on communication buffer. + integer nczdiv + integer ncydiv + integer ncxdiv + real(8) abucket(6,max_mvatom,nczdiv+2,ncydiv+2,ncxdiv+2) + integer iabucket(max_mvatom,nczdiv+2,ncydiv+2,ncxdiv+2) + integer isbucket(max_mvseg,nczdiv+2,ncydiv+2,ncxdiv+2) + real(8) buff(6,max_cell*max_mvatom) + integer ibuff(max_cell*max_mvatom) + integer isbuf(2*max_cell + 1 + max_cell*max_mvseg) +! + integer ldcell, ldc + integer lcse(2,3,ldcell) + integer ncatom(nczdiv+2,ncydiv+2,ncxdiv+2) + integer ncseg(nczdiv+2,ncydiv+2,ncxdiv+2) + integer ncc + integer ncs + integer nca + integer ics, ica + integer icz, icy, icx + integer icz0, icy0, icx0 + integer icz1, icy1, icx1 +! + do ldc = 1, ldcell + icz0 = lcse(1,1,ldc) + icz1 = lcse(2,1,ldc) + icy0 = lcse(1,2,ldc) + icy1 = lcse(2,2,ldc) + icx0 = lcse(1,3,ldc) + icx1 = lcse(2,3,ldc) + do icx = icx0, icx1 + do icy = icy0, icy1 + do icz = icz0, icz1 + ncc = ncc + 1 + isbuf(ncc) = ncatom(icz,icy,icx) + ncc = ncc + 1 + isbuf(ncc) = ncseg(icz,icy,icx) + do ics = 1, ncseg(icz,icy,icx) + ncs = ncs + 1 + isbuf(ncs) = isbucket(ics,icz,icy,icx) + end do + do ica = 1, ncatom(icz,icy,icx) + nca = nca + 1 + buff(1,nca) = abucket(1,ica,icz,icy,icx) + buff(2,nca) = abucket(2,ica,icz,icy,icx) + buff(3,nca) = abucket(3,ica,icz,icy,icx) + buff(4,nca) = abucket(4,ica,icz,icy,icx) + buff(5,nca) = abucket(5,ica,icz,icy,icx) + buff(6,nca) = abucket(6,ica,icz,icy,icx) + ibuff(nca) = iabucket(ica,icz,icy,icx) + end do + end do + end do + end do + end do +! + return + end +! 
+c---------------------------------------------------------------------- + subroutine add_buffb(rbuff,irbuff,irsbuf,buff,ibuff,isbuf, + & max_mvatom,max_mvseg,max_cell, + & nccr,ncsr,ncar,ncc,ncs,nca,ldcell) +c---------------------------------------------------------------------- +! + implicit none + integer max_mvatom + integer max_mvseg + integer max_cell ! max number of cells on communication buffer. + + real(8) rbuff (6,max_cell*max_mvatom) + integer irbuff(max_cell*max_mvatom) + integer irsbuf(2*max_cell + 1 + max_cell*max_mvseg) + real(8) buff (6,max_cell*max_mvatom) + integer ibuff (max_cell*max_mvatom) + integer isbuf (2*max_cell + 1 + max_cell*max_mvseg) + + integer ncc, nccr + integer ncs, ncsr + integer nca, ncar + integer ldcell, ldc + integer ics, ica +! + do ldc = 1, ldcell + ncc = ncc + 2 + nccr = nccr + 2 + isbuf(ncc - 1) = irsbuf(nccr - 1) + isbuf(ncc) = irsbuf(nccr) +! segment data. + do ics = 1, irsbuf(nccr) + ncs = ncs + 1 + ncsr = ncsr + 1 + isbuf(ncs) = irsbuf(ncsr) + end do +! atom data. + do ica = 1, irsbuf(nccr - 1) + nca = nca + 1 + ncar = ncar + 1 + buff(1,nca) = rbuff(1,ncar) + buff(2,nca) = rbuff(2,ncar) + buff(3,nca) = rbuff(3,ncar) + buff(4,nca) = rbuff(4,ncar) + buff(5,nca) = rbuff(5,ncar) + buff(6,nca) = rbuff(6,ncar) + ibuff(nca) = irbuff(ncar) + end do + end do +! + return + end +! +c---------------------------------------------------------------------- + subroutine add_bucket(abucket,iabucket,isbucket, + & rbuff,irbuff,irsbuf, + & max_mvatom,max_mvseg,max_cell, + & nczdiv,ncydiv,ncxdiv, + & lcse,ldcell,ncatom,ncseg, + & ncc_b,ncs_b,nca_b) +c---------------------------------------------------------------------- +! + implicit none + integer max_mvatom + integer max_mvseg + integer max_cell ! max number of cells on communication buffer. 
+ integer nczdiv + integer ncydiv + integer ncxdiv + real(8) abucket(6,max_mvatom,nczdiv+2,ncydiv+2,ncxdiv+2) + integer iabucket(max_mvatom,nczdiv+2,ncydiv+2,ncxdiv+2) + integer isbucket(max_mvseg,nczdiv+2,ncydiv+2,ncxdiv+2) + real(8) rbuff(6,max_cell*max_mvatom) + integer irbuff(max_cell*max_mvatom) + integer irsbuf(2*max_cell + 1 + max_cell*max_mvseg) + + integer ldcell, ldc + integer lcse(2,3,ldcell) + integer ncatom(nczdiv+2,ncydiv+2,ncxdiv+2) + integer ncseg(nczdiv+2,ncydiv+2,ncxdiv+2) + integer ncc_b + integer ncs_b, ncs, ics + integer nca_b, nca, ica + integer icz, icy, icx + integer icz0, icy0, icx0 + integer icz1, icy1, icx1 +! + do ldc = 1, ldcell + icz0 = lcse(1,1,ldc) + icz1 = lcse(2,1,ldc) + icy0 = lcse(1,2,ldc) + icy1 = lcse(2,2,ldc) + icx0 = lcse(1,3,ldc) + icx1 = lcse(2,3,ldc) + do icx = icx0, icx1 + do icy = icy0, icy1 + do icz = icz0, icz1 +! segment data. + ncc_b = ncc_b + 2 ! ncc_b + ncs = ncseg(icz,icy,icx) + do ics = 1, irsbuf(ncc_b) + ncs_b = ncs_b + 1 ! ncs_b + ncs = ncs + 1 + isbucket(ncs,icz,icy,icx) = irsbuf(ncs_b) + end do + ncseg(icz,icy,icx) = ncs +! atom data. + nca = ncatom(icz,icy,icx) + do ica = 1, irsbuf(ncc_b - 1) + nca_b = nca_b + 1 ! nca_b + nca = nca + 1 + abucket(1,nca,icz,icy,icx) = rbuff(1,nca_b) + abucket(2,nca,icz,icy,icx) = rbuff(2,nca_b) + abucket(3,nca,icz,icy,icx) = rbuff(3,nca_b) + abucket(4,nca,icz,icy,icx) = rbuff(4,nca_b) + abucket(5,nca,icz,icy,icx) = rbuff(5,nca_b) + abucket(6,nca,icz,icy,icx) = rbuff(6,nca_b) + iabucket(nca,icz,icy,icx) = irbuff(nca_b) + end do + ncatom(icz,icy,icx) = nca + end do + end do + end do + end do +! 
+ return + end +c---------------------------------------------------------------------- + subroutine pre_record_data +c---------------------------------------------------------------------- + use trj_mpi + use trj_org + use md_fmm + use md_fmm_domdiv_flg + use md_segment + use mpivar + implicit none + integer(4) :: i,nsum + integer(4) :: i0,k0,i00 + include 'mpif.h' + integer(4) :: ierr + real(8),allocatable :: snd(:,:),rcv(:,:) +!coarray integer(4),allocatable :: natmlist(:),natmdisp(:) + integer(4),allocatable :: natmdisp(:) + integer(4),allocatable :: natmlist(:)[:] + integer(4),allocatable :: natmlist_tmp(:) + integer,allocatable :: ndis(:)[:], mdis(:)[:] + real(8),allocatable :: rcvx(:,:)[:] + integer :: me, np, ms, mm +!! + integer(4),allocatable :: nrearrange(:) + integer(4) :: m2i_tmp(na1cell*lxdiv*lydiv*lzdiv) + +!coarray + me = this_image() + np = num_images() + allocate(ndis(np)[*]) + allocate(mdis(n)[*]) + allocate(rcvx(6,n)[*]) +!! + + if(nprocs.eq.1) then +!$omp parallel do default(shared) +!$omp& private(i,i0) + do i = 1,n + i0=i2m(i) + xyz(1,i) = wkxyz(1,i0) + xyz(2,i) = wkxyz(2,i0) + xyz(3,i) = wkxyz(3,i0) + v(1:3,i) = wkv(1:3,i0) + end do + else + allocate(nrearrange(n)) + allocate(snd(6,n)) + allocate(rcv(6,n)) +!coarray allocate(natmlist(nprocs),natmdisp(nprocs)) + allocate(natmlist(nprocs)[*]) + allocate(natmlist_tmp(nprocs)) + allocate(natmdisp(nprocs)) +!! + + nselfatm=0 + do k0=1,nselfseg + do i0=lsegtop(k0),lsegtop(k0)+lseg_natoms(k0)-1 + nselfatm=nselfatm+1 + m2i_tmp(nselfatm)=m2i(i0) + enddo ! i0 + enddo ! k0 + + if(nselfatm.gt.na1cell*lxdiv*lydiv*lzdiv)then + write(*,*) 'ERROR: nselfatm overflowed!', myrank,nselfatm + call mpistop() + endif + +!coarray call mpi_allgather(nselfatm,1,mpi_integer, +!coarray & natmlist,1,mpi_integer, +!coarray & mpi_comm_world,ierr) +!coarray! +!coarray call mpi_barrier(mpi_comm_world,ierr) +!coarray! +!coarray natmdisp(1) = 0 + do mm = 1,np + natmlist(me)[mm] = nselfatm ! 
Put + sync all + enddo + natmdisp(1) = 1 +!! + nsum = natmlist(1) + do i = 2,nprocs + natmdisp(i) = natmdisp(i-1)+natmlist(i-1) + nsum = nsum + natmlist(i) + end do +! +!coarray call mpi_gatherv(m2i_tmp,nselfatm,mpi_integer, +!coarray & nrearrange,natmlist,natmdisp,mpi_integer, +!coarray & mpiout,mpi_comm_world,ierr) + ms = natmdisp(me) + mdis(ms:ms+nselfatm-1)[mpiout+1] = m2i_tmp(1:nselfatm) + sync all + nrearrange = mdis +!! +! +!$omp parallel do default(shared) +!$omp& private(i) + do i = 1,nprocs + natmlist(i) = natmlist(i)*6 + natmdisp(i) = natmdisp(i)*6 + end do +! + i00=0 + do k0=1,nselfseg + do i0=lsegtop(k0),lsegtop(k0)+lseg_natoms(k0)-1 + i00=i00+1 + snd(1,i00) = wkxyz(1,i0) + snd(2,i00) = wkxyz(2,i0) + snd(3,i00) = wkxyz(3,i0) + snd(4,i00) = wkv(1,i0) + snd(5,i00) = wkv(2,i0) + snd(6,i00) = wkv(3,i0) + end do ! i0 + end do ! k0 + +!coarray call mpi_gatherv(snd,nselfatm*6,mpi_double_precision, +!coarray & rcv,natmlist,natmdisp,mpi_double_precision, +!coarray & mpiout,mpi_comm_world,ierr) + ms = natmdisp(me)/6 + rcvx(1:6,ms:ms+nselfatm-1)[mpiout+1] = snd(1:6,1:nselfatm) + sync all + rcv = rcvx +!! +! + if(myrank.eq.mpiout) then +!$omp parallel do default(none) +!$omp& private(i,i0) +!$omp& shared(xyz,v,rcv,n,nrearrange) + do i = 1,n + i0=nrearrange(i) + xyz(1,i0) = rcv(1,i) + xyz(2,i0) = rcv(2,i) + xyz(3,i0) = rcv(3,i) + v(1,i0) = rcv(4,i) + v(2,i0) = rcv(5,i) + v(3,i0) = rcv(6,i) + end do + end if + deallocate(snd,rcv,nrearrange) +!coarray deallocate(natmlist,natmdisp) + deallocate(natmdisp) +!! + end if + + call cell_edge() + + return + end diff --git a/MODYLAS-MINI/src/xmpAPI_comm_3.f b/MODYLAS-MINI/src/xmpAPI_comm_3.f new file mode 100755 index 0000000..39dfbf5 --- /dev/null +++ b/MODYLAS-MINI/src/xmpAPI_comm_3.f @@ -0,0 +1,1225 @@ +!---------------------------------------------------------------------- +! Copyright (C) 2003-2014 Kensuke Iwahashi, Noriyuki Yoshii, +! Atsushi Yamada, Yoshimichi Andoh, +! Kazushi Fujimoto, Hidekazu Kojima, +! 
Fumiyasu Mizutani, and Susumu Okazaki
+! All Rights Reserved.
+!
+! Copyright (C) 2013-2014 RIKEN AICS
+! All Rights Reserved.
+!
+! This MD program has been developed at Nagoya University, and
+! Institute for Molecular Science, National Institutes of Natural
+! Sciences.
+! And this work was supported by
+! Next-Generation Supercomputer Project, and
+! NAREGI Nanoscience Project,
+! Ministry of Education, Culture, Sports, Science and Technology,
+! Japan.
+!
+! This program is NOT a free software and distributed under the
+! license described in the LICENSE.
+! All rights are reserved by the authors of this program.
+!
+! The authors do NOT warrant or assume any legal liability or
+! responsibility for the accuracy or completeness.
+!----------------------------------------------------------------------
+c----------------------------------------------------------------------
+      subroutine init_comm_direct_3()
+c----------------------------------------------------------------------
+      use comm_base
+      use comm_d3
+      use trj_mpi
+      use md_fmm
+      use md_fmm_domdiv_flg
+      use mpivar
+      implicit none
+      INCLUDE 'mpif.h'
+
+      npz = nzdiv
+      npy = nydiv
+      npx = nxdiv
+      ipz = izflg(myrank)-1
+      ipy = iyflg(myrank)-1
+      ipx = ixflg(myrank)-1
+      ncxdiv = lxdiv
+      ncydiv = lydiv
+      nczdiv = lzdiv
+
+      allocate(icbufp ((ncell/npy)*(ncell/npx)*2)[*])
+      allocate(ircbufp((ncell/npy)*(ncell/npx)*2)[*])
+      allocate(icbufm ((ncell/npy)*(ncell/npx)*2)[*])
+      allocate(ircbufm((ncell/npy)*(ncell/npx)*2)[*])
+      allocate(ibuffp (na1cell*(ncell/npy)*(ncell/npx)*2)[*])
+      allocate(irbuffp(na1cell*(ncell/npy)*(ncell/npx)*2)[*])
+      allocate(ibuffm (na1cell*(ncell/npy)*(ncell/npx)*2)[*])
+      allocate(irbuffm(na1cell*(ncell/npy)*(ncell/npx)*2)[*])
+      allocate(buffp (3,na1cell*(ncell/npy)*(ncell/npx)*2)[*])
+      allocate(rbuffp(3,na1cell*(ncell/npy)*(ncell/npx)*2)[*])
+      allocate(buffm (3,na1cell*(ncell/npy)*(ncell/npx)*2)[*])
+      allocate(rbuffm(3,na1cell*(ncell/npy)*(ncell/npx)*2)[*])
+
+      return
+      end
+c---------------------------------------------------------------------- + subroutine comm_direct_3() ! ver.20120314 +c---------------------------------------------------------------------- + use comm_base + use comm_d3 + use trj_org + use trj_mpi + use md_forces + use md_monitors + use md_fmm + use md_fmm_domdiv_flg + use md_segment + use md_periodic + use unitcell + use mpivar + implicit none + INCLUDE 'mpif.h' + integer ipz_pdest, ipy_pdest, ipx_pdest + integer ipz_psrc, ipy_psrc, ipx_psrc + integer ipz_mdest, ipy_mdest, ipx_mdest + integer ipz_msrc, ipy_msrc, ipx_msrc + integer itr, nitr + integer icz, icy, icx + integer icz0, icz1 + integer icy0, icy1 + integer iczp0, iczp1 + integer icyp0, icyp1 + integer icxp0, icxp1 + integer iczm0, iczm1 + integer icym0, icym1 + integer icxm0, icxm1 + integer iczb, icyb, icxb + integer iczbp0, iczbp1 + integer iczbm0, iczbm1 + integer icybp0, icybp1, icybp1st + integer icybm0, icybm1, icybm1st + integer icxbp0, icxbp1 +#ifndef HALFDIREE + integer icxbp1st +#endif + integer icxbm0, icxbm1, icxbm1st + integer ncc,ncc2 + integer nccp + integer nccm + integer ica, icag + integer icasp, icarp + integer icasm, icarm + integer nca + integer ncap, ncarp, ncar2p + integer ncam, ncarm, ncar2m + integer nbase, nbase2, nbase3 + integer ntmp + integer istatus(mpi_status_size, 8), ierr +#ifndef SYNC_COM + integer,dimension(8) :: irq + integer nrq +#endif +!coarray + integer nd +!! + +c----- common parameters for coordinate communication. ----- + ipx=mod(myrank,npx) + ipy=mod((myrank-ipx)/npx,npy) + ipz=mod((myrank-ipx-ipy*npx)/(npx*npy),npz) + + nczdiv = (ncell - 1)/npz + 1 + ncydiv = (ncell - 1)/npy + 1 + ncxdiv = (ncell - 1)/npx + 1 + + narea = na1cell * (ncell/npz + 4) * (ncell/npy + 4) + naline = na1cell * (ncell/npz + 4) + +! +!----- coordinate communication code starts here. ------ +! +! coordinate +Z + ipz_pdest = mod(ipz+1-1/npz+npz,npz)*npx*npy + ipy*npx + ipx + ipz_psrc = mod(ipz-1+1/npz+npz,npz)*npx*npy + ipy*npx + ipx +! 
coordinate -Z + ipz_mdest = mod(ipz-1+1/npz+npz,npz)*npx*npy + ipy*npx + ipx + ipz_msrc = mod(ipz+1-1/npz+npz,npz)*npx*npy + ipy*npx + ipx + + nitr = (2 - 1)/nczdiv + 1 + + DO itr = 1, nitr + if (itr == 1) then + iczp0 = 2+ nczdiv - 1 + if (nczdiv == 1) iczp0 = iczp0+1 + iczp1 = 2+ nczdiv + + nccp = 0 + DO icx = 3, 2+ncxdiv + DO icy = 3, 2+ncydiv + DO icz = iczp0, iczp1 + nccp = nccp + 1 + icbufp(nccp) = na_per_cell( icz, icy, icx ) + END DO + END DO + END DO + + iczm0 = 3 + iczm1 = iczm0 + 1 + if (nczdiv == 1) iczm1 = iczm0 + nccm = 0 + DO icx = 3, 2+ncxdiv + DO icy = 3, 2+ncydiv + DO icz = iczm0, iczm1 + nccm = nccm + 1 + icbufm(nccm) = na_per_cell( icz, icy, icx ) + END DO + END DO + END DO + +#ifdef SYNC_COM +!coarray call mpi_sendrecv(icbufp, nccp, MPI_INTEGER, +!coarray & ipz_pdest, myrank, +!coarray & ircbufp, nccp, MPI_INTEGER, ipz_psrc, ipz_psrc, +!coarray & mpi_comm_world, istatus, ierr ) +!coarray call mpi_sendrecv(icbufm, nccm, MPI_INTEGER, +!coarray & ipz_mdest, myrank, +!coarray & ircbufm, nccm, MPI_INTEGER, ipz_msrc, ipz_msrc, +!coarray & mpi_comm_world, istatus, ierr ) + ircbufp(1:nccp)[ipz_pdest+1] = icbufp(1:nccp) ! Put + ircbufm(1:nccm)[ipz_mdest+1] = icbufm(1:nccm) ! Put + sync all +!! 
+#else + call mpi_irecv(ircbufp, nccp, + & MPI_INTEGER, ipz_psrc, ipz_psrc, mpi_comm_world, + & irq(1), ierr) + call mpi_isend(icbufp, nccp, + & MPI_INTEGER, ipz_pdest, myrank, mpi_comm_world, + & irq(2), ierr) + call mpi_irecv(ircbufm, nccm, + & MPI_INTEGER, ipz_msrc, ipz_msrc, mpi_comm_world, + & irq(3), ierr) + call mpi_isend(icbufm, nccm, + & MPI_INTEGER, ipz_mdest, myrank, mpi_comm_world, + & irq(4), ierr) + nrq = 4 + call mpi_waitall(nrq, irq, istatus, ierr) +#endif + + iczbp0 = iczp0 - nczdiv + iczbp1 = iczp1 - nczdiv + iczbm0 = iczm0 + nczdiv + iczbm1 = iczm1 + nczdiv + + ncc2 = 0 + ncarp = 0 + DO icx = 3, 2+ncxdiv + DO icy = 3, 2+ncydiv + nca = tag(iczbp1+1,icy,icx) - ircbufp(ncc2+1) + if(nczdiv > 1) then + nca = tag(iczbp1+1,icy,icx) + & - ircbufp(ncc2+2) - ircbufp(ncc2+1) + END IF + DO iczb = iczbp0, iczbp1 + ncc2 = ncc2 + 1 + na_per_cell(iczb,icy,icx) = ircbufp(ncc2) + tag(iczb,icy,icx) = nca + nca = nca + na_per_cell(iczb,icy,icx) + ncarp = ncarp + na_per_cell(iczb,icy,icx) + END DO + END DO + END DO + + ncap = 0 + DO icx = 3, 2+ncxdiv + DO icy = 3, 2+ncydiv + DO ica = tag(iczp0, icy, icx), tag(iczp1, icy, icx) + & + na_per_cell(iczp1, icy, icx)-1 + ncap = ncap + 1 + buffp(1,ncap) = wkxyz(1,ica) + buffp(2,ncap) = wkxyz(2,ica) + buffp(3,ncap) = wkxyz(3,ica) + ibuffp(ncap) = m2i(ica) + END DO + END DO + END DO + + ncc2 = 0 + ncarm = 0 + DO icx = 3, 2+ncxdiv + DO icy = 3, 2+ncydiv + nca = tag(iczbm0-1,icy,icx) + & + na_per_cell(iczbm0-1,icy,icx) + DO iczb = iczbm0, iczbm1 + ncc2 = ncc2 + 1 + na_per_cell(iczb,icy,icx) = ircbufm(ncc2) + tag(iczb,icy,icx) = nca + nca = nca + na_per_cell(iczb,icy,icx) + ncarm = ncarm + na_per_cell(iczb,icy,icx) + END DO + END DO + END DO + + ncam = 0 + DO icx = 3, 2+ncxdiv + DO icy = 3, 2+ncydiv + DO ica = tag(iczm0, icy, icx), tag(iczm1, icy, icx) + & + na_per_cell(iczm1, icy, icx)-1 + ncam = ncam + 1 + buffm(1,ncam) = wkxyz(1,ica) + buffm(2,ncam) = wkxyz(2,ica) + buffm(3,ncam) = wkxyz(3,ica) + ibuffm(ncam) = m2i(ica) + END 
DO + END DO + END DO + +#ifdef SYNC_COM +!coarray call mpi_sendrecv(buffp, 3*ncap, MPI_DOUBLE_PRECISION, +!coarray & ipz_pdest, myrank, +!coarray & rbuffp, 3*ncarp, MPI_DOUBLE_PRECISION, +!coarray & ipz_psrc, ipz_psrc, +!coarray & mpi_comm_world, istatus, ierr ) +!coarray call mpi_sendrecv(ibuffp, ncap, MPI_INTEGER, +!coarray & ipz_pdest, myrank, +!coarray & irbuffp, ncarp, MPI_INTEGER, ipz_psrc, ipz_psrc, +!coarray & mpi_comm_world, istatus, ierr ) +!coarray +!coarray call mpi_sendrecv(buffm, 3*ncam, MPI_DOUBLE_PRECISION, +!coarray & ipz_mdest, myrank, +!coarray & rbuffm, 3*ncarm, MPI_DOUBLE_PRECISION, +!coarray & ipz_msrc, ipz_msrc, +!coarray & mpi_comm_world, istatus, ierr ) +!coarray call mpi_sendrecv(ibuffm, ncam, MPI_INTEGER, ipz_mdest, +!coarray & myrank, irbuffm, ncarm, MPI_INTEGER, +!coarray & ipz_msrc, ipz_msrc, +!coarray & mpi_comm_world, istatus, ierr ) + rbuffp(1:3,1:ncap)[ipz_pdest+1] = buffp(1:3,1:ncap) ! Put + irbuffp(1:ncap)[ipz_pdest+1] = ibuffp(1:ncap) ! Put + rbuffm(1:3,1:ncam)[ipz_mdest+1] = buffm(1:3,1:ncam) ! Put + irbuffm(1:ncam)[ipz_mdest+1] = ibuffm(1:ncam) ! Put + sync all +!! 
+ +#else + call mpi_irecv(rbuffp, 3*ncarp, + & MPI_DOUBLE_PRECISION, ipz_psrc, ipz_psrc, + & mpi_comm_world, irq(1), ierr) + call mpi_isend(buffp, 3*ncap, + & MPI_DOUBLE_PRECISION, ipz_pdest, myrank, + & mpi_comm_world, irq(2), ierr) + call mpi_irecv(irbuffp, ncarp, + & MPI_INTEGER, ipz_psrc, ipz_psrc, + & mpi_comm_world, irq(3), ierr) + call mpi_isend(ibuffp, ncap, + & MPI_INTEGER, ipz_pdest, myrank, + & mpi_comm_world, irq(4), ierr) + + call mpi_irecv(rbuffm, 3*ncarm, + & MPI_DOUBLE_PRECISION, ipz_msrc, ipz_msrc, + & mpi_comm_world, irq(5), ierr) + call mpi_isend(buffm, 3*ncam, + & MPI_DOUBLE_PRECISION, ipz_mdest, myrank, + & mpi_comm_world, irq(6), ierr) + call mpi_irecv(irbuffm, ncarm, + & MPI_INTEGER, ipz_msrc, ipz_msrc, + & mpi_comm_world, irq(7), ierr) + call mpi_isend(ibuffm, ncam, + & MPI_INTEGER, ipz_mdest, myrank, + & mpi_comm_world, irq(8), ierr) + + nrq = 8 + call mpi_waitall(nrq, irq, istatus, ierr) +#endif + + nca = 0 + DO icx = 3, 2+ncxdiv + DO icy = 3, 2+ncydiv + DO ica = tag(iczbp0, icy, icx), tag(iczbp1, icy, icx) + & + na_per_cell(iczbp1, icy, icx)-1 + nca = nca + 1 + wkxyz(1,ica) = rbuffp(1,nca) + wkxyz(2,ica) = rbuffp(2,nca) + wkxyz(3,ica) = rbuffp(3,nca) + m2i(ica) = irbuffp(nca) + END DO + END DO + END DO + + nca = 0 + DO icx = 3, 2+ncxdiv + DO icy = 3, 2+ncydiv + DO ica = tag(iczbm0, icy, icx), tag(iczbm1, icy, icx) + & + na_per_cell(iczbm1, icy, icx)-1 + nca = nca + 1 + wkxyz(1,ica) = rbuffm(1,nca) + wkxyz(2,ica) = rbuffm(2,nca) + wkxyz(3,ica) = rbuffm(3,nca) + m2i(ica) = irbuffm(nca) + END DO + END DO + END DO + + ELSE + +#ifdef SYNC_COM +!coarray call mpi_sendrecv(ircbufp, nccp, MPI_INTEGER, +!coarray & ipz_pdest, myrank, +!coarray & icbufp, nccp, MPI_INTEGER, ipz_psrc, ipz_psrc, +!coarray & mpi_comm_world, istatus, ierr ) +!coarray call mpi_sendrecv(ircbufm, nccm, MPI_INTEGER, +!coarray & ipz_mdest, myrank, +!coarray & icbufm, nccm, MPI_INTEGER, ipz_msrc, ipz_msrc, +!coarray & mpi_comm_world, istatus, ierr ) + 
icbufp(1:nccp)[ipz_pdest+1] = ircbufp(1:nccp) ! Put + icbufm(1:nccm)[ipz_mdest+1] = ircbufm(1:nccm) ! Put + sync all +!! +#else + call mpi_irecv(icbufp, nccp, + & MPI_INTEGER, ipz_psrc, ipz_psrc, mpi_comm_world, + & irq(1), ierr) + call mpi_isend(ircbufp, nccp, + & MPI_INTEGER, ipz_pdest, myrank, mpi_comm_world, + & irq(2), ierr) + call mpi_irecv(icbufm, nccm, + & MPI_INTEGER, ipz_msrc, ipz_msrc, mpi_comm_world, + & irq(3), ierr) + call mpi_isend(ircbufm, nccm, + & MPI_INTEGER, ipz_mdest, myrank, mpi_comm_world, + & irq(4), ierr) + nrq = 4 + call mpi_waitall(nrq, irq, istatus, ierr) +#endif + + iczbp0 = 1 + iczbp1 = 1 + ncc2 = 0 + ncar2p = 0 + DO icx = 3, 2+ncxdiv + DO icy = 3, 2+ncydiv + nca = tag(iczbp1+1,icy,icx) - icbufp(ncc2+1) + DO iczb = iczbp0, iczbp1 + ncc2 = ncc2 + 1 + na_per_cell(iczbp0,icy,icx) = icbufp(ncc2) + tag(iczbp0,icy,icx) = nca + nca = nca + na_per_cell(iczbp0,icy,icx) + ncar2p = ncar2p + icbufp(ncc2) + END DO + END DO + END DO + + iczbm0 = 2 + nczdiv + 2 + iczbm1 = iczbm0 + ncc2 = 0 + ncar2m = 0 + DO icx = 3, 2+ncxdiv + DO icy = 3, 2+ncydiv + nca = tag(iczbm0-1,icy,icx) + & + na_per_cell(iczbm0-1,icy,icx) + ncc2 = ncc2 + 1 + na_per_cell(iczbm0,icy,icx) = icbufm(ncc2) + tag(iczbm0,icy,icx) = nca + nca = nca + na_per_cell(iczbm0,icy,icx) + ncar2m = ncar2m + icbufm(ncc2) + END DO + END DO + +#ifdef SYNC_COM +!coarray call mpi_sendrecv(rbuffp, 3*ncarp, MPI_DOUBLE_PRECISION, +!coarray & ipz_pdest, myrank, +!coarray & buffp, 3*ncar2p, MPI_DOUBLE_PRECISION, +!coarray & ipz_psrc, ipz_psrc, +!coarray & mpi_comm_world, istatus, ierr ) +!coarray call mpi_sendrecv(irbuffp, ncarp, MPI_INTEGER, ipz_pdest, +!coarray & myrank, ibuffp, ncar2p, MPI_INTEGER, +!coarray & ipz_psrc, ipz_psrc, +!coarray & mpi_comm_world, istatus, ierr ) +!coarray +!coarray call mpi_sendrecv(rbuffm, 3*ncarm, MPI_DOUBLE_PRECISION, +!coarray & ipz_mdest, myrank, +!coarray & buffm, 3*ncar2m, MPI_DOUBLE_PRECISION, +!coarray & ipz_msrc, ipz_msrc, +!coarray & mpi_comm_world, istatus, ierr 
) +!coarray call mpi_sendrecv(irbuffm, ncarm, MPI_INTEGER, +!coarray & ipz_mdest, myrank, +!coarray & ibuffm, ncar2m, MPI_INTEGER, ipz_msrc, ipz_msrc, +!coarray & mpi_comm_world, istatus, ierr ) + buffp(1:3,1:ncarp)[ipz_pdest+1] = rbuffp(1:3,1:ncarp) ! Put + ibuffp(1:ncarp)[ipz_pdest+1] = irbuffp(1:ncarp) ! Put + buffm(1:3,1:ncarm)[ipz_mdest+1] = rbuffm(1:3,1:ncarm) ! Put + ibuffm(1:ncarm)[ipz_mdest+1] = irbuffm(1:ncarm) ! Put + sync all +!! +#else + call mpi_irecv(buffp, 3*ncar2p, + & MPI_DOUBLE_PRECISION, ipz_psrc, ipz_psrc, + & mpi_comm_world, irq(1), ierr) + call mpi_isend(rbuffp, 3*ncarp, + & MPI_DOUBLE_PRECISION, ipz_pdest, myrank, + & mpi_comm_world, irq(2), ierr) + call mpi_irecv(ibuffp, ncar2p, + & MPI_INTEGER, ipz_psrc, ipz_psrc, + & mpi_comm_world, irq(3), ierr) + call mpi_isend(irbuffp, ncarp, + & MPI_INTEGER, ipz_pdest, myrank, + & mpi_comm_world, irq(4), ierr) + + call mpi_irecv(buffm, 3*ncar2m, + & MPI_DOUBLE_PRECISION, ipz_msrc, ipz_msrc, + & mpi_comm_world, irq(5), ierr) + call mpi_isend(rbuffm, 3*ncarm, + & MPI_DOUBLE_PRECISION, ipz_mdest, myrank, + & mpi_comm_world, irq(6), ierr) + call mpi_irecv(ibuffm, ncar2m, + & MPI_INTEGER, ipz_msrc, ipz_msrc, + & mpi_comm_world, irq(7), ierr) + call mpi_isend(irbuffm, ncarm, + & MPI_INTEGER, ipz_mdest, myrank, + & mpi_comm_world, irq(8), ierr) + + nrq = 8 + call mpi_waitall(nrq, irq, istatus, ierr) +#endif + + nca = 0 + DO icx = 3, 2+ncxdiv + DO icy = 3, 2+ncydiv + DO ica = tag(iczbp0, icy, icx), tag(iczbp1, icy, icx) + & + na_per_cell(iczbp1, icy, icx)-1 + nca = nca + 1 + wkxyz(1,ica) = buffp(1,nca) + wkxyz(2,ica) = buffp(2,nca) + wkxyz(3,ica) = buffp(3,nca) + m2i(ica) = ibuffp(nca) + END DO + END DO + END DO + + nca = 0 + DO icx = 3, 2+ncxdiv + DO icy = 3, 2+ncydiv + DO ica = tag(iczbm0, icy, icx), tag(iczbm1, icy, icx) + & + na_per_cell(iczbm1, icy, icx)-1 + nca = nca + 1 + wkxyz(1,ica) = buffm(1,nca) + wkxyz(2,ica) = buffm(2,nca) + wkxyz(3,ica) = buffm(3,nca) + m2i(ica) = ibuffm(nca) + END DO + END DO + 
END DO + + END IF + END DO + +! coordinate +Y + ipy_pdest = ipz*npx*npy + mod(ipy+1-1/npy+npy,npy)*npx + ipx + ipy_psrc = ipz*npx*npy + mod(ipy-1+1/npy+npy,npy)*npx + ipx +! coordinate -Y + ipy_mdest = ipz*npx*npy + mod(ipy-1+1/npy+npy,npy)*npx + ipx + ipy_msrc = ipz*npx*npy + mod(ipy+1-1/npy+npy,npy)*npx + ipx + + icz0 = 1 + icz1 = 2 + nczdiv + 2 + nitr = (2 - 1)/ncydiv + 1 + + DO icx = 3, 2+ncxdiv + DO itr = 1, nitr + if (itr == 1) then + icyp0 = 2 + ncydiv - 1 + if (ncydiv == 1) icyp0 = icyp0 + 1 + icyp1 = 2 + ncydiv + nccp = (icz1 - icz0 + 1)*(icyp1 - icyp0 + 1) + icybp0 = icyp0 - ncydiv + icybp1 = icyp1 - ncydiv + icym0 = 3 + icym1 = icym0 + 1 + if (ncydiv == 1) icym1 = icym0 + nccm = (icz1 - icz0 + 1)*(icym1 - icym0 + 1) + icybm0 = icym0 + ncydiv + icybm1 = icym1 + ncydiv + +#ifdef SYNC_COM +!coarray call mpi_sendrecv(na_per_cell(icz0,icyp0,icx), nccp, +!coarray & MPI_INTEGER, ipy_pdest, myrank, +!coarray & na_per_cell(icz0,icybp0,icx), nccp, MPI_INTEGER, +!coarray & ipy_psrc, ipy_psrc, +!coarray & mpi_comm_world, istatus, ierr ) +!coarray call mpi_sendrecv(na_per_cell(icz0,icym0,icx), nccm, +!coarray & MPI_INTEGER, ipy_mdest, myrank, +!coarray & na_per_cell(icz0,icybm0,icx), nccm, MPI_INTEGER, +!coarray & ipy_msrc, ipy_msrc, +!coarray & mpi_comm_world, istatus, ierr ) + nd = abs(icyp1 - icyp0) + na_per_cell(:, icybp0:icybp0+nd, icx)[ipy_pdest+1] + . = na_per_cell(:, icyp0:icyp0 +nd, icx) ! Put + nd = abs(icym1 - icym0) + na_per_cell(:, icybm0:icybm0+nd, icx)[ipy_mdest+1] + . = na_per_cell(:, icym0:icym0 +nd, icx) ! Put + sync all +!! 
+#else + call mpi_irecv(na_per_cell(icz0,icybp0,icx), nccp, + & MPI_INTEGER, ipy_psrc, ipy_psrc, mpi_comm_world, + & irq(1), ierr) + call mpi_isend(na_per_cell(icz0,icyp0,icx), nccp, + & MPI_INTEGER, ipy_pdest, myrank, mpi_comm_world, + & irq(2), ierr) + call mpi_irecv(na_per_cell(icz0,icybm0,icx), nccm, + & MPI_INTEGER, ipy_msrc, ipy_msrc, mpi_comm_world, + & irq(3), ierr) + call mpi_isend(na_per_cell(icz0,icym0,icx), nccm, + & MPI_INTEGER, ipy_mdest, myrank, mpi_comm_world, + & irq(4), ierr) + nrq = 4 + call mpi_waitall(nrq, irq, istatus, ierr) +#endif + + nbase = tag(3, 3, icx) - 3*naline + if(ncydiv == 1) nbase = nbase + naline + DO icyb = icybp0, icybp1 + nbase = nbase + naline + nca = nbase - na_per_cell(2,icyb,icx) + & - na_per_cell(1,icyb,icx) + DO icz = icz0, icz1 + tag(icz,icyb,icx) = nca + nca = nca + na_per_cell(icz,icyb,icx) + END DO + END DO + + nbase = tag(3,2+ncydiv,icx) + DO icyb = icybm0, icybm1 + nbase = nbase + naline + nca = nbase - na_per_cell(2,icyb,icx) + & - na_per_cell(1,icyb,icx) + DO icz = icz0, icz1 + tag(icz,icyb,icx) = nca + nca = nca + na_per_cell(icz,icyb,icx) + END DO + END DO + + ncap = naline * 2 + if (ncydiv == 1) ncap = naline + icasp = tag(3,icyp0,icx) - 2*na1cell + icarp = tag(3,icybp0,icx) - 2*na1cell + ncam = naline * 2 + if (ncydiv == 1) ncam = naline + icasm = tag(3,icym0,icx) - 2*na1cell + icarm = tag(3,icybm0,icx) - 2*na1cell + +#ifdef SYNC_COM +!coarray call mpi_sendrecv(wkxyz(1,icasp),3*ncap, +!coarray & MPI_DOUBLE_PRECISION, ipy_pdest, myrank, +!coarray & wkxyz(1,icarp), 3*ncap, MPI_DOUBLE_PRECISION, +!coarray & ipy_psrc, ipy_psrc, +!coarray & mpi_comm_world, istatus, ierr ) +!coarray call mpi_sendrecv(m2i(icasp), ncap, MPI_INTEGER, +!coarray & ipy_pdest, myrank, +!coarray & m2i(icarp), ncap, MPI_INTEGER, +!coarray & ipy_psrc, ipy_psrc, +!coarray & mpi_comm_world, istatus, ierr ) +!coarray call mpi_sendrecv(wkxyz(1,icasm), 3*ncam, +!coarray & MPI_DOUBLE_PRECISION, ipy_mdest, myrank, +!coarray & wkxyz(1,icarm), 
3*ncam, MPI_DOUBLE_PRECISION, +!coarray & ipy_msrc, ipy_msrc, +!coarray & mpi_comm_world, istatus, ierr ) +!coarray call mpi_sendrecv(m2i(icasm), ncam, MPI_INTEGER, +!coarray & ipy_mdest, myrank, +!coarray & m2i(icarm), ncam, MPI_INTEGER, ipy_msrc, ipy_msrc, +!coarray & mpi_comm_world, istatus, ierr ) + wkxyz(:,icarp:icarp+ncap-1)[ipy_pdest+1] + . = wkxyz(:,icasp:icasp+ncap-1) ! Put + m2i(icarp:icarp+ncap-1)[ipy_pdest+1] + . = m2i(icasp:icasp+ncap-1) ! Put + sync all + wkxyz(:,icarm:icarm+ncam-1)[ipy_mdest+1] + . = wkxyz(:,icasm:icasm+ncam-1) ! Put + m2i(icarm:icarm+ncam-1)[ipy_mdest+1] + . = m2i(icasm:icasm+ncam-1) ! Put + sync all +!! +#else + call mpi_irecv(wkxyz(1,icarp), 3*ncap, + & MPI_DOUBLE_PRECISION, ipy_psrc, ipy_psrc, + & mpi_comm_world, irq(1), ierr) + call mpi_isend(wkxyz(1,icasp), 3*ncap, + & MPI_DOUBLE_PRECISION, ipy_pdest, myrank, + & mpi_comm_world, irq(2), ierr) + call mpi_irecv(m2i(icarp), ncap, + & MPI_INTEGER, ipy_psrc, ipy_psrc, + & mpi_comm_world, irq(3), ierr) + call mpi_isend(m2i(icasp), ncap, + & MPI_INTEGER, ipy_pdest, myrank, + & mpi_comm_world, irq(4), ierr) + + call mpi_irecv(wkxyz(1,icarm), 3*ncam, + & MPI_DOUBLE_PRECISION, ipy_msrc, ipy_msrc, + & mpi_comm_world, irq(5), ierr) + call mpi_isend(wkxyz(1,icasm), 3*ncam, + & MPI_DOUBLE_PRECISION, ipy_mdest, myrank, + & mpi_comm_world, irq(6), ierr) + call mpi_irecv(m2i(icarm), ncam, + & MPI_INTEGER, ipy_msrc, ipy_msrc, + & mpi_comm_world, irq(7), ierr) + call mpi_isend(m2i(icasm), ncam, + & MPI_INTEGER, ipy_mdest, myrank, + & mpi_comm_world, irq(8), ierr) + nrq = 8 + call mpi_waitall(nrq, irq, istatus, ierr) +#endif + + else + icybp1st = icybp0 + icybp0 = 1 + icybp1 = 1 + icybm1st = icybm0 + icybm0 = 2 + ncydiv + 2 + icybm1 = icybm0 + +#ifdef SYNC_COM +!coarray call mpi_sendrecv(na_per_cell(icz0,icybp1st,icx), nccp, +!coarray & MPI_INTEGER, ipy_pdest, myrank, +!coarray & na_per_cell(icz0,icybp0,icx), nccp, MPI_INTEGER, +!coarray & ipy_psrc, ipy_psrc, +!coarray & mpi_comm_world, istatus, 
ierr ) +!coarray call mpi_sendrecv(na_per_cell(icz0,icybm1st,icx), nccm, +!coarray & MPI_INTEGER, ipy_mdest, myrank, +!coarray & na_per_cell(icz0,icybm0,icx), nccm, MPI_INTEGER, +!coarray & ipy_msrc, ipy_msrc, +!coarray & mpi_comm_world, istatus, ierr ) + nd = abs(icyp1 - icyp0) + na_per_cell(:, icybp0:icybp0+nd, icx)[ipy_pdest+1] + . = na_per_cell(:, icybp1st:icybp1st+nd, icx) ! Put + sync all + nd = abs(icym1 - icym0) + na_per_cell(:, icybm0:icybm0+nd, icx)[ipy_pdest+1] + . = na_per_cell(:, icybm1st:icybm1st+nd, icx) ! Put + sync all +!! +#else + call mpi_irecv(na_per_cell(icz0,icybp0,icx), nccp, + & MPI_INTEGER, ipy_psrc, ipy_psrc, mpi_comm_world, + & irq(1), ierr) + call mpi_isend(na_per_cell(icz0,icybp1st,icx), nccp, + & MPI_INTEGER, ipy_pdest, myrank, mpi_comm_world, + & irq(2), ierr) + call mpi_irecv(na_per_cell(icz0,icybm0,icx), nccm, + & MPI_INTEGER, ipy_msrc, ipy_msrc, mpi_comm_world, + & irq(3), ierr) + call mpi_isend(na_per_cell(icz0,icybm1st,icx), nccm, + & MPI_INTEGER, ipy_mdest, myrank, mpi_comm_world, + & irq(4), ierr) + nrq = 4 + call mpi_waitall(nrq, irq, istatus, ierr) +#endif + + nbase = tag(3, 3, icx) - 2*naline + nca = nbase - na_per_cell(2,icybp0,icx) + & - na_per_cell(1,icybp0,icx) + DO icz = icz0, icz1 + tag(icz,icybp0,icx) = nca + nca = nca + na_per_cell(icz,icybp0,icx) + END DO + + nbase = tag(3,2+ncydiv,icx) + 2*naline + nca = nbase - na_per_cell(2,icybm0,icx) + & - na_per_cell(1,icybm0,icx) + DO icz = icz0, icz1 + tag(icz,icybm0,icx) = nca + nca = nca + na_per_cell(icz,icybm0,icx) + END DO + + ncap = naline + icasp = tag(3,icybp1st,icx) - 2*na1cell + icarp = tag(3,icybp0,icx) - 2*na1cell + ncam = naline + icasm = tag(3,icybm1st,icx) - 2*na1cell + icarm = tag(3,icybm0,icx) - 2*na1cell + +#ifdef SYNC_COM +!coarray call mpi_sendrecv(wkxyz(1,icasp), 3*ncap, +!coarray & MPI_DOUBLE_PRECISION, ipy_pdest, myrank, +!coarray & wkxyz(1,icarp), 3*ncap, MPI_DOUBLE_PRECISION, +!coarray & ipy_psrc, ipy_psrc, +!coarray & mpi_comm_world, istatus, ierr ) 
+!coarray call mpi_sendrecv(m2i(icasp), ncap, MPI_INTEGER, +!coarray & ipy_pdest, myrank, +!coarray & m2i(icarp), ncap, MPI_INTEGER,ipy_psrc,ipy_psrc, +!coarray & mpi_comm_world, istatus, ierr ) +!coarray call mpi_sendrecv(wkxyz(1,icasm), 3*ncam, +!coarray & MPI_DOUBLE_PRECISION, ipy_mdest, myrank, +!coarray & wkxyz(1,icarm), 3*ncam, MPI_DOUBLE_PRECISION, +!coarray & ipy_msrc, ipy_msrc, +!coarray & mpi_comm_world, istatus, ierr ) +!coarray call mpi_sendrecv(m2i(icasm), ncam, MPI_INTEGER, +!coarray & ipy_mdest, myrank, +!coarray & m2i(icarm), ncam, MPI_INTEGER,ipy_msrc,ipy_msrc, +!coarray & mpi_comm_world, istatus, ierr ) + wkxyz(:,icarp:icarp+ncap-1)[ipy_pdest+1] + . = wkxyz(:,icasp:icasp+ncap-1) ! Put + m2i(icarp:icarp+ncap-1)[ipy_pdest+1] + . = m2i(icasp:icasp+ncap-1) ! Put + sync all + wkxyz(:,icarm:icarm+ncam-1)[ipy_mdest+1] + . = wkxyz(:,icasm:icasm+ncam-1) ! Put + m2i(icarm:icarm+ncam-1)[ipy_mdest+1] + . = m2i(icasm:icasm+ncam-1) ! Put + sync all +!! +#else + call mpi_irecv(wkxyz(1,icarp), 3*ncap, + & MPI_DOUBLE_PRECISION, ipy_psrc, ipy_psrc, + & mpi_comm_world, irq(1), ierr) + call mpi_isend(wkxyz(1,icasp), 3*ncap, + & MPI_DOUBLE_PRECISION, ipy_pdest, myrank, + & mpi_comm_world, irq(2), ierr) + call mpi_irecv(m2i(icarp), ncap, + & MPI_INTEGER, ipy_psrc, ipy_psrc, + & mpi_comm_world, irq(3), ierr) + call mpi_isend(m2i(icasp), ncap, + & MPI_INTEGER, ipy_pdest, myrank, + & mpi_comm_world, irq(4), ierr) + + call mpi_irecv(wkxyz(1,icarm), 3*ncam, + & MPI_DOUBLE_PRECISION, ipy_msrc, ipy_msrc, + & mpi_comm_world, irq(5), ierr) + call mpi_isend(wkxyz(1,icasm), 3*ncam, + & MPI_DOUBLE_PRECISION, ipy_mdest, myrank, + & mpi_comm_world, irq(6), ierr) + call mpi_irecv(m2i(icarm), ncam, + & MPI_INTEGER, ipy_msrc, ipy_msrc, + & mpi_comm_world, irq(7), ierr) + call mpi_isend(m2i(icasm), ncam, + & MPI_INTEGER, ipy_mdest, myrank, + & mpi_comm_world, irq(8), ierr) + + nrq = 8 + call mpi_waitall(nrq, irq, istatus, ierr) +#endif + + END IF + END DO + END DO + +#ifndef HALFDIREE 
+! coordinate +X + ipx_pdest = ipz*npx*npy + ipy*npx + mod(ipx+1-1/npx+npx,npx) + ipx_psrc = ipz*npx*npy + ipy*npx + mod(ipx-1+1/npx+npx,npx) +#endif +! coordinate -X + ipx_mdest = ipz*npx*npy + ipy*npx + mod(ipx-1+1/npx+npx,npx) + ipx_msrc = ipz*npx*npy + ipy*npx + mod(ipx+1-1/npx+npx,npx) + icz0 = 1 + icz1 = 2 + nczdiv + 2 + icy0 = 1 + icy1 = 2 + ncydiv + 2 + nitr = (2 - 1)/ncxdiv + 1 + + DO itr = 1, nitr + if (itr == 1) then + +#ifndef HALFDIREE + icxp0 = 2 + ncxdiv - 1 + if (ncxdiv == 1) icxp0 = icxp0 + 1 + icxp1 = 2 + ncxdiv + + nccp = (icz1 - icz0 +1)*(icy1 - icy0 +1)*(icxp1 - icxp0 +1) + icxbp0 = icxp0 - ncxdiv + icxbp1 = icxp1 - ncxdiv +#endif + icxm0 = 3 + icxm1 = icxm0 + 1 + if (ncxdiv == 1) icxm1 = icxm0 + + nccm = (icz1 - icz0 +1)*(icy1 - icy0 +1)*(icxm1 - icxm0 +1) + icxbm0 = icxm0 + ncxdiv + icxbm1 = icxm1 + ncxdiv + +#ifdef SYNC_COM +#ifndef HALFDIREE +!coarray call mpi_sendrecv(na_per_cell(icz0,icy0,icxp0), nccp, +!coarray & MPI_INTEGER, ipx_pdest, myrank, +!coarray & na_per_cell(icz0,icy0,icxbp0), +!coarray & nccp, MPI_INTEGER, ipx_psrc, ipx_psrc, +!coarray & mpi_comm_world, istatus, ierr ) + na_per_cell(icz0:icz1,icy0:icy1,icxbp0:icxbp0+(icxp1-icxp0)) + . [ipx_pdest+1] + . = na_per_cell(icz0:icz1,icy0:icy1,icxp0:icxp1) ! Put + sync all +!! +#endif +!coarray call mpi_sendrecv(na_per_cell(icz0,icy0,icxm0), nccm, +!coarray & MPI_INTEGER, ipx_mdest, myrank, +!coarray & na_per_cell(icz0,icy0,icxbm0), nccm, MPI_INTEGER, +!coarray & ipx_msrc, ipx_msrc, +!coarray & mpi_comm_world, istatus, ierr ) + na_per_cell(icz0:icz1,icy0:icy1,icxbm0:icxbm0+(icxm1-icxm0)) + . [ipx_mdest+1] + . = na_per_cell(icz0:icz1,icy0:icy1,icxm0:icxm1) ! Put + sync all +!! 
+#else +#ifndef HALFDIREE + call mpi_irecv(na_per_cell(icz0,icy0,icxbp0), nccp, + & MPI_INTEGER, ipx_psrc, ipx_psrc, mpi_comm_world, + & irq(1), ierr) + call mpi_isend(na_per_cell(icz0,icy0,icxp0), nccp, + & MPI_INTEGER, ipx_pdest, myrank, mpi_comm_world, + & irq(2), ierr) + call mpi_irecv(na_per_cell(icz0,icy0,icxbm0), nccm, + & MPI_INTEGER, ipx_msrc, ipx_msrc, mpi_comm_world, + & irq(3), ierr) + call mpi_isend(na_per_cell(icz0,icy0,icxm0), nccm, + & MPI_INTEGER, ipx_mdest, myrank, mpi_comm_world, + & irq(4), ierr) + nrq = 4 +#else + call mpi_irecv(na_per_cell(icz0,icy0,icxbm0), nccm, + & MPI_INTEGER, ipx_msrc, ipx_msrc, mpi_comm_world, + & irq(1), ierr) + call mpi_isend(na_per_cell(icz0,icy0,icxm0), nccm, + & MPI_INTEGER, ipx_mdest, myrank, mpi_comm_world, + & irq(2), ierr) + nrq = 2 +#endif + + call mpi_waitall(nrq, irq, istatus, ierr) +#endif + +#ifndef HALFDIREE + nbase3 = tag(3,3,3) - 3*narea - 3*naline + if(ncxdiv == 1) nbase3 = nbase3 + narea + DO icxb = icxbp0, icxbp1 + nbase3 = nbase3 + narea + nbase2 = nbase3 + DO icy = icy0, icy1 + nbase2 = nbase2 + naline + nca = nbase2 - na_per_cell(2,icy,icxb) + & - na_per_cell(1,icy,icxb) + DO icz = icz0, icz1 + tag(icz,icy,icxb) = nca + nca = nca + na_per_cell(icz,icy,icxb) + END DO + END DO + END DO +#endif + + nbase3 = tag(3,3,2+ncxdiv) - 3*naline + DO icxb = icxbm0, icxbm1 + nbase3 = nbase3 + narea + nbase2 = nbase3 + DO icy = icy0, icy1 + nbase2 = nbase2 + naline + nca = nbase2 - na_per_cell(2,icy,icxb) + & - na_per_cell(1,icy,icxb) + DO icz = icz0, icz1 + tag(icz,icy,icxb) = nca + nca = nca + na_per_cell(icz,icy,icxb) + END DO + END DO + END DO + +#ifndef HALFDIREE + ncap = narea * 2 + if (ncxdiv == 1) ncap = narea + icasp = tag(3,icy0,icxp0) - 2*na1cell + icarp = tag(3,icy0,icxbp0) - 2*na1cell +#endif + ncam = narea * 2 + if (ncxdiv == 1) ncam = narea + icasm = tag(3,icy0,icxm0) - 2*na1cell + icarm = tag(3,icy0,icxbm0) - 2*na1cell + +#ifdef SYNC_COM +#ifndef HALFDIREE +!coarray call 
mpi_sendrecv(wkxyz(1,icasp), 3*ncap, +!coarray & MPI_DOUBLE_PRECISION, ipx_pdest, myrank, +!coarray & wkxyz(1,icarp), 3*ncap, MPI_DOUBLE_PRECISION, +!coarray & ipx_psrc, ipx_psrc, +!coarray & mpi_comm_world, istatus, ierr ) +!coarray call mpi_sendrecv(m2i(icasp), ncap, MPI_INTEGER, +!coarray & ipx_pdest, myrank, +!coarray & m2i(icarp), ncap, MPI_INTEGER, ipx_psrc, ipx_psrc, +!coarray & mpi_comm_world, istatus, ierr ) + wkxyz(:,icarp:icarp+ncap-1)[ipx_pdest+1] + . = wkxyz(:,icasp:icasp+ncap-1) ! Put + m2i(icarp:icarp+ncap-1)[ipx_pdest+1] + . = m2i(icasp:icasp+ncap-1) ! Put + sync all +!! +#endif +!coarray call mpi_sendrecv(wkxyz(1,icasm), 3*ncam, +!coarray & MPI_DOUBLE_PRECISION, ipx_mdest, myrank, +!coarray & wkxyz(1,icarm), 3*ncam, MPI_DOUBLE_PRECISION, +!coarray & ipx_msrc, ipx_msrc, +!coarray & mpi_comm_world, istatus, ierr ) +!coarray call mpi_sendrecv(m2i(icasm), ncam, MPI_INTEGER, +!coarray & ipx_mdest, myrank, +!coarray & m2i(icarm), ncam, MPI_INTEGER, ipx_msrc, ipx_msrc, +!coarray & mpi_comm_world, istatus, ierr ) + wkxyz(:,icarm:icarm+ncam-1)[ipx_mdest+1] + . = wkxyz(:,icasm:icasm+ncam-1) ! Put + m2i(icarm:icarm+ncam-1)[ipx_mdest+1] + . = m2i(icasm:icasm+ncam-1) ! Put + sync all +!! 
+#else +#ifndef HALFDIREE + call mpi_irecv(wkxyz(1,icarp), 3*ncap, + & MPI_DOUBLE_PRECISION, ipx_psrc, ipx_psrc, + & mpi_comm_world, irq(1), ierr) + call mpi_isend(wkxyz(1,icasp), 3*ncap, + & MPI_DOUBLE_PRECISION, ipx_pdest, myrank, + & mpi_comm_world, irq(2), ierr) + call mpi_irecv(m2i(icarp), ncap, + & MPI_INTEGER, ipx_psrc, ipx_psrc, + & mpi_comm_world, irq(3), ierr) + call mpi_isend(m2i(icasp), ncap, + & MPI_INTEGER, ipx_pdest, myrank, + & mpi_comm_world, irq(4), ierr) + + call mpi_irecv(wkxyz(1,icarm), 3*ncam, + & MPI_DOUBLE_PRECISION, ipx_msrc, ipx_msrc, + & mpi_comm_world, irq(5), ierr) + call mpi_isend(wkxyz(1,icasm), 3*ncam, + & MPI_DOUBLE_PRECISION, ipx_mdest, myrank, + & mpi_comm_world, irq(6), ierr) + call mpi_irecv(m2i(icarm), ncam, + & MPI_INTEGER, ipx_msrc, ipx_msrc, + & mpi_comm_world, irq(7), ierr) + call mpi_isend(m2i(icasm), ncam, + & MPI_INTEGER, ipx_mdest, myrank, + & mpi_comm_world, irq(8), ierr) + + nrq = 8 +#else + call mpi_irecv(wkxyz(1,icarm), 3*ncam, + & MPI_DOUBLE_PRECISION, ipx_msrc, ipx_msrc, + & mpi_comm_world, irq(1), ierr) + call mpi_isend(wkxyz(1,icasm), 3*ncam, + & MPI_DOUBLE_PRECISION, ipx_mdest, myrank, + & mpi_comm_world, irq(2), ierr) + call mpi_irecv(m2i(icarm), ncam, + & MPI_INTEGER, ipx_msrc, ipx_msrc, + & mpi_comm_world, irq(3), ierr) + call mpi_isend(m2i(icasm), ncam, + & MPI_INTEGER, ipx_mdest, myrank, + & mpi_comm_world, irq(4), ierr) + + nrq = 4 +#endif + call mpi_waitall(nrq, irq, istatus, ierr) +#endif + + else +#ifndef HALFDIREE + icxbp1st = icxbp0 + icxbp0 = 1 + icxbp1 = 1 +#endif + icxbm1st = icxbm0 + icxbm0 = 2 + ncxdiv + 2 + icxbm1 = icxbm0 + +#ifdef SYNC_COM +#ifndef HALFDIREE +!coarray call mpi_sendrecv(na_per_cell(icz0,icy0,icxbp1st), nccp, +!coarray & MPI_INTEGER, ipx_pdest, myrank, +!coarray & na_per_cell(icz0,icy0,icxbp0), nccp, MPI_INTEGER, +!coarray & ipx_psrc, ipx_psrc, +!coarray & mpi_comm_world, istatus, ierr ) + nd = abs(icxp1 - icxp0) + na_per_cell(icz0:icz1,icy0:icy1,icxbp0:icxbp0+nd)[ipx_pdest+1] 
+ .= na_per_cell(icz0:icz1,icy0:icy1,icxbp1st:icxbp1st+nd) + sync all +!! +#endif +!coarray call mpi_sendrecv(na_per_cell(icz0,icy0,icxbm1st), nccm, +!coarray & MPI_INTEGER, ipx_mdest, myrank, +!coarray & na_per_cell(icz0,icy0,icxbm0), nccm, MPI_INTEGER, +!coarray & ipx_msrc, ipx_msrc, +!coarray & mpi_comm_world, istatus, ierr ) + nd = abs(icxm1 - icxm0) + na_per_cell(icz0:icz1,icy0:icy1,icxbm0:icxbm0+nd)[ipx_pdest+1] + .= na_per_cell(icz0:icz1,icy0:icy1,icxbm1st:icxbm1st+nd) + sync all +!! +#else +#ifndef HALFDIREE + call mpi_irecv(na_per_cell(icz0,icy0,icxbp0), nccp, + & MPI_INTEGER, ipx_psrc, ipx_psrc, mpi_comm_world, + & irq(1), ierr) + call mpi_isend(na_per_cell(icz0,icy0,icxbp1st), nccp, + & MPI_INTEGER, ipx_pdest, myrank, mpi_comm_world, + & irq(2), ierr) + call mpi_irecv(na_per_cell(icz0,icy0,icxbm0), nccm, + & MPI_INTEGER, ipx_msrc, ipx_msrc, mpi_comm_world, + & irq(3), ierr) + call mpi_isend(na_per_cell(icz0,icy0,icxbm1st), nccm, + & MPI_INTEGER, ipx_mdest, myrank, mpi_comm_world, + & irq(4), ierr) + nrq = 4 +#else + call mpi_irecv(na_per_cell(icz0,icy0,icxbm0), nccm, + & MPI_INTEGER, ipx_msrc, ipx_msrc, mpi_comm_world, + & irq(1), ierr) + call mpi_isend(na_per_cell(icz0,icy0,icxbm1st), nccm, + & MPI_INTEGER, ipx_mdest, myrank, mpi_comm_world, + & irq(2), ierr) + nrq = 2 +#endif + call mpi_waitall(nrq, irq, istatus, ierr) +#endif + +#ifndef HALFDIREE + nbase2 = tag(3,3,3) -2*narea - 3*naline + DO icy = icy0, icy1 + nbase2 = nbase2 + naline + nca = nbase2 - na_per_cell(2,icy,icxbp0) + & - na_per_cell(1,icy,icxbp0) + DO icz = icz0, icz1 + tag(icz,icy,icxbp0) = nca + nca = nca + na_per_cell(icz,icy,icxbp0) + END DO + END DO +#endif + nbase2 = tag(3,3,2+ncxdiv) + 2*narea - 3*naline + DO icy = icy0, icy1 + nbase2 = nbase2 + naline + nca = nbase2 - na_per_cell(2,icy,icxbm0) + & - na_per_cell(1,icy,icxbm0) + DO icz = icz0, icz1 + tag(icz,icy,icxbm0) = nca + nca = nca + na_per_cell(icz,icy,icxbm0) + END DO + END DO + +#ifndef HALFDIREE + ncap = narea + icasp = 
tag(3,icy0,icxbp1st) - 2*na1cell + icarp = tag(3,icy0,icxbp0) - 2*na1cell +#endif + ncam = narea + icasm = tag(3,icy0,icxbm1st) - 2*na1cell + icarm = tag(3,icy0,icxbm0) - 2*na1cell + +#ifdef SYNC_COM +#ifndef HALFDIREE +!coarray call mpi_sendrecv(wkxyz(1,icasp), 3*ncap, +!coarray & MPI_DOUBLE_PRECISION, ipx_pdest, myrank, +!coarray & wkxyz(1,icarp), 3*ncap, MPI_DOUBLE_PRECISION, +!coarray & ipx_psrc, ipx_psrc, +!coarray & mpi_comm_world, istatus, ierr ) +!coarray call mpi_sendrecv(m2i(icasp), ncap, MPI_INTEGER, +!coarray & ipx_pdest, myrank, +!coarray & m2i(icarp), ncap, MPI_INTEGER, ipx_psrc, ipx_psrc, +!coarray & mpi_comm_world, istatus, ierr ) + wkxyz(:,icarp:icarp+ncap-1)[ipx_pdest+1] + . = wkxyz(:,icasp:icasp+ncap-1) ! Put + m2i(icarp:icarp+ncap-1)[ipx_pdest+1] + . = m2i(icasp:icasp+ncap-1) ! Put + sync all +!! +#endif +!coarray call mpi_sendrecv(wkxyz(1,icasm), 3*ncam, +!coarray & MPI_DOUBLE_PRECISION, ipx_mdest, myrank, +!coarray & wkxyz(1,icarm), 3*ncam, MPI_DOUBLE_PRECISION, +!coarray & ipx_msrc, ipx_msrc, +!coarray & mpi_comm_world, istatus, ierr ) +!coarray call mpi_sendrecv(m2i(icasm), ncam, MPI_INTEGER, +!coarray & ipx_mdest, myrank, +!coarray & m2i(icarm), ncam, MPI_INTEGER, ipx_msrc, ipx_msrc, +!coarray & mpi_comm_world, istatus, ierr ) + wkxyz(:,icarm:icarm+ncam-1)[ipx_mdest+1] + . = wkxyz(:,icasm:icasm+ncam-1) ! Put + m2i(icarm:icarm+ncam-1)[ipx_mdest+1] + . = m2i(icasm:icasm+ncam-1) ! Put + sync all +!! 
+#else +#ifndef HALFDIREE + call mpi_irecv(wkxyz(1,icarp), 3*ncap, + & MPI_DOUBLE_PRECISION, ipx_psrc, ipx_psrc, + & mpi_comm_world, irq(1), ierr) + call mpi_isend(wkxyz(1,icasp), 3*ncap, + & MPI_DOUBLE_PRECISION, ipx_pdest, myrank, + & mpi_comm_world, irq(2), ierr) + call mpi_irecv(m2i(icarp), ncap, + & MPI_INTEGER, ipx_psrc, ipx_psrc, + & mpi_comm_world, irq(3), ierr) + call mpi_isend(m2i(icasp), ncap, + & MPI_INTEGER, ipx_pdest, myrank, + & mpi_comm_world, irq(4), ierr) + + call mpi_irecv(wkxyz(1,icarm), 3*ncam, + & MPI_DOUBLE_PRECISION, ipx_msrc, ipx_msrc, + & mpi_comm_world, irq(5), ierr) + call mpi_isend(wkxyz(1,icasm), 3*ncam, + & MPI_DOUBLE_PRECISION, ipx_mdest, myrank, + & mpi_comm_world, irq(6), ierr) + call mpi_irecv(m2i(icarm), ncam, + & MPI_INTEGER, ipx_msrc, ipx_msrc, + & mpi_comm_world, irq(7), ierr) + call mpi_isend(m2i(icasm), ncam, + & MPI_INTEGER, ipx_mdest, myrank, + & mpi_comm_world, irq(8), ierr) + + nrq = 8 +#else + call mpi_irecv(wkxyz(1,icarm), 3*ncam, + & MPI_DOUBLE_PRECISION, ipx_msrc, ipx_msrc, + & mpi_comm_world, irq(1), ierr) + call mpi_isend(wkxyz(1,icasm), 3*ncam, + & MPI_DOUBLE_PRECISION, ipx_mdest, myrank, + & mpi_comm_world, irq(2), ierr) + call mpi_irecv(m2i(icarm), ncam, + & MPI_INTEGER, ipx_msrc, ipx_msrc, + & mpi_comm_world, irq(3), ierr) + call mpi_isend(m2i(icasm), ncam, + & MPI_INTEGER, ipx_mdest, myrank, + & mpi_comm_world, irq(4), ierr) + + nrq = 4 +#endif + call mpi_waitall(nrq, irq, istatus, ierr) +#endif + + END IF + END DO + +!=== create i2m ===! 
+ ntmp=0
+!$omp parallel default(none)
+!$omp& private(ncc,icx,icy,icz,ica,icag)
+!$omp& shared(ncxdiv,ncydiv,nczdiv,tag,na_per_cell)
+!$omp& shared(m2i,i2m)
+!$omp& reduction(+:ntmp)
+!$omp do
+#ifndef HALFDIREE
+ do ncc=1,(ncxdiv+4)*(ncydiv+4)*(nczdiv+4)
+#else
+ do ncc=2*(ncydiv+4)*(nczdiv+4)+1,
+ & (ncxdiv+4)*(ncydiv+4)*(nczdiv+4)
+#endif
+ icz=mod(ncc-1,nczdiv+4) +1
+ icy=mod(ncc-1,(nczdiv+4)*(ncydiv+4))
+ icy=icy/(nczdiv+4) +1
+ icx=(ncc-1)/((nczdiv+4)*(ncydiv+4))+1
+
+ if(icx.ge.3.and.icx.le.ncxdiv+2 .and.
+ & icy.ge.3.and.icy.le.ncydiv+2 .and.
+ & icz.ge.3.and.icz.le.nczdiv+2) cycle
+ do ica=tag(icz,icy,icx),tag(icz,icy,icx)
+ & +na_per_cell(icz,icy,icx)-1
+ icag=m2i(ica)
+ i2m(icag)=ica
+ ntmp=ntmp+1
+! if(icag .le. -1) cycle
+ end do ! ica
+ end do ! ncc
+!$omp end do
+!$omp end parallel
+ ndatm=nselfatm+ntmp
+
+ return
+ end
diff --git a/MODYLAS-MINI/src/xmpAPI_comm_fmm.f b/MODYLAS-MINI/src/xmpAPI_comm_fmm.f
new file mode 100755
index 0000000..6d49252
--- /dev/null
+++ b/MODYLAS-MINI/src/xmpAPI_comm_fmm.f
@@ -0,0 +1,1227 @@
+!----------------------------------------------------------------------
+! Copyright (C) 2003-2014 Kensuke Iwahashi, Noriyuki Yoshii,
+! Atsushi Yamada, Yoshimichi Andoh,
+! Kazushi Fujimoto, Hidekazu Kojima,
+! Fumiyasu Mizutani, and Susumu Okazaki
+! All Rights Reserved.
+!
+! Copyright (C) 2013-2014 RIKEN AICS
+! All Rights Reserved.
+!
+! This MD program has been developed at Nagoya University, and
+! Institute for Molecular Science, National Institutes of Natural
+! Sciences.
+! And this work was supported by
+! Next-Generation Supercomputer Project, and
+! NAREGI Nanoscience Project,
+! Ministry of Education, Culture, Sports, Science and Technology,
+! Japan.
+!
+! This program is NOT a free software and distributed under the
+! license described in the LICENSE.
+! All rights are reserved by the authors of this program.
+!
+! The authors do NOT warrant or assume any legal liability or
+! 
responsibility for the accuracy or completeness. +!---------------------------------------------------------------------- +c--------------------------------------------------------------------- + subroutine comm_fmm_local_top(il0,mylm,wm,nscell, + $ nsczdiv, nscydiv,nscxdiv) +!! UL ver.20111004 +c--------------------------------------------------------------------- + use comm_base + use md_fmm + use mpivar + implicit none + INCLUDE 'mpif.h' + integer m1 + integer(4) nscell, nsczdiv, nscydiv, nscxdiv, mylm, il0 + complex(8) :: ccbuf(mylm*5*nscydiv*nscxdiv) +!coarray complex(8) :: rccbuf(mylm*5*nscydiv*nscxdiv,2) + complex(8) :: wm(mylm,nscell,nscell,nscell) +!coarray + complex(8),allocatable :: rccbuf(:,:)[:] + complex(8),allocatable :: wm_tmp(:,:,:,:)[:] + integer,allocatable :: ndis(:)[:] + integer me, np, nb, nd + integer ierrcode +!! + integer np_supercellz,np_supercelly,np_supercellx + integer mcell_size + integer ipz_dest, ipy_dest, ipx_dest + integer ipz_src, ipy_src, ipx_src + integer nitr,itr + integer icz0, icz1, iczb0, iczb1 + integer icy0, icy1, icyb0, icyb1 + integer icx0, icx1, icxb0, icxb1 + integer ncc, ncc2 + integer icx, icy, icz + integer m + integer ibs, ibr + integer iczb + integer icyb0prior, icxb0prior + integer ierr,istatus(mpi_status_size) + +!coarray + allocate( rccbuf(mylm*5*nscydiv*nscxdiv,2)[*] ) + allocate( wm_tmp(mylm,nscell,nscell,nscell)[*] ) + wm_tmp = wm + me = this_image() + np = num_images() + allocate( ndis(np)[*] ) +!! + +!=== local constant ===! + m1 = (nmax+1)*(nmax+1) + mcell_size = 2**il0 + +!=== global constant ===! + np_supercellz = (npz * mcell_size-1) / ncell + 1 + np_supercelly = (npy * mcell_size-1) / ncell + 1 + np_supercellx = (npx * mcell_size-1) / ncell + 1 + + icz0 = (ncell / mcell_size * ipz) / npz + 1 + icz1 = icz0 + nsczdiv - 1 + icy0 = (ncell / mcell_size * ipy) / npy + 1 + icy1 = icy0 + nscydiv - 1 + icx0 = (ncell / mcell_size * ipx) / npx + 1 + icx1 = icx0 + nscxdiv - 1 + +! 
ULmoment +Z + ipz_dest = mod(ipz + np_supercellz - 1/npz + npz, npz)*npy*npx + & + ipy*npx + ipx + ipz_src = mod(ipz - np_supercellz + 1/npz + npz, npz)*npy*npx + & + ipy*npx + ipx + nitr = nscell / 2 + if(nscell > npz) nitr = npz / 2 + + DO itr = 1, nitr + if (itr==1) then + iczb0 = mod(icz0 - nsczdiv - 1 + nscell, nscell) + 1 + iczb1 = mod(icz1 - nsczdiv - 1 + nscell, nscell) + 1 + + ncc = 0 + DO icx = icx0, icx1 + DO icy = icy0, icy1 + DO icz = icz0, icz1 + DO m = 1, m1 + ncc = ncc + 1 + ccbuf(ncc) = wm_tmp(m, icz, icy, icx ) + END DO + END DO + END DO + END DO + +!coarray call mpi_sendrecv(ccbuf, ncc, MPI_DOUBLE_COMPLEX,ipz_dest, +!coarray & myrank, rccbuf(1,1), ncc, MPI_DOUBLE_COMPLEX, +!coarray & ipz_src, ipz_src,mpi_comm_world,istatus,ierr) + rccbuf(1:ncc,1)[ipz_dest+1] = ccbuf(1:ncc) ! Put + sync all +!! + + ncc2 = 0 + DO icx = icx0, icx1 + DO icy = icy0, icy1 + DO iczb = iczb0, iczb1 + DO m = 1, m1 + ncc2 = ncc2 + 1 + wm_tmp(m,iczb,icy,icx) = rccbuf(ncc2,1) + END DO + END DO + END DO + END DO + else + ibs = mod(itr, 2) + 1 + ibr = mod(itr+1, 2) + 1 + iczb0 = mod(icz0 - nsczdiv*itr - 1 + nscell, nscell) + 1 + iczb1 = mod(iczb0 + nsczdiv - 1 - 1 + nscell,nscell) + 1 + +!debug +! write(6,*) "ibs,ibr,iczb0,iczb1,ncc,mylm*5*nscydiv*nscxdiv= ", +! & ibs,ibr,iczb0,iczb1,ncc,mylm*5*nscydiv*nscxdiv +! call mpi_abort(mpi_comm_world,ierrcode,ierr) +!! +!coarray call mpi_sendrecv(rccbuf(1,ibs), ncc, MPI_DOUBLE_COMPLEX, +!coarray & ipz_dest, +!coarray & myrank, rccbuf(1,ibr), ncc, MPI_DOUBLE_COMPLEX, +!coarray & ipz_src, ipz_src,mpi_comm_world,istatus,ierr) + rccbuf(1:ncc,ibr)[ipz_dest+1] = rccbuf(1:ncc,ibs) ! Put + sync all +!! + + ncc2 = 0 + DO icx = icx0, icx1 + DO icy = icy0, icy1 + DO iczb = iczb0, iczb1 + DO m = 1, m1 + ncc2 = ncc2 + 1 + wm_tmp(m,iczb,icy,icx) = rccbuf(ncc2,ibr) + END DO + END DO + END DO + END DO + end if + END DO + +! 
ULmoment -Z + ipz_dest = mod(ipz - np_supercellz + 1/npz + npz, npz)*npy*npx + & + ipy*npx + ipx + ipz_src = mod(ipz + np_supercellz - 1/npz + npz, npz)*npy*npx + & + ipy*npx + ipx + nitr = (nscell - 1) / 2 + if(nscell > npz) nitr = (npz - 1) / 2 + + DO itr = 1, nitr + if (itr==1) then + iczb0 = mod(icz0 + nsczdiv - 1, nscell) + 1 + iczb1 = mod(icz1 + nsczdiv - 1, nscell) + 1 + +!coarray call mpi_sendrecv(ccbuf, ncc, MPI_DOUBLE_COMPLEX, +!coarray & ipz_dest, myrank, +!coarray & rccbuf(1,1), ncc, MPI_DOUBLE_COMPLEX, +!coarray & ipz_src, ipz_src,mpi_comm_world,istatus,ierr) + rccbuf(1:ncc,1)[ipz_dest+1] = ccbuf(1:ncc) ! Put + sync all +!! + ncc2 = 0 + DO icx = icx0, icx1 + DO icy = icy0, icy1 + DO iczb = iczb0, iczb1 + DO m = 1, m1 + ncc2 = ncc2 + 1 + wm_tmp(m,iczb,icy,icx) = rccbuf(ncc2,1) + END DO + END DO + END DO + END DO + else + ibs = mod(itr, 2) + 1 + ibr = mod(itr+1, 2) + 1 + iczb0 = mod(icz0 + nsczdiv * itr - 1, nscell) + 1 + iczb1 = mod(iczb0 + nsczdiv - 1 - 1, nscell) + 1 + +!coarray call mpi_sendrecv(rccbuf(1,ibs), ncc, MPI_DOUBLE_COMPLEX, +!coarray & ipz_dest, +!coarray & myrank, rccbuf(1,ibr), ncc, MPI_DOUBLE_COMPLEX, +!coarray & ipz_src, ipz_src,mpi_comm_world,istatus,ierr) + rccbuf(1:ncc,ibr)[ipz_dest+1] = rccbuf(1:ncc,ibs) ! Put + sync all +!! + ncc2 = 0 + DO icx = icx0, icx1 + DO icy = icy0, icy1 + DO iczb = iczb0, iczb1 + DO m = 1, m1 + ncc2 = ncc2 + 1 + wm_tmp(m,iczb,icy,icx) = rccbuf(ncc2,ibr) + END DO + END DO + END DO + END DO + end if + END DO + +! 
ULmoment +Y + ipy_dest = ipz*npy*npx + & + mod(ipy + np_supercelly - 1/npy + npy, npy)*npx + ipx + ipy_src = ipz*npy*npx + & + mod(ipy - np_supercelly + 1/npy + npy, npy)*npx + ipx + nitr = nscell / 2 + if(nscell > npy) nitr = npy / 2 + + DO icx = icx0, icx1 + DO itr = 1, nitr + if (itr == 1) then + icyb0 = mod(icy0 - nscydiv - 1 + nscell, nscell) + 1 + icyb1 = mod(icy1 - nscydiv - 1 + nscell, nscell) + 1 + ncc = m1 * nscell * (icyb1 - icyb0 + 1) +!coarray call mpi_sendrecv(wm_tmp(1,1,icy0,icx), ncc, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipy_dest, myrank, wm_tmp(1,1,icyb0,icx), ncc, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipy_src, ipy_src,mpi_comm_world,istatus,ierr ) + nd = abs(icyb1 - icyb0) + ndis(me)[ipy_src+1] = icyb0 ! Put + sync all + nb = ndis(ipy_dest+1) + wm_tmp( :, :, nb:nb +nd, icx )[ipy_dest+1] + . = wm_tmp( :, :, icy0:icy0+nd, icx ) ! Put + sync all +!! + else + icyb0prior = icyb0 + icyb0 = mod(icy0 - nscydiv * itr - 1 + nscell, + & nscell) + 1 +!coarray call mpi_sendrecv(wm_tmp(1,1,icyb0prior,icx), ncc, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipy_dest, myrank, wm_tmp(1,1,icyb0,icx), ncc, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipy_src, ipy_src,mpi_comm_world,istatus,ierr ) + ndis(me)[ipy_src+1] = icyb0 ! Put + sync all + nb = ndis(ipy_dest+1) + wm_tmp( :, :, nb:nb +nd, icx )[ipy_dest+1] + . = wm_tmp( :, :, icyb0prior:icyb0prior+nd, icx ) ! Put + sync all +!! + endif + END DO + END DO + +! 
ULmoment -Y + ipy_dest = ipz*npy*npx + & + mod(ipy - np_supercelly + 1/npy + npy, npy)*npx + ipx + ipy_src = ipz*npy*npx + & + mod(ipy + np_supercelly - 1/npy + npy, npy)*npx + ipx + nitr = (nscell - 1) / 2 + if(nscell > npy) nitr = (npy -1) / 2 + + DO icx = icx0, icx1 + DO itr = 1, nitr + if (itr ==1) then + icyb0 = mod(icy0 + nscydiv - 1, nscell) + 1 + icyb1 = mod(icy1 + nscydiv - 1, nscell) + 1 + ncc = m1 * nscell * (icyb1 - icyb0 + 1) +!coarray call mpi_sendrecv(wm_tmp(1,1,icy0,icx), ncc, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipy_dest, myrank, wm_tmp(1,1,icyb0,icx), ncc, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipy_src, ipy_src,mpi_comm_world,istatus,ierr) + nd = abs(icyb1 - icyb0) + ndis(me)[ipy_src+1] = icyb0 ! Put + sync all + nb = ndis(ipy_dest+1) + wm_tmp( :, :, nb:nb +nd, icx )[ipy_dest+1] + . = wm_tmp( :, :, icy0:icy0+nd, icx ) ! Put + sync all +!! + else + icyb0prior = icyb0 + icyb0 = mod(icy0 + nscydiv * itr - 1, nscell) + 1 +!coarray call mpi_sendrecv(wm_tmp(1,1,icyb0prior,icx), ncc, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipy_dest, myrank, wm_tmp(1,1,icyb0,icx), ncc, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipy_src, ipy_src,mpi_comm_world,istatus,ierr) + ndis(me)[ipy_src+1] = icyb0 ! Put + sync all + nb = ndis(ipy_dest+1) + wm_tmp( :, :, nb:nb +nd, icx )[ipy_dest+1] + . = wm_tmp( :, :, icyb0prior:icyb0prior+nd, icx ) ! Put + sync all +!! + end if + END DO + END DO + +! 
ULmoment +X + ipx_dest = ipz*npy*npx + ipy*npx + & + mod(ipx + np_supercellx - 1/npx + npx, npx) + ipx_src = ipz*npy*npx + ipy*npx + & + mod(ipx - np_supercellx + 1/npx + npx, npx) + nitr = nscell / 2 + if(nscell > npx) nitr = npx / 2 + + DO itr = 1, nitr + if (itr ==1) then + icxb0 = mod(icx0 - nscxdiv - 1 + nscell, nscell) + 1 + icxb1 = mod(icx1 - nscxdiv - 1 + nscell, nscell) + 1 + ncc = m1 * nscell * nscell * (icxb1 - icxb0 + 1) +!coarray call mpi_sendrecv(wm_tmp(1,1,1,icx0), ncc, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipx_dest, myrank, wm_tmp(1,1,1,icxb0), ncc, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipx_src, ipx_src,mpi_comm_world,istatus,ierr ) + nd = abs(icxb1 - icxb0) + ndis(me)[ipx_src+1] = icxb0 ! Put + sync all + nb = ndis(ipx_dest+1) + wm_tmp( :, :, :, nb:nb +nd )[ipx_dest+1] + . = wm_tmp( :, :, :, icx0:icx0+nd ) ! Put + sync all +!! + else + icxb0prior = icxb0 + icxb0 = mod(icx0 - nscxdiv * itr - 1 + nscell, nscell) + 1 +!coarray call mpi_sendrecv(wm_tmp(1,1,1,icxb0prior), ncc, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipx_dest, myrank, wm_tmp(1,1,1,icxb0), ncc, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipx_src, ipx_src,mpi_comm_world,istatus,ierr ) + ndis(me)[ipx_src+1] = icxb0 ! Put + sync all + nb = ndis(ipx_dest+1) + wm_tmp( :, :, :, nb:nb +nd )[ipx_dest+1] + . = wm_tmp( :, :, :, icxb0prior:icxb0prior+nd ) ! Put + sync all +!! + end if + END DO + +! 
ULmoment -X
+      ipx_dest = ipz*npy*npx + ipy*npx +
+     &           mod(ipx - np_supercellx + 1/npx + npx, npx)
+      ipx_src = ipz*npy*npx + ipy*npx +
+     &           mod(ipx + np_supercellx - 1/npx + npx, npx)
+      nitr = (nscell - 1) / 2
+      if(nscell > npx) nitr = (npx - 1) / 2
+
+      DO itr = 1, nitr
+        if (itr ==1) then
+          icxb0 = mod(icx0 + nscxdiv - 1, nscell) + 1
+          icxb1 = mod(icx1 + nscxdiv - 1, nscell) + 1
+          ncc = m1 * nscell * nscell * (icxb1 - icxb0 + 1)
+!coarray        call mpi_sendrecv(wm_tmp(1,1,1,icx0), ncc,
+!coarray     &                    MPI_DOUBLE_COMPLEX,
+!coarray     &       ipx_dest, myrank, wm_tmp(1,1,1,icxb0), ncc,
+!coarray     &                    MPI_DOUBLE_COMPLEX,
+!coarray     &       ipx_src, ipx_src,mpi_comm_world,istatus,ierr )
+          nd = abs(icxb1 - icxb0)
+          ndis(me)[ipx_src+1] = icxb0 ! Put
+          sync all
+          nb = ndis(ipx_dest+1)
+          wm_tmp( :, :, :, nb:nb +nd )[ipx_dest+1]
+     .      = wm_tmp( :, :, :, icx0:icx0+nd ) ! Put
+          sync all
+!!
+        else
+          icxb0prior = icxb0
+          icxb0 = mod(icx0 + nscxdiv * itr - 1, nscell) + 1
+!coarray        call mpi_sendrecv(wm_tmp(1,1,1,icxb0prior), ncc,
+!coarray     &                    MPI_DOUBLE_COMPLEX,
+!coarray     &       ipx_dest, myrank, wm_tmp(1,1,1,icxb0), ncc,
+!coarray     &                    MPI_DOUBLE_COMPLEX,
+!coarray     &       ipx_src, ipx_src,mpi_comm_world,istatus,ierr )
+          ndis(me)[ipx_src+1] = icxb0 ! Put
+          sync all
+          nb = ndis(ipx_dest+1) ! fix: ndis holds image index = rank+1
+          wm_tmp( :, :, :, nb:nb +nd )[ipx_dest+1]
+     .      = wm_tmp( :, :, :, icxb0prior:icxb0prior+nd ) ! Put
+          sync all
+!!
+        end if
+      END DO
+
+!coarray
+      wm = wm_tmp
+!!
+
+      return
+      END
+c---------------------------------------------------------------------
+      subroutine comm_fmm_local_multi(ilevel, mylm, wm,
+     $     lclz, lcly, lclx, nbsize, nscydiv, nscxdiv)
+!! 
LL ver.20120211 +c--------------------------------------------------------------------- + use comm_base + use md_fmm + use md_fmm_domdiv_flg + use mpivar + implicit none + include 'mpif.h' + integer(4) :: ilevel + integer(4) :: mylm, lclz, lcly, lclx, nbsize + integer(4) :: nscxdiv, nscydiv, nsczdiv + complex(8) :: wm(mylm, lclz, lcly, lclx) + integer(4) :: nbound_zm, nbound_ym, nbound_xm + integer(4) :: nbound_zp, nbound_yp, nbound_xp + complex(8) ccbufp(mylm*nbsize*nscydiv*nscxdiv) +!coarray complex(8) rccbufp(mylm*nbsize*nscydiv*nscxdiv, 2) + complex(8) ccbufm(mylm*nbsize*nscydiv*nscxdiv) +!coarray complex(8) rccbufm(mylm*nbsize*nscydiv*nscxdiv, 2) + complex(8),allocatable :: rccbufp(:,:)[:] + complex(8),allocatable :: rccbufm(:,:)[:] + complex(8),allocatable :: wm_tmp(:,:,:,:)[:] + integer,allocatable :: ndis(:)[:] + integer,allocatable :: mdis(:)[:] + integer me, np, nb, nd, mb, md +!! + integer m1 + integer np_supercell + integer mcell_size + integer ipz_pdest, ipy_pdest, ipx_pdest + integer ipz_psrc, ipy_psrc, ipx_psrc + integer ipz_mdest, ipy_mdest, ipx_mdest + integer ipz_msrc, ipy_msrc, ipx_msrc + integer nitr, itr + integer nf_pprovider + integer nf_mprovider + integer icz, icy, icx + integer iczp0, iczp1 + integer iczbp0, iczbp1 + integer iczm0, iczm1 + integer iczbm0, iczbm1 + integer iczb + integer icyp0, icyp1 + integer icybp0 + integer icym0, icym1 + integer icybm0 + integer icxp0, icxp1 + integer icxbp0 + integer icxm0, icxm1 + integer icxbm0 + integer icybp0prior + integer icxbp0prior + integer icybm0prior + integer icxbm0prior + integer ncc2 + integer nccp + integer nccm + integer m + integer ibs, ibr + integer istatus(mpi_status_size, 4), ierr +#ifndef SYNC_COM + integer,dimension(4) :: irq + integer nrq +#endif +!coarray + allocate( rccbufp(mylm*nbsize*nscydiv*nscxdiv, 2)[*] ) + allocate( rccbufm(mylm*nbsize*nscydiv*nscxdiv, 2)[*] ) + allocate( wm_tmp(mylm, lclz, lcly, lclx)[*] ) + wm_tmp = wm + me = this_image() + np = num_images() + allocate( 
ndis(np)[*] ) + allocate( mdis(np)[*] ) +!! + +! ---- 3D rank order rule. ---- +! ipx=mod(myrank, npx) +! ipy=mod((myrank - ipx) / npx, npy) +! ipz=mod((myrank - ipx - ipy*npx) / (npx*npy), npz) + +!=== local constant ===! + m1 = mylm + mcell_size = fmm_data(ilevel)%mcell_size + +!=== global constant ===! + nsczdiv = fmm_data(ilevel)%nsczdiv + nscydiv = fmm_data(ilevel)%nscydiv + nscxdiv = fmm_data(ilevel)%nscxdiv + + nbound_zm=fmm_data(ilevel)%nbound_zm + nbound_zp=fmm_data(ilevel)%nbound_zp + nbound_ym=fmm_data(ilevel)%nbound_ym + nbound_yp=fmm_data(ilevel)%nbound_yp + nbound_xm=fmm_data(ilevel)%nbound_xm + nbound_xp=fmm_data(ilevel)%nbound_xp + +!----- lower level moment communication starts here. ----- + +! LLmoment +Z + np_supercell = (npz * mcell_size - 1) / ncell + 1 + ipz_pdest = mod(ipz + np_supercell - 1/npz + npz, npz)*npy*npx + & + ipy*npx + ipx + ipz_psrc = mod(ipz - np_supercell + 1/npz + npz, npz)*npy*npx + & + ipy*npx + ipx +! LLmoment -Z + ipz_mdest = mod(ipz - np_supercell + 1/npz + npz, npz)*npy*npx + & + ipy*npx + ipx + ipz_msrc = mod(ipz + np_supercell - 1/npz + npz, npz)*npy*npx + & + ipy*npx + ipx + + nf_pprovider = 0 + nf_mprovider = 0 + if(nbound_zm == 4 .and. nsczdiv == 1) nf_pprovider = 1 + if(nbound_zp == 4 .and. nsczdiv == 1) nf_mprovider = 1 + nitr = max( (nbound_zm - 1) / nsczdiv + 1, + & (nbound_zp - 1) / nsczdiv + 1 ) + +!coarray +! allocate( rccbufp(mylm*nbsize*nscydiv*nscxdiv, 2)[*] ) +! allocate( rccbufm(mylm*nbsize*nscydiv*nscxdiv, 2)[*] ) +!! + do itr = 1, nitr + if (itr == 1) then ! 
first iteration + iczp0 = nsczdiv + 1 + if(nsczdiv < nbound_zm) iczp0 = nbound_zm + 1 + iczp1 = nbound_zm + nsczdiv + iczbp0 = iczp0 - nsczdiv + iczbp1 = iczp1 - nsczdiv + + nccp = 0 + do icx = nbound_xm + 1, nbound_xm + nscxdiv + do icy = nbound_ym + 1, nbound_ym + nscydiv + do icz = iczp0, iczp1 + do m = 1, mylm + nccp = nccp + 1 + ccbufp(nccp) = wm_tmp(m, icz, icy, icx ) + end do + end do + end do + end do + + iczm0 = nbound_zm + 1 + iczm1 = nbound_zm + nbound_zp + if(nsczdiv < nbound_zp) iczm1 = nbound_zm + nsczdiv + iczbm0 = iczm0 + nsczdiv + iczbm1 = iczm1 + nsczdiv + nccm = 0 + do icx = nbound_xm + 1, nbound_xm + nscxdiv + do icy = nbound_ym + 1, nbound_ym + nscydiv + do icz = iczm0, iczm1 + do m = 1, mylm + nccm = nccm + 1 + ccbufm(nccm) = wm_tmp(m, icz, icy, icx ) + end do + end do + end do + end do + +#ifdef SYNC_COM +!coarray call mpi_sendrecv(ccbufp, nccp, MPI_DOUBLE_COMPLEX, +!coarray & ipz_pdest, myrank, +!coarray & rccbufp(1,1), nccp, MPI_DOUBLE_COMPLEX, +!coarray & ipz_psrc, ipz_psrc, +!coarray & mpi_comm_world, istatus, ierr) +!coarray call mpi_sendrecv(ccbufm, nccm, MPI_DOUBLE_COMPLEX, +!coarray & ipz_mdest, myrank, +!coarray & rccbufm(1,1), nccm, MPI_DOUBLE_COMPLEX, +!coarray & ipz_msrc, ipz_msrc, +!coarray & mpi_comm_world, istatus, ierr ) + rccbufp(1:nccp,1)[ipz_pdest+1] = ccbufp(1:nccp) ! Put + rccbufm(1:nccm,1)[ipz_mdest+1] = ccbufm(1:nccm) ! Put + sync all +!! 
+#else + call mpi_irecv(rccbufp(1,1), nccp, + & MPI_DOUBLE_COMPLEX, ipz_psrc, ipz_psrc, + & mpi_comm_world, irq(1), ierr) + call mpi_isend(ccbufp, nccp, + & MPI_DOUBLE_COMPLEX, ipz_pdest, myrank, + & mpi_comm_world, irq(2), ierr) + call mpi_irecv(rccbufm(1,1), nccm, + & MPI_DOUBLE_COMPLEX, ipz_msrc, ipz_msrc, + & mpi_comm_world, irq(3), ierr) + call mpi_isend(ccbufm, nccm, + & MPI_DOUBLE_COMPLEX, ipz_mdest, myrank, + & mpi_comm_world, irq(4), ierr) + nrq = 4 + call mpi_waitall(nrq, irq, istatus, ierr) +#endif + + ncc2 = 0 + do icx = nbound_xm + 1, nbound_xm + nscxdiv + do icy = nbound_ym + 1, nbound_ym + nscydiv + do iczb = iczbp0, iczbp1 + do m = 1, mylm + ncc2 = ncc2 + 1 + wm_tmp(m,iczb,icy,icx) = rccbufp(ncc2,1) + end do + end do + end do + end do + + ncc2 = 0 + do icx = nbound_xm + 1, nbound_xm + nscxdiv + do icy = nbound_ym + 1, nbound_ym + nscydiv + do iczb = iczbm0, iczbm1 + do m = 1, mylm + ncc2 = ncc2 + 1 + wm_tmp(m,iczb,icy,icx) = rccbufm(ncc2,1) + end do + end do + end do + end do + + else ! iteration follows + + ibs = mod(itr, 2) + 1 + ibr = mod(itr + 1, 2) + 1 + iczbp0 = nbound_zm - nsczdiv*itr + 1 + iczbp1 = iczbp0 + nsczdiv - 1 + iczbm0 = nbound_zm + nsczdiv + nsczdiv * (itr - 1) + 1 + iczbm1 = iczbm0 + nsczdiv - 1 + + if(nsczdiv /= 1 .or. (nsczdiv == 1 .and. itr < nitr)) then + +#ifdef SYNC_COM +!coarray call mpi_sendrecv(rccbufp(1,ibs), nccp, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipz_pdest, myrank, +!coarray & rccbufp(1,ibr), nccp, MPI_DOUBLE_COMPLEX, +!coarray & ipz_psrc, ipz_psrc, +!coarray & mpi_comm_world, istatus, ierr ) +!coarray call mpi_sendrecv(rccbufm(1,ibs), nccm, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipz_mdest, myrank, +!coarray & rccbufm(1,ibr), nccm, MPI_DOUBLE_COMPLEX, +!coarray & ipz_msrc, ipz_msrc, +!coarray & mpi_comm_world, istatus, ierr ) + rccbufp(1:nccp,ibr)[ipz_pdest+1] = rccbufp(1:nccp,ibs) ! Put + rccbufm(1:nccm,ibr)[ipz_mdest+1] = rccbufm(1:nccm,ibs) ! Put + sync all +!! 
+#else + call mpi_irecv(rccbufp(1,ibr), nccp, + & MPI_DOUBLE_COMPLEX, ipz_psrc, ipz_psrc, + & mpi_comm_world, irq(1), ierr) + call mpi_isend(rccbufp(1,ibs), nccp, + & MPI_DOUBLE_COMPLEX, ipz_pdest, myrank, + & mpi_comm_world, irq(2), ierr) + call mpi_irecv(rccbufm(1,ibr), nccm, + & MPI_DOUBLE_COMPLEX, ipz_msrc, ipz_msrc, + & mpi_comm_world, irq(3), ierr) + call mpi_isend(rccbufm(1,ibs), nccm, + & MPI_DOUBLE_COMPLEX, ipz_mdest, myrank, + & mpi_comm_world, irq(4), ierr) + nrq = 4 + call mpi_waitall(nrq, irq, istatus, ierr) +#endif + + ncc2 = 0 + do icx = nbound_xm + 1, nbound_xm + nscxdiv + do icy = nbound_ym + 1, nbound_ym + nscydiv + do iczb = iczbp0, iczbp1 + do m = 1, mylm + ncc2 = ncc2 + 1 + wm_tmp(m,iczb,icy,icx) = rccbufp(ncc2,ibr) + end do + end do + end do + end do + ncc2 = 0 + do icx = nbound_xm + 1, nbound_xm + nscxdiv + do icy = nbound_ym + 1, nbound_ym + nscydiv + do iczb = iczbm0, iczbm1 + do m = 1, mylm + ncc2 = ncc2 + 1 + wm_tmp(m,iczb,icy,icx) = rccbufm(ncc2,ibr) + end do + end do + end do + end do + + else ! = final pairing. = + + if(nf_pprovider == 1) then +#ifdef SYNC_COM +!coarray call mpi_send(rccbufp(1,ibs), nccp, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipz_pdest, myrank, +!coarray & mpi_comm_world, istatus, ierr ) + rccbufp(1:nccp,ibr)[ipz_pdest+1] = rccbufp(1:nccp,ibs) ! Put + sync all +!! +#else + call mpi_isend(rccbufp(1,ibs), nccp, + & MPI_DOUBLE_COMPLEX, ipz_pdest, myrank, + & mpi_comm_world, irq(1), ierr) +#endif + + else + +#ifdef SYNC_COM +!coarray call mpi_recv(rccbufp(1,ibr), nccp, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipz_psrc, ipz_psrc, +!coarray & mpi_comm_world, istatus, ierr ) +#else + call mpi_irecv(rccbufp(1,ibr), nccp, + & MPI_DOUBLE_COMPLEX, ipz_psrc, ipz_psrc, + & mpi_comm_world, irq(1), ierr) +#endif + + endif ! 
final provider (p) + + if(nf_mprovider == 1) then +#ifdef SYNC_COM +!coarray call mpi_send(rccbufm(1,ibs), nccm, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipz_mdest, myrank, +!coarray & mpi_comm_world, istatus, ierr ) + rccbufm(1:nccm,ibr)[ipz_mdest+1] = rccbufm(1:nccm,ibs) ! Put + sync all +!! +#else + call mpi_isend(rccbufm(1,ibs), nccm, + & MPI_DOUBLE_COMPLEX, ipz_mdest, myrank, + & mpi_comm_world, irq(2), ierr) +#endif + + else +#ifdef SYNC_COM +!coarray call mpi_recv(rccbufm(1,ibr), nccm, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipz_msrc, ipz_msrc, +!coarray & mpi_comm_world, istatus, ierr ) +#else + call mpi_irecv(rccbufm(1,ibr), nccm, + & MPI_DOUBLE_COMPLEX, ipz_msrc, ipz_msrc, + & mpi_comm_world, irq(2), ierr) +#endif + + endif ! final provider (m) +#ifndef SYNC_COM + nrq = 2 + call mpi_waitall(nrq, irq, istatus, ierr) +#endif + + if(nf_pprovider == 0) then + ncc2 = 0 + do icx = nbound_xm + 1, nbound_xm + nscxdiv + do icy = nbound_ym + 1, nbound_ym + nscydiv + do iczb = iczbp0, iczbp1 + do m = 1, mylm + ncc2 = ncc2 + 1 + wm_tmp(m,iczb,icy,icx) = rccbufp(ncc2,ibr) + end do + end do + end do + end do + endif ! final receiver (p) + + if(nf_mprovider == 0) then + ncc2 = 0 + do icx = nbound_xm + 1, nbound_xm + nscxdiv + do icy = nbound_ym + 1, nbound_ym + nscydiv + do iczb = iczbm0, iczbm1 + do m = 1, mylm + ncc2 = ncc2 + 1 + wm_tmp(m,iczb,icy,icx) = rccbufm(ncc2,ibr) + end do + end do + end do + end do + endif ! final receiver (m) + + endif ! final pairing. !ordinary root + + endif ! iteration + end do ! iteration + +! LLmoment +Y + np_supercell = (npy * mcell_size - 1) / ncell + 1 + ipy_pdest = ipz*npy*npx + & + mod(ipy + np_supercell - 1/npy + npy, npy)*npx + ipx + ipy_psrc = ipz*npy*npx + & + mod(ipy - np_supercell + 1/npy + npy, npy)*npx + ipx +! 
LLmoment -Y + ipy_mdest = ipz*npy*npx + & + mod(ipy - np_supercell + 1/npy + npy, npy)*npx + ipx + ipy_msrc = ipz*npy*npx + & + mod(ipy + np_supercell - 1/npy + npy, npy)*npx + ipx + + nf_pprovider = 0 + nf_mprovider = 0 + if(nbound_ym == 4 .and. nscydiv == 1) nf_pprovider = 1 + if(nbound_yp == 4 .and. nscydiv == 1) nf_mprovider = 1 + nitr = max( (nbound_ym - 1) / nscydiv + 1, + & (nbound_yp - 1) / nscydiv + 1 ) + + do icx = nbound_xm + 1, nbound_xm + nscxdiv + do itr = 1, nitr + if (itr == 1) then ! first iteration + + icyp0 = nscydiv + 1 + if(nscydiv < nbound_ym) icyp0 = nbound_ym + 1 + icyp1 = nbound_ym + nscydiv + icybp0 = icyp0 - nscydiv + nccp = (nbound_zm + nsczdiv + nbound_zp) + & *(icyp1 - icyp0 + 1) * mylm + icym0 = nbound_ym + 1 + icym1 = nbound_ym + nbound_yp + if(nscydiv < nbound_yp) icym1 = nbound_ym + nscydiv + icybm0 = icym0 + nscydiv + nccm = (nbound_zm + nsczdiv + nbound_zp) + & *(icym1 - icym0 + 1) * mylm + +#ifdef SYNC_COM +!coarray call mpi_sendrecv(wm_tmp(1,1,icyp0,icx), nccp, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipy_pdest, myrank, +!coarray & wm_tmp(1,1,icybp0,icx), nccp, MPI_DOUBLE_COMPLEX, +!coarray & ipy_psrc, ipy_psrc, +!coarray & mpi_comm_world, istatus, ierr ) + nd = abs(icyp1 - icyp0) + ndis(me)[ipy_psrc+1] = icybp0 ! Put + sync all + nb = ndis(ipy_pdest+1) + wm_tmp( :, :, nb:nb +nd, icx )[ipy_pdest+1] + . = wm_tmp( :, :, icyp0:icyp0+nd, icx ) ! Put + sync all +!! +!coarray call mpi_sendrecv(wm_tmp(1,1,icym0,icx), nccm, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipy_mdest, myrank, +!coarray & wm_tmp(1,1,icybm0,icx), nccm, MPI_DOUBLE_COMPLEX, +!coarray & ipy_msrc, ipy_msrc, +!coarray & mpi_comm_world, istatus, ierr ) + md = abs(icym1 - icym0) + mdis(me)[ipy_msrc+1] = icybm0 ! Put + sync all + mb = mdis(ipy_mdest+1) + wm_tmp( :, :, mb:mb +md, icx )[ipy_mdest+1] + . = wm_tmp( :, :, icym0:icym0+md, icx ) ! Put + sync all +!! 
+#else + call mpi_irecv(wm_tmp(1,1,icybp0,icx), nccp, + & MPI_DOUBLE_COMPLEX, ipy_psrc, ipy_psrc, + & mpi_comm_world, irq(1), ierr) + call mpi_isend(wm_tmp(1,1,icyp0,icx), nccp, + & MPI_DOUBLE_COMPLEX, ipy_pdest, myrank, + & mpi_comm_world, irq(2), ierr) + call mpi_irecv(wm_tmp(1,1,icybm0,icx), nccm, + & MPI_DOUBLE_COMPLEX, ipy_msrc, ipy_msrc, + & mpi_comm_world, irq(3), ierr) + call mpi_isend(wm_tmp(1,1,icym0,icx), nccm, + & MPI_DOUBLE_COMPLEX, ipy_mdest, myrank, + & mpi_comm_world, irq(4), ierr) + nrq = 4 + call mpi_waitall(nrq, irq, istatus, ierr) +#endif + + else ! iteration follows + + icybp0prior = icybp0 + icybp0 = nbound_ym - nscydiv*itr + 1 + icybm0prior = icybm0 + icybm0 = nbound_ym + nscydiv + nscydiv * (itr - 1) + 1 + + if(nscydiv /= 1 .or. + & (nscydiv == 1 .and. itr < nitr)) then + +#ifdef SYNC_COM +!coarray call mpi_sendrecv(wm_tmp(1,1,icybp0prior,icx), nccp, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipy_pdest, myrank, +!coarray & wm_tmp(1,1,icybp0,icx), nccp, MPI_DOUBLE_COMPLEX, +!coarray * ipy_psrc, ipy_psrc, +!coarray & mpi_comm_world, istatus, ierr ) + ndis(me)[ipy_psrc+1] = icybp0 ! Put + sync all + nb = ndis(ipy_pdest+1) + wm_tmp( :, :, nb:nb +nd, icx )[ipy_pdest+1] ! Put + . = wm_tmp( :, :, icybp0prior:icybp0prior+nd, icx ) + sync all +!! +!coarray call mpi_sendrecv(wm_tmp(1,1,icybm0prior,icx), nccm, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipy_mdest, myrank, +!coarray & wm_tmp(1,1,icybm0,icx), nccm, MPI_DOUBLE_COMPLEX, +!coarray & ipy_msrc, ipy_msrc, +!coarray & mpi_comm_world, istatus, ierr ) + mdis(me)[ipy_msrc+1] = icybm0 ! Put + sync all + mb = mdis(ipy_mdest+1) + wm_tmp( :, :, mb:mb +md, icx )[ipy_mdest+1] ! Put + . = wm_tmp( :, :, icybm0prior:icybm0prior+md, icx ) + sync all +!! 
+#else + call mpi_irecv(wm_tmp(1,1,icybp0,icx), nccp, + & MPI_DOUBLE_COMPLEX, ipy_psrc, ipy_psrc, + & mpi_comm_world, irq(1), ierr) + call mpi_isend(wm_tmp(1,1,icybp0prior,icx), nccp, + & MPI_DOUBLE_COMPLEX, ipy_pdest, myrank, + & mpi_comm_world, irq(2), ierr) + call mpi_irecv(wm_tmp(1,1,icybm0,icx), nccm, + & MPI_DOUBLE_COMPLEX, ipy_msrc, ipy_msrc, + & mpi_comm_world, irq(3), ierr) + call mpi_isend(wm_tmp(1,1,icybm0prior,icx), nccm, + & MPI_DOUBLE_COMPLEX, ipy_mdest, myrank, + & mpi_comm_world, irq(4), ierr) + nrq = 4 + call mpi_waitall(nrq, irq, istatus, ierr) +#endif + + else ! = final pairing. = + + if(nf_pprovider == 1) then +#ifdef SYNC_COM +!coarray call mpi_send(wm_tmp(1,1,icybp0prior,icx), nccp, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipy_pdest, myrank, +!coarray & mpi_comm_world, istatus, ierr ) + ndis(me)[ipy_psrc+1] = icybp0 ! Put + sync all + nb = ndis(ipy_pdest+1) + wm_tmp( :, :, nb:nb +nd, icx )[ipy_pdest+1] + . = wm_tmp( :, :, icybp0prior:icybp0prior+nd, icx ) ! Put + sync all +!! +#else + call mpi_isend(wm_tmp(1,1,icybp0prior,icx), nccp, + & MPI_DOUBLE_COMPLEX, ipy_pdest, myrank, + & mpi_comm_world, irq(1), ierr) +#endif + + else +#ifdef SYNC_COM +!coarray call mpi_recv(wm_tmp(1,1,icybp0,icx), nccp, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipy_psrc, ipy_psrc, +!coarray & mpi_comm_world, istatus, ierr ) +#else + call mpi_irecv(wm_tmp(1,1,icybp0,icx), nccp, + & MPI_DOUBLE_COMPLEX, ipy_psrc, ipy_psrc, + & mpi_comm_world, irq(1), ierr) +#endif + + endif ! final provider (p) + + if(nf_mprovider == 1) then +#ifdef SYNC_COM +!coarray call mpi_send(wm_tmp(1,1,icybm0prior,icx), nccm, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipy_mdest, myrank, +!coarray & mpi_comm_world, istatus, ierr ) + mdis(me)[ipy_msrc+1] = icybm0 ! Put + sync all + md = mdis(ipy_mdest+1) + wm_tmp( :, :, mb:mb +md, icx )[ipy_mdest+1] + . = wm_tmp( :, :, icybm0prior:icybm0prior+md, icx ) ! 
Put + sync all +#else + call mpi_isend(wm_tmp(1,1,icybm0prior,icx), nccm, + & MPI_DOUBLE_COMPLEX, ipy_mdest, myrank, + & mpi_comm_world, irq(2), ierr) +#endif + + else +#ifdef SYNC_COM +!coarray call mpi_recv(wm_tmp(1,1,icybm0,icx), nccm, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipy_msrc, ipy_msrc, +!coarray & mpi_comm_world, istatus, ierr ) +#else + call mpi_irecv(wm_tmp(1,1,icybm0,icx), nccm, + & MPI_DOUBLE_COMPLEX, ipy_msrc, ipy_msrc, + & mpi_comm_world, irq(2), ierr) +#endif + + endif ! final provider (m) +#ifndef SYNC_COM + nrq = 2 + call mpi_waitall(nrq, irq, istatus, ierr) +#endif + + endif ! final pairing. !ordinary root + + endif ! iteration + end do ! iteration + end do ! ipx + +! LLmoment +X + np_supercell = (npx* mcell_size - 1) / ncell + 1 + ipx_pdest = ipz*npy*npx + ipy*npx + & + mod(ipx + np_supercell - 1/npx + npx, npx) + ipx_psrc = ipz*npy*npx + ipy*npx + & + mod(ipx - np_supercell + 1/npx + npx, npx) +! LLmoment -X + ipx_mdest = ipz*npy*npx + ipy*npx + & + mod(ipx - np_supercell + 1/npx + npx, npx) + ipx_msrc = ipz*npy*npx + ipy*npx + & + mod(ipx + np_supercell - 1/npx + npx, npx) + + nf_pprovider = 0 + nf_mprovider = 0 + if(nbound_xm == 4 .and. nscxdiv == 1) nf_pprovider = 1 + if(nbound_xp == 4 .and. nscxdiv == 1) nf_mprovider = 1 + + nitr = max( (nbound_xm - 1) / nscxdiv + 1, + & (nbound_xp - 1) / nscxdiv + 1 ) + + do itr = 1, nitr + if (itr == 1) then ! 
first iteration + + icxp0 = nscxdiv + 1 + if(nscxdiv < nbound_xm) icxp0 = nbound_xm + 1 + icxp1 = nbound_xm + nscxdiv + icxbp0 = icxp0 - nscxdiv + nccp = (nbound_zm + nsczdiv + nbound_zp) + & *(nbound_ym + nscydiv + nbound_yp)*(icxp1 - icxp0 + 1) + & *mylm + icxm0 = nbound_xm + 1 + icxm1 = nbound_xm + nbound_xp + if(nscxdiv < nbound_xp) icxm1 = nbound_xm + nscxdiv + icxbm0 = icxm0 + nscxdiv + nccm = (nbound_zm + nsczdiv + nbound_zp) + & *(nbound_ym + nscydiv + nbound_yp)*(icxm1 - icxm0 + 1) + & *mylm + +#ifdef SYNC_COM +!coarray call mpi_sendrecv(wm_tmp(1,1,1,icxp0), nccp, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipx_pdest, myrank, +!coarray & wm_tmp(1,1,1,icxbp0), nccp, MPI_DOUBLE_COMPLEX, +!coarray & ipx_psrc, ipx_psrc, +!coarray & mpi_comm_world, istatus, ierr ) + ndis(me)[ipx_psrc+1] = icxbp0 ! Put + sync all + nb = ndis(ipx_pdest+1) + wm_tmp( :, :, :, nb:nb +nd )[ipx_pdest+1] + . = wm_tmp( :, :, :, icxp0:icxp0+nd ) ! Put + sync all +!! +!coarray call mpi_sendrecv(wm_tmp(1,1,1,icxm0), nccm, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipx_mdest, myrank, +!coarray & wm_tmp(1,1,1,icxbm0), nccm, MPI_DOUBLE_COMPLEX, +!coarray & ipx_msrc, ipx_msrc, +!coarray & mpi_comm_world, istatus, ierr ) + mdis(me)[ipx_msrc+1] = icxbm0 ! Put + sync all + mb = mdis(ipx_mdest+1) + wm_tmp( :, :, :, mb:mb +md )[ipx_mdest+1] + . = wm_tmp( :, :, :, icxm0:icxm0+md ) + sync all +!! +#else + call mpi_irecv(wm_tmp(1,1,1,icxbp0), nccp, + & MPI_DOUBLE_COMPLEX, ipx_psrc, ipx_psrc, + & mpi_comm_world, irq(1), ierr) + call mpi_isend(wm_tmp(1,1,1,icxp0), nccp, + & MPI_DOUBLE_COMPLEX, ipx_pdest, myrank, + & mpi_comm_world, irq(2), ierr) + call mpi_irecv(wm_tmp(1,1,1,icxbm0), nccm, + & MPI_DOUBLE_COMPLEX, ipx_msrc, ipx_msrc, + & mpi_comm_world, irq(3), ierr) + call mpi_isend(wm_tmp(1,1,1,icxm0), nccm, + & MPI_DOUBLE_COMPLEX, ipx_mdest, myrank, + & mpi_comm_world, irq(4), ierr) + nrq = 4 + call mpi_waitall(nrq, irq, istatus, ierr) +#endif + + else ! 
iteration follows + + icxbp0prior = icxbp0 + icxbp0 = nbound_xm - nscxdiv*itr + 1 + icxbm0prior = icxbm0 + icxbm0 = nbound_xm + nscxdiv + nscxdiv * (itr - 1) + 1 +!coarray + nd = abs(icxbp0prior - icxbp0) + md = abs(icxbm0prior - icxbm0) +!! + + if(nscxdiv /= 1 .or. (nscxdiv == 1 .and. itr < nitr)) then + +#ifdef SYNC_COM +!coarray call mpi_sendrecv(wm_tmp(1,1,1,icxbp0prior), nccp, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipx_pdest, myrank, +!coarray & wm_tmp(1,1,1,icxbp0), nccp, MPI_DOUBLE_COMPLEX, +!coarray & ipx_psrc, ipx_psrc, +!coarray & mpi_comm_world, istatus, ierr ) + ndis(me)[ipx_psrc+1] = icxbp0 ! Put + sync all + nb = ndis(ipx_pdest+1) + wm_tmp( :, :, :, nb:nb +nd-1 )[ipx_pdest+1] + . = wm_tmp( :, :, :, icxbp0prior:icxbp0prior+nd-1 ) ! Put + sync all +!! +!coarray call mpi_sendrecv(wm_tmp(1,1,1,icxbm0prior), nccm, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipx_mdest, myrank, +!coarray & wm_tmp(1,1,1,icxbm0), nccm, MPI_DOUBLE_COMPLEX, +!coarray & ipx_msrc, ipx_msrc, +!coarray & mpi_comm_world, istatus, ierr ) + mdis(me)[ipx_msrc+1] = icxbm0 ! Put + sync all + mb = mdis(ipx_mdest+1) + wm_tmp( :, :, :, mb:mb +md-1 )[ipx_mdest+1] + . = wm_tmp( :, :, :, icxbm0prior:icxbm0prior+md-1 ) ! Put + sync all +!! +#else + call mpi_irecv(wm_tmp(1,1,1,icxbp0), nccp, + & MPI_DOUBLE_COMPLEX, ipx_psrc, ipx_psrc, + & mpi_comm_world, irq(1), ierr) + call mpi_isend(wm_tmp(1,1,1,icxbp0prior), nccp, + & MPI_DOUBLE_COMPLEX, ipx_pdest, myrank, + & mpi_comm_world, irq(2), ierr) + call mpi_irecv(wm_tmp(1,1,1,icxbm0), nccm, + & MPI_DOUBLE_COMPLEX, ipx_msrc, ipx_msrc, + & mpi_comm_world, irq(3), ierr) + call mpi_isend(wm_tmp(1,1,1,icxbm0prior), nccm, + & MPI_DOUBLE_COMPLEX, ipx_mdest, myrank, + & mpi_comm_world, irq(4), ierr) + nrq = 4 + call mpi_waitall(nrq, irq, istatus, ierr) +#endif + + else ! = final pairing. 
= + + if(nf_pprovider == 1) then +#ifdef SYNC_COM +!coarray call mpi_send(wm_tmp(1,1,1,icxbp0prior), nccp, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipx_pdest, myrank, +!coarray & mpi_comm_world, istatus, ierr ) + ndis(me)[ipx_psrc+1] = icxbp0 ! Put + sync all + nb = ndis(ipx_pdest+1) + wm_tmp( :, :, :, nb:nb +nd-1 )[ipx_pdest+1] + . = wm_tmp( :, :, :, icxbp0prior:icxbp0prior+nd-1 ) ! put + sync all +!! +#else + call mpi_isend(wm_tmp(1,1,1,icxbp0prior), nccp, + & MPI_DOUBLE_COMPLEX, ipx_pdest, myrank, + & mpi_comm_world, irq(1), ierr) +#endif + + else +#ifdef SYNC_COM +!coarray call mpi_recv(wm_tmp(1,1,1,icxbp0), nccp, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipx_psrc, ipx_psrc, +!coarray & mpi_comm_world, istatus, ierr ) +#else + call mpi_irecv(wm_tmp(1,1,1,icxbp0), nccp, + & MPI_DOUBLE_COMPLEX, ipx_psrc, ipx_psrc, + & mpi_comm_world, irq(1), ierr) +#endif + + endif ! final provider (p) + + if(nf_mprovider == 1) then +#ifdef SYNC_COM +!coarray call mpi_send(wm_tmp(1,1,1,icxbm0prior), nccm, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipx_mdest, myrank, +!coarray & mpi_comm_world, istatus, ierr ) + mdis(me)[ipx_msrc+1] = icxbm0 ! Put + sync all + mb = mdis(ipx_mdest+1) + wm_tmp( :, :, :, mb:mb +md )[ipx_mdest+1] + . = wm_tmp( :, :, :, icxbm0prior:icxbm0prior+md ) ! Put + sync all +!! +#else + call mpi_isend(wm_tmp(1,1,1,icxbm0prior), nccm, + & MPI_DOUBLE_COMPLEX, ipx_mdest, myrank, + & mpi_comm_world, irq(2), ierr) +#endif + + else +#ifdef SYNC_COM +!coarray call mpi_recv(wm_tmp(1,1,1,icxbm0), nccm, +!coarray & MPI_DOUBLE_COMPLEX, +!coarray & ipx_msrc, ipx_msrc, +!coarray & mpi_comm_world, istatus, ierr ) +#else + call mpi_irecv(wm_tmp(1,1,1,icxbm0), nccm, + & MPI_DOUBLE_COMPLEX, ipx_msrc, ipx_msrc, + & mpi_comm_world, irq(2), ierr) +#endif + + endif ! final provider (m) +#ifndef SYNC_COM + nrq = 2 + call mpi_waitall(nrq, irq, istatus, ierr) +#endif + + endif ! final pairing. !ordinary root + + endif ! iteration + end do ! 
iteration
+
+!coarray
+      wm = wm_tmp
+!!
+      return
+      end
+c---------------------------------------------------------------------
diff --git a/MODYLAS-MINI/src/xmpAPI_domain_div.f b/MODYLAS-MINI/src/xmpAPI_domain_div.f
new file mode 100755
index 0000000..1856c1c
--- /dev/null
+++ b/MODYLAS-MINI/src/xmpAPI_domain_div.f
@@ -0,0 +1,475 @@
+!----------------------------------------------------------------------
+! Copyright (C) 2003-2014 Kensuke Iwahashi, Noriyuki Yoshii,
+! Atsushi Yamada, Yoshimichi Andoh,
+! Kazushi Fujimoto, Hidekazu Kojima,
+! Fumiyasu Mizutani, and Susumu Okazaki
+! All Rights Reserved.
+!
+! Copyright (C) 2013-2014 RIKEN AICS
+! All Rights Reserved.
+!
+! This MD program has been developed at Nagoya University, and
+! Institute for Molecular Science, National Institutes of Natural
+! Sciences.
+! And this work was supported by
+! Next-Generation Supercomputer Project, and
+! NAREGI Nanoscience Project,
+! Ministry of Education, Culture, Sports, Science and Technology,
+! Japan.
+!
+! This program is NOT a free software and distributed under the
+! license described in the LICENSE.
+! All rights are reserved by the authors of this program.
+!
+! The authors do NOT warrant or assume any legal liability or
+! responsibility for the accuracy or completeness. 
+!---------------------------------------------------------------------- +c---------------------------------------------------------------------- + subroutine init_fmm_domain_div() +c---------------------------------------------------------------------- + use md_fmm + use md_fmm_domdiv_flg + use mpivar + implicit none + integer(4) :: iwkx,iwky,iwkz + integer(4) :: maxdiv,icnt + integer(4) :: icx, icy, icz, icxyz1 + integer(4) :: idx, idy, idz, imx, imy, imz,i + include 'mpif.h' + integer(4) :: ierr + + maxdiv = nprocs +!ya + IF(mpi_manual_division_flg)THEN + continue + ELSE + nxdiv=1 + nydiv=1 + nzdiv=1 + ENDIF +!ya + if(nprocs.ne.1) then +!ya + IF(mpi_manual_division_flg)THEN + maxdiv=1 + continue + ELSE + do while (mod(maxdiv,2).eq.0) + nxdiv = nxdiv*2 + maxdiv = maxdiv/2 + if(mod(maxdiv,2).eq.0) then + nydiv = nydiv*2 + maxdiv = maxdiv/2 + end if + if(mod(maxdiv,2).eq.0) then + nzdiv = nzdiv*2 + maxdiv = maxdiv/2 + end if + end do + ENDIF +!ya + end if + + if(((nxdiv*nydiv*nzdiv).ne.nprocs) .or. + & (maxdiv.ne.1) ) then + write(*,*) ' -error init_fmm_domain_div 1-' + write(*,*) 'The Number of MPI-procs is incorrect. 
' + write(*,*) 'nxdiv,nydiv,nzdiv,nprocs= ',nxdiv,nydiv,nzdiv,nprocs + call mpi_abort(mpi_comm_world,ierr) + end if + + maxdiv = max(maxdiv,nxdiv) + maxdiv = max(maxdiv,nydiv) + maxdiv = max(maxdiv,nzdiv) + + if(maxdiv.gt.ncell) then + write(*,*) ' -error init_fmm_domain_div 2-' + write(*,*) ' maxdiv.gt.ncell' + call mpi_abort(mpi_comm_world,ierr) + endif + + lxdiv = mod(ncell,nxdiv) + lydiv = mod(ncell,nydiv) + lzdiv = mod(ncell,nzdiv) + + if((lxdiv.ne.0).or.(lydiv.ne.0).or.(lzdiv.ne.0)) then + write(*,*) ' -error init_fmm_domain_div 3-' + write(*,*) ' mod(ncell,mpidiv).ne.0' + call mpi_abort(mpi_comm_world,ierr) + endif + + lxdiv = ncell/nxdiv + lydiv = ncell/nydiv + lzdiv = ncell/nzdiv + + allocate(ixflg(0:nprocs-1)) + allocate(iyflg(0:nprocs-1)) + allocate(izflg(0:nprocs-1)) + allocate(ixmax(0:nprocs-1)) + allocate(iymax(0:nprocs-1)) + allocate(izmax(0:nprocs-1)) + allocate(ixmin(0:nprocs-1)) + allocate(iymin(0:nprocs-1)) + allocate(izmin(0:nprocs-1)) +! + allocate(idom(nxdiv,nydiv,nzdiv)) ! deallocate at calc_ia2c + allocate(idcell(ncell,ncell,ncell)) ! deallocate at calc_ia2c + allocate(nd2c(0:nxdiv*nydiv*nzdiv-1)) ! deallocate at calc_idcell + allocate(id2c(lxdiv*lydiv*lzdiv,0:nxdiv*nydiv*nzdiv-1)) ! dealoc + + icnt=0 + do iwkz=1,nzdiv + do iwky=1,nydiv + do iwkx=1,nxdiv + ixflg(icnt)=iwkx + iyflg(icnt)=iwky + izflg(icnt)=iwkz + idom(iwkx,iwky,iwkz) = icnt + icnt=icnt+1 + end do + end do + end do + + ixmax=0 + iymax=0 + izmax=0 + ixmin=ncell + iymin=ncell + izmin=ncell +!coarray call mpi_barrier(mpi_comm_world,ierr) + sync all +!! 
+ + if(myrank==0)then + IF(mpi_manual_division_flg)THEN + write(*,'(/,a)') '**** MPI manual division' + ELSE + write(*,'(/,a)') '**** MPI auto division' + ENDIF + endif + + nd2c = 0 + do icz = 1, ncell + do icy = 1, ncell + do icx = 1, ncell + icxyz1 = (icx-1) * ncell*ncell + & + (icy-1) * ncell + icz-1 + imx = mod(icx,lxdiv) + imy = mod(icy,lydiv) + imz = mod(icz,lzdiv) + idx = int(icx/lxdiv) + idy = int(icy/lydiv) + idz = int(icz/lzdiv) + if(imx.ne.0) idx = idx + 1 + if(imy.ne.0) idy = idy + 1 + if(imz.ne.0) idz = idz + 1 + i = idom(idx,idy,idz) + ixmax(i) = max(ixmax(i),icx) + iymax(i) = max(iymax(i),icy) + izmax(i) = max(izmax(i),icz) + ixmin(i) = min(ixmin(i),icx) + iymin(i) = min(iymin(i),icy) + izmin(i) = min(izmin(i),icz) + nd2c(i) = nd2c(i)+1 + id2c(nd2c(i),i) = icxyz1 + end do + end do + end do + + return + end +c---------------------------------------------------------------------- + subroutine check_cutofflength ! YA +c---------------------------------------------------------------------- + use cutoffradius + use md_fmm_domdiv_flg + use unitcell + implicit none + real(8) :: cellxd,cellyd,cellzd + + cellxd=cellx/dble(lxdiv) + cellyd=celly/dble(lydiv) + cellzd=cellz/dble(lzdiv) + cellxd=2d0*cellxd + cellyd=2d0*cellyd + cellzd=2d0*cellzd + if(cutrad.eq.0)then + stop'LJ-cutoff length=0, which is unlikely situation.' + endif + if(cellxd.lt.cutrad)then ! Cubic cell only + write(6,*) 'Length of LEVEL:0 cell=', cellxd*1d+10, ' Aungstrom' + write(6,*) 'LJ-cutoff length=', cutrad*1d+10, ' Aungstrom' + write(6,*) 'This situation is unlikely, so change ncell value.' 
+ stop + endif + return + end +c---------------------------------------------------------------------- + subroutine fmod_alloc_multipole +c---------------------------------------------------------------------- + use md_fmm + use md_fmm_domdiv_flg + use mpivar + use ompvar + implicit none +!ya + integer(4) :: m1 + integer(4) :: ieo_zst, ieo_yst, ieo_xst + integer(4) :: ieo_zen, ieo_yen, ieo_xen + integer(4) :: npz, npy, npx ! process number along each axis + integer(4) :: ipz, ipy, ipx ! iflgx,iflgy,iflgz + integer(4) :: mcell_size + integer(4) :: nbound_zm, nbound_ym, nbound_xm + integer(4) :: nbound_zp, nbound_yp, nbound_xp + integer(4) :: nsczdiv,nscydiv,nscxdiv + integer(4) :: lclz,lcly,lclx + integer(4) :: nscell + integer(4) :: tgtl + type(fmm_data_t),pointer :: d +!ya + m1=(nmax+1)*(nmax+1) + ipx=ixflg(myrank)-1 ; npx=nxdiv + ipy=iyflg(myrank)-1 ; npy=nydiv + ipz=izflg(myrank)-1 ; npz=nzdiv + + if (nlevel < 3) then + if (myrank == 0) write(*,*) 'ERROR: nlevel < 3' + call mpistop() + endif + + allocate(fmm_data(0:nlevel)) + + do tgtl = 0, nlevel + d => fmm_data(tgtl) + mcell_size = 2**tgtl + nsczdiv = (ncell / mcell_size-1) / npz + 1 + nscydiv = (ncell / mcell_size-1) / npy + 1 + nscxdiv = (ncell / mcell_size-1) / npx + 1 + ieo_zst = mod((ncell / mcell_size * ipz) / npz + 1,2) + nbound_zm = 4 ; if(ieo_zst == 0) nbound_zm = 5 + ieo_zen = mod((ncell/mcell_size * (ipz+1) -1)/npz + 1, 2) + nbound_zp = 4 ; if(ieo_zen == 1) nbound_zp = 5 + ieo_yst = mod((ncell / mcell_size * ipy) / npy + 1,2) + nbound_ym = 4 ; if(ieo_yst == 0) nbound_ym = 5 + ieo_yen = mod((ncell/mcell_size * (ipy+1) -1)/npy + 1, 2) + nbound_yp = 4 ; if(ieo_yen == 1) nbound_yp = 5 + ieo_xst = mod((ncell / mcell_size * ipx) / npx + 1,2) + nbound_xm = 4 ; if(ieo_xst == 0) nbound_xm = 5 + ieo_xen = mod((ncell/mcell_size * (ipx+1) -1)/npx + 1, 2) + nbound_xp = 4 ; if(ieo_xen == 1) nbound_xp = 5 + + nscell = (ncell - 1) / mcell_size + 1 + + lclx = nbound_xm+nscxdiv+nbound_xp + lcly = 
nbound_ym+nscydiv+nbound_yp + lclz = nbound_zm+nsczdiv+nbound_zp + + d%lclx=lclx; d%lcly=lcly; d%lclz=lclz + d%mcell_size=mcell_size; d%nscell=nscell + d%nscxdiv=nscxdiv; d%nscydiv=nscydiv; d%nsczdiv=nsczdiv + d%nbound_xm=nbound_xm; d%nbound_xp=nbound_xp + d%nbound_ym=nbound_ym; d%nbound_yp=nbound_yp + d%nbound_zm=nbound_zm; d%nbound_zp=nbound_zp + + allocate(d%wm_local(m1,lclz,lcly,lclx)) + allocate(d%wm_global(m1,nscell,nscell,nscell)) + allocate(d%wl_local(m1,nsczdiv,nscydiv,nscxdiv)) + allocate(d%wwl_local(m1,nsczdiv,nscydiv,nscxdiv,0:nomp-1)) + + if (tgtl == 0) then + allocate(wwm_local0(m1,lclz,lcly,lclx,0:nomp-1)) + endif + + end do + + return + end +c---------------------------------------------------------------------- + subroutine calc_idcell() +c---------------------------------------------------------------------- + use md_fmm + use md_fmm_domdiv_flg + use mpivar + implicit none + include 'mpif.h' + integer(4) :: icx0, icy0, icz0 + integer(4) :: icx1, icy1, icz1 + integer(4) :: ich1,k,ih1,ntmp + integer(4) :: ndirect2,mrcsafe,ndfmm0 + integer(4) :: ix,iy,iz + integer(4) :: ix2,iy2,iz2 + integer(4),allocatable :: + & idfmm0x(:),idfmm0y(:),idfmm0z(:) + + ndirect2 = ndirect + ndcellmargin + if (ndirect2 .gt. nimage-1) then + mrcsafe = nimage-1 + else + mrcsafe = ndirect2 + endif + ndfmm0 = 0 + do ix = -mrcsafe, mrcsafe + ix2 = ix * ix + do iy = -mrcsafe, mrcsafe + iy2 = iy * iy + do iz = -mrcsafe, mrcsafe + iz2 = iz * iz +#ifdef COMM_CUBE + if (abs(ix) <= ndirect2 .and. + & abs(iy) <= ndirect2 .and. + & abs(iz) <= ndirect2) then +#else + if (ix2+iy2+iz2 .le. ndirect2*ndirect2) then +#endif + ndfmm0 = ndfmm0 + 1 + endif + enddo + enddo + enddo + allocate(idfmm0x(ndfmm0)) + allocate(idfmm0y(ndfmm0)) + allocate(idfmm0z(ndfmm0)) + idfmm0x = 0 + idfmm0y = 0 + idfmm0z = 0 + ntmp = 0 + do ix = -mrcsafe, mrcsafe + ix2 = ix * ix + do iy = -mrcsafe, mrcsafe + iy2 = iy * iy + do iz = -mrcsafe, mrcsafe + iz2 = iz * iz +#ifdef COMM_CUBE + if (abs(ix) <= ndirect2 .and. 
+ & abs(iy) <= ndirect2 .and. + & abs(iz) <= ndirect2) then +#else + if (ix2+iy2+iz2 .le. ndirect2*ndirect2) then +#endif + ntmp = ntmp + 1 + idfmm0x(ntmp) = ix + idfmm0y(ntmp) = iy + idfmm0z(ntmp) = iz + endif + enddo + enddo + enddo + + idcell = 0 + ntmp = 0 + do ih1 = 1,nd2c(myrank) + ich1 = id2c(ih1,myrank) + icz0 = mod(ich1,ncell)+1 + icy0 = ich1/ncell + icx0 = icy0/ncell+1 + icy0 = mod(icy0,ncell)+1 + do k = 1,ndfmm0 + icx1 = icx0 + idfmm0x(k) + icy1 = icy0 + idfmm0y(k) + icz1 = icz0 + idfmm0z(k) + if (icx1 .gt. 3*ncell .or. icx1 .le. -2*ncell .or. + & icy1 .gt. 3*ncell .or. icy1 .le. -2*ncell .or. + & icz1 .gt. 3*ncell .or. icz1 .le. -2*ncell) then + cycle + else + do while (icx1 .gt. ncell) + icx1 = icx1 - ncell + end do + do while (icx1 .le. 0) + icx1 = icx1 + ncell + end do + do while (icy1 .gt. ncell) + icy1 = icy1 - ncell + end do + do while (icy1 .le. 0) + icy1 = icy1 + ncell + end do + do while (icz1 .gt. ncell) + icz1 = icz1 - ncell + end do + do while (icz1 .le. 0) + icz1 = icz1 + ncell + end do + if(idcell(icx1,icy1,icz1).eq.0) then + ntmp = ntmp + 1 + idcell(icx1,icy1,icz1) = ntmp + end if + end if + end do + end do + + ndcell = ntmp + + deallocate(idfmm0x) + deallocate(idfmm0y) + deallocate(idfmm0z) +!ya + deallocate(nd2c,id2c) +!ya + + return + end +c--------------------------------------------------------------------- + subroutine fmod_alloc_metadata !init_fmm_direct_com() +c---------------------------------------------------------------------- + use trj_org + use trj_mpi + use md_forces + use md_periodic + use md_fmm + use md_fmm_domdiv_flg + use shakerattleroll + use md_multiplestep + use md_condition + use md_segment + use shakerattleroll + use mpivar + use ompvar + implicit none + integer(4) :: itmp + include 'mpif.h' + +!############ +! metadata +!############ + allocate(tag(lzdiv+4,lydiv+4,lxdiv+4)) + allocate(na_per_cell(lzdiv+4,lydiv+4,lxdiv+4)[*]) +!############ +! 
segment +!############ + max_seg = max_nsegments_per_cell*lzdiv*lydiv*lxdiv + allocate(wseg_cz(max_seg)) + allocate(wseg_cy(max_seg)) + allocate(wseg_cx(max_seg)) + allocate(ndseg_fmmn(lzdiv,lydiv,lxdiv)) + itmp=max_nsegments_per_cell*(lxdiv+4)*(lydiv+4)*(lzdiv+4) + allocate(lsegtop(itmp)) + allocate(lseg_natoms(itmp)) +!############ +! atom +!############ + na1cell=max( int((n/ncell**3)*na1cellmargin),10 ) + na5cell=na1cell*5 + nadirect=na1cell*(lxdiv+4)*(lydiv+4)*(lzdiv+4) +!Coordinate & Velocity + allocate(wkxyz(3,nadirect)[*]) + allocate(wkv(3,nadirect)) + allocate(m2i(nadirect)[*]) +!Force + allocate(wk_f(3,nadirect)) + allocate(w3_f(3,nadirect,0:nomp-1)) +!TABLE for Force_calculation + allocate(chgv_table(na5cell,0:nomp-1)) + allocate(epsilon_sqrt_table(na5cell,0:nomp-1)) + allocate(R_half_table(na5cell,0:nomp-1)) +!MT + allocate(fshort(3,nadirect)) + allocate(fmiddle(3,nadirect)) + allocate(flong(3,nadirect)) +!SHAKE + allocate(xyzstr(3,nadirect)) + allocate(kshake(nadirect)) + + return + end +c---------------------------------------------------------------------- diff --git a/MODYLAS-MINI/src/xmpAPI_fmodules.f b/MODYLAS-MINI/src/xmpAPI_fmodules.f new file mode 100755 index 0000000..1376f61 --- /dev/null +++ b/MODYLAS-MINI/src/xmpAPI_fmodules.f @@ -0,0 +1,386 @@ +!---------------------------------------------------------------------- +! Copyright (C) 2003-2014 Kensuke Iwahashi, Noriyuki Yoshii, +! Atsushi Yamada, Yoshimichi Andoh, +! Kazushi Fujimoto, Hidekazu Kojima, +! Fumiyasu Mizutani, and Susumu Okazaki +! All Rights Reserved. +! +! Copyright (C) 20013-2014 RIKEN AICS +! All Rights Reserved. +! +! This MD program has been developed at Nagoya University, and +! Institute for Molecular Science, National Institutes of Natural +! Sciences. +! And this work was supported by +! Next-Generation Supercomputer Project, and +! NAREGI Nanoscience Project, +! Ministry of Education, Culture, Sports, Science and Technology, +! Japan. +! +! 
This program is NOT a free software and distributed under the +! license described in the LICENSE. +! All rights are reserved by the authors of this program. +! +! The authors do NOT warrant or assume any legal liability or +! responsibility for the accuracy or completeness. +!---------------------------------------------------------------------- +c---------------------------------------------------------------------- +! MODULEs +c---------------------------------------------------------------------- + module version + character(20) :: MODYLAS_version="1.0.1" + end module +c---------------------------------------------------------------------- + module atommass + real(8),allocatable :: mass(:),r_mass(:) + real(8) :: totalmass + end module +c---------------------------------------------------------------------- + module trj_org + real(8),allocatable :: xyz(:,:) + real(8),allocatable :: v(:,:) + integer(4) :: n=0 + end module +c---------------------------------------------------------------------- + module trj_mpi + real(8),allocatable :: wkxyz(:,:)[:] + real(8),allocatable :: wkv(:,:) + integer(4),allocatable :: i2m(:), m2i(:)[:] + integer(4),allocatable :: tag(:,:,:),na_per_cell(:,:,:)[:] + integer(4) :: na1cell,na5cell,nadirect + integer(4) :: naline,narea + real(8),parameter :: na1cellmargin=2.00d0 ! 
ya !!100% margin + end module +c---------------------------------------------------------------------- + module unitcell + real(8) :: cellx=0.0d0, celly=0.0d0, cellz=0.0d0 + real(8) :: cellxh=0.0d0, cellyh=0.0d0, cellzh=0.0d0 + real(8) :: alpha=90.0d0, beta=90.0d0, gamma=90.0d0 + real(8) :: cellvol=0.0d0 + end module +c---------------------------------------------------------------------- + module param + implicit none + integer(4) :: npara + integer(4),allocatable :: paranum(:) + end module +c---------------------------------------------------------------------- + module nhc + integer(4),parameter :: mnhc=5,mnys=5,nnhc=5 + real(8) :: rss(mnhc),vss(mnhc) + real(8) :: rssb(mnhc),vssb(mnhc) + real(8) :: box(3,3),vboxg(3,3) + end module +c---------------------------------------------------------------------- + module cutoffradius + real(8) :: cutrad, cutrad2 + end module +c---------------------------------------------------------------------- + module shakerattleroll + real(8),allocatable :: xyzstr(:,:) + integer(4) :: totnconst=0 + integer(4) :: totnconstL,maxibn,l0max + integer(4),allocatable :: ibseL(:),shkijL(:,:,:) + real(8),allocatable :: rmassL(:,:,:) + real(8),allocatable :: dij2L(:,:) + integer(4), allocatable :: ShakeGroupLeader(:) + integer(4), allocatable :: nconstraints(:) + integer(4), allocatable :: atom1S(:,:), atom2S(:,:) + real(8), allocatable :: slength(:,:) + integer(4),allocatable :: kshake(:) + real(8) :: shake_tolerance + integer(4) :: maxshakecycle + end module +c---------------------------------------------------------------------- + module pshake + integer(4),parameter:: n_cnst_max=6 + integer(4),parameter:: n_atom_max=4 + integer(4):: n_type_ps + integer(4),allocatable:: type_psL(:) + integer(4),allocatable:: gn2ips(:) + integer(4),allocatable:: couple_ps(:,:,:), n_atom_ps(:) + real(8),allocatable:: a_0(:,:,:), a_0_sym(:,:,:) + real(8),allocatable:: rdij2_ps(:,:), mass_ps(:,:) + end module 
+!---------------------------------------------------------------------- + module pshake_init + integer(4):: rngrp_ps + real(8),allocatable::r_init_ps(:,:,:) + integer(4),allocatable:: n_cnst_ps(:) + end module +!---------------------------------------------------------------------- + module md_charmm_lj + real(8),allocatable :: epsilon_sqrt(:), R_half(:) + end module +c---------------------------------------------------------------------- + module md_void + integer(4) :: nvoid + integer(4),allocatable :: void_atom1(:,:),void_atom2(:,:) + integer(4),allocatable :: void_n(:) + end module +c---------------------------------------------------------------------- + module md_fmm + integer(4) :: ndirect=2, ncell=32, nlevel=5 + integer(4) :: ndcellmargin=0 + integer(4) :: nimage=96 + integer(4),allocatable,dimension(:,:) :: lddir + integer(4) :: nload,nchunk + real(8),allocatable,dimension(:,:) :: fa,pre + integer(4),allocatable :: ndseg_fmmn(:,:,:) + integer(4),allocatable,dimension(:,:) :: m_wk + integer(4) :: nmerge_fmm + integer(4) :: max_nsegments_per_cell + integer(4) :: nmax=4 + integer(4) :: mdg + complex(8),allocatable,dimension(:,:) :: premm,preml,prell + complex(8),allocatable,dimension(:,:,:,:,:,:) :: shmm,shml,shll + complex(8),allocatable,dimension(:) :: winput, woutput, wewald + complex(8),allocatable,dimension(:,:) :: shew + real(8) :: margin_mnpc=5.0d0 + integer(4) :: lgflg=1 ! 
0,1,2,3, when switch local2global + real(8)::sysdpl(3),wk_sysdpl(3) + end module +c---------------------------------------------------------------------- + module md_const + real(8),parameter :: md_PI=3.1415926535897932384626433832795029d0 + real(8),parameter :: md_PI_sqrt=1.77245385090551602729816748334d0 + real(8),parameter :: md_r_PI_sqrt=1.0d0/md_PI_sqrt + real(8),parameter :: md_AVOGADRO=6.02214199d+23 + real(8),parameter :: md_ELEMENTARY_CHARGE=1.602176462D-19 + real(8),parameter :: md_ATOMIC_MASS_UNIT=1.0d-3/md_AVOGADRO + real(8),parameter :: md_VACUUM_DIELECTRIC_CONSTANT=8.854187817D-12 + real(8),parameter :: md_BOLTZMANN=1.3806503D-23 + real(8),parameter :: md_DEGREE=md_PI/180.0d0 + real(8),parameter :: md_CALORIE=4.184d0 + real(8),parameter :: md_E_CONVERT=md_AVOGADRO*1.0d-3/md_CALORIE + real(8),parameter :: md_F_CONVERT=md_AVOGADRO*1.0d-13/md_CALORIE + real(8),parameter :: rad2deg=57.29577951308232088d0 + real(8),parameter :: deg2rad=1.0d0/rad2deg + real(8),parameter :: md_QQ_4PiE=md_ELEMENTARY_CHARGE **2 + & * 0.25d0 / md_PI + & /md_VACUUM_DIELECTRIC_CONSTANT + real(8),parameter :: rvkbolz=1d0/md_BOLTZMANN + end module +c---------------------------------------------------------------------- + module md_condition + integer(4) :: mdstep = 0 + real(8) :: dt=0.0d0 + integer(4) :: degree_of_freedom=0 + real(8) :: degree_of_freedom_inverse=0.0d0 + integer(4) :: md_condition__howmany_steps=0 + end module +c---------------------------------------------------------------------- + module md_coulomb + real(8),allocatable :: chgv(:) + end module +c---------------------------------------------------------------------- + module md_ewald + logical :: ewald_sterm=.false. ! 
default + end module +c---------------------------------------------------------------------- + module md_file + character(LEN=1024) :: session_name + integer(4),parameter :: f_trj=11,f_mntr=12 + integer(4),parameter :: f_restart_bin=16 + integer(4),parameter :: f_mdff=17 + end module +c---------------------------------------------------------------------- + module md_forces + real(8),allocatable :: f(:,:) + real(8),allocatable :: wk_f(:,:) + real(8),allocatable :: w3_f(:,:,:) + real(8),allocatable :: chgv_table(:,:),epsilon_sqrt_table(:,:), + & R_half_table(:,:) + end module +c---------------------------------------------------------------------- + module md_monitors + real(8) :: hamiltonian + real(8) :: p_energy, wk_p_energy + real(8) :: k_energy + real(8) :: t_energy + real(8) :: temperature + end module +c---------------------------------------------------------------------- + module md_segment + integer(4),allocatable :: segtop(:) + integer(4),allocatable :: lsegtop(:),lseg_natoms(:) ! meta-data + end module +c---------------------------------------------------------------------- + module md_periodic + integer(4) :: nsegments=-1 + integer(4),allocatable :: seg_natoms(:) + real(8),allocatable :: seg_cx(:) + real(8),allocatable :: seg_cy(:) + real(8),allocatable :: seg_cz(:) + real(8),allocatable :: wseg_cx(:) + real(8),allocatable :: wseg_cy(:) + real(8),allocatable :: wseg_cz(:) + end module +c---------------------------------------------------------------------- + module g_main + integer(4) :: trj_start=0,trj_interval=1 + integer(4) :: restart_start=0,restart_interval=1 + integer(4) :: mntr_start=0,mntr_interval=1 + real(8) :: maxwell_temperature=0.0d0 + integer(4) :: randomseed=1235 + logical :: reset_maxwell=.false. ! 
default + end module +c---------------------------------------------------------------------- + module md_fmm_domdiv_flg + integer(4),allocatable :: idom(:,:,:) + integer(4),allocatable :: idcell(:,:,:) + integer(4),allocatable :: nd2c(:) + integer(4),allocatable :: id2c(:,:) + integer(4),allocatable :: ixflg(:),iyflg(:),izflg(:) + integer(4),allocatable :: ixmax(:),iymax(:),izmax(:) + integer(4),allocatable :: ixmin(:),iymin(:),izmin(:) + integer(4) :: lxdiv,lydiv,lzdiv + integer(4) :: nxdiv,nydiv,nzdiv + integer(4) :: ndcell=1 + integer(4) :: nselfatm=0 + integer(4) :: ndatm=0 + integer(4) :: nselfseg=0 + integer(4) :: max_seg + logical :: mpi_manual_division_flg=.false. + + complex(8),allocatable :: wwm_local0(:,:,:,:,:) + + type fmm_data_t + integer(4) :: mcell_size, nscell + integer(4) :: lclx, lcly, lclz + integer(4) :: nscxdiv, nscydiv, nsczdiv + integer(4) :: nbound_xm, nbound_ym, nbound_zm + integer(4) :: nbound_xp, nbound_yp, nbound_zp + complex(8),allocatable :: wm_local(:,:,:,:) + complex(8),allocatable :: wm_global(:,:,:,:) + complex(8),allocatable :: wl_local(:,:,:,:) + complex(8),allocatable :: wwl_local(:,:,:,:,:) + end type fmm_data_t + type(fmm_data_t),allocatable,target :: fmm_data(:) + + end module +c---------------------------------------------------------------------- + module mod_wk_fmmewald + complex(8),allocatable :: wk_wl(:) + end module +c---------------------------------------------------------------------- + module comm_base + integer npz,npy,npx + integer ipz,ipy,ipx + end module +c---------------------------------------------------------------------- + module comm_d3 + integer nczdiv, ncydiv, ncxdiv + integer,allocatable :: icbufp(:)[:] + integer,allocatable :: ircbufp(:)[:] + integer,allocatable :: icbufm(:)[:] + integer,allocatable :: ircbufm(:)[:] + integer,allocatable :: ibuffp(:)[:] + integer,allocatable :: irbuffp(:)[:] + integer,allocatable :: ibuffm(:)[:] + integer,allocatable :: irbuffm(:)[:] + real(8),allocatable :: 
buffp(:,:)[:] + real(8),allocatable :: rbuffp(:,:)[:] + real(8),allocatable :: buffm(:,:)[:] + real(8),allocatable :: rbuffm(:,:)[:] + end module +c---------------------------------------------------------------------- + module comm_bd + integer nczdiv, ncydiv, ncxdiv + integer max_mvatom ! max number of atoms moving to the cell. + integer max_mvseg ! max number of segments moving to the cell. + integer max_cellcbd ! max number of cells on communication buffer. + real(8),allocatable :: abucket(:,:,:,:,:) + integer,allocatable :: iabucket(:,:,:,:) + integer,allocatable :: isbucket(:,:,:,:) + integer,allocatable :: ncseg(:,:,:) + integer,allocatable :: ncatom(:,:,:) + real(8),allocatable :: buffp(:,:) + real(8),allocatable :: buffm(:,:) + integer,allocatable :: ibuffp(:) + integer,allocatable :: ibuffm(:) + integer,allocatable :: isbufp(:) + integer,allocatable :: isbufm(:) + real(8),allocatable :: rbuff_p(:,:)[:] + real(8),allocatable :: rbuff_m(:,:)[:] + integer,allocatable :: irbuff_p(:)[:] + integer,allocatable :: irbuff_m(:)[:] + integer,allocatable :: irsbuf_p(:)[:] + integer,allocatable :: irsbuf_m(:)[:] + integer,allocatable :: ncatmw(:,:,:,:) + end module +c---------------------------------------------------------------------- + module md_multiplestep + integer(4) :: maxMTm=1 + integer(4) :: maxMTl=1 + real(8),allocatable :: fshort(:,:),fmiddle(:,:),flong(:,:) + real(8) :: eneshort,enemiddle,enelong + end module +!---------------------------------------------------------------------- + module mpivar + implicit none + integer(4) :: myrank=0, nprocs=1, mpiout=0 + end module +!---------------------------------------------------------------------- + module ompvar + integer(4) :: nomp=1 + end module +!---------------------------------------------------------------------- +! 
SUBROUTINEs +!---------------------------------------------------------------------- + subroutine init_openmp + use ompvar + implicit none +!$ include 'omp_lib.h' +!$ nomp = omp_get_max_threads() + return + end +c---------------------------------------------------------------------- + subroutine fmod_set_maxsegments + use md_fmm + use md_periodic + use md_fmm_domdiv_flg + implicit none + max_nsegments_per_cell = + - max(int(nsegments/ncell**3*margin_mnpc),60) + return + end +c---------------------------------------------------------------------- + subroutine fmod_set_ncell() + use trj_mpi + use md_fmm + use md_fmm + use md_fmm_domdiv_flg + implicit none + integer(4) :: ntmp + nimage = ncell*3 + nlevel = int(log(dble(ncell))/log(2.0d0)+0.000001d00) + ntmp = (nmax+1)*(nmax+1) + allocate(winput(ntmp)) + allocate(woutput(ntmp)) + allocate(wewald(ntmp)) + allocate(premm(ntmp,ntmp)) + allocate(preml(ntmp,ntmp)) + allocate(prell(ntmp,ntmp)) + allocate(shmm(ntmp,ntmp,0:1,0:1,0:1,0:nlevel)) + allocate(shml(ntmp,ntmp,-5:5,-5:5,-5:5,0:nlevel)) + allocate(shll(ntmp,ntmp,0:1,0:1,0:1,0:nlevel)) + allocate(shew(ntmp,ntmp)) + winput = dcmplx(0.0d0, 0.0d0) + woutput = dcmplx(0.0d0, 0.0d0) + wewald = dcmplx(0.0d0, 0.0d0) + premm = dcmplx(0.0d0, 0.0d0) + preml = dcmplx(0.0d0, 0.0d0) + prell = dcmplx(0.0d0, 0.0d0) + shmm = dcmplx(0.0d0, 0.0d0) + shml = dcmplx(0.0d0, 0.0d0) + shll = dcmplx(0.0d0, 0.0d0) + shew = dcmplx(0.0d0, 0.0d0) + end +c---------------------------------------------------------------------- + subroutine abort_with_message_a(mesg) + implicit none + character(LEN=*) mesg + write(0,'(a)') mesg + call mpistop() + end diff --git a/MODYLAS-MINI/src/xmpAPI_k_energy.f b/MODYLAS-MINI/src/xmpAPI_k_energy.f new file mode 100755 index 0000000..4ca5e10 --- /dev/null +++ b/MODYLAS-MINI/src/xmpAPI_k_energy.f @@ -0,0 +1,96 @@ +!---------------------------------------------------------------------- +! Copyright (C) 2003-2014 Kensuke Iwahashi, Noriyuki Yoshii, +! 
Atsushi Yamada, Yoshimichi Andoh, +! Kazushi Fujimoto, Hidekazu Kojima, +! Fumiyasu Mizutani, and Susumu Okazaki +! All Rights Reserved. +! +! Copyright (C) 20013-2014 RIKEN AICS +! All Rights Reserved. +! +! This MD program has been developed at Nagoya University, and +! Institute for Molecular Science, National Institutes of Natural +! Sciences. +! And this work was supported by +! Next-Generation Supercomputer Project, and +! NAREGI Nanoscience Project, +! Ministry of Education, Culture, Sports, Science and Technology, +! Japan. +! +! This program is NOT a free software and distributed under the +! license described in the LICENSE. +! All rights are reserved by the authors of this program. +! +! The authors do NOT warrant or assume any legal liability or +! responsibility for the accuracy or completeness. +!---------------------------------------------------------------------- +cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc +c +c kinetic energy calculation of particles +c +c (kinetic) +cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc + subroutine k_energy_scaler(k_ene_sum) + use atommass + use trj_org + use trj_mpi + use md_const + use md_fmm + use md_fmm_domdiv_flg + use param + use mpivar + use ompvar + implicit none + include 'mpif.h' + integer(4) :: ierr +!$ include 'omp_lib.h' + + integer(4) :: ii,i0,ipar,iam + integer(4) :: i,j,k + integer(4) :: icx0,icy0,icz0,icxyz0 + real(8) :: wk_k_ene(0:nomp-1) + real(8) :: k_ene_sum + real(8) :: wk_ksum + real(8) :: wmass + + iam = 0 + +!$omp parallel default(none) +!$omp& private(iam,ii,i0,j,k,ipar,wmass) +!$omp& private(icx0,icy0,icz0,icxyz0) +!$omp& shared(wkv,mass,wk_k_ene,m2i,paranum) +!$omp& shared(tag,na_per_cell,lxdiv,lydiv,lzdiv) +!$ iam = omp_get_thread_num() + wk_k_ene(iam) = 0.0d0 +!$omp do + do ii=1,lxdiv*lydiv*lzdiv + icz0=mod(ii-1,lzdiv) +3 + icy0=mod(ii-1,lzdiv*lydiv) + icy0=icy0/lzdiv +3 + icx0=(ii-1)/(lzdiv*lydiv)+3 + do i0=tag(icz0,icy0,icx0), + & 
tag(icz0,icy0,icx0)+na_per_cell(icz0,icy0,icx0)-1 + ipar=paranum(m2i(i0)) + wmass=mass(ipar) + wk_k_ene(iam)=wk_k_ene(iam)+wmass*(wkv(1,i0)*wkv(1,i0) + & +wkv(2,i0)*wkv(2,i0) + & +wkv(3,i0)*wkv(3,i0)) + enddo ! i0 + enddo ! ii +!$omp end do +!$omp end parallel + + wk_ksum=0d0 + do iam=0,nomp-1 + wk_ksum=wk_ksum+0.5d0*wk_k_ene(iam) + enddo + +!coarray call mpi_allreduce(wk_ksum,k_ene_sum,1, +!coarray & mpi_double_precision,mpi_sum,mpi_comm_world,ierr) + k_ene_sum = wk_ksum + call co_sum(k_ene_sum) +!! + + return + end +cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc diff --git a/MODYLAS-MINI/src/xmpAPI_mpitool.f b/MODYLAS-MINI/src/xmpAPI_mpitool.f new file mode 100755 index 0000000..678cc8c --- /dev/null +++ b/MODYLAS-MINI/src/xmpAPI_mpitool.f @@ -0,0 +1,59 @@ +!---------------------------------------------------------------------- +! Copyright (C) 2003-2014 Kensuke Iwahashi, Noriyuki Yoshii, +! Atsushi Yamada, Yoshimichi Andoh, +! Kazushi Fujimoto, Hidekazu Kojima, +! Fumiyasu Mizutani, and Susumu Okazaki +! All Rights Reserved. +! +! Copyright (C) 20013-2014 RIKEN AICS +! All Rights Reserved. +! +! This MD program has been developed at Nagoya University, and +! Institute for Molecular Science, National Institutes of Natural +! Sciences. +! And this work was supported by +! Next-Generation Supercomputer Project, and +! NAREGI Nanoscience Project, +! Ministry of Education, Culture, Sports, Science and Technology, +! Japan. +! +! This program is NOT a free software and distributed under the +! license described in the LICENSE. +! All rights are reserved by the authors of this program. +! +! The authors do NOT warrant or assume any legal liability or +! responsibility for the accuracy or completeness. 
+!---------------------------------------------------------------------- +c---------------------------------------------------------------------- +c + subroutine mpistart + use mpivar + implicit none + include 'mpif.h' + integer(4) :: ierr + +!coarray call mpi_init(ierr) +!coarray call mpi_comm_size(mpi_comm_world,nprocs,ierr) +!coarray call mpi_comm_rank(mpi_comm_world,myrank,ierr) + nprocs = num_images() + myrank = this_image()-1 +!! + mpiout=0 + return + end + + subroutine mpiend + implicit none +!coarray include 'mpif.h' +!coarray integer ierr +!coarray call mpi_finalize(ierr) + return + end + + subroutine mpistop + implicit none + include 'mpif.h' + integer ierr + call mpi_abort(mpi_comm_world,ierr) + return + end diff --git a/MODYLAS-MINI/src/xmpAPI_nve_integrate.f b/MODYLAS-MINI/src/xmpAPI_nve_integrate.f new file mode 100755 index 0000000..56a27d4 --- /dev/null +++ b/MODYLAS-MINI/src/xmpAPI_nve_integrate.f @@ -0,0 +1,205 @@ +!---------------------------------------------------------------------- +! Copyright (C) 2003-2014 Kensuke Iwahashi, Noriyuki Yoshii, +! Atsushi Yamada, Yoshimichi Andoh, +! Kazushi Fujimoto, Hidekazu Kojima, +! Fumiyasu Mizutani, and Susumu Okazaki +! All Rights Reserved. +! +! Copyright (C) 20013-2014 RIKEN AICS +! All Rights Reserved. +! +! This MD program has been developed at Nagoya University, and +! Institute for Molecular Science, National Institutes of Natural +! Sciences. +! And this work was supported by +! Next-Generation Supercomputer Project, and +! NAREGI Nanoscience Project, +! Ministry of Education, Culture, Sports, Science and Technology, +! Japan. +! +! This program is NOT a free software and distributed under the +! license described in the LICENSE. +! All rights are reserved by the authors of this program. +! +! The authors do NOT warrant or assume any legal liability or +! responsibility for the accuracy or completeness. 
+!---------------------------------------------------------------------- +c---------------------------------------------------------------------- + subroutine nve_integrate() +c---------------------------------------------------------------------- + use atommass + use shakerattleroll + use md_condition + use md_const + use md_forces + use md_monitors + use trj_mpi + use md_fmm + use md_fmm_domdiv_flg + use param + use md_multiplestep + use md_segment + use mpivar + use unitcell +#include "timing.h90" + implicit none + integer(4) :: i0,ipar,k0 + integer(4) :: MTm, MTl + real(8) :: dtL, dthL, scaleM, scaleL + + dtL =dt/maxMTm/maxMTl + dthL=0.5d0 * dtL + + DO MTl=1,maxMTl !! == Long-range force == + DO MTm=1,maxMTm !! == Middle-range force == + + if(totnconst > 0) then +!$omp parallel default(shared) +!$omp& private(k0,i0) +!$omp do + do k0=1,nselfseg + do i0=lsegtop(k0),lsegtop(k0)+lseg_natoms(k0)-1 + xyzstr(:,i0) = wkxyz(:,i0) + enddo ! i0 + enddo ! k0 +!$omp end do +!$omp end parallel + endif + +!$omp parallel default(shared) +!$omp& private(k0,i0,ipar) +!$omp do + do k0=1,nselfseg + do i0=lsegtop(k0),lsegtop(k0)+lseg_natoms(k0)-1 + ipar=paranum(m2i(i0)) + wkv(1,i0)=wkv(1,i0)+wk_f(1,i0)*dthL*r_mass(ipar) + wkv(2,i0)=wkv(2,i0)+wk_f(2,i0)*dthL*r_mass(ipar) + wkv(3,i0)=wkv(3,i0)+wk_f(3,i0)*dthL*r_mass(ipar) + wkxyz(1,i0) = wkxyz(1,i0) + wkv(1,i0)*dtL + wkxyz(2,i0) = wkxyz(2,i0) + wkv(2,i0)*dtL + wkxyz(3,i0) = wkxyz(3,i0) + wkv(3,i0)*dtL + enddo ! i0 + enddo ! k0 +!$omp end do +!$omp end parallel + + if (totnconst > 0) then + TIME_START(TM_SHAKE) + call shake_roll(dtL) + TIME_STOP(TM_SHAKE) + endif + + if(MTl==maxMTl.and.MTm==maxMTm)then + call update_wsegc() + TIME_START(TM_MIGRATION) + call comm_bound() + TIME_STOP(TM_MIGRATION) + TIME_START(TM_SHAKE) + if (totnconst > 0) call update_shake_local() + TIME_STOP(TM_SHAKE) + endif + + TIME_START(TM_COMM_DIRECT) + call comm_direct_3() + TIME_STOP(TM_COMM_DIRECT) + call apply_pbc() +!! 
^^^ short ^^^ + call md_calculate_forces_charmm_short( + - fshort,eneshort) +!! ^^^ middle ^^^ + if(MTm==maxMTm)then + scaleM=1d0 + TIME_START(TM_ENERGY_DIRECT) + call md_calculate_forces_charmm_middle( + - fmiddle,enemiddle) + TIME_STOP(TM_ENERGY_DIRECT) + else + scaleM=0d0 + endif +!! ^^^ long ^^^ + if(MTl==maxMTl.and.MTm==maxMTm)then + scaleL=1d0 + TIME_START(TM_FMM) + call md_calculate_forces_charmm_long( + - flong,enelong) + TIME_STOP(TM_FMM) + else + scaleL=0d0 + endif + +!! ^^^ sum short, middle, and long ^^^ + wk_p_energy=eneshort+enemiddle+enelong + +!$omp parallel default(shared) +!$omp& private(k0,i0) +!$omp do + do k0=1,nselfseg + do i0=lsegtop(k0),lsegtop(k0)+lseg_natoms(k0)-1 + wk_f(:,i0)=fshort(:,i0)+scaleM*fmiddle(:,i0)*maxMTm + & +scaleL*flong(:,i0) *maxMTm*maxMTl + enddo + enddo +!$omp end do +!$omp end parallel + +!$omp parallel default(shared) +!$omp& private(k0,i0,ipar) +!$omp do + do k0=1,nselfseg + do i0=lsegtop(k0),lsegtop(k0)+lseg_natoms(k0)-1 + ipar=paranum(m2i(i0)) + wkv(1,i0) = wkv(1,i0) + wk_f(1,i0)*dthL*r_mass(ipar) + wkv(2,i0) = wkv(2,i0) + wk_f(2,i0)*dthL*r_mass(ipar) + wkv(3,i0) = wkv(3,i0) + wk_f(3,i0)*dthL*r_mass(ipar) + enddo ! i0 + enddo ! k0 +!$omp end do +!$omp end parallel + + if (totnconst .gt. 0) then + TIME_START(TM_RATTLE) + call rattle_roll(dtL) + TIME_STOP(TM_RATTLE) + endif + + ENDDO ! MT middle + ENDDO ! MT long + + return + end +c---------------------------------------------------------------------- +c +c calculate thermodynamic values +c +c---------------------------------------------------------------------- + subroutine calc_hamiltonian_nve() +c---------------------------------------------------------------------- + use md_const + use md_monitors + use md_periodic + use md_condition + use mpivar + use unitcell + implicit none + real(8) :: totke + include 'mpif.h' + integer(4) :: ierr + +! 
^^^ reduce potential energy ^^^ +!coarray call mpi_allreduce(wk_p_energy,p_energy,1, +!coarray & mpi_double_precision,mpi_sum,mpi_comm_world,ierr) + p_energy = wk_p_energy + call co_sum(p_energy) +!! + +! ^^^ reduce kinetic energy ^^^ + call k_energy_scaler(totke) + +! ^^^ temperature ^^^ + k_energy = totke + temperature = 2.0d0*totke*degree_of_freedom_inverse*rvkbolz + t_energy = p_energy + k_energy + hamiltonian = t_energy + + return + end diff --git a/MODYLAS-MINI/src/xmpAPI_parse_input.f b/MODYLAS-MINI/src/xmpAPI_parse_input.f new file mode 100755 index 0000000..b92c72c --- /dev/null +++ b/MODYLAS-MINI/src/xmpAPI_parse_input.f @@ -0,0 +1,348 @@ +!---------------------------------------------------------------------- +! Copyright (C) 2003-2014 Kensuke Iwahashi, Noriyuki Yoshii, +! Atsushi Yamada, Yoshimichi Andoh, +! Kazushi Fujimoto, Hidekazu Kojima, +! Fumiyasu Mizutani, and Susumu Okazaki +! All Rights Reserved. +! +! Copyright (C) 20013-2014 RIKEN AICS +! All Rights Reserved. +! +! This MD program has been developed at Nagoya University, and +! Institute for Molecular Science, National Institutes of Natural +! Sciences. +! And this work was supported by +! Next-Generation Supercomputer Project, and +! NAREGI Nanoscience Project, +! Ministry of Education, Culture, Sports, Science and Technology, +! Japan. +! +! This program is NOT a free software and distributed under the +! license described in the LICENSE. +! All rights are reserved by the authors of this program. +! +! The authors do NOT warrant or assume any legal liability or +! responsibility for the accuracy or completeness. +!---------------------------------------------------------------------- +!------------------------------------------------------------------------ + subroutine parse_input() + use mpivar + use md_file + implicit none + +!-- mdconf + call read_mdconf() + if (myrank == 0) write(*,'(/,a)') + & '**** '//trim(session_name) //'.mdconf read end successfully!' 
+ +!-- mdxyz.bin + call read_mdxyzbin() + if (myrank == 0) write(*,'(/,a)') + & '**** '//trim(session_name)//'.mdxyz.bin read end successfully!' + +!-- mdff.bin + call read_mdffbin() + if (myrank == 0) write(*,'(/,a)') + & '**** '//trim(session_name)//'.mdff.bin read end successfully!' + + end +!------------------------------------------------------------------------ + subroutine read_mdconf + use md_file + use md_const + use md_condition + use md_fmm_domdiv_flg + use md_multiplestep + use nhc + use param + use md_segment + use md_periodic + use shakerattleroll + use g_main + use cutoffradius + use md_fmm + use md_ewald + use ConfigRead + implicit none + + call ConfigRead_parse(trim(session_name)//'.mdconf') + + trj_start = ConfigRead_get_int('trj_start') + trj_interval = ConfigRead_get_int('trj_interval') + restart_start = ConfigRead_get_int('restart_start') + restart_interval = ConfigRead_get_int('restart_interval') + mntr_start = ConfigRead_get_int('mntr_start') + mntr_interval = ConfigRead_get_int('mntr_interval') + randomseed = ConfigRead_get_int('randomseed') + dt = ConfigRead_get_double('dt') + md_condition__howmany_steps = ConfigRead_get_int('step') + reset_maxwell = ConfigRead_get_bool('maxwell_velocities') + maxwell_temperature = ConfigRead_get_double('temperature') + maxMTm = ConfigRead_get_int('nstep_skip_middle') + maxMTl = ConfigRead_get_int('nstep_skip_long') + mpi_manual_division_flg = ConfigRead_get_bool('manual_division') + if (mpi_manual_division_flg) then + nxdiv = ConfigRead_get_int('nxdiv') + nydiv = ConfigRead_get_int('nydiv') + nzdiv = ConfigRead_get_int('nzdiv') + end if + maxshakecycle = ConfigRead_get_int('shake_max_iteration') + shake_tolerance = ConfigRead_get_double('shake_tolerance') + cutrad = 1.0d-10 * ConfigRead_get_double('cutoff') + cutrad2 = cutrad * cutrad + ndirect = ConfigRead_get_int('ndirect') + nmax = ConfigRead_get_int('nmax') + lgflg = ConfigRead_get_int('ULswitch') + ewald_sterm = 
ConfigRead_get_bool('ewald_surface_term') + ncell = ConfigRead_get_int('ncell') + + call fmod_set_ncell() + + end +!------------------------------------------------------------------------ + subroutine read_mdxyzbin + use trj_org + use trj_mpi + use nhc + use md_file + use g_main + use mpivar + use unitcell + implicit none + integer(4) :: io, n_nhc +!KF + integer(4) :: ierr + include 'mpif.h' +!KF end + if(myrank==0) then + open(f_restart_bin, file=trim(session_name)// '.mdxyz.bin', + & iostat=io, status='old', + & access='sequential',form='unformatted') + if (io /= 0) then + call abort_with_message_a('Cannot read mdxyz.bin file.') + endif + read(f_restart_bin,end=100) n + endif +!coarray call MPI_Bcast(n, 1, MPI_INTEGER4, 0, MPI_COMM_WORLD, ierr) + call co_broadcast(n,source_image=1) +!! + + if (.not. allocated(xyz)) then + allocate(xyz(3,n)) + allocate(v(3,n)) + allocate(i2m(n)) + endif + if(myrank.eq.0) then +c Read coordinates and velocities of atoms. +c Trajectory must be arranged in the same order as input. + read(f_restart_bin) xyz(1:3,:), v(1:3,:) +c Read positions and velocities of thermostats. + read(f_restart_bin) n_nhc + if (n_nhc .ne. nnhc) call abort_with_message_a('nnhc') + read(f_restart_bin) rss, vss +c Read positions and velocities of barostats. + read(f_restart_bin) n_nhc + if (n_nhc .ne. nnhc) call abort_with_message_a('nnhc') + read(f_restart_bin) rssb, vssb +c Read cell parameters (length and angles). + read(f_restart_bin) cellx,celly,cellz, alpha,beta,gamma,vboxg + endif ! 
myrank==0 + 100 continue + close(f_restart_bin) +!KF +!coarray call MPI_Bcast(xyz , 3*n, MPI_REAL8 , 0, MPI_COMM_WORLD, ierr) +!coarray call MPI_Bcast(v , 3*n, MPI_REAL8 , 0, MPI_COMM_WORLD, ierr) +!coarray call MPI_Bcast(n_nhc, 1 , MPI_INTEGER4, 0, MPI_COMM_WORLD, ierr) +!coarray call MPI_Bcast(rss , 5 , MPI_REAL8 , 0, MPI_COMM_WORLD, ierr) +!coarray call MPI_Bcast(vss , 5 , MPI_REAL8 , 0, MPI_COMM_WORLD, ierr) +!coarray call MPI_Bcast(rssb , 5 , MPI_REAL8 , 0, MPI_COMM_WORLD, ierr) +!coarray call MPI_Bcast(vssb , 5 , MPI_REAL8 , 0, MPI_COMM_WORLD, ierr) +!coarray call MPI_Bcast(cellx, 1 , MPI_REAL8 , 0, MPI_COMM_WORLD, ierr) +!coarray call MPI_Bcast(celly, 1 , MPI_REAL8 , 0, MPI_COMM_WORLD, ierr) +!coarray call MPI_Bcast(cellz, 1 , MPI_REAL8 , 0, MPI_COMM_WORLD, ierr) +!coarray call MPI_Bcast(alpha, 1 , MPI_REAL8 , 0, MPI_COMM_WORLD, ierr) +!coarray call MPI_Bcast(beta , 1 , MPI_REAL8 , 0, MPI_COMM_WORLD, ierr) +!coarray call MPI_Bcast(gamma, 1 , MPI_REAL8 , 0, MPI_COMM_WORLD, ierr) +!coarray call MPI_Bcast(vboxg, 9 , MPI_REAL8 , 0, MPI_COMM_WORLD, ierr) + call co_broadcast( xyz(1:3,1:n), source_image=1 ) + call co_broadcast( v(1:3,1:n), source_image=1 ) + call co_broadcast( n_nhc, source_image=1 ) + call co_broadcast( rss(1:5), source_image=1 ) + call co_broadcast( vss(1:5), source_image=1 ) + call co_broadcast( rssb(1:5), source_image=1 ) + call co_broadcast( vssb(1:5), source_image=1 ) + call co_broadcast( cellx, source_image=1 ) + call co_broadcast( celly, source_image=1 ) + call co_broadcast( cellz, source_image=1 ) + call co_broadcast( alpha, source_image=1 ) + call co_broadcast( beta, source_image=1 ) + call co_broadcast( gamma, source_image=1 ) + call co_broadcast( vboxg(1:3,1:3), source_image=1 ) +!! 
+!KF end + cellxh = 0.5d0 * cellx + cellyh = 0.5d0 * celly + cellzh = 0.5d0 * cellz + cellvol = cellx*celly*cellx + + return + end +!---------------------------------------------------------------------- + subroutine read_mdffbin + use md_file + use mpivar + use trj_org + use param + use md_periodic + use md_segment + use atommass + use md_coulomb + use md_charmm_lj + use shakerattleroll + use pshake_init + use md_void + implicit none + include 'mpif.h' + integer(4) :: force_field + integer(4) :: num + integer(4),allocatable :: dummy(:) + integer(4) :: i, j, k, ierr, io + + if(myrank==0) then + open(f_mdff, file=trim(session_name)//'.mdff.bin', iostat=io, + & status='old', access='sequential', form='unformatted') + if(io.ne.0) then + call abort_with_message_a('Cannot open mdff.bin file.') + endif + endif + + if(myrank==0) then + read(f_mdff) force_field + if (force_field /= 100) then + call abort_with_message_a('Non-CHARMM potential is used!') + endif + endif + +!-- parameter_number + call read_bcast_int(npara, 1, 1, f_mdff) +!* if (myrank == 0) write(*,*) 'npara=', npara + + allocate(paranum(n)) + call read_bcast_int(paranum, 1, n, f_mdff) + +!-- read_segment + call read_bcast_int(nsegments, 1, 1, f_mdff) + + allocate(seg_natoms(nsegments),segtop(nsegments)) + call read_bcast_int(seg_natoms, 1, nsegments, f_mdff) + call read_bcast_int(segtop, 1, nsegments, f_mdff) + + allocate(seg_cx(nsegments),seg_cy(nsegments),seg_cz(nsegments)) + +!-- molecule (not used) + if (myrank==0) then + read(f_mdff) num + allocate(dummy(num)) + read(f_mdff) dummy + read(f_mdff) dummy + deallocate(dummy) + end if + +!-- mass + allocate(mass(npara)) + allocate(r_mass(npara)) + call read_bcast_real(mass, 1, npara, f_mdff) + + r_mass(:) = 1.0d0 / mass(:) + +!-- charge + allocate(chgv(npara)) + call read_bcast_real(chgv, 1, npara, f_mdff) + +!-- LJ + allocate(epsilon_sqrt(npara),R_half(npara)) + call read_bcast_real(epsilon_sqrt, 1, npara, f_mdff) + call read_bcast_real(R_half, 1, npara, 
f_mdff) + +!-- shake + call read_bcast_int(totnconst, 1, 1, f_mdff) + if (totnconst==0) then + call abort_with_message_a('totnconst = 0 !') + endif + + call read_bcast_int(rngrp_ps, 1, 1, f_mdff) + + allocate(ShakeGroupLeader(npara)) + call read_bcast_int(ShakeGroupLeader, npara, 1, f_mdff) + + allocate(nconstraints(rngrp_ps)) + call read_bcast_int(nconstraints, rngrp_ps, 1, f_mdff) + + allocate(atom1S(rngrp_ps,10), atom2S(rngrp_ps,10)) + allocate(slength(rngrp_ps,10)) + atom1S(:,:) = -1 + atom2S(:,:) = -1 + slength(:,:) = -1.0d0 + if(myrank==0) then + do i = 1, rngrp_ps + do j = 1, nconstraints(i) + read(f_mdff) atom1S(i,j), atom2S(i,j), slength(i,j) + enddo + enddo + endif +!coarray call MPI_Bcast(atom1S, rngrp_ps*10, MPI_INTEGER4, 0, +!coarray & MPI_COMM_WORLD, ierr) +!coarray call MPI_Bcast(atom2S, rngrp_ps*10, MPI_INTEGER4, 0, +!coarray & MPI_COMM_WORLD, ierr) +!coarray call MPI_Bcast(slength, rngrp_ps*10, MPI_REAL8, 0, +!coarray & MPI_COMM_WORLD, ierr) + call co_broadcast( atom1S, source_image=1 ) + call co_broadcast( atom2S, source_image=1 ) + call co_broadcast( slength, source_image=1 ) +!! 
+ +!-- void + call read_bcast_int(nvoid, 1, 1, f_mdff) +!* if (myrank == 0) write(*,*) 'input,nvoid=',nvoid + if(nvoid==0) return + + call read_bcast_int(num, 1, 1, f_mdff) + + allocate(void_n(npara)) + allocate(void_atom1(npara,num),void_atom2(npara,num)) + call read_bcast_int(void_n, 1, npara, f_mdff) + call read_bcast_int(void_atom1, 1, npara*num, f_mdff) + call read_bcast_int(void_atom2, 1, npara*num, f_mdff) + +!-- + if(myrank==0) close(f_mdff) + + return + end +!------------------------------------------------------------------------- + subroutine read_bcast_int(data, n, m, in) + use mpivar + implicit none + include 'mpif.h' + integer(4) :: n, m, data(m, n), in + integer(4) :: i, ierr + do i = 1, n + if (myrank == 0) read(in) data(1:m,i) + end do + call MPI_Bcast(data, n*m, MPI_INTEGER4, 0, MPI_COMM_WORLD, ierr) + end +!------------------------------------------------------------------------- + subroutine read_bcast_real(data, n, m, in) + use mpivar + implicit none + include 'mpif.h' + integer(4) :: n, m, in + real(8) :: data(m, n) + integer(4) :: i, ierr + do i = 1, n + if (myrank == 0) read(in) data(1:m,i) + end do +!coarray call MPI_Bcast(data, n*m, MPI_REAL8, 0, MPI_COMM_WORLD, ierr) + call co_broadcast( data(1:m,1:n), source_image=1 ) +!! + end From c2406f69a1da57ae61d0bc10bd5710ec8e703328 Mon Sep 17 00:00:00 2001 From: tozaki_chisae Date: Thu, 4 Mar 2021 20:12:09 +0900 Subject: [PATCH 13/70] [WIP] modify 3 file. --- FFB-MINI/src/dd_mpi/dd_mpi.F90 | 107 +++++++++++++++++++++++++++++---- FFB-MINI/src/les3x.F | 41 +++++++++++-- FFB-MINI/src/make_setting | 9 ++- 3 files changed, 138 insertions(+), 19 deletions(-) diff --git a/FFB-MINI/src/dd_mpi/dd_mpi.F90 b/FFB-MINI/src/dd_mpi/dd_mpi.F90 index f317230..09059c5 100755 --- a/FFB-MINI/src/dd_mpi/dd_mpi.F90 +++ b/FFB-MINI/src/dd_mpi/dd_mpi.F90 @@ -367,7 +367,7 @@ SUBROUTINE DDCOM2(SEND,RECV) IMPLICIT REAL*4(A-H,O-Z) ! INCLUDE 'mpif.h' - INCLUDE 'xmp_coarray.h' +! 
INCLUDE 'xmp_coarray.h' !C$XMP NODES PDDCOM2(*) ! ! @@ -791,21 +791,42 @@ SUBROUTINE DDCOM3(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT END ! SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT0,IERR,& - BUFSND, BUFRCV, MAXBUF) - INCLUDE 'mpif.h' - INCLUDE 'xmp_coarray.h' + snd_desc, rcv_desc, MAXBUF) +!fj BUFSND, BUFRCV, MAXBUF) +! Fujitsu start 202103 + use xmp_api + use mpi +! Fujitsu end 202103 +! INCLUDE 'mpif.h' +! INCLUDE 'xmp_coarray.h' IMPLICIT REAL*4(A-H,O-Z) !CTTDEBG REAL*8 DFX(NP),DFY(NP),DFZ(NP) !CTTDEBG DIMENSION LDOM(NDOM),NBPDOM(NDOM),IPSLF(MBPDOM,NDOM),IPSND(MBPDOM,NDOM), FX(NP),FY(NP),FZ(NP) - DIMENSION BUFSND(MAXBUF)[*], BUFRCV(MAXBUF)[*] +! Fujitsu start 202103 +! DIMENSION BUFSND(MAXBUF)[*], BUFRCV(MAXBUF)[*] + INTEGER*4 , POINTER :: BUFSND ( : , : ) => null ( ) + INTEGER*4 , POINTER :: BUFRCV ( : , : ) => null ( ) + INTEGER*8 :: snd_desc, rcv_desc + INTEGER*8 :: snd_sec, rcv_sec + INTEGER*8, DIMENSION(1) :: snd_lb, snd_ub, rcv_lb, rcv_ub + INTEGER*8 :: st_desc, st_l_desc + INTEGER*8 :: st_sec, st_l_sec + INTEGER*8, DIMENSION(1) :: st_lb, st_ub, st_l_lb, st_l_ub + INTEGER*4 :: img_dims(1) + INTEGER*4 status +! Fujitsu end 202103 ! PARAMETER ( MAXDOM = 10000 ) INTEGER*4 MSGIDS(MAXDOM),MSGSTS(MPI_STATUS_SIZE,MAXDOM) ! INTEGER MAX_RECV_LEN - INTEGER ,ALLOCATABLE :: START_R(:)[:] +! Fujitsu start 202103 +! INTEGER ,ALLOCATABLE :: START_R(:)[:] + INTEGER*4 , POINTER :: START_R ( : , : ) => null ( ) + INTEGER*4 , POINTER :: start_rr_p ( : , : ) => null ( ) +! Fujitsu end 202103 ! INTEGER ,ALLOCATABLE :: END_R(:)[:] INTEGER ,ALLOCATABLE :: START_S(:) INTEGER ,ALLOCATABLE :: END_S(:) @@ -901,7 +922,30 @@ SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT ! THE RESIDUALS FROM THE NEIGHBORING ! SUB-DOMAINS CALL MPI_COMM_RANK(MPI_COMM_WORLD,ITASK,IERR) - allocate(START_R(1:NP)[*]) +! Fujitsu start 202103 + call xmp_api_init +! 
+ snd_lb(1) = 1 + snd_ub(1) = MAXBUF + rcv_lb(1) = 1 + rcv_ub(1) = MAXBUF +! call xmp_new_coarray(snd_desc, 4, 1, snd_lb, snd_ub, 1, img_dims) +! call xmp_new_coarray(rcv_desc, 4, 1, rcv_lb, rcv_ub, 1, img_dims) +! + call xmp_coarray_bind(snd_desc,BUFSND) + call xmp_coarray_bind(rcv_desc,BUFRCV) +! +! allocate(START_R(1:NP)[*]) + st_lb(1) = 1 + st_ub(1) = NP + st_l_lb(1) = 1 + st_l_ub(1) = 1 + call xmp_new_coarray(st_desc, 4, 1, st_lb, st_ub, 1, img_dims) +! call xmp_new_local_array(st_l_desc, 4, 1, st_l_lb, st_l_ub) + call xmp_new_coarray(st_l_desc, 4, 1, st_l_lb, st_l_ub, 1, img_dims) + call xmp_coarray_bind(st_desc,START_R) + call xmp_coarray_bind(st_l_desc,start_rr_p) +! Fujitsu end 202103 ! allocate(END_R(1:NP)[*]) allocate(START_S(1:NP)) allocate(END_S(1:NP)) @@ -1065,19 +1109,46 @@ SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT 220 CONTINUE ! ! - SYNC ALL - +! Fujitsu start 202103 +! SYNC ALL + call xmp_sync_all(status) +! + call xmp_new_array_section(snd_sec,1) + call xmp_new_array_section(rcv_sec,1) + call xmp_new_array_section(st_sec,1) + call xmp_new_array_section(st_l_sec,1) +! Fujitsu start 202103 +! DO IDOM = 1, NDOM ! PRINT *,ME,"->",LDOM(IDOM)," BUFRECV(",START_R(ME)[LDOM(IDOM)],":",END_R(ME)[LDOM(IDOM)],")[",LDOM(IDOM),"]=BUFSND(",START_S(LDOM(IDOM)),":",END_S(LDOM(IDOM)),")" ! BUFRCV(START_R(ME)[LDOM(IDOM)]:END_R(ME)[LDOM(IDOM)])[LDOM(IDOM)] = & ! BUFSND(START_S(LDOM(IDOM)):END_S(LDOM(IDOM))) - START_RR = START_R(ME)[LDOM(IDOM)] +! Fujitsu start 202103 +! START_RR = START_R(ME)[LDOM(IDOM)] + call xmp_array_section_set_triplet(st_sec,1,ME,ME,1,status) + call xmp_array_section_set_triplet(st_l_sec,1,1,1,1,status) + img_dims = LDOM(IDOM) + call xmp_coarray_get(img_dims,st_desc,st_sec, & + st_l_desc,st_l_sec,status) + START_RR = start_rr_p(1) +! Fujitsu end 202103 END_RR = START_RR + (END_S(LDOM(IDOM)) - START_S(LDOM(IDOM))) - BUFRCV(START_RR:END_RR)[LDOM(IDOM)] = & - BUFSND(START_S(LDOM(IDOM)):END_S(LDOM(IDOM))) +! 
Fujitsu start 202103 +! BUFRCV(START_RR:END_RR)[LDOM(IDOM)] = & +! BUFSND(START_S(LDOM(IDOM)):END_S(LDOM(IDOM))) + call xmp_array_section_set_triplet(rcv_sec,1,START_RR,END_RR,1,status) + START_RR = START_S(LDOM(IDOM)) + END_RR = END_S(LDOM(IDOM)) + call xmp_array_section_set_triplet(snd_sec,1,START_RR,END_RR,1,status) + img_dims = LDOM(IDOM) + call xmp_coarray_put(img_dims,rcv_desc,rcv_sec,snd_desc,snd_sec,status); +! Fujitsu end 202103 END DO - SYNC ALL +! Fujitsu start 202103 +! SYNC ALL + call xmp_sync_all(status) +! Fujitsu end 202103 ! SYNC ALL ! DO 231 IDOM = 1 , NDOM @@ -1272,6 +1343,16 @@ SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT RETURN ENDIF ! +! Fujitsu start 202103 + call xmp_free_array_section(snd_sec) + call xmp_free_array_section(rcv_sec) +! +! call xmp_coarray_deallocate(snd_desc, status) +! call xmp_coarray_deallocate(rcv_desc, status) +! + call xmp_finalize_all +! Fujitsu end 202103 +! ! IPART = IPART RETURN END diff --git a/FFB-MINI/src/les3x.F b/FFB-MINI/src/les3x.F index e6365f5..409e8f8 100755 --- a/FFB-MINI/src/les3x.F +++ b/FFB-MINI/src/les3x.F @@ -1,4 +1,4 @@ -C======================================================================C +X======================================================================C C C C SOFTWARE NAME : FRONTFLOW_BLUE.8.1 C C C @@ -16,6 +16,7 @@ C======================================================================C C* PROGRAM LES3X SUBROUTINE LES3X(FILEIN) + use xmp_api #include "timing.h" !#include "xmp_coarray.h" IMPLICIT NONE @@ -322,11 +323,22 @@ SUBROUTINE LES3X(FILEIN) * NODWK1(:,:),LEWRK(:,:), * LWRK01(:),LWRK02(:),LWRK04(:) REAL*4, ALLOCATABLE:: - * RX(:,:)[:], RY(:,:)[:], WRKN(:), +CC Fj start 202103 +CC * RX(:,:)[:], RY(:,:)[:], WRKN(:), + * WRKN(:), +CC Fj end 202103 * WRK01(:),WRK02(:),WRK03(:),WRK04(:), * WRK05(:),WRK06(:),WRK07(:),WRK08(:), * WRK09(:),WRK10(:),WRK11(:),WRK12(:), * WRK13(:),WRK3(:,:) +CC Fj start 202103 + INTEGER , POINTER :: RX ( : , : ) => null ( 
) + INTEGER , POINTER :: RY ( : , : ) => null ( ) + INTEGER*8 :: rx_desc, ry_desc + INTEGER*8, DIMENSION(1) :: rx_lb, rx_ub, ry_lb, ry_ub + INTEGER*4 :: img_dims(1) + INTEGER*4 status +CC Fj end 202103 REAL*8,ALLOCATABLE:: * DWRK01(:,:),DWRK02(:),DWRK03(:,:,:), * DWRK04(:,:),DWRK05(:) @@ -742,6 +754,10 @@ SUBROUTINE LES3X(FILEIN) C NDOM = 0 CALL DDINIT(NPART,IPART) C IF(IPART.GE.1) NDOM = 1 +C +C Fj start 202103 + call xmp_api_init +C Fj end 202103 C IF(IPART.GE.2) THEN IUT6 = IUTLG @@ -1090,8 +1106,8 @@ SUBROUTINE LES3X(FILEIN) ALLOCATE(LWRK01(MWRK), STAT=LERR(03)) ALLOCATE(LWRK02(MWRK), STAT=LERR(04)) ALLOCATE(LWRK04(MWRK), STAT=LERR(05)) - ALLOCATE(RX(N1,ME)[*], STAT=LERR(06)) - ALLOCATE(RY(N1,ME)[*], STAT=LERR(07)) +C ALLOCATE(RX(N1,ME)[*], STAT=LERR(06)) +C ALLOCATE(RY(N1,ME)[*], STAT=LERR(07)) ALLOCATE(WRKN(MWRK*9), STAT=LERR(08)) ALLOCATE(WRK01(MWRK), STAT=LERR(09)) ALLOCATE(WRK02(MWRK), STAT=LERR(10)) @@ -1112,6 +1128,16 @@ SUBROUTINE LES3X(FILEIN) ALLOCATE(DWRK03(3,N1,MGAUSS), STAT=LERR(25)) ALLOCATE(DWRK04(3,N1 ), STAT=LERR(26)) ALLOCATE(DWRK05( MGAUSS), STAT=LERR(27)) +C Fj start 202103 + rx_lb(1) = N1 + rx_ub(1) = ME + ry_lb(1) = N1 + ry_ub(1) = ME + call xmp_new_coarray(rx_desc, 4, 1, rx_lb, rx_ub, 1, img_dims) + call xmp_new_coarray(ry_desc, 4, 1, ry_lb, ry_ub, 1, img_dims) + call xmp_coarray_bind(rx_desc,RX) + call xmp_coarray_bind(ry_desc,RY) +C Fj start 202103 CALL ERRCHK(IUT6,IPART,27,LERR,IERR) IF(IERR.NE.0) THEN WRITE(IUT0,*) BLANK @@ -2145,6 +2171,13 @@ SUBROUTINE LES3X(FILEIN) IF(IPART.GE.2) CLOSE(IUT6) CALL DDEXIT C +C +C Fj start 202103 + call xmp_coarray_deallocate(rx_desc, status) + call xmp_coarray_deallocate(ry_desc, status) +C + call xmp_api_finalize +C Fj end 202103 C STOP C diff --git a/FFB-MINI/src/make_setting b/FFB-MINI/src/make_setting index e12552a..8ec3020 100755 --- a/FFB-MINI/src/make_setting +++ b/FFB-MINI/src/make_setting @@ -1,12 +1,17 @@ CC = mpicc -FC = xmpf90 +#FC = xmpf90 + +OMNI_HOME = $(shell xmpcc --show-env | grep 
OMNI_HOME | sed -e 's/OMNI_HOME=//' -e 's/"//g') +FC = mpif90 +FFLAGS = -I$(OMNI_HOME)/include -J$(OMNI_HOME)/include -fopenmp +LIBS = $(shell xmpcc --show-env | grep OMNI_LINKER_OPT | sed -e 's/OMNI_LINKER_OPT=//' -e 's/"//g') DEFINE += -DNO_METIS DEFINE += -DNO_REFINER # timing option DEFINE += -DPROF_MAPROF - +MPIHOME =/usr/local/openmpi-2.1.1.gnu/ CFLAGS += $(DEFINE) -O2 FFLAGS += $(DEFINE) -O2 -I$(MPIHOME)/include From 9d24c1cb01dec2280e042992ec9dbda5746fbbf1 Mon Sep 17 00:00:00 2001 From: Yoshikawa Hiroyuki Date: Fri, 5 Mar 2021 01:40:49 +0900 Subject: [PATCH 14/70] modify THIS_IMAGE() to XMP_THIS_IMAGE() in dd_mpi.F90 --- FFB-MINI/src/dd_mpi/dd_mpi.F90 | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/FFB-MINI/src/dd_mpi/dd_mpi.F90 b/FFB-MINI/src/dd_mpi/dd_mpi.F90 index 09059c5..ea5a35d 100755 --- a/FFB-MINI/src/dd_mpi/dd_mpi.F90 +++ b/FFB-MINI/src/dd_mpi/dd_mpi.F90 @@ -990,7 +990,10 @@ SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT ! ! PRINT *,"NDOM:",NDOM ! - ME=THIS_IMAGE() +! Fujitsu start 202103 +! ME=THIS_IMAGE() + ME=xmp_this_image() +! Fujitsu end 202103 START_DASH=0 MAX_RECV_LEN = 0 NSTART = 1 From 8256b38f06eb6b8d031f7cf9876cf833b369d81f Mon Sep 17 00:00:00 2001 From: "Yoshikawa, Hiroyuki" Date: Fri, 5 Mar 2021 10:00:13 +0900 Subject: [PATCH 15/70] modify xmpAPI_comlib.F90 to use MPI subroutines instead of co_ function --- CCS-QCD/src/xmpAPI_comlib.F90 | 162 +++++++++++++++++++--------------- 1 file changed, 92 insertions(+), 70 deletions(-) diff --git a/CCS-QCD/src/xmpAPI_comlib.F90 b/CCS-QCD/src/xmpAPI_comlib.F90 index ceccc9e..2c12a9d 100644 --- a/CCS-QCD/src/xmpAPI_comlib.F90 +++ b/CCS-QCD/src/xmpAPI_comlib.F90 @@ -159,14 +159,16 @@ subroutine comlib_bcast_i4(arg,ids) ! - arg : integer ! - ids : source destination ! 
-!coarray use mpi + use mpi implicit none integer, intent(inout) :: arg integer, intent(in) :: ids -!coarray integer :: ierr + integer :: ierr -!coarray call MPI_BCAST(arg,1,MPI_INTEGER,ids,MPI_COMM_WORLD,ierr) - call co_broadcast( arg,ids+1 ) +!--- 2020 Fujitsu + call MPI_BCAST(arg,1,MPI_INTEGER,ids,MPI_COMM_WORLD,ierr) + !call co_broadcast( arg,ids+1 ) +!--- 2020 Fujitsu end return end subroutine @@ -178,14 +180,16 @@ subroutine comlib_bcast_r8(arg,ids) ! - arg : real ! - ids : source destination ! -!coarray use mpi + use mpi implicit none real(8), intent(inout) :: arg integer, intent(in) :: ids -!coarray integer :: ierr + integer :: ierr -!coarray call MPI_BCAST(arg,1,MPI_REAL8,ids,MPI_COMM_WORLD,ierr) - call co_broadcast( arg,ids+1 ) +!--- 2020 Fujitsu + call MPI_BCAST(arg,1,MPI_REAL8,ids,MPI_COMM_WORLD,ierr) + !call co_broadcast( arg,ids+1 ) +!--- 2020 Fujitsu end return end subroutine @@ -197,14 +201,16 @@ subroutine comlib_bcast_c16(arg,ids) ! - arg : complex ! - ids : source destination ! -!coarray use mpi + use mpi implicit none complex(8), intent(inout) :: arg integer, intent(in) :: ids -!coarray integer :: ierr + integer :: ierr -!coarray call MPI_BCAST(arg,1,MPI_COMPLEX16,ids,MPI_COMM_WORLD,ierr) - call co_broadcast( arg,ids+1 ) +!--- 2020 Fujitsu + call MPI_BCAST(arg,1,MPI_COMPLEX16,ids,MPI_COMM_WORLD,ierr) + !call co_broadcast( arg,ids+1 ) +!--- 2020 Fujitsu end return end subroutine @@ -213,15 +219,17 @@ subroutine comlib_bcast_i4_array(arg,ids) ! ! Broadcast integer(4) array ! 
-!coarray use mpi + use mpi implicit none integer, intent(inout) :: arg(:) integer, intent(in) :: ids -!coarray integer :: ilen,ierr + integer :: ilen,ierr -!coarray ilen=SIZE(arg) -!coarray call MPI_BCAST(arg,ilen,MPI_INTEGER,ids,MPI_COMM_WORLD,ierr) - call co_broadcast( arg,ids+1 ) +!--- 2020 Fujitsu + ilen=SIZE(arg) + call MPI_BCAST(arg,ilen,MPI_INTEGER,ids,MPI_COMM_WORLD,ierr) + !call co_broadcast( arg,ids+1 ) +!--- 2020 Fujitsu end return end subroutine @@ -230,15 +238,17 @@ subroutine comlib_bcast_r8_array(arg,ids) ! ! Broadcast real(8) array ! -!coarray use mpi + use mpi implicit none real(8), intent(inout) :: arg(:) integer, intent(in) :: ids -!coarray integer :: ilen,ierr + integer :: ilen,ierr -!coarray ilen=SIZE(arg) -!coarray call MPI_BCAST(arg,ilen,MPI_REAL8,ids,MPI_COMM_WORLD,ierr) - call co_broadcast( arg,ids+1 ) +!--- 2020 Fujitsu + ilen=SIZE(arg) + call MPI_BCAST(arg,ilen,MPI_REAL8,ids,MPI_COMM_WORLD,ierr) + !call co_broadcast( arg,ids+1 ) +!--- 2020 Fujitsu end return end subroutine @@ -247,15 +257,17 @@ subroutine comlib_bcast_c16_array(arg,ids) ! ! Broadcast complex(8) array ! -!coarray use mpi + use mpi implicit none complex(8), intent(inout) :: arg(:) integer, intent(in) :: ids -!coarray integer :: ilen,ierr + integer :: ilen,ierr -!coarray ilen=SIZE(arg) -!coarray call MPI_BCAST(arg,ilen,MPI_COMPLEX16,ids,MPI_COMM_WORLD,ierr) - call co_broadcast( arg,ids+1 ) +!--- 2020 Fujitsu + ilen=SIZE(arg) + call MPI_BCAST(arg,ilen,MPI_COMPLEX16,ids,MPI_COMM_WORLD,ierr) + !call co_broadcast( arg,ids+1 ) +!--- 2020 Fujitsu end return end subroutine @@ -351,7 +363,7 @@ subroutine comlib_make_c16(id,node,send,recv,isize) ! - recv : 1st component of reciever data array (complex(8)) ! - isize : total data size to be send/recieved in unit of bytes ! -!coarray use mpi + use mpi implicit none type(comlib_data_c16), intent(out):: id integer, intent(in) :: node,isize @@ -364,15 +376,15 @@ subroutine comlib_make_c16(id,node,send,recv,isize) idall(myid)%sdesc=node ! 
send : myid -> node !**** make send receive table for all nodes -!coarray do i=0,numprocs-1 -!coarray call MPI_BCAST(idall(i)%sdesc,1,MPI_INTEGER,i,MPI_COMM_WORLD,ierr) - do i=1,numprocs - buf = idall(i-1)%sdesc - call co_broadcast(buf,i) - !--- 2020 Fujitsu +!--- 2020 Fujitsu + do i=0,numprocs-1 + call MPI_BCAST(idall(i)%sdesc,1,MPI_INTEGER,i,MPI_COMM_WORLD,ierr) + !do i=1,numprocs + !buf = idall(i-1)%sdesc + !call co_broadcast(buf,i) !sync all - call xmp_sync_all(ierr) - !--- 2020 Fujitsu end + !call xmp_sync_all(ierr) +!--- 2020 Fujitsu end idall(i-1)%sdesc = buf end do do i=0,numprocs-1 @@ -408,7 +420,7 @@ subroutine comlib_make_c8(id,node,send,recv,isize) ! - recv : 1st component of reciever data array (complex(4)) ! - isize : total data size to be send/recieved in unit of bytes ! -!coarray use mpi + use mpi implicit none type(comlib_data_c8), intent(out):: id integer, intent(in) :: node,isize @@ -421,15 +433,15 @@ subroutine comlib_make_c8(id,node,send,recv,isize) idall(myid)%sdesc=node ! send : myid -> node !**** make send receive table for all nodes -!coarray do i=0,numprocs-1 -!coarray call MPI_BCAST(idall(i)%sdesc,1,MPI_INTEGER,i,MPI_COMM_WORLD,ierr) - do i=1,numprocs - buf = idall(i-1)%sdesc - call co_broadcast(buf,i) - !--- 2020 Fujitsu +!--- 2020 Fujitsu + do i=0,numprocs-1 + call MPI_BCAST(idall(i)%sdesc,1,MPI_INTEGER,i,MPI_COMM_WORLD,ierr) + !do i=1,numprocs + !buf = idall(i-1)%sdesc + !call co_broadcast(buf,i) !sync all - call xmp_sync_all(ierr) - !--- 2020 Fujitsu end + !call xmp_sync_all(ierr) +!--- 2020 Fujitsu end idall(i-1)%sdesc = buf enddo do i=0,numprocs-1 @@ -766,17 +778,19 @@ subroutine comlib_sumcast_i4(i4) ! ! sum and broadcast of integer(4) data ! -!coarray use mpi + use mpi implicit none integer, intent(inout) :: i4 integer :: i4tmp !coarray -! integer :: ierr + integer :: ierr ! -! 
call MPI_Allreduce(i4,i4tmp,1,MPI_INTEGER,MPI_SUM,MPI_COMM_WORLD,ierr) - i4tmp = i4 - call co_sum(i4tmp) +!--- 2020 Fujitsu + call MPI_Allreduce(i4,i4tmp,1,MPI_INTEGER,MPI_SUM,MPI_COMM_WORLD,ierr) + !i4tmp = i4 + !call co_sum(i4tmp) i4 = i4tmp +!--- 2020 Fujitsu end return end subroutine @@ -785,17 +799,19 @@ subroutine comlib_sumcast_r8(r8) ! ! sum and broadcast of real(8) data ! -!coarray use mpi + use mpi implicit none real(8), intent(inout) :: r8 real(8) :: r8tmp !coarray -! integer :: ierr + integer :: ierr ! -! call MPI_Allreduce(r8,r8tmp,1,MPI_REAL8,MPI_SUM,MPI_COMM_WORLD,ierr) - r8tmp = r8 - call co_sum(r8tmp) +!--- 2020 Fujitsu + call MPI_Allreduce(r8,r8tmp,1,MPI_REAL8,MPI_SUM,MPI_COMM_WORLD,ierr) + !r8tmp = r8 + !call co_sum(r8tmp) r8 = r8tmp +!--- 2020 Fujitsu end return end subroutine @@ -804,17 +820,19 @@ subroutine comlib_sumcast_r4(r4) ! ! sum and broadcast of real(4) data ! -!coarray use mpi + use mpi implicit none real(4), intent(inout) :: r4 real(4) :: r4tmp !coarray -! integer :: ierr + integer :: ierr ! -! call MPI_Allreduce(r4,r4tmp,1,MPI_REAL4,MPI_SUM,MPI_COMM_WORLD,ierr) - r4tmp = r4 - call co_sum(r4tmp) +!--- 2020 Fujitsu + call MPI_Allreduce(r4,r4tmp,1,MPI_REAL4,MPI_SUM,MPI_COMM_WORLD,ierr) + !r4tmp = r4 + !call co_sum(r4tmp) r4 = r4tmp +!--- 2020 Fujitsu end return end subroutine @@ -823,17 +841,19 @@ subroutine comlib_sumcast_c16(c16) ! ! sum and broadcast of complex(8) data ! -!coarray use mpi + use mpi implicit none complex(8), intent(inout) :: c16 complex(8) :: c16tmp !coarray -! integer :: ierr + integer :: ierr ! -! call MPI_Allreduce(c16,c16tmp,1,MPI_COMPLEX16,MPI_SUM,MPI_COMM_WORLD,ierr) +!--- 2020 Fujitsu + call MPI_Allreduce(c16,c16tmp,1,MPI_COMPLEX16,MPI_SUM,MPI_COMM_WORLD,ierr) c16tmp = c16 - call co_sum(c16tmp) - c16 = c16tmp + !call co_sum(c16tmp) + !c16 = c16tmp +!--- 2020 Fujitsu end return end subroutine @@ -842,17 +862,19 @@ subroutine comlib_sumcast_c8(c8) ! ! sum and broadcast of complex(4) data ! 
-!coarray use mpi + use mpi implicit none complex(4), intent(inout) :: c8 complex(4) :: c8tmp !coarray -! integer :: ierr + integer :: ierr ! -! call MPI_Allreduce(c8,c8tmp,1,MPI_COMPLEX8,MPI_SUM,MPI_COMM_WORLD,ierr) - c8tmp = c8 - call co_sum(c8tmp) +!--- 2020 Fujitsu + call MPI_Allreduce(c8,c8tmp,1,MPI_COMPLEX8,MPI_SUM,MPI_COMM_WORLD,ierr) + !c8tmp = c8 + !call co_sum(c8tmp) c8 = c8tmp +!--- 2020 Fujitsu end return end subroutine From 19ab0c9ac46f7fdbdf91d744d7678564d369a5bf Mon Sep 17 00:00:00 2001 From: Kazuma Tago Date: Fri, 5 Mar 2021 11:18:09 +0900 Subject: [PATCH 16/70] [WIP] Change coarray declaration of xmpAPI_fmodules.f to pointer. --- MODYLAS-MINI/src/xmpAPI_fmodules.f | 82 ++++++++++++++++++++++-------- 1 file changed, 61 insertions(+), 21 deletions(-) diff --git a/MODYLAS-MINI/src/xmpAPI_fmodules.f b/MODYLAS-MINI/src/xmpAPI_fmodules.f index 1376f61..f0660f9 100755 --- a/MODYLAS-MINI/src/xmpAPI_fmodules.f +++ b/MODYLAS-MINI/src/xmpAPI_fmodules.f @@ -43,10 +43,22 @@ module trj_org end module c---------------------------------------------------------------------- module trj_mpi - real(8),allocatable :: wkxyz(:,:)[:] + integer(4) :: trj_mpi_img_dims(1) + ! real(8),allocatable :: wkxyz(:,:)[:] + real(8), POINTER :: wkxyz(:,:) => null () + integer(8) :: wkxyz_desc + real(8),allocatable :: wkv(:,:) - integer(4),allocatable :: i2m(:), m2i(:)[:] - integer(4),allocatable :: tag(:,:,:),na_per_cell(:,:,:)[:] + ! integer(4),allocatable :: i2m(:), m2i(:)[:] + integer(4),allocatable :: i2m(:) + integer(4), POINTER :: m2i(:) => null () + integer(8) :: m2i_desc + + !integer(4),allocatable :: tag(:,:,:),na_per_cell(:,:,:)[:] + integer(4),allocatable :: tag(:,:,:) + integer(4), POINTER :: na_per_cell(:,:,:) => null () + integer(8) :: na_per_cell_desc + integer(4) :: na1cell,na5cell,nadirect integer(4) :: naline,narea real(8),parameter :: na1cellmargin=2.00d0 ! 
ya !!100% margin @@ -271,18 +283,30 @@ module comm_base c---------------------------------------------------------------------- module comm_d3 integer nczdiv, ncydiv, ncxdiv - integer,allocatable :: icbufp(:)[:] - integer,allocatable :: ircbufp(:)[:] - integer,allocatable :: icbufm(:)[:] - integer,allocatable :: ircbufm(:)[:] - integer,allocatable :: ibuffp(:)[:] - integer,allocatable :: irbuffp(:)[:] - integer,allocatable :: ibuffm(:)[:] - integer,allocatable :: irbuffm(:)[:] - real(8),allocatable :: buffp(:,:)[:] - real(8),allocatable :: rbuffp(:,:)[:] - real(8),allocatable :: buffm(:,:)[:] - real(8),allocatable :: rbuffm(:,:)[:] + ! integer,allocatable :: icbufp(:)[:] + integer, POINTER :: icbufp(:) => null () + ! integer,allocatable :: ircbufp(:)[:] + integer, POINTER :: ircbufp(:) => null () + ! integer,allocatable :: icbufm(:)[:] + integer, POINTER :: icbufm(:) => null () + ! integer,allocatable :: ircbufm(:)[:] + integer, POINTER :: ircbufm(:) => null () + ! integer,allocatable :: ibuffp(:)[:] + integer, POINTER :: ibuffp(:) => null () + ! integer,allocatable :: irbuffp(:)[:] + integer, POINTER :: irbuffp(:) => null () + ! integer,allocatable :: ibuffm(:)[:] + integer, POINTER :: ibuffm(:) => null () + ! integer,allocatable :: irbuffm(:)[:] + integer, POINTER :: irbuffm(:) => null () + ! real(8),allocatable :: buffp(:,:)[:] + real(8), POINTER :: buffp(:,:) => null () + ! real(8),allocatable :: rbuffp(:,:)[:] + real(8), POINTER :: rbuffp(:,:) => null () + ! real(8),allocatable :: buffm(:,:)[:] + real(8), POINTER :: buffm(:,:) => null () + ! 
real(8),allocatable :: rbuffm(:,:)[:] + real(8), POINTER :: rbuffm(:,:) => null () end module c---------------------------------------------------------------------- module comm_bd @@ -301,12 +325,28 @@ module comm_bd integer,allocatable :: ibuffm(:) integer,allocatable :: isbufp(:) integer,allocatable :: isbufm(:) - real(8),allocatable :: rbuff_p(:,:)[:] - real(8),allocatable :: rbuff_m(:,:)[:] - integer,allocatable :: irbuff_p(:)[:] - integer,allocatable :: irbuff_m(:)[:] - integer,allocatable :: irsbuf_p(:)[:] - integer,allocatable :: irsbuf_m(:)[:] + ! real(8),allocatable :: rbuff_p(:,:)[:] + real(8), POINTER :: rbuff_p(:,:) => null () + real(8) :: rbuff_p_desc + ! real(8),allocatable :: rbuff_m(:,:)[:] + real(8), POINTER :: rbuff_m(:,:) => null () + real(8) :: rbuff_m_desc + ! integer,allocatable :: irbuff_p(:)[:] + integer, POINTER :: irbuff_p(:) => null () + integer :: irbuff_p_desc + ! integer,allocatable :: irbuff_m(:)[:] + integer, POINTER :: irbuff_m(:) => null () + integer :: irbuff_m_desc + ! integer,allocatable :: irsbuf_p(:)[:] + integer, POINTER :: irsbuf_p(:) => null () + integer :: irsbuf_p_desc + ! integer,allocatable :: irsbuf_m(:)[:] + integer, POINTER :: irsbuf_m(:) => null () + + integer(4), dimension(1) :: comm_bd_img_dims + + integer :: irsbuf_m_desc + integer,allocatable :: ncatmw(:,:,:,:) end module c---------------------------------------------------------------------- From eaf409e15e94b824cbf672ddff8f806a50f92b51 Mon Sep 17 00:00:00 2001 From: Kazuma Tago Date: Fri, 5 Mar 2021 11:27:34 +0900 Subject: [PATCH 17/70] [WIP] Change co_* routine to MPI routines in xmpAPI_parse_input.f. 
--- MODYLAS-MINI/src/xmpAPI_parse_input.f | 62 +++++++++++++++++++-------- 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/MODYLAS-MINI/src/xmpAPI_parse_input.f b/MODYLAS-MINI/src/xmpAPI_parse_input.f index b92c72c..9ed94e9 100755 --- a/MODYLAS-MINI/src/xmpAPI_parse_input.f +++ b/MODYLAS-MINI/src/xmpAPI_parse_input.f @@ -124,7 +124,8 @@ subroutine read_mdxyzbin read(f_restart_bin,end=100) n endif !coarray call MPI_Bcast(n, 1, MPI_INTEGER4, 0, MPI_COMM_WORLD, ierr) - call co_broadcast(n,source_image=1) + call MPI_Bcast(n, 1, MPI_INTEGER4, 0, MPI_COMM_WORLD, ierr) +! call co_broadcast(n,source_image=1) !! if (.not. allocated(xyz)) then @@ -164,20 +165,35 @@ subroutine read_mdxyzbin !coarray call MPI_Bcast(beta , 1 , MPI_REAL8 , 0, MPI_COMM_WORLD, ierr) !coarray call MPI_Bcast(gamma, 1 , MPI_REAL8 , 0, MPI_COMM_WORLD, ierr) !coarray call MPI_Bcast(vboxg, 9 , MPI_REAL8 , 0, MPI_COMM_WORLD, ierr) - call co_broadcast( xyz(1:3,1:n), source_image=1 ) - call co_broadcast( v(1:3,1:n), source_image=1 ) - call co_broadcast( n_nhc, source_image=1 ) - call co_broadcast( rss(1:5), source_image=1 ) - call co_broadcast( vss(1:5), source_image=1 ) - call co_broadcast( rssb(1:5), source_image=1 ) - call co_broadcast( vssb(1:5), source_image=1 ) - call co_broadcast( cellx, source_image=1 ) - call co_broadcast( celly, source_image=1 ) - call co_broadcast( cellz, source_image=1 ) - call co_broadcast( alpha, source_image=1 ) - call co_broadcast( beta, source_image=1 ) - call co_broadcast( gamma, source_image=1 ) - call co_broadcast( vboxg(1:3,1:3), source_image=1 ) + + call MPI_Bcast(xyz , 3*n, MPI_REAL8 , 0, MPI_COMM_WORLD, ierr) + call MPI_Bcast(v , 3*n, MPI_REAL8 , 0, MPI_COMM_WORLD, ierr) + call MPI_Bcast(n_nhc, 1 , MPI_INTEGER4, 0, MPI_COMM_WORLD, ierr) + call MPI_Bcast(rss , 5 , MPI_REAL8 , 0, MPI_COMM_WORLD, ierr) + call MPI_Bcast(vss , 5 , MPI_REAL8 , 0, MPI_COMM_WORLD, ierr) + call MPI_Bcast(rssb , 5 , MPI_REAL8 , 0, MPI_COMM_WORLD, ierr) + call MPI_Bcast(vssb , 5 
, MPI_REAL8 , 0, MPI_COMM_WORLD, ierr) + call MPI_Bcast(cellx, 1 , MPI_REAL8 , 0, MPI_COMM_WORLD, ierr) + call MPI_Bcast(celly, 1 , MPI_REAL8 , 0, MPI_COMM_WORLD, ierr) + call MPI_Bcast(cellz, 1 , MPI_REAL8 , 0, MPI_COMM_WORLD, ierr) + call MPI_Bcast(alpha, 1 , MPI_REAL8 , 0, MPI_COMM_WORLD, ierr) + call MPI_Bcast(beta , 1 , MPI_REAL8 , 0, MPI_COMM_WORLD, ierr) + call MPI_Bcast(gamma, 1 , MPI_REAL8 , 0, MPI_COMM_WORLD, ierr) + call MPI_Bcast(vboxg, 9 , MPI_REAL8 , 0, MPI_COMM_WORLD, ierr) +! call co_broadcast( xyz(1:3,1:n), source_image=1 ) +! call co_broadcast( v(1:3,1:n), source_image=1 ) +! call co_broadcast( n_nhc, source_image=1 ) +! call co_broadcast( rss(1:5), source_image=1 ) +! call co_broadcast( vss(1:5), source_image=1 ) +! call co_broadcast( rssb(1:5), source_image=1 ) +! call co_broadcast( vssb(1:5), source_image=1 ) +! call co_broadcast( cellx, source_image=1 ) +! call co_broadcast( celly, source_image=1 ) +! call co_broadcast( cellz, source_image=1 ) +! call co_broadcast( alpha, source_image=1 ) +! call co_broadcast( beta, source_image=1 ) +! call co_broadcast( gamma, source_image=1 ) +! call co_broadcast( vboxg(1:3,1:3), source_image=1 ) !! !KF end cellxh = 0.5d0 * cellx @@ -296,9 +312,16 @@ subroutine read_mdffbin !coarray & MPI_COMM_WORLD, ierr) !coarray call MPI_Bcast(slength, rngrp_ps*10, MPI_REAL8, 0, !coarray & MPI_COMM_WORLD, ierr) - call co_broadcast( atom1S, source_image=1 ) - call co_broadcast( atom2S, source_image=1 ) - call co_broadcast( slength, source_image=1 ) + call MPI_Bcast(atom1S, rngrp_ps*10, MPI_INTEGER4, 0, + & MPI_COMM_WORLD, ierr) + call MPI_Bcast(atom2S, rngrp_ps*10, MPI_INTEGER4, 0, + & MPI_COMM_WORLD, ierr) + call MPI_Bcast(slength, rngrp_ps*10, MPI_REAL8, 0, + & MPI_COMM_WORLD, ierr) + +! call co_broadcast( atom1S, source_image=1 ) +! call co_broadcast( atom2S, source_image=1 ) +! call co_broadcast( slength, source_image=1 ) !! 
!-- void @@ -343,6 +366,7 @@ subroutine read_bcast_real(data, n, m, in) if (myrank == 0) read(in) data(1:m,i) end do !coarray call MPI_Bcast(data, n*m, MPI_REAL8, 0, MPI_COMM_WORLD, ierr) - call co_broadcast( data(1:m,1:n), source_image=1 ) + call MPI_Bcast(data, n*m, MPI_REAL8, 0, MPI_COMM_WORLD, ierr) +! call co_broadcast( data(1:m,1:n), source_image=1 ) !! end From e67d3f39906765ca31f16ab7c4a1907271067b8e Mon Sep 17 00:00:00 2001 From: Kazuma Tago Date: Fri, 5 Mar 2021 14:02:12 +0900 Subject: [PATCH 18/70] [WIP] Change sync all to xmp_sync_all routines. --- MODYLAS-MINI/src/xmpAPI_app_f90.f | 8 +- MODYLAS-MINI/src/xmpAPI_comm.f | 64 ++++++++---- MODYLAS-MINI/src/xmpAPI_comm_3.f | 59 +++++++---- MODYLAS-MINI/src/xmpAPI_comm_fmm.f | 148 ++++++++++++++++++--------- MODYLAS-MINI/src/xmpAPI_domain_div.f | 4 +- 5 files changed, 193 insertions(+), 90 deletions(-) diff --git a/MODYLAS-MINI/src/xmpAPI_app_f90.f b/MODYLAS-MINI/src/xmpAPI_app_f90.f index a70d0f4..c2b3b33 100755 --- a/MODYLAS-MINI/src/xmpAPI_app_f90.f +++ b/MODYLAS-MINI/src/xmpAPI_app_f90.f @@ -227,9 +227,11 @@ subroutine record_current_state subroutine check_parallel_condition c----------------------------------------------------------------------- use mpivar + use xmp_api implicit none include 'mpif.h' integer(4) :: ierr + integer(4) :: status !### checm nprocs ###! if(mod(nprocs,2).ne.0)then @@ -237,7 +239,8 @@ subroutine check_parallel_condition write(*,*) 'ERROR: nprocs is not equal to 2 powers.' endif !coarray call mpi_barrier(mpi_comm_world, ierr) - sync all + !sync all + call xmp_sync_all(status) !! call mpiend stop 1 @@ -247,7 +250,8 @@ subroutine check_parallel_condition write(*,*) 'ERROR: nprocs less than 8 is not supported.' endif !coarray call mpi_barrier(mpi_comm_world, ierr) - sync all + !sync all + call xmp_sync_all(status) !! 
call mpiend stop 1 diff --git a/MODYLAS-MINI/src/xmpAPI_comm.f b/MODYLAS-MINI/src/xmpAPI_comm.f index 34aaf37..e4ebe7e 100755 --- a/MODYLAS-MINI/src/xmpAPI_comm.f +++ b/MODYLAS-MINI/src/xmpAPI_comm.f @@ -130,6 +130,7 @@ subroutine comm_bound() use param use mpivar use unitcell + use xmp_api implicit none INCLUDE 'mpif.h' integer nbase, nbase2 @@ -164,6 +165,7 @@ subroutine comm_bound() ! integer istatus(mpi_status_size), ierr ! + integer(4) status rdcellx=dble(ncell)/cellx rdcelly=dble(ncell)/celly rdcellz=dble(ncell)/cellz @@ -489,7 +491,9 @@ subroutine comm_bound() !coarray & ipz_src, ipz_src, !coarray & mpi_comm_world, istatus, ierr ) irsbuf_p(1:ncs+1)[ipz_dest+1] = isbufp(1:ncs+1) ! Put - sync all + + !sync all + call xmp_sync_all(status) !! ncar = irsbuf_p(ncsr) @@ -499,7 +503,8 @@ subroutine comm_bound() !coarray & ipz_src, ipz_src, !coarray & mpi_comm_world, istatus, ierr ) rbuff_p(:,1:nca)[ipz_dest+1] = buffp(:,1:nca) ! Put - sync all +! sync all + call xmp_sync_all(status) !! !coarray call mpi_sendrecv(ibuffp, nca, MPI_INTEGER, @@ -508,7 +513,9 @@ subroutine comm_bound() !coarray & ipz_src, ipz_src, !coarray & mpi_comm_world, istatus, ierr ) irbuff_p(1:nca)[ipz_dest+1] = ibuffp(1:nca) ! Put - sync all +! sync all + call xmp_sync_all(status) + !! ! @@ -647,7 +654,8 @@ subroutine comm_bound() !coarray & ipz_src, ipz_src, !coarray & mpi_comm_world, istatus, ierr ) irsbuf_m(1:ncs+1)[ipz_dest+1] = isbufm(1:ncs+1) ! Put - sync all +! sync all + call xmp_sync_all(status) !! ncar = irsbuf_m(ncsr) @@ -657,7 +665,8 @@ subroutine comm_bound() !coarray & ipz_src, ipz_src, !coarray & mpi_comm_world, istatus, ierr ) rbuff_m(1:6,1:nca)[ipz_dest+1] = buffm(1:6,1:nca) ! Put - sync all +! sync all + call xmp_sync_all(status) !! !coarray call mpi_sendrecv(ibuffm, nca, MPI_INTEGER, @@ -666,7 +675,8 @@ subroutine comm_bound() !coarray & ipz_src, ipz_src, !coarray & mpi_comm_world, istatus, ierr ) irbuff_m(1:nca)[ipz_dest+1] = ibuffm(1:nca) ! Put - sync all +! 
sync all + call xmp_sync_all(status) !! ! @@ -1001,9 +1011,11 @@ subroutine comm_bound() !coarray & irsbuf_p, ncsr, MPI_INTEGER, !coarray & ipy_src, ipy_src, !coarray & mpi_comm_world, istatus, ierr ) - sync all ! do Not change +! sync all ! do Not change + call xmp_sync_all(status) irsbuf_p(1:ncsp+1)[ipy_dest+1] = isbufp(1:ncsp+1) ! Put - sync all +! sync all + call xmp_sync_all(status) !! ncarp = irsbuf_p(ncsr) @@ -1021,7 +1033,8 @@ subroutine comm_bound() !coarray & ipy_src, ipy_src, !coarray & mpi_comm_world, istatus, ierr ) irbuff_p(1:ncap)[ipy_dest+1] = ibuffp(1:ncap) ! Put - sync all +! sync all + call xmp_sync_all(status) !! ! @@ -1043,7 +1056,8 @@ subroutine comm_bound() !coarray & ipy_src, ipy_src, !coarray & mpi_comm_world, istatus, ierr ) irsbuf_m(1:ncsm+1)[ipy_dest+1] = isbufm(1:ncsm+1) ! Put - sync all +! sync all + call xmp_sync_all(status) !! ncarm = irsbuf_m(ncsr) @@ -1061,7 +1075,8 @@ subroutine comm_bound() !coarray & ipy_src, ipy_src, !coarray & mpi_comm_world, istatus, ierr ) irbuff_m(1:ncam)[ipy_dest+1] = ibuffm(1:ncam) ! Put - sync all +! sync all + call xmp_sync_all(status) !! ! @@ -1297,9 +1312,11 @@ subroutine comm_bound() !coarray & irsbuf_p, ncsr, MPI_INTEGER, !coarray & ipx_src, ipx_src, !coarray & mpi_comm_world, istatus, ierr ) - sync all ! do Not change +! sync all ! do Not change + call xmp_sync_all(status) irsbuf_p(1:ncs+1)[ipx_dest+1] = isbufp(1:ncs+1) ! Put - sync all +! sync all + call xmp_sync_all(status) !! ncar = irsbuf_p(ncsr) @@ -1317,7 +1334,8 @@ subroutine comm_bound() !coarray & ipx_src, ipx_src, !coarray & mpi_comm_world, istatus, ierr ) irbuff_p(1:nca)[ipx_dest+1] = ibuffp(1:nca) ! Put - sync all +! sync all + call xmp_sync_all(status) !! ! @@ -1390,9 +1408,11 @@ subroutine comm_bound() !coarray & irsbuf_m, ncsr, MPI_INTEGER, !coarray & ipx_src, ipx_src, !coarray & mpi_comm_world, istatus, ierr ) - sync all +! sync all + call xmp_sync_all(status) irsbuf_m(1:ncs+1)[ipx_dest+1] = isbufm(1:ncs+1) ! Put - sync all +! 
sync all + call xmp_sync_all(status) !! ncar = irsbuf_m(ncsr) @@ -1410,7 +1430,8 @@ subroutine comm_bound() !coarray & ipx_src, ipx_src, !coarray & mpi_comm_world, istatus, ierr ) irbuff_m(1:nca)[ipx_dest+1] = ibuffm(1:nca) ! Put - sync all +! sync all + call xmp_sync_all(status) !! ! @@ -1805,7 +1826,8 @@ subroutine pre_record_data !coarray natmdisp(1) = 0 do mm = 1,np natmlist(me)[mm] = nselfatm ! Put - sync all +! sync all + call xmp_sync_all(status) enddo natmdisp(1) = 1 !! @@ -1820,7 +1842,8 @@ subroutine pre_record_data !coarray & mpiout,mpi_comm_world,ierr) ms = natmdisp(me) mdis(ms:ms+nselfatm-1)[mpiout+1] = m2i_tmp(1:nselfatm) - sync all +! sync all + call xmp_sync_all(status) nrearrange = mdis !! ! @@ -1849,7 +1872,8 @@ subroutine pre_record_data !coarray & mpiout,mpi_comm_world,ierr) ms = natmdisp(me)/6 rcvx(1:6,ms:ms+nselfatm-1)[mpiout+1] = snd(1:6,1:nselfatm) - sync all +! sync all + call xmp_sync_all(status) rcv = rcvx !! ! diff --git a/MODYLAS-MINI/src/xmpAPI_comm_3.f b/MODYLAS-MINI/src/xmpAPI_comm_3.f index 39dfbf5..8960179 100755 --- a/MODYLAS-MINI/src/xmpAPI_comm_3.f +++ b/MODYLAS-MINI/src/xmpAPI_comm_3.f @@ -76,6 +76,7 @@ subroutine comm_direct_3() ! ver.20120314 use md_periodic use unitcell use mpivar + use xmp_api implicit none INCLUDE 'mpif.h' integer ipz_pdest, ipy_pdest, ipx_pdest @@ -120,6 +121,7 @@ subroutine comm_direct_3() ! ver.20120314 #endif !coarray integer nd + integer(4) status !! c----- common parameters for coordinate communication. ----- @@ -186,7 +188,8 @@ subroutine comm_direct_3() ! ver.20120314 !coarray & mpi_comm_world, istatus, ierr ) ircbufp(1:nccp)[ipz_pdest+1] = icbufp(1:nccp) ! Put ircbufm(1:nccm)[ipz_mdest+1] = icbufm(1:nccm) ! Put - sync all +! sync all + call xmp_sync_all(status) !! #else call mpi_irecv(ircbufp, nccp, @@ -297,7 +300,8 @@ subroutine comm_direct_3() ! ver.20120314 irbuffp(1:ncap)[ipz_pdest+1] = ibuffp(1:ncap) ! Put rbuffm(1:3,1:ncam)[ipz_mdest+1] = buffm(1:3,1:ncam) ! 
Put irbuffm(1:ncam)[ipz_mdest+1] = ibuffm(1:ncam) ! Put - sync all +! sync all + call xmp_sync_all(status) !! #else @@ -372,7 +376,8 @@ subroutine comm_direct_3() ! ver.20120314 !coarray & mpi_comm_world, istatus, ierr ) icbufp(1:nccp)[ipz_pdest+1] = ircbufp(1:nccp) ! Put icbufm(1:nccm)[ipz_mdest+1] = ircbufm(1:nccm) ! Put - sync all +! sync all + call xmp_sync_all(status) !! #else call mpi_irecv(icbufp, nccp, @@ -448,7 +453,8 @@ subroutine comm_direct_3() ! ver.20120314 ibuffp(1:ncarp)[ipz_pdest+1] = irbuffp(1:ncarp) ! Put buffm(1:3,1:ncarm)[ipz_mdest+1] = rbuffm(1:3,1:ncarm) ! Put ibuffm(1:ncarm)[ipz_mdest+1] = irbuffm(1:ncarm) ! Put - sync all +! sync all + call xmp_sync_all(status) !! #else call mpi_irecv(buffp, 3*ncar2p, @@ -556,7 +562,8 @@ subroutine comm_direct_3() ! ver.20120314 nd = abs(icym1 - icym0) na_per_cell(:, icybm0:icybm0+nd, icx)[ipy_mdest+1] . = na_per_cell(:, icym0:icym0 +nd, icx) ! Put - sync all +! sync all + call xmp_sync_all(status) !! #else call mpi_irecv(na_per_cell(icz0,icybp0,icx), nccp, @@ -631,12 +638,14 @@ subroutine comm_direct_3() ! ver.20120314 . = wkxyz(:,icasp:icasp+ncap-1) ! Put m2i(icarp:icarp+ncap-1)[ipy_pdest+1] . = m2i(icasp:icasp+ncap-1) ! Put - sync all +! sync all + call xmp_sync_all(status) wkxyz(:,icarm:icarm+ncam-1)[ipy_mdest+1] . = wkxyz(:,icasm:icasm+ncam-1) ! Put m2i(icarm:icarm+ncam-1)[ipy_mdest+1] . = m2i(icasm:icasm+ncam-1) ! Put - sync all +! sync all + call xmp_sync_all(status) !! #else call mpi_irecv(wkxyz(1,icarp), 3*ncap, @@ -690,11 +699,13 @@ subroutine comm_direct_3() ! ver.20120314 nd = abs(icyp1 - icyp0) na_per_cell(:, icybp0:icybp0+nd, icx)[ipy_pdest+1] . = na_per_cell(:, icybp1st:icybp1st+nd, icx) ! Put - sync all +! sync all + call xmp_sync_all(status) nd = abs(icym1 - icym0) na_per_cell(:, icybm0:icybm0+nd, icx)[ipy_pdest+1] . = na_per_cell(:, icybm1st:icybm1st+nd, icx) ! Put - sync all +! sync all + call xmp_sync_all(status) !! 
#else call mpi_irecv(na_per_cell(icz0,icybp0,icx), nccp, @@ -759,12 +770,14 @@ subroutine comm_direct_3() ! ver.20120314 . = wkxyz(:,icasp:icasp+ncap-1) ! Put m2i(icarp:icarp+ncap-1)[ipy_pdest+1] . = m2i(icasp:icasp+ncap-1) ! Put - sync all +! sync all + call xmp_sync_all(status) wkxyz(:,icarm:icarm+ncam-1)[ipy_mdest+1] . = wkxyz(:,icasm:icasm+ncam-1) ! Put m2i(icarm:icarm+ncam-1)[ipy_mdest+1] . = m2i(icasm:icasm+ncam-1) ! Put - sync all +! sync all + call xmp_sync_all(status) !! #else call mpi_irecv(wkxyz(1,icarp), 3*ncap, @@ -845,7 +858,8 @@ subroutine comm_direct_3() ! ver.20120314 na_per_cell(icz0:icz1,icy0:icy1,icxbp0:icxbp0+(icxp1-icxp0)) . [ipx_pdest+1] . = na_per_cell(icz0:icz1,icy0:icy1,icxp0:icxp1) ! Put - sync all +! sync all + call xmp_sync_all(status) !! #endif !coarray call mpi_sendrecv(na_per_cell(icz0,icy0,icxm0), nccm, @@ -856,7 +870,8 @@ subroutine comm_direct_3() ! ver.20120314 na_per_cell(icz0:icz1,icy0:icy1,icxbm0:icxbm0+(icxm1-icxm0)) . [ipx_mdest+1] . = na_per_cell(icz0:icz1,icy0:icy1,icxm0:icxm1) ! Put - sync all +! sync all + call xmp_sync_all(status) !! #else #ifndef HALFDIREE @@ -945,7 +960,8 @@ subroutine comm_direct_3() ! ver.20120314 . = wkxyz(:,icasp:icasp+ncap-1) ! Put m2i(icarp:icarp+ncap-1)[ipx_pdest+1] . = m2i(icasp:icasp+ncap-1) ! Put - sync all +! sync all + call xmp_sync_all(status) !! #endif !coarray call mpi_sendrecv(wkxyz(1,icasm), 3*ncam, @@ -961,7 +977,8 @@ subroutine comm_direct_3() ! ver.20120314 . = wkxyz(:,icasm:icasm+ncam-1) ! Put m2i(icarm:icarm+ncam-1)[ipx_mdest+1] . = m2i(icasm:icasm+ncam-1) ! Put - sync all +! sync all + call xmp_sync_all(status) !! #else #ifndef HALFDIREE @@ -1031,7 +1048,8 @@ subroutine comm_direct_3() ! ver.20120314 nd = abs(icxp1 - icxp0) na_per_cell(icz0:icz1,icy0:icy1,icxbp0:icxbp0+nd)[ipx_pdest+1] .= na_per_cell(icz0:icz1,icy0:icy1,icxbp1st:icxbp1st+nd) - sync all +! sync all + call xmp_sync_all(status) !! 
#endif !coarray call mpi_sendrecv(na_per_cell(icz0,icy0,icxbm1st), nccm, @@ -1042,7 +1060,8 @@ subroutine comm_direct_3() ! ver.20120314 nd = abs(icxm1 - icxm0) na_per_cell(icz0:icz1,icy0:icy1,icxbm0:icxbm0+nd)[ipx_pdest+1] .= na_per_cell(icz0:icz1,icy0:icy1,icxbm1st:icxbm1st+nd) - sync all +! sync all + call xmp_sync_all(status) !! #else #ifndef HALFDIREE @@ -1118,7 +1137,8 @@ subroutine comm_direct_3() ! ver.20120314 . = wkxyz(:,icasp:icasp+ncap-1) ! Put m2i(icarp:icarp+ncap-1)[ipx_pdest+1] . = m2i(icasp:icasp+ncap-1) ! Put - sync all +! sync all + call xmp_sync_all(status) !! #endif !coarray call mpi_sendrecv(wkxyz(1,icasm), 3*ncam, @@ -1134,7 +1154,8 @@ subroutine comm_direct_3() ! ver.20120314 . = wkxyz(:,icasm:icasm+ncam-1) ! Put m2i(icarm:icarm+ncam-1)[ipx_mdest+1] . = m2i(icasm:icasm+ncam-1) ! Put - sync all +! sync all + call xmp_sync_all(status) !! #else #ifndef HALFDIREE diff --git a/MODYLAS-MINI/src/xmpAPI_comm_fmm.f b/MODYLAS-MINI/src/xmpAPI_comm_fmm.f index 6d49252..f6c9f4c 100755 --- a/MODYLAS-MINI/src/xmpAPI_comm_fmm.f +++ b/MODYLAS-MINI/src/xmpAPI_comm_fmm.f @@ -32,6 +32,7 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, use comm_base use md_fmm use mpivar + use xmp_api implicit none INCLUDE 'mpif.h' integer m1 @@ -61,6 +62,7 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, integer iczb integer icyb0prior, icxb0prior integer ierr,istatus(mpi_status_size) + integer(4) status !coarray allocate( rccbuf(mylm*5*nscydiv*nscxdiv,2)[*] ) @@ -116,7 +118,8 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, !coarray & myrank, rccbuf(1,1), ncc, MPI_DOUBLE_COMPLEX, !coarray & ipz_src, ipz_src,mpi_comm_world,istatus,ierr) rccbuf(1:ncc,1)[ipz_dest+1] = ccbuf(1:ncc) ! Put - sync all +! sync all + call xmp_sync_all(status) !! 
ncc2 = 0 @@ -146,7 +149,8 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, !coarray & myrank, rccbuf(1,ibr), ncc, MPI_DOUBLE_COMPLEX, !coarray & ipz_src, ipz_src,mpi_comm_world,istatus,ierr) rccbuf(1:ncc,ibr)[ipz_dest+1] = rccbuf(1:ncc,ibs) ! Put - sync all +! sync all + call xmp_sync_all(status) !! ncc2 = 0 @@ -181,7 +185,8 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, !coarray & rccbuf(1,1), ncc, MPI_DOUBLE_COMPLEX, !coarray & ipz_src, ipz_src,mpi_comm_world,istatus,ierr) rccbuf(1:ncc,1)[ipz_dest+1] = ccbuf(1:ncc) ! Put - sync all +! sync all + call xmp_sync_all(status) !! ncc2 = 0 DO icx = icx0, icx1 @@ -205,7 +210,8 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, !coarray & myrank, rccbuf(1,ibr), ncc, MPI_DOUBLE_COMPLEX, !coarray & ipz_src, ipz_src,mpi_comm_world,istatus,ierr) rccbuf(1:ncc,ibr)[ipz_dest+1] = rccbuf(1:ncc,ibs) ! Put - sync all +! sync all + call xmp_sync_all(status) !! ncc2 = 0 DO icx = icx0, icx1 @@ -242,11 +248,13 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, !coarray & ipy_src, ipy_src,mpi_comm_world,istatus,ierr ) nd = abs(icyb1 - icyb0) ndis(me)[ipy_src+1] = icyb0 ! Put - sync all +! sync all + call xmp_sync_all(status) nb = ndis(ipy_dest+1) wm_tmp( :, :, nb:nb +nd, icx )[ipy_dest+1] . = wm_tmp( :, :, icy0:icy0+nd, icx ) ! Put - sync all +! sync all + call xmp_sync_all(status) !! else icyb0prior = icyb0 @@ -258,11 +266,13 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, !coarray & MPI_DOUBLE_COMPLEX, !coarray & ipy_src, ipy_src,mpi_comm_world,istatus,ierr ) ndis(me)[ipy_src+1] = icyb0 ! Put - sync all +! sync all + call xmp_sync_all(status) nb = ndis(ipy_dest+1) wm_tmp( :, :, nb:nb +nd, icx )[ipy_dest+1] . = wm_tmp( :, :, icyb0prior:icyb0prior+nd, icx ) ! Put - sync all +! sync all + call xmp_sync_all(status) !! endif END DO @@ -289,11 +299,13 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, !coarray & ipy_src, ipy_src,mpi_comm_world,istatus,ierr) nd = abs(icyb1 - icyb0) ndis(me)[ipy_src+1] = icyb0 ! 
Put - sync all +! sync all + call xmp_sync_all(status) nb = ndis(ipy_dest+1) wm_tmp( :, :, nb:nb +nd, icx )[ipy_dest+1] . = wm_tmp( :, :, icy0:icy0+nd, icx ) ! Put - sync all +! sync all + call xmp_sync_all(status) !! else icyb0prior = icyb0 @@ -304,11 +316,13 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, !coarray & MPI_DOUBLE_COMPLEX, !coarray & ipy_src, ipy_src,mpi_comm_world,istatus,ierr) ndis(me)[ipy_src+1] = icyb0 ! Put - sync all +! sync all + call xmp_sync_all(status) nb = ndis(ipy_dest+1) wm_tmp( :, :, nb:nb +nd, icx )[ipy_dest+1] . = wm_tmp( :, :, icyb0prior:icyb0prior+nd, icx ) ! Put - sync all +! sync all + call xmp_sync_all(status) !! end if END DO @@ -334,11 +348,13 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, !coarray & ipx_src, ipx_src,mpi_comm_world,istatus,ierr ) nd = abs(icxb1 - icxb0) ndis(me)[ipx_src+1] = icxb0 ! Put - sync all +! sync all + call xmp_sync_all(status) nb = ndis(ipx_dest+1) wm_tmp( :, :, :, nb:nb +nd )[ipx_dest+1] . = wm_tmp( :, :, :, icx0:icx0+nd ) ! Put - sync all +! sync all + call xmp_sync_all(status) !! else icxb0prior = icxb0 @@ -349,11 +365,13 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, !coarray & MPI_DOUBLE_COMPLEX, !coarray & ipx_src, ipx_src,mpi_comm_world,istatus,ierr ) ndis(me)[ipx_src+1] = icxb0 ! Put - sync all +! sync all + call xmp_sync_all(status) nb = ndis(ipx_dest+1) wm_tmp( :, :, :, nb:nb +nd )[ipx_dest+1] . = wm_tmp( :, :, :, icxb0prior:icxb0prior+nd ) ! Put - sync all +! sync all + call xmp_sync_all(status) !! end if END DO @@ -378,11 +396,13 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, !coarray & ipx_src, ipx_src,mpi_comm_world,istatus,ierr ) nd = abs(icxb1 - icxb0) ndis(me)[ipx_src+1] = icxb0 ! Put - sync all +! sync all + call xmp_sync_all(status) nb = ndis(ipx_dest+1) wm_tmp( :, :, :, nb:nb +nd )[ipx_dest+1] . = wm_tmp( :, :, :, icx0:icx0+nd ) ! Put - sync all +! sync all + call xmp_sync_all(status) !! 
else icxb0prior = icxb0 @@ -393,11 +413,13 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, !coarray & MPI_DOUBLE_COMPLEX, !coarray & ipx_src, ipx_src,mpi_comm_world,istatus,ierr ) ndis(me)[ipx_src+1] = icxb0 ! Put - sync all +! sync all + call xmp_sync_all(status) nb = ndis(ipx_dest) wm_tmp( :, :, :, nb:nb +nd )[ipx_dest+1] . = wm_tmp( :, :, :, icxb0prior:icxb0prior+nd ) ! Put - sync all +! sync all + call xmp_sync_all(status) !! end if END DO @@ -417,6 +439,7 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, use md_fmm use md_fmm_domdiv_flg use mpivar + use xmp_api implicit none include 'mpif.h' integer(4) :: ilevel @@ -470,6 +493,7 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, integer m integer ibs, ibr integer istatus(mpi_status_size, 4), ierr + integer(4) status #ifndef SYNC_COM integer,dimension(4) :: irq integer nrq @@ -581,7 +605,8 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, !coarray & mpi_comm_world, istatus, ierr ) rccbufp(1:nccp,1)[ipz_pdest+1] = ccbufp(1:nccp) ! Put rccbufm(1:nccm,1)[ipz_mdest+1] = ccbufm(1:nccm) ! Put - sync all +! sync all + call xmp_sync_all(status) !! #else call mpi_irecv(rccbufp(1,1), nccp, @@ -650,7 +675,8 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, !coarray & mpi_comm_world, istatus, ierr ) rccbufp(1:nccp,ibr)[ipz_pdest+1] = rccbufp(1:nccp,ibs) ! Put rccbufm(1:nccm,ibr)[ipz_mdest+1] = rccbufm(1:nccm,ibs) ! Put - sync all +! sync all + call xmp_sync_all(status) !! #else call mpi_irecv(rccbufp(1,ibr), nccp, @@ -701,7 +727,8 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, !coarray & ipz_pdest, myrank, !coarray & mpi_comm_world, istatus, ierr ) rccbufp(1:nccp,ibr)[ipz_pdest+1] = rccbufp(1:nccp,ibs) ! Put - sync all +! sync all + call xmp_sync_all(status) !! #else call mpi_isend(rccbufp(1,ibs), nccp, @@ -731,7 +758,8 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, !coarray & ipz_mdest, myrank, !coarray & mpi_comm_world, istatus, ierr ) rccbufm(1:nccm,ibr)[ipz_mdest+1] = rccbufm(1:nccm,ibs) ! 
Put - sync all +! sync all + call xmp_sync_all(status) !! #else call mpi_isend(rccbufm(1,ibs), nccm, @@ -835,11 +863,13 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, !coarray & mpi_comm_world, istatus, ierr ) nd = abs(icyp1 - icyp0) ndis(me)[ipy_psrc+1] = icybp0 ! Put - sync all +! sync all + call xmp_sync_all(status) nb = ndis(ipy_pdest+1) wm_tmp( :, :, nb:nb +nd, icx )[ipy_pdest+1] . = wm_tmp( :, :, icyp0:icyp0+nd, icx ) ! Put - sync all +! sync all + call xmp_sync_all(status) !! !coarray call mpi_sendrecv(wm_tmp(1,1,icym0,icx), nccm, !coarray & MPI_DOUBLE_COMPLEX, @@ -849,11 +879,13 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, !coarray & mpi_comm_world, istatus, ierr ) md = abs(icym1 - icym0) mdis(me)[ipy_msrc+1] = icybm0 ! Put - sync all +! sync all + call xmp_sync_all(status) mb = mdis(ipy_mdest+1) wm_tmp( :, :, mb:mb +md, icx )[ipy_mdest+1] . = wm_tmp( :, :, icym0:icym0+md, icx ) ! Put - sync all +! sync all + call xmp_sync_all(status) !! #else call mpi_irecv(wm_tmp(1,1,icybp0,icx), nccp, @@ -890,11 +922,13 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, !coarray * ipy_psrc, ipy_psrc, !coarray & mpi_comm_world, istatus, ierr ) ndis(me)[ipy_psrc+1] = icybp0 ! Put - sync all +! sync all + call xmp_sync_all(status) nb = ndis(ipy_pdest+1) wm_tmp( :, :, nb:nb +nd, icx )[ipy_pdest+1] ! Put . = wm_tmp( :, :, icybp0prior:icybp0prior+nd, icx ) - sync all +! sync all + call xmp_sync_all(status) !! !coarray call mpi_sendrecv(wm_tmp(1,1,icybm0prior,icx), nccm, !coarray & MPI_DOUBLE_COMPLEX, @@ -903,11 +937,13 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, !coarray & ipy_msrc, ipy_msrc, !coarray & mpi_comm_world, istatus, ierr ) mdis(me)[ipy_msrc+1] = icybm0 ! Put - sync all +! sync all + call xmp_sync_all(status) mb = mdis(ipy_mdest+1) wm_tmp( :, :, mb:mb +md, icx )[ipy_mdest+1] ! Put . = wm_tmp( :, :, icybm0prior:icybm0prior+md, icx ) - sync all +! sync all + call xmp_sync_all(status) !! 
#else call mpi_irecv(wm_tmp(1,1,icybp0,icx), nccp, @@ -935,11 +971,13 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, !coarray & ipy_pdest, myrank, !coarray & mpi_comm_world, istatus, ierr ) ndis(me)[ipy_psrc+1] = icybp0 ! Put - sync all +! sync all + call xmp_sync_all(status) nb = ndis(ipy_pdest+1) wm_tmp( :, :, nb:nb +nd, icx )[ipy_pdest+1] . = wm_tmp( :, :, icybp0prior:icybp0prior+nd, icx ) ! Put - sync all +! sync all + call xmp_sync_all(status) !! #else call mpi_isend(wm_tmp(1,1,icybp0prior,icx), nccp, @@ -968,11 +1006,13 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, !coarray & ipy_mdest, myrank, !coarray & mpi_comm_world, istatus, ierr ) mdis(me)[ipy_msrc+1] = icybm0 ! Put - sync all +! sync all + call xmp_sync_all(status) md = mdis(ipy_mdest+1) wm_tmp( :, :, mb:mb +md, icx )[ipy_mdest+1] . = wm_tmp( :, :, icybm0prior:icybm0prior+md, icx ) ! Put - sync all +! sync all + call xmp_sync_all(status) #else call mpi_isend(wm_tmp(1,1,icybm0prior,icx), nccm, & MPI_DOUBLE_COMPLEX, ipy_mdest, myrank, @@ -1049,11 +1089,13 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, !coarray & ipx_psrc, ipx_psrc, !coarray & mpi_comm_world, istatus, ierr ) ndis(me)[ipx_psrc+1] = icxbp0 ! Put - sync all +! sync all + call xmp_sync_all(status) nb = ndis(ipx_pdest+1) wm_tmp( :, :, :, nb:nb +nd )[ipx_pdest+1] . = wm_tmp( :, :, :, icxp0:icxp0+nd ) ! Put - sync all +! sync all + call xmp_sync_all(status) !! !coarray call mpi_sendrecv(wm_tmp(1,1,1,icxm0), nccm, !coarray & MPI_DOUBLE_COMPLEX, @@ -1062,11 +1104,13 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, !coarray & ipx_msrc, ipx_msrc, !coarray & mpi_comm_world, istatus, ierr ) mdis(me)[ipx_msrc+1] = icxbm0 ! Put - sync all +! sync all + call xmp_sync_all(status) mb = mdis(ipx_mdest+1) wm_tmp( :, :, :, mb:mb +md )[ipx_mdest+1] . = wm_tmp( :, :, :, icxm0:icxm0+md ) - sync all +! sync all + call xmp_sync_all(status) !! 
#else call mpi_irecv(wm_tmp(1,1,1,icxbp0), nccp, @@ -1106,11 +1150,13 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, !coarray & ipx_psrc, ipx_psrc, !coarray & mpi_comm_world, istatus, ierr ) ndis(me)[ipx_psrc+1] = icxbp0 ! Put - sync all +! sync all + call xmp_sync_all(status) nb = ndis(ipx_pdest+1) wm_tmp( :, :, :, nb:nb +nd-1 )[ipx_pdest+1] . = wm_tmp( :, :, :, icxbp0prior:icxbp0prior+nd-1 ) ! Put - sync all +! sync all + call xmp_sync_all(status) !! !coarray call mpi_sendrecv(wm_tmp(1,1,1,icxbm0prior), nccm, !coarray & MPI_DOUBLE_COMPLEX, @@ -1119,11 +1165,13 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, !coarray & ipx_msrc, ipx_msrc, !coarray & mpi_comm_world, istatus, ierr ) mdis(me)[ipx_msrc+1] = icxbm0 ! Put - sync all +! sync all + call xmp_sync_all(status) mb = mdis(ipx_mdest+1) wm_tmp( :, :, :, mb:mb +md-1 )[ipx_mdest+1] . = wm_tmp( :, :, :, icxbm0prior:icxbm0prior+md-1 ) ! Put - sync all +! sync all + call xmp_sync_all(status) !! #else call mpi_irecv(wm_tmp(1,1,1,icxbp0), nccp, @@ -1151,11 +1199,13 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, !coarray & ipx_pdest, myrank, !coarray & mpi_comm_world, istatus, ierr ) ndis(me)[ipx_psrc+1] = icxbp0 ! Put - sync all +! sync all + call xmp_sync_all(status) nb = ndis(ipx_pdest+1) wm_tmp( :, :, :, nb:nb +nd-1 )[ipx_pdest+1] . = wm_tmp( :, :, :, icxbp0prior:icxbp0prior+nd-1 ) ! put - sync all +! sync all + call xmp_sync_all(status) !! #else call mpi_isend(wm_tmp(1,1,1,icxbp0prior), nccp, @@ -1184,11 +1234,13 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, !coarray & ipx_mdest, myrank, !coarray & mpi_comm_world, istatus, ierr ) mdis(me)[ipx_msrc+1] = icxbm0 ! Put - sync all +! sync all + call xmp_sync_all(status) mb = mdis(ipx_mdest+1) wm_tmp( :, :, :, mb:mb +md )[ipx_mdest+1] . = wm_tmp( :, :, :, icxbm0prior:icxbm0prior+md ) ! Put - sync all +! sync all + call xmp_sync_all(status) !! 
#else call mpi_isend(wm_tmp(1,1,1,icxbm0prior), nccm, diff --git a/MODYLAS-MINI/src/xmpAPI_domain_div.f b/MODYLAS-MINI/src/xmpAPI_domain_div.f index 1856c1c..929cc9d 100755 --- a/MODYLAS-MINI/src/xmpAPI_domain_div.f +++ b/MODYLAS-MINI/src/xmpAPI_domain_div.f @@ -37,6 +37,7 @@ subroutine init_fmm_domain_div() integer(4) :: idx, idy, idz, imx, imy, imz,i include 'mpif.h' integer(4) :: ierr + integer(4) :: status maxdiv = nprocs !ya @@ -137,7 +138,8 @@ subroutine init_fmm_domain_div() iymin=ncell izmin=ncell !coarray call mpi_barrier(mpi_comm_world,ierr) - sync all + !sync all + call xmp_sync_all(status) !! if(myrank==0)then From 46be8a4f27c18b67eea1eafbf0e8df8b42496476 Mon Sep 17 00:00:00 2001 From: Kazuma Tago Date: Fri, 5 Mar 2021 15:30:09 +0900 Subject: [PATCH 19/70] [WIP] Update commentout. --- MODYLAS-MINI/src/xmpAPI_fmodules.f | 52 +++++++++++++++--------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/MODYLAS-MINI/src/xmpAPI_fmodules.f b/MODYLAS-MINI/src/xmpAPI_fmodules.f index f0660f9..8d9c697 100755 --- a/MODYLAS-MINI/src/xmpAPI_fmodules.f +++ b/MODYLAS-MINI/src/xmpAPI_fmodules.f @@ -49,12 +49,12 @@ module trj_mpi integer(8) :: wkxyz_desc real(8),allocatable :: wkv(:,:) - ! integer(4),allocatable :: i2m(:), m2i(:)[:] +! ! integer(4),allocatable :: i2m(:), m2i(:)[:] integer(4),allocatable :: i2m(:) integer(4), POINTER :: m2i(:) => null () integer(8) :: m2i_desc - !integer(4),allocatable :: tag(:,:,:),na_per_cell(:,:,:)[:] +! !integer(4),allocatable :: tag(:,:,:),na_per_cell(:,:,:)[:] integer(4),allocatable :: tag(:,:,:) integer(4), POINTER :: na_per_cell(:,:,:) => null () integer(8) :: na_per_cell_desc @@ -283,29 +283,29 @@ module comm_base c---------------------------------------------------------------------- module comm_d3 integer nczdiv, ncydiv, ncxdiv - ! integer,allocatable :: icbufp(:)[:] +! ! integer,allocatable :: icbufp(:)[:] integer, POINTER :: icbufp(:) => null () - ! integer,allocatable :: ircbufp(:)[:] +! ! 
integer,allocatable :: ircbufp(:)[:] integer, POINTER :: ircbufp(:) => null () - ! integer,allocatable :: icbufm(:)[:] +! ! integer,allocatable :: icbufm(:)[:] integer, POINTER :: icbufm(:) => null () - ! integer,allocatable :: ircbufm(:)[:] +! ! integer,allocatable :: ircbufm(:)[:] integer, POINTER :: ircbufm(:) => null () - ! integer,allocatable :: ibuffp(:)[:] +! ! integer,allocatable :: ibuffp(:)[:] integer, POINTER :: ibuffp(:) => null () - ! integer,allocatable :: irbuffp(:)[:] +! ! integer,allocatable :: irbuffp(:)[:] integer, POINTER :: irbuffp(:) => null () - ! integer,allocatable :: ibuffm(:)[:] +! ! integer,allocatable :: ibuffm(:)[:] integer, POINTER :: ibuffm(:) => null () - ! integer,allocatable :: irbuffm(:)[:] +! ! integer,allocatable :: irbuffm(:)[:] integer, POINTER :: irbuffm(:) => null () - ! real(8),allocatable :: buffp(:,:)[:] +! ! real(8),allocatable :: buffp(:,:)[:] real(8), POINTER :: buffp(:,:) => null () - ! real(8),allocatable :: rbuffp(:,:)[:] +! ! real(8),allocatable :: rbuffp(:,:)[:] real(8), POINTER :: rbuffp(:,:) => null () - ! real(8),allocatable :: buffm(:,:)[:] +! ! real(8),allocatable :: buffm(:,:)[:] real(8), POINTER :: buffm(:,:) => null () - ! real(8),allocatable :: rbuffm(:,:)[:] +! ! real(8),allocatable :: rbuffm(:,:)[:] real(8), POINTER :: rbuffm(:,:) => null () end module c---------------------------------------------------------------------- @@ -325,27 +325,27 @@ module comm_bd integer,allocatable :: ibuffm(:) integer,allocatable :: isbufp(:) integer,allocatable :: isbufm(:) - ! real(8),allocatable :: rbuff_p(:,:)[:] +! ! real(8),allocatable :: rbuff_p(:,:)[:] real(8), POINTER :: rbuff_p(:,:) => null () - real(8) :: rbuff_p_desc - ! real(8),allocatable :: rbuff_m(:,:)[:] + integer(8) :: rbuff_p_desc +! ! real(8),allocatable :: rbuff_m(:,:)[:] real(8), POINTER :: rbuff_m(:,:) => null () - real(8) :: rbuff_m_desc - ! integer,allocatable :: irbuff_p(:)[:] + integer(8) :: rbuff_m_desc +! ! 
integer,allocatable :: irbuff_p(:)[:] integer, POINTER :: irbuff_p(:) => null () - integer :: irbuff_p_desc - ! integer,allocatable :: irbuff_m(:)[:] + integer(8) :: irbuff_p_desc +! ! integer,allocatable :: irbuff_m(:)[:] integer, POINTER :: irbuff_m(:) => null () - integer :: irbuff_m_desc - ! integer,allocatable :: irsbuf_p(:)[:] + integer(8) :: irbuff_m_desc +! ! integer,allocatable :: irsbuf_p(:)[:] integer, POINTER :: irsbuf_p(:) => null () - integer :: irsbuf_p_desc - ! integer,allocatable :: irsbuf_m(:)[:] + integer(8) :: irsbuf_p_desc +! ! integer,allocatable :: irsbuf_m(:)[:] integer, POINTER :: irsbuf_m(:) => null () + integer(8) :: irsbuf_m_desc integer(4), dimension(1) :: comm_bd_img_dims - integer :: irsbuf_m_desc integer,allocatable :: ncatmw(:,:,:,:) end module From 222d544d8ce2f3265d263573468bfcab483ad7b7 Mon Sep 17 00:00:00 2001 From: Kazuma Tago Date: Fri, 5 Mar 2021 17:09:27 +0900 Subject: [PATCH 20/70] [WIP] Add allocation routines of XMP-API to xmpAPI_domain_div.f. --- MODYLAS-MINI/src/xmpAPI_domain_div.f | 35 +++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/MODYLAS-MINI/src/xmpAPI_domain_div.f b/MODYLAS-MINI/src/xmpAPI_domain_div.f index 929cc9d..efcc27a 100755 --- a/MODYLAS-MINI/src/xmpAPI_domain_div.f +++ b/MODYLAS-MINI/src/xmpAPI_domain_div.f @@ -427,15 +427,29 @@ subroutine fmod_alloc_metadata !init_fmm_direct_com() use shakerattleroll use mpivar use ompvar + use xmp_api implicit none integer(4) :: itmp + integer(8), dimension(3) :: na_per_cell_lb, na_per_cell_ub + integer(8), dimension(2) :: wkxyz_lb, wkxyz_ub + integer(8), dimension(1) :: m2i_lb, m2i_ub include 'mpif.h' !############ ! metadata !############ allocate(tag(lzdiv+4,lydiv+4,lxdiv+4)) - allocate(na_per_cell(lzdiv+4,lydiv+4,lxdiv+4)[*]) + ! 
allocate(na_per_cell(lzdiv+4,lydiv+4,lxdiv+4)[*]) + na_per_cell_lb(1) = 1 + na_per_cell_lb(2) = 1 + na_per_cell_lb(3) = 1 + na_per_cell_ub(1) = lzdiv+4 + na_per_cell_ub(2) = lydiv+4 + na_per_cell_ub(3) = lxdiv+4 + call xmp_new_coarray(na_per_cell_desc,4,3, + & na_per_cell_lb,na_per_cell_ub,1, trj_mpi_img_dims) + call xmp_coarray_bind(na_per_cell_desc,na_per_cell) + !############ ! segment !############ @@ -454,9 +468,24 @@ subroutine fmod_alloc_metadata !init_fmm_direct_com() na5cell=na1cell*5 nadirect=na1cell*(lxdiv+4)*(lydiv+4)*(lzdiv+4) !Coordinate & Velocity - allocate(wkxyz(3,nadirect)[*]) + ! allocate(wkxyz(3,nadirect)[*]) + wkxyz_lb(1) = 1 + wkxyz_lb(2) = 1 + wkxyz_ub(1) = 3 + wkxyz_ub(2) = nadirect + call xmp_new_coarray(wkxyz_desc,8,2, + & wkxyz_lb,wkxyz_ub,1, trj_mpi_img_dims) + call xmp_coarray_bind(wkxyz_desc,wkxyz) + allocate(wkv(3,nadirect)) - allocate(m2i(nadirect)[*]) + + ! allocate(m2i(nadirect)[*]) + m2i_lb(1) = 1 + m2i_ub(1) = nadirect + call xmp_new_coarray(m2i_desc,4,1, + & m2i_lb,m2i_ub,1,trj_mpi_img_dims) + call xmp_coarray_bind(m2i_desc,m2i) + !Force allocate(wk_f(3,nadirect)) allocate(w3_f(3,nadirect,0:nomp-1)) From 6576bee42982e50534c70a4b58fa6e497ce3f58e Mon Sep 17 00:00:00 2001 From: Kazuma Tago Date: Fri, 5 Mar 2021 17:13:53 +0900 Subject: [PATCH 21/70] [WIP] Add mpi setting routines. --- MODYLAS-MINI/src/xmpAPI_mpitool.f | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/MODYLAS-MINI/src/xmpAPI_mpitool.f b/MODYLAS-MINI/src/xmpAPI_mpitool.f index 678cc8c..d2ae740 100755 --- a/MODYLAS-MINI/src/xmpAPI_mpitool.f +++ b/MODYLAS-MINI/src/xmpAPI_mpitool.f @@ -28,15 +28,23 @@ c subroutine mpistart use mpivar + use xmp_api implicit none include 'mpif.h' integer(4) :: ierr + call xmp_api_init !coarray call mpi_init(ierr) !coarray call mpi_comm_size(mpi_comm_world,nprocs,ierr) !coarray call mpi_comm_rank(mpi_comm_world,myrank,ierr) - nprocs = num_images() - myrank = this_image()-1 + ! nprocs = num_images() +! 
nprocs = xmp_num_images() + ! TODO: use xmp_num_images + call mpi_comm_size(mpi_comm_world,nprocs,ierr) + + + ! myrank = this_image()-1 + myrank = xmp_this_image() - 1 !! mpiout=0 return @@ -47,6 +55,7 @@ subroutine mpiend !coarray include 'mpif.h' !coarray integer ierr !coarray call mpi_finalize(ierr) + call xmp_api_finalize return end From 8baff62ea06e222c193be18eaeba102ca77fcd04 Mon Sep 17 00:00:00 2001 From: Kazuma Tago Date: Fri, 5 Mar 2021 17:14:45 +0900 Subject: [PATCH 22/70] [WIP] Update coarray option of make_setting.xmp_api. --- MODYLAS-MINI/src/make_setting.xmp_api | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MODYLAS-MINI/src/make_setting.xmp_api b/MODYLAS-MINI/src/make_setting.xmp_api index bb2e4d3..71e1a5e 100755 --- a/MODYLAS-MINI/src/make_setting.xmp_api +++ b/MODYLAS-MINI/src/make_setting.xmp_api @@ -17,4 +17,4 @@ OMNI_LIB = $(shell xmpcc --show-env | grep OMNI_LINKER_OPT | sed -e 's/OMNI_LINK FFLAGS += -I$(MPIHOME)/include $(OMNI_LIB) $(OMNI_INC) # TODO: temporal option -FFLAGS += -fcoarray=lib +#FFLAGS += -fcoarray=lib From baa0b733bbe7d29c020195bac8636b1c2b6d7b68 Mon Sep 17 00:00:00 2001 From: Kazuma Tago Date: Fri, 5 Mar 2021 17:22:47 +0900 Subject: [PATCH 23/70] [WIP] Add allocation routines of XMP-API to xmpAPI_comm.f. 
--- MODYLAS-MINI/src/xmpAPI_comm.f | 113 +++++++++++++++++++++++++++++---- 1 file changed, 100 insertions(+), 13 deletions(-) diff --git a/MODYLAS-MINI/src/xmpAPI_comm.f b/MODYLAS-MINI/src/xmpAPI_comm.f index e4ebe7e..969c883 100755 --- a/MODYLAS-MINI/src/xmpAPI_comm.f +++ b/MODYLAS-MINI/src/xmpAPI_comm.f @@ -75,6 +75,11 @@ subroutine init_comm_bound() use mpivar implicit none integer(4) :: itmp + integer(8), dimension(1) :: irbuff_p_lb, irbuff_p_ub, + & irbuff_m_lb, irbuff_m_ub, irsbuf_p_lb, irsbuf_p_ub, + & irsbuf_m_lb, irsbuf_m_ub + integer(8), dimension(2) :: rbuff_p_lb,rbuff_p_ub, + & rbuff_m_lb, rbuff_m_ub npz = nzdiv npy = nydiv @@ -106,14 +111,54 @@ subroutine init_comm_bound() allocate(ibuffm ( max_cellcbd*max_mvatom)) allocate(isbufp (2*max_cellcbd + 1 + max_cellcbd*max_mvseg)) allocate(isbufm (2*max_cellcbd + 1 + max_cellcbd*max_mvseg)) - allocate(rbuff_p (6,max_cellcbd*max_mvatom)[*]) - allocate(rbuff_m (6,max_cellcbd*max_mvatom)[*]) - allocate(irbuff_p( max_cellcbd*max_mvatom)[*]) - allocate(irbuff_m( max_cellcbd*max_mvatom)[*]) - allocate(irsbuf_p(2*max_cellcbd + 1 + max_cellcbd*max_mvseg)[*]) - allocate(irsbuf_m(2*max_cellcbd + 1 + max_cellcbd*max_mvseg)[*]) allocate( ncatmw(32, nczdiv+2, ncydiv+2, ncxdiv+2) ) +! !allocate(rbuff_p (6,max_cellcbd*max_mvatom)[*]) + rbuff_p_lb(1) = 1 + rbuff_p_lb(2) = 1 + rbuff_p_ub(1) = max_cellcbd*max_mvatom + rbuff_p_ub(2) = 6 + call xmp_new_coarray(rbuff_p_desc,8,2, + & rbuff_p_lb,rbuff_p_ub,1,comm_bd_img_dims) + call xmp_coarray_bind(rbuff_p_desc,rbuff_p) + +! !allocate(rbuff_m (6,max_cellcbd*max_mvatom)[*]) + rbuff_m_lb(1) = 1 + rbuff_m_lb(2) = 1 + rbuff_m_ub(1) = max_cellcbd*max_mvatom + rbuff_m_ub(2) = 6 + call xmp_new_coarray(rbuff_m_desc,8,2, + & rbuff_m_lb,rbuff_m_ub,1,comm_bd_img_dims) + call xmp_coarray_bind(rbuff_m_desc,rbuff_m) + +! 
!allocate(irbuff_p( max_cellcbd*max_mvatom)[*]) + irbuff_p_lb(1) = 1 + irbuff_p_ub(1) = max_cellcbd*max_mvatom + call xmp_new_coarray(irbuff_p_desc,4,1, + & irbuff_p_lb,irbuff_p_ub,1,comm_bd_img_dims) + call xmp_coarray_bind(irbuff_p_desc,irbuff_p) + +! !allocate(irbuff_m( max_cellcbd*max_mvatom)[*]) + irbuff_m_lb(1) = 1 + irbuff_m_ub(1) = max_cellcbd*max_mvatom + call xmp_new_coarray(irbuff_m_desc,4,1, + & irbuff_m_lb,irbuff_m_ub,1,comm_bd_img_dims) + call xmp_coarray_bind(irbuff_m_desc,irbuff_m) + +! !allocate(irsbuf_p(2*max_cellcbd + 1 + max_cellcbd*max_mvseg)[*]) + irsbuf_p_lb(1) = 1 + irsbuf_p_ub(1) = 2*max_cellcbd + 1 + max_cellcbd*max_mvseg + call xmp_new_coarray(irsbuf_p_desc,4,1, + & irsbuf_p_lb,irsbuf_p_ub,1,comm_bd_img_dims) + call xmp_coarray_bind(irsbuf_p_desc,irsbuf_p) + +! !allocate(irsbuf_m(2*max_cellcbd + 1 + max_cellcbd*max_mvseg)[*]) + irsbuf_m_lb(1) = 1 + irsbuf_m_ub(1) = 2*max_cellcbd + 1 + max_cellcbd*max_mvseg + call xmp_new_coarray(irsbuf_m_desc,4,1, + & irsbuf_m_lb,irsbuf_m_ub,1,comm_bd_img_dims) + call xmp_coarray_bind(irsbuf_m_desc,irsbuf_m) + return end c---------------------------------------------------------------------- @@ -490,6 +535,11 @@ subroutine comm_bound() !coarray & irsbuf_p, ncsr, MPI_INTEGER, !coarray & ipz_src, ipz_src, !coarray & mpi_comm_world, istatus, ierr ) + call xmp_new_array_section(irsbuf_p_sec,1) + call xmp_array_section_set_triplet(irsbuf_p_sec,1,1, + & ncs+1,1,status) + + comm_bd_img_dims(1) = ipz_dest+1 irsbuf_p(1:ncs+1)[ipz_dest+1] = isbufp(1:ncs+1) ! Put !sync all @@ -1767,21 +1817,52 @@ subroutine pre_record_data real(8),allocatable :: snd(:,:),rcv(:,:) !coarray integer(4),allocatable :: natmlist(:),natmdisp(:) integer(4),allocatable :: natmdisp(:) - integer(4),allocatable :: natmlist(:)[:] +! 
integer(4),allocatable :: natmlist(:)[:] + integer(4), POINTER :: natmlist(:) => null () + integer(8) :: natmlist_desc + integer(4),allocatable :: natmlist_tmp(:) - integer,allocatable :: ndis(:)[:], mdis(:)[:] - real(8),allocatable :: rcvx(:,:)[:] +! integer,allocatable :: ndis(:)[:], mdis(:)[:] + integer, POINTER :: ndis(:) => null () + integer, POINTER :: mdis(:) => null () + integer(8) :: ndis_desc + integer(8) :: mdis_desc + +! real(8),allocatable :: rcvx(:,:)[:] + real(8), POINTER :: rcvx(:,:) => null () + integer(8) :: rcvx_desc + integer :: me, np, ms, mm !! integer(4),allocatable :: nrearrange(:) integer(4) :: m2i_tmp(na1cell*lxdiv*lydiv*lzdiv) + integer(4) :: img_dims(1) + integer(8), dimension(1) :: natmlist_lb, natmlist_ub, + & ndis_lb, ndis_ub, mdis_lb, mdis_ub + integer(8), dimension(2) :: rcvx_lb, rcvx_ub + !coarray me = this_image() np = num_images() - allocate(ndis(np)[*]) - allocate(mdis(n)[*]) - allocate(rcvx(6,n)[*]) +! allocate(ndis(np)[*]) +! allocate(mdis(n)[*]) +! allocate(rcvx(6,n)[*]) + ndis_lb(1) = 1 + ndis_ub(1) = np + mdis_lb(1) = 1 + mdis_ub(1) = n + rcvx_lb(1) = 1 + rcvx_lb(2) = 1 + rcvx_ub(1) = 6 + rcvx_ub(2) = n + call xmp_new_coarray(ndis_desc,4,1,ndis_lb,ndis_ub,1, img_dims) + call xmp_new_coarray(mdis_desc,4,1,mdis_lb,mdis_ub,1, img_dims) + call xmp_new_coarray(rcvx_desc,8,2,rcvx_lb,rcvx_ub,1, img_dims) + call xmp_coarray_bind(ndis_desc,ndis) + call xmp_coarray_bind(mdis_desc,mdis) + call xmp_coarray_bind(rcvx_desc,rcvx) + !! if(nprocs.eq.1) then @@ -1799,7 +1880,13 @@ subroutine pre_record_data allocate(snd(6,n)) allocate(rcv(6,n)) !coarray allocate(natmlist(nprocs),natmdisp(nprocs)) - allocate(natmlist(nprocs)[*]) + !allocate(natmlist(nprocs)[*]) + natmlist_lb(1) = 1 + natmlist_ub(1) = nprocs + call xmp_new_coarray(natmlist_desc,8,2, + & natmlist_lb,natmlist_ub,1,img_dims) + call xmp_coarray_bind(natmlist_desc,natmlist) + allocate(natmlist_tmp(nprocs)) allocate(natmdisp(nprocs)) !! 
From bbd0440377731c5aa76c3f8b7da206e9c54fcdcb Mon Sep 17 00:00:00 2001 From: tozaki_chisae Date: Fri, 5 Mar 2021 19:21:09 +0900 Subject: [PATCH 24/70] [WIP] modify 15 files. --- FFB-MINI/src/bcgs3x.F | 23 +++++++++++--- FFB-MINI/src/bcgsxe.F | 22 ++++++++++--- FFB-MINI/src/calax3.F | 13 ++++++-- FFB-MINI/src/callap.F | 13 ++++++-- FFB-MINI/src/dd_mpi/dd_mpi.F90 | 27 ++++++++++------ FFB-MINI/src/grad3x.F | 13 ++++++-- FFB-MINI/src/les3x.F | 58 +++++++++++++++++++++++++--------- FFB-MINI/src/lessfx.F | 18 +++++++++-- FFB-MINI/src/lrfnms.F | 13 ++++++-- FFB-MINI/src/miniapp_util.F | 2 +- FFB-MINI/src/nodlex.F | 13 ++++++-- FFB-MINI/src/pres3e.F | 8 ++++- FFB-MINI/src/rcmelm.F | 28 +++++++++++++--- FFB-MINI/src/vel3d1.F | 34 ++++++++++++++++---- FFB-MINI/src/vel3d2.F | 13 ++++++-- 15 files changed, 238 insertions(+), 60 deletions(-) diff --git a/FFB-MINI/src/bcgs3x.F b/FFB-MINI/src/bcgs3x.F index 18ab675..efbca40 100755 --- a/FFB-MINI/src/bcgs3x.F +++ b/FFB-MINI/src/bcgs3x.F @@ -1,7 +1,10 @@ SUBROUTINE BCGS3X(IMODE,IPART,NMAX,EPS,ME,N,NE,NP, * NPP,NCRS,IPCRS,AAAPC,B,S,NITR,RESR, * NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND,NUMIP, - * WEIGHT,R0,RK,PK,APK,ATK,TK,S0,RX,RY, +C Fj +C * WEIGHT,R0,RK,PK,APK,ATK,TK,S0,RX,RY, + * WEIGHT,R0,RK,PK,APK,ATK,TK,S0,rx_desc,ry_desc, +C Fj * IUT0,IERR, * JUNROL,NPPMAX,NCRS2,TS,TACRS,ITPCRS) #include "timing.h" @@ -31,6 +34,9 @@ SUBROUTINE BCGS3X(IMODE,IPART,NMAX,EPS,ME,N,NE,NP, CCC [WORK] REAL*4 RX(0:N,ME),RY(0:N,ME),WEIGHT(NP), * R0(NP),RK(NP),PK(NP),APK(NP),ATK(NP),TK(NP),S0(NP) +C Fj + INTEGER*8 rx_desc,ry_desc +C Fj C C [FULL UNROLL] INTEGER*4 JUNROL @@ -148,7 +154,10 @@ SUBROUTINE BCGS3X(IMODE,IPART,NMAX,EPS,ME,N,NE,NP, call maprof_time_start(TM_CALAX3) CALL CALAX3(AAAPC, S, RK, NP, NE, NCRS, IPCRS, NPP, * N,ME,IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, - * RX,RY,IUT0,IERR, +C Fj +C * RX,RY,IUT0,IERR, + * rx_desc,ry_desc,IUT0,IERR, +C Fj * JUNROL,NPPMAX,NCRS2,TS,TACRS,ITPCRS) call maprof_time_stop(TM_CALAX3) C @@ -213,7 +222,10 @@ 
SUBROUTINE BCGS3X(IMODE,IPART,NMAX,EPS,ME,N,NE,NP, call maprof_time_start(TM_CALAX3) CALL CALAX3(AAAPC, PK, APK, NP, NE, NCRS, IPCRS, NPP, * N,ME,IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, - * RX,RY,IUT0,IERR, +C Fj +C * RX,RY,IUT0,IERR, + * rx_desc,ry_desc,IUT0,IERR, +C Fj * JUNROL,NPPMAX,NCRS2,TS,TACRS,ITPCRS) call maprof_time_stop(TM_CALAX3) C @@ -271,7 +283,10 @@ SUBROUTINE BCGS3X(IMODE,IPART,NMAX,EPS,ME,N,NE,NP, call maprof_time_start(TM_CALAX3) CALL CALAX3(AAAPC, TK, ATK, NP, NE, NCRS, IPCRS, NPP, * N,ME,IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, - * RX,RY,IUT0,IERR, +C Fj +C * RX,RY,IUT0,IERR, + * rx_desc,ry_desc,IUT0,IERR, +C Fj * JUNROL,NPPMAX,NCRS2,TS,TACRS,ITPCRS) call maprof_time_stop(TM_CALAX3) C diff --git a/FFB-MINI/src/bcgsxe.F b/FFB-MINI/src/bcgsxe.F index 9c98f5a..9dd6c8f 100755 --- a/FFB-MINI/src/bcgsxe.F +++ b/FFB-MINI/src/bcgsxe.F @@ -4,7 +4,11 @@ SUBROUTINE BCGSXE(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * B,NPFIX,LPFIX, * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT,NITR,RESR,S, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, - * R0,RK,PK,APK,ATK,TK,FXYZ,RX,RY,MWRK,WRKN, +C Fj +C * R0,RK,PK,APK,ATK,TK,FXYZ,RX,RY,MWRK,WRKN, + * R0,RK,PK,APK,ATK,TK,FXYZ,rx_desc,ry_desc,MWRK, + * WRKN, +C Fj * IUT0,IERR) #include "timing.h" IMPLICIT NONE @@ -40,6 +44,7 @@ SUBROUTINE BCGSXE(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, REAL*4 RX(0:N,ME),RY(0:N,ME), 1 R0(NE),RK(NE),PK(NE),APK(NE),ATK(NE),TK(NE), 2 FXYZ(3,NP) + INTEGER*8 rx_desc,ry_desc INTEGER*4 MWRK REAL*4 WRKN(MWRK,9) C @@ -160,7 +165,10 @@ SUBROUTINE BCGSXE(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * NPFIX,LPFIX, * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, - * FXYZ,RX,RY,MWRK,WRKN, +C Fj +C * FXYZ,RX,RY,MWRK,WRKN, + * FXYZ,rx_desc,ry_desc,MWRK,WRKN, +C Fj * IUT0,IERR) call maprof_time_stop(TM_CALLAP) C @@ -228,7 +236,10 @@ SUBROUTINE BCGSXE(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * NPFIX,LPFIX, * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, - * 
FXYZ,RX,RY,MWRK,WRKN, +C Fj +C * FXYZ,RX,RY,MWRK,WRKN, + * FXYZ,rx_desc,ry_desc,MWRK,WRKN, +C Fj * IUT0,IERR) call maprof_time_stop(TM_CALLAP) C @@ -291,7 +302,10 @@ SUBROUTINE BCGSXE(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * NPFIX,LPFIX, * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, - * FXYZ,RX,RY,MWRK,WRKN, +C Fj +C * FXYZ,RX,RY,MWRK,WRKN, + * FXYZ,rx_desc,ry_desc,MWRK,WRKN, +C Fj * IUT0,IERR) call maprof_time_stop(TM_CALLAP) C diff --git a/FFB-MINI/src/calax3.F b/FFB-MINI/src/calax3.F index ec71bda..aa0feed 100755 --- a/FFB-MINI/src/calax3.F +++ b/FFB-MINI/src/calax3.F @@ -1,7 +1,10 @@ C======================================================================= SUBROUTINE CALAX3(A, S, AS, NP, NE, NCRS, IPCRS, NPP, * N,ME,IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, - * RX,RY,IUT0,IERR, +C Fj +C * RX,RY,IUT0,IERR, + * rx_desc,ry_desc,IUT0,IERR, +C Fj * JUNROL,NPPMAX,NCRS2,TS,TA,ITPCRS) C======================================================================= #include "timing.h" @@ -16,6 +19,9 @@ SUBROUTINE CALAX3(A, S, AS, NP, NE, NCRS, IPCRS, NPP, INTEGER MAXBUF,IDUM INTEGER N,ME,IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,IUT0,IERR REAL*4 RX,RY +C Fj + INTEGER*8 rx_desc,ry_desc +C Fj C DIMENSION LDOM(NDOM),NBPDOM(NDOM) DIMENSION IPSLF(MBPDOM,NDOM),IPSND(MBPDOM,NDOM) @@ -177,7 +183,10 @@ SUBROUTINE CALAX3(A, S, AS, NP, NE, NCRS, IPCRS, NPP, call maprof_time_start(TM_CALAX3_COM) IDUM = 1 CALL DDCOMX(IPART,IDUM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, - * AS,AS,AS,NP,IUT0,IERR,RX,RY,MAXBUF) +C Fj +C * AS,AS,AS,NP,IUT0,IERR,RX,RY,MAXBUF) + * AS,AS,AS,NP,IUT0,IERR,rx_desc,ry_desc,MAXBUF) +C Fj call maprof_time_stop(TM_CALAX3_COM) IF(IERR.NE.0) THEN WRITE(IUT0,*) diff --git a/FFB-MINI/src/callap.F b/FFB-MINI/src/callap.F index 660b903..8f0945c 100755 --- a/FFB-MINI/src/callap.F +++ b/FFB-MINI/src/callap.F @@ -5,7 +5,10 @@ SUBROUTINE CALLAP(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * NPFIX,LPFIX, * 
NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, - * FXYZ,RX,RY,MWRK,WRKN, +C Fj +C * FXYZ,RX,RY,MWRK,WRKN, + * FXYZ,rx_desc,ry_desc,MWRK,WRKN, +C Fj * IUT0,IERR) #include "timing.h" IMPLICIT NONE @@ -44,6 +47,9 @@ SUBROUTINE CALLAP(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, C CCC [WORK] REAL*4 RX(0:N,ME),RY(0:N,ME) +C Fj + INTEGER*8 rx_desc,ry_desc +C Fj INTEGER*4 MWRK REAL*4 WRKN(MWRK,9) C @@ -160,7 +166,10 @@ SUBROUTINE CALLAP(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * NPFIX,LPFIX, * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, - * FXYZ,RX,RY,MWRK,WRKN, +C Fj +C * FXYZ,RX,RY,MWRK,WRKN, + * FXYZ,rx_desc,ry_desc,MWRK,WRKN, +C Fj * IUT0,IERR) call maprof_time_stop(TM_GRAD3X) C diff --git a/FFB-MINI/src/dd_mpi/dd_mpi.F90 b/FFB-MINI/src/dd_mpi/dd_mpi.F90 index ea5a35d..c70a584 100755 --- a/FFB-MINI/src/dd_mpi/dd_mpi.F90 +++ b/FFB-MINI/src/dd_mpi/dd_mpi.F90 @@ -806,14 +806,15 @@ SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT DIMENSION LDOM(NDOM),NBPDOM(NDOM),IPSLF(MBPDOM,NDOM),IPSND(MBPDOM,NDOM), FX(NP),FY(NP),FZ(NP) ! Fujitsu start 202103 ! DIMENSION BUFSND(MAXBUF)[*], BUFRCV(MAXBUF)[*] - INTEGER*4 , POINTER :: BUFSND ( : , : ) => null ( ) - INTEGER*4 , POINTER :: BUFRCV ( : , : ) => null ( ) + REAL*4 , POINTER :: BUFSND ( : ) => null ( ) + REAL*4 , POINTER :: BUFRCV ( : ) => null ( ) INTEGER*8 :: snd_desc, rcv_desc INTEGER*8 :: snd_sec, rcv_sec INTEGER*8, DIMENSION(1) :: snd_lb, snd_ub, rcv_lb, rcv_ub INTEGER*8 :: st_desc, st_l_desc INTEGER*8 :: st_sec, st_l_sec INTEGER*8, DIMENSION(1) :: st_lb, st_ub, st_l_lb, st_l_ub + INTEGER*8 :: start1, end1, start2, end2 INTEGER*4 :: img_dims(1) INTEGER*4 status ! Fujitsu end 202103 @@ -824,8 +825,8 @@ SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT INTEGER MAX_RECV_LEN ! Fujitsu start 202103 ! 
INTEGER ,ALLOCATABLE :: START_R(:)[:] - INTEGER*4 , POINTER :: START_R ( : , : ) => null ( ) - INTEGER*4 , POINTER :: start_rr_p ( : , : ) => null ( ) + INTEGER*4 , POINTER :: START_R ( : ) => null ( ) + INTEGER*4 , POINTER :: start_rr_p ( : ) => null ( ) ! Fujitsu end 202103 ! INTEGER ,ALLOCATABLE :: END_R(:)[:] INTEGER ,ALLOCATABLE :: START_S(:) @@ -1128,8 +1129,12 @@ SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT ! BUFSND(START_S(LDOM(IDOM)):END_S(LDOM(IDOM))) ! Fujitsu start 202103 ! START_RR = START_R(ME)[LDOM(IDOM)] - call xmp_array_section_set_triplet(st_sec,1,ME,ME,1,status) - call xmp_array_section_set_triplet(st_l_sec,1,1,1,1,status) + start1 = ME + end1 = ME + start2 = 1 + end2 = 1 + call xmp_array_section_set_triplet(st_sec,1,start1,end1,1,status) + call xmp_array_section_set_triplet(st_l_sec,1,start2,end2,1,status) img_dims = LDOM(IDOM) call xmp_coarray_get(img_dims,st_desc,st_sec, & st_l_desc,st_l_sec,status) @@ -1139,10 +1144,14 @@ SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT ! Fujitsu start 202103 ! BUFRCV(START_RR:END_RR)[LDOM(IDOM)] = & ! BUFSND(START_S(LDOM(IDOM)):END_S(LDOM(IDOM))) - call xmp_array_section_set_triplet(rcv_sec,1,START_RR,END_RR,1,status) + start1 = START_RR + end1 = END_RR + call xmp_array_section_set_triplet(rcv_sec,1,start1,end1,1,status) START_RR = START_S(LDOM(IDOM)) END_RR = END_S(LDOM(IDOM)) - call xmp_array_section_set_triplet(snd_sec,1,START_RR,END_RR,1,status) + start1 = START_RR + end1 = END_RR + call xmp_array_section_set_triplet(snd_sec,1,start1,end1,1,status) img_dims = LDOM(IDOM) call xmp_coarray_put(img_dims,rcv_desc,rcv_sec,snd_desc,snd_sec,status); ! Fujitsu end 202103 @@ -1353,7 +1362,7 @@ SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT ! call xmp_coarray_deallocate(snd_desc, status) ! call xmp_coarray_deallocate(rcv_desc, status) ! - call xmp_finalize_all + call xmp_api_finalize ! Fujitsu end 202103 ! ! 
IPART = IPART diff --git a/FFB-MINI/src/grad3x.F b/FFB-MINI/src/grad3x.F index 73c3780..5ed006d 100755 --- a/FFB-MINI/src/grad3x.F +++ b/FFB-MINI/src/grad3x.F @@ -4,7 +4,10 @@ SUBROUTINE GRAD3X(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * NPFIX,LPFIX, * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, - * FXYZ,RX,RY,MWRK,WRKN, +C Fj +C * FXYZ,RX,RY,MWRK,WRKN, + * FXYZ,rx_desc,ry_desc,MWRK,WRKN, +C Fj * IUT0,IERR) #include "timing.h" IMPLICIT NONE @@ -42,6 +45,9 @@ SUBROUTINE GRAD3X(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, C CCC [WORK] REAL*4 RX(0:N,ME),RY(0:N,ME) +C Fj + INTEGER*8 rx_desc,ry_desc +C Fj INTEGER*4 MWRK REAL*4 WRKN(MWRK,9) C @@ -236,7 +242,10 @@ SUBROUTINE GRAD3X(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, call maprof_time_start(TM_GRAD3X_COM) CALL DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, * WRKN(1,4),WRKN(1,5),WRKN(1,6),NP,IUT0,IERR, - * RX,RY,MAXBUF) +C Fj +C * RX,RY,MAXBUF) + * rx_desc,ry_desc,MAXBUF) +C Fj call maprof_time_stop(TM_GRAD3X_COM) IF(IERR.NE.0) THEN WRITE(IUT0,*) diff --git a/FFB-MINI/src/les3x.F b/FFB-MINI/src/les3x.F index 409e8f8..2637d2c 100755 --- a/FFB-MINI/src/les3x.F +++ b/FFB-MINI/src/les3x.F @@ -1,4 +1,4 @@ -X======================================================================C +C======================================================================C C C C SOFTWARE NAME : FRONTFLOW_BLUE.8.1 C C C @@ -332,10 +332,10 @@ SUBROUTINE LES3X(FILEIN) * WRK09(:),WRK10(:),WRK11(:),WRK12(:), * WRK13(:),WRK3(:,:) CC Fj start 202103 - INTEGER , POINTER :: RX ( : , : ) => null ( ) - INTEGER , POINTER :: RY ( : , : ) => null ( ) + REAL*4 , POINTER :: RX ( : , : ) => null ( ) + REAL*4 , POINTER :: RY ( : , : ) => null ( ) INTEGER*8 :: rx_desc, ry_desc - INTEGER*8, DIMENSION(1) :: rx_lb, rx_ub, ry_lb, ry_ub + INTEGER*8, DIMENSION(2) :: rx_lb, rx_ub, ry_lb, ry_ub INTEGER*4 :: img_dims(1) INTEGER*4 status CC Fj end 202103 @@ -1129,10 +1129,14 @@ SUBROUTINE LES3X(FILEIN) ALLOCATE(DWRK04(3,N1 ), STAT=LERR(26)) 
ALLOCATE(DWRK05( MGAUSS), STAT=LERR(27)) C Fj start 202103 - rx_lb(1) = N1 - rx_ub(1) = ME - ry_lb(1) = N1 - ry_ub(1) = ME + rx_lb(1) = 1 + rx_lb(2) = 1 + rx_ub(1) = N1 + rx_ub(2) = ME + ry_lb(1) = 1 + ry_lb(2) = 1 + ry_ub(1) = N1 + ry_ub(2) = ME call xmp_new_coarray(rx_desc, 4, 1, rx_lb, rx_ub, 1, img_dims) call xmp_new_coarray(ry_desc, 4, 1, ry_lb, ry_ub, 1, img_dims) call xmp_coarray_bind(rx_desc,RX) @@ -1445,7 +1449,10 @@ SUBROUTINE LES3X(FILEIN) * MDOM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, * WRK01,WRK02,WRK03,LWRK06, * DWRK3,LWRK01,LWRK02,WRK04,NODWK1,NODWK2,NODWK3, - * RX,RY,NPB0, +C Fj +C * RX,RY,NPB0, + * rx_desc,ry_desc,NPB0, +C Fj * NPB1,LPB1,XPB1,YPB1,ZPB1, * NPB2,LPB2,XPB2,YPB2,ZPB2, * LWRK03,LWRK04, @@ -1565,7 +1572,10 @@ SUBROUTINE LES3X(FILEIN) * NPCCL ,LPCCL1,LPCCL2, * IPART ,MPINT ,NPINT ,LPINT1,LPINT2,LPINT3, * MDOM ,NDOM ,LDOM ,NBPDOM,MBPDOM,IPSLF,IPSND, - * LPBTOA,IUT0,IUT6,IERR,RX,RY, +C Fj +C * LPBTOA,IUT0,IUT6,IERR,RX,RY, + * LPBTOA,IUT0,IUT6,IERR,rx_desc,ry_desc, +C Fj * MWRK,WRK01,WRK02,WRK03,WRK04,WRK05,WRK06, * LWRK01,LEWRK) CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) @@ -1652,7 +1662,10 @@ SUBROUTINE LES3X(FILEIN) MELM=NELM+1 CALL ELM3DX(MGAUSS,IGAUSH, * MELM,N1,NE,NP,NEX,XD,YD,ZD,NODE, +C Fj * SNI ,DNXI,DNYI,DNZI,SN,RX,RY,WRKN, +C * SNI ,DNXI,DNYI,DNZI,SN,rx_desc,ry_desc,WRKN, +C Fj * E,EX,EY,EZ,EXX,EYY,EZZ,EXY,EXZ,EYZ, * DWRK01,DWRK02,DWRK03,DWRK04,DWRK05,IUT0,IERR) CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) @@ -1690,7 +1703,10 @@ SUBROUTINE LES3X(FILEIN) ENDDO IDUM=1 CALL DDCOMX(IPART,IDUM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, - * CM,CM,CM,NP,IUT0,IERR,RX,RY,MAXBUF) +C Fj +C * CM,CM,CM,NP,IUT0,IERR,RX,RY,MAXBUF) + * CM,CM,CM,NP,IUT0,IERR,rx_desc,ry_desc,MAXBUF) +C Fj DO 2700 IP=1,NP CM(IP)=1.0E0/CM(IP) 2700 CONTINUE @@ -1701,7 +1717,10 @@ SUBROUTINE LES3X(FILEIN) WRITE(IUT6,*) ' ** INTERPOLATING PRESSURE TO NODES **' CALL NODLEX(NODE,ME,NE,NP,N1,NEX,SN, * IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, - * P,PN,CM,IUT0,IERR,RX,RY,MAXBUF) +C Fj +C * 
P,PN,CM,IUT0,IERR,RX,RY,MAXBUF) + * P,PN,CM,IUT0,IERR,rx_desc,ry_desc,MAXBUF) +C Fj IF(IERRA.NE.0) THEN WRITE(IUT0,*) BLANK WRITE(IUT0,*) ERMSGC @@ -1852,7 +1871,10 @@ SUBROUTINE LES3X(FILEIN) * LWRK01,LWRK02, * WRK01,WRK02,WRK03,WRK04,WRK05,WRK06, * WRK07,WRK08,WRK09,WRK10,WRK11,WRK12, - * RX,RY, +C Fj +C * RX,RY, + * rx_desc,ry_desc, +C Fj * JUNROL,NPPMAX,NCRS2,WRK13,TACRS,ITPCRS, * IUT0,IERR) call maprof_time_stop(TM_VEL3D1) @@ -1878,7 +1900,10 @@ SUBROUTINE LES3X(FILEIN) * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * LWRK01,LWRK02,WRK3,WRK01,WRK02,WRK03,WRK04, * WRK05,WRK06,WRK07,WRK08,WRK09,WRK10, - * PRCM,APRCM,RX,RY,MWRK,WRKN, +C Fj +C * PRCM,APRCM,RX,RY,MWRK,WRKN, + * PRCM,APRCM,rx_desc,ry_desc,MWRK,WRKN, +C Fj * IUT0,IERR) call maprof_time_stop(TM_PRES3E) IF(IERR.NE.0) GOTO 9999 @@ -1897,7 +1922,10 @@ SUBROUTINE LES3X(FILEIN) * NPWALL,LPWALL,UWALL,VWALL,WWALL, * NPSYMT,NPSYM2,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, * IPART ,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, - * RX,RY,MWRK,WRKN,WRK3,WRK01, +C Fj +C * RX,RY,MWRK,WRKN,WRK3,WRK01, + * rx_desc,ry_desc,MWRK,WRKN,WRK3,WRK01, +C Fj * IUT0,IERR, * WRK05) C diff --git a/FFB-MINI/src/lessfx.F b/FFB-MINI/src/lessfx.F index a282c44..f02487f 100755 --- a/FFB-MINI/src/lessfx.F +++ b/FFB-MINI/src/lessfx.F @@ -18,7 +18,10 @@ SUBROUTINE LESSFX(LOCAL,NODE,MB,NE,NP,N,N1,NS,NSP,N2D, * NPCCL ,LPCCL1,LPCCL2, * IPART ,MPINT ,NPINT ,LPINT1,LPINT2,LPINT3, * MDOM ,NDOM ,LDOM ,NBPDOM,MBPDOM,IPSLF,IPSND, - * LPBTOA,IUT0,IUT6,IERR,RX,RY, +C Fj +C * LPBTOA,IUT0,IUT6,IERR,RX,RY, + * LPBTOA,IUT0,IUT6,IERR,rx_desc,ry_desc, +C Fj * MWRK,WRK1,WRK2,WRK3,WRK4,WRK5,WRK6, * IWRK,IWRK2) IMPLICIT NONE @@ -36,6 +39,9 @@ SUBROUTINE LESSFX(LOCAL,NODE,MB,NE,NP,N,N1,NS,NSP,N2D, * LPCCL1(NPCCL),LPCCL2(NPCCL) , * WRK1(NE),WRK2(NE),WRK3(NE),WRK4(NP),WRK5(NP),WRK6(NP), * IWRK(MWRK),IWRK2(2,MWRK),RX(0:N,NE),RY(0:N,NE) +C Fj + INTEGER*8 :: rx_desc, ry_desc +C Fj C DIMENSION LPINT1(MPINT),LPINT2(MPINT),LPINT3(MPINT), 1 LDOM (MDOM) ,NBPDOM(MDOM) , @@ -173,7 +179,10 @@ 
SUBROUTINE LESSFX(LOCAL,NODE,MB,NE,NP,N,N1,NS,NSP,N2D, IDIM = 0 CALL DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM, * IPSLF,IPSND,MBPDOM,WRK4,WRK5,WRK6,NP, - * IUT0,IERR,RX,RY,MAXBUF) +C Fj +C * IUT0,IERR,RX,RY,MAXBUF) + * IUT0,IERR,rx_desc,ry_desc,MAXBUF) +C Fj CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) IF(IERRA.NE.0) THEN @@ -289,7 +298,10 @@ SUBROUTINE LESSFX(LOCAL,NODE,MB,NE,NP,N,N1,NS,NSP,N2D, IDIM = 3 CALL DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM, * IPSLF,IPSND,MBPDOM,WRK4,WRK5,WRK6,NP, - * IUT0,IERR,RX,RY,MAXBUF) +C Fj +C * IUT0,IERR,RX,RY,MAXBUF) + * IUT0,IERR,rx_desc,ry_desc,MAXBUF) +C Fj CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) IF(IERRA.NE.0) THEN WRITE(IUT0,*) diff --git a/FFB-MINI/src/lrfnms.F b/FFB-MINI/src/lrfnms.F index 6ccc956..7646af0 100755 --- a/FFB-MINI/src/lrfnms.F +++ b/FFB-MINI/src/lrfnms.F @@ -10,7 +10,10 @@ SUBROUTINE LRFNMS(IRFNFF,IRFN,NGRID, * MDOM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, * UFIX,VFIX,WFIX,LPFRM, * CRD,LWRK01,LWRK02,WRK04,NDRFN,NDORG,NODEBK, - * RX,RY,NPB0, +C Fj +C * RX,RY,NPB0, + * rx_desc,ry_desc,NPB0, +C Fj * NPB1,LPB1,XPB1,YPB1,ZPB1, * NPB2,LPB2,XPB2,YPB2,ZPB2, * ITYPOR,ITYPRF,IUT6,IUT0,IERR) @@ -74,6 +77,9 @@ SUBROUTINE LRFNMS(IRFNFF,IRFN,NGRID, INTEGER*4 LPFRM(MP) REAL*8 CRD(MP*3) REAL*4 RX(ME*8),RY(ME*8) +C Fj + INTEGER*8 :: rx_desc, ry_desc +C Fj INTEGER*4 NODED(8),LEACNV(ME) INTEGER*4 LWRK01(ME),LWRK02(MP) INTEGER*4 WRK04(MP),NDRFN(ME*8),NDORG(NE*8),NODEBK(8,NE) @@ -195,7 +201,10 @@ SUBROUTINE LRFNMS(IRFNFF,IRFN,NGRID, C IDIM = 0 CALL DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, - * WRK04,WRK04,WRK04,NP,IUT0,IERR,RX,RY,MAXBUF) +C Fj +C * WRK04,WRK04,WRK04,NP,IUT0,IERR,RX,RY,MAXBUF) + * WRK04,WRK04,WRK04,NP,IUT0,IERR,rx_desc,ry_desc,MAXBUF) +C Fj CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) IF(IERRA.NE.0) THEN WRITE(IUT0,*) diff --git a/FFB-MINI/src/miniapp_util.F b/FFB-MINI/src/miniapp_util.F index 6c265fd..7838dab 100755 --- a/FFB-MINI/src/miniapp_util.F +++ b/FFB-MINI/src/miniapp_util.F @@ -76,7 +76,7 @@ end subroutine calave 
c========================================================= - integer function total(ival) + integer*4 function total(ival) c c return total amount of an integer variable over whole domains c diff --git a/FFB-MINI/src/nodlex.F b/FFB-MINI/src/nodlex.F index d9d0f47..e0a6a5b 100755 --- a/FFB-MINI/src/nodlex.F +++ b/FFB-MINI/src/nodlex.F @@ -1,7 +1,10 @@ SUBROUTINE NODLEX * (NODE,ME,NE,NP,N1,NEX,SN, * IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, - * VALELM,VALNOD,CM,IUT0,IERR,BUFSND,BUFRCV,MAXBUF) +C Fj +C * VALELM,VALNOD,CM,IUT0,IERR,BUFSND,BUFRCV,MAXBUF) + * VALELM,VALNOD,CM,IUT0,IERR,rx_desc,ry_desc,MAXBUF) +C Fj C IMPLICIT NONE C @@ -14,6 +17,9 @@ SUBROUTINE NODLEX REAL*4 VALELM(NE),VALNOD(NP),CM(NP) INTEGER*4 IUT0,IERR,MAXBUF REAL*4 BUFSND(MAXBUF),BUFRCV(MAXBUF) +C Fj + INTEGER*8 rx_desc,ry_desc +C Fj C CHARACTER*60 ERMSGC & /' ## SUBROUTINE NODLEX: FATAL ERROR REPORT ; RETURNED' / @@ -53,7 +59,10 @@ SUBROUTINE NODLEX IDUM=1 CALL DDCOMX(IPART,IDUM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, & VALNOD,VALNOD,VALNOD,NP,IUT0,IERR, - & BUFSND,BUFRCV,MAXBUF) +C Fj +C & BUFSND,BUFRCV,MAXBUF) + & rx_desc,ry_desc,MAXBUF) +C Fj IF(IERR.NE.0) THEN WRITE(IUT0,*) WRITE(IUT0,*) ERMSGC diff --git a/FFB-MINI/src/pres3e.F b/FFB-MINI/src/pres3e.F index e145b93..eaf9990 100755 --- a/FFB-MINI/src/pres3e.F +++ b/FFB-MINI/src/pres3e.F @@ -9,7 +9,10 @@ SUBROUTINE PRES3E(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * LPFIX,LFIX3D,FXYZ,WRK01,WRK02,WRK03,WRK04, * WRK05,WRK06,WRK07,WRK08,WRK09,B, - * PRCM,APRCM,RX,RY,MWRK,WRKN, +C Fj +C * PRCM,APRCM,RX,RY,MWRK,WRKN, + * PRCM,APRCM,rx_desc,ry_desc,MWRK,WRKN, +C Fj * IUT0,IERR) #include "timing.h" IMPLICIT NONE @@ -49,6 +52,9 @@ SUBROUTINE PRES3E(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, REAL*4 PRCM(MRCM,NE),APRCM(MRCM,NE) INTEGER*4 MWRK REAL*4 WRKN(MWRK,9) +C Fj + INTEGER*8 rx_desc,ry_desc +C Fj C C [IN:MID NODE COLORING] C diff --git a/FFB-MINI/src/rcmelm.F b/FFB-MINI/src/rcmelm.F index 33064b8..0856a7f 100755 --- 
a/FFB-MINI/src/rcmelm.F +++ b/FFB-MINI/src/rcmelm.F @@ -6,7 +6,10 @@ SUBROUTINE RCMELM(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * FXYZ,WRK01,WRK02,WRK03,WRK04, * WRK05,WRK06,RRCM, W1RCM,W2RCM,PRCM,APRCM, - * RX,RY,MWRK,WRKN, +C Fj +C * RX,RY,MWRK,WRKN, + * rx_desc,ry_desc,MWRK,WRKN, +C Fj * IUT0,IERR) #include "timing.h" IMPLICIT NONE @@ -47,6 +50,9 @@ SUBROUTINE RCMELM(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * WRK03(NE),WRK04(NE),WRK05(NE),WRK06(NE), * RRCM(NE),PRCM(MRCM,NE),APRCM(MRCM,NE), * W1RCM(NE),W2RCM(NE) +C Fj + INTEGER*8 rx_desc,ry_desc +C Fj INTEGER*4 MWRK REAL*4 WRKN(MWRK,9) C @@ -172,7 +178,10 @@ SUBROUTINE RCMELM(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * NPFIX,LPFIX, * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, - * FXYZ,RX,RY,MWRK,WRKN, +C Fj +C * FXYZ,RX,RY,MWRK,WRKN, + * FXYZ,rx_desc,ry_desc,MWRK,WRKN, +C Fj * IUT0,IERR) call maprof_time_stop(TM_CALLAP) C @@ -258,7 +267,10 @@ SUBROUTINE RCMELM(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * NITRB,RESB,W1RCM, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * WRK01,WRK02,WRK03,WRK04,WRK05,WRK06, - * FXYZ,RX,RY,MWRK,WRKN, +C Fj +C * FXYZ,RX,RY,MWRK,WRKN, + * FXYZ,rx_desc,ry_desc,MWRK,WRKN, +C Fj * IUT0,IERR) call maprof_time_stop(TM_BCGSXE) IF(IERR.NE.0) THEN @@ -415,7 +427,10 @@ SUBROUTINE RCMELM(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * NPFIX,LPFIX, * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, - * FXYZ,RX,RY,MWRK,WRKN, +C Fj +C * FXYZ,RX,RY,MWRK,WRKN, + * FXYZ,rx_desc,ry_desc,MWRK,WRKN, +C Fj * IUT0,IERR) call maprof_time_stop(TM_CALLAP) C @@ -476,7 +491,10 @@ SUBROUTINE RCMELM(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * NPFIX,LPFIX, * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, - * FXYZ,RX,RY,MWRK,WRKN, +C Fj +C * FXYZ,RX,RY,MWRK,WRKN, + * FXYZ,rx_desc,ry_desc,MWRK,WRKN, +C Fj * IUT0,IERR) call maprof_time_stop(TM_CALLAP) C diff --git a/FFB-MINI/src/vel3d1.F b/FFB-MINI/src/vel3d1.F 
index d4ea4c4..92b24d0 100755 --- a/FFB-MINI/src/vel3d1.F +++ b/FFB-MINI/src/vel3d1.F @@ -15,7 +15,10 @@ SUBROUTINE VEL3D1 * IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, * LPFIX,LFIX3D, * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,A0,AR,RHSU,RHSV,RHSW, - * RX,RY, +C Fj +C * RX,RY, + * rx_desc,ry_desc, +C Fj * JUNROL,NPPMAX,NCRS2,TS,TACRS,ITPCRS, * IUT0,IERR) C @@ -46,6 +49,7 @@ SUBROUTINE VEL3D1 * A,UG,VG,WG,UE,VE,WE, * WRK01,WRK02,WRK03,WRK04,A0,AR, * RHSU,RHSV,RHSW,APCRS + INTEGER*8 rx_desc,ry_desc C C @@ -620,7 +624,10 @@ SUBROUTINE VEL3D1 IDUM=1 C* call maprof_time_start(TM_VEL3D1_COM) CALL DDCOMX(IPART,IDUM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, - * AR,AR,AR,NP,IUT0,IERR,RX,RY,MAXBUF) +C Fj +C * AR,AR,AR,NP,IUT0,IERR,RX,RY,MAXBUF) + * AR,AR,AR,NP,IUT0,IERR,rx_desc,ry_desc,MAXBUF) +C Fj C* call maprof_time_stop(TM_VEL3D1_COM) IF(IERR.NE.0) THEN WRITE(IUT0,*) @@ -643,7 +650,10 @@ SUBROUTINE VEL3D1 IDUM = 3 C* call maprof_time_start(TM_VEL3D1_COM) CALL DDCOMX(IPART,IDUM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, - * RHSU,RHSV,RHSW,NP,IUT0,IERR,RX,RY,MAXBUF) +C Fj +C * RHSU,RHSV,RHSW,NP,IUT0,IERR,RX,RY,MAXBUF) + * RHSU,RHSV,RHSW,NP,IUT0,IERR,rx_desc,ry_desc,MAXBUF) +C Fj C* call maprof_time_stop(TM_VEL3D1_COM) IF(IERR.NE.0) THEN WRITE(IUT0,*) @@ -707,21 +717,33 @@ SUBROUTINE VEL3D1 CALL BCGS3X(IBCGS,IPART,NMAX,EPS,ME,N,NE,NP, * NPP,NCRS,IPCRS,APCRS,RHSU,U,NITRU,RESU, * NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND,NUMIP, - * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,WRK05,RX,RY, +C Fj +C * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,WRK05,RX,RY, + * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,WRK05, + * rx_desc,ry_desc, +C Fj * IUT0,IERR1, * JUNROL,NPPMAX,NCRS2,TS,TACRS,ITPCRS) IF(NITRU.LT.NMAX) IRESU=1 CALL BCGS3X(IBCGS,IPART,NMAX,EPS,ME,N,NE,NP, * NPP,NCRS,IPCRS,APCRS,RHSV,V,NITRV,RESV, * NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND,NUMIP, - * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,WRK05,RX,RY, +C Fj +C * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,WRK05,RX,RY, + * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,WRK05, + * rx_desc,ry_desc, +C Fj * IUT0,IERR2, * 
JUNROL,NPPMAX,NCRS2,TS,TACRS,ITPCRS) IF(NITRV.LT.NMAX) IRESV=1 CALL BCGS3X(IBCGS,IPART,NMAX,EPS,ME,N,NE,NP, * NPP,NCRS,IPCRS,APCRS,RHSW,W,NITRW,RESW, * NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND,NUMIP, - * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,WRK05,RX,RY, +C Fj +C * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,WRK05,RX,RY, + * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,WRK05, + * rx_desc,ry_desc, +C Fj * IUT0,IERR3, * JUNROL,NPPMAX,NCRS2,TS,TACRS,ITPCRS) IF(NITRW.LT.NMAX) IRESW=1 diff --git a/FFB-MINI/src/vel3d2.F b/FFB-MINI/src/vel3d2.F index 6155577..b9babb1 100755 --- a/FFB-MINI/src/vel3d2.F +++ b/FFB-MINI/src/vel3d2.F @@ -8,7 +8,10 @@ SUBROUTINE VEL3D2(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * NPWALL,LPWALL,UWALL,VWALL,WWALL, * NPSYMT,NPSYM2,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, * IPART ,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, - * RX,RY,MWRK,WRKN,FXYZ,UG, +C Fj +C * RX,RY,MWRK,WRKN,FXYZ,UG, + * rx_desc,ry_desc,MWRK,WRKN,FXYZ,UG, +C Fj * IUT0,IERR, * WRK02) IMPLICIT NONE @@ -33,6 +36,9 @@ SUBROUTINE VEL3D2(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * UINLT,VINLT,WINLT,UWALL,VWALL,WWALL, * XPSYMT,YPSYMT,ZPSYMT, * RX,RY,FXYZ,UG +C Fj + INTEGER*8 rx_desc,ry_desc +C Fj C C [IN:MID NODE COLORING] C @@ -289,7 +295,10 @@ SUBROUTINE VEL3D2(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, IDIM=3 CALL DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, * WRKN(1,1),WRKN(1,2),WRKN(1,3),NP,IUT0,IERR, - * RX,RY,MAXBUF) +C Fj +C * RX,RY,MAXBUF) + * rx_desc,ry_desc,MAXBUF) +C Fj IF(IERR.NE.0) THEN WRITE(IUT0,*) WRITE(IUT0,*) ERMSGC From 5d16a70332e4ca93aaf4cb5508f267f006a1a677 Mon Sep 17 00:00:00 2001 From: Kazuma Tago Date: Fri, 5 Mar 2021 19:53:24 +0900 Subject: [PATCH 25/70] [WIP] Add xmpAPI_* files for XMP-API. 
--- NTCHEM-MINI/src/mp2/GNUmakefile | 28 +- .../mp2/xmpAPI_rimp2_driver_incore_mpiomp.F90 | 127 +++ ...mpAPI_rimp2_riint2_mdint2_int2c_mpiomp.F90 | 250 ++++++ ...mpAPI_rimp2_rmp2energy_incore_v_mpiomp.F90 | 818 ++++++++++++++++++ ...I_rimp2_rmp2energy_semidirect_v_mpiomp.F90 | 308 +++++++ .../xmpAPI_rimp2_tran3c1_incore_v_mpiomp.F90 | 606 +++++++++++++ .../xmpAPI_rimp2_tran3c2_incore_v_mpiomp.F90 | 390 +++++++++ 7 files changed, 2521 insertions(+), 6 deletions(-) create mode 100755 NTCHEM-MINI/src/mp2/xmpAPI_rimp2_driver_incore_mpiomp.F90 create mode 100755 NTCHEM-MINI/src/mp2/xmpAPI_rimp2_riint2_mdint2_int2c_mpiomp.F90 create mode 100755 NTCHEM-MINI/src/mp2/xmpAPI_rimp2_rmp2energy_incore_v_mpiomp.F90 create mode 100755 NTCHEM-MINI/src/mp2/xmpAPI_rimp2_rmp2energy_semidirect_v_mpiomp.F90 create mode 100755 NTCHEM-MINI/src/mp2/xmpAPI_rimp2_tran3c1_incore_v_mpiomp.F90 create mode 100755 NTCHEM-MINI/src/mp2/xmpAPI_rimp2_tran3c2_incore_v_mpiomp.F90 diff --git a/NTCHEM-MINI/src/mp2/GNUmakefile b/NTCHEM-MINI/src/mp2/GNUmakefile index 8ce36f7..18e6757 100755 --- a/NTCHEM-MINI/src/mp2/GNUmakefile +++ b/NTCHEM-MINI/src/mp2/GNUmakefile @@ -22,6 +22,22 @@ F90EXT=-DDEBUG #F90EXT=-DUSE_MERGE -DDEBUG endif +ifeq ($(USE_XMP_API),yes) +DRIVER_INCORE=xmpAPI_rimp2_driver_incore_mpiomp +RIINT2_MDINT2=xmpAPI_rimp2_riint2_mdint2_int2c_mpiomp +ENERGE_INCORE=xmpAPI_rimp2_rmp2energy_incore_v_mpiomp +RMP2_RMP2ENERGY=xmpAPI_rimp2_rmp2energy_semidirect_v_mpiomp +RIMP2_TRAN3C1=xmpAPI_rimp2_tran3c1_incore_v_mpiomp +RIMP2_TRAN3C2=xmpAPI_rimp2_tran3c2_incore_v_mpiomp +else +DRIVER_INCORE=rimp2_driver_incore_mpiomp +RIINT2_MDINT2=rimp2_riint2_mdint2_int2c_mpiomp +ENERGE_INCORE=rimp2_rmp2energy_incore_v_mpiomp +RMP2_RMP2ENERGY=rimp2_rmp2energy_semidirect_v_mpiomp +RIMP2_TRAN3C1=rimp2_tran3c1_incore_v_mpiomp +RIMP2_TRAN3C2=rimp2_tran3c2_incore_v_mpiomp +endif + # Name of executable program = rimp2.exe @@ -105,15 +121,15 @@ rimp2_int2_initial_mpiomp.o \ rimp2_riint2_final_mpiomp.o \ 
rimp2_tran3c1_semidirect_v_mpiomp.o \ rimp2_tran3c2_semidirect_v_mpiomp.o \ -rimp2_rmp2energy_semidirect_v_mpiomp.o \ -rimp2_riint2_mdint2_int2c_mpiomp.o \ +$(RMP2_RMP2ENERGY).o \ +$(RIINT2_MDINT2).o \ rimp2_riint2_mdint2_pscreen_mpiomp.o objects_mpiomp2= \ -rimp2_driver_incore_mpiomp.o \ -rimp2_tran3c1_incore_v_mpiomp.o \ -rimp2_tran3c2_incore_v_mpiomp.o \ -rimp2_rmp2energy_incore_v_mpiomp.o +$(DRIVER_INCORE).o \ +$(RIMP2_TRAN3C1).o \ +$(RIMP2_TRAN3C2).o \ +$(ENERGE_INCORE).o objects_gpu= \ cublas-fortran.o diff --git a/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_driver_incore_mpiomp.F90 b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_driver_incore_mpiomp.F90 new file mode 100755 index 0000000..83b993f --- /dev/null +++ b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_driver_incore_mpiomp.F90 @@ -0,0 +1,127 @@ + SUBROUTINE RIMP2_Driver_InCore_MPIOMP +! + USE MP2_Module, ONLY : IOccBat, LenOccBat, NOccBat_per_Proc, NActO, NActV + USE RIMP2_Module, ONLY : NBF_RI, RIInt3c2a, RIInt3c2b, RIInt3c3a, RIInt3c3b + USE MP2_Basis_Module, ONLY : Spherical, LtuvMin_Car, LtuvMin_Sph, LtuvMax_Car, LtuvMax_Sph + USE RIMP2_Basis_Module, ONLY : NShel_RI, KType_RI + USE MPI_Module, ONLY : NProcs, MyRank, NProcsMat, MyRankMat, NProcsMO, MyRankMO +! +! o Driver subroutine for RI-MP2 energy evaluation +! + IMPLICIT NONE +! + INCLUDE 'mpif.h' +! + INTEGER :: LenOccBat_per_Proc, NBF_RI_per_ProcMat + INTEGER :: KK, KBF_RI, NK, IAnglC + INTEGER :: IErr + REAL(8) :: TimeBgn, TimeEnd, WTimeBgn, WTimeEnd +! +! o Obtaining RI-MP2 batch infomation and memory allocation +! + CALL RIMP2_Get_BatchInfo_MPI +! +! o Initialization of the RI integral evaluation +! + IF (MyRank == 0) THEN + PRINT '(" ... Enter (RIInt2_Initial )")' + END IF + WTimeBgn = MPI_WTIME() + CALL CPU_TIME(TimeBgn) + CALL RIMP2_Int2_Initial_MPIOMP + CALL CPU_TIME(TimeEnd) + WTimeEnd = MPI_WTIME() + IF (MyRank == 0) THEN + PRINT '(" ... CPU time (RIInt2_Initial ) :", F12.2)', TimeEnd - TimeBgn + PRINT '(" ... 
WALL time (RIMP2_Initial ) :", F12.2)', WTimeEnd - WTimeBgn + END IF +! +! o Construction of (ia|P) integrals +! + IF (MyRank == 0) THEN + PRINT '(" ... Enter (RIMP2_Tran3c1 )")' + END IF + WTimeBgn = MPI_WTIME() + CALL CPU_TIME(TimeBgn) +! + KBF_RI = 0 +!MPI parallel + DO KK = 1, NShel_RI + IF (MOD(KK, NProcsMO) /= MyRankMO) CYCLE +!MPI parallel +! + IAnglC = KType_RI(KK) + IF (Spherical) THEN + NK = LtuvMax_Sph(IAnglC) - LtuvMin_Sph(IAnglC) + 1 + ELSE + NK = LtuvMax_Car(IAnglC) - LtuvMin_Car(IAnglC) + 1 + END IF + KBF_RI = KBF_RI + NK +! + END DO + ALLOCATE(RIInt3c2a(NActO(1)*NActV(1),KBF_RI)) + + ! test + RIInt3c2a(:,:) = 0.0 +! + CALL RIMP2_Tran3c1_InCore_V_MPIOMP +!coarray CALL MPI_Barrier(MPI_COMM_WORLD, IErr) + sync all + CALL CPU_TIME(TimeEnd) + WTimeEnd = MPI_WTIME() + IF (MyRank == 0) THEN + PRINT '(" ... CPU time (RIMP2_Tran3c1 ) :", F12.2)', TimeEnd - TimeBgn + PRINT '(" ... WALL time (RIMP2_Tran3c1 ) :", F12.2)', WTimeEnd - WTimeBgn + END IF +! +! o Construction of (ia|Q) integrals +! + IF (MyRank == 0) THEN + PRINT '(" ... Enter (RIMP2_Tran3c2 )")' + END IF + WTimeBgn = MPI_WTIME() + CALL CPU_TIME(TimeBgn) +! + LenOccBat_per_Proc = LenOccBat * NOccBat_per_Proc + NBF_RI_per_ProcMat = NBF_RI / NProcsMat + if (MOD(NBF_RI, NProcsMat) > MyRankMat) then + NBF_RI_per_ProcMat = NBF_RI_per_ProcMat + 1 + end if + ALLOCATE(RIInt3c3a(NBF_RI_per_ProcMat*NActO(1),LenOccBat_per_Proc)) + CALL RIMP2_Tran3c2_InCore_V_MPIOMP +!coarray CALL MPI_Barrier(MPI_COMM_WORLD, IErr) + sync all + CALL CPU_TIME(TimeEnd) + WTimeEnd = MPI_WTIME() + IF (MyRank == 0) THEN + PRINT '(" ... CPU time (RIMP2_Tran3c2 ) :", F12.2)', TimeEnd - TimeBgn + PRINT '(" ... WALL time (RIMP2_Tran3c2 ) :", F12.2)', WTimeEnd - WTimeBgn + END IF +! +! o Finalization of the RI integral evaluation +! + CALL RIMP2_RIInt2_Final_MPIOMP + DEALLOCATE(RIInt3c2a) +! +! o Construction of (ia|jb) integrals and evaluation of MP2 correlation energy +! o RMP2 case +! + IF (MyRank == 0) THEN + PRINT '(" ... 
Enter (RIMP2_RMP2Energy)")' + END IF + WTimeBgn = MPI_WTIME() + CALL CPU_TIME(TimeBgn) + CALL RIMP2_RMP2Energy_InCore_V_MPIOMP + CALL CPU_TIME(TimeEnd) + WTimeEnd = MPI_WTIME() + IF (MyRank == 0) THEN + PRINT '(" ... CPU time (RIMP2_RMP2Energy) :", F12.2)', TimeEnd - TimeBgn + PRINT '(" ... WALL time (RIMP2_RMP2Energy) :", F12.2)', WTimeEnd - WTimeBgn + END IF +! +! o Memory deallocation +! + DEALLOCATE(RIInt3c3a) + DEALLOCATE(IOccBat) +! + END SUBROUTINE diff --git a/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_riint2_mdint2_int2c_mpiomp.F90 b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_riint2_mdint2_int2c_mpiomp.F90 new file mode 100755 index 0000000..d8d0603 --- /dev/null +++ b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_riint2_mdint2_int2c_mpiomp.F90 @@ -0,0 +1,250 @@ + SUBROUTINE RIMP2_RIInt2_MDInt2_Int2c_MPIOMP +! + USE MP2_Basis_Module, ONLY : Spherical, Centr + USE MP2_Constant_Module, ONLY : Zero, Half, One, Pi252, RLN10 + USE MP2_Parameter_Module, ONLY : MaxSgmt + USE RIMP2_Module, ONLY : RIInt2c, NBC_RI + USE Int2_Module, ONLY : IAnglA, IAnglB, IAnglC, IAnglD, PreFactAB, PreFactCD, CCoefAB, CCoefCD, & + & PX, PY, PZ, PAX, PAY, PAZ, PBX, PBY, PBZ, QX, QY, QZ, QCX, QCY, QCZ, QDX, QDY, QDZ, & + & CContAB, CContCD, ExpntA, ExpntB, ExpntC, ExpntD, ExpntP, ExpntQ, ThrPrim + USE Int2_ECoef_Module, ONLY : ECoefXAB, ECoefYAB, ECoefZAB, ECoefXCD, ECoefYCD, ECoefZCD, ExpPHalf, ExpQHalf + USE Int2_Int2e_Module, ONLY : ExpntPQ1, ExpntPQ2, PQX, PQY, PQZ + USE Int2_Gamma_Module, ONLY : FF, MaxtuvGam + USE RIMP2_Basis_Module, ONLY : KAtom_RI, KType_RI, KStart_RI, KontG_RI, Expnt_RI, CCoef_RI, NShel_RI + USE MPI_Module, ONLY : NProcs, MyRank, MPIIO, IORank, MPI_COMM_IO, MPI_COMM_MO, NProcsMO, MyRankMO, & + & MPI_COMM_Mat, NProcsMat, MyRankMat +! + IMPLICIT NONE +! + INCLUDE 'mpif.h' +! 
+ INTEGER :: II, JJ, KK, LL, IJ, KL, I, K, ITemp, KTemp, NPIJ, NPKL + INTEGER :: IAtomA, IAtomB, IAtomC, IAtomD + INTEGER :: IPrim1, KPrim1, IPrim2, KPrim2, IJPrim, KLPrim + INTEGER :: IErr + REAL(8) :: ExpA, ExpC, ExpP, ExpQ, ExpPI, ExpQI, ExpAR2, ExpCR2, ExpnPQ, ExpKAB, ExpKCD + REAL(8) :: ACentX, ACentY, ACentZ, BCentX, BCentY, BCentZ + REAL(8) :: CCentX, CCentY, CCentZ, DCentX, DCentY, DCentZ + REAL(8) :: R2AB, R2CD + REAL(8) :: ThrFac + REAL(8), ALLOCATABLE :: RWork1(:) +! + REAL(8) :: TimeBgn, TimeEnd, WTimeBgn, WTimeEnd + REAL(8) :: Time_RIInt2c, Time_RIInt2cC, WTime_RIInt2c, WTime_RIInt2cC +! +! o Initialization +! + ThrFac = RLN10 * LOG10(ThrPrim) +! + ALLOCATE(RWork1(NBC_RI)) + CALL DCOPY(NBC_RI, Zero, 0, RWork1, 1) +! + WTimeBgn = MPI_WTIME() + CALL CPU_TIME(TimeBgn) +! +!$OMP PARALLEL DEFAULT(SHARED) & +!$OMP&PRIVATE(II, IAtomA, ACentX, ACentY, ACentZ, I, IPrim1, IPrim2, ITemp, & +!$OMP& JJ, IAtomB, BCentX, BCentY, BCentZ, & +!$OMP& KK, IAtomC, CCentX, CCentY, CCentZ, K, KPrim1, KPrim2, KTemp, & +!$OMP& LL, IAtomD, DCentX, DCentY, DCentZ, & +!$OMP& R2AB, IJPrim, ExpA, ExpAR2, ExpP, ExpPI, ExpKAB, NPIJ, IJ, & +!$OMP& R2CD, KLPrim, ExpC, ExpCR2, ExpQ, ExpQI, ExpKCD, NPKL, KL, & +!$OMP& ExpnPQ) +! +! o Allocate memory +! + CALL Int2_Allocate + CALL Int2_ECoef_Allocate + CALL Int2_Int2e_Allocate + CALL Int2_Array_Allocate + ALLOCATE(FF(MaxtuvGam)) +! +!$OMP DO SCHEDULE(DYNAMIC, 1) + DO II = NShel_RI, 1, -1 +! DO II = 1, NShel_RI + IAtomA = KAtom_RI(II) + IAnglA = KType_RI(II) + ACentX = Centr(1,IAtomA) + ACentY = Centr(2,IAtomA) + ACentZ = Centr(3,IAtomA) + IPrim1 = KStart_RI(II) + IPrim2 = IPrim1 + KontG_RI(II) - 1 + ITemp = 0 + DO I = IPrim1, IPrim2 + ITemp = ITemp + 1 + ExpntA(ITemp) = Expnt_RI(I) + END DO +! + JJ = 1 + IAtomB = IAtomA + IAnglB = 0 ! s-type + BCentX = Centr(1,IAtomB) + BCentY = Centr(2,IAtomB) + BCentZ = Centr(3,IAtomB) + ExpntB(1) = Zero +! 
+ R2AB = Zero + IJPrim = 0 + DO I = IPrim1, IPrim2 + ExpA = ExpntA(I-IPrim1+1) + ExpAR2 = ExpA * R2AB + ExpP = ExpA + ExpPI = One / ExpP + ExpKAB = Zero + IJPrim = IJPrim + 1 + PreFactAB(IJPrim) = One + ExpntP(IJPrim) = ExpP + ExpPHalf(IJPrim) = Half * ExpPI + CCoefAB(IJPrim) = CCoef_RI(I) + PX(IJPrim) = ACentX + PY(IJPrim) = ACentY + PZ(IJPrim) = ACentZ + PAX(IJPrim) = Zero + PAY(IJPrim) = Zero + PAZ(IJPrim) = Zero + PBX(IJPrim) = Zero + PBY(IJPrim) = Zero + PBZ(IJPrim) = Zero + END DO + NPIJ = IJPrim + IF (NPIJ == 0) GO TO 100 +! +! o Normalization +! + CALL RIInt2_MDInt2_CCont(CCoefAB, CContAB, IAnglA, IAnglB, NPIJ) +! +! o Generate E-coefficients for A and B +! + CALL MDInt2_ECoef1(ECoefXAB, ECoefYAB, ECoefZAB, PAX, PAY, PAZ, PBX, PBY, PBZ, ExpPHalf, PreFactAB, & + & IAnglA, IAnglB, NPIJ) +! + DO KK = 1, II +!MPI parallel + IF (MOD(KK, NProcs) /= MyRank) GO TO 200 +! IF (MOD(KK, NProcsMO) /= MyRankMO) GO TO 200 +! IF (MOD(KK, NProcsMat) /= MyRankMat) GO TO 200 +!MPI parallel + IAtomC = KAtom_RI(KK) + IAnglC = KType_RI(KK) + CCentX = Centr(1,IAtomC) + CCentY = Centr(2,IAtomC) + CCentZ = Centr(3,IAtomC) + KPrim1 = KStart_RI(KK) + KPrim2 = KPrim1 + KontG_RI(KK) - 1 + KTemp = 0 + DO K = KPrim1, KPrim2 + KTemp = KTemp + 1 + ExpntC(KTemp) = Expnt_RI(K) + END DO +! + LL = 1 + IAtomD = IAtomC + IAnglD = 0 ! s-type + DCentX = Centr(1,IAtomD) + DCentY = Centr(2,IAtomD) + DCentZ = Centr(3,IAtomD) + ExpntD(1) = Zero +! + R2CD = Zero + KLPrim = 0 + DO K = KPrim1, KPrim2 + ExpC = ExpntC(K-KPrim1+1) + ExpCR2 = ExpC * R2CD + ExpQ = ExpC + ExpQI = One / ExpQ + ExpKCD = Zero + KLPrim = KLPrim + 1 + PreFactCD(KLPrim) = One + ExpntQ(KLPrim) = ExpQ + ExpQHalf(KLPrim) = Half * ExpQI + CCoefCD(KLPrim) = CCoef_RI(K) + QX(KLPrim) = CCentX + QY(KLPrim) = CCentY + QZ(KLPrim) = CCentZ + QCX(KLPrim) = Zero + QCY(KLPrim) = Zero + QCZ(KLPrim) = Zero + QDX(KLPrim) = Zero + QDY(KLPrim) = Zero + QDZ(KLPrim) = Zero + END DO + NPKL = KLPrim + IF (NPKL == 0) GO TO 200 +! +! o Normalization +! 
+ CALL RIInt2_MDInt2_CCont(CCoefCD, CContCD, IAnglC, IAnglD, NPKL) +! +! o Generate E-coefficients for C and D +! + CALL MDInt2_ECoef1(ECoefXCD, ECoefYCD, ECoefZCD, QCX, QCY, QCZ, QDX, QDY, QDZ, ExpQHalf, PreFactCD, & + & IAnglC, IAnglD, NPKL) +! +! o Generate R-integrals +! + DO IJ = 1, NPIJ + DO KL = 1, NPKL + ExpnPQ = ExpntP(IJ) + ExpntQ(KL) + ExpntPQ1(KL,IJ) = ExpntP(IJ) * ExpntQ(KL) / ExpnPQ + ExpnPQ = ExpntP(IJ) * ExpntQ(KL) * SQRT(ExpnPQ) + ExpntPQ2(KL,IJ) = Pi252 / ExpnPQ ! Adsorption + PQX(KL,IJ) = PX(IJ) - QX(KL) + PQY(KL,IJ) = PY(IJ) - QY(KL) + PQZ(KL,IJ) = PZ(IJ) - QZ(KL) + END DO + END DO + CALL MDInt2_R0tuv(NPIJ, NPKL) +! +! o Construct two-electron integrals +! + IF (Spherical) THEN + CALL RIMP2_RIInt2_MDInt2_ERI2c_Sph(II, JJ, KK, LL, NPIJ, NPKL, RWork1) + ELSE + CALL RIMP2_RIInt2_MDInt2_ERI2c_Car(II, JJ, KK, LL, NPIJ, NPKL, RWork1) + END IF +! + 200 CONTINUE +! + END DO +! + 100 CONTINUE +! + END DO +!$OMP END DO +! +! o Deallocate memory +! + CALL Int2_Deallocate + CALL Int2_ECoef_Deallocate + CALL Int2_Int2e_Deallocate + CALL Int2_Array_Deallocate + DEALLOCATE(FF) +!$OMP END PARALLEL + CALL CPU_TIME(TimeEnd) + WTimeEnd = MPI_WTIME() + Time_RIInt2c = TimeEnd - TimeBgn + WTime_RIInt2c = TimeEnd - TimeBgn +! + WTimeBgn = MPI_WTIME() + CALL CPU_TIME(TimeBgn) +! CALL MPI_Allreduce(RWork1, RIInt2c, NBC_RI, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, IErr) +! CALL MPI_Allreduce(RWork1, RIInt2c, NBC_RI, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_MO, IErr) +! CALL MPI_Allreduce(RWork1, RIInt2c, NBC_RI, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_MAT, IErr) +!coarray + RIInt2c(1:NBC_RI) = RWork1(1:NBC_RI) + call co_sum(RIInt2c(1:NBC_RI)) +!! + DEALLOCATE(RWork1) + CALL CPU_TIME(TimeEnd) + WTimeEnd = MPI_WTIME() + Time_RIInt2cC = TimeEnd - TimeBgn + WTime_RIInt2cC = TimeEnd - TimeBgn +! + IF (MyRank == 0) THEN + PRINT '(" ..... CPU time (RIInt2c calc ) :", F12.2)', Time_RIInt2c + PRINT '(" ..... 
CPU time (RIInt2c comm ) :", F12.2)', Time_RIInt2cC + PRINT '(" ..... WALL time (RIInt2c calc ) :", F12.2)', WTime_RIInt2c + PRINT '(" ..... WALL time (RIInt2c comm ) :", F12.2)', WTime_RIInt2cC + END IF +! + END SUBROUTINE diff --git a/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_rmp2energy_incore_v_mpiomp.F90 b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_rmp2energy_incore_v_mpiomp.F90 new file mode 100755 index 0000000..2014ead --- /dev/null +++ b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_rmp2energy_incore_v_mpiomp.F90 @@ -0,0 +1,818 @@ + SUBROUTINE RIMP2_RMP2Energy_InCore_V_MPIOMP +! +! o 4c integral generation and RMP2 energy accumulation +! + USE MP2_Module, ONLY : IOccBat, NOccBat, LenOccBat, NOccBat_per_Proc, NMO, & + & NActO, NActV, NFrzO, EMP2, ESCSMP2, E1, E2T, E2S, E2, E2SCS, Name, IPrint + USE RIMP2_Module, ONLY : NBF_RI, RIInt3c3a + Use MP2_Constant_Module, ONLY : Zero, One, Two, Three, P12 + USE MPI_Module, ONLY : NProcs, MyRank, MPIIO, IORank, MPI_COMM_IO, MPI_COMM_MO, NProcsMO, MyRankMO, & + & MPI_COMM_MAT, NProcsMat, MyRankMat +! + IMPLICIT NONE +! + INCLUDE 'mpif.h' +! +#ifdef MPIINT8 +#define MPI_INTEGER MPI_INTEGER8 +#endif +! + INTEGER, PARAMETER :: IO = 99 + CHARACTER(LEN=255) :: FBuf + REAL(8) :: E2TP, E2SP, E2Tab, E2Sab + REAL(8) :: T2, Fac + REAL(8) :: EigIb, EigIab, EigIjab, EigIi + INTEGER :: IaBat, IbBat, Ii, Ij, Ia, Ib + INTEGER :: IaBg, IaEd, IbBg, IbEd, IaBg_Proc, IbBg_Proc, Ib_Send + INTEGER :: I, J + INTEGER :: NMOInt3BufSize + INTEGER :: NHOMO + INTEGER :: IaBat_Proc, IbBat_Proc, Jranksend, Jrankrecv, Jrank_diff, IaBat_Proc_End + integer :: Jranksend_1, Jrankrecv_1 + INTEGER :: NOccBat_per_Proc_half, NProcs_half, IaBatBg + INTEGER :: IErr + INTEGER :: ireq(2) + LOGICAL :: EvenProcs, ExchIBat + INTEGER :: NBF_RI_per_ProcMat + INTEGER, ALLOCATABLE :: istat1(:), istat2(:), istat3(:) + + ! 
REAL(8), ALLOCATABLE, target :: MOInt3ck(:,:) + REAL(8), pointer :: MOInt3ck(:,:) +#ifndef USE_GPU + integer, parameter :: NUM_STREAM = 1 + REAL(8), allocatable, target :: RWork2_Pool(:,:,:) + REAL(8), allocatable, target :: CommBuf(:,:,:) +#else + ! *** for GPU *** + integer, parameter :: NUM_STREAM = 3 + REAL(8), allocatable, target, pinned :: RWork2_Pool(:,:,:) + REAL(8), allocatable, target, pinned :: CommBuf(:,:,:) +#endif + REAL(8), pointer :: RWork2(:,:) + REAL(8), pointer :: SendBuf(:,:) + REAL(8), pointer :: RecvBuf(:,:) + integer :: SendBufId + integer :: RecvBufId + + REAL(8), ALLOCATABLE :: Eig(:) + REAL(8), allocatable :: MOIntSome(:,:) +! +! + REAL(8) :: TimeBgn, TimeEnd, Time_MOI, Time_EMP2, WTimeBgn, WTimeEnd, WTime_MOI, WTime_EMP2 + REAL(8) :: Time_T3C, Time_MOIC, Time_EMP2C, WTime_T3C, WTime_MOIC, WTime_EMP2C +! + real(8), parameter :: maxMem = 1e9 + ! real(8), parameter :: maxMem = 5e8 + integer :: id_st + integer, parameter :: NUM_EVENT = NUM_STREAM*2 + integer :: id_ev, id_ev_next + integer :: id_A, id_Am + integer :: id_B, id_Bm + integer :: maxMN + integer :: nGrp, nGrpMax, nBlk + integer :: lenB, lenA + integer :: OfstI, OfstJ + integer :: Ia_0, Ia_1 ! Ia = Ia_0 + Ia_1 + integer :: Ib_0, Ib_1 ! Ib = Ib_0 + Ib_1 + integer :: m, n, k + integer :: lda, ldb, ldc + integer, allocatable :: devptr_A(:), devptr_B(:), devptr_C(:) + integer :: count_dgemm ! debug + + integer :: Mod_NActO_NProcsMat + integer :: IiBgn, IiEnd, leni + integer :: Max_LCount, LCount, LNumber, LNumber_Base + integer, allocatable :: Ia_0_list(:), Ib_0_list(:) + integer, allocatable :: cflag_set_mat_A(:), cflag_set_mat_B(:) + + integer, allocatable :: commIndexEach(:) + integer, allocatable :: commSizeEach(:) + integer :: commPhase, commCount, commSizeTotal + integer :: chunk, myChunk + +!coarray + real(8), allocatable :: sbuf(:)[:] + real(8), allocatable :: rbuf(:)[:] + integer bufsize + integer, save :: jsta +!! 
+ + Time_T3C = Zero + Time_MOI = Zero + Time_MOIC = Zero + Time_EMP2 = Zero + Time_EMP2C = Zero + WTime_T3C = Zero + WTime_MOI = Zero + WTime_MOIC = Zero + WTime_EMP2 = Zero + WTime_EMP2C = Zero + + NBF_RI_per_ProcMat = NBF_RI / NProcsMat + if (MOD(NBF_RI, NProcsMat) > MyRankMat) then + NBF_RI_per_ProcMat = NBF_RI_per_ProcMat + 1 + end if + + leni = NActO(1) / NProcsMat + Mod_NActO_NProcsMat = MOD(NActO(1), NProcsMat) + if (Mod_NActO_NProcsMat > MyRankMat) then + IiBgn = leni * MyRankMat + MyRankMat + 1 + IiEnd = IiBgn + leni + else + IiBgn = leni * MyRankMat + Mod_NActO_NProcsMat + 1 + IiEnd = IiBgn + leni - 1 + end if + +! write(*, *) 'MyRankMat=', MyRankMat, 'NBF_RI_per_ProcMat =', NBF_RI_per_ProcMat + + ! + m = NActO(1) + n = NActO(1) + k = NBF_RI_per_ProcMat + maxMN = sqrt(maxMem/8 + k*k) - k + + nGrp = (NActO(1)*LenOccBat + maxMN-1) / maxMN + if (nGrp <= 1) nGrp = 2 ! test + + nBlk = (LenOccBat + nGrp-1) / nGrp +#ifndef USE_MERGE + nBlk = 1 ! When nBlk=1, this code behave as if original one +#endif + + nGrp = (LenOccBat + nBlk-1) / nBlk +!coarray +! CALL MPI_Allreduce( nGrp, nGrpMax, 1, MPI_INTEGER, MPI_MAX, MPI_COMM_WORLD, ierr ) + nGrpMax = nGrp + call co_max(nGrpMax) +!! + nGrp = nGrpMax + + ALLOCATE(devptr_A(1:NUM_STREAM), devptr_B(1:NUM_STREAM), devptr_C(1:NUM_STREAM)) + + Max_LCount = (nGrp+1) * (nGrp+1) + ALLOCATE(Ia_0_list(Max_LCount), Ib_0_list(Max_LCount)) + ALLOCATE(cflag_set_mat_A(nGrp+1), cflag_set_mat_B(nGrp+1)) + + allocate(commSizeEach(Max_LCount), commIndexEach(Max_LCount)) + + commCount = (nGrp) * (nGrp+1) / 2 + +#ifdef DEBUG + if (MyRank == 0) then + write(*,*) "# NBF_RI:", NBF_RI + write(*,*) "# NActO(1):", NActO(1) + write(*,*) "# LenOccBat:", LenOccBat + write(*,*) "# maxMem:", maxMem + write(*,*) "# maxMN:", maxMN + write(*,*) "# nBlk:", nBlk + write(*,*) "# nGrp:", nGrp + write(*,*) "# Max_LCount:", Max_LCount + write(*,*) "# NUM_STREAM:", NUM_STREAM + write(*,'(a,3i8)') "# M,N,K:", m, n, k + endif +#endif + count_dgemm = 0 ! debug +! 
+! o Memory allocation +! + ! ALLOCATE(MOInt3ck(NBF_RI*NActO(1),LenOccBat)) + ! ALLOCATE(CommBuf(NBF_RI_per_ProcMat*NActO(1),LenOccBat,2)) + ALLOCATE(CommBuf(NBF_RI_per_ProcMat*NActO(1),LenOccBat,2)) + + Jrankrecv_1 = mod( MyRankMO + 1 + NProcsMO, NProcsMO ) + Jranksend_1 = mod( MyRankMO - 1 + NProcsMO, NProcsMO ) + + m = NActO(1) * nBlk + n = NActO(1) * nBlk + k = NBF_RI_per_ProcMat + lda = k + ldb = k + ldc = m + ALLOCATE(MOIntSome(m,n)) + ALLOCATE(RWork2_Pool(m,n,NUM_STREAM)) + +#ifdef USE_GPU + ! + ! initialize cublaas + ! + CALL CPU_TIME(TimeBgn) +!coarray call mpi_barrier(MPI_COMM_WORLD,ierr) ! not essential, just make measured time meaningful + sync all +!! + call cublas_init() + ! allocate memory space for matrix A,B and C on GPU + do id_st = 1, NUM_STREAM + call cublas_alloc( devptr_A(id_st), m, k ) + call cublas_alloc( devptr_B(id_st), k, n ) + call cublas_alloc( devptr_C(id_st), m, n ) + enddo +!coarray call mpi_barrier(MPI_COMM_WORLD,ierr) ! not essential, just make measured time meaningful + sync all +!! + CALL CPU_TIME(TimeEnd) +#ifdef DEBUG + if ( MyRank == 0 ) then + write(*,'("# gpu init/alloc time",F12.2)') TimeEnd - TimeBgn + end if +#endif +#endif + + ALLOCATE(Eig(NMO)) + ALLOCATE(istat1(MPI_STATUS_SIZE)) + ALLOCATE(istat2(MPI_STATUS_SIZE)) + ALLOCATE(istat3(MPI_STATUS_SIZE)) +! + NMOInt3BufSize = NBF_RI_per_ProcMat * NActO(1) * LenOccBat + NHOMO = NFrzO(1) + NActO(1) +! + EvenProcs = .FALSE. + IF (MOD(NProcsMO, 2) == 0) THEN + EvenProcs = .TRUE. + END IF + + ! 
+ commIndexEach(:) = 0 + commSizeEach(:) = 0 + commSizeTotal = 0 + chunk = (LenOccBat + commCount - 1) / commCount + DO commPhase = 1, commCount + commIndexEach(commPhase) = chunk * (commPhase-1) + 1 + myChunk = LenOccBat - commIndexEach(commPhase) + 1 + if ( myChunk > chunk ) myChunk = chunk + if ( myChunk < 0 ) myChunk = 0 + + commSizeEach(commPhase) = myChunk * (NBF_RI_per_ProcMat * NActO(1)) + commSizeTotal = commSizeTotal + commSizeEach(commPhase) +#ifdef DEBUG + if ( myRank == 0 ) then + write(*,'(a,3i6,i10)') & + "# comm phase, start index, chunk, comm size, ", commPhase, & + commIndexEach(commPhase), & + myChunk, & + commSizeEach(commPhase) + endif +#endif + End DO + if ( commSizeTotal /= NMOInt3BufSize ) then + write(*,'(a,2i10)') "# wrong comm size, ", commSizeTotal, NMOInt3BufSize + stop + endif + ! + +! +! o Read orbital energies +! + IF (MPIIO) THEN + FBuf = TRIM(Name)//".OrbEne" + OPEN(UNIT=IO, FILE=TRIM(FBuf), STATUS='OLD', ACCESS='SEQUENTIAL', FORM='FORMATTED') + READ(IO, *) Eig(1:NMO) + CLOSE(IO) + END IF +!coarray +! CALL MPI_Bcast(Eig, NMO, MPI_DOUBLE_PRECISION, IORank, MPI_COMM_IO, IErr) +!! +! + IF ((MyRank == 0) .AND. (IPrint >= 1)) THEN + WRITE(*, *) '+++++ Orbital energy (Alpha) +++++' + WRITE(*, '(10F12.6)') Eig(1:NMO) + END IF +! +! o Calculation of RMP2 correlation energy +! + NProcs_half = NProcsMO / 2 + NOccBat_per_Proc_half = NOccBat_per_Proc / 2 +! + E2TP = Zero + E2SP = Zero +! + IbBat = MyRankMO * NOccBat_per_Proc + 1 + IbBg_Proc = IOccBat(1,IbBat,1) + IaBg_Proc = IbBg_Proc +!MPI Parallel + DO IbBat_Proc = 1, NOccBat_per_Proc +!MPI Parallel + + IbBat = MyRankMO * NOccBat_per_Proc + IbBat_Proc + IbBg = IOccBat(1,IbBat,1) + 1 + Ib_Send = IbBg - IbBg_Proc +! +#ifdef DEBUG + if ( MyRank == 0 ) then + write(*,'(a,i6,a,i6,a,i6)') '# Ib_Send=', Ib_Send,', IbBg=',IbBg,', IbBg_Proc=',IbBg_Proc + endif +#endif +! + RecvBuf => RIInt3c3a(:,Ib_Send:) +! 
+ DO Jrank_diff = 0, NProcs_half +#ifdef DEBUG + if ( MyRank == 0 ) then + write(*,'(a,i6,a,i6)') '# Jrank_diff =', Jrank_diff, ', NProcs_half=', NProcs_half + endif +#endif + ExchIBat = EvenProcs .AND. (jrank_diff == NProcs_half) + Jrankrecv = MyRankMO + jrank_diff + Jranksend = MyRankMO - jrank_diff + IF (Jrankrecv >= NProcsMO) Jrankrecv = Jrankrecv - NProcsMO + IF (Jranksend < 0) Jranksend = Jranksend + NProcsMO + IbBat = Jrankrecv * NOccBat_per_Proc + IbBat_Proc + IbBg = IOccBat(1,IbBat,1) + 1 + IbEd = IOccBat(1,IbBat,1) + IOccBat(2,IbBat,1) +! +! o Communicate three-center MO integral (jb|Q) with each process +! +! WTimeBgn = MPI_WTIME() +! CALL CPU_TIME(TimeBgn) + + MOInt3ck => RecvBuf + + if ( (Jrank_diff /= NProcs_half) .OR. (NProcs_half == 0) ) then + ! call non-blocking MPI reqs to exchange data that will be used at next iteratoin (not for now) + RecvBufId = mod(Jrank_diff, 2) + 1 + SendBuf => MOInt3ck + RecvBuf => CommBuf(:,:,RecvBufId) + + ! at once + ! CALL MPI_ISend(SendBuf, NMOInt3BufSize, MPI_DOUBLE_PRECISION, Jranksend_1, 0, MPI_COMM_MO, ireq(1), IErr) + ! CALL MPI_IRecv(RecvBuf, NMOInt3BufSize, MPI_DOUBLE_PRECISION, Jrankrecv_1, 0, MPI_COMM_MO, ireq(2), IErr) + endif + +! CALL CPU_TIME(TimeEnd) +! WTimeEnd = MPI_WTIME() +! Time_T3C = Time_T3C + TimeEnd - TimeBgn +! WTime_T3C = WTime_T3C + WTimeEnd - WTimeBgn +! + IaBatBg = MyRankMO * NOccBat_per_Proc + IF (ExchIBat .AND. (MyRankMO <= Jrankrecv)) THEN + IF (IbBat_Proc > NOccBat_per_Proc_half) THEN + IaBatBg = MyRankMO * NOccBat_per_Proc + NOccBat_per_Proc / 2 + END IF + END IF + IF (ExchIBat .AND. (MyRankMO > Jrankrecv)) THEN + IF (IbBat_Proc <= NOccBat_per_Proc_half) THEN + IaBatBg = MyRankMO * NOccBat_per_Proc + NOccBat_per_Proc / 2 + END IF + END IF +! 
+ IaBat_Proc_End = NOccBat_per_Proc + IF (Jrank_diff == 0) THEN + IaBat_Proc_End = IbBat_Proc + ELSE IF (ExchIBat) THEN + IaBat_Proc_End = NOccBat_per_Proc_half + END IF + +!MPI Parallel + DO IaBat_Proc = 1, IaBat_Proc_End +!MPI Parallel + IaBat = IaBatBg + IaBat_Proc + IaBg = IOccBat(1,IaBat,1) + 1 + IaEd = IOccBat(1,IaBat,1) + IOccBat(2,IaBat,1) +! + LCount = 0 + DO Ib_0 = IbBg, IbEd, nBlk + lenB = IbEd - Ib_0 + 1 + if ( lenB > nBlk ) lenB = nBlk + J = Ib_0 - IbBg + 1 + + IF (IaBat == IbBat) IaEd = Ib_0 + lenB - 1 + + DO Ia_0 = IaBg, IaEd, nBlk + lenA = IaEd - Ia_0 + 1 + if ( lenA > nBlk ) lenA = nBlk + I = Ia_0 - IaBg_Proc + + LCount = LCount + 1 + Ib_0_List(LCount) = Ib_0 + Ia_0_List(LCount) = Ia_0 + ENDDO + ENDDO + cflag_set_mat_A(:) = 0 + cflag_set_mat_B(:) = 0 +! + ! if ( LCount < commCount ) then + ! write(*,'(a,i6,a,i6,a,i6)') "[WARN] myRank=", myRank, ", LCount=", LCount, ", commCount=", commCount + ! ! stop + ! endif +! +#ifdef DEBUG + if ( MyRank == 0 ) then + write(*,'(a,i6)') "# LCount:", LCount + write(*,'("# ",i6,":",i6,"-",i6,"(",i3,")",",",i6,"-",i6,"(",i3,")")') MyRank, & + IbBg, IbEd, IbEd - IbBg + 1, & + IaBg, IaEd, IaEd - IaBg + 1 + endif +#endif + + WTimeBgn = MPI_WTIME() + CALL CPU_TIME(TimeBgn) + commPhase = 1 + if ( commSizeEach(commPhase) > 0 ) then +!coarray +! CALL MPI_ISend(SendBuf(1,commIndexEach(commPhase)), commSizeEach(commPhase), & +! MPI_DOUBLE_PRECISION, Jranksend_1, commPhase, MPI_COMM_MO, ireq(1), IErr) +! CALL MPI_IRecv(RecvBuf(1,commIndexEach(commPhase)), commSizeEach(commPhase), & +! MPI_DOUBLE_PRECISION, Jrankrecv_1, commPhase, MPI_COMM_MO, ireq(2), IErr) + bufsize = commSizeEach(commPhase) + allocate(sbuf(bufsize)[*]) + allocate(rbuf(bufsize)[*]) + jsta = commIndexEach(commPhase) + sbuf(1:bufsize) = SendBuf(1:bufsize,jsta) + rbuf(1:bufsize)[Jranksend_1+1] = sbuf(1:bufsize) +!! 
+ endif + CALL CPU_TIME(TimeEnd) + WTimeEnd = MPI_WTIME() + Time_T3C = Time_T3C + TimeEnd - TimeBgn + WTime_T3C = WTime_T3C + WTimeEnd - WTimeBgn + + ! + ! + DO LNumber_Base = 1, LCount + (NUM_STREAM-1) + + ! + ! 1st half + ! + LNumber = LNumber_Base + if ( LNumber >= 1 .and. LNumber <= LCount ) then + + Ib_0 = Ib_0_List(LNumber) + lenB = IbEd - Ib_0 + 1 + if ( lenB > nBlk ) lenB = nBlk + J = Ib_0 - IbBg + 1 + ! test + id_B = (Ib_0-IbBg)/nBlk + 1 + id_Bm = mod(id_B - 1, NUM_STREAM) + 1 + + IF (IaBat == IbBat) IaEd = Ib_0 + lenB - 1 + + Ia_0 = Ia_0_List(LNumber) + lenA = IaEd - Ia_0 + 1 + if ( lenA > nBlk ) lenA = nBlk + I = Ia_0 - IaBg_Proc + ! test + id_A = (Ia_0-IaBg)/nBlk + 1 + id_Am = mod(id_A - 1, NUM_STREAM) + 1 + + m = NActO(1)*lenA + n = NActO(1)*lenB + k = NBF_RI_per_ProcMat + + id_st = mod(LNumber-1, NUM_STREAM) + 1 + RWork2 => RWork2_Pool(:,:,id_st) + + id_ev = mod(LNumber-1, NUM_EVENT) + 1 + id_ev_next = mod(id_ev, NUM_EVENT) + 1 +#ifdef DEBUG + if ( MyRank == 0 ) then + write(*,'(a,i6,a,i4,a,i6,a,i4,a,i6,a,i4,a,3i6)') & + "# H1: LNumber:", LNumber, ", id_st:", id_st, & + ", Ib_0:", Ib_0, ", lenB:", lenB, & + ", Ia_0:", Ia_0, ", lenA:", lenA, & + ", (m,n,k):", m, n, k + write(*,'(a,i3,a,i3,a,i3,a,i3)') "# id_B=", id_B, ", id_A=", id_A, & + ", id_ev=", id_ev, ", id_ev_next=", id_ev_next + endif +#endif +! +! o Evaluation of four-center MO integrals (ia|jb) from three-center integrals +! + WTimeBgn = MPI_WTIME() + CALL CPU_TIME(TimeBgn) + ! DGEMM ( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) +#ifdef USE_GPU + ! wait for previous send to complete + call cublas_ev_wait( id_ev, id_st ) + + ! send matrix B to GPU + if ( cflag_set_mat_B(id_B) == 0 ) then + call cublas_set_matrix_async( k, n, MOInt3ck (1,J), ldb, devptr_B(id_Bm), ldb, id_st ) + cflag_set_mat_B(id_B) = 1 + endif + ! 
send matrix A to GPU + if ( nGrp <= NUM_STREAM ) then + if ( cflag_set_mat_A(id_A) == 0 ) then + call cublas_set_matrix_async( k, m, RIInt3c3a(1,I), lda, devptr_A(id_Am), lda, id_st ) + cflag_set_mat_A(id_A) = 1 + endif + else + id_Am = id_st + call cublas_set_matrix_async( k, m, RIInt3c3a(1,I), lda, devptr_A(id_Am), lda, id_st ) + endif + + ! + call cublas_ev_rec( id_ev_next, id_st ) + + ! wait for previous dgemm to complete + call cublas_ev2_wait( id_ev, id_st ) + + ! run dgemm on GPU + call cublas_dgemm_async('T', 'N', m, n, k, One, & + devptr_A(id_Am), lda, & + devptr_B(id_Bm), ldb, Zero, & + devptr_C(id_st), ldc, id_st ) + + ! + call cublas_ev2_rec( id_ev_next, id_st ) + + ! get matrix C from GPU + call cublas_get_matrix_async( m, n, devptr_C(id_st), ldc, RWork2, ldc, id_st ) + + ! debug + ! call cublas_st_sync( id_st ) +#else + CALL DGEMM('T', 'N', m, n, k, One, & + RIInt3c3a(1,I), lda, & + MOInt3ck (1,J), ldb, Zero, & + RWork2, ldc ) +#endif + count_dgemm = count_dgemm + 1 ! debug + CALL CPU_TIME(TimeEnd) + WTimeEnd = MPI_WTIME() + Time_MOI = Time_MOI + TimeEnd - TimeBgn + WTime_MOI = WTime_MOI + WTimeEnd - WTimeBgn + endif + ! + ! end of 1st half + ! + + ! + ! 2nd half + ! + LNumber = LNumber_Base - (NUM_STREAM-1) + + if ( LNumber >= 1 .and. 
LNumber <= LCount ) then + + Ib_0 = Ib_0_List(LNumber) + lenB = IbEd - Ib_0 + 1 + if ( lenB > nBlk ) lenB = nBlk + J = Ib_0 - IbBg + 1 + + IF (IaBat == IbBat) IaEd = Ib_0 + lenB - 1 + + Ia_0 = Ia_0_List(LNumber) + lenA = IaEd - Ia_0 + 1 + if ( lenA > nBlk ) lenA = nBlk + I = Ia_0 - IaBg_Proc + + m = NActO(1)*lenA + n = NActO(1)*lenB + k = NBF_RI_per_ProcMat + + id_st = mod(LNumber-1, NUM_STREAM) + 1 + RWork2 => RWork2_Pool(:,:,id_st) +#ifdef DEBUG + if ( MyRank == 0 ) then + write(*,'(a,i6,a,i4,a,i6,a,i4,a,i6,a,i4,a,3i6)') & + "# H2: LNumber:", LNumber, ", id_st:", id_st, & + ", Ib_0:", Ib_0, ", lenB:", lenB, & + ", Ia_0:", Ia_0, ", lenA:", lenA, & + ", (m,n,k):", m, n, k + endif +#endif + commPhase = LNumber + if ( commPhase <= commCount .and. commSizeEach(commPhase) > 0 ) then +!coarray +! CALL MPI_Wait(ireq(1), istat1, IErr) +! CALL MPI_Wait(ireq(2), istat2, IErr) + sync all + RecvBuf(1:bufsize,jsta) = rbuf(1:bufsize) + if (allocated(sbuf)) deallocate(sbuf) + if (allocated(rbuf)) deallocate(rbuf) +!! + endif + + WTimeBgn = MPI_WTIME() + CALL CPU_TIME(TimeBgn) + commPhase = LNumber + 1 + if ( commPhase <= commCount .and. commSizeEach(commPhase) > 0 ) then +!coarray +! CALL MPI_ISend(SendBuf(1,commIndexEach(commPhase)), commSizeEach(commPhase), & +! MPI_DOUBLE_PRECISION, Jranksend_1, commPhase, MPI_COMM_MO, ireq(1), IErr) +! CALL MPI_IRecv(RecvBuf(1,commIndexEach(commPhase)), commSizeEach(commPhase), & +! MPI_DOUBLE_PRECISION, Jrankrecv_1, commPhase, MPI_COMM_MO, ireq(2), IErr) + bufsize = commSizeEach(commPhase) + allocate(sbuf(bufsize)[*]) + allocate(rbuf(bufsize)[*]) + jsta = commIndexEach(commPhase) + sbuf(1:bufsize) = SendBuf(1:bufsize,jsta) + rbuf(1:bufsize)[Jranksend_1+1] = sbuf(1:bufsize) +!! + endif + CALL CPU_TIME(TimeEnd) + WTimeEnd = MPI_WTIME() + Time_T3C = Time_T3C + TimeEnd - TimeBgn + WTime_T3C = WTime_T3C + WTimeEnd - WTimeBgn + +#ifdef USE_GPU + call cublas_st_sync( id_st ) +#endif + ! + ! 
do reductoin sum for dgemm result among processes in a MPI_COMM_MAT group + ! typical number of process in a MPI_COMM_MAT is upto 8 (might be more in case of 10K processes) + ! + WTimeBgn = MPI_WTIME() + CALL CPU_TIME(TimeBgn) +!coarray CALL MPI_Allreduce(RWork2, MOIntSome, ldc*n, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_MAT, IErr) + MOIntSome(1:ldc,1:n) = RWork2(1:ldc,1:n) +!! + CALL CPU_TIME(TimeEnd) + WTimeEnd = MPI_WTIME() + Time_MOIC = Time_MOIC + TimeEnd - TimeBgn + WTime_MOIC = WTime_MOIC + WTimeEnd - WTimeBgn + +! +! o Evaluation of MP2 correlation energy for ij orbital pair +! + WTimeBgn = MPI_WTIME() + CALL CPU_TIME(TimeBgn) + ! + do Ib_1 = 1, lenB + do Ia_1 = 1, lenA + Ib = Ib_0 + ib_1 - 1 + Ia = Ia_0 + Ia_1 - 1 + if ((IaBat == IbBat) .and. (Ia > Ib)) cycle + + EigIb = - Eig(Ib+NHOMO) + EigIab = EigIb - Eig(Ia+NHOMO) +! + OfstJ = (Ib_1 - 1)*NActO(1) + OfstI = (Ia_1 - 1)*NActO(1) +! + E2Tab = Zero + E2Sab = Zero + +!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(Ii, Ij, EigIjab, EigIi, T2) +!$OMP DO REDUCTION(+: E2Tab, E2Sab) + DO Ij = 1, NActO(1) + EigIjab = EigIab + Eig(Ij+NFrzO(1)) + DO Ii = IiBgn, IiEnd + EigIi = Eig(Ii+NFrzO(1)) + T2 = MOIntSome(Ii+OfstI,Ij+OfstJ) / (EigIjab + EigIi) + E2Tab = E2Tab + T2 * (MOIntSome(Ii+OfstI,Ij+OfstJ) - MOIntSome(Ij+OfstI,Ii+OfstJ)) + E2Sab = E2Sab + T2 * MOIntSome(Ii+OfstI,Ij+OfstJ) + END DO + END DO +!$OMP END DO +!$OMP END PARALLEL +! + IF (Ia /= Ib) THEN + Fac = Two + ELSE + Fac = One + END IF + E2TP = E2TP + Fac * E2Tab + E2SP = E2SP + Fac * E2Sab +! + end DO + end DO + ! + CALL CPU_TIME(TimeEnd) + WTimeEnd = MPI_WTIME() + Time_EMP2 = Time_EMP2 + TimeEnd - TimeBgn + WTime_EMP2 = WTime_EMP2 + WTimeEnd - WTimeBgn + ! + end if + ! end of 2nd half + + END DO + + ! + ! when commCount > LCount, following MPI reqs are necessary ... + ! + do LNumber = LCount+1, commCount + commPhase = LNumber + if ( commPhase <= commCount .and. commSizeEach(commPhase) > 0 ) then +!debug + print *,'passing point 1.' +!! 
+ CALL MPI_Wait(ireq(1), istat1, IErr) + CALL MPI_Wait(ireq(2), istat2, IErr) + endif + + WTimeBgn = MPI_WTIME() + CALL CPU_TIME(TimeBgn) + commPhase = LNumber + 1 + if ( commPhase <= commCount .and. commSizeEach(commPhase) > 0 ) then +!debug + print *,'passing point 2.' +!! + CALL MPI_ISend(SendBuf(1,commIndexEach(commPhase)), commSizeEach(commPhase), & + MPI_DOUBLE_PRECISION, Jranksend_1, commPhase, MPI_COMM_MO, ireq(1), IErr) + CALL MPI_IRecv(RecvBuf(1,commIndexEach(commPhase)), commSizeEach(commPhase), & + MPI_DOUBLE_PRECISION, Jrankrecv_1, commPhase, MPI_COMM_MO, ireq(2), IErr) + endif + CALL CPU_TIME(TimeEnd) + WTimeEnd = MPI_WTIME() + Time_T3C = Time_T3C + TimeEnd - TimeBgn + WTime_T3C = WTime_T3C + WTimeEnd - WTimeBgn + enddo + +! + END DO + + END DO + END DO +! +! o Memory deallocation +! +#ifdef USE_GPU + ! + ! finalize cublas + ! + CALL CPU_TIME(TimeBgn) +!coarray call mpi_barrier(MPI_COMM_WORLD,ierr) ! not essential, just make measured time meaningful + sync all +!! + ! free buffer on GPU + do id_st = 1, NUM_STREAM + call cublas_free( devptr_A(id_st) ) + call cublas_free( devptr_B(id_st) ) + call cublas_free( devptr_C(id_st) ) + enddo + call cublas_fin() +!coarray call mpi_barrier(MPI_COMM_WORLD,ierr) ! not essential, just make measured time meaningful + sync all +!! + CALL CPU_TIME(TimeEnd) +#ifdef DEBUG + if ( MyRank == 0 ) then + write(*,'("# gpu free/fin time",F12.2)') TimeEnd - TimeBgn + write(*,*) "# count_dgemm:", count_dgemm + end if +#endif +#endif + + ! DEALLOCATE(MOInt3ck) + DEALLOCATE(CommBuf) + DEALLOCATE(MOIntSome) + DEALLOCATE(Eig) + DEALLOCATE(istat1) + DEALLOCATE(istat2) + DEALLOCATE(istat3) + DEALLOCATE(RWork2_Pool) + DEALLOCATE(Ia_0_list, Ib_0_list) + DEALLOCATE(cflag_set_mat_A, cflag_set_mat_B) + DEALLOCATE(devptr_A, devptr_B, devptr_C) + +! +! o Correct MP2 correlation energy +! + WTimeBgn = MPI_WTIME() + CALL CPU_TIME(TimeBgn) +!coarray +! CALL MPI_Allreduce(E2SP, E2S, 1, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, IErr) +! 
CALL MPI_Allreduce(E2TP, E2T, 1, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, IErr) + E2S = E2SP + E2T = E2TP + call co_sum(E2S) + call co_sum(E2T) +!! + CALL CPU_TIME(TimeEnd) + WTimeEnd = MPI_WTIME() + Time_EMP2C = Time_EMP2C + TimeEnd - TimeBgn + WTime_EMP2C = WTime_EMP2C + WTimeEnd - WTimeBgn +! + IF (MyRank == 0) THEN +! +! o Read the SCF energy +! + FBuf = TRIM(Name)//".TotEne" + OPEN(UNIT=IO, FILE=TRIM(FBuf), STATUS='OLD', ACCESS='SEQUENTIAL', FORM='FORMATTED') + READ(IO, *) EMP2 + CLOSE(IO) +! +! o Write the MP2 correlation energy +! + E2 = E2S + E2T + E2SCS = E2S * P12 + E2T / Three +! + WRITE(*, *) 'SCF energy =', EMP2 + WRITE(*, *) 'MP1 energy =', E1 + WRITE(*, *) 'MP2 energy (Singlet corr ) =', E2S + WRITE(*, *) 'MP2 energy (Triplet corr ) =', E2T + WRITE(*, *) 'MP2 energy (Total corr ) =', E2 + WRITE(*, *) 'SCS-MP2 energy (Total corr ) =', E2SCS +! +! o Write the total MP2 energy +! + ESCSMP2 = EMP2 + EMP2 = EMP2 + E1 + E2 + ESCSMP2 = ESCSMP2 + E1 + E2SCS +! + WRITE(*, *) 'Total MP2 energy =', EMP2 + WRITE(*, *) 'Total SCS-MP2 energy =', ESCSMP2 +! + PRINT '(" ..... CPU time (3/3k 2cints comm) :", F12.2)', Time_T3C + PRINT '(" ..... CPU time (4c Ints ) :", F12.2)', Time_MOI + PRINT '(" ..... CPU time (4c Ints comm ) :", F12.2)', Time_MOIC + PRINT '(" ..... CPU time (EMP2 corr. ) :", F12.2)', Time_EMP2 + PRINT '(" ..... CPU time (EMP2 corr. comm ) :", F12.2)', Time_EMP2C + PRINT '(" ..... WALL time (3/3k 2cints comm) :", F12.2)', WTime_T3C + PRINT '(" ..... WALL time (4c Ints ) :", F12.2)', WTime_MOI + PRINT '(" ..... WALL time (4c Ints comm ) :", F12.2)', WTime_MOIC + PRINT '(" ..... WALL time (EMP2 corr. ) :", F12.2)', WTime_EMP2 + PRINT '(" ..... WALL time (EMP2 corr. comm ) :", F12.2)', WTime_EMP2C +! + END IF +! 
+ END SUBROUTINE diff --git a/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_rmp2energy_semidirect_v_mpiomp.F90 b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_rmp2energy_semidirect_v_mpiomp.F90 new file mode 100755 index 0000000..e7c9768 --- /dev/null +++ b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_rmp2energy_semidirect_v_mpiomp.F90 @@ -0,0 +1,308 @@ + SUBROUTINE RIMP2_RMP2Energy_SemiDirect_V_MPIOMP +! +! o 4c integral generation and RMP2 energy accumulation +! + USE MP2_Module, ONLY : IOccBat, NOccBat, LenOccBat, NOccBat_per_Proc, NMO, & + & NActO, NActV, NFrzO, EMP2, ESCSMP2, E1, E2T, E2S, E2, E2SCS, Name, IPrint + USE RIMP2_Module, ONLY : NBF_RI + Use MP2_Constant_Module, ONLY : Zero, One, Two, Three, P12 + USE MPI_Module, ONLY : NProcs, MyRank, MPIIO, IORank, MPI_COMM_IO +! + IMPLICIT NONE +! + INCLUDE 'mpif.h' +! + INTEGER, PARAMETER :: IO = 99 + CHARACTER(LEN=255) :: FBuf + CHARACTER(LEN=10) :: RankNo + REAL(8) :: E2TP, E2SP, E2Tab, E2Sab + REAL(8) :: T2, Fac + REAL(8) :: EigIb, EigIab, EigIjab, EigIi + INTEGER :: IaBat, IbBat, Ii, Ij, Ia, Ib + INTEGER :: IaBg, IaEd, IbBg, IbEd + INTEGER :: I, J + INTEGER :: NMOInt3BufSize + INTEGER :: NHOMO + INTEGER :: IaBat_Proc, IbBat_Proc, Jranksend, Jrankrecv, Jrank_diff, IaBat_Proc_End + INTEGER :: NOccBat_per_Proc_half, NProcs_half, IaBatBg + INTEGER :: IErr + INTEGER :: ireq(2) + LOGICAL :: EvenProcs, ExchIBat + INTEGER, ALLOCATABLE :: istat1(:), istat2(:) + REAL(8), ALLOCATABLE :: MOInt3cb(:,:), MOInt3ck(:,:), MOInt(:,:), Eig(:) + REAL(8), ALLOCATABLE :: MOInt3ck0(:,:) +! + REAL(8) :: TimeBgn, TimeEnd, Time_MOI, Time_EMP2, WTimeBgn, WTimeEnd, WTime_MOI, WTime_EMP2 + REAL(8) :: Time_T3RK, Time_T3C, Time_T3RB, WTime_T3RK, WTime_T3C, WTime_T3RB +! + Time_T3RK = Zero + Time_T3C = Zero + Time_T3RB = Zero + Time_MOI = Zero + Time_EMP2 = Zero + WTime_T3RK = Zero + WTime_T3C = Zero + WTime_T3RB = Zero + WTime_MOI = Zero + WTime_EMP2 = Zero +! +! o Memory allocation +! 
+ ALLOCATE(MOInt3cb(NBF_RI*NActO(1),LenOccBat)) + ALLOCATE(MOInt3ck(NBF_RI*NActO(1),LenOccBat)) + ALLOCATE(MOInt(NActO(1),NActO(1))) + ALLOCATE(Eig(NMO)) + ALLOCATE(MOInt3ck0(NBF_RI*NActO(1),LenOccBat)) + ALLOCATE(istat1(MPI_STATUS_SIZE)) + ALLOCATE(istat2(MPI_STATUS_SIZE)) +! + NMOInt3BufSize = NBF_RI * NActO(1) * LenOccBat + NHOMO = NFrzO(1) + NActO(1) +! + EvenProcs = .FALSE. + IF (MOD(NProcs, 2) == 0) THEN + EvenProcs = .TRUE. + END IF +! +! o Read orbital energies +! + IF (MPIIO) THEN + FBuf = TRIM(Name)//".OrbEne" + OPEN(UNIT=IO, FILE=TRIM(FBuf), STATUS='OLD', ACCESS='SEQUENTIAL', FORM='FORMATTED') + READ(IO, *) Eig(1:NMO) + CLOSE(IO) + END IF + CALL MPI_Bcast(Eig, NMO, MPI_DOUBLE_PRECISION, IORank, MPI_COMM_IO, IErr) +! + IF ((MyRank == 0) .AND. (IPrint >= 1)) THEN + WRITE(*, *) '+++++ Orbital energy (Alpha) +++++' + WRITE(*, '(10F12.6)') Eig(1:NMO) + END IF +! +! o Open 3c MO integral file (ia|q) +! + WRITE(RankNo,'(I0)') MyRank + FBuf = TRIM(Name)//".MOInt3cA."//Trim(RankNo) + OPEN(UNIT=IO, FILE=TRIM(FBuf), STATUS='UNKNOWN', ACCESS='DIRECT', FORM='UNFORMATTED', RECL=(8*NBF_RI*NActO(1))) +! +! o Calculation of RMP2 correlation energy +! + NProcs_half = NProcs / 2 + NOccBat_per_Proc_half = NOccBat_per_Proc / 2 +! + E2TP = Zero + E2SP = Zero +! +!MPI Parallel + DO IbBat_Proc = 1, NOccBat_per_Proc +!MPI Parallel + IbBat = MyRank * NOccBat_per_Proc + IbBat_Proc + IbBg = IOccBat(1,IbBat,1) + 1 + IbEd = IOccBat(1,IbBat,1) + IOccBat(2,IbBat,1) +! +! o Reading of three-center MO integral (jb|Q) from file +! + WTimeBgn = MPI_WTIME() + CALL CPU_TIME(TimeBgn) + J = 0 + DO Ib = IbBg, IbEd + J = J + 1 + READ(UNIT=IO, REC=Ib) MOInt3ck0(:,J) + END DO + CALL CPU_TIME(TimeEnd) + WTimeEnd = MPI_WTIME() + Time_T3RK = Time_T3RK + TimeEnd - TimeBgn + WTime_T3RK = WTime_T3RK + WTimeEnd - WTimeBgn +! + DO Jrank_diff = 0, NProcs_half + ExchIBat = EvenProcs .AND. 
(jrank_diff == NProcs_half) + Jrankrecv = MyRank + jrank_diff + Jranksend = MyRank - jrank_diff + IF (Jrankrecv >= Nprocs) Jrankrecv = Jrankrecv - NProcs + IF (Jranksend < 0) Jranksend = Jranksend + NProcs + IbBat = Jrankrecv * NOccBat_per_Proc + IbBat_Proc + IbBg = IOccBat(1,IbBat,1) + 1 + IbEd = IOccBat(1,IbBat,1) + IOccBat(2,IbBat,1) +! +! o Communicate three-center MO integral (jb|Q) with each process +! + WTimeBgn = MPI_WTIME() + CALL CPU_TIME(TimeBgn) + CALL MPI_ISend(MOInt3ck0, NMOInt3BufSize, MPI_DOUBLE_PRECISION, Jranksend, 0, MPI_COMM_WORLD, ireq(1), IErr) + CALL MPI_IRecv(MOInt3ck, NMOInt3BufSize, MPI_DOUBLE_PRECISION, Jrankrecv, 0, MPI_COMM_WORLD, ireq(2), IErr) + CALL MPI_Wait(ireq(1), istat1, IErr) + CALL MPI_Wait(ireq(2), istat2, IErr) + CALL CPU_TIME(TimeEnd) + WTimeEnd = MPI_WTIME() + Time_T3C = Time_T3C + TimeEnd - TimeBgn + WTime_T3C = WTime_T3C + WTimeEnd - WTimeBgn +! + IaBatBg = MyRank * NOccBat_per_Proc + IF (ExchIBat .AND. (MyRank <= Jrankrecv)) THEN + IF (IbBat_Proc > NOccBat_per_Proc_half) THEN + IaBatBg = MyRank * NOccBat_per_Proc + NOccBat_per_Proc / 2 + END IF + END IF + IF (ExchIBat .AND. (MyRank > Jrankrecv)) THEN + IF (IbBat_Proc <= NOccBat_per_Proc_half) THEN + IaBatBg = MyRank * NOccBat_per_Proc + NOccBat_per_Proc / 2 + END IF + END IF +! + IaBat_Proc_End = NOccBat_per_Proc + IF (Jrank_diff == 0) THEN + IaBat_Proc_End = IbBat_Proc + ELSE IF (ExchIBat) THEN + IaBat_Proc_End = NOccBat_per_Proc_half + END IF +!MPI Parallel + DO IaBat_Proc = 1, IaBat_Proc_End +!MPI Parallel + IaBat = IaBatBg + IaBat_Proc + IaBg = IOccBat(1,IaBat,1) + 1 + IaEd = IOccBat(1,IaBat,1) + IOccBat(2,IaBat,1) +! +! o Reading of three-center MO integral (ia|Q) from file +! + WTimeBgn = MPI_WTIME() + CALL CPU_TIME(TimeBgn) + I = 0 + DO Ia = IaBg, IaEd + I = I + 1 + READ(UNIT=IO, REC=Ia) MOInt3cb(:,I) + END DO + CALL CPU_TIME(TimeEnd) + WTimeEnd = MPI_WTIME() + Time_T3RB = Time_T3RB + TimeEnd - TimeBgn + WTime_T3RB = WTime_T3RB + WTimeEnd - WTimeBgn +! 
+ J = 0 + DO Ib = IbBg, IbEd + EigIb = - Eig(Ib+NHOMO) + J = J + 1 + IF (IaBat == IbBat) IaEd = Ib + I = 0 + DO Ia = IaBg, IaEd + EigIab = EigIb - Eig(Ia+NHOMO) + I = I + 1 +! +! o Evaluation of four-center MO integrals (ia|jb) from three-center integrals +! + WTimeBgn = MPI_WTIME() + CALL CPU_TIME(TimeBgn) + CALL DGEMM('T', 'N', NActO(1), NActO(1), NBF_RI, One, MOInt3cb(1,I), NBF_RI, & + & MOInt3ck(1,J), NBF_RI, Zero, MOInt, NActO(1)) + CALL CPU_TIME(TimeEnd) + WTimeEnd = MPI_WTIME() + Time_MOI = Time_MOI + TimeEnd - TimeBgn + WTime_MOI = WTime_MOI + WTimeEnd - WTimeBgn +! + E2Tab = Zero + E2Sab = Zero +! +! o Evaluation of MP2 correlation energy for ij orbital pair +! + WTimeBgn = MPI_WTIME() + CALL CPU_TIME(TimeBgn) +!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(Ii, Ij, EigIjab, EigIi, T2) +!$OMP DO REDUCTION(+: E2Tab, E2Sab) + DO Ij = 1, NActO(1) + EigIjab = EigIab + Eig(Ij+NFrzO(1)) + DO Ii = 1, NActO(1) + EigIi = Eig(Ii+NFrzO(1)) + T2 = MOInt(Ii,Ij) / (EigIjab + EigIi) + E2Tab = E2Tab + T2 * (MOInt(Ii,Ij) - MOInt(Ij,Ii)) + E2Sab = E2Sab + T2 * MOInt(Ii,Ij) + END DO + END DO +!$OMP END DO +!$OMP END PARALLEL +! + IF (Ia /= Ib) THEN + Fac = Two + ELSE + Fac = One + END IF + E2TP = E2TP + Fac * E2Tab + E2SP = E2SP + Fac * E2Sab + CALL CPU_TIME(TimeEnd) + WTimeEnd = MPI_WTIME() + Time_EMP2 = Time_EMP2 + TimeEnd - TimeBgn + WTime_EMP2 = WTime_EMP2 + WTimeEnd - WTimeBgn +! + END DO + END DO +! + END DO + END DO + END DO +! +! o Close 3c MO integral file +! + CLOSE(IO, STATUS='DELETE') +! +! o Memory deallocation +! + DEALLOCATE(MOInt3cb) + DEALLOCATE(MOInt3ck) + DEALLOCATE(MOInt) + DEALLOCATE(Eig) + DEALLOCATE(MOInt3ck0) + DEALLOCATE(istat1) + DEALLOCATE(istat2) +! +! o Correct MP2 correlation energy to master process (rank=0) +! +!coarray +! CALL MPI_Reduce(E2SP, E2S, 1, MPI_DOUBLE_PRECISION, MPI_SUM, 0, MPI_COMM_WORLD, IErr) +! 
CALL MPI_Reduce(E2TP, E2T, 1, MPI_DOUBLE_PRECISION, MPI_SUM, 0, MPI_COMM_WORLD, IErr) + E2S = E2SP + E2T = E2TP + call co_sum(E2S,result_image=1) + call co_sum(E2T,result_image=1) +!! + + IF (MyRank == 0) THEN +! +! o Read the SCF energy +! + FBuf = TRIM(Name)//".TotEne" + OPEN(UNIT=IO, FILE=TRIM(FBuf), STATUS='OLD', ACCESS='SEQUENTIAL', FORM='FORMATTED') + READ(IO, *) EMP2 + CLOSE(IO) +! +! o Write the MP2 correlation energy +! + E2 = E2S + E2T + E2SCS = E2S * P12 + E2T / Three +! + WRITE(*, *) 'SCF energy =', EMP2 + WRITE(*, *) 'MP1 energy =', E1 + WRITE(*, *) 'MP2 energy (Singlet corr ) =', E2S + WRITE(*, *) 'MP2 energy (Triplet corr ) =', E2T + WRITE(*, *) 'MP2 energy (Total corr ) =', E2 + WRITE(*, *) 'SCS-MP2 energy (Total corr ) =', E2SCS +! +! o Write the total MP2 energy +! + ESCSMP2 = EMP2 + EMP2 = EMP2 + E1 + E2 + ESCSMP2 = ESCSMP2 + E1 + E2SCS +! + WRITE(*, *) 'Total MP2 energy =', EMP2 + WRITE(*, *) 'Total SCS-MP2 energy =', ESCSMP2 +! + PRINT '(" ..... CPU time (3/3k 2cints read) :", F12.2)', Time_T3RK + PRINT '(" ..... CPU time (3/3k 2cints comm) :", F12.2)', Time_T3C + PRINT '(" ..... CPU time (3/3b 2cints read) :", F12.2)', Time_T3RB + PRINT '(" ..... CPU time (4c Ints ) :", F12.2)', Time_MOI + PRINT '(" ..... CPU time (EMP2 corr. ) :", F12.2)', Time_EMP2 + PRINT '(" ..... WALL time (3/3k 2cints read) :", F12.2)', WTime_T3RK + PRINT '(" ..... WALL time (3/3k 2cints comm) :", F12.2)', WTime_T3C + PRINT '(" ..... WALL time (3/3b 2cints read) :", F12.2)', WTime_T3RB + PRINT '(" ..... WALL time (4c Ints ) :", F12.2)', WTime_MOI + PRINT '(" ..... WALL time (EMP2 corr. ) :", F12.2)', WTime_EMP2 +! + END IF +! + END SUBROUTINE diff --git a/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_tran3c1_incore_v_mpiomp.F90 b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_tran3c1_incore_v_mpiomp.F90 new file mode 100755 index 0000000..2ddf6ce --- /dev/null +++ b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_tran3c1_incore_v_mpiomp.F90 @@ -0,0 +1,606 @@ + SUBROUTINE RIMP2_Tran3c1_InCore_V_MPIOMP +! 
+ USE MP2_Module, ONLY : IOccBat, NOccBat, PScreen, ThrPre, SchwInt, NBF, NMO, NActO, NActV, NFrzO, & + & MaxContS, Name, IPrint + USE RIMP2_Module, ONLY : SchwInt_RI, RIInt3c2a, NBF_RI, NBF_RI_MyRank, IdxBF_RI_MyRank + USE MP2_Basis_Module, ONLY : Expnt, KontG, Spherical, CCoef, KStart, KAtom, NShel, Centr, KType, & + & LtuvMin_Car, LtuvMin_Sph, LtuvMax_Car, LtuvMax_Sph + USE RIMP2_Basis_Module, ONLY : Expnt_RI, KontG_RI, CCoef_RI, KStart_RI, KAtom_RI, NShel_RI, KType_RI, KLoc_RI_Car, KLoc_RI_Sph + USE Int2_Module, ONLY : ExpntA, ExpntB, ExpntC, ExpntD, ExpntP, ExpntQ, & + & PX, PY, PZ, QX, QY, QZ, PAX, PAY, PAZ, PBX, PBY, PBZ, QCX, QCY, QCZ, QDX, QDY, QDZ, & + & CContAB, CContCD, CCoefAB, CCoefCD, PreFactAB, PreFactCD, IAnglA, IAnglB, IAnglC, IAnglD, DoPH, ThrPrim + USE Int2_ECoef_Module, ONLY : ECoefXAB, ECoefYAB, ECoefZAB, ECoefXCD, ECoefYCD, ECoefZCD, ExpPHalf, ExpQHalf + USE Int2_Int2e_Module, ONLY : PQX, PQY, PQZ, ExpntPQ1, ExpntPQ2 + USE Int2_Gamma_Module, ONLY : FF, MaxtuvGam + USE MP2_Constant_Module, ONLY : Zero, One, Half, Pi252, RLN10 + USE MPI_Module, ONLY : NProcs, MyRank, MPIIO, IORank, MPI_COMM_IO, MPI_COMM_MO, NProcsMO, MyRankMO, & + & MPI_COMM_Mat, NProcsMat, MyRankMat +! +! o 3c RI integral transformation from AO to MO basis +! + IMPLICIT NONE +! + INCLUDE 'mpif.h' +! +#ifdef MPIINT8 +#define MPI_INTEGER MPI_INTEGER8 +#endif +! + INTEGER, PARAMETER :: IO = 99 + CHARACTER(LEN=255) :: FBuf + INTEGER :: II, JJ, KK, IJ, KL, I, J, K, ITemp, JTemp, KTemp + INTEGER :: IAtomA, IAtomB, IAtomC, IAtomD + INTEGER :: IPrim1, JPrim1, KPrim1, IPrim2, JPrim2, KPrim2, JPTemp2 + INTEGER :: IIOff + INTEGER :: Mod_NBF_NProcsMat + INTEGER :: lenj, jdim, jbgn + REAL(8) :: ExpA, ExpB, ExpC, ExpD, ExpP, ExpQ, ExpPI, ExpQI, ExpAR2, ExpCR2, ExpnPQ + REAL(8) :: ACentX, ACentY, ACentZ, BCentX, BCentY, BCentZ + REAL(8) :: CCentX, CCentY, CCentZ, DCentX, DCentY, DCentZ + REAL(8) :: ABCentX, ABCentY, ABCentZ, R2AB, R2CD + REAL(8) :: ThrFac +! +! o E-coefficients +! 
+ INTEGER :: IJPrim, KLPrim + INTEGER :: NPIJ, NPKL + REAL(8) :: ExpKAB, ExpKCD +! + INTEGER :: KBF_RI, LabelK, NK, MAX_NK + INTEGER :: MActO, MActV +! +! o Integral prescreening +! + REAL(8) :: SchwAB, SchwCD +! +#ifdef USE_GPU + REAL(8), ALLOCATABLE, pinned :: CMO(:,:,:), ERIMat(:,:), T1Int(:,:) + REAL(8), ALLOCATABLE, pinned :: RWork1(:,:), RWork2(:,:) +#else + REAL(8), ALLOCATABLE :: CMO(:,:,:), ERIMat(:,:), T1Int(:,:) + REAL(8), ALLOCATABLE :: RWork1(:,:), RWork2(:,:) +#endif + integer :: devptr_CMO1, devptr_CMO2 + integer, allocatable :: devptr_ERIMAT(:), devptr_T1Int(:), devptr_RWork1(:) +! + REAL(8) :: TimeBgn, TimeEnd, Time_T0, Time_T1, Time_T2, WTimeBgn, WTimeEnd, WTime_T0, WTime_T1, WTime_T2 + REAL(8) :: Time_T0C, Time_T2C, WTime_T0C, WTime_T2C +! + INTEGER :: IRank + INTEGER :: IErr + integer, parameter :: Num_Stream = 3 + integer :: id_st + integer :: lm, ln, lk +! + Time_T0 = Zero + Time_T1 = Zero + Time_T2 = Zero + Time_T0C = Zero + Time_T2C = Zero + WTime_T0 = Zero + WTime_T1 = Zero + WTime_T2 = Zero + WTime_T0C = Zero + WTime_T2C = Zero +! +! o Initialization +! + ThrFac = RLN10 * LOG10(ThrPrim) + ! MActO = MAX(NActO(1), NActO(2)) + MActO = NActO(1) + MActV = MAX(NActV(1), NActV(2)) +! + lenj = NBF / NProcsMat + Mod_NBF_NProcsMat = MOD(NBF, NProcsMat) + if (Mod_NBF_NProcsMat > MyRankMat) then + jbgn = lenj * MyRankMat + MyRankMat + 1 + jdim = lenj + 1 + else + jbgn = lenj * MyRankMat + Mod_NBF_NProcsMat + 1 + jdim = lenj + end if +!debug +! if (myrank == 0) then +! write(*, *) 'MyRankMat=', MyRankMat +! write(*, *) 'jbgn=', jbgn, ' jdim=', jdim +! end if +! +! o Allocate memory +! + ALLOCATE(CMO(NBF,NMO,1)) +!MPI + ALLOCATE(NBF_RI_MyRank(0:NProcsMO-1)) + ALLOCATE(IdxBF_RI_MyRank(NBF_RI)) +!MPI +! +! o Get infomation of parallel distribution of aux. basis +! + MAX_NK = 0 ! 
test + DO IRank = 0, (NProcsMO - 1) + KBF_RI = 0 + DO KK = 1, NShel_RI + IF (MOD(KK, NProcsMO) /= IRank) CYCLE + IAnglC = KType_RI(KK) + IF (Spherical) THEN + NK = LtuvMax_Sph(IAnglC) - LtuvMin_Sph(IAnglC) + 1 + ELSE + NK = LtuvMax_Car(IAnglC) - LtuvMin_Car(IAnglC) + 1 + END IF + KBF_RI = KBF_RI + NK + if (MAX_NK < NK) MAX_NK = NK ! test + END DO + NBF_RI_MyRank(IRank) = KBF_RI +#ifdef DEBUG + if ( MyRank == 0 ) then + write(*,'(a,i4,a,i6)') "# NBF_RI_MyRank(", IRank, ") = ", KBF_RI + endif +#endif + END DO +#ifdef DEBUG + if ( MyRank == 0 ) then + write(*,'(a,i4)') "# MAX_NK = ", MAX_NK + write(*,'(a,i4)') "# NBF = ", NBF + write(*,'(a,i4)') "# NMO = ", NMO + endif +#endif +! +! o Read MO coefficients +! + IF (MPIIO) THEN + FBuf = TRIM(Name)//".MO" + OPEN(UNIT=IO, FILE=TRIM(FBuf), STATUS='OLD', ACCESS='SEQUENTIAL', FORM='FORMATTED') + READ(IO, *) CMO(1:NBF,1:NMO,1) + CLOSE(IO) + END IF +!coarray CALL MPI_Bcast(CMO, NBF*NMO, MPI_DOUBLE_PRECISION, IORank, MPI_COMM_IO, IErr) +! + IF ((MyRank == 0) .AND. (IPrint >= 2)) THEN + WRITE(*, *) '+++++ MO coefficient (Alpha) +++++' + CALL Util_MatOut(CMO(1,1,1), NBF, NMO) + END IF +! +! anaruse, test + ALLOCATE(ERIMat(NBF*NBF,MAX_NK)) + ALLOCATE(T1Int(MActO*NBF,MAX_NK)) + ! ALLOCATE(RWork1(NActO(1)*NActV(1),MAX_NK)) + ALLOCATE(RWork2(NBF*NBF,MAX_NK)) + +#ifdef USE_GPU + allocate(devptr_ERIMAT(MAX_NK), devptr_T1Int(MAX_NK), devptr_RWork1(MAX_NK)) + call cublas_init() + ! + call cublas_alloc( devptr_CMO1, NBF, NMO ); + call cublas_alloc( devptr_CMO2, NBF, NMO ); + do k = 1, MAX_NK + call cublas_alloc( devptr_ERIMAT(k), NBF, NBF ); + call cublas_alloc( devptr_T1Int(k), MActO, NBF ); + call cublas_alloc( devptr_Rwork1(k), NActO(1), NActV(1) ); + end do + ! + call cublas_set_matrix( NBF, NActO(1), CMO(1,(NFrzO(1)+1),1), NBF, devptr_CMO1, NBF ); + call cublas_set_matrix( NBF, NActV(1), CMO(1,(NFrzO(1)+NActO(1)+1),1), NBF, devptr_CMO2, NBF ); +#endif + +! +!$omp parallel +! +! o Allocate memory for 3c AO integrals +! 
+ CALL Int2_Allocate + CALL Int2_ECoef_Allocate + CALL Int2_Array_Allocate + CALL Int2_Int2e_Allocate + ALLOCATE(FF(MaxtuvGam)) +!$omp end parallel +! + KBF_RI = 0 +!MPI parallel + DO KK = 1, NShel_RI + IF (MOD(KK, NProcsMO) /= MyRankMO) CYCLE +!MPI parallel + +!#ifdef DEBUG +#if 1 + if ( MyRank == 0 ) then + write(*,'(a,i6,a,i6)') "# KK = ", KK, ", NShel_RI = ", NShel_RI + endif +#endif + + WTimeBgn = MPI_WTIME() + CALL CPU_TIME(TimeBgn) +! + IAnglC = KType_RI(KK) + IF (Spherical) THEN + LabelK = KLoc_RI_Sph(KK) + NK = LtuvMax_Sph(IAnglC) - LtuvMin_Sph(IAnglC) + 1 + ELSE + LabelK = KLoc_RI_Car(KK) + NK = LtuvMax_Car(IAnglC) - LtuvMin_Car(IAnglC) + 1 + END IF + ! ALLOCATE(RWork2(NBF*NBF,NK)) + CALL DCOPY(NBF*NBF*NK, Zero, 0, RWork2, 1) +! +! o Evaluation of three-center RI integrals (PQ|C) +! +!$OMP PARALLEL DEFAULT(SHARED) & +!$OMP&PRIVATE(II, IAtomA, ACentX, ACentY, ACentZ, IPrim1, IPrim2, ITemp, I, IIOff, & +!$OMP& JJ, IAtomB, BCentX, BCentY, BCentZ, JPrim1, JPrim2, JTemp, J, JPTemp2, & +!$OMP& IAtomC, CCentX, CCentY, CCentZ, KPrim1, KPrim2, KTemp, K, & +!$OMP& IAtomD, DCentX, DCentY, DCentZ, & +!$OMP& IJPrim, KLPrim, IJ, KL, NPIJ, NPKL, & +!$OMP& ABCentX, ABCentY, ABCentZ, R2AB, R2CD, SchwAB, SchwCD, & +!$OMP& ExpA, ExpB, ExpC, ExpD, ExpP, ExpQ, ExpPI, ExpQI, ExpAR2, ExpCR2, ExpnPQ, & +!$OMP& ExpKAB, ExpKCD) +! +! o Allocate memory for 3c AO integrals +! + ! CALL Int2_Allocate + ! CALL Int2_ECoef_Allocate + ! CALL Int2_Array_Allocate + ! CALL Int2_Int2e_Allocate + ! ALLOCATE(FF(MaxtuvGam)) +! + IAtomC = KAtom_RI(KK) + IAnglC = KType_RI(KK) + CCentX = Centr(1,IAtomC) + CCentY = Centr(2,IAtomC) + CCentZ = Centr(3,IAtomC) + KPrim1 = KStart_RI(KK) + KPrim2 = KPrim1 + KontG_RI(KK) - 1 + ! KTemp = 0 + ! DO K = KPrim1, KPrim2 + ! KTemp = KTemp + 1 + ! ExpntC(KTemp) = Expnt_RI(K) + ! END DO +! + IAtomD = IAtomC + IAnglD = 0 ! s-type + DCentX = Centr(1,IAtomD) + DCentY = Centr(2,IAtomD) + DCentZ = Centr(3,IAtomD) + ExpntD(1) = Zero +! 
+ R2CD = Zero + KLPrim = 0 + DO K = KPrim1, KPrim2 + ExpC = Expnt_RI(K) + ExpCR2 = ExpC * R2CD + ExpD = ExpntD(1) + ExpQ = ExpC + ExpD + ExpQI = One / ExpQ + ExpKCD = - ExpCR2 * ExpD * ExpQI + KLPrim = KLPrim + 1 + PreFactCD(KLPrim) = EXP(ExpKCD) + ExpntQ(KLPrim) = ExpQ + ExpQHalf(KLPrim) = Half * ExpQI + CCoefCD(KLPrim) = CCoef_RI(K) + QX(KLPrim) = (ExpC * CCentX + ExpD * DCentX) * ExpQI + QY(KLPrim) = (ExpC * CCentY + ExpD * DCentY) * ExpQI + QZ(KLPrim) = (ExpC * CCentZ + ExpD * DCentZ) * ExpQI + QCX(KLPrim) = QX(KLPrim) - CCentX + QCY(KLPrim) = QY(KLPrim) - CCentY + QCZ(KLPrim) = QZ(KLPrim) - CCentZ + QDX(KLPrim) = QX(KLPrim) - DCentX + QDY(KLPrim) = QY(KLPrim) - DCentY + QDZ(KLPrim) = QZ(KLPrim) - DCentZ + END DO + NPKL = KLPrim + + IF (PScreen) THEN + SchwCD = SchwInt_RI(KK) + END IF +! +! o Normalization +! + CALL RIInt2_MDInt2_CCont(CCoefCD, CContCD, IAnglC, IAnglD, NPKL) +! +! o Generate E-coefficients for C and D +! + CALL MDInt2_ECoef1(ECoefXCD, ECoefYCD, ECoefZCD, QCX, QCY, QCZ, QDX, QDY, QDZ, ExpQHalf, PreFactCD, & + & IAnglC, IAnglD, NPKL) +! +! o calculate 3c AO integrals (PQ|C) +! +!$OMP DO SCHEDULE(DYNAMIC, 1) + DO II = NShel, 1, -1 +! DO II = 1, NShel + ! debug + ! if ( MyRank == 0 ) then + ! write(*,'(a,2i8)') "II:", II, NShel + ! endif + + IAtomA = KAtom(II) + IAnglA = KType(II) + ACentX = Centr(1,IAtomA) + ACentY = Centr(2,IAtomA) + ACentZ = Centr(3,IAtomA) + IPrim1 = KStart(II) + IPrim2 = IPrim1 + KontG(II) - 1 + IIOff = (II * (II - 1)) / 2 +! +!MPI parallel + DO JJ = 1, II + IF (MOD(JJ, NProcsMat) /= MyRankMat) CYCLE +!MPI parallel +! +! o Check integrals by the Schwarz inequality +! + IF (PScreen) THEN + SchwAB = SchwInt(IIOff+JJ) + IF ((SchwAB * SchwCD) < ThrPre) CYCLE + END IF +! + IAtomB = KAtom(JJ) + IAnglB = KType(JJ) + BCentX = Centr(1,IAtomB) + BCentY = Centr(2,IAtomB) + BCentZ = Centr(3,IAtomB) + JPrim1 = KStart(JJ) + JPrim2 = JPrim1 + KontG(JJ) - 1 +! 
+ ABCentX = ACentX - BCentX + ABCentY = ACentY - BCentY + ABCentZ = ACentZ - BCentZ + R2AB = ABCentX * ABCentX + ABCentY * ABCentY + ABCentZ * ABCentZ + IJPrim = 0 + DO I = IPrim1, IPrim2 + ExpA = Expnt(I) + ExpAR2 = ExpA * R2AB + JPTemp2 = JPrim2 + IF (II == JJ) JPTemp2 = I + DO J = JPrim1, JPTemp2 + ExpB = Expnt(J) + ExpP = ExpA + ExpB + ExpPI = One / ExpP + ExpKAB = - ExpAR2 * ExpB * ExpPI + IF (ExpKAB >= ThrFac) THEN + IJPrim = IJPrim + 1 + PreFactAB(IJPrim) = EXP(ExpKAB) + ExpntP(IJPrim) = ExpP + ExpPHalf(IJPrim) = Half * ExpPI + CCoefAB(IJPrim) = CCoef(I) * CCoef(J) + IF ((II == JJ) .AND. (I /= J)) CCoefAB(IJPrim) = CCoefAB(IJPrim) + CCoefAB(IJPrim) + PX(IJPrim) = (ExpA * ACentX + ExpB * BCentX) * ExpPI + PY(IJPrim) = (ExpA * ACentY + ExpB * BCentY) * ExpPI + PZ(IJPrim) = (ExpA * ACentZ + ExpB * BCentZ) * ExpPI + PAX(IJPrim) = PX(IJPrim) - ACentX + PAY(IJPrim) = PY(IJPrim) - ACentY + PAZ(IJPrim) = PZ(IJPrim) - ACentZ + PBX(IJPrim) = PX(IJPrim) - BCentX + PBY(IJPrim) = PY(IJPrim) - BCentY + PBZ(IJPrim) = PZ(IJPrim) - BCentZ + END IF + END DO + END DO + NPIJ = IJPrim + IF (NPIJ == 0) CYCLE +! +! o Normalization +! + CALL Int2_CCont(CCoefAB, CContAB, II, JJ, IAnglA, IAnglB, NPIJ) +! +! o Generate E-coefficients for A and B +! + IF (IAnglA >= IAnglB) THEN + CALL MDInt2_ECoef1(ECoefXAB, ECoefYAB, ECoefZAB, PAX, PAY, PAZ, PBX, PBY, PBZ, ExpPHalf, PreFactAB, & + & IAnglA, IAnglB, NPIJ) + ELSE + CALL MDInt2_ECoef2(ECoefXAB, ECoefYAB, ECoefZAB, PAX, PAY, PAZ, PBX, PBY, PBZ, ExpPHalf, PreFactAB, & + & IAnglA, IAnglB, NPIJ) + END IF +! +! o Generate R-integrals +! +#if 0 + DO IJ = 1, NPIJ + DO KL = 1, NPKL + ExpnPQ = ExpntP(IJ) + ExpntQ(KL) + ExpntPQ1(KL,IJ) = ExpntP(IJ) * ExpntQ(KL) / ExpnPQ + ExpnPQ = ExpntP(IJ) * ExpntQ(KL) * SQRT(ExpnPQ) + ExpntPQ2(KL,IJ) = Pi252 / ExpnPQ ! 
Adsorption + PQX(KL,IJ) = PX(IJ) - QX(KL) + PQY(KL,IJ) = PY(IJ) - QY(KL) + PQZ(KL,IJ) = PZ(IJ) - QZ(KL) + END DO + END DO + CALL MDInt2_R0tuv(NPIJ, NPKL) +#else + CALL MDInt2_R0tuv_test(NPIJ, NPKL) +#endif +! +! o Construct three-center two-electron ERI matrix +! + IF (Spherical) THEN + CALL RIMP2_RIInt2_MDInt2_ERI3c_Sph(II, JJ, KK, NPIJ, NPKL, RWork2) + ELSE + CALL RIMP2_RIInt2_MDInt2_ERI3c_Car(II, JJ, KK, NPIJ, NPKL, RWork2) + END IF +! + END DO + END DO +!$OMP END DO +! +! o Deallocate memory for 3c AO integrals +! + ! CALL Int2_Deallocate + ! CALL Int2_ECoef_Deallocate + ! CALL Int2_Int2e_Deallocate + ! CALL Int2_Array_Deallocate + ! DEALLOCATE(FF) +!$OMP END PARALLEL +! + CALL CPU_TIME(TimeEnd) + WTimeEnd = MPI_WTIME() + Time_T0 = Time_T0 + TimeEnd - TimeBgn + WTime_T0 = WTime_T0 + WTimeEnd - WTimeBgn +! + IF (Spherical) THEN + LabelK = KLoc_RI_Sph(KK) + NK = LtuvMax_Sph(IAnglC) - LtuvMin_Sph(IanglC) + 1 + ELSE + LabelK = KLoc_RI_Car(KK) + NK = LtuvMax_Car(IAnglC) - LtuvMin_Car(IAnglC) + 1 + END IF +! +#ifdef USE_GPU + DO id_st = 1, Num_Stream + call cublas_st_sync( id_st ) + End DO +#endif + +! +! o Communicate 3c integral matrix +! + ! WTimeBgn = MPI_WTIME() + ! CALL CPU_TIME(TimeBgn) + ! CALL MPI_Allreduce(RWork2(1,1), ERIMat(1,1), NBF*NBF*NK, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_MAT, IErr) + ! CALL CPU_TIME(TimeEnd) + ! WTimeEnd = MPI_WTIME() + ! Time_T0C = Time_T0C + TimeEnd - TimeBgn + ! WTime_T0C = WTime_T0C + WTimeEnd - WTimeBgn + + DO K = 1, NK + KBF_RI = KBF_RI + 1 + LabelK = LabelK + 1 + IdxBF_RI_MyRank(KBF_RI) = LabelK + id_st = mod(K-1, Num_Stream) + 1 +! +! o Communicate 3c integral matrix +! + WTimeBgn = MPI_WTIME() + CALL CPU_TIME(TimeBgn) +!coarray +! CALL MPI_Allreduce(RWork2(1,K), ERIMat(1,K), NBF*NBF, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_MAT, IErr) + ERIMat(1:NBF*NBF,K) = RWork2(1:NBF*NBF,K) +!! + CALL CPU_TIME(TimeEnd) + WTimeEnd = MPI_WTIME() + Time_T0C = Time_T0C + TimeEnd - TimeBgn + WTime_T0C = WTime_T0C + WTimeEnd - WTimeBgn +! 
+! o 1/3 integral transformation +! (pq|c) -> (iq|c) +! +#ifdef DEBUG + ! if ( MyRank == 0 ) then + ! write(*,'(a,i4,a,3i6)') " K=",K, ", 1/3, (m,n,k)=", NActO(1), NBF, NBF + ! endif +#endif + WTimeBgn = MPI_WTIME() + CALL CPU_TIME(TimeBgn) + lm = NActO(1) + ln = NBF + lk = NBF +#ifdef USE_GPU + call cublas_set_matrix_async( lk, ln, ERIMat(1,K), lk, devptr_ERIMAT(K), lk, id_st ) + call cublas_dgemm_async('T', 'N', lm, ln, lk, One, & + devptr_CMO1, NBF, & + devptr_ERIMat(K), NBF, Zero, & + devptr_T1Int(K), NActO(1), id_st ) + ! call cublas_get_matrix_async( lm, ln, devptr_T1Int(K), lm, T1Int(1,K), lm, id_st ) +#else + CALL DGEMM('T', 'N', lm, ln, lk, One, & + CMO(1,(NFrzO(1)+1),1), NBF, & + ERIMat(1,K), NBF, Zero, & + T1Int(1,K), NActO(1)) +#endif + CALL CPU_TIME(TimeEnd) + WTimeEnd = MPI_WTIME() + Time_T1 = Time_T1 + TimeEnd - TimeBgn + WTime_T1 = WTime_T1 + WTimeEnd - WTimeBgn +! +! o 2/3 integral transformation +! (iq|c) -> (ia|c) +! +#ifdef DEBUG + ! if ( MyRank == 0 ) then + ! write(*,'(a,i4,a,3i6)') " K=",K, ", 2/3, (m,n,k)=", NActO(1), NActV(1), NBF + ! endif +#endif + WTimeBgn = MPI_WTIME() + CALL CPU_TIME(TimeBgn) + lm = NActO(1) + ln = NActV(1) + lk = NBF +#ifdef USE_GPU + ! call cublas_set_matrix_async( lm, lk, T1Int(1,K), lm, devptr_T1Int(K), lm, id_st ) + call cublas_dgemm_async('N', 'N', lm, ln, lk, One, & + devptr_T1Int(K), NActO(1), & + devptr_CMO2, NBF, Zero, & + devptr_RWork1(K), NActO(1), id_st) + call cublas_get_matrix_async( lm, ln, devptr_RWork1(K), lm, RIInt3c2a(1,KBF_RI), lm, id_st ) +#else + CALL DGEMM('N', 'N', lm, ln, lk, One, & + T1Int(1,K), NActO(1), & + CMO(1,(NFrzO(1)+NActO(1)+1),1), NBF, Zero, & + RIInt3c2a(1,KBF_RI), NActO(1)) +#endif + CALL CPU_TIME(TimeEnd) + WTimeEnd = MPI_WTIME() + Time_T2 = Time_T2 + TimeEnd - TimeBgn + WTime_T2 = WTime_T2 + WTimeEnd - WTimeBgn + END DO +! + END DO + +#ifdef USE_GPU + DO id_st = 1, Num_Stream + call cublas_st_sync( id_st ) + End DO +#endif + +!$omp parallel +! +! 
o Deallocate memory for 3c AO integrals +! + CALL Int2_Deallocate + CALL Int2_ECoef_Deallocate + CALL Int2_Int2e_Deallocate + CALL Int2_Array_Deallocate + DEALLOCATE(FF) +!$omp end parallel + +! anaruse, test + DEALLOCATE(ERIMat) + DEALLOCATE(T1Int) + ! DEALLOCATE(RWork1) + DEALLOCATE(RWork2) + +#ifdef USE_GPU + call cublas_free( devptr_CMO1 ); + call cublas_free( devptr_CMO2 ); + do k = 1, MAX_NK + call cublas_free( devptr_ERIMAT(k) ); + call cublas_free( devptr_T1Int(k) ); + call cublas_free( devptr_Rwork1(k) ); + end do + ! call cublas_fin() + deallocate( devptr_ERIMAT, devptr_T1Int, devptr_RWork1 ) +#endif +! +! o Deallocate memory +! + DEALLOCATE(CMO) +! +#ifdef DEBUG + write(*,'(a,i6,F12.2)') "# ", MyRank, WTime_T0 +#endif +!coarray +! call MPI_Barrier( MPI_COMM_WORLD, IErr ) + sync all +!! +! + IF (MyRank == 0) THEN + PRINT '(" ..... CPU time (3c-RIInt ) :", F12.2)', Time_T0 + PRINT '(" ..... CPU time (3c-RIInt comm ) :", F12.2)', Time_T0C + PRINT '(" ..... CPU time (1/3 tran3c1 ) :", F12.2)', Time_T1 + PRINT '(" ..... CPU time (2/3 tran3c1 ) :", F12.2)', Time_T2 + PRINT '(" ..... CPU time (2/3 tran3c1 comm) :", F12.2)', Time_T2C + PRINT '(" ..... WALL time (3c-RIInt ) :", F12.2)', WTime_T0 + PRINT '(" ..... WALL time (3c-RIInt comm ) :", F12.2)', WTime_T0C + PRINT '(" ..... WALL time (1/3 tran3c1 ) :", F12.2)', WTime_T1 + PRINT '(" ..... WALL time (2/3 tran3c1 ) :", F12.2)', WTime_T2 + PRINT '(" ..... WALL time (2/3 tran3c1 comm) :", F12.2)', WTime_T2C + END IF +! + END SUBROUTINE diff --git a/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_tran3c2_incore_v_mpiomp.F90 b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_tran3c2_incore_v_mpiomp.F90 new file mode 100755 index 0000000..515f34f --- /dev/null +++ b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_tran3c2_incore_v_mpiomp.F90 @@ -0,0 +1,390 @@ + SUBROUTINE RIMP2_Tran3c2_InCore_V_MPIOMP +! +! o three-center RI integral transformation from original aux. basis to transformed aux. basis +! (ia|c) -> (ia|d) +! 
+ USE MP2_Module, ONLY : IOccBat, NOccBat, LenOccBat, NOccBat_per_Proc, NActO, NActV, Name + USE RIMP2_Module, ONLY : NBF_RI, NBC_RI, RIInt2c, RI2cInv, RIInt3c2a, RIInt3c2b, RIInt3c3a, RIInt3c3b, & + & NBF_RI_MyRank, IdxBF_RI_MyRank + USE MP2_Constant_Module, ONLY : Zero, One + USE MPI_Module, ONLY : MPI_COMM_MO, NProcs, MyRank, NProcsMO, MyRankMO, NProcsMat, MyRankMat +! + IMPLICIT NONE +! + INCLUDE 'mpif.h' +! +! #undef USE_GPU +! +#ifdef MPIINT8 +#define MPI_INTEGER MPI_INTEGER8 +#endif +! + INTEGER :: MXNActO + INTEGER :: IaBat, Ia, Ii, I, K + INTEGER :: IaBg, IaEd, IaBg_Proc + INTEGER :: IaBat_Proc, Irank_diff, Iranksend, Irankrecv + INTEGER :: MXNBF_RI_MyRank + INTEGER :: NT2BufSize + INTEGER :: IErr + INTEGER :: ireq(4) + INTEGER :: Mod_NBF_RI_NProcsMat + INTEGER :: leni, ibgn, idim + REAL(8) :: TimeBgn, TimeEnd, WTimeBgn, WTimeEnd +!coarray +! INTEGER, ALLOCATABLE :: IdxBF_RI_Irank(:) + INTEGER, ALLOCATABLE :: IdxBF_RI_Irank(:)[:] +!! + INTEGER, ALLOCATABLE :: istat1(:), istat2(:), istat3(:), istat4(:) +#ifdef USE_GPU + REAL(8), ALLOCATABLE, pinned :: T2Int(:,:,:,:) + real(8), parameter :: maxMem = 1e9 + ! real(8), parameter :: maxMem = 1e8 + integer, parameter :: Num_Stream = 3 + integer, parameter :: Num_Buf = 2 + integer :: devptr_A, devptr_B(Num_Stream), devptr_C(Num_Stream) + integer :: id_st +#else + REAL(8), ALLOCATABLE :: T2Int(:,:,:,:) + integer, parameter :: Num_Buf = 1 +#endif +!coarray +! REAL(8), ALLOCATABLE :: T2BufSend(:,:,:), T2BufRecv(:,:,:) + REAL(8), ALLOCATABLE :: T2BufSend(:,:,:), T2BufRecv(:,:,:)[:] +!! +! + REAL(8) :: Time_T2C, Time_T3, WTime_T2C, WTime_T3 +! + integer :: lm, ln, lk + integer :: maxBlkN, stpBlkN, numBlkN + integer :: maxMN, maxLenN, maxLenM + integer :: j + integer :: id_buf +! + Time_T2C = Zero + Time_T3 = Zero + WTime_T2C = Zero + WTime_T3 = Zero +! 
+ leni = NBF_RI / NProcsMat + Mod_NBF_RI_NProcsMat = MOD(NBF_RI, NProcsMat) + if (Mod_NBF_RI_NProcsMat > MyRankMat) then + ibgn = leni * MyRankMat + MyRankMat + 1 + idim = leni + 1 + else + ibgn = leni * MyRankMat + Mod_NBF_RI_NProcsMat + 1 + idim = leni + end if +! +! if (MyRank == 0) then +! write(*, *) 'MyRankMat=', MyRankMat +! write(*, *) 'leni=', leni +! write(*, *) 'ibgn=', ibgn +! write(*, *) 'idim=', idim +! end if +! + ALLOCATE(RIInt2c(NBC_RI)) +! CALL DCOPY(NBC_RI, Zero, 0, RIInt2c, 1) +! +! o Evaluation of two-center RI integrals (P|Q) +! + IF (MyRank == 0) THEN + PRINT '(" ..... Enter (RIInt2_MDInt2c )")' + END IF + WTimeBgn = MPI_WTIME() + CALL CPU_TIME(TimeBgn) + CALL RIMP2_RIInt2_MDInt2_Int2c_MPIOMP + CALL CPU_TIME(TimeEnd) + WTimeEnd = MPI_WTIME() + IF (MyRank == 0) THEN + PRINT '(" ..... CPU time (RIInt2_MDInt2c ) :", F12.2)', TimeEnd - TimeBgn + PRINT '(" ..... WALL time (RIInt2_MDInt2c ) :", F12.2)', WTimeEnd - WTimeBgn + END IF +! + ALLOCATE(RI2cInv(NBF_RI, NBF_RI)) +! +! o Calculation of the inverse matrix of two-center RI integrals (P|Q)^-1 +! + IF (MyRank == 0) THEN + PRINT '(" ..... Enter (RIInt2_Inv2c )")' + END IF + WTimeBgn = MPI_WTIME() + CALL CPU_TIME(TimeBgn) + CALL RIMP2_Inv2c_MPI + CALL CPU_TIME(TimeEnd) + WTimeEnd = MPI_WTIME() + IF (MyRank == 0) THEN + PRINT '(" ..... CPU time (RIInt2_Inv2c ) :", F12.2)', TimeEnd - TimeBgn + PRINT '(" ..... WALL time (RIInt2_Inv2c ) :", F12.2)', WTimeEnd - WTimeBgn + END IF +! + DEALLOCATE(RIInt2c) +! +! o Allocation of memory +! + MXNActO = MAX(NActO(1), NActO(2)) + MXNBF_RI_MyRank = MAXVAL(NBF_RI_MyRank) + NT2BufSize = MXNActO * LenOccBat * MXNBF_RI_MyRank + ALLOCATE(T2Int(NBF_RI,MXNActO,LenOccBat, Num_Buf)) +!coarray +! ALLOCATE(IdxBF_RI_Irank(MXNBF_RI_MyRank)) + ALLOCATE(IdxBF_RI_Irank(MXNBF_RI_MyRank)[*]) +!! 
+ ALLOCATE(istat1(MPI_STATUS_SIZE)) + ALLOCATE(istat2(MPI_STATUS_SIZE)) + ALLOCATE(istat3(MPI_STATUS_SIZE)) + ALLOCATE(istat4(MPI_STATUS_SIZE)) + + ALLOCATE(T2BufSend(MXNActO,MXNBF_RI_MyRank,LenOccBat)) +!coarray +! ALLOCATE(T2BufRecv(MXNActO,MXNBF_RI_MyRank,LenOccBat)) + ALLOCATE(T2BufRecv(MXNActO,MXNBF_RI_MyRank,LenOccBat)[*]) +!! +! + ! test + lm = idim + ln = NActO(1) + lk = NBF_RI ! do not partition this + +#ifdef DEBUG + if ( MyRank == 0 ) then + write(*,'(a,3i6)') "# (lm,ln,lk) = ", lm, ln, lk + endif +#endif + + maxLenM = idim + +#ifdef USE_GPU + maxMN = sqrt(maxMem/8 + lk*lk) - lk + maxBlkN = maxMN / ln + if ( maxLenM > maxMN ) then + maxLenM = maxMN + maxLenM = maxLenM - mod(maxLenM, 32) + endif + +#ifdef DEBUG + if ( MyRank == 0 ) then + write(*,'(a,i6,a,i6,a,i6,a,i6)') & + "# maxMN = ", maxMN, & + ", maxLenM = ", maxLenM, & + ", maxLenN = ", maxBlkN * NActO(1), & + ", maxBlkN = ", maxBlkN + endif +#endif + + lm = maxLenM + ln = NActO(1) * maxBlkN + call cublas_init() + call cublas_alloc( devptr_A, lm, lk ) + DO id_st = 1, Num_Stream + call cublas_alloc( devptr_B(id_st), lk, ln ) + call cublas_alloc( devptr_C(id_st), lm, ln ) + End DO +#endif + + ! test +! CALL MPI_Barrier( MPI_COMM_MO, ierr ) + sync all +! +! +!MPI Parallel + DO IaBat_Proc = 1, NOccBat_per_Proc +!MPI Parallel + id_buf = mod(IaBat_Proc-1, Num_Buf) + 1 +! +! o Send and recieve the 2/3 transformed 3c RI integrals (ia|c) +! + ! ALLOCATE(T2BufSend(MXNActO,MXNBF_RI_MyRank,LenOccBat)) + ! ALLOCATE(T2BufRecv(MXNActO,MXNBF_RI_MyRank,LenOccBat)) + DO Irank_diff = 0, (NProcsMO - 1) + Irankrecv = MyRankMO + Irank_diff + Iranksend = MyRankMO - Irank_diff + IF (Irankrecv >= NProcsMO) Irankrecv = Irankrecv - NProcsMO + IF (Iranksend < 0) Iranksend = Iranksend + NProcsMO + IaBat = Iranksend * NOccBat_per_Proc + IaBat_Proc + IaBg = IOccBat(1,IaBat,1) + 1 + IaEd = IOccBat(1,IaBat,1) + IOccBat(2,IaBat,1) +! 
+#ifdef DEBUG + if ( MyRank == 0 ) then + write(*,'(a,i6,i6)') "# Irank_diff", Irank_diff, (NProcsMO - 1) + endif +#endif +! + DO K = 1, NBF_RI_MyRank(MyRankMO) + I = 0 + DO Ia = IaBg, IaEd + I = I + 1 + DO Ii = 1, NActO(1) + T2BufSend(Ii,K,I) = RIInt3c2a(Ii+(Ia-1)*NActO(1),K) + END DO + END DO + END DO +! + sync all + WTimeBgn = MPI_WTIME() + CALL CPU_TIME(TimeBgn) + +!coarray +! CALL MPI_IRecv(IdxBF_RI_Irank, NBF_RI_MyRank(Irankrecv), MPI_INTEGER, Irankrecv, 0, & +! & MPI_COMM_MO, ireq(2), IErr) +! CALL MPI_ISend(IdxBF_RI_MyRank, NBF_RI_MyRank(MyRankMO), MPI_INTEGER, Iranksend, 0, & +! & MPI_COMM_MO, ireq(1), IErr) +! CALL MPI_Wait(ireq(1), istat1, IErr) +! CALL MPI_Wait(ireq(2), istat2, IErr) + IdxBF_RI_Irank(1:NBF_RI_MyRank(MyRankMO))[Iranksend+1] = & + IdxBF_RI_MyRank(1:NBF_RI_MyRank(MyRankMO)) + sync all +!! + +!coarray +! CALL MPI_IRecv(T2BufRecv, NT2BufSize, MPI_DOUBLE_PRECISION, Irankrecv, 1, MPI_COMM_MO, ireq(4), IErr) +! CALL MPI_ISend(T2BufSend, NT2BufSize, MPI_DOUBLE_PRECISION, Iranksend, 1, MPI_COMM_MO, ireq(3), IErr) +! CALL MPI_Wait(ireq(3), istat3, IErr) +! CALL MPI_Wait(ireq(4), istat4, IErr) + T2BufRecv(:,:,:)[Iranksend+1] = T2BufSend(:,:,:) + sync all +!! + + CALL CPU_TIME(TimeEnd) + WTimeEnd = MPI_WTIME() + Time_T2C = Time_T2C + TimeEnd - TimeBgn + WTime_T2C = WTime_T2C + WTimeEnd - WTimeBgn +! + IaBat = MyRankMO * NOccBat_per_Proc + IaBat_Proc + DO I = 1, IOccBat(2,IaBat,1) + DO K = 1, NBF_RI_MyRank(Irankrecv) + DO Ii = 1, NActO(1) + T2Int(IdxBF_RI_Irank(K),Ii,I,id_buf) = T2BufRecv(Ii,K,I) + END DO + END DO + END DO + + ! + ! work-around to avoid to stop MPI comm for some unknown reason + ! + if ( mod(Irank_diff, 4) == 0 ) then +!coarray +! CALL MPI_Barrier( MPI_COMM_MO, ierr ) + sync all +!! + endif + + END DO + ! DEALLOCATE(T2BufSend) + ! DEALLOCATE(T2BufRecv) +! + IaBat = MyRankMO * NOccBat_per_Proc + 1 + IaBg_Proc = IOccBat(1,IaBat,1) + IaBat = MyRankMO * NOccBat_per_Proc + IaBat_Proc + IaBg = IOccBat(1,IaBat,1) +! 
+ stpBlkN = 1 +#ifdef USE_GPU + stpBlkN = maxBlkN + if ( stpBlkN > IOccBat(2,IaBat,1) ) then + stpBlkN = (IOccBat(2,IaBat,1) + 1) / 2 + endif +#endif +! +#ifdef DEBUG + if ( MyRank == 0 ) then + write(*,'(a,i6,a,i6)') "# IOccBat(2,IaBat,1) = ", IOccBat(2,IaBat,1), & + ", stpBlkN = ", stpBlkN + write(*,'(a,i6,i4,a,i4,i6)') "# (m,n,k) = ", idim, NActO(1), 'x', stpBlkN, NBF_RI + endif +#endif +! + DO J = 1, idim, maxLenM ! insert this loop to do blocking in m-direction of dgemm + lk = NBF_RI + lm = idim - (J-1) + if ( lm > maxLenM ) lm = maxLenM + +#ifdef USE_GPU + DO id_st = 1, Num_Stream + call cublas_st_sync( id_st ) + End DO + call cublas_set_matrix( lk, lm, RI2cInv(1,ibgn+(J-1)), lk, devptr_A, lk ) + id_st = 1 +#endif + + DO I = 1, IOccBat(2,IaBat,1), stpBlkN + Ia = (IaBg + I) - IaBg_Proc + + numBlkN = IOccBat(2,IaBat,1) - I + 1 + if ( numBlkN > stpBlkN ) numBlkN = stpBlkN + ln = NActO(1) * numBlkN +! #ifdef DEBUG +! if ( MyRank == 0 ) then +! write(*,'(a,i4,a,i4,a,3i6)') "# J=", J, ", I=", I, ", (lm,ln,lk)=", lm,ln,lk +! endif +! #endif +! +! o 3/3 integral transformation +! (ia|P) -> (iq|Q) +! + WTimeBgn = MPI_WTIME() + CALL CPU_TIME(TimeBgn) +#ifdef USE_GPU + id_st = mod(id_st, Num_Stream) + 1 + call cublas_set_matrix_async( lk, ln, T2Int(1,1,I,id_buf), lk, devptr_B(id_st), lk, id_st ) + call cublas_dgemm_async('T', 'N', lm, ln, lk, One, & + devptr_A, NBF_RI, & + devptr_B(id_st), NBF_RI, Zero, & + devptr_C(id_st), maxLenM, id_st ) + call cublas_get_matrix_async( lm, ln, devptr_C(id_st), maxLenM, RIInt3c3a(J,Ia), idim, id_st ) +#else + ! CALL DGEMM('T', 'N', lm, ln, lk, One, & + ! RI2cInv(1,ibgn), NBF_RI, & + ! T2Int(1,1,I,id_buf), NBF_RI, Zero, & + ! RIInt3c3a(1,Ia), idim ) + CALL DGEMM('T', 'N', lm, ln, lk, One, & + RI2cInv(1,ibgn+(J-1)), NBF_RI, & + T2Int(1,1,I,id_buf), NBF_RI, Zero, & + RIInt3c3a(J,Ia), idim ) +#endif + CALL CPU_TIME(TimeEnd) + WTimeEnd = MPI_WTIME() + Time_T3 = Time_T3 + TimeEnd - TimeBgn + WTime_T3 = WTime_T3 + WTimeEnd - WTimeBgn +! 
+ End DO +! +! + END DO +! + END DO +! +#ifdef USE_GPU + DO id_st = 1, Num_Stream + call cublas_st_sync( id_st ) + End DO + call cublas_free( devptr_A ) + DO id_st = 1, Num_Stream + call cublas_free( devptr_B(id_st) ) + call cublas_free( devptr_C(id_st) ) + End DO +#endif + + IF (MyRank == 0) THEN + PRINT '(" ..... CPU time (2/3 tran3c2 comm) :", F12.2)', Time_T2C + PRINT '(" ..... CPU time (3/3 tran3c2 tran) :", F12.2)', Time_T3 + PRINT '(" ..... WALL time (2/3 tran3c2 comm) :", F12.2)', WTime_T2C + PRINT '(" ..... WALL time (3/3 tran3c2 tran) :", F12.2)', WTime_T3 + END IF +! +! o deallocate memory +! + DEALLOCATE(T2BufSend) + DEALLOCATE(T2BufRecv) +! + DEALLOCATE(RI2cInv) + DEALLOCATE(T2Int) + DEALLOCATE(IdxBF_RI_MyRank) + DEALLOCATE(NBF_RI_MyRank) + DEALLOCATE(IdxBF_RI_Irank) + DEALLOCATE(istat1) + DEALLOCATE(istat2) + DEALLOCATE(istat3) + DEALLOCATE(istat4) +! + END SUBROUTINE From 6c4663a5232677108002a2b1c77c20a47db33f11 Mon Sep 17 00:00:00 2001 From: Kazuma Tago Date: Fri, 5 Mar 2021 19:56:49 +0900 Subject: [PATCH 26/70] [WIP] Change sync all to xmp_sync_all routine. --- NTCHEM-MINI/src/mp2/xmpAPI_rimp2_driver_incore_mpiomp.F90 | 8 ++++++-- .../src/mp2/xmpAPI_rimp2_tran3c1_incore_v_mpiomp.F90 | 5 ++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_driver_incore_mpiomp.F90 b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_driver_incore_mpiomp.F90 index 83b993f..229a046 100755 --- a/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_driver_incore_mpiomp.F90 +++ b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_driver_incore_mpiomp.F90 @@ -5,6 +5,7 @@ SUBROUTINE RIMP2_Driver_InCore_MPIOMP USE MP2_Basis_Module, ONLY : Spherical, LtuvMin_Car, LtuvMin_Sph, LtuvMax_Car, LtuvMax_Sph USE RIMP2_Basis_Module, ONLY : NShel_RI, KType_RI USE MPI_Module, ONLY : NProcs, MyRank, NProcsMat, MyRankMat, NProcsMO, MyRankMO + USE XMP_API ! ! o Driver subroutine for RI-MP2 energy evaluation ! 
@@ -16,6 +17,7 @@ SUBROUTINE RIMP2_Driver_InCore_MPIOMP INTEGER :: KK, KBF_RI, NK, IAnglC INTEGER :: IErr REAL(8) :: TimeBgn, TimeEnd, WTimeBgn, WTimeEnd + INTEGER(4) :: status ! ! o Obtaining RI-MP2 batch infomation and memory allocation ! @@ -66,7 +68,8 @@ SUBROUTINE RIMP2_Driver_InCore_MPIOMP ! CALL RIMP2_Tran3c1_InCore_V_MPIOMP !coarray CALL MPI_Barrier(MPI_COMM_WORLD, IErr) - sync all +! sync all + call xmp_sync_all(status) CALL CPU_TIME(TimeEnd) WTimeEnd = MPI_WTIME() IF (MyRank == 0) THEN @@ -90,7 +93,8 @@ SUBROUTINE RIMP2_Driver_InCore_MPIOMP ALLOCATE(RIInt3c3a(NBF_RI_per_ProcMat*NActO(1),LenOccBat_per_Proc)) CALL RIMP2_Tran3c2_InCore_V_MPIOMP !coarray CALL MPI_Barrier(MPI_COMM_WORLD, IErr) - sync all +! sync all + call xmp_sync_all(status) CALL CPU_TIME(TimeEnd) WTimeEnd = MPI_WTIME() IF (MyRank == 0) THEN diff --git a/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_tran3c1_incore_v_mpiomp.F90 b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_tran3c1_incore_v_mpiomp.F90 index 2ddf6ce..3778501 100755 --- a/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_tran3c1_incore_v_mpiomp.F90 +++ b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_tran3c1_incore_v_mpiomp.F90 @@ -15,6 +15,7 @@ SUBROUTINE RIMP2_Tran3c1_InCore_V_MPIOMP USE MP2_Constant_Module, ONLY : Zero, One, Half, Pi252, RLN10 USE MPI_Module, ONLY : NProcs, MyRank, MPIIO, IORank, MPI_COMM_IO, MPI_COMM_MO, NProcsMO, MyRankMO, & & MPI_COMM_Mat, NProcsMat, MyRankMat + USE XMP_API ! ! o 3c RI integral transformation from AO to MO basis ! @@ -71,6 +72,7 @@ SUBROUTINE RIMP2_Tran3c1_InCore_V_MPIOMP integer, parameter :: Num_Stream = 3 integer :: id_st integer :: lm, ln, lk + integer(4) :: status ! Time_T0 = Zero Time_T1 = Zero @@ -587,7 +589,8 @@ SUBROUTINE RIMP2_Tran3c1_InCore_V_MPIOMP #endif !coarray ! call MPI_Barrier( MPI_COMM_WORLD, IErr ) - sync all +! sync all + call xmp_sync_all(status) !! ! 
IF (MyRank == 0) THEN From 26a24a26c7db3b683c870361abe93f49cfbcf033 Mon Sep 17 00:00:00 2001 From: Kazuma Tago Date: Fri, 5 Mar 2021 20:32:19 +0900 Subject: [PATCH 27/70] [WIP] Change co_* routines of 1 file to MPI routines. --- .../src/mp2/xmpAPI_rimp2_rmp2energy_semidirect_v_mpiomp.F90 | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_rmp2energy_semidirect_v_mpiomp.F90 b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_rmp2energy_semidirect_v_mpiomp.F90 index e7c9768..168274e 100755 --- a/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_rmp2energy_semidirect_v_mpiomp.F90 +++ b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_rmp2energy_semidirect_v_mpiomp.F90 @@ -258,8 +258,10 @@ SUBROUTINE RIMP2_RMP2Energy_SemiDirect_V_MPIOMP ! CALL MPI_Reduce(E2TP, E2T, 1, MPI_DOUBLE_PRECISION, MPI_SUM, 0, MPI_COMM_WORLD, IErr) E2S = E2SP E2T = E2TP - call co_sum(E2S,result_image=1) - call co_sum(E2T,result_image=1) +! call co_sum(E2S,result_image=1) +! call co_sum(E2T,result_image=1) + CALL MPI_Reduce(E2SP, E2S, 1, MPI_DOUBLE_PRECISION, MPI_SUM, 0, MPI_COMM_WORLD, IErr) + CALL MPI_Reduce(E2TP, E2T, 1, MPI_DOUBLE_PRECISION, MPI_SUM, 0, MPI_COMM_WORLD, IErr) !! 
IF (MyRank == 0) THEN From 42af544778a21ec5a19d86ea81c3f45297f21e40 Mon Sep 17 00:00:00 2001 From: Yoshikawa Hiroyuki Date: Fri, 5 Mar 2021 22:55:07 +0900 Subject: [PATCH 28/70] [WIP] Add xmp_api code of NICAM-DC_MINI --- NICAM-DC-MINI/src/share/mod_adm_xmpAPI.f90 | 1297 ++++++ NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 | 4344 +++++++++++++++++++ 2 files changed, 5641 insertions(+) create mode 100755 NICAM-DC-MINI/src/share/mod_adm_xmpAPI.f90 create mode 100755 NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 diff --git a/NICAM-DC-MINI/src/share/mod_adm_xmpAPI.f90 b/NICAM-DC-MINI/src/share/mod_adm_xmpAPI.f90 new file mode 100755 index 0000000..13dacca --- /dev/null +++ b/NICAM-DC-MINI/src/share/mod_adm_xmpAPI.f90 @@ -0,0 +1,1297 @@ +!------------------------------------------------------------------------------- +!> +!! Administration module +!! +!! @par Description +!! This module is for the management of process and region on +!! the icosahedral grid configuration. +!! +!! @author H.Tomita +!! +!! @par History +!! @li 2004-02-17 (H.Tomita) Imported from igdc-4.33 +!! @li 2007-10-22 (T.Mitsui) change value of PRC_RGN_NMAX +!! @li 2008-01-30 (S.Iga) private procedure mk_suffix is changed to public procedure +!! @li 2009-08-18 (T.Mitsui) modify adm_proc_stop to keep out extra process from main routines. +!! @li 2010-04-26 (M.Satoh) add ADM_l_me +!! @li 2010-06-07 (S.Iga) new grid (Iga 2010) is implemented. (see string XTMS) +!! @li 2011-06-30 (T.Seiki) fix undefined value (after, 07-10-22) +!! @li 2011-07-21 (T.Ohno) 2 new grid systems (1DMD-ON-SPHERE are added by Hara-san@JAMSTEC) +!! @li 2012-01-12 (H.Yashiro) add filename specification for logfile(optional) +!! @li 2012-06-11 (H.Yashiro) Milestone-project, code cleanup +!! +!< +module mod_adm + !----------------------------------------------------------------------------- + ! + !++ used modules + ! 
+ !----------------------------------------------------------------------------- + implicit none + private + !--- 2020 Fujitsu + !use mod_coarray + use xmp_api + !--- 2020 Fujitsu end + !----------------------------------------------------------------------------- + ! + !++ Public procedure + ! + public :: ADM_proc_init + public :: ADM_proc_stop + public :: ADM_setup + public :: ADM_mk_suffix + + !----------------------------------------------------------------------------- + ! + !++ Public parameters & variables + ! + !------ Character length of system control + integer, public, parameter :: ADM_NSYS = 32 + ! + !------ Maximum length of file name + integer, public, parameter :: ADM_MAXFNAME = 128 + + ! + !====== Basic definition & information ====== + ! + !------ Log file ID & Control file ID + integer, public, parameter :: ADM_LOG_FID = 30 + integer, public, parameter :: ADM_CTL_FID = 35 + ! + !------ Identifier for single computation or parallel computation + integer, public, parameter :: ADM_SINGLE_PRC = 0 + integer, public, parameter :: ADM_MULTI_PRC = 1 + ! + !------ Identifiers of directions of region edges + integer, public, parameter :: ADM_SW = 1 + integer, public, parameter :: ADM_NW = 2 + integer, public, parameter :: ADM_NE = 3 + integer, public, parameter :: ADM_SE = 4 + ! + !------ Identifiers of directions of region vertices + integer, public, parameter :: ADM_W = 1 + integer, public, parameter :: ADM_N = 2 + integer, public, parameter :: ADM_E = 3 + integer, public, parameter :: ADM_S = 4 + ! + !--- Identifier of triangle element (i-axis-side or j-axis side) + integer, public, parameter :: ADM_TI = 1 + integer, public, parameter :: ADM_TJ = 2 + ! + !--- Identifier of line element (i-axis-side, ij-axis side, or j-axis side) + integer, public, parameter :: ADM_AI = 1 + integer, public, parameter :: ADM_AIJ = 2 + integer, public, parameter :: ADM_AJ = 3 + ! 
+  !------ Identifier of 1 variable
+  integer, public, parameter :: ADM_KNONE = 1
+  integer, public, parameter :: ADM_VNONE = 1
+  !
+  !------ Identifier of poles (north pole or south pole)
+  integer, public, parameter :: ADM_NPL = 1
+  integer, public, parameter :: ADM_SPL = 2
+  !
+  !------ First column on the table for region and direction
+  integer, public, parameter :: ADM_RID = 1
+  integer, public, parameter :: ADM_DIR = 2
+  !
+  real(8), public, parameter :: ADM_VMISS = 1.D0
+
+  !
+  !====== Information for processes ======
+  !
+  !------ Communication world for NICAM
+  integer, public, save :: ADM_COMM_RUN_WORLD
+  !
+  !------ Master process
+  integer, public, parameter :: ADM_prc_run_master = 1
+  !
+  !------ Total number of process
+  integer, public, save :: ADM_prc_all
+  !
+  !------ My process ID
+  integer, public, save :: ADM_prc_me
+  !
+  !------ Process ID which manages the pole regions.
+  integer, public, save :: ADM_prc_pl
+  !
+  !------ Process ID which have the pole regions.
+  integer, public, save :: ADM_prc_npl
+  integer, public, save :: ADM_prc_spl
+  integer, public, save :: ADM_prc_nspl(ADM_NPL:ADM_SPL)
+
+  !
+  !====== Information for processes-region relationship ======
+  !
+  !------ Maximum number of regions managed by 1 process.
+  integer, public, parameter :: PRC_RGN_NMAX = 2560
+  !
+  !------ Region managing file name
+  character(len=ADM_MAXFNAME), public, save :: ADM_rgnmngfname
+  !
+  !------ Number of regions managed by each process
+  integer, public, allocatable, save :: ADM_prc_rnum(:)
+  !
+  !------ Table of regions managed by each process
+  integer, public, allocatable, save :: ADM_prc_tab(:,:)
+  !
+  !------ Table of edge link information
+  integer, public, allocatable, save :: ADM_rgn_etab(:,:,:)
+  !<-----
+  !<----- ADM_rgn_etab( ADM_RID:ADM_DIR, &
+  !<----- ADM_SW:ADM_SE, &
+  !<----- ADM_rgn_nmax )
+  !<-----
+  !
+ !------ Table of process ID from region ID + integer, public, allocatable, save :: ADM_rgn2prc(:) + !<----- + !<----- ADM_rgn2prc(ADM_rgn_nmax) + !<----- + ! + !------ Maximum number of vertex linkage + !integer, public, parameter :: ADM_VLINK_NMAX=5 ! S.Iga 100607 + integer, public, save :: ADM_VLINK_NMAX ! S.Iga 100607 + ! + !------ Table of n-vertex-link(?) at the region vertex + integer, public, allocatable, save :: ADM_rgn_vnum(:,:) + !<----- + !<----- ADM_rgn_vnum( ADM_W:ADM_S, & + !<----- ADM_rgn_nmax ) + !<----- + ! + !------ Table of vertex link information + integer, public, allocatable, save :: ADM_rgn_vtab(:,:,:,:) + !<----- + !<----- ADM_rgn_vtab( ADM_RID:ADM_DIR, & + !<----- ADM_W:ADM_S, & + !<----- ADM_rgn_nmax, & + !<----- ADM_VLINK_NMAX ) + !<----- + ! + !------ Table of vertex link information for poles + integer, public, allocatable, save :: ADM_rgn_vtab_pl(:,:,:) + !<----- + !<----- ADM_rgn_vtab_pl( ADM_RID:ADM_DIR, & + !<----- ADM_RGN_NMAX_PL, & + !<----- ADM_VLINK_NMAX ) + !<----- + ! + !------ Region ID (reguler) of north pole management + integer, public, save :: ADM_rgnid_npl_mng + integer, public, save :: ADM_rgnid_spl_mng + + + ! + !====== Information for regions ====== + ! + !------ Region division level + integer, public, save :: ADM_rlevel + ! + !------ Total number of regular regions managed by all process + integer, public, save :: ADM_rgn_nmax + ! + !------ Maximum number of pole regions + integer, public, parameter :: ADM_rgn_nmax_pl = 2 + ! + !------ Local region number + integer, public, save :: ADM_lall + ! + !------ Local region number for poles + integer, public, save :: ADM_lall_pl = ADM_rgn_nmax_pl + ! + !------ Present Local region number ! 2010.4.26 M.Satoh + integer, public, save :: ADM_l_me + + ! + !====== Grid resolution informations ====== + ! + !------ Grid division level + integer, public, save :: ADM_glevel + ! 
+  !------ Horizontal grid numbers
+  integer, public, save :: ADM_gmin
+  integer, public, save :: ADM_gmax
+  integer, public, save :: ADM_gall_1d
+  integer, public, save :: ADM_gall
+  !
+  !----- grid number of inner region in the diamond
+  integer, public, save :: ADM_gall_in
+  !
+  !------ Identifiers of grid points around poles.
+  integer, public, parameter :: ADM_gslf_pl = 1
+  integer, public, parameter :: ADM_gmin_pl = 2
+  integer, public, save :: ADM_gmax_pl ! [mod] S.Iga 100607
+  integer, public, save :: ADM_gall_pl ! [mod] S.Iga 100607
+  !
+  !------ Vertical grid numbers
+  integer, public, save :: ADM_vlayer
+  integer, public, save :: ADM_kmin
+  integer, public, save :: ADM_kmax
+  integer, public, save :: ADM_kall
+
+  !
+  !====== List vector for 1-dimensional array in the horiz. dir. ======
+  !
+  !------ Identifiers of grid points around a grid point
+  integer, public, parameter :: ADM_GIJ_nmax = 7
+  integer, public, parameter :: ADM_GIoJo = 1
+  integer, public, parameter :: ADM_GIpJo = 2
+  integer, public, parameter :: ADM_GIpJp = 3
+  integer, public, parameter :: ADM_GIoJp = 4
+  integer, public, parameter :: ADM_GImJo = 5
+  integer, public, parameter :: ADM_GImJm = 6
+  integer, public, parameter :: ADM_GIoJm = 7
+  !
+ !------ List vectors + integer, public, save :: ADM_IooJoo_nmax + integer, public, allocatable, save :: ADM_IooJoo(:,:) + !<----- + !<----- ADM_IooJoo(ADM_IooJoo_nmax,ADM_GIJ_nmax) + !<----- + integer, public, save :: ADM_IooJmo_nmax + integer, public, allocatable, save :: ADM_IooJmo(:,:) + !<----- + !<----- ADM_IooJmo(ADM_IooJmo_nmax,ADM_GIJ_nmax) + !<----- + integer, public, save :: ADM_IooJop_nmax + integer, public, allocatable, save :: ADM_IooJop(:,:) + !<----- + !<----- ADM_IooJop(ADM_IooJop_nmax,ADM_GIJ_nmax) + !<----- + integer, public, save :: ADM_IooJmp_nmax + integer, public, allocatable, save :: ADM_IooJmp(:,:) + !<----- + !<----- ADM_IooJmp(ADM_IooJmp_nmax,ADM_GIJ_nmax) + !<----- + integer, public, save :: ADM_ImoJoo_nmax + integer, public, allocatable, save :: ADM_ImoJoo(:,:) + !<----- + !<----- ADM_ImoJoo(ADM_ImoJoo_nmax,ADM_GIJ_nmax) + !<----- + integer, public, save :: ADM_ImoJmo_nmax + integer, public, allocatable, save :: ADM_ImoJmo(:,:) + !<----- + !<----- ADM_ImoJmo(ADM_ImoJmo_nmax,ADM_GIJ_nmax) + !<----- + integer, public, save :: ADM_ImoJop_nmax + integer, public, allocatable, save :: ADM_ImoJop(:,:) + !<----- + !<----- ADM_ImoJop(ADM_ImoJop_nmax,ADM_GIJ_nmax) + !<----- + integer, public, save :: ADM_ImoJmp_nmax + integer, public, allocatable, save :: ADM_ImoJmp(:,:) + !<----- + !<----- ADM_ImoJmp(ADM_ImoJmp_nmax,ADM_GIJ_nmax) + !<----- + integer, public, save :: ADM_IopJoo_nmax + integer, public, allocatable, save :: ADM_IopJoo(:,:) + !<----- + !<----- ADM_IopJoo(ADM_IopJoo_nmax,ADM_GIJ_nmax) + !<----- + integer, public, save :: ADM_IopJmo_nmax + integer, public, allocatable, save :: ADM_IopJmo(:,:) + !<----- + !<----- ADM_IopJmo(ADM_IopJmo_nmax,ADM_GIJ_nmax) + !<----- + integer, public, save :: ADM_IopJop_nmax + integer, public, allocatable, save :: ADM_IopJop(:,:) + !<----- + !<----- ADM_IopJop(ADM_IopJop_nmax,ADM_GIJ_nmax) + !<----- + integer, public, save :: ADM_IopJmp_nmax + integer, public, allocatable, save :: ADM_IopJmp(:,:) + 
!<----- + !<----- ADM_IopJmp(ADM_IopJmp_nmax,ADM_GIJ_nmax) + !<----- + integer, public, save :: ADM_ImpJoo_nmax + integer, public, allocatable, save :: ADM_ImpJoo(:,:) + !<----- + !<----- ADM_ImpJoo(ADM_ImpJoo_nmax,ADM_GIJ_nmax) + !<----- + integer, public, save :: ADM_ImpJmo_nmax + integer, public, allocatable, save :: ADM_ImpJmo(:,:) + !<----- + !<----- ADM_ImpJmo(ADM_ImpJmo_nmax,ADM_GIJ_nmax) + !<----- + integer, public, save :: ADM_ImpJop_nmax + integer, public, allocatable, save :: ADM_ImpJop(:,:) + !<----- + !<----- ADM_ImpJop(ADM_ImpJop_nmax,ADM_GIJ_nmax) + !<----- + integer, public, save :: ADM_ImpJmp_nmax + integer, public, allocatable, save :: ADM_ImpJmp(:,:) + !<----- + !<----- ADM_ImpJmp(ADM_ImpJmp_nmax,ADM_GIJ_nmax) + !<----- + + !=========== For New Grid (XTMS) start <= S.Iga100607 + ! + !------ Horizontal Grid type + character(len=ADM_MAXFNAME), public, save :: ADM_HGRID_SYSTEM = 'ICO' ! icosahedral + ! 'ICO-XTMS' icosahedral but XTMS is used in oprt + ! 'LCP' Lambert-cornial (including PSP) + ! 'MLCP' Mercator+Lambert-cornial + ! 'MLCP-OLD' OLD vergion (only for s=1) + ! + !------ Number of lines at each pole (maybe this is identical to ADM_VLINK_NMAX) + integer, public, save :: ADM_XTMS_K=-1 ! default + ! ICO:5 + ! PSP:6 + ! LCP, MLCP:k + ! + !------ Number of segment for MLCP + integer, public, save :: ADM_XTMS_MLCP_S= 1 + ! + !------ XTMS LEVEL (it is conveniently defined especially for mod_oprt) + integer, public, save :: ADM_XTMS_LEVEL = 0 ! original icosahedral (NICAM) + ! = 1 ! XTMS level 1 + ! = 2 ! XTMS level 2 (to be implemented) + !=========== For New Grid (XTMS) end S.Iga100607 => + + logical, public, save :: ADM_debug = .false. ! [ADD] H.Yashiro 20120703 + + !----------------------------------------------------------------------------- + ! + !++ Private procedure + ! + private :: input_mnginfo + private :: output_info + private :: setup_vtab + + !----------------------------------------------------------------------------- + ! 
+ !++ Private parameters & variables + ! + integer, private, parameter :: GDUMMY = 1 ! Horizontal dummy(halo) cell + integer, private, parameter :: KDUMMY = 1 ! Vertical dummy(halo) cell + + integer, private, save :: ADM_run_type ! Run type (single or multi processes) + + integer, private, save :: NMAX_DMD = -999 ! number of diamond + + !----------------------------------------------------------------------------- +contains + + !----------------------------------------------------------------------------- + !> + !> Description of the subroutine ADM_proc_init + !> + subroutine ADM_proc_init( rtype ) + implicit none + + integer, intent(in) :: rtype ! multi or single processor? + + integer :: my_rank + integer :: ierr + !--------------------------------------------------------------------------- + + ADM_run_type = rtype + + if ( rtype == ADM_MULTI_PRC ) then + + !--- 2020 Fujitsu + call xmp_api_init + call MPI_Init(ierr) + call MPI_Comm_size(MPI_COMM_WORLD, ADM_prc_all, ierr) + !ADM_prc_all = num_images() +!coarray call MPI_Comm_rank(MPI_COMM_WORLD, my_rank, ierr) + !my_rank = this_image() - 1 + my_rank = xmp_this_image() - 1 + !--- 2020 Fujitsu end + +!coarray call MPI_Comm_split(MPI_COMM_WORLD, 0, my_rank, ADM_COMM_RUN_WORLD,ierr) +!coarray ADM_COMM_RUN_WORLD = MPI_COMM_WORLD + ADM_COMM_RUN_WORLD = 1140850688 + + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end + else + ADM_prc_all = 1 + my_rank = 0 + endif + + ADM_prc_me = my_rank + 1 + ADM_prc_pl = 1 + + return + end subroutine ADM_proc_init + + !----------------------------------------------------------------------------- + !> + !> Description of the subroutine ADM_proc_stop + !> + subroutine ADM_proc_stop + implicit none + +!coarray character(len=ADM_NSYS) :: request + !--- 2020 Fujitsu + !character(len=ADM_NSYS) :: request[*] + integer, pointer :: request(:) => null() + integer(8) :: request_desc + integer(8), dimension(1) :: request_lb, request_ub + integer(8) :: request_sec + 
integer(4), dimension(1) :: img_dims + !--- 2020 Fujitsu end + integer :: ierr + integer :: ll + !--------------------------------------------------------------------------- + + !--- 2020 Fujitsu + request_lb(1) = 1; request_ub(1) = ADM_NSYS + call xmp_new_coarray(request_desc, 1, 1, request_lb, request_ub, 1, img_dims) + call xmp_coarray_bind(request_desc, request) + call xmp_new_array_section(request_sec, 1) + !--- 2020 Fujitsu end + if ( ADM_run_type == ADM_MULTI_PRC ) then + write(ADM_LOG_FID,*) + write(ADM_LOG_FID,*) 'MPI process going to STOP...' + + request='STOP' +!coarray +! call MPI_BCAST( request, & !--- starting address +! ADM_NSYS, & !--- number of array +! MPI_CHARACTER, & !--- type +! ADM_prc_run_master-1, & !--- source rank +! MPI_COMM_WORLD, & !--- world +! ierr ) !--- error id + if(ADM_prc_me == ADM_prc_run_master) then + do ll = 1,ADM_prc_all + if(ll /= ADM_prc_me) then + !--- 2020 Fujitsu + !request[ll] = request + call xmp_array_section_set_triplet(request_sec, 1, 1,ADM_NSYS,1, ierr) + img_dims(1) = ll + call xmp_coarray_put(img_dims, request_desc,request_sec, request_desc,request_sec, ierr) + !--- 2020 Fujitsu end + endif + end do + endif +!coarray call MPI_Barrier(MPI_COMM_WORLD,ierr) + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end + + write(ADM_LOG_FID,*) 'MPI process has normally finished.' + write(ADM_LOG_FID,*) '############################################################' + + close(ADM_CTL_FID) + close(ADM_LOG_FID) + + stop + else + write(ADM_LOG_FID,*) + write(ADM_LOG_FID,*) 'Serial process stopeed.' 
+ write(ADM_LOG_FID,*) '############################################################' + + close(ADM_CTL_FID) + close(ADM_LOG_FID) + + stop + endif + + return + end subroutine ADM_proc_stop + + !----------------------------------------------------------------------------- + !> + !> Description of the subroutine ADM_setup + !> + subroutine ADM_setup( & + param_fname, & + msg_base ) + use mod_misc, only: & + MISC_make_idstr + implicit none + + character(LEN=*), intent(in) :: param_fname ! namelist file name + + character(len=*), intent(in), optional :: msg_base ! output file for msg.pexxxxx file + + integer :: glevel = -1 + integer :: rlevel = -1 + integer :: vlayer = 1 + character(LEN=ADM_MAXFNAME) :: rgnmngfname = '' + + namelist / ADMPARAM / & + glevel, & !--- grid division level + rlevel, & !--- region division level + vlayer, & !--- number of inner vertical layer + rgnmngfname, & !--- region management file name + ADM_HGRID_SYSTEM, & !--- grid system (default: ico) ! S.Iga100607 + ADM_XTMS_K, & !--- num of lines at PL ! S.Iga100607 + ADM_XTMS_MLCP_S, & !--- num of segment for MLCP ! S.Iga100607 + ADM_debug + + integer :: rgn_nmax + integer :: nmax + integer :: ierr + + character(LEN=ADM_MAXFNAME) :: fname + character(LEN=ADM_MAXFNAME) :: msg + !--------------------------------------------------------------------------- + + msg = 'msg' + if( present(msg_base) ) msg = msg_base ! 
[add] H.Yashiro 20110701 + + !--- open message file + call MISC_make_idstr(fname,trim(msg),'pe',ADM_prc_me) + open( unit = ADM_LOG_FID, & + file = trim(fname), & + form = 'formatted' ) + + write(ADM_LOG_FID,*) '############################################################' + write(ADM_LOG_FID,*) '# #' + write(ADM_LOG_FID,*) '# NICAM : Nonhydrostatic ICosahedal Atmospheric Model #' + write(ADM_LOG_FID,*) '# #' + write(ADM_LOG_FID,*) '############################################################' + + !--- open control file + open( unit = ADM_CTL_FID, & + file = trim(param_fname), & + form = 'formatted', & + status = 'old', & + iostat = ierr ) + + if ( ierr /= 0 ) then + write(*,*) 'xxx Cannot open parameter control file!' + write(*,*) 'xxx filename:', trim(param_fname) + call ADM_proc_stop + endif + + !--- read parameters + write(ADM_LOG_FID,*) + write(ADM_LOG_FID,*) '+++ Module[adm]/Category[common share]' + rewind(ADM_CTL_FID) + read(ADM_CTL_FID,nml=ADMPARAM,iostat=ierr) + if ( ierr < 0 ) then + write(*, *) 'xxx Not found namelist! STOP.' + write(ADM_LOG_FID,*) 'xxx Not found namelist! STOP.' + call ADM_proc_stop + elseif ( ierr > 0 ) then + write(*, *) 'xxx Not appropriate names in namelist ADMPARAM. STOP.' + write(ADM_LOG_FID,*) 'xxx Not appropriate names in namelist ADMPARAM. STOP.' + call ADM_proc_stop + endif + write(ADM_LOG_FID,nml=ADMPARAM) + + ADM_glevel = glevel + ADM_rlevel = rlevel + ADM_vlayer = vlayer + ADM_rgnmngfname = trim(rgnmngfname) + + ! 
S.Iga 100607 start => + if ( trim(ADM_HGRID_SYSTEM) == 'ICO' ) then + ADM_XTMS_level = 0 + ADM_XTMS_K = 5 + NMAX_DMD = 10 + elseif( trim(ADM_HGRID_SYSTEM) == 'LCP' ) then + if( ADM_XTMS_K == -1 ) ADM_XTMS_K = 6 + ADM_XTMS_level = 1 + NMAX_DMD = 4* ADM_XTMS_K + elseif( trim(ADM_HGRID_SYSTEM) == 'MLCP-OLD' ) then + if( ADM_XTMS_K == -1 ) ADM_XTMS_K = 6 + ADM_XTMS_level = 1 + NMAX_DMD = 2* ADM_XTMS_K + elseif( trim(ADM_HGRID_SYSTEM) == 'MLCP' ) then + if( ADM_XTMS_K == -1 ) ADM_XTMS_K = 6 + ADM_XTMS_level = 1 + NMAX_DMD = (1+ADM_XTMS_MLCP_S) * ADM_XTMS_K + elseif( trim(ADM_HGRID_SYSTEM) == 'PERIODIC-1DMD' ) then ! T.Ohno 110721 + ADM_XTMS_level = 0 + ADM_XTMS_K = 5 + NMAX_DMD = 1 + ADM_prc_pl = -999 + elseif( trim(ADM_HGRID_SYSTEM) == '1DMD-ON-SPHERE' ) then ! M.Hara 110721 + ADM_XTMS_level = 0 + ADM_XTMS_K = 5 + NMAX_DMD = 1 + ADM_prc_pl = -999 + elseif( trim(ADM_HGRID_SYSTEM) == 'ICO-XTMS' ) then + ADM_XTMS_level = 1 + ADM_XTMS_K = 5 + NMAX_DMD = 10 + else + write(* ,*) 'xxx Name of ADM_HGRID_SYSTEM is wrong. STOP.' + write(ADM_LOG_FID,*) 'xxx Name of ADM_HGRID_SYSTEM is wrong. STOP.' + call ADM_proc_stop + endif + + ADM_VLINK_NMAX = ADM_XTMS_K + ADM_GMAX_PL = ADM_VLINK_NMAX + 1 + ADM_GALL_PL = ADM_VLINK_NMAX + 1 + ! <= S.Iga 100607 end + + ! ERROR if Glevel & Rlevel are not defined + if ( ADM_glevel < 1 ) then + write(* ,*) 'xxx Glevel is not appropriate, STOP. GL=', ADM_glevel + write(ADM_LOG_FID,*) 'xxx Glevel is not appropriate, STOP. GL=', ADM_glevel + call ADM_proc_stop + endif + if ( ADM_rlevel < 0 ) then + write(* ,*) 'xxx Rlevel is not appropriate, STOP. RL=', ADM_rlevel + write(ADM_LOG_FID,*) 'xxx Rlevel is not appropriate, STOP. 
RL=', ADM_rlevel + call ADM_proc_stop + endif + + rgn_nmax = 2**ADM_rlevel + ADM_rgn_nmax = rgn_nmax * rgn_nmax * NMAX_DMD + + call input_mnginfo( ADM_rgnmngfname ) + + ADM_prc_npl = ADM_prc_pl + ADM_prc_spl = ADM_prc_pl + + ADM_prc_nspl(ADM_NPL) = ADM_prc_npl + ADM_prc_nspl(ADM_SPL) = ADM_prc_spl + + nmax = 2**( ADM_glevel - ADM_rlevel ) + ADM_gmin = GDUMMY + 1 + ADM_gmax = GDUMMY + nmax + ADM_gall_1d = GDUMMY + nmax + GDUMMY + ADM_gall = ADM_gall_1d * ADM_gall_1d + + ADM_gall_in = ( nmax+GDUMMY ) * ( nmax+GDUMMY ) !--- inner grid number (e.g., 33x33 for gl05) + + if ( ADM_vlayer == 1 ) then + ADM_kmin = 1 + ADM_kmax = 1 + ADM_kall = 1 + else + ADM_kmin = KDUMMY + 1 + ADM_kmax = KDUMMY + ADM_vlayer + ADM_kall = KDUMMY + ADM_vlayer + KDUMMY + endif + + ADM_lall = ADM_prc_rnum(ADM_prc_me) + + ! 2010.4.26 M.Satoh; 2010.5.11 M.Satoh + ! ADM_l_me: this spans from 1 to ADM_lall, if effective. + ! Otherwise, ADM_l_me = 0 should be set. see mod_history + ADM_l_me = 0 + + !--- make suffix for list-vector loop. 
+ call ADM_mk_suffix 
+ 
+ call output_info 
+ 
+ return 
+ end subroutine ADM_setup 
+ 
+ !----------------------------------------------------------------------------- 
+ !> 
+ !> Description of the subroutine input_mnginfo 
+ !> 
+ subroutine input_mnginfo( fname ) 
+ use mod_misc, only :& 
+ MISC_get_available_fid 
+ implicit none 
+ 
+ character(len=ADM_MAXFNAME), intent(in) :: fname 
+ 
+ integer :: num_of_rgn !--- number of region 
+ 
+ namelist / rgn_info / & 
+ num_of_rgn 
+ 
+ integer :: rgnid !--- region ID 
+ integer :: sw(ADM_RID:ADM_DIR) = -1 !--- south-west region info 
+ integer :: nw(ADM_RID:ADM_DIR) = -1 !--- north-west region info 
+ integer :: ne(ADM_RID:ADM_DIR) = -1 !--- north-east region info 
+ integer :: se(ADM_RID:ADM_DIR) = -1 !--- south-east region info 
+ 
+ namelist / rgn_link_info / & 
+ rgnid, & 
+ sw, & 
+ nw, & 
+ ne, & 
+ se 
+ 
+ integer :: num_of_proc !--- number of run-processes 
+ 
+ namelist /proc_info/ & 
+ num_of_proc 
+ 
+ integer :: peid !--- process ID 
+ integer :: num_of_mng !--- number of regions to be managed 
+ integer :: mng_rgnid(PRC_RGN_NMAX) = -1 !--- managed region ID 
+ 
+ namelist /rgn_mng_info/ & 
+ peid, & 
+ num_of_mng, & 
+ mng_rgnid 
+ 
+ integer :: fid, ierr 
+ integer :: l, m, n 
+ !--------------------------------------------------------------------------- 
+ 
+ write(ADM_LOG_FID,*) 
+ write(ADM_LOG_FID,*) '+++ Module[mnginfo]/Category[common share]' 
+ 
+ fid = MISC_get_available_fid() 
+ open( unit = fid, & 
+ file = trim(fname), & 
+ form = 'formatted', & 
+ status = 'old', & 
+ iostat = ierr ) 
+ 
+ !=> [add] H.Yashiro 20120611 
+ ! ERROR if filename is not defined 
+ if ( ierr /= 0 ) then 
+ write(ADM_LOG_FID,*) 'xxx mnginfo file is not found! STOP. ', trim(fname) 
+ call ADM_proc_stop 
+ endif 
+ !<= [add] H.Yashiro 20120611 
+ 
+ read(fid,nml=rgn_info) 
+ if ( num_of_rgn /= ADM_rgn_nmax ) then 
+ write(ADM_LOG_FID,*) 'xxx No match for region number! STOP.' 
+ write(ADM_LOG_FID,*) 'xxx ADM_rgn_nmax= ',ADM_rgn_nmax,' num_of_rgn=',num_of_rgn + call ADM_proc_stop + endif + + allocate( ADM_rgn_etab( ADM_RID:ADM_DIR, & + ADM_SW:ADM_SE, & + ADM_rgn_nmax ) ) + + do l = 1, ADM_rgn_nmax + read(fid,nml=rgn_link_info) + + ADM_rgn_etab(ADM_RID:ADM_DIR,ADM_SW,rgnid) = sw(ADM_RID:ADM_DIR) + ADM_rgn_etab(ADM_RID:ADM_DIR,ADM_NW,rgnid) = nw(ADM_RID:ADM_DIR) + ADM_rgn_etab(ADM_RID:ADM_DIR,ADM_NE,rgnid) = ne(ADM_RID:ADM_DIR) + ADM_rgn_etab(ADM_RID:ADM_DIR,ADM_SE,rgnid) = se(ADM_RID:ADM_DIR) + enddo + + read(fid,nml=proc_info) + if ( ADM_prc_all /= num_of_proc ) then + write(ADM_LOG_FID,*) ' xxx No match for process number! STOP.' + write(ADM_LOG_FID,*) ' xxx ADM_prc_all= ',ADM_prc_all,' num_of_proc=',num_of_proc + call ADM_proc_stop + endif + + if ( ADM_prc_all /= num_of_proc ) then + write(ADM_LOG_FID,*) 'Msg : Sub[ADM_input_mngtab]/Mod[admin]' + write(ADM_LOG_FID,*) ' --- No match for process number!' + call ADM_proc_stop + endif + + allocate( ADM_prc_rnum(ADM_prc_all) ) + allocate( ADM_prc_tab (PRC_RGN_NMAX,ADM_prc_all) ) + allocate( ADM_rgn2prc (ADM_rgn_nmax) ) + ADM_prc_tab = -1 ! 
[Fix] 11/06/30 T.Seiki, fill undefined value + + do m = 1, ADM_prc_all + read(fid,nml=rgn_mng_info) + + ADM_prc_rnum(m) = num_of_mng + ADM_prc_tab (:,peid) = mng_rgnid(:) + do n = 1, num_of_mng + ADM_rgn2prc(mng_rgnid(n)) = peid + enddo + enddo + + call setup_vtab + + close(fid) + + return + end subroutine input_mnginfo + + !----------------------------------------------------------------------------- + !> + !> Description of the subroutine setup_vtab + !> + subroutine setup_vtab + implicit none + + integer :: nrid(ADM_VLINK_NMAX) + integer :: nvid(ADM_VLINK_NMAX) + integer :: vnum + + integer :: l, k, ll, v + !--------------------------------------------------------------------------- + + allocate( ADM_rgn_vnum( ADM_W:ADM_S, & + ADM_rgn_nmax ) ) + + allocate( ADM_rgn_vtab( ADM_RID:ADM_DIR,& + ADM_W:ADM_S, & + ADM_rgn_nmax, & + ADM_VLINK_NMAX ) ) + + allocate( ADM_rgn_vtab_pl( ADM_RID:ADM_DIR, & + ADM_RGN_NMAX_PL, & + ADM_VLINK_NMAX ) ) + + do l = 1, ADM_rgn_nmax + do k = ADM_W, ADM_S + call set_vinfo(vnum,nrid,nvid,l,k) + + ADM_rgn_vnum(k,l) = vnum + ADM_rgn_vtab(ADM_RID,k,l,:) = nrid(:) + ADM_rgn_vtab(ADM_DIR,k,l,:) = nvid(:) + enddo + enddo + + do l = 1, ADM_rgn_nmax + if ( ADM_rgn_vnum(ADM_N,l) == ADM_VLINK_NMAX ) then + ll = l + exit + endif + enddo + ADM_rgnid_npl_mng = ll + + do v = 1, ADM_VLINK_NMAX + ADM_rgn_vtab_pl(ADM_RID,ADM_NPL,v) = ADM_rgn_vtab(ADM_RID,ADM_N,ll,v) + ADM_rgn_vtab_pl(ADM_DIR,ADM_NPL,v) = ADM_rgn_vtab(ADM_DIR,ADM_N,ll,v) + enddo + + do l = 1, ADM_rgn_nmax + if ( ADM_rgn_vnum(ADM_S,l) == ADM_VLINK_NMAX ) then + ll = l + exit + endif + enddo + ADM_rgnid_spl_mng = ll + + do v = 1, ADM_VLINK_NMAX + ADM_rgn_vtab_pl(ADM_RID,ADM_SPL,v) = ADM_rgn_vtab(ADM_RID,ADM_S,ll,v) + ADM_rgn_vtab_pl(ADM_DIR,ADM_SPL,v) = ADM_rgn_vtab(ADM_DIR,ADM_S,ll,v) + enddo + + return + end subroutine setup_vtab + + !----------------------------------------------------------------------------- + !> + !> Description of the subroutine set_vinfo + !> + subroutine 
set_vinfo( vert_num, nrgnid, nvertid, rgnid, vertid ) + implicit none + + integer,intent(out) :: vert_num + integer,intent(out) :: nrgnid (:) + integer,intent(out) :: nvertid(:) + integer,intent(in) :: rgnid + integer,intent(in) :: vertid + + integer :: eid, rid + integer :: eid_new, rid_new + !--------------------------------------------------------------------------- + + vert_num = 0 + + rid = rgnid + eid = vertid + select case(vertid) + case(ADM_W) + eid = ADM_SW + case(ADM_N) + eid = ADM_NW + case(ADM_E) + eid = ADM_NE + case(ADM_S) + eid = ADM_SE + endselect + + nvertid(:) = -1 + nrgnid (:) = -1 + do + rid_new = ADM_rgn_etab(ADM_RID,eid,rid) + eid_new = ADM_rgn_etab(ADM_DIR,eid,rid) - 1 + + if( eid_new == 0 ) eid_new = 4 + rid = rid_new + eid = eid_new + + vert_num = vert_num + 1 + + nrgnid (vert_num) = rid + nvertid(vert_num) = eid + + if( rid == rgnid ) exit + enddo + + return + end subroutine set_vinfo + + !----------------------------------------------------------------------------- + !> + !> Description of the subroutine ADM_mk_suffix + !> + subroutine ADM_mk_suffix + implicit none + + integer :: gall_in + integer :: i, j, n + !--------------------------------------------------------------------------- + + gall_in = ADM_gmax-ADM_gmin+1 + + !--- ADM_IooJoo + ADM_IooJoo_nmax = ( gall_in ) * ( gall_in ) + allocate( ADM_IooJoo(ADM_IooJoo_nmax,ADM_GIJ_nmax) ) + n = 1 + do j = ADM_gmin, ADM_gmax + do i = ADM_gmin, ADM_gmax + ADM_IooJoo(n,ADM_GIoJo) = suf(i ,j ) + ADM_IooJoo(n,ADM_GIpJo) = suf(i+1,j ) + ADM_IooJoo(n,ADM_GIpJp) = suf(i+1,j+1) + ADM_IooJoo(n,ADM_GIoJp) = suf(i ,j+1) + ADM_IooJoo(n,ADM_GImJo) = suf(i-1,j ) + ADM_IooJoo(n,ADM_GImJm) = suf(i-1,j-1) + ADM_IooJoo(n,ADM_GIoJm) = suf(i ,j-1) + n = n + 1 + enddo + enddo + + !--- ADM_IooJmo + ADM_IooJmo_nmax = ( gall_in ) * ( gall_in+1 ) + allocate( ADM_IooJmo(ADM_IooJmo_nmax,ADM_GIJ_nmax) ) + n = 1 + do j = ADM_gmin-1, ADM_gmax + do i = ADM_gmin, ADM_gmax + ADM_IooJmo(n,ADM_GIoJo) = suf(i ,j ) + 
ADM_IooJmo(n,ADM_GIpJo) = suf(i+1,j ) + ADM_IooJmo(n,ADM_GIpJp) = suf(i+1,j+1) + ADM_IooJmo(n,ADM_GIoJp) = suf(i ,j+1) + ADM_IooJmo(n,ADM_GImJo) = suf(i-1,j ) + ADM_IooJmo(n,ADM_GImJm) = suf(i-1,j-1) + ADM_IooJmo(n,ADM_GIoJm) = suf(i ,j-1) + n = n + 1 + enddo + enddo + + !--- ADM_IooJop + ADM_IooJop_nmax = ( gall_in ) * ( gall_in+1 ) + allocate( ADM_IooJop(ADM_IooJop_nmax,ADM_GIJ_nmax) ) + n = 1 + do j = ADM_gmin, ADM_gmax+1 + do i = ADM_gmin, ADM_gmax + ADM_IooJop(n,ADM_GIoJo) = suf(i ,j ) + ADM_IooJop(n,ADM_GIpJo) = suf(i+1,j ) + ADM_IooJop(n,ADM_GIpJp) = suf(i+1,j+1) + ADM_IooJop(n,ADM_GIoJp) = suf(i ,j+1) + ADM_IooJop(n,ADM_GImJo) = suf(i-1,j ) + ADM_IooJop(n,ADM_GImJm) = suf(i-1,j-1) + ADM_IooJop(n,ADM_GIoJm) = suf(i ,j-1) + n = n + 1 + enddo + enddo + + !--- ADM_IooJmp + ADM_IooJmp_nmax = ( gall_in ) * ( gall_in+2 ) + allocate( ADM_IooJmp(ADM_IooJmp_nmax,ADM_GIJ_nmax) ) + n = 1 + do j = ADM_gmin-1, ADM_gmax+1 + do i = ADM_gmin, ADM_gmax + ADM_IooJmp(n,ADM_GIoJo) = suf(i ,j ) + ADM_IooJmp(n,ADM_GIpJo) = suf(i+1,j ) + ADM_IooJmp(n,ADM_GIpJp) = suf(i+1,j+1) + ADM_IooJmp(n,ADM_GIoJp) = suf(i ,j+1) + ADM_IooJmp(n,ADM_GImJo) = suf(i-1,j ) + ADM_IooJmp(n,ADM_GImJm) = suf(i-1,j-1) + ADM_IooJmp(n,ADM_GIoJm) = suf(i ,j-1) + n = n + 1 + enddo + enddo + + !--- ADM_ImoJoo + ADM_ImoJoo_nmax = ( gall_in+1 ) * ( gall_in ) + allocate( ADM_ImoJoo(ADM_ImoJoo_nmax,ADM_GIJ_nmax) ) + n = 1 + do j = ADM_gmin, ADM_gmax + do i = ADM_gmin-1, ADM_gmax + ADM_ImoJoo(n,ADM_GIoJo) = suf(i ,j ) + ADM_ImoJoo(n,ADM_GIpJo) = suf(i+1,j ) + ADM_ImoJoo(n,ADM_GIpJp) = suf(i+1,j+1) + ADM_ImoJoo(n,ADM_GIoJp) = suf(i ,j+1) + ADM_ImoJoo(n,ADM_GImJo) = suf(i-1,j ) + ADM_ImoJoo(n,ADM_GImJm) = suf(i-1,j-1) + ADM_ImoJoo(n,ADM_GIoJm) = suf(i ,j-1) + n = n + 1 + enddo + enddo + + !--- ADM_ImoJmo + ADM_ImoJmo_nmax = ( gall_in+1 ) * ( gall_in+1 ) + allocate( ADM_ImoJmo(ADM_ImoJmo_nmax,ADM_GIJ_nmax) ) + n = 1 + do j = ADM_gmin-1, ADM_gmax + do i = ADM_gmin-1, ADM_gmax + ADM_ImoJmo(n,ADM_GIoJo) = suf(i ,j ) + 
ADM_ImoJmo(n,ADM_GIpJo) = suf(i+1,j ) + ADM_ImoJmo(n,ADM_GIpJp) = suf(i+1,j+1) + ADM_ImoJmo(n,ADM_GIoJp) = suf(i ,j+1) + ADM_ImoJmo(n,ADM_GImJo) = suf(i-1,j ) + ADM_ImoJmo(n,ADM_GImJm) = suf(i-1,j-1) + ADM_ImoJmo(n,ADM_GIoJm) = suf(i ,j-1) + n = n + 1 + enddo + enddo + + !--- ADM_ImoJop + ADM_ImoJop_nmax = ( gall_in+1 ) * ( gall_in+1 ) + allocate( ADM_ImoJop(ADM_ImoJop_nmax,ADM_GIJ_nmax) ) + n = 1 + do j = ADM_gmin, ADM_gmax+1 + do i = ADM_gmin-1, ADM_gmax + ADM_ImoJop(n,ADM_GIoJo) = suf(i ,j ) + ADM_ImoJop(n,ADM_GIpJo) = suf(i+1,j ) + ADM_ImoJop(n,ADM_GIpJp) = suf(i+1,j+1) + ADM_ImoJop(n,ADM_GIoJp) = suf(i ,j+1) + ADM_ImoJop(n,ADM_GImJo) = suf(i-1,j ) + ADM_ImoJop(n,ADM_GImJm) = suf(i-1,j-1) + ADM_ImoJop(n,ADM_GIoJm) = suf(i ,j-1) + n = n + 1 + enddo + enddo + + !--- ADM_ImoJmp + ADM_ImoJmp_nmax = ( gall_in+1 ) * ( gall_in+2 ) + allocate( ADM_ImoJmp(ADM_ImoJmp_nmax,ADM_GIJ_nmax) ) + n = 1 + do j = ADM_gmin-1, ADM_gmax+1 + do i = ADM_gmin-1, ADM_gmax + ADM_ImoJmp(n,ADM_GIoJo) = suf(i ,j ) + ADM_ImoJmp(n,ADM_GIpJo) = suf(i+1,j ) + ADM_ImoJmp(n,ADM_GIpJp) = suf(i+1,j+1) + ADM_ImoJmp(n,ADM_GIoJp) = suf(i ,j+1) + ADM_ImoJmp(n,ADM_GImJo) = suf(i-1,j ) + ADM_ImoJmp(n,ADM_GImJm) = suf(i-1,j-1) + ADM_ImoJmp(n,ADM_GIoJm) = suf(i ,j-1) + n = n + 1 + enddo + enddo + + !--- ADM_IopJoo + ADM_IopJoo_nmax = ( gall_in+1 ) * ( gall_in ) + allocate( ADM_IopJoo(ADM_IopJoo_nmax,ADM_GIJ_nmax) ) + n = 1 + do j = ADM_gmin, ADM_gmax + do i = ADM_gmin, ADM_gmax+1 + ADM_IopJoo(n,ADM_GIoJo) = suf(i ,j ) + ADM_IopJoo(n,ADM_GIpJo) = suf(i+1,j ) + ADM_IopJoo(n,ADM_GIpJp) = suf(i+1,j+1) + ADM_IopJoo(n,ADM_GIoJp) = suf(i ,j+1) + ADM_IopJoo(n,ADM_GImJo) = suf(i-1,j ) + ADM_IopJoo(n,ADM_GImJm) = suf(i-1,j-1) + ADM_IopJoo(n,ADM_GIoJm) = suf(i ,j-1) + n = n + 1 + enddo + enddo + + !--- ADM_IopJmo + ADM_IopJmo_nmax = ( gall_in+1 ) * ( gall_in+1 ) + allocate( ADM_IopJmo(ADM_IopJmo_nmax,ADM_GIJ_nmax) ) + n = 1 + do j = ADM_gmin-1, ADM_gmax + do i = ADM_gmin, ADM_gmax+1 + ADM_IopJmo(n,ADM_GIoJo) = suf(i 
,j ) + ADM_IopJmo(n,ADM_GIpJo) = suf(i+1,j ) + ADM_IopJmo(n,ADM_GIpJp) = suf(i+1,j+1) + ADM_IopJmo(n,ADM_GIoJp) = suf(i ,j+1) + ADM_IopJmo(n,ADM_GImJo) = suf(i-1,j ) + ADM_IopJmo(n,ADM_GImJm) = suf(i-1,j-1) + ADM_IopJmo(n,ADM_GIoJm) = suf(i ,j-1) + n = n + 1 + enddo + enddo + + !--- ADM_IopJop + ADM_IopJop_nmax = ( gall_in+1 ) * ( gall_in+1 ) + allocate( ADM_IopJop(ADM_IopJop_nmax,ADM_GIJ_nmax) ) + n = 1 + do j = ADM_gmin, ADM_gmax+1 + do i = ADM_gmin, ADM_gmax+1 + ADM_IopJop(n,ADM_GIoJo) = suf(i ,j ) + ADM_IopJop(n,ADM_GIpJo) = suf(i+1,j ) + ADM_IopJop(n,ADM_GIpJp) = suf(i+1,j+1) + ADM_IopJop(n,ADM_GIoJp) = suf(i ,j+1) + ADM_IopJop(n,ADM_GImJo) = suf(i-1,j ) + ADM_IopJop(n,ADM_GImJm) = suf(i-1,j-1) + ADM_IopJop(n,ADM_GIoJm) = suf(i ,j-1) + n = n + 1 + enddo + enddo + + !--- ADM_IopJmp + ADM_IopJmp_nmax = ( gall_in+1 ) * ( gall_in+2 ) + allocate( ADM_IopJmp(ADM_IopJmp_nmax,ADM_GIJ_nmax) ) + n = 1 + do j = ADM_gmin-1, ADM_gmax+1 + do i = ADM_gmin, ADM_gmax+1 + ADM_IopJmp(n,ADM_GIoJo) = suf(i ,j ) + ADM_IopJmp(n,ADM_GIpJo) = suf(i+1,j ) + ADM_IopJmp(n,ADM_GIpJp) = suf(i+1,j+1) + ADM_IopJmp(n,ADM_GIoJp) = suf(i ,j+1) + ADM_IopJmp(n,ADM_GImJo) = suf(i-1,j ) + ADM_IopJmp(n,ADM_GImJm) = suf(i-1,j-1) + ADM_IopJmp(n,ADM_GIoJm) = suf(i ,j-1) + n = n + 1 + enddo + enddo + + !--- ADM_ImpJoo + ADM_ImpJoo_nmax = ( gall_in+2 ) * ( gall_in ) + allocate( ADM_ImpJoo(ADM_ImpJoo_nmax,ADM_GIJ_nmax) ) + n = 1 + do j = ADM_gmin, ADM_gmax + do i = ADM_gmin-1, ADM_gmax+1 + ADM_ImpJoo(n,ADM_GIoJo) = suf(i ,j ) + ADM_ImpJoo(n,ADM_GIpJo) = suf(i+1,j ) + ADM_ImpJoo(n,ADM_GIpJp) = suf(i+1,j+1) + ADM_ImpJoo(n,ADM_GIoJp) = suf(i ,j+1) + ADM_ImpJoo(n,ADM_GImJo) = suf(i-1,j ) + ADM_ImpJoo(n,ADM_GImJm) = suf(i-1,j-1) + ADM_ImpJoo(n,ADM_GIoJm) = suf(i ,j-1) + n = n + 1 + enddo + enddo + + !--- ADM_ImpJmo + ADM_ImpJmo_nmax = ( gall_in+2 ) * ( gall_in+1 ) + allocate( ADM_ImpJmo(ADM_ImpJmo_nmax,ADM_GIJ_nmax) ) + n = 1 + do j = ADM_gmin-1, ADM_gmax + do i = ADM_gmin-1, ADM_gmax+1 + 
ADM_ImpJmo(n,ADM_GIoJo) = suf(i ,j ) + ADM_ImpJmo(n,ADM_GIpJo) = suf(i+1,j ) + ADM_ImpJmo(n,ADM_GIpJp) = suf(i+1,j+1) + ADM_ImpJmo(n,ADM_GIoJp) = suf(i ,j+1) + ADM_ImpJmo(n,ADM_GImJo) = suf(i-1,j ) + ADM_ImpJmo(n,ADM_GImJm) = suf(i-1,j-1) + ADM_ImpJmo(n,ADM_GIoJm) = suf(i ,j-1) + n = n + 1 + enddo + enddo + + !--- ADM_ImpJop + ADM_ImpJop_nmax = ( gall_in+2 ) * ( gall_in+1 ) + allocate( ADM_ImpJop(ADM_ImpJop_nmax,ADM_GIJ_nmax) ) + n = 1 + do j = ADM_gmin, ADM_gmax+1 + do i = ADM_gmin-1, ADM_gmax+1 + ADM_ImpJop(n,ADM_GIoJo) = suf(i ,j ) + ADM_ImpJop(n,ADM_GIpJo) = suf(i+1,j ) + ADM_ImpJop(n,ADM_GIpJp) = suf(i+1,j+1) + ADM_ImpJop(n,ADM_GIoJp) = suf(i ,j+1) + ADM_ImpJop(n,ADM_GImJo) = suf(i-1,j ) + ADM_ImpJop(n,ADM_GImJm) = suf(i-1,j-1) + ADM_ImpJop(n,ADM_GIoJm) = suf(i ,j-1) + n = n + 1 + enddo + enddo + + !--- ADM_ImpJmp + ADM_ImpJmp_nmax = ( gall_in+2 ) * ( gall_in+2 ) + allocate( ADM_ImpJmp(ADM_ImpJmp_nmax,ADM_GIJ_nmax) ) + n = 1 + do j = ADM_gmin-1, ADM_gmax+1 + do i = ADM_gmin-1, ADM_gmax+1 + ADM_ImpJmp(n,ADM_GIoJo) = suf(i ,j ) + ADM_ImpJmp(n,ADM_GIpJo) = suf(i+1,j ) + ADM_ImpJmp(n,ADM_GIpJp) = suf(i+1,j+1) + ADM_ImpJmp(n,ADM_GIoJp) = suf(i ,j+1) + ADM_ImpJmp(n,ADM_GImJo) = suf(i-1,j ) + ADM_ImpJmp(n,ADM_GImJm) = suf(i-1,j-1) + ADM_ImpJmp(n,ADM_GIoJm) = suf(i ,j-1) + n = n + 1 + enddo + enddo + + return + contains + !--------------------------------------------------------------------------- + integer function suf(i,j) + implicit none + + integer :: i, j + !------------------------------------------------------------------------- + + suf = ADM_gall_1d * (j-1) + i + + end function suf + + end subroutine ADM_mk_suffix + + !----------------------------------------------------------------------------- + !> + !> Description of the subroutine output_info + !> + subroutine output_info + implicit none + + integer :: n, k, m + integer :: rgnid + !--------------------------------------------------------------------------- + + write(ADM_LOG_FID,*) + 
write(ADM_LOG_FID,'(1x,A)' ) '====== Process management info. ======' + write(ADM_LOG_FID,'(1x,A,I7)') '--- Total number of process : ', ADM_prc_all + write(ADM_LOG_FID,'(1x,A,I7)') '--- My Process rank : ', ADM_prc_me + write(ADM_LOG_FID,'(1x,A)' ) '====== Region/Grid topology info. ======' + write(ADM_LOG_FID,'(1x,A,A)' ) '--- Grid sysytem : ', trim(ADM_HGRID_SYSTEM) + write(ADM_LOG_FID,'(1x,A,I7)') '--- # of diamond : ', NMAX_DMD + write(ADM_LOG_FID,'(1x,A)' ) '====== Region management info. ======' + write(ADM_LOG_FID,'(1x,A,I7)') '--- Region level (RL) : ', ADM_rlevel + write(ADM_LOG_FID,'(1x,A,I7,3(A,I4),A)') '--- Total number of region : ', ADM_rgn_nmax, & + ' (', 2**ADM_rlevel, ' x', 2**ADM_rlevel, ' x', NMAX_DMD, ' )' + write(ADM_LOG_FID,'(1x,A,I7)') '--- # of region per process : ', ADM_lall + write(ADM_LOG_FID,'(1x,A)' ) '--- ID of region in my process : ' + write(ADM_LOG_FID,*) ADM_prc_tab(1:ADM_lall, ADM_prc_me) + + write(ADM_LOG_FID,'(1x,A,I7)') '--- Region ID, contains north pole : ', ADM_rgnid_npl_mng + write(ADM_LOG_FID,'(1x,A,I7)') '--- Region ID, contains south pole : ', ADM_rgnid_spl_mng + write(ADM_LOG_FID,'(1x,A,I7)') '--- Process rank, managing north pole : ', ADM_prc_npl + write(ADM_LOG_FID,'(1x,A,I7)') '--- Process rank, managing south pole : ', ADM_prc_spl + write(ADM_LOG_FID,'(1x,A)' ) '====== Grid management info. 
======' + write(ADM_LOG_FID,'(1x,A,I7)') '--- Grid level (GL) : ', ADM_glevel + write(ADM_LOG_FID,'(1x,A,I7,2(A,I4),A,I7,A)') '--- Total number of grid (horizontal) : ', & + 4**(ADM_glevel-ADM_rlevel)*ADM_rgn_nmax, & + ' (', 2**(ADM_glevel-ADM_rlevel), & + ' x', 2**(ADM_glevel-ADM_rlevel), & + ' x', ADM_rgn_nmax, ' )' + write(ADM_LOG_FID,'(1x,A,I7)') '--- Number of vertical layer : ', ADM_kmax-ADM_kmin+1 + + if ( ADM_debug ) then + do n = 1, ADM_lall + rgnid = ADM_prc_tab(n, ADM_prc_me) + write(ADM_LOG_FID,*) ' --- Link information for region', rgnid + + write(ADM_LOG_FID,*) ' < edge link > --- ( rgnid , edgid )' + do k = ADM_SW, ADM_SE + write(ADM_LOG_FID,*) ' (',rgnid,',',k,') -> ', & + '(', ADM_rgn_etab(ADM_RID,k,rgnid), & + ',', ADM_rgn_etab(ADM_DIR,k,rgnid), ')' + enddo + + write(ADM_LOG_FID,*) ' < vertex link > --- ( rgnid , edgid )' + do k = ADM_W, ADM_S + write(ADM_LOG_FID,*) ' (',rgnid,',',k,') : ', ADM_rgn_vnum(k,rgnid), 'point link' + do m = 1, ADM_rgn_vnum(k,rgnid) + write(ADM_LOG_FID,*) ' -> ', & + '(', ADM_rgn_vtab(ADM_RID,k,rgnid,m), & + ',', ADM_rgn_vtab(ADM_DIR,k,rgnid,m), ')' + enddo + enddo + + enddo + + write(ADM_LOG_FID,*) ' --- Table of corresponding between region ID and process ID' + write(ADM_LOG_FID,*) ' region ID : process ID' + do n = 1, ADM_rgn_nmax + write(ADM_LOG_FID,'(I13,I14)') n, ADM_rgn2prc(n) + enddo + endif + + return + end subroutine output_info + +end module mod_adm +!------------------------------------------------------------------------------- diff --git a/NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 b/NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 new file mode 100755 index 0000000..09b81e2 --- /dev/null +++ b/NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 @@ -0,0 +1,4344 @@ +!------------------------------------------------------------------------------- +! +!+ communication module +! 
+!------------------------------------------------------------------------------- +module mod_comm + !----------------------------------------------------------------------------- + ! + !++ description: + ! this module is for the communication based on mpi library. + ! + !++ Current Corresponding Author : K.Goto, H.Tomita + ! + !++ History: + ! Version Date Comment + ! ----------------------------------------------------------------------- + ! 0.00 04-02-17 Imported from igdc-4.33 + ! 06-09-?? K.Goto bug fix + ! 06-10-08 S.Iga add namelist (&COMMPARAM max_varmax) + ! 07-11-07 T.Mitsui add varmax check option(opt_check_varmax) + ! 09-03-10 H.Tomita : Transplanting COMM_data_transfer2 from + ! mod[mod_varcomm]. + ! 09-03-10 H.Tomita : rename COMM_data_transfer2 to COMM_var. + ! 09-09-17 S.Iga : Add debug option and barrier option + ! 10-06-07 S.Iga: new grid is implemented + ! (only the attribute of max_comm_xxx and + ! max_comm is changed from parameter to variable) + ! 11-01-24 C.Kodama: Reduce memory usage in large rlevel. + ! (provided by Terai-san @ RIKEN) + ! Modified line: (20101207 teraim) + ! 11-04-26 C.Kodama: default value of opt_check_varmax is changed to .true. + ! and modify its behavior to abort when cmax exceeds max_maxvar*ADM_kall. + ! 11-05-06 Y.Yamada: Merge tuning code with original code + ! (provided by Yamamoto-san @ NEC) + ! Modified line: !=org= + ! 11-07-21 T.Ohno: A public variable 'comm_pl' is added. + ! If 'comm_pl' is false, pole data is not used in + ! COMM_data_transfer and COMM_var. + ! 11-11-30 S.Iga (commit) : Modification around comm_var, + ! suggested and modified by T.Inoue on 11-10-24 + ! 11-12-14 T.Seiki : allocatable variables are not permitted in type structure. + ! allocatable => pointer (only @ SR16000 and ES) + ! 12-03-26 T.Seiki : bug-fix if opt_comm_dbg=.true. + ! 12-06-27 T.Ohno : bug fix for simulations at which + ! 'comm_pl' is false + ! ----------------------------------------------------------------------- + ! 
+ !----------------------------------------------------------------------------- + ! + !++ used modules + ! + !--- 2020 Fujitsu + use mpi + !use mod_coarray + use xmp_api + !--- 2020 Fujitsu end + use mod_debug + use mod_adm, only: & + ADM_LOG_FID, & + ADM_vlink_nmax + !----------------------------------------------------------------------------- + implicit none + private + !----------------------------------------------------------------------------- + ! + !++ public procedure + ! + public :: COMM_setup + public :: COMM_data_transfer + public :: COMM_data_transfer_rgn2pl + public :: COMM_data_transfer_nopl ! T.Ohno 110721 + public :: COMM_var + public :: COMM_Stat_sum + public :: COMM_Stat_sum_eachlayer + public :: COMM_Stat_avg + public :: COMM_Stat_max + public :: COMM_Stat_min + ! + !----------------------------------------------------------------------------- + + + ! + !++ private parameters & variables + ! + integer,parameter,private::max_comm_r2r=9 +! integer,parameter,private::max_comm_r2p=ADM_vlink_nmax*2 !S.Iga100607 del +! integer,parameter,private::max_comm_p2r=ADM_vlink_nmax*2 !S.Iga100607 del +! integer,parameter,private::max_comm=max_comm_r2r+max_comm_r2p+max_comm_p2r !S.Iga100607 del + integer,private,save::max_comm_r2p!S.Iga100607 + integer,private,save::max_comm_p2r!S.Iga100607 + integer,private,save::max_comm!S.Iga100607 +! integer,parameter,private::max_varmax=32 + integer,save,private::max_varmax=32 ! Iga(061008) + logical,save,private::opt_check_varmax = .true. ! T.Mitsui 07/11/07 ! [mod] C.Kodama .false. -> .true. + real(8),save,private::diag_varmax=0.d0 ! T.Mitsui 07/11/07 + ! + logical,save,private::opt_comm_dbg = .false. ! S.Iga 09/09/XX + real(8),save,private::dbg_sendbuf_init ! S.Iga 09/09/XX + real(8),save,private::dbg_recvbuf_init ! S.Iga 09/09/XX + logical,save,private::opt_comm_barrier = .false. ! S.Iga 09/09/XX + integer,save,private,allocatable::dbg_areq_save(:,:) ! S.Iga 09/09/XX + integer,save,private::dbg_tcount = 1 ! 
count comm_data_transfer is called S.Iga 09/09/XX + ! + ! + integer,parameter,private::ptr_prcid=1 + integer,parameter,private::ptr_lrgnid=2 + ! + integer,parameter,private::elemsize_comm=3 + integer,parameter,private::SIZE_COMM=1 + integer,parameter,private::LRGNID_COMM=2 + integer,parameter,private::BASE_COMM=3 + ! + integer,parameter,private::elemsize_copy=3 + integer,parameter,private::SIZE_COPY=1 + integer,parameter,private::LRGNID_COPY=2 + integer,parameter,private::SRC_LRGNID_COPY=3 + ! + !-------------------------------------------------- + integer,private,save::rank_me + integer,private,save::max_comm_prc +! integer,private,save::maxdatasize + integer,private,save::maxdatasize_s + integer,private,save::maxdatasize_r + ! + integer,private,save::maxn + integer,save,private::maxm + integer,save,private::maxl + !---- + integer,private,save::maxn_pl + integer,save,private::maxm_pl + integer,save,private::maxl_pl + !---- + integer,private,save::maxn_r2r + integer,save,private::maxm_r2r + integer,save,private::maxl_r2r + !---- + integer,private,save::maxn_r2p + integer,save,private::maxm_r2p + integer,save,private::maxl_r2p + !---- + integer,private,save::maxn_p2r + integer,save,private::maxm_p2r + integer,save,private::maxl_p2r + !---- + integer,private,save::maxn_sgp + integer,save,private::maxm_sgp + integer,save,private::maxl_sgp + ! + integer,allocatable,private,save::prc_tab_rev(:,:) + ! + integer,allocatable,private,save::clist(:) + ! + !-------------------------------------------------- + ! for send + !-------------------------------------------------- + integer,allocatable,private,save::nsmax(:,:) + integer,allocatable,public,save::sendinfo(:,:,:,:) + integer,allocatable,public,save::sendlist(:,:,:,:) + integer,allocatable,private,save::nsmax_pl(:,:) + integer,allocatable,private,save::sendinfo_pl(:,:,:,:) + integer,allocatable,private,save::sendlist_pl(:,:,:,:) + ! + !-------------------------------------------------- + ! 
for copy + !-------------------------------------------------- + integer,allocatable,private,save::ncmax_r2r(:) + integer,allocatable,private,save::copyinfo_r2r(:,:,:) + integer,allocatable,private,save::recvlist_r2r(:,:,:) + integer,allocatable,private,save::sendlist_r2r(:,:,:) + !-------------------------------------------------- + integer,allocatable,private,save::ncmax_r2p(:) + integer,allocatable,private,save::copyinfo_r2p(:,:,:) + integer,allocatable,private,save::recvlist_r2p(:,:,:) + integer,allocatable,private,save::sendlist_r2p(:,:,:) + !-------------------------------------------------- + integer,allocatable,private,save::ncmax_p2r(:) + integer,allocatable,private,save::copyinfo_p2r(:,:,:) + integer,allocatable,private,save::recvlist_p2r(:,:,:) + integer,allocatable,private,save::sendlist_p2r(:,:,:) + !-------------------------------------------------- + integer,allocatable,private,save::ncmax_sgp(:) + integer,allocatable,private,save::copyinfo_sgp(:,:,:) + integer,allocatable,private,save::recvlist_sgp(:,:,:) + integer,allocatable,private,save::sendlist_sgp(:,:,:) + !-------------------------------------------------- + ! + !-------------------------------------------------- + ! for recv + !-------------------------------------------------- + integer,allocatable,private,save::nrmax(:,:) + integer,allocatable,public,save::recvinfo(:,:,:,:) + integer,allocatable,public,save::recvlist(:,:,:,:) + integer,allocatable,private,save::lrmax_pl(:) + integer,allocatable,private,save::nrmax_pl(:,:) + integer,allocatable,private,save::recvinfo_pl(:,:,:,:) + integer,allocatable,private,save::recvlist_pl(:,:,:,:) + !-------------------------------------------------- + ! 
+ integer,allocatable,private::temp_sendorder(:,:) + integer,allocatable,private::temp_recvorder(:,:) + integer,allocatable,private::temp_dest_rgn(:,:,:) + integer,allocatable,private::temp_src_rgn(:,:,:) + integer,allocatable,private::temp_dest_rgn_pl(:,:,:) + integer,allocatable,private::temp_src_rgn_pl(:,:,:) + !integer,allocatable,private::temp_sb(:,:,:) !(20101207)removed by teraim + integer,allocatable,private::tsb(:) + ! + integer,allocatable,private,save::ssize(:,:) + integer,allocatable,private,save::sendtag(:,:) + integer,allocatable,private,save::somax(:) + integer,allocatable,private,save::destrank(:,:) + real(8),allocatable,public,save::sendbuf(:,:) + !--- 2020 Fujitsu + integer(8) :: sendbuf_l_desc + !--- 2020 Fujitsu end + integer,allocatable,private,save::rsize(:,:) + integer,allocatable,private,save::recvtag(:,:) + integer,allocatable,private,save::romax(:) + integer,allocatable,private,save::sourcerank(:,:) + real(8),allocatable,public,save::recvbuf(:,:) + ! + integer,allocatable,private,save::n_nspl(:,:) + ! + integer,allocatable,private,save::n_hemisphere_copy(:,:,:) + integer,allocatable,private,save::s_hemisphere_copy(:,:,:) + ! 
+ !-------------------------------------------------- + integer,allocatable,private,save::rsize_r2r(:,:,:) + integer,allocatable,private,save::ssize_r2r(:,:,:) + integer,allocatable,private,save::sourceid_r2r(:,:,:) + integer,allocatable,private,save::destid_r2r(:,:,:) + !integer,allocatable,private,save::mrecv_r2r(:,:,:) !(20101207)removed by teraim + integer,allocatable,private,save::msend_r2r(:,:,:) + integer,allocatable,private,save::maxcommrecv_r2r(:,:) + integer,allocatable,private,save::maxcommsend_r2r(:,:) + !integer,allocatable,private,save::recvtag_r2r(:,:,:) !(20101207)removed by teraim + !integer,allocatable,private,save::sendtag_r2r(:,:,:) !(20101207)removed by teraim + integer,allocatable,private,save::rlist_r2r(:,:,:,:) + integer,allocatable,private,save::qlist_r2r(:,:,:,:) + integer,allocatable,private,save::slist_r2r(:,:,:,:) + integer,private,save::max_datasize_r2r + real(8),allocatable,private::recvbuf_r2r(:,:,:) + real(8),allocatable,private::sendbuf_r2r(:,:,:) + ! + integer,allocatable,private,save::rsize_r2p(:,:,:) + integer,allocatable,private,save::ssize_r2p(:,:,:) + integer,allocatable,private,save::source_prc_r2p(:,:,:) + integer,allocatable,private,save::source_rgn_r2p(:,:,:) + integer,allocatable,private,save::dest_prc_r2p(:,:,:) + integer,allocatable,private,save::maxcommrecv_r2p(:,:) + integer,allocatable,private,save::maxcommsend_r2p(:,:) + integer,allocatable,private,save::recvtag_r2p(:,:,:) + integer,allocatable,private,save::sendtag_r2p(:,:,:) + integer,allocatable,private,save::rlist_r2p(:,:,:,:) + integer,allocatable,private,save::qlist_r2p(:,:,:,:) + integer,allocatable,private,save::slist_r2p(:,:,:,:) + integer,save,private :: max_datasize_r2p + real(8),allocatable,private::recvbuf_r2p(:,:,:) + real(8),allocatable,private::sendbuf_r2p(:,:,:) + ! 
+ integer,allocatable,private,save::recvtag_p2r(:,:) + integer,allocatable,private,save::sendtag_p2r(:,:) + real(8),allocatable,private::sendbuf_p2r(:,:) + real(8),allocatable,private::recvbuf_p2r(:,:) + !---------------------------------------------- + integer,allocatable,private,save::dest_rank_all(:,:,:) + integer,allocatable,private,save::src_rank_all(:,:,:) + !---------------------------------------------- + +!! real(8),allocatable,public:: comm_dbg_recvbuf(:,:,:) !iga +!! real(8),allocatable,public:: comm_dbg_sendbuf(:,:,:) !iga + + + + ! + integer,allocatable,private,save::imin(:),imax(:) & + ,jmin(:),jmax(:) & + ,gmin(:),gmax(:) & + ,gall(:) + ! + integer,allocatable,private,save::nmin_nspl(:),nmax_nspl(:) & + ,pmin_nspl(:),pmax_nspl(:) & + ,lmin_nspl(:),lmax_nspl(:) & + ,gmin_nspl(:),gmax_nspl(:) & + ,gall_nspl(:) + integer,allocatable,private,save::pl_index(:,:,:,:) + ! + !---------------------------------------------------------------- + !++ public variablesizc + ! integer,allocatable,public,save::izc(:,:,:) + ! integer,allocatable,public,save::itc(:,:,:,:) + ! integer,allocatable,public,save::itc2(:,:,:,:) + ! integer,allocatable,public,save::ntr(:,:,:,:,:) + ! integer,parameter,public::order=2 + ! integer,parameter,public::noupfi=(order+1)*(order+2)/2 + !---------------------------------------------------------------- + ! 
+
+ integer,private,save::halomax ! max number of halo layers (set from max_hallo_num in COMM_setup; default 1)
+ integer,private,save::kmax ! number of vertical layers (= ADM_kall, set in COMM_setup)
+
+ integer,private,save :: comm_call_count = 0
+ real(8),private,save::time_total= 0.D0 ! timing/statistics accumulators -- NOTE(review): updated outside this chunk
+ real(8),private,save::time_pre = 0.D0
+ real(8),private,save::time_bar1= 0.D0
+ real(8),private,save::time_bar2= 0.D0
+ real(8),private,save::time_sbuf= 0.D0
+ real(8),private,save::time_recv= 0.D0
+ real(8),private,save::time_send= 0.D0
+ real(8),private,save::time_copy= 0.D0
+ real(8),private,save::time_wait= 0.D0
+ real(8),private,save::time_rbuf= 0.D0
+ real(8),private,save::time_copy_sgp= 0.D0
+ real(8),private,save::size_total= 0.D0
+ real(8),private,save::comm_count= 0.D0
+ real(8),private::t(0:12) ! intermediate time stamps -- NOTE(review): usage not visible in this chunk
+ !
+ !(20101207)added by teraim -- tempsb: per-row list of (col,val) pairs, filled by add_tempsb and read by get_tempsb
+ type type_tempsb
+ integer::num ! number of entries currently stored in col/val
+ ! 2011/12/14 [Mod] T.Seiki
+!!$ integer,allocatable::col(:)
+!!$ integer,allocatable::val(:)
+ integer,pointer :: col(:) ! column keys, -1 while unused
+ integer,pointer :: val(:) ! values paired with col entries, -1 while unused
+ end type
+ type(type_tempsb),allocatable::tempsb(:) ! one row per region plus 2 -- presumably the two poles, TODO confirm
+ integer,parameter::max_size=10 ! max entries per row. This is not optimal value.
+ !
+ logical, public, save :: comm_pl = .true. ! set .false. in COMM_setup when no process holds a pole region ! T.Ohno 110721
+ !-----------------------------------------------------------------------------
+contains
+ !-----------------------------------------------------------------------------
+ !(20101207) added by teraim -- init_tempsb: allocate tempsb (ADM_rgn_nmax+2 rows) and mark every slot empty (-1)
+ subroutine init_tempsb
+ use mod_adm, only : ADM_rgn_nmax
+ implicit none
+ integer::i
+ !
+ allocate(tempsb(ADM_rgn_nmax+2))
+ tempsb(:)%num=0 ! every row starts with zero stored entries
+ !
+ do i=1, ADM_rgn_nmax+2
+ allocate(tempsb(i)%col(max_size))
+ allocate(tempsb(i)%val(max_size))
+ tempsb(i)%col(:)=-1 ! -1 marks an unused slot
+ tempsb(i)%val(:)=-1
+ enddo
+ end subroutine
+ !(20101207) added by teraim -- finalize_tempsb: release every row's col/val arrays, then the table itself
+ subroutine finalize_tempsb
+ use mod_adm, only : ADM_rgn_nmax
+ implicit none
+ integer::i
+ !
+ do i=1, ADM_rgn_nmax+2
+ deallocate(tempsb(i)%col)
+ deallocate(tempsb(i)%val)
+ enddo
+ deallocate(tempsb)
+ end subroutine
+ !(20101207) added by teraim -- add_tempsb: append entry (icol,ival) to row irow of tempsb
+ subroutine add_tempsb(icol, irow, ival)
+ implicit none
+ integer,intent(in)::icol,irow,ival
+ !
+ if(ival > 0) then ! non-positive values are silently ignored
+ if(tempsb(irow)%num < max_size) then
+ tempsb(irow)%num = tempsb(irow)%num + 1
+ tempsb(irow)%col(tempsb(irow)%num)=icol
+ tempsb(irow)%val(tempsb(irow)%num)=ival
+ else
+ write(*,*)"range of list is over."
+ stop ! hard abort: row irow already holds max_size entries
+ endif
+ endif
+ end subroutine
+ !(20101207) added by teraim -- show_tempsb: debug dump of every stored (col,val) entry of tempsb
+ subroutine show_tempsb
+ use mod_adm, only : ADM_rgn_nmax
+ implicit none
+ integer::i,j
+ !
+ write(*,*)"show"
+ do i=1, ADM_rgn_nmax+2
+ do j=1, max_size
+ if(tempsb(i)%val(j) > 0) then
+ write(*,*)"(i,j)=",i,j," : show_list=(",i,tempsb(i)%col(j),tempsb(i)%val(j),")"
+ endif
+ enddo
+ enddo
+ end subroutine
+ !(20101207) added by teraim -- get_tempsb: linear search of row irow for key icol; ret=0 when the key is absent
+ subroutine get_tempsb(icol, irow, ret)
+ implicit none
+ integer,intent(in)::icol,irow
+ integer,intent(out)::ret
+ integer::i
+ !
+ ret = 0 ! default: key not found
+ do i=1, max_size
+ if(tempsb(irow)%col(i) == icol) then
+ ret = tempsb(irow)%val(i)
+ exit ! first match wins
+ endif
+ enddo
+ end subroutine
+ !-----------------------------------------------------------------------------
+ subroutine COMM_setup( &
+ max_hallo_num, & !--- IN : number of halo layers (optional; default 1)
+ debug ) !--- IN : debug flag (optional)
+ use mod_adm, only : &
+ !--- public parameters
+ ADM_w, &
+ ADM_e, &
+ ADM_n, &
+ ADM_s, &
+ ADM_sw, &
+ ADM_nw, &
+ ADM_ne, &
+ ADM_se, &
+ ADM_rid, &
+ ADM_dir, &
+ ADM_vlink_nmax, &
+ ADM_rgn_nmax_pl,&
+ ADM_npl, &
+ ADM_spl, &
+ ADM_gslf_pl, &
+ ADM_prc_all, &
+ ADM_prc_rnum, &
+ ADM_prc_tab, &
+ ADM_prc_me, &
+ ADM_rgn_nmax, &
+ ADM_rgn_etab, &
+ ADM_rgn_vnum, &
+ ADM_rgn_vtab, &
+ ADM_rgn_vtab_pl,&
+ ADM_gmin, &
+ ADM_gmax, &
+ ADM_gall_1d, &
+ ADM_lall, &
+ ADM_kall, &
+ ADM_prc_nspl, &
+ ADM_comm_run_world,&
+ ADM_rgn2prc,& !(20101207)added by teraim
+ !--- For namelist.
+ ADM_CTL_FID, & !Iga(061008)
+ ADM_LOG_FID !Iga(061008)
+ implicit none
+
+ integer,intent(in),optional :: max_hallo_num ! number of halo layers to build exchange tables for
+ logical,intent(in),optional :: debug
+
+ integer :: i,j,l,n,m,p,q ! generic loop counters
+ integer :: rgnid
+ integer :: ierr
+ !
+ !integer :: nn,t,n1,n2,n3,n4,n5,n6
+ !
+ integer::lr,mr + integer::ls,ms + integer::nr,nc,ns + integer::rs,cs,ss + integer::nd,ld,pl,halo + integer::in,jn + integer::srgnid,rrgnid + integer::ck + integer::srank,drank + integer::ro,so + ! + integer::suf,g_1d + suf(i,j,g_1d)=(g_1d)*((j)-1)+(i) + ! + integer::rgnid1,rgnid2,ret !(20101207) added by teraim + integer:: ll + integer :: colc(10),valc(10) + !--- 2020 Fujitsu + integer(8) :: sendbuf_l_lb(2), sendbuf_l_ub(2) + !--- 2020 Fujitsu end + ! + ! Iga(061008) ==> + namelist / COMMPARAM / & + max_varmax, & ! max number of communication variables + opt_check_varmax, & ! check option of varmax [Add] T.Mitsui 07/11/07 + opt_comm_dbg, & ! debug option of comm_data_transfer [Add] S.Iga 0909XX + opt_comm_barrier ! debug option of comm_data_transfer [Add] S.Iga 0909XX + ! + max_comm_r2p=ADM_vlink_nmax*2!S.Iga100607 + max_comm_p2r=ADM_vlink_nmax*2!S.Iga100607 + max_comm=max_comm_r2r+max_comm_r2p+max_comm_p2r!S.Iga100607 + !--- < reading parameters > --- + ! + rewind(ADM_CTL_FID) + read(ADM_CTL_FID,nml=COMMPARAM,iostat=ierr) + if(ierr<0) then + write(ADM_LOG_FID,*) & + 'Msg : Sub[COMM_setup]/Mod[comm]' + write(ADM_LOG_FID,*) & + ' *** No namelist in paramter file.' + write(ADM_LOG_FID,*) & + ' *** Use default values.' + else if(ierr>0) then + write(*,*) & + 'Msg : Sub[COMM_setup]/Mod[comm]' + write(*,*) & + ' *** WARNING : Not appropriate names in namelist!! CHECK!!' + end if + write(ADM_LOG_FID,COMMPARAM) + ! <== Iga(061008) + + if (present(max_hallo_num)) then + halomax=max_hallo_num + else + halomax=1 + endif + ! if (present(max_k_num)) then + ! kmax=max_k_num + ! else + ! kmax=ADM_kall + ! endif + kmax=ADM_kall + ! + allocate(prc_tab_rev(ptr_prcid:ptr_lrgnid,ADM_rgn_nmax)) + ! + do p=1,ADM_prc_all + do n=1,ADM_prc_rnum(p) + prc_tab_rev(ptr_prcid,ADM_prc_tab(n,p))=p + prc_tab_rev(ptr_lrgnid,ADM_prc_tab(n,p))=n + end do + end do + +! if (ADM_prc_me.eq.1) write(*,*) 'ADM_prc_tab',ADM_prc_tab +! 
if (ADM_prc_me.eq.1) write(*,*) 'prc_tab_rev', prc_tab_rev + + if(ADM_prc_nspl(ADM_npl) < 0 .and. ADM_prc_nspl(ADM_spl) <0 ) comm_pl = .false. ! T.Ohno 110721 + + ! + allocate(imin(halomax)) + allocate(imax(halomax)) + allocate(jmin(halomax)) + allocate(jmax(halomax)) + allocate(gmin(halomax)) + allocate(gmax(halomax)) + allocate(gall(halomax)) + ! + !(20101207) changed by teraim + allocate(rsize_r2r(max_comm_r2r,halomax,ADM_rgn_nmax)) + allocate(ssize_r2r(max_comm_r2r,halomax,ADM_rgn_nmax)) + allocate(sourceid_r2r(max_comm_r2r,halomax,ADM_rgn_nmax)) + allocate(destid_r2r(max_comm_r2r,halomax,ADM_rgn_nmax)) + !allocate(mrecv_r2r(ADM_rgn_nmax,halomax,ADM_rgn_nmax)) + !allocate(msend_r2r(ADM_rgn_nmax,halomax,ADM_rgn_nmax)) + rgnid1=ADM_prc_tab(1,ADM_prc_me) + rgnid2=ADM_prc_tab(ADM_prc_rnum(ADM_prc_me),ADM_prc_me) + allocate(msend_r2r(ADM_rgn_nmax,halomax,rgnid1:rgnid2)) + allocate(maxcommrecv_r2r(halomax,ADM_rgn_nmax)) + allocate(maxcommsend_r2r(halomax,ADM_rgn_nmax)) + !allocate(recvtag_r2r(ADM_rgn_nmax,halomax,ADM_rgn_nmax)) + !allocate(sendtag_r2r(ADM_rgn_nmax,halomax,ADM_rgn_nmax)) + ! + imin(halomax)=(ADM_gmin-1)+halomax + imax(halomax)=(ADM_gmax-1)+halomax + jmin(halomax)=(ADM_gmin-1)+halomax + jmax(halomax)=(ADM_gmax-1)+halomax + gmin(halomax)=(ADM_gmin-1)+halomax + gmax(halomax)=(ADM_gmax-1)+halomax + gall(halomax)=(ADM_gall_1d-2)+2*halomax + ! + max_datasize_r2r=(gmax(halomax)-gmin(halomax)+1)*halomax + allocate(rlist_r2r(max_datasize_r2r,max_comm_r2r,halomax,ADM_rgn_nmax)) + allocate(qlist_r2r(max_datasize_r2r,max_comm_r2r,halomax,ADM_rgn_nmax)) + allocate(slist_r2r(max_datasize_r2r,max_comm_r2r,halomax,ADM_rgn_nmax)) + ! + allocate(recvbuf_r2r(max_datasize_r2r*kmax*max_varmax & + ,ADM_prc_rnum(ADM_prc_me),max_comm_r2r)) + allocate(sendbuf_r2r(max_datasize_r2r*kmax*max_varmax & + ,ADM_prc_rnum(ADM_prc_me),max_comm_r2r)) + ! + allocate(n_hemisphere_copy(ADM_w:ADM_s,halomax,ADM_rgn_nmax)) + allocate(s_hemisphere_copy(ADM_w:ADM_s,halomax,ADM_rgn_nmax)) + ! 
+ rsize_r2r(:,:,:)=0 + ssize_r2r(:,:,:)=0 + sourceid_r2r(:,:,:)=-1 + destid_r2r(:,:,:)=-1 + !mrecv_r2r(:,:,:)=-1 + msend_r2r(:,:,:)=-1 + maxcommrecv_r2r(:,:)=max_comm_r2r + maxcommsend_r2r(:,:)=max_comm_r2r + !(20101207) removed by teraim + !recvtag_r2r(:,:,:)=-1 + !sendtag_r2r(:,:,:)=-1 + ! + rlist_r2r(:,:,:,:)=-1 + qlist_r2r(:,:,:,:)=-1 + slist_r2r(:,:,:,:)=-1 + ! + n_hemisphere_copy(:,:,:)=0 + s_hemisphere_copy(:,:,:)=0 + ! + allocate(rsize_r2p(max_comm_r2p,ADM_npl:ADM_spl,halomax)) + allocate(ssize_r2p(max_comm_r2p,ADM_npl:ADM_spl,halomax)) + allocate(source_prc_r2p(max_comm_r2p,ADM_npl:ADM_spl,halomax)) + allocate(source_rgn_r2p(max_comm_r2p,ADM_npl:ADM_spl,halomax)) + allocate(dest_prc_r2p(max_comm_r2p,ADM_npl:ADM_spl,halomax)) + allocate(maxcommrecv_r2p(ADM_npl:ADM_spl,halomax)) + allocate(maxcommsend_r2p(ADM_npl:ADM_spl,halomax)) + allocate(recvtag_r2p(max_comm_r2p,ADM_npl:ADM_spl,halomax)) + allocate(sendtag_r2p(max_comm_r2p,ADM_npl:ADM_spl,halomax)) + ! + max_datasize_r2p=halomax*(halomax+1)/2 + ! + allocate(rlist_r2p(max_datasize_r2p,max_comm_r2p,ADM_npl:ADM_spl,halomax)) + allocate(qlist_r2p(max_datasize_r2p,max_comm_r2p,ADM_npl:ADM_spl,halomax)) + allocate(slist_r2p(max_datasize_r2p,max_comm_r2p,ADM_npl:ADM_spl,halomax)) + ! + allocate(recvbuf_r2p(max_datasize_r2p*kmax*max_varmax & + ,max_comm_r2p,ADM_npl:ADM_spl)) + allocate(sendbuf_r2p(max_datasize_r2p*kmax*max_varmax & + ,max_comm_r2p,ADM_npl:ADM_spl)) + ! + rsize_r2p(:,:,:)=0 + ssize_r2p(:,:,:)=0 + source_prc_r2p(:,:,:)=-1 + source_rgn_r2p(:,:,:)=-1 + dest_prc_r2p(:,:,:)=-1 + maxcommrecv_r2p(:,:)=max_comm_r2p + maxcommsend_r2p(:,:)=max_comm_r2p + recvtag_r2p(:,:,:)=-1 + sendtag_r2p(:,:,:)=-1 + ! + rlist_r2p(:,:,:,:)=-1 + qlist_r2p(:,:,:,:)=-1 + slist_r2p(:,:,:,:)=-1 + ! +!!!!!!!!!!!!!!! 
+ allocate(nmin_nspl(1:halomax)) + allocate(nmax_nspl(1:halomax)) + allocate(pmin_nspl(1:halomax)) + allocate(pmax_nspl(1:halomax)) + allocate(lmin_nspl(1:halomax)) + allocate(lmax_nspl(1:halomax)) + allocate(gmin_nspl(1:halomax)) + allocate(gmax_nspl(1:halomax)) + allocate(gall_nspl(1:halomax)) + nmin_nspl(halomax)=1 + nmax_nspl(halomax)=halomax+1 + pmin_nspl(halomax)=1 + pmax_nspl(halomax)=ADM_vlink_nmax + lmin_nspl(halomax)=1 + lmax_nspl(halomax)=halomax + gmin_nspl(halomax)=2 + gmax_nspl(halomax)=1+5*halomax*(halomax+1)/2 + gall_nspl(halomax)=1+5*halomax*(halomax+1)/2 + allocate(pl_index(nmin_nspl(halomax):nmax_nspl(halomax) & + ,pmin_nspl(halomax):pmax_nspl(halomax) & + ,lmin_nspl(halomax):lmax_nspl(halomax),halomax)) + ! + do halo=1,halomax + ! + imin(halo)=(ADM_gmin-1)+halo + imax(halo)=(ADM_gmax-1)+halo + jmin(halo)=(ADM_gmin-1)+halo + jmax(halo)=(ADM_gmax-1)+halo + gmin(halo)=(ADM_gmin-1)+halo + gmax(halo)=(ADM_gmax-1)+halo + gall(halo)=(ADM_gall_1d-2)+2*halo + ! + nmin_nspl(halo)=1 + nmax_nspl(halo)=halo+1 + pmin_nspl(halo)=1 + pmax_nspl(halo)=ADM_vlink_nmax + lmin_nspl(halo)=1 + lmax_nspl(halo)=halo + gmin_nspl(halo)=2 + gmax_nspl(halo)=1+5*halo*(halo+1)/2 + gall_nspl(halo)=1+5*halo*(halo+1)/2 + pl_index(:,:,:,halo)=-1 + do l=lmin_nspl(halo),lmax_nspl(halo) + do p=pmin_nspl(halo),pmax_nspl(halo) + do n=nmin_nspl(halo),l+1 + pl_index(n,p,l,halo)=n+(p-1)*l+(1+5*(l-1)*l/2) + enddo + enddo + pl_index(l+1,pmax_nspl(halo),l,halo)=nmin_nspl(halo) & + +(pmin_nspl(halo)-1)*l & + +(1+5*(l-1)*l/2) + enddo + ! + ! 
--- r2p ---- + if(comm_pl)then + do p=pmin_nspl(halo),pmax_nspl(halo) + rsize_r2p(p,ADM_npl,halo)=halo*(halo+1)/2 + source_prc_r2p(p,ADM_npl,halo)=prc_tab_rev(ptr_prcid & + ,ADM_rgn_vtab_pl(ADM_rid,ADM_npl,p)) + source_rgn_r2p(p,ADM_npl,halo)=prc_tab_rev(ptr_lrgnid & + ,ADM_rgn_vtab_pl(ADM_rid,ADM_npl,p)) + enddo + do p=pmin_nspl(halo),pmax_nspl(halo) + q=0 + do ld=lmin_nspl(halo),lmax_nspl(halo) + do nd=nmin_nspl(halo),ld + q=q+1 + in=-nd+ld+nmin_nspl(halo)-lmin_nspl(halo)+imin(halo) + jn=-nd+nmin_nspl(halo)+(jmax(halo)-jmin(halo))+jmin(halo) + rlist_r2p(q,mod(p,ADM_vlink_nmax)+1,ADM_npl,halo) & + =pl_index(nd+1,p,ld,halo) + qlist_r2p(q,mod(p,ADM_vlink_nmax)+1,ADM_npl,halo)=suf(in,jn,gall(halo)) + enddo + enddo + enddo + do p=pmin_nspl(halo),pmax_nspl(halo) + rsize_r2p(p,ADM_spl,halo)=halo*(halo+1)/2 + source_prc_r2p(p,ADM_spl,halo)=prc_tab_rev(ptr_prcid & + ,ADM_rgn_vtab_pl(ADM_rid,ADM_spl,p)) + source_rgn_r2p(p,ADM_spl,halo)=prc_tab_rev(ptr_lrgnid & + ,ADM_rgn_vtab_pl(ADM_rid,ADM_spl,p)) + enddo + do p=pmin_nspl(halo),pmax_nspl(halo) + q=0 + do ld=lmin_nspl(halo),lmax_nspl(halo) + do nd=nmin_nspl(halo),ld + q=q+1 + in=nd-ld-nmin_nspl(halo) & + +lmin_nspl(halo)+(imax(halo)-imin(halo))+imin(halo) + jn=nd-nmin_nspl(halo)+jmin(halo) + rlist_r2p(q,p,ADM_spl,halo)=pl_index(nd,p,ld,halo) + qlist_r2p(q,p,ADM_spl,halo)=suf(in,jn,gall(halo)) + enddo + enddo + enddo + maxcommrecv_r2p(ADM_npl,halo)=(pmax_nspl(halo)-pmin_nspl(halo)+1) + maxcommrecv_r2p(ADM_spl,halo)=(pmax_nspl(halo)-pmin_nspl(halo)+1) + ! + do pl=ADM_npl,ADM_spl + do p=1,maxcommrecv_r2p(pl,halo) + if (ADM_prc_me==source_prc_r2p(p,pl,halo)) then + dest_prc_r2p(p,pl,halo)=ADM_prc_nspl(pl) + ssize_r2p(p,pl,halo)=rsize_r2p(p,pl,halo) + do q=1,ssize_r2p(p,pl,halo) + slist_r2p(q,p,pl,halo)=qlist_r2p(q,p,pl,halo) + enddo + endif + sendtag_r2p(p,pl,halo)=pl+(ADM_spl-ADM_npl+1)*(p-1) & + +ADM_rgn_nmax**2+ADM_vlink_nmax*2 + recvtag_r2p(p,pl,halo)=sendtag_r2p(p,pl,halo) + +! 
write(*,*) 'sendtag_r2p',ADM_prc_me,p,pl,halo,sendtag_r2p(p,pl,halo) + enddo + maxcommsend_r2p(pl,halo)=(pmax_nspl(halo)-pmin_nspl(halo)+1) + enddo + endif + ! + ! --- r2r ---- + do l=1,ADM_prc_rnum(ADM_prc_me) + rgnid=ADM_prc_tab(l,ADM_prc_me) + m=0 + if (ADM_rgn_etab(ADM_dir,ADM_sw,rgnid)==ADM_ne) then + if (halo>=1) then + m=m+1 + rsize_r2r(m,halo,rgnid)=(gmax(halo)-gmin(halo)+1)*halo + sourceid_r2r(m,halo,rgnid)=ADM_rgn_etab(ADM_rid,ADM_sw,rgnid) + ! mrecv_r2r(sourceid_r2r(m,halo,rgnid),halo,rgnid)=m !(20101207) removed by teraim + n=0 + do j=jmin(halo)-halo,jmin(halo)-1 + do i=imin(halo),imax(halo) + n=n+1 + in=i + jn=j+jmax(halo)+1-jmin(halo) + rlist_r2r(n,m,halo,rgnid)=suf(i,j,gall(halo)) + qlist_r2r(n,m,halo,rgnid)=suf(in,jn,gall(halo)) + enddo + enddo + endif + endif + if ((ADM_rgn_vnum(ADM_w,rgnid)==3)) then + if (ADM_rgn_etab(ADM_dir,ADM_sw,rgnid)==ADM_se) then + if (halo>=1) then + m=m+1 + rsize_r2r(m,halo,rgnid)=(gmax(halo)-gmin(halo)+1)*halo + sourceid_r2r(m,halo,rgnid)=ADM_rgn_etab(ADM_rid,ADM_sw,rgnid) + !mrecv_r2r(sourceid_r2r(m,halo,rgnid),halo,rgnid)=m !(20101207)removed by teraim + n=0 + do j=jmin(halo)-halo,jmin(halo)-1 + do i=imin(halo)+1,imax(halo)+(j-(jmin(halo)-1)) + n=n+1 + in=j+jmax(halo)-2*jmin(halo)+imin(halo)+1 + jn=-i+j+imin(halo)+jmax(halo)-jmin(halo)+1 + rlist_r2r(n,m,halo,rgnid)=suf(i,j,gall(halo)) + qlist_r2r(n,m,halo,rgnid)=suf(in,jn,gall(halo)) + enddo + do i=imin(halo)-halo,imin(halo)-1+(j-(jmin(halo)-1)) + n=n+1 + in=i+imax(halo)+1-imin(halo) + jn=j+jmax(halo)+1-jmin(halo) + rlist_r2r(n,m,halo,rgnid)=suf(i,j,gall(halo)) + qlist_r2r(n,m,halo,rgnid)=suf(in,jn,gall(halo)) + enddo + enddo + endif + endif + endif + ! 
+ if (ADM_rgn_etab(ADM_dir,ADM_nw,rgnid)==ADM_ne) then + if (halo>=1) then + m=m+1 + rsize_r2r(m,halo,rgnid)=(gmax(halo)-gmin(halo)+1)*halo + sourceid_r2r(m,halo,rgnid)=ADM_rgn_etab(ADM_rid,ADM_nw,rgnid) + !mrecv_r2r(sourceid_r2r(m,halo,rgnid),halo,rgnid)=m !(20101207) removed by teraim + n=0 + do i=imin(halo)-halo,imin(halo)-1 + do j=jmin(halo)+(i-(imin(halo)-1)),jmax(halo)+(i-(imin(halo)-1)) + n=n+1 + in=i-j+imax(halo)-imin(halo)+jmin(halo)+1 + jn=i+imax(halo)-2*imin(halo)+jmin(halo)+1 + rlist_r2r(n,m,halo,rgnid)=suf(i,j,gall(halo)) + qlist_r2r(n,m,halo,rgnid)=suf(in,jn,gall(halo)) + enddo + enddo + endif + elseif (ADM_rgn_etab(ADM_dir,ADM_nw,rgnid)==ADM_se) then + if (halo>=1) then + m=m+1 + rsize_r2r(m,halo,rgnid)=(gmax(halo)-gmin(halo)+1)*halo + sourceid_r2r(m,halo,rgnid)=ADM_rgn_etab(ADM_rid,ADM_nw,rgnid) + !mrecv_r2r(sourceid_r2r(m,halo,rgnid),halo,rgnid)=m !(20101207) removed by teraim + n=0 + do j=jmin(halo),jmax(halo) + do i=imin(halo)-halo,imin(halo)-1 + n=n+1 + in=i+imax(halo)-imin(halo)+1 + jn=j + rlist_r2r(n,m,halo,rgnid)=suf(i,j,gall(halo)) + qlist_r2r(n,m,halo,rgnid)=suf(in,jn,gall(halo)) + enddo + enddo + endif + endif + ! +!!!!! + if ((ADM_rgn_vnum(ADM_n,rgnid)==5)) then + ! 
+ if (ADM_rgn_vtab(ADM_dir,ADM_n,rgnid,2)==ADM_n) then + if (halo>=2) then + m=m+1 + rsize_r2r(m,halo,rgnid)=halo*(halo+1)/2-1 + sourceid_r2r(m,halo,rgnid)=ADM_rgn_vtab(ADM_rid,ADM_n,rgnid,2) + !mrecv_r2r(sourceid_r2r(m,halo,rgnid),halo,rgnid)=m !(20101207) removed by teraim + n=0 + do i=imin(halo)-halo,imin(halo)-1 + do j=jmax(halo)+1+(i-(imin(halo)-1)) & + ,min(jmax(halo)+1,jmax(halo)+(halo-1)+(i-(imin(halo)-1))) + n=n+1 + in=-j+jmax(halo)+imin(halo)+1 + jn=i-j+imax(halo)-2*imin(halo)+jmax(halo)+jmin(halo)+2 + rlist_r2r(n,m,halo,rgnid)=suf(i,j,gall(halo)) + qlist_r2r(n,m,halo,rgnid)=suf(in,jn,gall(halo)) + enddo + enddo + endif + endif + if (ADM_rgn_vtab(ADM_dir,ADM_n,rgnid,3)==ADM_n) then + if (halo>=2) then + m=m+1 + rsize_r2r(m,halo,rgnid)=(halo-1)*halo/2 + sourceid_r2r(m,halo,rgnid)=ADM_rgn_vtab(ADM_rid,ADM_n,rgnid,3) + !mrecv_r2r(sourceid_r2r(m,halo,rgnid),halo,rgnid)=m !(20101207) removed by teraim + n=0 + do j=jmax(halo)+2,jmax(halo)+halo + do i=imin(halo)+1,imin(halo)+1+(j-(jmax(halo)+2)) + n=n+1 + in=-i+j-2*jmax(halo)+jmin(halo)+imax(halo)+imin(halo)-1 + jn=-i+imax(halo)+jmin(halo)+1 + rlist_r2r(n,m,halo,rgnid)=suf(i,j,gall(halo)) + qlist_r2r(n,m,halo,rgnid)=suf(in,jn,gall(halo)) + enddo + enddo + endif + endif + ! + endif +!!!!! + ! 
+ if (ADM_rgn_etab(ADM_dir,ADM_ne,rgnid)==ADM_nw) then + if (halo>=1) then + m=m+1 + rsize_r2r(m,halo,rgnid)=(gmax(halo)-gmin(halo)+1)*halo + sourceid_r2r(m,halo,rgnid)=ADM_rgn_etab(ADM_rid,ADM_ne,rgnid) + !mrecv_r2r(sourceid_r2r(m,halo,rgnid),halo,rgnid)=m !(20101207) removed by teraim + n=0 + do j=jmax(halo)+1,jmax(halo)+halo + do i=imin(halo)+1+(j-(jmax(halo)+1)),imax(halo)+1+(j-(jmax(halo)+1)) + n=n+1 + in=j-jmax(halo)+imin(halo)-1 + jn=-i+j+imax(halo)-jmax(halo)+jmin(halo) + rlist_r2r(n,m,halo,rgnid)=suf(i,j,gall(halo)) + qlist_r2r(n,m,halo,rgnid)=suf(in,jn,gall(halo)) + enddo + enddo + endif + elseif (ADM_rgn_etab(ADM_dir,ADM_ne,rgnid)==ADM_sw) then + if (halo>=1) then + m=m+1 + rsize_r2r(m,halo,rgnid)=(gmax(halo)-gmin(halo)+1)*halo + sourceid_r2r(m,halo,rgnid)=ADM_rgn_etab(ADM_rid,ADM_ne,rgnid) + !mrecv_r2r(sourceid_r2r(m,halo,rgnid),halo,rgnid)=m !(20101207)removed by teraim + n=0 + do j=jmax(halo)+1,jmax(halo)+halo + do i=imin(halo),jmax(halo) + n=n+1 + in=i + jn=j-jmax(halo)-1+jmin(halo) + rlist_r2r(n,m,halo,rgnid)=suf(i,j,gall(halo)) + qlist_r2r(n,m,halo,rgnid)=suf(in,jn,gall(halo)) + enddo + enddo + endif + endif + ! 
+ if (ADM_rgn_etab(ADM_dir,ADM_se,rgnid)==ADM_nw) then + if (halo>=1) then + m=m+1 + rsize_r2r(m,halo,rgnid)=(gmax(halo)-gmin(halo)+1)*halo + sourceid_r2r(m,halo,rgnid)=ADM_rgn_etab(ADM_rid,ADM_se,rgnid) + !mrecv_r2r(sourceid_r2r(m,halo,rgnid),halo,rgnid)=m !(20101207)removed by teraim + n=0 + do j=jmin(halo),jmax(halo) + do i=imax(halo)+1,imax(halo)+halo + n=n+1 + in=i-imax(halo)+imin(halo)-1 + jn=j + rlist_r2r(n,m,halo,rgnid)=suf(i,j,gall(halo)) + qlist_r2r(n,m,halo,rgnid)=suf(in,jn,gall(halo)) + enddo + enddo + endif + endif + if ((ADM_rgn_vnum(ADM_e,rgnid)==3)) then + if (ADM_rgn_etab(ADM_dir,ADM_se,rgnid)==ADM_sw) then + if (halo>=1) then + m=m+1 + rsize_r2r(m,halo,rgnid)=(gmax(halo)-gmin(halo)+1)*halo + sourceid_r2r(m,halo,rgnid)=ADM_rgn_etab(ADM_rid,ADM_se,rgnid) + !mrecv_r2r(sourceid_r2r(m,halo,rgnid),halo,rgnid)=m !(20101207) removed by teraim + n=0 + do i=imax(halo)+1,imax(halo)+halo + do j=jmin(halo)+1+(i-(imax(halo)+1)),jmax(halo) + n=n+1 + in=i-j-imax(halo)+jmax(halo)+imin(halo) + jn=i-imax(halo)-1+jmin(halo) + rlist_r2r(n,m,halo,rgnid)=suf(i,j,gall(halo)) + qlist_r2r(n,m,halo,rgnid)=suf(in,jn,gall(halo)) + enddo + do j=jmax(halo)+1+(i-(imax(halo)+1)),jmax(halo)+halo + n=n+1 + in=i-imax(halo)-1+imin(halo) + jn=j-jmax(halo)-1+jmin(halo) + rlist_r2r(n,m,halo,rgnid)=suf(i,j,gall(halo)) + qlist_r2r(n,m,halo,rgnid)=suf(in,jn,gall(halo)) + enddo + enddo + endif + endif + endif + ! +!!!!! + if ((ADM_rgn_vnum(ADM_s,rgnid)==5)) then + ! 
+ if (ADM_rgn_vtab(ADM_dir,ADM_s,rgnid,2)==ADM_s) then + if (halo>=2) then + m=m+1 + rsize_r2r(m,halo,rgnid)=(halo-1)*halo/2 + sourceid_r2r(m,halo,rgnid)=ADM_rgn_vtab(ADM_rid,ADM_s,rgnid,2) + !mrecv_r2r(sourceid_r2r(m,halo,rgnid),halo,rgnid)=m !(20101207) removed by teraim + n=0 + do i=imax(halo)+2,imax(halo)+halo + do j=jmin(halo)+1,jmin(halo)+1+(i-(imax(halo)+2)) + n=n+1 + in=-j+jmax(halo)+imin(halo)+1 + jn=i-j-2*imax(halo)+imin(halo)+jmax(halo)+jmin(halo)-1 + rlist_r2r(n,m,halo,rgnid)=suf(i,j,gall(halo)) + qlist_r2r(n,m,halo,rgnid)=suf(in,jn,gall(halo)) + enddo + enddo + endif + endif + ! + ! + if (ADM_rgn_vtab(ADM_dir,ADM_s,rgnid,3)==ADM_s) then + if (halo>=2) then + m=m+1 + rsize_r2r(m,halo,rgnid)=halo*(halo+1)/2-1 + sourceid_r2r(m,halo,rgnid)=ADM_rgn_vtab(ADM_rid,ADM_s,rgnid,3) + !mrecv_r2r(sourceid_r2r(m,halo,rgnid),halo,rgnid)=m !(20101207) removed by teraim + n=0 + do j=jmin(halo)-halo,jmin(halo)-1 + do i=imax(halo)+1+(j-(jmin(halo)-1)) & + ,min(imax(halo)+1,imax(halo)+(halo-1)+(j-(jmin(halo)-1))) + n=n+1 + in=-i+j+imax(halo)+jmax(halo)+imin(halo)-2*jmin(halo)+2 + jn=-i+imax(halo)+jmin(halo)+1 + rlist_r2r(n,m,halo,rgnid)=suf(i,j,gall(halo)) + qlist_r2r(n,m,halo,rgnid)=suf(in,jn,gall(halo)) + enddo + enddo + endif + endif + ! + endif + !! + ! + !! + if ((ADM_rgn_vnum(ADM_w,rgnid)==4)) then + ! 
+ if (ADM_rgn_vtab(ADM_dir,ADM_w,rgnid,2)==ADM_n) then + if (halo>=1) then + m=m+1 + rsize_r2r(m,halo,rgnid)=halo*(halo+1)/2 + sourceid_r2r(m,halo,rgnid)=ADM_rgn_vtab(ADM_rid,ADM_w,rgnid,2) + !mrecv_r2r(sourceid_r2r(m,halo,rgnid),halo,rgnid)=m !(20101207) removed by teraim + n=0 + do j=jmin(halo)-halo,jmin(halo)-1 + do i=imin(halo)-1+(j-(jmin(halo)-1)),imin(halo)-1 + n=n+1 + in=i-j+imax(halo)-imin(halo)-jmax(halo)+2*jmin(halo) + jn=i+imax(halo)-2*imin(halo)+jmin(halo)+1 + rlist_r2r(n,m,halo,rgnid)=suf(i,j,gall(halo)) + qlist_r2r(n,m,halo,rgnid)=suf(in,jn,gall(halo)) + enddo + enddo + endif + elseif (ADM_rgn_vtab(ADM_dir,ADM_w,rgnid,2)==ADM_e) then + if (halo>=1) then + m=m+1 + rsize_r2r(m,halo,rgnid)=halo*halo + sourceid_r2r(m,halo,rgnid)=ADM_rgn_vtab(ADM_rid,ADM_w,rgnid,2) + !mrecv_r2r(sourceid_r2r(m,halo,rgnid),halo,rgnid)=m !(20101207) removed by teraim + n=0 + do j=jmin(halo)-halo,jmin(halo)-1 + do i=imin(halo)-halo,imin(halo)-1 + n=n+1 + in=i+imax(halo)-imin(halo)+1 + jn=j+jmax(halo)-jmin(halo)+1 + rlist_r2r(n,m,halo,rgnid)=suf(i,j,gall(halo)) + qlist_r2r(n,m,halo,rgnid)=suf(in,jn,gall(halo)) + enddo + enddo + endif + elseif (ADM_rgn_vtab(ADM_dir,ADM_w,rgnid,2)==ADM_s) then + if (halo>=1) then + m=m+1 + rsize_r2r(m,halo,rgnid)=halo*(halo+1)/2 + sourceid_r2r(m,halo,rgnid)=ADM_rgn_vtab(ADM_rid,ADM_w,rgnid,2) + !mrecv_r2r(sourceid_r2r(m,halo,rgnid),halo,rgnid)=m !(20101207) removed by teraim + n=0 + do j=jmin(halo)-halo,jmin(halo)-1 + do i=imin(halo)-halo,imin(halo)-halo+(j-(jmin(halo)-halo)) + !do i=imin(halo)-halo,imin(halo)-1 + ! 
do j=i+jmin(halo)-imin(halo),jmin(halo)-1 + n=n+1 + in=j+jmax(halo)+1-2*jmin(halo)+imin(halo) + jn=-i+j-imax(halo)+2*imin(halo)+jmax(halo)-jmin(halo) + rlist_r2r(n,m,halo,rgnid)=suf(i,j,gall(halo)) + qlist_r2r(n,m,halo,rgnid)=suf(in,jn,gall(halo)) + enddo + enddo + endif + endif + if (ADM_rgn_etab(ADM_dir,ADM_sw,rgnid)==ADM_se) then + if (halo>=1) then + m=m+1 + rsize_r2r(m,halo,rgnid)=(gmax(halo)-gmin(halo)+1)*halo + sourceid_r2r(m,halo,rgnid)=ADM_rgn_etab(ADM_rid,ADM_sw,rgnid) + !mrecv_r2r(sourceid_r2r(m,halo,rgnid),halo,rgnid)=m !(20101207) removed by teraim + n=0 + do j=jmin(halo)-halo,jmin(halo)-1 + do i=imin(halo)+(j-(jmin(halo)-1)),imax(halo)+(j-(jmin(halo)-1)) + n=n+1 + in=j+jmax(halo)-2*jmin(halo)+imin(halo)+1 + jn=-i+j+imin(halo)+jmax(halo)-jmin(halo)+1 + rlist_r2r(n,m,halo,rgnid)=suf(i,j,gall(halo)) + qlist_r2r(n,m,halo,rgnid)=suf(in,jn,gall(halo)) + enddo + enddo + endif + endif + ! + endif + !! + ! + !! + if ((ADM_rgn_vnum(ADM_n,rgnid)==4)) then + ! + if (ADM_rgn_vtab(ADM_dir,ADM_n,rgnid,2)==ADM_e) then + if (halo>=2) then + m=m+1 + rsize_r2r(m,halo,rgnid)=halo*(halo-1) + sourceid_r2r(m,halo,rgnid)=ADM_rgn_vtab(ADM_rid,ADM_n,rgnid,2) + !mrecv_r2r(sourceid_r2r(m,halo,rgnid),halo,rgnid)=m !(20101207) removed by teraim + n=0 + do i=imin(halo)-halo,imin(halo)-1 + do j=jmax(halo)+1+(i-(imin(halo)-1)) & + ,jmax(halo)+(halo-1)+(i-(imin(halo)-1)) + n=n+1 + in=i-j+imax(halo)-imin(halo)+jmax(halo)+2 + jn=i+imax(halo)-2*imin(halo)+jmin(halo)+1 + rlist_r2r(n,m,halo,rgnid)=suf(i,j,gall(halo)) + qlist_r2r(n,m,halo,rgnid)=suf(in,jn,gall(halo)) + enddo + enddo + endif + elseif (ADM_rgn_vtab(ADM_dir,ADM_n,rgnid,2)==ADM_s) then + if (halo>=2) then + m=m+1 + rsize_r2r(m,halo,rgnid)=(halo-1)*halo/2 + sourceid_r2r(m,halo,rgnid)=ADM_rgn_vtab(ADM_rid,ADM_n,rgnid,2) + !mrecv_r2r(sourceid_r2r(m,halo,rgnid),halo,rgnid)=m !(20101207) removed by teraim + n=0 + do i=imin(halo)-(halo-1),imin(halo)-1 + do j=jmax(halo)+1,jmax(halo)+1+(i-(imin(halo)-(halo-1))) + n=n+1 + 
in=i+imax(halo)-imin(halo)+1 + jn=j-jmax(halo)+jmin(halo)-1 + rlist_r2r(n,m,halo,rgnid)=suf(i,j,gall(halo)) + qlist_r2r(n,m,halo,rgnid)=suf(in,jn,gall(halo)) + enddo + enddo + endif + elseif (ADM_rgn_vtab(ADM_dir,ADM_n,rgnid,2)==ADM_w) then + if (halo>=1) then + m=m+1 + rsize_r2r(m,halo,rgnid)=halo*halo + sourceid_r2r(m,halo,rgnid)=ADM_rgn_vtab(ADM_rid,ADM_n,rgnid,2) + !mrecv_r2r(sourceid_r2r(m,halo,rgnid),halo,rgnid)=m !(20101207) removed by teraim + n=0 + do j=jmax(halo)+1,jmax(halo)+halo + do i=imin(halo)-(halo-1)+(j-(jmax(halo)+1)) & + ,imin(halo)+(j-(jmax(halo)+1)) + n=n+1 + in=j-jmax(halo)+imin(halo)-1 + jn=-i+j+imin(halo)-jmax(halo)+jmin(halo)-1 + rlist_r2r(n,m,halo,rgnid)=suf(i,j,gall(halo)) + qlist_r2r(n,m,halo,rgnid)=suf(in,jn,gall(halo)) + enddo + enddo + endif + endif + ! + endif + !! + ! + !! + if ((ADM_rgn_vnum(ADM_e,rgnid)==4)) then + ! + if (ADM_rgn_vtab(ADM_dir,ADM_e,rgnid,2)==ADM_n) then + if (halo>=2) then + m=m+1 + rsize_r2r(m,halo,rgnid)=(halo-1)*halo/2 + sourceid_r2r(m,halo,rgnid)=ADM_rgn_vtab(ADM_rid,ADM_e,rgnid,2) + !mrecv_r2r(sourceid_r2r(m,halo,rgnid),halo,rgnid)=m !(20101207) removed by teraim + n=0 + do i=imax(halo)+2,imax(halo)+halo + do j=jmax(halo)+1,jmax(halo)+1+(i-(imax(halo)+2)) + n=n+1 + in=j-jmax(halo)-1+imin(halo) + jn=-i+j+2*imax(halo)-imin(halo)-jmax(halo)+jmin(halo)+1 + rlist_r2r(n,m,halo,rgnid)=suf(i,j,gall(halo)) + qlist_r2r(n,m,halo,rgnid)=suf(in,jn,gall(halo)) + enddo + enddo + endif + elseif (ADM_rgn_vtab(ADM_dir,ADM_e,rgnid,2)==ADM_w) then + if (halo>=1) then + m=m+1 + rsize_r2r(m,halo,rgnid)=halo*halo + sourceid_r2r(m,halo,rgnid)=ADM_rgn_vtab(ADM_rid,ADM_e,rgnid,2) + !mrecv_r2r(sourceid_r2r(m,halo,rgnid),halo,rgnid)=m !(20101207) removed by teraim + n=0 + do j=jmax(halo)+1,jmax(halo)+halo + do i=imax(halo)+1,imax(halo)+halo + n=n+1 + in=i-imax(halo)+imin(halo)-1 + jn=j-jmax(halo)+jmin(halo)-1 + rlist_r2r(n,m,halo,rgnid)=suf(i,j,gall(halo)) + qlist_r2r(n,m,halo,rgnid)=suf(in,jn,gall(halo)) + enddo + enddo + endif + 
elseif (ADM_rgn_vtab(ADM_dir,ADM_e,rgnid,2)==ADM_s) then + if (halo>=2) then + m=m+1 + rsize_r2r(m,halo,rgnid)=(halo-1)*halo/2 + sourceid_r2r(m,halo,rgnid)=ADM_rgn_vtab(ADM_rid,ADM_e,rgnid,2) + !mrecv_r2r(sourceid_r2r(m,halo,rgnid),halo,rgnid)=m !(20101207) removed by teraim + n=0 + do j=jmax(halo)+2,jmax(halo)+halo + do i=imax(halo)+1,imax(halo)+1+(j-(jmax(halo)+2)) + n=n+1 + in=i-j-imax(halo)+2*jmax(halo)+imin(halo)-jmin(halo)+1 + jn=i-imax(halo)+jmin(halo)-1 + rlist_r2r(n,m,halo,rgnid)=suf(i,j,gall(halo)) + qlist_r2r(n,m,halo,rgnid)=suf(in,jn,gall(halo)) + enddo + enddo + endif + endif + if (ADM_rgn_etab(ADM_dir,ADM_se,rgnid)==ADM_sw) then + if (halo>=1) then + m=m+1 + rsize_r2r(m,halo,rgnid)=(gmax(halo)-gmin(halo)+1)*halo + sourceid_r2r(m,halo,rgnid)=ADM_rgn_etab(ADM_rid,ADM_se,rgnid) + !mrecv_r2r(sourceid_r2r(m,halo,rgnid),halo,rgnid)=m !(20101207) removed by teraim + n=0 + do i=imax(halo)+1,imax(halo)+halo + do j=jmin(halo)+1+(i-(imax(halo)+1)),jmax(halo)+1+(i-(imax(halo)+1)) + n=n+1 + in=i-j-imax(halo)+jmax(halo)+imin(halo) + jn=i-imax(halo)+jmin(halo)-1 + rlist_r2r(n,m,halo,rgnid)=suf(i,j,gall(halo)) + qlist_r2r(n,m,halo,rgnid)=suf(in,jn,gall(halo)) + enddo + enddo + endif + ! + endif + endif + !! + ! + !! + if (ADM_rgn_vnum(ADM_s,rgnid)==4) then + ! 
+ if (ADM_rgn_vtab(ADM_dir,ADM_s,rgnid,2)==ADM_n) then + if (halo>=2) then + m=m+1 + rsize_r2r(m,halo,rgnid)=(halo-1)*halo/2 + sourceid_r2r(m,halo,rgnid)=ADM_rgn_vtab(ADM_rid,ADM_s,rgnid,2) + !mrecv_r2r(sourceid_r2r(m,halo,rgnid),halo,rgnid)=m !(20101207) removed by teraim + n=0 + do i=imax(halo)+1,imax(halo)+(halo-1) + do j=jmin(halo)-(halo-1)+(i-(imax(halo)+1)),jmin(halo)-1 + n=n+1 + in=i-imax(halo)-1+imin(halo) + jn=j+jmax(halo)+1-jmin(halo) + rlist_r2r(n,m,halo,rgnid)=suf(i,j,gall(halo)) + qlist_r2r(n,m,halo,rgnid)=suf(in,jn,gall(halo)) + enddo + enddo + endif + elseif (ADM_rgn_vtab(ADM_dir,ADM_s,rgnid,2)==ADM_e) then + if (halo>=2) then + m=m+1 + rsize_r2r(m,halo,rgnid)=halo*(halo-1) + sourceid_r2r(m,halo,rgnid)=ADM_rgn_vtab(ADM_rid,ADM_s,rgnid,2) + !mrecv_r2r(sourceid_r2r(m,halo,rgnid),halo,rgnid)=m !(20101207) removed by teraim + n=0 + do j=jmin(halo)-halo,jmin(halo)-1 + do i=imax(halo)+1+(j-(jmin(halo)-1)) & + ,imax(halo)+(halo-1)+(j-(jmin(halo)-1)) + n=n+1 + in=j+jmax(halo)-2*jmin(halo)+imin(halo)+1 + jn=-i+j+imax(halo)+jmax(halo)-jmin(halo)+2 + rlist_r2r(n,m,halo,rgnid)=suf(i,j,gall(halo)) + qlist_r2r(n,m,halo,rgnid)=suf(in,jn,gall(halo)) + enddo + enddo + endif + elseif (ADM_rgn_vtab(ADM_dir,ADM_s,rgnid,2)==ADM_w) then + if (halo>=1) then + m=m+1 + rsize_r2r(m,halo,rgnid)=halo*halo + sourceid_r2r(m,halo,rgnid)=ADM_rgn_vtab(ADM_rid,ADM_s,rgnid,2) + !mrecv_r2r(sourceid_r2r(m,halo,rgnid),halo,rgnid)=m !(20101207)removed by teraim + n=0 + do i=imax(halo)+1,imax(halo)+halo + do j=jmin(halo)-(halo-1)+(i-(imax(halo)+1)) & + ,jmin(halo)+(i-(imax(halo)+1)) + n=n+1 + in=i-j-imax(halo)+jmin(halo)+imin(halo)-1 + jn=i-imax(halo)+jmin(halo)-1 + rlist_r2r(n,m,halo,rgnid)=suf(i,j,gall(halo)) + qlist_r2r(n,m,halo,rgnid)=suf(in,jn,gall(halo)) + enddo + enddo + endif + endif + ! + endif + ! + maxcommrecv_r2r(halo,rgnid)=m + ! 
+ if ((ADM_rgn_vnum(ADM_w,rgnid)==3)) then + if ((ADM_rgn_etab(ADM_dir,ADM_nw,rgnid)==ADM_ne)) then + n_hemisphere_copy(ADM_w,halo,rgnid)=1 + elseif ((ADM_rgn_etab(ADM_dir,ADM_nw,rgnid)==ADM_se)) then + s_hemisphere_copy(ADM_w,halo,rgnid)=1 + endif + endif + if ((ADM_rgn_vnum(ADM_n,rgnid)==5)) then + n_hemisphere_copy(ADM_n,halo,rgnid)=1 + endif + if ((ADM_rgn_vnum(ADM_s,rgnid)==3)) then + n_hemisphere_copy(ADM_s,halo,rgnid)=1 + endif + if ((ADM_rgn_vnum(ADM_e,rgnid)==3)) then + if ((ADM_rgn_etab(ADM_dir,ADM_ne,rgnid)==ADM_nw)) then + n_hemisphere_copy(ADM_e,halo,rgnid)=1 + elseif ((ADM_rgn_etab(ADM_dir,ADM_ne,rgnid)==ADM_sw)) then + s_hemisphere_copy(ADM_e,halo,rgnid)=1 + endif + endif + if ((ADM_rgn_vnum(ADM_s,rgnid)==5)) then + s_hemisphere_copy(ADM_s,halo,rgnid)=1 + endif + if ((ADM_rgn_vnum(ADM_n,rgnid)==3)) then + s_hemisphere_copy(ADM_n,halo,rgnid)=1 + endif + ! + enddo !loop l + ! + !(20101207) removed by teraim + !do rrgnid=1,ADM_rgn_nmax + ! do srgnid=1,ADM_rgn_nmax + ! sendtag_r2r(rrgnid,halo,srgnid)=rrgnid+ADM_rgn_nmax*(srgnid-1) + ! recvtag_r2r(srgnid,halo,rrgnid)=sendtag_r2r(rrgnid,halo,srgnid) +! ! write(*,*) 'sendtag_r2r',ADM_prc_me,rrgnid,srgnid,sendtag_r2r(rrgnid,halo,srgnid) + ! enddo + !enddo + ! + enddo !loop halo + ! 
+ !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end + do l=1,ADM_rgn_nmax +!coarray + !--- 2020 Fujitsu + call mpi_bcast( & + rsize_r2r(1,1,l), & + max_comm_r2r*halomax, & + mpi_integer, & + prc_tab_rev(ptr_prcid,l)-1, & + ADM_comm_run_world, & + ierr) + !call co_broadcast(rsize_r2r(:,:,l),prc_tab_rev(ptr_prcid,l)) + !--- 2020 Fujitsu end + end do + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end + do l=1,ADM_rgn_nmax +!coarray + !--- 2020 Fujitsu + call mpi_bcast( & + sourceid_r2r(1,1,l), & + max_comm_r2r*halomax, & + mpi_integer, & + prc_tab_rev(ptr_prcid,l)-1, & + ADM_comm_run_world, & + ierr) + !call co_broadcast(sourceid_r2r(:,:,l),prc_tab_rev(ptr_prcid,l)) + !--- 2020 Fujitsu end + end do + !(20101207)removed by teraim + !do l=1,ADM_rgn_nmax + ! call mpi_bcast( & + ! mrecv_r2r(1,1,l), & + ! ADM_rgn_nmax*halomax, & + ! mpi_integer, & + ! prc_tab_rev(ptr_prcid,l)-1, & + ! ADM_comm_run_world, & + ! ierr) + !end do + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu + do l=1,ADM_rgn_nmax +!coarray + !--- 2020 Fujitsu + call mpi_bcast( & + maxcommrecv_r2r(1,l), & + 1*halomax, & + mpi_integer, & + prc_tab_rev(ptr_prcid,l)-1, & + ADM_comm_run_world, & + ierr) + !call co_broadcast(maxcommrecv_r2r(:,l),prc_tab_rev(ptr_prcid,l)) + !--- 2020 Fujitsu end + end do + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end + do l=1,ADM_rgn_nmax +!coarray + !--- 2020 Fujitsu + call mpi_bcast( & + rlist_r2r(1,1,1,l), & + max_comm_r2r*max_datasize_r2r*halomax, & + mpi_integer, & + prc_tab_rev(ptr_prcid,l)-1, & + ADM_comm_run_world, & + ierr) + !call co_broadcast(rlist_r2r(:,:,:,l),prc_tab_rev(ptr_prcid,l)) + !--- 2020 Fujitsu end + end do + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end + do l=1,ADM_rgn_nmax +!coarray + !--- 2020 Fujitsu + call mpi_bcast( & + qlist_r2r(1,1,1,l), & + max_comm_r2r*max_datasize_r2r*halomax, & + 
mpi_integer, & + prc_tab_rev(ptr_prcid,l)-1, & + ADM_comm_run_world, & + ierr) + !call co_broadcast(qlist_r2r(:,:,:,l),prc_tab_rev(ptr_prcid,l)) + !--- 2020 Fujitsu end + end do + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end + do l=1,ADM_rgn_nmax +!coarray + !--- 2020 Fujitsu + call mpi_bcast( & + n_hemisphere_copy(1,1,l), & + (ADM_s-ADM_w+1)*halomax, & + mpi_integer, & + prc_tab_rev(ptr_prcid,l)-1, & + ADM_comm_run_world, & + ierr) + !call co_broadcast(n_hemisphere_copy(:,:,l),prc_tab_rev(ptr_prcid,l)) + !--- 2020 Fujitsu end + end do + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end + do l=1,ADM_rgn_nmax +!coarray + !--- 2020 Fujitsu + call mpi_bcast( & + s_hemisphere_copy(1,1,l), & + (ADM_s-ADM_w+1)*halomax, & + mpi_integer, & + prc_tab_rev(ptr_prcid,l)-1, & + ADM_comm_run_world, & + ierr) + !call co_broadcast(s_hemisphere_copy(:,:,l),prc_tab_rev(ptr_prcid,l)) + !--- 2020 Fujitsu end + end do +!coarray call mpi_barrier(ADM_comm_run_world,ierr) + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end + ! + do halo=1,halomax + do ls=1,ADM_prc_rnum(ADM_prc_me) + srgnid=ADM_prc_tab(ls,ADM_prc_me) + ms=0 + do lr=1,ADM_rgn_nmax + rrgnid=lr + do mr=1,maxcommrecv_r2r(halo,rrgnid) + if (srgnid==sourceid_r2r(mr,halo,rrgnid)) then + ms=ms+1 + ! + !(20101207)added by teraim + if(ADM_rgn2prc(srgnid)==ADM_prc_me) then + msend_r2r(rrgnid,halo,srgnid)=ms + else + write(*,*)"This process is abort because irregular access in msend_r2r." + exit + endif + ! + destid_r2r(ms,halo,srgnid)=rrgnid + ssize_r2r(ms,halo,srgnid)=rsize_r2r(mr,halo,rrgnid) + do n=1,rsize_r2r(mr,halo,rrgnid) + slist_r2r(n,ms,halo,srgnid)=qlist_r2r(n,mr,halo,rrgnid) + enddo + endif + enddo + enddo + maxcommsend_r2r(halo,srgnid)=ms + enddo + enddo !loop halo + ! 
+!coarray call mpi_barrier(ADM_comm_run_world,ierr) + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end + do l=1,ADM_rgn_nmax +!coarray + !--- 2020 Fujitsu + call mpi_bcast( & + destid_r2r(1,1,l), & + max_comm_r2r*halomax, & + mpi_integer, & + prc_tab_rev(ptr_prcid,l)-1, & + ADM_comm_run_world, & + ierr) + !call co_broadcast(destid_r2r(:,:,l),prc_tab_rev(ptr_prcid,l)) + !--- 2020 Fujitsu end + end do + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end + do l=1,ADM_rgn_nmax +!coarray + !--- 2020 Fujitsu + call mpi_bcast( & + ssize_r2r(1,1,l), & + max_comm_r2r*halomax, & + mpi_integer, & + prc_tab_rev(ptr_prcid,l)-1, & + ADM_comm_run_world, & + ierr) + !call co_broadcast(ssize_r2r(:,:,l),prc_tab_rev(ptr_prcid,l)) + !--- 2020 Fujitsu end + end do + !(20101207)removed by teraim + !do l=1,ADM_rgn_nmax + ! call mpi_bcast( & + ! msend_r2r(1,1,l), & + ! ADM_rgn_nmax*halomax, & + ! mpi_integer, & + ! prc_tab_rev(ptr_prcid,l)-1, & + ! ADM_comm_run_world, & + ! ierr) + !end do + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end + do l=1,ADM_rgn_nmax +!coarray + !--- 2020 Fujitsu + call mpi_bcast( & + slist_r2r(1,1,1,l), & + max_comm_r2r*max_datasize_r2r*halomax, & + mpi_integer, & + prc_tab_rev(ptr_prcid,l)-1, & + ADM_comm_run_world, & + ierr) + !call co_broadcast(slist_r2r(:,:,:,l),prc_tab_rev(ptr_prcid,l)) + !--- 2020 Fujitsu end + end do + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end + do l=1,ADM_rgn_nmax +!coarray + !--- 2020 Fujitsu + call mpi_bcast( & + maxcommsend_r2r(1,l), & + 1*halomax, & + mpi_integer, & + prc_tab_rev(ptr_prcid,l)-1, & + ADM_comm_run_world, & + ierr) + !call co_broadcast(maxcommsend_r2r(:,l),prc_tab_rev(ptr_prcid,l)) + !--- 2020 Fujitsu end + end do + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end + ! 
+ allocate(sendbuf_p2r(kmax*max_varmax*2, & + ADM_rgn_nmax_pl)) + allocate(recvbuf_p2r(kmax*max_varmax*2, & + ADM_rgn_nmax_pl)) + allocate(recvtag_p2r(max_comm_p2r,ADM_npl:ADM_spl)) + allocate(sendtag_p2r(max_comm_p2r,ADM_npl:ADM_spl)) + do pl=ADM_npl,ADM_spl + do p=1,ADM_vlink_nmax + recvtag_p2r(p,pl)=ADM_rgn_nmax*ADM_rgn_nmax+p+ADM_vlink_nmax*(pl-1) + sendtag_p2r(p,pl)=ADM_rgn_nmax*ADM_rgn_nmax+p+ADM_vlink_nmax*(pl-1) + +! write(*,*) 'sendtag_p2r',ADM_prc_me,p,pl,halo,sendtag_p2r(p,pl) + + enddo + enddo + ! + allocate(clist(max_varmax)) + ! +!coarray call mpi_barrier(ADM_comm_run_world,ierr) + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end + ! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!!!!! re-setup comm_table !!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + rank_me=ADM_prc_me-1 + max_comm_prc=min(ADM_prc_all,max_comm_r2r*ADM_lall+2*max_comm_r2p) + ! + allocate(n_nspl(ADM_npl:ADM_spl,halomax)) + do halo=1,halomax + n_nspl(ADM_npl,halo)=suf(imin(halo)+0,jmax(halo)+1,gall(halo)) + n_nspl(ADM_spl,halo)=suf(imax(halo)+1,jmin(halo)+0,gall(halo)) + enddo + ! + allocate(temp_sendorder(0:ADM_prc_all-1,halomax)) + allocate(temp_recvorder(0:ADM_prc_all-1,halomax)) + ! + !-------------------------------------------------- + allocate(romax(halomax)) + allocate(somax(halomax)) + allocate(sourcerank(max_comm_prc,halomax)) + allocate(destrank(max_comm_prc,halomax)) + allocate(rsize(max_comm_prc,halomax)) + allocate(ssize(max_comm_prc,halomax)) + romax(:)=0 + somax(:)=0 + sourcerank(:,:)=-1 + destrank(:,:)=-1 + rsize(:,:)=0 + ssize(:,:)=0 + !-------------------------------------------------- + ! 
+ maxn=((gmax(halomax)-gmin(halomax)+1)+2)*halomax + maxm=max_comm_r2r+1 + maxl=ADM_lall+2 + !---- + maxn_pl=halomax*(halomax+1)/2 + maxm_pl=ADM_vlink_nmax + maxl_pl=(ADM_spl-ADM_npl+1) + !---- + maxn_r2r=(gmax(halomax)-gmin(halomax)+1)*halomax + maxm_r2r=max_comm_r2r + maxl_r2r=ADM_lall + !---- + maxn_r2p=halomax*(halomax+1)/2 + maxm_r2p=ADM_vlink_nmax + maxl_r2p=(ADM_spl-ADM_npl+1) + !---- + maxn_p2r=1 + maxm_p2r=ADM_vlink_nmax + maxl_p2r=(ADM_spl-ADM_npl+1) + !---- + maxn_sgp=halomax + maxm_sgp=4 + maxl_sgp=12 + ! + !-------------------------------------------------- + ! for send + !-------------------------------------------------- + allocate(nsmax(max_comm_prc,halomax)) + allocate(sendinfo(elemsize_comm,maxm*maxl,max_comm_prc,halomax)) + allocate(sendlist(maxn,maxm*maxl,max_comm_prc,halomax)) + nsmax(:,:)=0 + sendinfo(:,:,:,:)=0 + sendlist(:,:,:,:)=0 + allocate(nsmax_pl(max_comm_prc,halomax)) + allocate(sendinfo_pl(elemsize_comm,maxm_pl*maxl_pl,max_comm_prc,halomax)) + allocate(sendlist_pl(maxn_pl,maxm_pl*maxl_pl,max_comm_prc,halomax)) + nsmax_pl(:,:)=0 + sendinfo_pl(:,:,:,:)=0 + sendlist_pl(:,:,:,:)=0 + !-------------------------------------------------- + ! + !-------------------------------------------------- + ! 
for copy + !-------------------------------------------------- + allocate(ncmax_r2r(halomax)) + allocate(copyinfo_r2r(elemsize_copy,maxm_r2r*maxl_r2r,halomax)) + allocate(recvlist_r2r(maxn_r2r,maxm_r2r*maxl_r2r,halomax)) + allocate(sendlist_r2r(maxn_r2r,maxm_r2r*maxl_r2r,halomax)) + ncmax_r2r(:)=0 + copyinfo_r2r(:,:,:)=0 + recvlist_r2r(:,:,:)=0 + sendlist_r2r(:,:,:)=0 + !-------------------------------------------------- + allocate(ncmax_r2p(halomax)) + allocate(copyinfo_r2p(elemsize_copy,maxm_r2p*maxl_r2p,halomax)) + allocate(recvlist_r2p(maxn_r2p,maxm_r2p*maxl_r2p,halomax)) + allocate(sendlist_r2p(maxn_r2p,maxm_r2p*maxl_r2p,halomax)) + ncmax_r2p(:)=0 + copyinfo_r2p(:,:,:)=0 + recvlist_r2p(:,:,:)=0 + sendlist_r2p(:,:,:)=0 + !-------------------------------------------------- + allocate(ncmax_p2r(halomax)) + allocate(copyinfo_p2r(elemsize_copy,maxm_p2r*maxl_p2r,halomax)) + allocate(recvlist_p2r(maxn_p2r,maxm_p2r*maxl_p2r,halomax)) + allocate(sendlist_p2r(maxn_p2r,maxm_p2r*maxl_p2r,halomax)) + ncmax_p2r(:)=0 + copyinfo_p2r(:,:,:)=0 + recvlist_p2r(:,:,:)=0 + sendlist_p2r(:,:,:)=0 + !-------------------------------------------------- + ! + !-------------------------------------------------- + ! 
for recv + !-------------------------------------------------- + allocate(nrmax(max_comm_prc,halomax)) + allocate(recvinfo(elemsize_comm,maxm*maxl,max_comm_prc,halomax)) + allocate(recvlist(maxn,maxm*maxl,max_comm_prc,halomax)) + nrmax(:,:)=0 + recvinfo(:,:,:,:)=0 + recvlist(:,:,:,:)=0 + allocate(nrmax_pl(max_comm_prc,halomax)) + allocate(recvinfo_pl(elemsize_comm,maxm_pl*maxl_pl,max_comm_prc,halomax)) + allocate(recvlist_pl(maxn_pl,maxm_pl*maxl_pl,max_comm_prc,halomax)) + nrmax_pl(:,:)=0 + recvinfo_pl(:,:,:,:)=0 + recvlist_pl(:,:,:,:)=0 + !-------------------------------------------------- + allocate(temp_dest_rgn(maxm*maxl,max_comm_prc,halomax)) + allocate(temp_src_rgn(maxm*maxl,max_comm_prc,halomax)) + allocate(temp_dest_rgn_pl(maxm_pl*maxl_pl,max_comm_prc,halomax)) + allocate(temp_src_rgn_pl(maxm_pl*maxl_pl,max_comm_prc,halomax)) + temp_dest_rgn(:,:,:)=0 + temp_dest_rgn_pl(:,:,:)=0 + temp_src_rgn(:,:,:)=0 + temp_src_rgn_pl(:,:,:)=0 + !-------------------------------------------------- + ! 
+ do halo=1,halomax + do l=1,ADM_lall + rgnid=ADM_prc_tab(l,ADM_prc_me) + do m=1,maxcommrecv_r2r(halo,rgnid) + srank=prc_tab_rev(ptr_prcid,sourceid_r2r(m,halo,rgnid))-1 + if (srank/=rank_me) then + ck=0 + loop_ro1:do ro=1,romax(halo) + if (srank==sourcerank(ro,halo)) exit loop_ro1 + ck=ck+1 + enddo loop_ro1 + if (ck==romax(halo)) then + romax(halo)=romax(halo)+1 + ro=romax(halo) + sourcerank(ro,halo)=srank + temp_recvorder(srank,halo)=ro + endif + ro=temp_recvorder(srank,halo) + nrmax(ro,halo)=nrmax(ro,halo)+1 + nr=nrmax(ro,halo) + recvinfo(SIZE_COMM,nr,ro,halo)=rsize_r2r(m,halo,rgnid) + recvinfo(LRGNID_COMM,nr,ro,halo)=l + temp_src_rgn(nr,ro,halo)=sourceid_r2r(m,halo,rgnid) + rs=recvinfo(SIZE_COMM,nr,ro,halo) + rsize(ro,halo)=rsize(ro,halo)+rs + do n=1,rs + recvlist(n,nr,ro,halo)=rlist_r2r(n,m,halo,rgnid) + enddo + else + ncmax_r2r(halo)=ncmax_r2r(halo)+1 + nc=ncmax_r2r(halo) + copyinfo_r2r(SIZE_COPY,nc,halo)=rsize_r2r(m,halo,rgnid) + copyinfo_r2r(LRGNID_COPY,nc,halo)=l + copyinfo_r2r(SRC_LRGNID_COPY,nc,halo) & + =prc_tab_rev(ptr_lrgnid,sourceid_r2r(m,halo,rgnid)) + cs=copyinfo_r2r(SIZE_COPY,nc,halo) + srgnid=sourceid_r2r(m,halo,rgnid) + do n=1,cs + recvlist_r2r(n,nc,halo)=rlist_r2r(n,m,halo,rgnid) + ! + !(20101207)added by teraim + if(ADM_rgn2prc(srgnid)==ADM_prc_me) then + sendlist_r2r(n,nc,halo)=slist_r2r(n,msend_r2r(rgnid,halo,srgnid),halo,srgnid) + else + write(*,*)"This process is abort because irregular access is msend_r2r." + exit + endif + ! + enddo + endif + enddo !loop m + !enddo !loop l + !! + !do l=1,ADM_lall + ! 
rgnid=ADM_prc_tab(l,ADM_prc_me) + do m=1,maxcommsend_r2r(halo,rgnid) + drank=prc_tab_rev(ptr_prcid,destid_r2r(m,halo,rgnid))-1 + if (drank/=rank_me) then + ck=0 + loop_so1:do so=1,somax(halo) + if (drank==destrank(so,halo)) exit loop_so1 + ck=ck+1 + enddo loop_so1 + if (ck==somax(halo)) then + somax(halo)=somax(halo)+1 + so=somax(halo) + destrank(so,halo)=drank + temp_sendorder(drank,halo)=so + endif + so=temp_sendorder(drank,halo) + nsmax(so,halo)=nsmax(so,halo)+1 + ns=nsmax(so,halo) + sendinfo(SIZE_COMM,ns,so,halo)=ssize_r2r(m,halo,rgnid) + sendinfo(LRGNID_COMM,ns,so,halo)=l + temp_dest_rgn(ns,so,halo)=destid_r2r(m,halo,rgnid) + ss=sendinfo(SIZE_COMM,ns,so,halo) + ssize(so,halo)=ssize(so,halo)+ss + do n=1,ss + sendlist(n,ns,so,halo)=slist_r2r(n,m,halo,rgnid) + enddo + endif + enddo !loop m + enddo !loop l + !enddo !loop halo + !do halo=1,halomax + if(comm_pl) call re_setup_pl_comm_info ! T.Ohno 110721 + enddo !loop halo + deallocate(temp_sendorder) + deallocate(temp_recvorder) + ! + !allocate(temp_sb(ADM_rgn_nmax+2,halomax,ADM_rgn_nmax+2)) !(20101207) removed by teraim + allocate(tsb(somax(halomax))) + !temp_sb(:,:,:)=0 !(20101207) removed by teraim + + call init_tempsb !(20101207) added by teraim + + do halo=1,halomax + tsb(:)=0 + do so=1,somax(halo) + do ns=1,nsmax(so,halo) + ss=sendinfo(SIZE_COMM,ns,so,halo) + srgnid=ADM_prc_tab(sendinfo(LRGNID_COMM,ns,so,halo),ADM_prc_me) + rrgnid=temp_dest_rgn(ns,so,halo) + sendinfo(BASE_COMM,ns,so,halo)=tsb(so) + !temp_sb(rrgnid,halo,srgnid)=tsb(so) !(20101207)removed by teraim + call add_tempsb(rrgnid, srgnid, tsb(so)) !(20101207)added by teraim + tsb(so)=tsb(so)+ss + enddo + do ns=1,nsmax_pl(so,halo) + ss=sendinfo_pl(SIZE_COMM,ns,so,halo) + pl=sendinfo_pl(LRGNID_COMM,ns,so,halo) + srgnid=ADM_rgn_nmax+pl + rrgnid=temp_dest_rgn_pl(ns,so,halo) + sendinfo_pl(BASE_COMM,ns,so,halo)=tsb(so) + !temp_sb(rrgnid,halo,srgnid)=tsb(so) !(20101207)removed by teraim + call add_tempsb(rrgnid, srgnid, tsb(so)) !(20101207)added by teraim + 
tsb(so)=tsb(so)+ss + enddo + enddo + enddo + deallocate(tsb) + ! + !(20101207)removed by teraim + !call mpi_barrier(ADM_comm_run_world,ierr) + !do l=1,ADM_rgn_nmax + ! call mpi_bcast( & + ! temp_sb(1,1,l), & + ! (ADM_rgn_nmax+2)*halomax, & + ! mpi_integer, & + ! prc_tab_rev(ptr_prcid,l)-1, & + ! ADM_comm_run_world, & + ! ierr) + !enddo + !do pl=ADM_npl,ADM_spl + ! call mpi_bcast( & + ! temp_sb(1,1,ADM_rgn_nmax+pl), & + ! (ADM_rgn_nmax+2)*halomax, & + ! mpi_integer, & + ! ADM_prc_nspl(pl)-1, & + ! ADM_comm_run_world, & + ! ierr) + !enddo + !call mpi_barrier(ADM_comm_run_world,ierr) + ! + !(20101207)added by teraim +! call mpi_barrier(ADM_comm_run_world,ierr) + colc(:) = -1 + valc(:) = -1 + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end + do l=1,ADM_rgn_nmax +!coarray + !--- 2020 Fujitsu + call mpi_bcast( & + tempsb(l)%col, & + max_size, & + mpi_integer, & + prc_tab_rev(ptr_prcid,l)-1, & + ADM_comm_run_world, & + ierr) + !colc(:) = tempsb(l)%col(:) + !call co_broadcast(colc,prc_tab_rev(ptr_prcid,l)) + !tempsb(l)%col(:) = colc(:) + !--- 2020 Fujitsu end +!coarray + !--- 2020 Fujitsu + call mpi_bcast( & + tempsb(l)%val, & + max_size, & + mpi_integer, & + prc_tab_rev(ptr_prcid,l)-1, & + ADM_comm_run_world, & + ierr) + !valc(:) = tempsb(l)%val(:) + !call co_broadcast(valc,prc_tab_rev(ptr_prcid,l)) + !tempsb(l)%val(:) = valc(:) + !--- 2020 Fujitsu end + enddo + if(comm_pl) then ! 
T.Ohno 110721 + do pl=ADM_npl,ADM_spl +!coarray + !--- 2020 Fujitsu + call mpi_bcast( & + tempsb(ADM_rgn_nmax+pl)%col, & + max_size, & + mpi_integer, & + ADM_prc_nspl(pl)-1, & + ADM_comm_run_world, & + ierr) + !colc(:) = tempsb(ADM_rgn_nmax+pl)%col(:) + !call co_broadcast(colc,ADM_prc_nspl(pl)) + !tempsb(ADM_rgn_nmax+pl)%col(:) = colc(:) + !--- 2020 Fujitsu end +!coarray + !--- 2020 Fujitsu + call mpi_bcast( & + tempsb(ADM_rgn_nmax+pl)%val, & + max_size, & + mpi_integer, & + ADM_prc_nspl(pl)-1, & + ADM_comm_run_world, & + ierr) + !valc(:) = tempsb(ADM_rgn_nmax+pl)%val(:) + !call co_broadcast(valc,ADM_prc_nspl(pl)) + !tempsb(ADM_rgn_nmax+pl)%val(:) = valc(:) + !--- 2020 Fujitsu end + enddo + endif ! T.Ohno 110721 +!coarray call mpi_barrier(ADM_comm_run_world,ierr) + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end + ! + !call show_tempsb !(20101209) added by teraim + ! + do halo=1,halomax + do ro=1,romax(halo) + do nr=1,nrmax(ro,halo) + rrgnid=ADM_prc_tab(recvinfo(LRGNID_COMM,nr,ro,halo),ADM_prc_me) + srgnid=temp_src_rgn(nr,ro,halo) + !recvinfo(BASE_COMM,nr,ro,halo)=temp_sb(rrgnid,halo,srgnid) !(20101207)removed by teraim + !(20101207) added by teraim + call get_tempsb(rrgnid,srgnid,ret) + recvinfo(BASE_COMM,nr,ro,halo)=ret + enddo + do nr=1,nrmax_pl(ro,halo) + pl=recvinfo_pl(LRGNID_COMM,nr,ro,halo) + rrgnid=pl+ADM_rgn_nmax + srgnid=temp_src_rgn_pl(nr,ro,halo) + !recvinfo_pl(BASE_COMM,nr,ro,halo)=temp_sb(rrgnid,halo,srgnid) !(20101207)removed by teraim + !(20101207) added by teraim + call get_tempsb(rrgnid,srgnid,ret) + recvinfo_pl(BASE_COMM,nr,ro,halo)=ret + enddo + enddo !loop ro + enddo !loop halo + deallocate(temp_dest_rgn) + deallocate(temp_dest_rgn_pl) + deallocate(temp_src_rgn) + deallocate(temp_src_rgn_pl) + !deallocate(temp_sb) !(20101207)removed by teraim + call finalize_tempsb !(20101207)added by teraim + ! 
+ allocate(recvtag(romax(halomax),halomax)) + allocate(sendtag(somax(halomax),halomax)) + recvtag(:,:)=-1 + sendtag(:,:)=-1 + do halo=1,halomax + do ro=1,romax(halo) + recvtag(ro,halo)=rank_me + enddo + do so=1,somax(halo) + sendtag(so,halo)=destrank(so,halo) + enddo + enddo +! maxdatasize=(max_comm_r2r*(gmax(halomax)-gmin(halomax)+1)+2*max_comm_r2p*(halomax+1)/2)*halomax*kmax*max_varmax +! maxdatasize=(maxn_r2r*maxm_r2r*maxl_r2r+maxn_r2p*maxm_r2p*maxl_r2p+maxn_p2r*maxm_p2r*maxl_p2r)*kmax*max_varmax + maxdatasize_s=0 + do so=1,somax(halomax) + maxdatasize_s=maxdatasize_s+ssize(so,halomax)*kmax*max_varmax + enddo + maxdatasize_r=0 + do ro=1,romax(halomax) + maxdatasize_r=maxdatasize_r+rsize(ro,halomax)*kmax*max_varmax + enddo + allocate(recvbuf(maxdatasize_r,romax(halomax))) + allocate(sendbuf(maxdatasize_s,somax(halomax))) + !--- 2020 Fujitsu + sendbuf_l_lb(1) = 1; sendbuf_l_ub(1) = maxdatasize_s + sendbuf_l_lb(2) = 1; sendbuf_l_ub(2) = somax(halomax) + call xmp_new_local_array(sendbuf_l_desc, 8, 2, sendbuf_l_lb, sendbuf_l_ub, sendbuf) + !--- 2020 Fujitsu end + recvbuf(:,:)=0 + sendbuf(:,:)=0 + +!! allocate(comm_dbg_recvbuf(maxdatasize_r,romax(halomax),2)) !iga +!! allocate(comm_dbg_sendbuf(maxdatasize_s,somax(halomax),2)) !iga +!! comm_dbg_recvbuf=CNST_UNDEF !iga +!! comm_dbg_sendbuf=CNST_UNDEF !iga + + ! 
+ allocate(ncmax_sgp(halomax)) + allocate(copyinfo_sgp(elemsize_copy,maxm_sgp*maxl_sgp,halomax)) + allocate(recvlist_sgp(maxn_sgp,maxm_sgp*maxl_sgp,halomax)) + allocate(sendlist_sgp(maxn_sgp,maxm_sgp*maxl_sgp,halomax)) + ncmax_sgp(:)=0 + copyinfo_sgp(:,:,:)=0 + recvlist_sgp(:,:,:)=0 + sendlist_sgp(:,:,:)=0 + do halo=1,halomax + ncmax_sgp(halo)=0 + do l=1,ADM_lall + rgnid=ADM_prc_tab(l,ADM_prc_me) + if (n_hemisphere_copy(ADM_w,halo,rgnid)==1) then + ncmax_sgp(halo)=ncmax_sgp(halo)+1 + nc=ncmax_sgp(halo) + copyinfo_sgp(SIZE_COPY,nc,halo)=halo + copyinfo_sgp(LRGNID_COPY,nc,halo)=l + copyinfo_sgp(SRC_LRGNID_COPY,nc,halo)=l + cs=copyinfo_sgp(SIZE_COPY,nc,halo) + do n=1,cs + recvlist_sgp(n,nc,halo)=suf(gmin(halo)-n,gmin(halo)-n,gall(halo)) + sendlist_sgp(n,nc,halo)=suf(gmin(halo),gmin(halo)-n,gall(halo)) + enddo + endif + if ((n_hemisphere_copy(ADM_n,halo,rgnid)==1)) then + ncmax_sgp(halo)=ncmax_sgp(halo)+1 + nc=ncmax_sgp(halo) + copyinfo_sgp(SIZE_COPY,nc,halo)=halo-1 + copyinfo_sgp(LRGNID_COPY,nc,halo)=l + copyinfo_sgp(SRC_LRGNID_COPY,nc,halo)=l + cs=copyinfo_sgp(SIZE_COPY,nc,halo) + do n=1,cs + recvlist_sgp(n,nc,halo)=suf(gmin(halo),gmax(halo)+n+1,gall(halo)) + sendlist_sgp(n,nc,halo)=suf(gmin(halo)-n,gmax(halo)+1,gall(halo)) + enddo + endif + if ((n_hemisphere_copy(ADM_e,halo,rgnid)==1)) then + ncmax_sgp(halo)=ncmax_sgp(halo)+1 + nc=ncmax_sgp(halo) + copyinfo_sgp(SIZE_COPY,nc,halo)=halo-1 + copyinfo_sgp(LRGNID_COPY,nc,halo)=l + copyinfo_sgp(SRC_LRGNID_COPY,nc,halo)=l + cs=copyinfo_sgp(SIZE_COPY,nc,halo) + do n=1,cs + recvlist_sgp(n,nc,halo)=suf(gmax(halo)+n+1,gmax(halo)+1,gall(halo)) + sendlist_sgp(n,nc,halo)=suf(gmax(halo)+n+1,gmax(halo)+n+1,gall(halo)) + enddo + endif + if ((n_hemisphere_copy(ADM_s,halo,rgnid)==1)) then + ncmax_sgp(halo)=ncmax_sgp(halo)+1 + nc=ncmax_sgp(halo) + copyinfo_sgp(SIZE_COPY,nc,halo)=halo-1 + copyinfo_sgp(LRGNID_COPY,nc,halo)=l + copyinfo_sgp(SRC_LRGNID_COPY,nc,halo)=l + cs=copyinfo_sgp(SIZE_COPY,nc,halo) + do n=1,cs + 
recvlist_sgp(n,nc,halo)=suf(gmax(halo)+1,gmin(halo)-n,gall(halo)) + sendlist_sgp(n,nc,halo)=suf(gmax(halo)+n+1,gmin(halo),gall(halo)) + enddo + endif + if (s_hemisphere_copy(ADM_w,halo,rgnid)==1) then + ncmax_sgp(halo)=ncmax_sgp(halo)+1 + nc=ncmax_sgp(halo) + copyinfo_sgp(SIZE_COPY,nc,halo)=halo + copyinfo_sgp(LRGNID_COPY,nc,halo)=l + copyinfo_sgp(SRC_LRGNID_COPY,nc,halo)=l + cs=copyinfo_sgp(SIZE_COPY,nc,halo) + do n=1,cs + recvlist_sgp(n,nc,halo)=suf(gmin(halo),gmin(halo)-n,gall(halo)) + sendlist_sgp(n,nc,halo)=suf(gmin(halo)-n,gmin(halo)-n,gall(halo)) + enddo + endif + if ((s_hemisphere_copy(ADM_n,halo,rgnid)==1)) then + ncmax_sgp(halo)=ncmax_sgp(halo)+1 + nc=ncmax_sgp(halo) + copyinfo_sgp(SIZE_COPY,nc,halo)=halo-1 + copyinfo_sgp(LRGNID_COPY,nc,halo)=l + copyinfo_sgp(SRC_LRGNID_COPY,nc,halo)=l + cs=copyinfo_sgp(SIZE_COPY,nc,halo) + do n=1,cs + recvlist_sgp(n,nc,halo)=suf(gmin(halo)-n,gmax(halo)+1,gall(halo)) + sendlist_sgp(n,nc,halo)=suf(gmin(halo),gmax(halo)+n+1,gall(halo)) + enddo + endif + if ((s_hemisphere_copy(ADM_e,halo,rgnid)==1)) then + ncmax_sgp(halo)=ncmax_sgp(halo)+1 + nc=ncmax_sgp(halo) + copyinfo_sgp(SIZE_COPY,nc,halo)=halo-1 + copyinfo_sgp(LRGNID_COPY,nc,halo)=l + copyinfo_sgp(SRC_LRGNID_COPY,nc,halo)=l + cs=copyinfo_sgp(SIZE_COPY,nc,halo) + do n=1,cs + recvlist_sgp(n,nc,halo)=suf(gmax(halo)+n+1,gmax(halo)+1,gall(halo)) + sendlist_sgp(n,nc,halo)=suf(gmax(halo)+n+1,gmax(halo)+n+1,gall(halo)) + enddo + endif + if ((s_hemisphere_copy(ADM_s,halo,rgnid)==1)) then + ncmax_sgp(halo)=ncmax_sgp(halo)+1 + nc=ncmax_sgp(halo) + copyinfo_sgp(SIZE_COPY,nc,halo)=halo-1 + copyinfo_sgp(LRGNID_COPY,nc,halo)=l + copyinfo_sgp(SRC_LRGNID_COPY,nc,halo)=l + cs=copyinfo_sgp(SIZE_COPY,nc,halo) + do n=1,cs + recvlist_sgp(n,nc,halo)=suf(gmax(halo)+n+1,gmin(halo),gall(halo)) + sendlist_sgp(n,nc,halo)=suf(gmax(halo)+1,gmin(halo)-n,gall(halo)) + enddo + endif + enddo !loop l + enddo !loop halo + ! 
+ !-- for output_info --- + allocate( src_rank_all(max_comm_prc,halomax,ADM_prc_all)) + allocate(dest_rank_all(max_comm_prc,halomax,ADM_prc_all)) + src_rank_all(:,:,:)=-1 + dest_rank_all(:,:,:)=-1 + src_rank_all(:,:,ADM_prc_me)=sourcerank(:,:) + dest_rank_all(:,:,ADM_prc_me)=destrank(:,:) +!coarray call mpi_barrier(ADM_comm_run_world,ierr) + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end + do l=1,ADM_prc_all +!coarray + !--- 2020 Fujitsu + call mpi_bcast( & + src_rank_all(1,1,l), & + max_comm_prc*halomax, & + mpi_integer, & + l-1, & + ADM_comm_run_world, & + ierr) + !call co_broadcast(src_rank_all(:,:,l),l) + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end +!coarray + !--- 2020 Fujitsu + call mpi_bcast( & + dest_rank_all(1,1,l), & + max_comm_prc*halomax, & + mpi_integer, & + l-1, & + ADM_comm_run_world, & + ierr) + !call co_broadcast(dest_rank_all(:,:,l),l) + !--- 2020 Fujitsu end + enddo + ! +!coarray call MPI_Barrier(ADM_COMM_RUN_WORLD,ierr) + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end + ! + !--- output for debug + if(present(debug)) then + if(debug) call output_info + end if + ! + ! <== iga for dbg 090917 + if (opt_comm_dbg) then +! dbg_sendbuf_init = -1d66 * (ADM_prc_me+1000) + dbg_sendbuf_init = -888d66 + dbg_recvbuf_init = -777d66 + allocate(dbg_areq_save(2*(ADM_lall*max_comm_r2r+ADM_vlink_nmax*4),4)) + dbg_areq_save(:,:) = -999 ! [Add] 12/03/26 T.Seiki + endif + ! iga for dbg 090916 ==> + contains + ! 
+ subroutine re_setup_pl_comm_info + do pl=ADM_npl,ADM_spl + if (ADM_prc_me==ADM_prc_nspl(pl)) then + do p=1,maxcommrecv_r2p(pl,halo) + srank=source_prc_r2p(p,pl,halo)-1 + if (srank/=rank_me) then + ck=0 + loop_ro2:do ro=1,romax(halo) + if (srank==sourcerank(ro,halo)) exit loop_ro2 + ck=ck+1 + enddo loop_ro2 + if (ck==romax(halo)) then + romax(halo)=romax(halo)+1 + ro=romax(halo) + sourcerank(ro,halo)=srank + temp_recvorder(srank,halo)=ro + endif + ro=temp_recvorder(srank,halo) + nrmax_pl(ro,halo)=nrmax_pl(ro,halo)+1 + nr=nrmax_pl(ro,halo) + recvinfo_pl(SIZE_COMM,nr,ro,halo)=rsize_r2p(p,pl,halo) + recvinfo_pl(LRGNID_COMM,nr,ro,halo)=pl + temp_src_rgn_pl(nr,ro,halo)=ADM_prc_tab(source_rgn_r2p(p,pl,halo),srank+1) + rs=recvinfo_pl(SIZE_COMM,nr,ro,halo) + rsize(ro,halo)=rsize(ro,halo)+rs + do n=1,rs + recvlist_pl(n,nr,ro,halo)=rlist_r2p(n,p,pl,halo) + enddo + else + ncmax_r2p(halo)=ncmax_r2p(halo)+1 + nc=ncmax_r2p(halo) + copyinfo_r2p(SIZE_COPY,nc,halo)=rsize_r2p(p,pl,halo) + copyinfo_r2p(LRGNID_COPY,nc,halo)=pl + copyinfo_r2p(SRC_LRGNID_COPY,nc,halo)=source_rgn_r2p(p,pl,halo) + cs=copyinfo_r2p(SIZE_COPY,nc,halo) + do n=1,cs + recvlist_r2p(n,nc,halo)=rlist_r2p(n,p,pl,halo) + sendlist_r2p(n,nc,halo)=slist_r2p(n,p,pl,halo) + enddo + endif + enddo !loop p + ! 
+ do p=1,ADM_vlink_nmax + rgnid=ADM_rgn_vtab_pl(ADM_rid,pl,p) + drank=prc_tab_rev(ptr_prcid,rgnid)-1 + if (drank/=rank_me) then + ck=0 + loop_so2:do so=1,somax(halo) + if (drank==destrank(so,halo)) exit loop_so2 + ck=ck+1 + enddo loop_so2 + if (ck==somax(halo)) then + somax(halo)=somax(halo)+1 + so=somax(halo) + destrank(so,halo)=drank + temp_sendorder(drank,halo)=so + endif + so=temp_sendorder(drank,halo) + nsmax_pl(so,halo)=nsmax_pl(so,halo)+1 + ns=nsmax_pl(so,halo) + sendinfo_pl(SIZE_COMM,ns,so,halo)=1 + sendinfo_pl(LRGNID_COMM,ns,so,halo)=pl + temp_dest_rgn_pl(ns,so,halo)=rgnid + ss=sendinfo_pl(SIZE_COMM,ns,so,halo) + ssize(so,halo)=ssize(so,halo)+ss + do n=1,ss + sendlist_pl(n,ns,so,halo)=ADM_gslf_pl + enddo + endif + enddo !loop p + endif + ! + do p=1,ADM_vlink_nmax + rgnid=ADM_rgn_vtab_pl(ADM_rid,pl,p) + drank=prc_tab_rev(ptr_prcid,rgnid)-1 + if (rank_me==drank) then + srank=ADM_prc_nspl(pl)-1 + if (srank/=rank_me) then + ck=0 + loop_ro3:do ro=1,romax(halo) + if (srank==sourcerank(ro,halo)) exit loop_ro3 + ck=ck+1 + enddo loop_ro3 + if (ck==romax(halo)) then + romax(halo)=romax(halo)+1 + ro=romax(halo) + sourcerank(ro,halo)=srank + temp_recvorder(srank,halo)=ro + endif + ro=temp_recvorder(srank,halo) + nrmax(ro,halo)=nrmax(ro,halo)+1 + nr=nrmax(ro,halo) + recvinfo(SIZE_COMM,nr,ro,halo)=1 + recvinfo(LRGNID_COMM,nr,ro,halo)=prc_tab_rev(ptr_lrgnid,rgnid) + temp_src_rgn(nr,ro,halo)=ADM_rgn_nmax+pl + rs=recvinfo(SIZE_COMM,nr,ro,halo) + rsize(ro,halo)=rsize(ro,halo)+rs + do n=1,rs + recvlist(n,nr,ro,halo)=n_nspl(pl,halo) + enddo + else + ncmax_p2r(halo)=ncmax_p2r(halo)+1 + nc=ncmax_p2r(halo) + copyinfo_p2r(SIZE_COPY,nc,halo)=1 + copyinfo_p2r(LRGNID_COPY,nc,halo)=prc_tab_rev(ptr_lrgnid,rgnid) + copyinfo_p2r(SRC_LRGNID_COPY,nc,halo)=pl + cs=copyinfo_p2r(SIZE_COPY,nc,halo) + do n=1,cs + recvlist_p2r(n,nc,halo)=n_nspl(pl,halo) + sendlist_p2r(n,nc,halo)=ADM_gslf_pl + enddo + endif + endif + enddo !loop p + ! 
+ do p=1,maxcommsend_r2p(pl,halo) + srank=source_prc_r2p(p,pl,halo)-1 + if (rank_me==srank) then + rgnid=ADM_rgn_vtab_pl(ADM_rid,pl,p) + drank=ADM_prc_nspl(pl)-1 + if (drank/=rank_me) then + ck=0 + loop_so3:do so=1,somax(halo) + if (drank==destrank(so,halo)) exit loop_so3 + ck=ck+1 + enddo loop_so3 + if (ck==somax(halo)) then + somax(halo)=somax(halo)+1 + so=somax(halo) + destrank(so,halo)=drank + temp_sendorder(drank,halo)=so + endif + so=temp_sendorder(drank,halo) + nsmax(so,halo)=nsmax(so,halo)+1 + ns=nsmax(so,halo) + sendinfo(SIZE_COMM,ns,so,halo)=ssize_r2p(p,pl,halo) + sendinfo(LRGNID_COMM,ns,so,halo)=prc_tab_rev(ptr_lrgnid,rgnid) + temp_dest_rgn(ns,so,halo)=ADM_rgn_nmax+pl + ss=sendinfo(SIZE_COMM,ns,so,halo) + ssize(so,halo)=ssize(so,halo)+ss + do n=1,ss + sendlist(n,ns,so,halo)=slist_r2p(n,p,pl,halo) + enddo + endif + endif + enddo !loop p + enddo !loop pl + end subroutine re_setup_pl_comm_info + + end subroutine COMM_setup + !----------------------------------------------------------------------------- + subroutine output_info + use mod_adm, only : & + !--- public parameters + ADM_log_fid, & + !--- public variables + ADM_prc_all + ! + implicit none + ! + integer::halo + ! + integer :: varmax + integer :: cmax + ! + !integer :: srgnid,rrgnid + ! + integer :: ns,nr + integer :: so,sl,sb,ss + integer :: ro,rl,rb,rs + integer :: l,n + ! + write(ADM_log_fid,*) + write(ADM_log_fid,*) & + 'msg : sub[output_info]/mod[comm]' + write(ADM_log_fid,*) & + 'version : comm.f90.test5.2.1_wtime' + write(ADM_log_fid,*) & + '---------------------------------------& + & commnication table start & + &---------------------------------------' + ! 
+ varmax=1 + cmax=kmax*varmax + do halo=1,halomax + write(ADM_log_fid,*) & + '---------------------------------------& + & halo region =',halo,' & + &---------------------------------------' + write(ADM_log_fid,*) & + '---------------------------------------& + & count & + &---------------------------------------' + write(ADM_log_fid,*) & + 'romax =',romax(halo) & + ,'somax =',somax(halo) + write(ADM_log_fid,*) & + '---------------------------------------& + & send & + &---------------------------------------' + do so=1,somax(halo) + write(ADM_log_fid,*) & + 'so =',so & + ,'mrank =',rank_me & + ,'drank =',destrank(so,halo) + enddo + write(ADM_log_fid,*) & + '---------------------------------------& + & recv & + &---------------------------------------' + do ro=1,romax(halo) + write(ADM_log_fid,*) & + 'ro =',ro & + ,'mrank =',rank_me & + ,'srank =',sourcerank(ro,halo) + enddo + write(ADM_log_fid,*) & + '---------------------------------------& + & table & + &---------------------------------------' + do l=1,ADM_prc_all + do n=1,max_comm_prc + if (dest_rank_all(n,halo,l)==-1) cycle + write(ADM_log_fid,*) & + 'n =',n & + ,'rank =',l-1 & + ,'dest_rank =',dest_rank_all(n,halo,l) & + ,' src_rank =', src_rank_all(n,halo,l) + enddo + enddo + do so=1,somax(halo) + do ns=1,nsmax(so,halo) + ss=sendinfo(SIZE_COMM,ns,so,halo) + sl=sendinfo(LRGNID_COMM,ns,so,halo) + sb=sendinfo(BASE_COMM,ns,so,halo)*cmax + write(ADM_log_fid,*) & + 'so =',so & + ,'rank =',rank_me & + ,' dest_rank =', destrank(so,halo) & + ,' ss =', ss & + ,' sl =', sl & + ,' sb =', sb + enddo + do ns=1,nsmax_pl(so,halo) + ss=sendinfo_pl(SIZE_COMM,ns,so,halo) + sl=sendinfo_pl(LRGNID_COMM,ns,so,halo) + sb=sendinfo_pl(BASE_COMM,ns,so,halo)*cmax + write(ADM_log_fid,*) & + 'so =',so & + ,'rank =',rank_me & + ,' dest_rank =', destrank(so,halo) & + ,' ss =', ss & + ,' sl =', sl & + ,' sb =', sb + enddo + enddo !loop so + do ro=1,romax(halo) + do nr=1,nrmax(ro,halo) + rs=recvinfo(SIZE_COMM,nr,ro,halo) + 
rl=recvinfo(LRGNID_COMM,nr,ro,halo) + rb=recvinfo(BASE_COMM,nr,ro,halo)*cmax + write(ADM_log_fid,*) & + 'ro =',ro & + ,'rank =',rank_me & + ,' src_rank =', sourcerank(ro,halo) & + ,' rs =', rs & + ,' rl =', rl & + ,' rb =', rb + enddo + do nr=1,nrmax_pl(ro,halo) + rs=recvinfo_pl(SIZE_COMM,nr,ro,halo) + rl=recvinfo_pl(LRGNID_COMM,nr,ro,halo) + rb=recvinfo_pl(BASE_COMM,nr,ro,halo)*cmax + write(ADM_log_fid,*) & + 'ro =',ro & + ,'rank =',rank_me & + ,' src_rank =', sourcerank(ro,halo) & + ,' rs =', rs & + ,' rl =', rl & + ,' rb =', rb + enddo + enddo !loop ro + enddo !loop halo + ! + write(ADM_log_fid,*) & + '---------------------------------------& + & commnication table end & + &---------------------------------------' + ! + !call ADM_proc_stop + return + ! + end subroutine output_info + !----------------------------------------------------------------------------- + subroutine output_time + use mod_adm, only : & + !--- public parameters + ADM_log_fid, & + !--- public variables + ADM_prc_me, & + ADM_prc_all, & + ADM_comm_run_world + ! + implicit none + ! + real(8):: & + ave_time_total, & + ave_time_pre , & + ave_time_bar1, & + ave_time_bar2, & + ave_time_sbuf, & + ave_time_recv, & + ave_time_send, & + ave_time_copy, & + ave_time_wait, & + ave_time_rbuf, & + ave_time_copy_sgp + real(8):: & + min_time_total, & + min_time_pre , & + min_time_bar1, & + min_time_bar2, & + min_time_sbuf, & + min_time_recv, & + min_time_send, & + min_time_copy, & + min_time_wait, & + min_time_rbuf, & + min_time_copy_sgp + real(8):: & + max_time_total, & + max_time_pre , & + max_time_bar1, & + max_time_bar2, & + max_time_sbuf, & + max_time_recv, & + max_time_send, & + max_time_copy, & + max_time_wait, & + max_time_rbuf, & + max_time_copy_sgp + real(8):: & + ave_size_total, & + min_size_total, & + max_size_total + real(8):: & + ave_comm_count, & + min_comm_count, & + max_comm_count + integer::ierr + ! 
+ write(ADM_log_fid,*) + write(ADM_log_fid,*) & + 'msg : sub[output_time]/mod[comm]' + write(ADM_log_fid,*) & + 'version : comm.f90.test5.2.1_wtime' + write(ADM_log_fid,*) & + '---------------------------------------& + & commnication time & + &---------------------------------------' + ! +!coarray +! call mpi_reduce(time_total,ave_time_total,1,mpi_double_precision,mpi_sum,0,ADM_comm_run_world,ierr) +! call mpi_reduce(time_pre ,ave_time_pre ,1,mpi_double_precision,mpi_sum,0,ADM_comm_run_world,ierr) +! call mpi_reduce(time_bar1,ave_time_bar1,1,mpi_double_precision,mpi_sum,0,ADM_comm_run_world,ierr) +! call mpi_reduce(time_bar2,ave_time_bar2,1,mpi_double_precision,mpi_sum,0,ADM_comm_run_world,ierr) +! call mpi_reduce(time_sbuf,ave_time_sbuf,1,mpi_double_precision,mpi_sum,0,ADM_comm_run_world,ierr) +! call mpi_reduce(time_recv ,ave_time_recv ,1,mpi_double_precision,mpi_sum,0,ADM_comm_run_world,ierr) +! call mpi_reduce(time_send ,ave_time_send ,1,mpi_double_precision,mpi_sum,0,ADM_comm_run_world,ierr) +! call mpi_reduce(time_copy,ave_time_copy,1,mpi_double_precision,mpi_sum,0,ADM_comm_run_world,ierr) +! call mpi_reduce(time_wait,ave_time_wait,1,mpi_double_precision,mpi_sum,0,ADM_comm_run_world,ierr) +! call mpi_reduce(time_rbuf,ave_time_rbuf,1,mpi_double_precision,mpi_sum,0,ADM_comm_run_world,ierr) +! call mpi_reduce(time_copy_sgp,ave_time_copy_sgp,1,mpi_double_precision,mpi_sum,0,ADM_comm_run_world,ierr) +! call mpi_reduce(time_total ,min_time_total ,1,mpi_double_precision,mpi_min,0,ADM_comm_run_world,ierr) +! call mpi_reduce(time_pre ,min_time_pre ,1,mpi_double_precision,mpi_min,0,ADM_comm_run_world,ierr) +! call mpi_reduce(time_bar1,min_time_bar1,1,mpi_double_precision,mpi_min,0,ADM_comm_run_world,ierr) +! call mpi_reduce(time_bar2,min_time_bar2,1,mpi_double_precision,mpi_min,0,ADM_comm_run_world,ierr) +! call mpi_reduce(time_sbuf,min_time_sbuf,1,mpi_double_precision,mpi_min,0,ADM_comm_run_world,ierr) +! 
call mpi_reduce(time_recv,min_time_recv ,1,mpi_double_precision,mpi_min,0,ADM_comm_run_world,ierr) +! call mpi_reduce(time_send,min_time_send ,1,mpi_double_precision,mpi_min,0,ADM_comm_run_world,ierr) +! call mpi_reduce(time_copy,min_time_copy,1,mpi_double_precision,mpi_min,0,ADM_comm_run_world,ierr) +! call mpi_reduce(time_wait,min_time_wait,1,mpi_double_precision,mpi_min,0,ADM_comm_run_world,ierr) +! call mpi_reduce(time_rbuf,min_time_rbuf,1,mpi_double_precision,mpi_min,0,ADM_comm_run_world,ierr) +! call mpi_reduce(time_copy_sgp,min_time_copy_sgp,1,mpi_double_precision,mpi_min,0,ADM_comm_run_world,ierr) +! call mpi_reduce(time_total ,max_time_total ,1,mpi_double_precision,mpi_max,0,ADM_comm_run_world,ierr) +! call mpi_reduce(time_pre ,max_time_pre ,1,mpi_double_precision,mpi_max,0,ADM_comm_run_world,ierr) +! call mpi_reduce(time_bar1,max_time_bar1,1,mpi_double_precision,mpi_max,0,ADM_comm_run_world,ierr) +! call mpi_reduce(time_bar2,max_time_bar2,1,mpi_double_precision,mpi_max,0,ADM_comm_run_world,ierr) +! call mpi_reduce(time_sbuf,max_time_sbuf,1,mpi_double_precision,mpi_max,0,ADM_comm_run_world,ierr) +! call mpi_reduce(time_recv,max_time_recv ,1,mpi_double_precision,mpi_max,0,ADM_comm_run_world,ierr) +! call mpi_reduce(time_send,max_time_send ,1,mpi_double_precision,mpi_max,0,ADM_comm_run_world,ierr) +! call mpi_reduce(time_copy,max_time_copy,1,mpi_double_precision,mpi_max,0,ADM_comm_run_world,ierr) +! call mpi_reduce(time_wait,max_time_wait,1,mpi_double_precision,mpi_max,0,ADM_comm_run_world,ierr) +! call mpi_reduce(time_rbuf,max_time_rbuf,1,mpi_double_precision,mpi_max,0,ADM_comm_run_world,ierr) +! call mpi_reduce(time_copy_sgp,max_time_copy_sgp,1,mpi_double_precision,mpi_max,0,ADM_comm_run_world,ierr) + !----- +!coarray +! call mpi_reduce(size_total,ave_size_total,1,mpi_double_precision,mpi_sum,0,ADM_comm_run_world,ierr) +! call mpi_reduce(size_total,min_size_total,1,mpi_double_precision,mpi_min,0,ADM_comm_run_world,ierr) +! 
call mpi_reduce(size_total,max_size_total,1,mpi_double_precision,mpi_max,0,ADM_comm_run_world,ierr) +! call mpi_reduce(comm_count,ave_comm_count,1,mpi_double_precision,mpi_sum,0,ADM_comm_run_world,ierr) +! call mpi_reduce(comm_count,min_comm_count,1,mpi_double_precision,mpi_min,0,ADM_comm_run_world,ierr) +! call mpi_reduce(comm_count,max_comm_count,1,mpi_double_precision,mpi_max,0,ADM_comm_run_world,ierr) + write(ADM_log_fid,*) "------- Using unit in the following tables is msec & KB -------" + !----- + write(ADM_log_fid,*) "--------- average by comm_call_count[",comm_call_count,"] --------" +!coarray +! write(ADM_log_fid,'(a,1e11.4)') "time_pre =",1000*time_pre /comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "time_bar1 =",1000*time_bar1/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "time_recv =",1000*time_recv/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "time_sbuf =",1000*time_sbuf/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "time_send =",1000*time_send/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "time_copy =",1000*time_copy/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "time_wait =",1000*time_wait/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "time_rbuf =",1000*time_rbuf/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "time_copy_sgp =",1000*time_copy_sgp/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "time_bar2 =",1000*time_bar2/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "time_total =",1000*time_total/comm_call_count +! write(ADM_log_fid,'(a,1f11.4)') "size_total =",size_total*8/1024/comm_call_count +! write(ADM_log_fid,'(a,1f11.4)') "comm_count =",comm_count/comm_call_count + !----- +!coarray +! if (rank_me==0) then +! write(ADM_log_fid,*) "--------- average by ADM_prc_all & comm_call_count --------" +! write(ADM_log_fid,'(a,1e11.4)') "ave_time_pre =",1000*ave_time_pre /ADM_prc_all/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "ave_time_bar1 =",1000*ave_time_bar1/ADM_prc_all/comm_call_count +! 
write(ADM_log_fid,'(a,1e11.4)') "ave_time_recv =",1000*ave_time_recv/ADM_prc_all/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "ave_time_sbuf =",1000*ave_time_sbuf/ADM_prc_all/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "ave_time_send =",1000*ave_time_send/ADM_prc_all/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "ave_time_copy =",1000*ave_time_copy/ADM_prc_all/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "ave_time_wait =",1000*ave_time_wait/ADM_prc_all/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "ave_time_rbuf =",1000*ave_time_rbuf/ADM_prc_all/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "ave_time_copy_sgp =",1000*ave_time_copy_sgp/ADM_prc_all/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "ave_time_bar2 =",1000*ave_time_bar2/ADM_prc_all/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "ave_time_total =",1000*ave_time_total/ADM_prc_all/comm_call_count +! write(ADM_log_fid,'(a,1f11.4)') "ave_size_total =",ave_size_total*8/1024/ADM_prc_all/comm_call_count +! write(ADM_log_fid,'(a,1f11.4)') "ave_comm_count =",ave_comm_count/ADM_prc_all/comm_call_count +! write(ADM_log_fid,*) "--------- minimum --------" +! write(ADM_log_fid,'(a,1e11.4)') "min_time_pre =",1000*min_time_pre/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "min_time_bar1 =",1000*min_time_bar1/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "min_time_recv =",1000*min_time_recv/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "min_time_sbuf =",1000*min_time_sbuf/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "min_time_send =",1000*min_time_send/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "min_time_copy =",1000*min_time_copy/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "min_time_wait =",1000*min_time_wait/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "min_time_rbuf =",1000*min_time_rbuf/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "min_time_copy_sgp =",1000*min_time_copy_sgp/comm_call_count +! 
write(ADM_log_fid,'(a,1e11.4)') "min_time_bar2 =",1000*min_time_bar2/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "min_time_total =",1000*min_time_total/comm_call_count +! write(ADM_log_fid,'(a,1f11.4)') "min_size_total =",min_size_total*8/1024/comm_call_count +! write(ADM_log_fid,'(a,1f11.4)') "min_comm_count =",min_comm_count/comm_call_count +! write(ADM_log_fid,*) "--------- maximum --------" +! write(ADM_log_fid,'(a,1e11.4)') "max_time_pre =",1000*max_time_pre/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "max_time_bar1 =",1000*max_time_bar1/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "max_time_recv =",1000*max_time_recv/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "max_time_sbuf =",1000*max_time_sbuf/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "max_time_send =",1000*max_time_send/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "max_time_copy =",1000*max_time_copy/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "max_time_wait =",1000*max_time_wait/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "max_time_rbuf =",1000*max_time_rbuf/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "max_time_copy_sgp =",1000*max_time_copy_sgp/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "max_time_bar2 =",1000*max_time_bar2/comm_call_count +! write(ADM_log_fid,'(a,1e11.4)') "max_time_total =",1000*max_time_total/comm_call_count +! write(ADM_log_fid,'(a,1f11.4)') "max_size_total =",max_size_total*8/1024/comm_call_count +! write(ADM_log_fid,'(a,1f11.4)') "max_comm_count =",max_comm_count/comm_call_count +! write(ADM_log_fid,*) "----------------------------" +! endif + ! + !call ADM_proc_stop + return + ! + end subroutine output_time + !----------------------------------------------------------------------------- + subroutine COMM_data_transfer(& + var,var_pl, & !--- INOUT : variables comminicated. + trn, & !--- IN : commnucation flag for each variable + hallo_num) !--- IN : number of hallo + use mod_adm, only : & + ADM_proc_stop, & ! 
[add] C.Kodama 2011.04.26
+ ADM_vlink_nmax, &
+ ADM_lall, &
+ ADM_comm_run_world, &
+ ADM_LOG_FID, &
+!coarray
+ ADM_prc_all, &
+ ADM_prc_me, &
+!!
+ ADM_kall
+ use mod_cnst, only: &
+ CNST_undef
+ implicit none
+
+ real(8),intent(inout)::var(:,:,:,:)
+ real(8),intent(inout)::var_pl(:,:,:,:)
+ !
+ logical,intent(in),optional::trn(:)
+ !
+ integer,intent(in),optional::hallo_num
+ !
+ integer :: shp(4)
+ integer :: varmax
+ integer :: cmax
+ integer :: halo
+
+ integer :: acount
+ integer :: areq(2*(ADM_lall*max_comm_r2r+ADM_vlink_nmax*4))
+! integer :: stat(mpi_status_size,2*(ADM_lall*max_comm_r2r+ADM_vlink_nmax*4))
+ integer :: ierr
+ !
+ integer :: k,m,n
+ integer :: nr,nc,ns
+ integer :: sl,so,sb,ss
+ integer :: rl,ro,rb,rs
+ integer :: cl,scl,cs
+! integer :: i_dbg !iga
+ logical :: dbg_ierr !iga
+ !=============================================================================
+!coarray
+ integer dst_img
+ !--- 2020 Fujitsu
+ !integer,allocatable :: tbl(:)[:]
+ !real(8),allocatable :: caf_recvbuf(:,:)[:]
+ integer bufsize1,bufsize2
+ integer , POINTER :: tbl ( : ) => null ( )
+ real(8) , POINTER :: caf_recvbuf ( : , : ) => null ( )
+ integer(8) :: tbl_desc, caf_recvbuf_desc
+ integer(8) :: tbl_lb(1),tbl_ub(1), caf_recvbuf_lb(2), caf_recvbuf_ub(2)
+ integer(8) :: tbl_sec, caf_recvbuf_sec
+ integer(4) :: img_dims(1)
+ integer(8) :: sendbuf_l_sec ! for senbuf(:,:)
+ integer :: max_tmp
+
+ call xmp_new_array_section(tbl_sec, 1)
+ call xmp_new_array_section(caf_recvbuf_sec, 2)
+ call xmp_new_array_section(sendbuf_l_sec, 2)
+ !--- 2020 Fujitsu end
+
+ if ( opt_comm_barrier ) then
+ call DEBUG_rapstart('Node imbalance adjustment')
+!coarray call MPI_BARRIER( ADM_comm_run_world, ierr )
+ !--- 2020 Fujitsu
+ !sync all
+ call xmp_sync_all(ierr)
+ !--- 2020 Fujitsu
+ call DEBUG_rapend ('Node imbalance adjustment')
+ endif
+
+ call DEBUG_rapstart('COMM data_transfer')
+
+ !
+ ! --- pre-process
+ !
+ comm_call_count=comm_call_count+1
+ !
+!coarray t(0)=mpi_wtime() + t(0) = xmp_wtime() + ! + shp=shape(var) + kmax = shp(2) + ! + if (present(trn)) then + varmax=0 +!cdir novector + do n=1,shp(4) + if(trn(n)) then + varmax=varmax+1 + clist(varmax)=n + endif + enddo + else + varmax=shp(4) +!cdir novector + do n=1,varmax + clist(n)=n + enddo + endif + ! + if (present(hallo_num)) then + halo=hallo_num + else + halo=1 + endif + + if (halo.ne.1) then + write(ADM_log_fid,*) 'halo=',halo + endif + + ! + cmax=varmax*kmax + ! [Add] 07/11/07 T.Mitsui for check of varmax + if( opt_check_varmax ) then + ! <- [rep] C.Kodama 2011.04.26 + if ( cmax > max_varmax * ADM_kall ) then + write(ADM_LOG_FID,*) 'error: cmax > max_varmax * ADM_kall, stop!' + write(ADM_LOG_FID,*) 'cmax=', cmax, 'max_varmax*ADM_kall=', max_varmax*ADM_kall + call ADM_proc_stop + end if +! equiv_varmax = real( varmax*kmax )/real( ADM_kall ) ! assuming variables are all 3-Dimension +! diag_varmax = max( equiv_varmax, diag_varmax ) ! diagnose max value +! write(ADM_LOG_FID,'(a)' ) ' *** max_varmax, varmax, kmax, equivalent varmax, diagnosed max_varmax ' +! write(ADM_LOG_FID,'(5x, 3i8, 2f18.1)') max_varmax, varmax, kmax, equiv_varmax, diag_varmax + ! -> + end if + ! 
+
!coarray t(1)=mpi_wtime()
+ t(1) = xmp_wtime()
+ time_pre=time_pre+(t(1)-t(0))
+ !call mpi_barrier(ADM_comm_run_world,ierr)
!coarray t(2)=mpi_wtime()
+ t(2) = xmp_wtime()
+ time_bar1=time_bar1+(t(2)-t(1))
!coarray
+ !--- 2020 Fujitsu
+ !allocate(tbl(num_images())[*])
+ tbl_lb(1) = 1; tbl_ub(1) = ADM_prc_all
+ call xmp_new_coarray(tbl_desc, 4, 1, tbl_lb, tbl_ub, 1, img_dims)
+ call xmp_coarray_bind(tbl_desc, tbl)
+ !call co_max(maxdatasiaze_r)
+ call MPI_Allreduce(maxdatasize_r, max_tmp, 1, MPI_INTEGER, MPI_MAX, MPI_COMM_WORLD, ierr)
+ maxdatasize_r = max_tmp
+ !--- 2020 Fujitsu end
+ bufsize1 = maxdatasize_r
+ bufsize2 = romax(halomax)
+ !--- 2020 Fujitsu
+ !call co_max(bufsize2)
+ call MPI_Allreduce(bufsize2, max_tmp, 1, MPI_INTEGER, MPI_MAX, MPI_COMM_WORLD, ierr)
+ bufsize2 = max_tmp
+ !allocate(caf_recvbuf(bufsize1,bufsize2)[*])
+ caf_recvbuf_lb(1) = 1; caf_recvbuf_ub(1) = bufsize1
+ caf_recvbuf_lb(2) = 1; caf_recvbuf_ub(2) = bufsize2
+ call xmp_new_coarray(caf_recvbuf_desc, 8, 2, caf_recvbuf_lb, caf_recvbuf_ub, 1, img_dims)
+ call xmp_coarray_bind(caf_recvbuf_desc, caf_recvbuf)
+ !--- 2020 Fujitsu end
+
+ !-----------------------------------------
+ ! call mpi_isend
+ !-----------------------------------------
+
+!! comm_dbg_recvbuf(:,:,:)=CNST_UNDEF
+!! comm_dbg_sendbuf(:,:,:)=CNST_UNDEF
+!! comm_dbg_recvbuf(:,:,1)=recvbuf(:,:)
+!! comm_dbg_sendbuf(:,:,1)=sendbuf(:,:)
+
+ if (opt_comm_dbg) then !iga
+ sendbuf(:,:)= dbg_sendbuf_init
+ recvbuf(:,:)= dbg_recvbuf_init
+ if (somax(halo)>(ADM_lall*max_comm_r2r+ADM_vlink_nmax*4)) then
+ write(ADM_log_fid,*) 'somax inconsistency'
+ write(ADM_log_fid,*) somax(halo),(ADM_lall*max_comm_r2r+ADM_vlink_nmax*4)
+ endif
+ endif
+
+ do ro=1,romax(halo)
+ !
+ if (opt_comm_dbg) then
+ if (rsize(ro,halo)*cmax > size(recvbuf(:,ro)) ) then
+ write(ADM_log_fid,*) 'ro',ro,'buf-dim=',shape(recvbuf),'rsize=',rsize(ro,halo),'cmax=',cmax
+ endif
+ endif
+ !
!coarray
+! call mpi_irecv(recvbuf(1,ro) &
+! ,rsize(ro,halo)*cmax &
+!
,mpi_double_precision & +! ,sourcerank(ro,halo) & +! ,recvtag(ro,halo) & +! ,ADM_comm_run_world & +! ,areq(ro) & +! ,ierr) + tbl(sourcerank(ro,halo)+1) = ro + + ! + if (opt_comm_dbg) then + if (ierr.ne.0) then + write(ADM_log_fid,*) 'mpi_irecv info start==' + write(ADM_log_fid,*) 'ierr=',ierr + write(ADM_log_fid,*) 'ro=',ro + write(ADM_log_fid,*) 'rsize=',rsize(ro,halo) + write(ADM_log_fid,*) 'cmax=',cmax + write(ADM_log_fid,*) 'areq=',areq(ro) + write(ADM_log_fid,*) 'mpi_irecv info end==' + endif + ! [fix] 12/03/26 T.Seiki +!!$ dbg_areq_save(:,1)=areq(:) + dbg_areq_save(ro,1)=areq(ro) + endif + enddo + +!coarray t(3)=mpi_wtime() + t(3) = xmp_wtime() + time_recv=time_recv+(t(3)-t(2)) + !----------------------------------------- + ! var -> sendbuf + !----------------------------------------- + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end + do so=1,somax(halo) + + + +!coarray t(4)=mpi_wtime() + t(4) = xmp_wtime() + do ns=1,nsmax(so,halo) + ss=sendinfo(SIZE_COMM,ns,so,halo) + sl=sendinfo(LRGNID_COMM,ns,so,halo) + sb=sendinfo(BASE_COMM,ns,so,halo)*cmax +!=org=!cdir novector +!=org= do n=1,ss +!=org=!cdir unroll=3 +!=org= do m=1,varmax +!=org=!!cdir shortloop +!=org= do k=1,kmax +!=org= sendbuf(k+(m-1)*kmax+(n-1)*cmax+sb,so) & +!=org= =var(sendlist(n,ns,so,halo),k,sl,clist(m)) +!=org= enddo +!=org= enddo +!=org=! if (ADM_prc_me==1) write(ADM_log_fid,*) 'n',n,sendlist(n,ns,so,halo) +!=org= enddo + + do m=1,varmax +!cdir outerunroll=8 + do k=1,kmax + do n=1,ss + sendbuf(n+(k-1)*ss+(m-1)*ss*kmax+sb,so) & + =var(sendlist(n,ns,so,halo),k,sl,clist(m)) + enddo + enddo + enddo + + + enddo + !----------------------------------------- + ! + !----------------------------------------- + ! 
var_pl -> sendbuf + !----------------------------------------- + do ns=1,nsmax_pl(so,halo) + ss=sendinfo_pl(SIZE_COMM,ns,so,halo) + sl=sendinfo_pl(LRGNID_COMM,ns,so,halo) + sb=sendinfo_pl(BASE_COMM,ns,so,halo)*cmax +!=org=!cdir novector +!=org= do n=1,ss +!=org=!cdir unroll=3 +!=org= do m=1,varmax +!=org=!!cdir shortloop +!=org= do k=1,kmax +!=org= sendbuf(k+(m-1)*kmax+(n-1)*cmax+sb,so) & +!=org= =var_pl(sendlist_pl(n,ns,so,halo),k,sl,clist(m)) +!=org= enddo +!=org= enddo +!=org= enddo + do m=1,varmax +!cdir outerunroll=8 + do k=1,kmax + do n=1,ss + sendbuf(n+(k-1)*ss+(m-1)*ss*kmax+sb,so) & + =var_pl(sendlist_pl(n,ns,so,halo),k,sl,clist(m)) + enddo + enddo + enddo + + enddo +!coarray t(5)=mpi_wtime() + t(5) = xmp_wtime() + time_sbuf=time_sbuf+(t(5)-t(4)) + + +! write(ADM_log_fid,*) 'send count=',i_dbg,'prc=',adm_prc_me + !----------------------------------------- + ! + !----------------------------------------- + ! call mpi_isend + !----------------------------------------- + + if (opt_comm_dbg) then + if (ssize(so,halo)*cmax>size(sendbuf(:,so))) then + write(ADM_log_fid,*) 'so',so,'buf-dim=',shape(sendbuf),'rsize=',ssize(so,halo),'cmax=',cmax + endif + endif + +! write(*,*) 'me=',ADM_prc_me,'sendtag',sendtag(:,:) +! write(*,*) 'sendbuf',sendbuf(:,:) +! write(*,*) 'me=',ADM_prc_me,'destrank',destrank(:,:) + +!coarray +! call Mpi_isend(sendbuf(1,so) & +! ,ssize(so,halo)*cmax & +! ,mpi_double_precision & +! ,destrank(so,halo) & +! ,sendtag(so,halo) & +! ,ADM_comm_run_world & +! ,areq(so+romax(halo)) & +! ,ierr) + !--- 2020 Fujitsu + !dst_img = tbl(this_image())[destrank(so,halo)+1] + img_dims(1) = destrank(so,halo)+1 + call xmp_array_section_set_triplet(tbl_sec, 1, destrank(so,halo)+1, destrank(so,halo)+1, 1, ierr) + call xmp_coarray_get_scalar(img_dims, tbl_desc, tbl_sec, dst_img, ierr) + !caf_recvbuf(1:ssize(so,halo)*cmax,dst_img)[destrank(so,halo)+1] & + ! 
= sendbuf(1:ssize(so,halo)*cmax,so) + call xmp_array_section_set_triplet(caf_recvbuf_sec, 1, 1, ssize(so,halo)*cmax, 1, ierr) + call xmp_array_section_set_triplet(caf_recvbuf_sec, 2, dst_img, dst_img, 1, ierr) + call xmp_array_section_set_triplet(sendbuf_l_sec, 1, 1, ssize(so,halo)*cmax, 1, ierr) + call xmp_array_section_set_triplet(sendbuf_l_sec, 2, so, so, 1, ierr) + call xmp_coarray_put_local(img_dims, caf_recvbuf_desc, caf_recvbuf_sec, sendbuf_l_desc, sendbuf_l_sec, ierr) + !--- 2020 Fujitsu + + if (opt_comm_dbg) then + if (ierr.ne.0) then + write(ADM_log_fid,*) 'mpi_isend info start==' + write(ADM_log_fid,*) 'ierr=',ierr + write(ADM_log_fid,*) 'so=',so + write(ADM_log_fid,*) 'ssize=',ssize(so,halo) + write(ADM_log_fid,*) 'cmax=',cmax + write(ADM_log_fid,*) 'areq=',areq(so+romax(halo)) + write(ADM_log_fid,*) 'mpi_isend info end==' + endif + ! [fix] 12/03/26 T.Seiki +!!$ dbg_areq_save(:,2)=areq(:) + dbg_areq_save(so+romax(halo),2)=areq(so+romax(halo)) + endif + + +!coarray t(6)=mpi_wtime() + t(6) = xmp_wtime() + time_send=time_send+(t(6)-t(5)) + size_total=size_total+ssize(so,halo)*cmax + comm_count=comm_count+1 + enddo !loop so + !----------------------------------------- + ! +!coarray t(7)=mpi_wtime() + t(7) = xmp_wtime() + !--------------------------------------------------- + ! 
var -> var (region to region copy in same rank) + !--------------------------------------------------- + do nc=1,ncmax_r2r(halo) + cs=copyinfo_r2r(SIZE_COPY,nc,halo) + cl=copyinfo_r2r(LRGNID_COPY,nc,halo) + scl=copyinfo_r2r(SRC_LRGNID_COPY,nc,halo) +!=org=!cdir novector +!=org= do n=1,cs +!=org=!cdir unroll=3 +!=org= do m=1,varmax +!=org=!!cdir shortloop +!=org= do k=1,kmax +!=org= var(recvlist_r2r(n,nc,halo),k,cl ,clist(m)) & +!=org= =var(sendlist_r2r(n,nc,halo),k,scl,clist(m)) +!=org= enddo +!=org= enddo +!=org= enddo + do m=1,varmax +!cdir outerunroll=8 + do k=1,kmax + do n=1,cs + var(recvlist_r2r(n,nc,halo),k,cl ,clist(m)) & + =var(sendlist_r2r(n,nc,halo),k,scl,clist(m)) + enddo + enddo + enddo + + enddo + !------------------------------------------ + ! + !------------------------------------------ + ! var -> var_pl ( data copy in same rank) + !------------------------------------------ + do nc=1,ncmax_r2p(halo) + cs=copyinfo_r2p(SIZE_COPY,nc,halo) + cl=copyinfo_r2p(LRGNID_COPY,nc,halo) + scl=copyinfo_r2p(SRC_LRGNID_COPY,nc,halo) +!=org=!cdir novector +!=org= do n=1,cs +!=org=!cdir unroll=3 +!=org= do m=1,varmax +!=org=!!cdir shortloop +!=org= do k=1,kmax +!=org= var_pl(recvlist_r2p(n,nc,halo),k,cl,clist(m)) & +!=org= =var(sendlist_r2p(n,nc,halo),k,scl,clist(m)) +!=org= enddo +!=org= enddo +!=org= enddo + do m=1,varmax +!cdir outerunroll=8 + do k=1,kmax + do n=1,cs + var_pl(recvlist_r2p(n,nc,halo),k,cl,clist(m)) & + =var(sendlist_r2p(n,nc,halo),k,scl,clist(m)) + enddo + enddo + enddo + + enddo + !----------------------------------------- + ! + !----------------------------------------- + ! 
var_pl -> var (data copy in same rank) + !----------------------------------------- + do nc=1,ncmax_p2r(halo) + cs=copyinfo_p2r(SIZE_COPY,nc,halo) + cl=copyinfo_p2r(LRGNID_COPY,nc,halo) + scl=copyinfo_p2r(SRC_LRGNID_COPY,nc,halo) +!=org=!cdir novector +!=org= do n=1,cs +!=org=!cdir unroll=3 +!=org= do m=1,varmax +!=org=!!cdir shortloop +!=org= do k=1,kmax +!=org= var(recvlist_p2r(n,nc,halo),k,cl,clist(m)) & +!=org= =var_pl(sendlist_p2r(n,nc,halo),k,scl,clist(m)) +!=org= enddo +!=org= enddo +!=org= enddo + do m=1,varmax +!cdir outerunroll=8 + do k=1,kmax + do n=1,cs + var(recvlist_p2r(n,nc,halo),k,cl,clist(m)) & + =var_pl(sendlist_p2r(n,nc,halo),k,scl,clist(m)) + enddo + enddo + enddo + + enddo + !----------------------------------------- + ! + !----------------------------------------- +!coarray t(8)=mpi_wtime() + t(8) = xmp_wtime() + time_copy=time_copy+(t(8)-t(7)) + acount=romax(halo)+somax(halo) +!coarray call mpi_waitall(acount,areq,stat,ierr) + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end + do ro=1,romax(halo) + do n=1,rsize(ro,halo)*cmax + recvbuf(n,ro) = caf_recvbuf(n,ro) + end do + end do + +!coarray t(9)=mpi_wtime() + t(9) = xmp_wtime() + time_wait=time_wait+(t(9)-t(8)) + + if (opt_comm_dbg) then !================== dbg start + if (ierr.ne.0) then + write(ADM_log_fid,*) 'mpi_wait info start==' + write(ADM_log_fid,*) 'ierr=',ierr + write(ADM_log_fid,*) 'acount=',acount +! write(ADM_log_fid,*) 'stat=',stat(:,1:acount) + write(ADM_log_fid,*) 'areq=',areq(1:acount) + write(ADM_log_fid,*) 'mpi_wait info end==' + endif + if (acount > size(areq)) then + write(ADM_log_fid,*) 'acount, size(areq)',acount,size(areq) + endif + ! [fix] 12/03/26 T.Seiki +!!$ dbg_areq_save(:,3)=areq(:) + dbg_areq_save(1:acount,3)=areq(1:acount) + endif !=================================== dbg end + + if (opt_comm_dbg) then !================== dbg start + dbg_ierr=.false. 
+ do ro=1,romax(halo) + do n=1,rsize(ro,halo)*cmax + if (recvbuf(n,ro) == dbg_recvbuf_init) then + dbg_ierr=.true. + endif + if (recvbuf(n,ro) == dbg_sendbuf_init) then + dbg_ierr=.true. + endif + enddo + enddo + if (dbg_ierr) then + write(ADM_log_fid,*) 'communication is not completed!' + write(ADM_log_fid,*) 'dbg_tcount=',dbg_tcount + write(ADM_log_fid,*) 'romax=',romax,'rsize=',rsize(ro,halo),'cmax=',cmax + do ro=1,romax(halo) + do n=1,rsize(ro,halo)*cmax + if (recvbuf(n,ro) == dbg_recvbuf_init) then + write(ADM_log_fid,*) 'n=',n,'ro=',ro,'recvbuf=',recvbuf(n,ro) + endif + enddo + enddo + write(ADM_log_fid,*) 'areq after irecv:',dbg_areq_save(1:acount,1) + write(ADM_log_fid,*) 'areq after isend',dbg_areq_save(1:acount,2) + write(ADM_log_fid,*) 'areq after waitall',dbg_areq_save(1:acount,3) +! write(ADM_log_fid,*) 'areq after barrier',dbg_areq_save(1:acount,4) + + write(ADM_log_fid,*) 'ierr of mpi_waitall=',ierr + write(ADM_log_fid,*) 'acount of mpi_waitall=',acount +! write(ADM_log_fid,*) 'stat of mpi_waitall=',stat(:,1:acount) + + endif + dbg_tcount=dbg_tcount+1 + endif !=================================== dbg end + + + if (opt_comm_barrier) then +!coarray call mpi_barrier(ADM_comm_run_world,ierr) + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end + if (ierr.ne.0) then + write(ADM_log_fid,*) 'mpi_barriert info start==' + write(ADM_log_fid,*) 'ierr=',ierr + write(ADM_log_fid,*) 'mpi_barrier info end==' + endif +! if (opt_comm_dbg) then +! dbg_areq_save(:,4)=areq(:) +! endif + endif + + + !----------------------------------------- + ! +! i_dbg=0 !iga + do ro=1,romax(halo) + !----------------------------------------- + ! 
recvbuf -> var ( recieve in region ) + !----------------------------------------- + do nr=1,nrmax(ro,halo) + rs=recvinfo(SIZE_COMM,nr,ro,halo) + rl=recvinfo(LRGNID_COMM,nr,ro,halo) + rb=recvinfo(BASE_COMM,nr,ro,halo)*cmax +!=org=!cdir novector +!=org= do n=1,rs +!=org=!cdir unroll=3 +!=org= do m=1,varmax +!=org=!!cdir shortloop +!=org= do k=1,kmax +!=org= var(recvlist(n,nr,ro,halo),k,rl,clist(m)) & +!=org= =recvbuf(k+(m-1)*kmax+(n-1)*cmax+rb,ro) +!=org= enddo +!=org= enddo +!=org=! if (ADM_prc_me==2) write(ADM_log_fid,*) 'ro,n,nr,',ro,n,nr,recvlist(n,nr,ro,halo) +!=org= enddo + do m=1,varmax +!cdir outerunroll=8 + do k=1,kmax + do n=1,rs + var(recvlist(n,nr,ro,halo),k,rl,clist(m)) & + =recvbuf(n+(k-1)*rs+(m-1)*rs*kmax+rb,ro) + enddo + enddo +! if (ADM_prc_me==2) write(ADM_log_fid,*) 'ro,n,nr,',ro,n,nr,recvlist(n,nr,ro,halo) + enddo +! i_dbg=i_dbg+max(rs,0) + enddo + + !----------------------------------------- + ! + !----------------------------------------- + ! recvbuf -> var_pl ( recieve in pole ) + !----------------------------------------- + do nr=1,nrmax_pl(ro,halo) + rs=recvinfo_pl(SIZE_COMM,nr,ro,halo) + rl=recvinfo_pl(LRGNID_COMM,nr,ro,halo) + rb=recvinfo_pl(BASE_COMM,nr,ro,halo)*cmax +!=org=!cdir novector +!=org= do n=1,rs +!=org=!cdir unroll=3 +!=org= do m=1,varmax +!=org=!!cdir shortloop +!=org= do k=1,kmax +!=org= var_pl(recvlist_pl(n,nr,ro,halo),k,rl,clist(m)) & +!=org= =recvbuf(k+(m-1)*kmax+(n-1)*cmax+rb,ro) +!=org= enddo +!=org= enddo +!=org= enddo + do m=1,varmax +!cdir outerunroll=8 + do k=1,kmax + do n=1,rs + var_pl(recvlist_pl(n,nr,ro,halo),k,rl,clist(m)) & + =recvbuf(n+(k-1)*rs+(m-1)*rs*kmax+rb,ro) + enddo + enddo + enddo + + enddo + enddo !loop ro +!coarray t(10)=mpi_wtime() + t(10) = xmp_wtime() + time_rbuf=time_rbuf+(t(10)-t(9)) + +! write(ADM_log_fid,*) 'recv count=',i_dbg,'prc=',adm_prc_me + + !----------------------------------------- + ! + !----------------------------------------- + ! 
copy data around singular point + !----------------------------------------- + do nc=1,ncmax_sgp(halo) + cs=copyinfo_sgp(SIZE_COPY,nc,halo) + cl=copyinfo_sgp(LRGNID_COPY,nc,halo) + scl=copyinfo_sgp(SRC_LRGNID_COPY,nc,halo) +!=org=!cdir novector +!=org= do n=1,cs +!=org=!cdir unroll=3 +!=org= do m=1,varmax +!=org=!!cdir shortloop +!=org= do k=1,kmax +!=org= var(recvlist_sgp(n,nc,halo),k,cl ,clist(m)) & +!=org= =var(sendlist_sgp(n,nc,halo),k,scl,clist(m)) +!=org= enddo +!=org= enddo +!=org= enddo + + do m=1,varmax +!cdir outerunroll=8 + do k=1,kmax + do n=1,cs + var(recvlist_sgp(n,nc,halo),k,cl ,clist(m)) & + =var(sendlist_sgp(n,nc,halo),k,scl,clist(m)) + enddo + enddo + enddo + + enddo +!coarray t(11)=mpi_wtime() + t(11) = xmp_wtime() + time_copy_sgp=time_copy_sgp+(t(11)-t(10)) + !! + !call mpi_barrier(ADM_comm_run_world,ierr) +!coarray t(12)=mpi_wtime() + t(12) = xmp_wtime() + time_bar2=time_bar2+(t(12)-t(11)) + time_total=time_total+(t(12)-t(0)) + + call DEBUG_rapend('COMM data_transfer') + + end subroutine COMM_data_transfer + + !----------------------------------------------------------------------------- + subroutine COMM_data_transfer_rgn2pl( & + var, & + var_pl, & + knum, & + nnum ) + use mod_adm, only: & + ADM_COMM_RUN_WORLD, & + ADM_prc_tab, & + ADM_rgn2prc, & + ADM_prc_me, & + ADM_NPL, & + ADM_SPL, & + ADM_prc_npl, & + ADM_prc_spl, & + ADM_rgnid_npl_mng, & + ADM_rgnid_spl_mng, & + ADM_gall, & + ADM_gall_pl, & + ADM_lall, & + ADM_lall_pl, & + ADM_gall_1d, & + ADM_gmin, & + ADM_gmax, & + ADM_GSLF_PL + implicit none + + integer, intent(in) :: knum + integer, intent(in) :: nnum + real(8), intent(inout) :: var (ADM_gall, knum,ADM_lall, nnum) + real(8), intent(inout) :: var_pl(ADM_gall_pl,knum,ADM_lall_pl,nnum) + + real(8) :: v_npl_send(knum,nnum) + real(8) :: v_spl_send(knum,nnum) + real(8) :: v_npl_recv(knum,nnum) + real(8) :: v_spl_recv(knum,nnum) +!coarray + !--- 2020 Fujitsu + !real(8),allocatable :: v_npl_recvc(:,:)[:] + !real(8),allocatable :: 
v_spl_recvc(:,:)[:] + integer , POINTER :: v_npl_recvc ( : , : ) => null ( ) + integer , POINTER :: v_spl_recvc ( : , : ) => null ( ) + integer(8) :: v_npl_recvc_desc, v_spl_recvc_desc + integer(8) :: v_npl_recvc_lb(2), v_npl_recvc_ub(2), v_spl_recvc_lb(2), v_spl_recvc_ub(2) + integer(8) :: v_npl_recvc_sec, v_spl_recvc_sec + integer(4) :: img_dims(1) + !--- 2020 Fujitsu end + + integer :: ireq(4) +! integer :: istat(MPI_STATUS_SIZE) + + integer :: ierr + integer :: k, l, n, rgnid + + integer :: i,j,suf + suf(i,j) = ADM_gall_1d * ((j)-1) + (i) + !--------------------------------------------------------------------------- + !--- 2020 Fujitsu + !allocate(v_npl_recvc(knum,nnum)[*]) + v_npl_recvc_lb(1) = 1; v_npl_recvc_ub(1) = knum + v_npl_recvc_lb(2) = 1; v_npl_recvc_ub(2) = nnum + call xmp_new_coarray(v_npl_recvc_desc, 8, 2, v_npl_recvc_lb, v_npl_recvc_ub, 1, img_dims) + call xmp_coarray_bind(v_npl_recvc_desc, v_npl_recvc) + !allocate(v_spl_recvc(knum,nnum)[*]) + v_spl_recvc_lb(1) = 1; v_spl_recvc_ub(1) = knum + v_spl_recvc_lb(2) = 1; v_spl_recvc_ub(2) = nnum + call xmp_new_coarray(v_spl_recvc_desc, 8, 2, v_spl_recvc_lb, v_spl_recvc_ub, 1, img_dims) + call xmp_coarray_bind(v_spl_recvc_desc, v_spl_recvc) + + call xmp_new_array_section(v_npl_recvc_sec, 2) + call xmp_new_array_section(v_spl_recvc_sec, 2) + !--- 2020 Fujitsu end + v_npl_recvc = 0.d0 + v_spl_recvc = 0.d0 + + if( comm_pl ) then ! T.Ohno 110721 + + !--- recv pole value + !--- north pole +!coarray +! if ( ADM_prc_me == ADM_prc_npl ) then +! call MPI_IRECV( v_npl_recv, & +! knum * nnum, & +! MPI_DOUBLE_PRECISION, & +! ADM_rgn2prc(ADM_rgnid_npl_mng)-1, & +! ADM_NPL, & +! ADM_COMM_RUN_WORLD, & +! ireq(3), & +! ierr ) +! endif + + !--- south pole +!coarray +! if ( ADM_prc_me == ADM_prc_spl ) then +! call MPI_IRECV( v_spl_recv, & +! knum * nnum, & +! MPI_DOUBLE_PRECISION, & +! ADM_rgn2prc(ADM_rgnid_spl_mng)-1, & +! ADM_SPL, & +! ADM_COMM_RUN_WORLD, & +! ireq(4), & +! ierr ) +! 
endif + + !--- send pole value + do l = 1, ADM_lall + rgnid = ADM_prc_tab(l,ADM_prc_me) + + !--- north pole + sync all + if ( rgnid == ADM_rgnid_npl_mng ) then + do n = 1, nnum + do k = 1, knum + v_npl_send(k,n) = var(suf(ADM_gmin,ADM_gmax+1),k,l,n) + enddo + enddo + +!coarray +! call MPI_ISEND( v_npl_send, & +! knum * nnum, & +! MPI_DOUBLE_PRECISION, & +! ADM_prc_npl-1, & +! ADM_NPL, & +! ADM_COMM_RUN_WORLD, & +! ireq(1), & +! ierr ) + !--- 2020 Fujitsu + !v_npl_recvc(:,:)[ADM_prc_npl] = v_npl_send(:,:) + !XXXX + !--- 2020 Fujitsu end + endif + sync all + + !--- south pole + if ( rgnid == ADM_rgnid_spl_mng ) then + do n = 1, nnum + do k = 1, knum + v_spl_send(k,n) = var(suf(ADM_gmax+1,ADM_gmin),k,l,n) + enddo + enddo + +!coarray +! call MPI_ISEND( v_spl_send, & +! knum * nnum, & +! MPI_DOUBLE_PRECISION, & +! ADM_prc_spl-1, & +! ADM_SPL, & +! ADM_COMM_RUN_WORLD, & +! ireq(2), & +! ierr ) + v_spl_recvc(:,:)[ADM_prc_spl] = v_spl_send(:,:) + endif + sync all + + enddo + +!coarray +! do l = 1, ADM_lall +! rgnid = ADM_prc_tab(l,ADM_prc_me) +! + !--- north pole +! if( rgnid == ADM_rgnid_npl_mng ) call MPI_WAIT(ireq(1),istat,ierr) +! + !--- south pole +! if( rgnid == ADM_rgnid_spl_mng ) call MPI_WAIT(ireq(2),istat,ierr) +! 
enddo + + if ( ADM_prc_me == ADM_prc_npl ) then +!coarray call MPI_WAIT(ireq(3),istat,ierr) + + do n = 1, nnum + do k = 1, knum +!coarray var_pl(ADM_GSLF_PL,k,ADM_NPL,n) = v_npl_recv(k,n) + var_pl(ADM_GSLF_PL,k,ADM_NPL,n) = v_npl_recvc(k,n) + enddo + enddo + endif + + if ( ADM_prc_me == ADM_prc_spl ) then +!coarray call MPI_WAIT(ireq(4),istat,ierr) + + do n = 1, nnum + do k = 1, knum +!coarray var_pl(ADM_GSLF_PL,k,ADM_SPL,n) = v_spl_recv(k,n) + var_pl(ADM_GSLF_PL,k,ADM_SPL,n) = v_spl_recvc(k,n) + enddo + enddo + endif + + endif + sync all + + return + end subroutine COMM_data_transfer_rgn2pl + + !----------------------------------------------------------------------------- + !> comm_type : 1 ( region -> pole ) + !> : 2 ( region -> pole -> regular communication ) + !> : 3 ( regular communication only ) + subroutine COMM_var( & + var, & + var_pl, & + KNUM, & + NNUM, & + comm_type, & + NSval_fix ) + use mod_adm, only: & + ADM_COMM_RUN_WORLD,& + ADM_prc_tab, & + ADM_rgn2prc, & + ADM_prc_me, & + ADM_prc_npl, & + ADM_prc_spl, & + ADM_rgnid_npl_mng, & + ADM_rgnid_spl_mng, & + ADM_gall, & + ADM_gall_pl, & + ADM_lall, & + ADM_lall_pl, & + ADM_KNONE, & + ADM_gall_1d, & + ADM_gmin, & + ADM_gmax, & + ADM_GSLF_PL, & + ADM_NPL, & + ADM_SPL + implicit none + + integer, intent(in) :: KNUM ! number of layers + integer, intent(in) :: NNUM ! number of variables + real(8), intent(inout) :: var (ADM_gall, KNUM,ADM_lall, NNUM) ! variables + real(8), intent(inout) :: var_pl(ADM_gall_pl,KNUM,ADM_lall_pl,NNUM) ! variables at poles + integer, intent(in) :: comm_type ! communication type + logical, intent(in) :: NSval_fix ! North & South value is fixed or not. + +! integer :: ireq(4), istat(MPI_STATUS_SIZE), ierr + ! 
+ real(8) :: v_npl_send(KNUM,NNUM) + real(8) :: v_spl_send(KNUM,NNUM) + real(8) :: v_npl_recv(KNUM,NNUM) + real(8) :: v_spl_recv(KNUM,NNUM) +!coarray + real(8),allocatable :: v_npl_recvc(:,:)[:] + real(8),allocatable :: v_spl_recvc(:,:)[:] + + integer :: rgnid + integer :: l + + integer :: i,j,suf + suf(i,j) = ADM_gall_1d * ((j)-1) + (i) + !--------------------------------------------------------------------------- +!coarray + allocate(v_npl_recvc(KNUM,NNUM)[*]) + allocate(v_spl_recvc(KNUM,NNUM)[*]) + v_npl_recvc = 0.d0 + v_spl_recvc = 0.d0 + + if ( opt_comm_barrier ) then + call DEBUG_rapstart('Node imbalance adjustment') +!coarray call MPI_BARRIER( ADM_comm_run_world, ierr ) + sync all + call DEBUG_rapend ('Node imbalance adjustment') + endif + + call DEBUG_rapstart('COMM var') + + if( comm_pl .and. ((comm_type==1).or.(comm_type==2)) ) then ! T.Ohno 110721 + +!coarray +! if ( ADM_prc_me == ADM_prc_npl ) then !--- recv north pole value +! call MPI_IRECV( v_npl_recv(1,1), & !--- starting address +! KNUM * NNUM, & !--- number of array +! MPI_DOUBLE_PRECISION, & !--- type +! ADM_rgn2prc(ADM_rgnid_npl_mng)-1, & !--- source rank +! ADM_NPL, & !--- tag +! ADM_COMM_RUN_WORLD, & !--- world +! ireq(3), & !--- request id +! ierr ) !--- error id +! endif + + do l = 1, ADM_lall + rgnid = ADM_prc_tab(l,ADM_prc_me) + + sync all + if ( rgnid == ADM_rgnid_npl_mng ) then !--- send north pole value + v_npl_send(:,:) = var(suf(ADM_gmin,ADM_gmax+1),:,l,:) + +!coarray +! call MPI_ISEND( v_npl_send(1,1), & !--- starting address +! KNUM * NNUM, & !--- number of array +! MPI_DOUBLE_PRECISION, & !--- type +! ADM_prc_npl-1, & !--- dest rank +! ADM_NPL, & !--- tag +! ADM_COMM_RUN_WORLD, & !--- world +! ireq(1), & !--- request id +! ierr ) !--- error id +! +! call MPI_WAIT(ireq(1),istat,ierr) + v_npl_recvc(:,:)[ADM_prc_npl] = v_npl_send(:,:) + endif + sync all + enddo + + if ( ADM_prc_me == ADM_prc_npl ) then +!coarray +! call MPI_WAIT(ireq(3),istat,ierr) +! 
var_pl(ADM_GSLF_PL,:,ADM_NPL,:) = v_npl_recv(:,:) + var_pl(ADM_GSLF_PL,:,ADM_NPL,:) = v_npl_recvc(:,:) + endif + +!coarray +! if ( ADM_prc_me == ADM_prc_spl ) then !--- recv south pole value +! call MPI_IRECV( v_spl_recv(1,1), & !--- starting address +! KNUM * NNUM, & !--- number of array +! MPI_DOUBLE_PRECISION, & !--- type +! ADM_rgn2prc(ADM_rgnid_spl_mng)-1, & !--- srouce rank +! ADM_SPL, & !--- tag +! ADM_COMM_RUN_WORLD, & !--- world +! ireq(4), & !--- request id +! ierr ) !--- error id +! endif + + do l = 1, ADM_lall + rgnid = ADM_prc_tab(l,ADM_prc_me) + sync all + if ( rgnid == ADM_rgnid_spl_mng ) then !--- send south pole value + v_spl_send(:,:) = var(suf(ADM_gmax+1,ADM_gmin),:,l,:) + +!coarray +! call MPI_ISEND( v_spl_send(1,1), & !--- starting address +! KNUM * NNUM, & !--- number of array +! MPI_DOUBLE_PRECISION, & !--- type +! ADM_prc_spl-1, & !--- dest rank +! ADM_SPL, & !--- tag +! ADM_COMM_RUN_WORLD, & !--- world +! ireq(2), & !--- request id +! ierr ) !--- error id +! +! call MPI_WAIT(ireq(2),istat,ierr) + v_spl_recvc(:,:)[ADM_prc_spl] = v_spl_send(:,:) + endif + sync all + enddo + + if ( ADM_prc_me == ADM_prc_spl ) then +!coarray +! call MPI_WAIT(ireq(4),istat,ierr) +! var_pl(ADM_GSLF_PL,:,ADM_SPL,:) = v_spl_recv(:,:) + var_pl(ADM_GSLF_PL,:,ADM_SPL,:) = v_spl_recvc(:,:) + endif + + endif + + !write(ADM_LOG_FID,*) 'COMM_var npl', v_npl_send(2,1), v_npl_recv(2,1), var_pl(ADM_GSLF_PL,2,ADM_NPL,1) + !write(ADM_LOG_FID,*) 'COMM_var spl', v_spl_send(2,1), v_spl_recv(2,1), var_pl(ADM_GSLF_PL,2,ADM_SPL,1) + + !--- to complete communication + if ( comm_type == 2 & + .OR. 
comm_type == 3 ) then + call COMM_data_transfer( var, var_pl ) + endif + + if (NSval_fix) then + var(suf(ADM_gall_1d,1),:,:,:) = var(suf(ADM_gmax+1,ADM_gmin),:,:,:) + var(suf(1,ADM_gall_1d),:,:,:) = var(suf(ADM_gmin,ADM_gmax+1),:,:,:) + endif + + call DEBUG_rapend('COMM var') + + return + end subroutine COMM_var + + !----------------------------------------------------------------------------- + ! [add] T.Ohno 110721 + subroutine COMM_data_transfer_nopl(& + var, & !--- INOUT : variables comminicated. + trn, & !--- IN : commnucation flag for each variable + hallo_num) !--- IN : number of hallo + use mod_adm, only: & + ADM_proc_stop, & ! [add] C.Kodama 2011.04.26 + ADM_vlink_nmax, & + ADM_lall, & + ADM_comm_run_world, & + ADM_kall + use mod_cnst, only : & + CNST_undef + implicit none + ! + real(8),intent(inout)::var(:,:,:,:) + ! + logical,intent(in),optional::trn(:) + ! + integer,intent(in),optional::hallo_num + ! + integer :: shp(4) + integer :: varmax + integer :: cmax + integer :: halo + + integer :: acount + integer :: areq(2*(ADM_lall*max_comm_r2r+ADM_vlink_nmax*4)) +! integer :: stat(mpi_status_size,2*(ADM_lall*max_comm_r2r+ADM_vlink_nmax*4)) + integer :: ierr + ! + integer :: k,m,n + integer :: nr,nc,ns + integer :: sl,so,sb,ss + integer :: rl,ro,rb,rs + integer :: cl,scl,cs +! integer :: i_dbg !iga + logical :: dbg_ierr !iga + !============================================================================= +!coarray + integer dst_img + integer,allocatable :: tbl(:)[:] + real(8),allocatable :: caf_recvbuf(:,:)[:] + integer bufsize1,bufsize2 + + ! + ! --- pre-process + ! + comm_call_count=comm_call_count+1 + ! +!coarray t(0)=mpi_wtime() + t(0) = xmp_wtime() + ! + shp=shape(var) + kmax = shp(2) + ! + if (present(trn)) then + varmax=0 +!cdir novector + do n=1,shp(4) + if(trn(n)) then + varmax=varmax+1 + clist(varmax)=n + endif + enddo + else + varmax=shp(4) +!cdir novector + do n=1,varmax + clist(n)=n + enddo + endif + ! 
+ if (present(hallo_num)) then + halo=hallo_num + else + halo=1 + endif + + if (halo.ne.1) then + write(ADM_log_fid,*) 'halo=',halo + endif + + ! + cmax=varmax*kmax + ! [Add] 07/11/07 T.Mitsui for check of varmax + if( opt_check_varmax ) then + ! <- [rep] C.Kodama 2011.04.26 + if ( cmax > max_varmax * ADM_kall ) then + write(ADM_LOG_FID,*) 'error: cmax > max_varmax * ADM_kall, stop!' + write(ADM_LOG_FID,*) 'cmax=', cmax, 'max_varmax*ADM_kall=', max_varmax*ADM_kall + call ADM_proc_stop + end if + ! -> + end if + ! +!coarray t(1)=mpi_wtime() + t(1) = xmp_wtime() + time_pre=time_pre+(t(1)-t(0)) +!coarray t(2)=mpi_wtime() + t(2) = xmp_wtime() + time_bar1=time_bar1+(t(2)-t(1)) +!coarray + allocate(tbl(num_images())[*]) + call co_max(maxdatasize_r) + bufsize1 = maxdatasize_r + bufsize2 = romax(halomax) + call co_max(bufsize2) + allocate(caf_recvbuf(bufsize1,bufsize2)[*]) + + !----------------------------------------- + ! call mpi_isend + !----------------------------------------- + if (opt_comm_dbg) then !iga + sendbuf(:,:)= dbg_sendbuf_init + recvbuf(:,:)= dbg_recvbuf_init + if (somax(halo)>(ADM_lall*max_comm_r2r+ADM_vlink_nmax*4)) then + write(ADM_log_fid,*) 'somax inconsistency' + write(ADM_log_fid,*) somax(halo),(ADM_lall*max_comm_r2r+ADM_vlink_nmax*4) + endif + endif + + + do ro=1,romax(halo) + ! + if (opt_comm_dbg) then + if (rsize(ro,halo)*cmax > size(recvbuf(:,ro)) ) then + write(ADM_log_fid,*) 'ro',ro,'buf-dim=',shape(recvbuf),'rsize=',rsize(ro,halo),'cmax=',cmax + endif + endif + ! +!coarray +! call mpi_irecv(recvbuf(1,ro) & +! ,rsize(ro,halo)*cmax & +! ,mpi_double_precision & +! ,sourcerank(ro,halo) & +! ,recvtag(ro,halo) & +! ,ADM_comm_run_world & +! ,areq(ro) & +! ,ierr) + tbl(sourcerank(ro,halo)+1) = ro + + ! 
+ if (opt_comm_dbg) then + if (ierr.ne.0) then + write(ADM_log_fid,*) 'mpi_irecv info start==' + write(ADM_log_fid,*) 'ierr=',ierr + write(ADM_log_fid,*) 'ro=',ro + write(ADM_log_fid,*) 'rsize=',rsize(ro,halo) + write(ADM_log_fid,*) 'cmax=',cmax + write(ADM_log_fid,*) 'areq=',areq(ro) + write(ADM_log_fid,*) 'mpi_irecv info end==' + endif + ! [fix] 12/03/26 T.Seiki +!!$ dbg_areq_save(:,1)=areq(:) + dbg_areq_save(ro,1)=areq(ro) + endif + enddo + +!coarray t(3)=mpi_wtime() + t(3) = xmp_wtime() + time_recv=time_recv+(t(3)-t(2)) + !----------------------------------------- + ! var -> sendbuf + !----------------------------------------- + do so=1,somax(halo) + + + +!coarray t(4)=mpi_wtime() + t(4) = xmp_wtime() + do ns=1,nsmax(so,halo) + ss=sendinfo(SIZE_COMM,ns,so,halo) + sl=sendinfo(LRGNID_COMM,ns,so,halo) + sb=sendinfo(BASE_COMM,ns,so,halo)*cmax + do m=1,varmax +!cdir outerunroll=8 + do k=1,kmax + do n=1,ss + sendbuf(n+(k-1)*ss+(m-1)*ss*kmax+sb,so) & + =var(sendlist(n,ns,so,halo),k,sl,clist(m)) + enddo + enddo + enddo + + + enddo + !----------------------------------------- +!coarray t(5)=mpi_wtime() + t(5) = xmp_wtime() + time_sbuf=time_sbuf+(t(5)-t(4)) + + + !----------------------------------------- + ! + !----------------------------------------- + ! call mpi_isend + !----------------------------------------- + + if (opt_comm_dbg) then + if (ssize(so,halo)*cmax>size(sendbuf(:,so))) then + write(ADM_log_fid,*) 'so',so,'buf-dim=',shape(sendbuf),'rsize=',ssize(so,halo),'cmax=',cmax + endif + endif + +!coarray +! call mpi_isend(sendbuf(1,so) & +! ,ssize(so,halo)*cmax & +! ,mpi_double_precision & +! ,destrank(so,halo) & +! ,sendtag(so,halo) & +! ,ADM_comm_run_world & +! ,areq(so+romax(halo)) & +! 
,ierr) + dst_img = tbl(this_image())[destrank(so,halo)+1] + caf_recvbuf(1:ssize(so,halo)*cmax,dst_img)[destrank(so,halo)+1] & + = sendbuf(1:ssize(so,halo)*cmax,so) + + if (opt_comm_dbg) then + if (ierr.ne.0) then + write(ADM_log_fid,*) 'mpi_isend info start==' + write(ADM_log_fid,*) 'ierr=',ierr + write(ADM_log_fid,*) 'so=',so + write(ADM_log_fid,*) 'ssize=',ssize(so,halo) + write(ADM_log_fid,*) 'cmax=',cmax + write(ADM_log_fid,*) 'areq=',areq(so+romax(halo)) + write(ADM_log_fid,*) 'mpi_isend info end==' + endif + ! [fix] 12/03/26 T.Seiki +!!$ dbg_areq_save(:,2)=areq(:) + dbg_areq_save(so+romax(halo),2)=areq(so+romax(halo)) + endif + + +!coarray t(6)=mpi_wtime() + t(6) = xmp_wtime() + time_send=time_send+(t(6)-t(5)) + size_total=size_total+ssize(so,halo)*cmax + comm_count=comm_count+1 + enddo !loop so + !----------------------------------------- + ! +!coarray t(7)=mpi_wtime() + t(7) = xmp_wtime() + !--------------------------------------------------- + ! var -> var (region to region copy in same rank) + !--------------------------------------------------- + do nc=1,ncmax_r2r(halo) + cs=copyinfo_r2r(SIZE_COPY,nc,halo) + cl=copyinfo_r2r(LRGNID_COPY,nc,halo) + scl=copyinfo_r2r(SRC_LRGNID_COPY,nc,halo) + do m=1,varmax +!cdir outerunroll=8 + do k=1,kmax + do n=1,cs + var(recvlist_r2r(n,nc,halo),k,cl ,clist(m)) & + =var(sendlist_r2r(n,nc,halo),k,scl,clist(m)) + enddo + enddo + enddo + + enddo + !----------------------------------------- + ! + !----------------------------------------- +!coarray t(8)=mpi_wtime() + t(8) = xmp_wtime() + time_copy=time_copy+(t(8)-t(7)) + acount=romax(halo)+somax(halo) +!coarray +! 
call mpi_waitall(acount,areq,stat,ierr) + sync all + + do ro=1,romax(halo) + do n=1,rsize(ro,halo)*cmax + recvbuf(n,ro) = caf_recvbuf(n,ro) + end do + end do + +!coarray t(9)=mpi_wtime() + t(9) = xmp_wtime() + time_wait=time_wait+(t(9)-t(8)) + + if (opt_comm_dbg) then !================== dbg start + if (ierr.ne.0) then + write(ADM_log_fid,*) 'mpi_wait info start==' + write(ADM_log_fid,*) 'ierr=',ierr + write(ADM_log_fid,*) 'acount=',acount +! write(ADM_log_fid,*) 'stat=',stat(:,1:acount) + write(ADM_log_fid,*) 'areq=',areq(1:acount) + write(ADM_log_fid,*) 'mpi_wait info end==' + endif + if (acount > size(areq)) then + write(ADM_log_fid,*) 'acount, size(areq)',acount,size(areq) + endif + ! [fix] 12/03/26 T.Seiki +!!$ dbg_areq_save(:,3)=areq(:) + dbg_areq_save(1:acount,3)=areq(1:acount) + endif !=================================== dbg end + + if (opt_comm_dbg) then !================== dbg start + dbg_ierr=.false. + do ro=1,romax(halo) + do n=1,rsize(ro,halo)*cmax + if (recvbuf(n,ro) == dbg_recvbuf_init) then + dbg_ierr=.true. + endif + if (recvbuf(n,ro) == dbg_sendbuf_init) then + dbg_ierr=.true. + endif + enddo + enddo + if (dbg_ierr) then + write(ADM_log_fid,*) 'communication is not completed!' + write(ADM_log_fid,*) 'dbg_tcount=',dbg_tcount + write(ADM_log_fid,*) 'romax=',romax,'rsize=',rsize(ro,halo),'cmax=',cmax + do ro=1,romax(halo) + do n=1,rsize(ro,halo)*cmax + if (recvbuf(n,ro) == dbg_recvbuf_init) then + write(ADM_log_fid,*) 'n=',n,'ro=',ro,'recvbuf=',recvbuf(n,ro) + endif + enddo + enddo + write(ADM_log_fid,*) 'areq after irecv:',dbg_areq_save(1:acount,1) + write(ADM_log_fid,*) 'areq after isend',dbg_areq_save(1:acount,2) + write(ADM_log_fid,*) 'areq after waitall',dbg_areq_save(1:acount,3) + + write(ADM_log_fid,*) 'ierr of mpi_waitall=',ierr + write(ADM_log_fid,*) 'acount of mpi_waitall=',acount +! 
write(ADM_log_fid,*) 'stat of mpi_waitall=',stat(:,1:acount) + + endif + dbg_tcount=dbg_tcount+1 + endif !=================================== dbg end + + + if (opt_comm_barrier) then +!coarray call mpi_barrier(ADM_comm_run_world,ierr) + sync all + if (ierr.ne.0) then + write(ADM_log_fid,*) 'mpi_barriert info start==' + write(ADM_log_fid,*) 'ierr=',ierr + write(ADM_log_fid,*) 'mpi_barrier info end==' + endif + endif + + + !----------------------------------------- + ! + do ro=1,romax(halo) + !----------------------------------------- + ! recvbuf -> var ( recieve in region ) + !----------------------------------------- + do nr=1,nrmax(ro,halo) + rs=recvinfo(SIZE_COMM,nr,ro,halo) + rl=recvinfo(LRGNID_COMM,nr,ro,halo) + rb=recvinfo(BASE_COMM,nr,ro,halo)*cmax + do m=1,varmax +!cdir outerunroll=8 + do k=1,kmax + do n=1,rs + var(recvlist(n,nr,ro,halo),k,rl,clist(m)) & + =recvbuf(n+(k-1)*rs+(m-1)*rs*kmax+rb,ro) + enddo + enddo + enddo + enddo + enddo !loop ro +!coarray t(10)=mpi_wtime() + t(10) = xmp_wtime() + time_rbuf=time_rbuf+(t(10)-t(9)) + + !----------------------------------------- + ! + !----------------------------------------- + ! copy data around singular point + !----------------------------------------- + do nc=1,ncmax_sgp(halo) + cs=copyinfo_sgp(SIZE_COPY,nc,halo) + cl=copyinfo_sgp(LRGNID_COPY,nc,halo) + scl=copyinfo_sgp(SRC_LRGNID_COPY,nc,halo) + do m=1,varmax +!cdir outerunroll=8 + do k=1,kmax + do n=1,cs + var(recvlist_sgp(n,nc,halo),k,cl ,clist(m)) & + =var(sendlist_sgp(n,nc,halo),k,scl,clist(m)) + enddo + enddo + enddo + + enddo +!coarray t(11)=mpi_wtime() + t(11) = xmp_wtime() + time_copy_sgp=time_copy_sgp+(t(11)-t(10)) + !! + !call mpi_barrier(ADM_comm_run_world,ierr) +!coarray t(12)=mpi_wtime() + t(12) = xmp_wtime() + time_bar2=time_bar2+(t(12)-t(11)) + time_total=time_total+(t(12)-t(0)) + ! 
+ end subroutine COMM_data_transfer_nopl + + + !----------------------------------------------------------------------------- + subroutine COMM_Stat_sum( localsum, globalsum ) + use mod_adm, only: & + ADM_COMM_RUN_WORLD, & + ADM_prc_all, & + ADM_prc_me + implicit none + + real(8), intent(in) :: localsum + real(8), intent(out) :: globalsum + +!coarray +! real(8) :: sendbuf(1) +! real(8) :: recvbuf(ADM_prc_all) +! +! integer :: ierr + +!20180208 integer localVal + !--------------------------------------------------------------------------- + + + if ( COMM_pl ) then +!coarray +! sendbuf(1) = localsum +! +! call MPI_Allgather( sendbuf, & +! 1, & +! MPI_DOUBLE_PRECISION, & +! recvbuf, & +! 1, & +! MPI_DOUBLE_PRECISION, & +! ADM_COMM_RUN_WORLD, & +! ierr ) +! +! globalsum = sum( recvbuf(:) ) +!20180208 localVal = localsum +!20180208 call co_sum(localVal) +!20180208 globalsum = localVal + globalsum = localsum + call co_sum(globalsum) + else + globalsum = localsum + endif + + return + end subroutine COMM_Stat_sum + + !----------------------------------------------------------------------------- + subroutine COMM_Stat_sum_eachlayer( kall, localsum, globalsum ) + use mod_adm, only: & + ADM_COMM_RUN_WORLD, & + ADM_prc_all, & + ADM_prc_me + implicit none + + integer, intent(in) :: kall + real(8), intent(in) :: localsum (kall) + real(8), intent(out) :: globalsum(kall) + + real(8) :: gathersum(ADM_prc_all,kall) + + integer :: displs (ADM_prc_all) + integer :: rcounts(ADM_prc_all) + + integer :: ierr + integer :: k, p + !--------------------------------------------------------------------------- + + do k = 1, kall + gathersum(ADM_prc_me,k) = localsum(k) + enddo + + do p = 1, ADM_prc_all + displs (p) = (p-1) * ADM_prc_all + 1 + rcounts(p) = 1 + enddo + + if ( COMM_pl ) then +!coarray +! call MPI_Allgatherv( MPI_IN_PLACE, & +! 1, & +! MPI_DOUBLE_PRECISION, & +! gathersum(1,1), & +! rcounts, & +! displs, & +! MPI_DOUBLE_PRECISION, & +! ADM_COMM_RUN_WORLD, & +! ierr ) +! +! 
do k = 1, kall +! globalsum(k) = sum( gathersum(:,k) ) +! enddo + else + do k = 1, kall + globalsum = gathersum(ADM_prc_me,k) + enddo + endif + + end subroutine COMM_Stat_sum_eachlayer + + !----------------------------------------------------------------------------- + subroutine COMM_Stat_avg( localavg, globalavg ) + use mod_adm, only: & + ADM_COMM_RUN_WORLD, & + ADM_prc_all, & + ADM_prc_me + implicit none + + real(8), intent(in) :: localavg + real(8), intent(out) :: globalavg + +!coarray +! real(8) :: sendbuf(1) +! real(8) :: recvbuf(ADM_prc_all) +! +! integer :: ierr +!20180208 +!20180208 real(8) localVal + !--------------------------------------------------------------------------- + + if ( COMM_pl ) then +!coarray +! sendbuf(1) = localavg +! +! call MPI_Allgather( sendbuf, & +! 1, & +! MPI_DOUBLE_PRECISION, & +! recvbuf, & +! 1, & +! MPI_DOUBLE_PRECISION, & +! ADM_COMM_RUN_WORLD, & +! ierr ) +! +! globalavg = sum( recvbuf(:) ) / real(ADM_prc_all,kind=8) +!20280208 localVal = localavg +!20280208 call co_sum(localVal) +!20280208 globalavg = localVal + globalavg = localavg + call co_sum(globalavg) + + globalavg = globalavg / real(ADM_prc_all,kind=8) + else + globalavg = localavg + endif + + !write(ADM_LOG_FID,*) 'COMM_Stat_avg', sendbuf(1), recvbuf(:) + + return + end subroutine COMM_Stat_avg + + !----------------------------------------------------------------------------- + subroutine COMM_Stat_max( localmax, globalmax ) +!coarray +! use mod_adm, only: & +! ADM_COMM_RUN_WORLD, & +! ADM_prc_all, & +! ADM_prc_me + implicit none + + real(8), intent(in) :: localmax + real(8), intent(out) :: globalmax + +!coarray +! real(8) :: sendbuf(1) +! real(8) :: recvbuf(ADM_prc_all) +! +! integer :: ierr +!20180208 real(8) localVal + !--------------------------------------------------------------------------- + +!coarray +! sendbuf(1) = localmax +! +! call MPI_Allgather( sendbuf, & +! 1, & +! MPI_DOUBLE_PRECISION, & +! recvbuf, & +! 1, & +! MPI_DOUBLE_PRECISION, & +! 
ADM_COMM_RUN_WORLD, & +! ierr ) +! +! globalmax = maxval( recvbuf(:) ) +!20180208 localVal = localmax +!20180208 call co_max(localVal) +!20180208 globalmax = localVal + globalmax = localmax + call co_max(globalmax) + + !write(ADM_LOG_FID,*) 'COMM_Stat_max', sendbuf(1), recvbuf(:) + + return + end subroutine COMM_Stat_max + + !----------------------------------------------------------------------------- + subroutine COMM_Stat_min( localmin, globalmin ) +!coarray +! use mod_adm, only: & +! ADM_COMM_RUN_WORLD, & +! ADM_prc_all, & +! ADM_prc_me + implicit none + + real(8), intent(in) :: localmin + real(8), intent(out) :: globalmin + +!coarray +! real(8) :: sendbuf(1) +! real(8) :: recvbuf(ADM_prc_all) +! +! integer :: ierr +!20180208 real(8) localVal + !--------------------------------------------------------------------------- + +!coarray +! sendbuf(1) = localmin +! +! call MPI_Allgather( sendbuf, & +! 1, & +! MPI_DOUBLE_PRECISION, & +! recvbuf, & +! 1, & +! MPI_DOUBLE_PRECISION, & +! ADM_COMM_RUN_WORLD, & +! ierr ) +! +! globalmin = minval( recvbuf(:) ) +! +!20180208 localVal = localmin +!20180208 call co_min(localVal) +!20180208 globalmin = localVal + globalmin = localmin + call co_min(globalmin) + + !write(ADM_LOG_FID,*) 'COMM_Stat_min', sendbuf(1), recvbuf(:) + + return + end subroutine COMM_Stat_min + +end module mod_comm +!------------------------------------------------------------------------------- From 1c6bd8da693b19a5c94b39b1588184d2048cdc0c Mon Sep 17 00:00:00 2001 From: Yoshikawa Hiroyuki Date: Sat, 6 Mar 2021 13:44:27 +0900 Subject: [PATCH 29/70] Modify function type definition --- FFB-MINI/src/miniapp_util.F | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FFB-MINI/src/miniapp_util.F b/FFB-MINI/src/miniapp_util.F index 7838dab..eb222c1 100755 --- a/FFB-MINI/src/miniapp_util.F +++ b/FFB-MINI/src/miniapp_util.F @@ -88,7 +88,7 @@ integer*4 function total(ival) c--- local integer :: ierr integer*4 :: total1 - integer*4 :: total + !Fujitsu! 
integer*4 :: total include 'mpif.h' ival=ival c -- From 04577c8b4c6ca379ec65504c2ef86746336fb7db Mon Sep 17 00:00:00 2001 From: Kazuma Tago Date: Fri, 5 Mar 2021 20:33:07 +0900 Subject: [PATCH 30/70] [WIP] Change coarray of 1 file to XMP-API routines. --- ...mpAPI_rimp2_rmp2energy_incore_v_mpiomp.F90 | 84 +++++++++++++++---- 1 file changed, 69 insertions(+), 15 deletions(-) diff --git a/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_rmp2energy_incore_v_mpiomp.F90 b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_rmp2energy_incore_v_mpiomp.F90 index 2014ead..1e24804 100755 --- a/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_rmp2energy_incore_v_mpiomp.F90 +++ b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_rmp2energy_incore_v_mpiomp.F90 @@ -8,6 +8,7 @@ SUBROUTINE RIMP2_RMP2Energy_InCore_V_MPIOMP Use MP2_Constant_Module, ONLY : Zero, One, Two, Three, P12 USE MPI_Module, ONLY : NProcs, MyRank, MPIIO, IORank, MPI_COMM_IO, MPI_COMM_MO, NProcsMO, MyRankMO, & & MPI_COMM_MAT, NProcsMat, MyRankMat + USE XMP_API ! IMPLICIT NONE ! @@ -91,10 +92,17 @@ SUBROUTINE RIMP2_RMP2Energy_InCore_V_MPIOMP integer :: chunk, myChunk !coarray - real(8), allocatable :: sbuf(:)[:] - real(8), allocatable :: rbuf(:)[:] +! real(8), allocatable :: sbuf(:)[:] +! real(8), allocatable :: rbuf(:)[:] + real(8), pointer :: sbuf(:) => null() + real(8), pointer :: rbuf(:) => null() + integer(8) :: sbuf_desc, rbuf_desc + integer(8), dimension(1) :: sbuf_lb,sbuf_ub, rbuf_lb, rbuf_ub + integer(8) :: sbuf_sec, rbuf_sec + integer bufsize integer, save :: jsta + integer(4) :: status !! Time_T3C = Zero @@ -197,7 +205,8 @@ SUBROUTINE RIMP2_RMP2Energy_InCore_V_MPIOMP ! CALL CPU_TIME(TimeBgn) !coarray call mpi_barrier(MPI_COMM_WORLD,ierr) ! not essential, just make measured time meaningful - sync all +! sync all + call xmp_sync_all(status) !! call cublas_init() ! 
allocate memory space for matrix A,B and C on GPU @@ -207,7 +216,8 @@ SUBROUTINE RIMP2_RMP2Energy_InCore_V_MPIOMP call cublas_alloc( devptr_C(id_st), m, n ) enddo !coarray call mpi_barrier(MPI_COMM_WORLD,ierr) ! not essential, just make measured time meaningful - sync all +! sync all + call xmp_sync_all(status) !! CALL CPU_TIME(TimeEnd) #ifdef DEBUG @@ -413,11 +423,30 @@ SUBROUTINE RIMP2_RMP2Energy_InCore_V_MPIOMP ! CALL MPI_IRecv(RecvBuf(1,commIndexEach(commPhase)), commSizeEach(commPhase), & ! MPI_DOUBLE_PRECISION, Jrankrecv_1, commPhase, MPI_COMM_MO, ireq(2), IErr) bufsize = commSizeEach(commPhase) - allocate(sbuf(bufsize)[*]) - allocate(rbuf(bufsize)[*]) +! allocate(sbuf(bufsize)[*]) + sbuf_lb(1)=1 + sbuf_ub(1)=bufsize + call xmp_new_coarray(sbuf_desc,8,1,sbuf_lb,sbuf_ub,1,img_dims) + call xmp_coarray_bind(sbuf_desc,sbuf) + +! allocate(rbuf(bufsize)[*]) + rbuf_lb(1)=1 + rbuf_ub(1)=bufsize + call xmp_new_coarray(rbuf_desc,8,1,rbuf_lb,rbuf_ub,1,img_dims) + call xmp_coarray_bind(rbuf_desc,rbuf) + + call xmp_new_array_section(sbuf_sec,1) + call xmp_new_array_section(rbuf_sec,1) + jsta = commIndexEach(commPhase) sbuf(1:bufsize) = SendBuf(1:bufsize,jsta) - rbuf(1:bufsize)[Jranksend_1+1] = sbuf(1:bufsize) + !rbuf(1:bufsize)[Jranksend_1+1] = sbuf(1:bufsize) + img_dims(1) = Jranksend_1+1 + call xmp_coarray_put(img_dims,rbuf_desc,rbuf_sec,sbuf_desc,sbuf_sec) + + call xmp_free_array_sections(sbuf_sec) + call xmp_free_array_sections(rbuf_sec) + !! endif CALL CPU_TIME(TimeEnd) @@ -574,10 +603,19 @@ SUBROUTINE RIMP2_RMP2Energy_InCore_V_MPIOMP !coarray ! CALL MPI_Wait(ireq(1), istat1, IErr) ! CALL MPI_Wait(ireq(2), istat2, IErr) - sync all +! sync all + call xmp_sync_all(status) RecvBuf(1:bufsize,jsta) = rbuf(1:bufsize) - if (allocated(sbuf)) deallocate(sbuf) - if (allocated(rbuf)) deallocate(rbuf) +! if (allocated(sbuf)) deallocate(sbuf) +! if (allocated(rbuf)) deallocate(rbuf) +! 
TODO: check + if (allocated(sbuf)) then + call xmp_coarray_deallocate(sbuf,status) + endif + if (allocated(rbuf)) then + call xmp_coarray_deallocate(rbuf,status) + endif + !! endif @@ -591,11 +629,25 @@ SUBROUTINE RIMP2_RMP2Energy_InCore_V_MPIOMP ! CALL MPI_IRecv(RecvBuf(1,commIndexEach(commPhase)), commSizeEach(commPhase), & ! MPI_DOUBLE_PRECISION, Jrankrecv_1, commPhase, MPI_COMM_MO, ireq(2), IErr) bufsize = commSizeEach(commPhase) - allocate(sbuf(bufsize)[*]) - allocate(rbuf(bufsize)[*]) +! allocate(sbuf(bufsize)[*]) + sbuf_lb(1)=1 + sbuf_ub(1)=bufsize + call xmp_new_coarray(sbuf_desc,8,1,sbuf_lb,sbuf_ub,1,img_dims) + call xmp_coarray_bind(sbuf_desc,sbuf) + +! allocate(rbuf(bufsize)[*]) + rbuf_lb(1)=1 + rbuf_ub(1)=bufsize + call xmp_new_coarray(rbuf_desc,8,1,rbuf_lb,rbuf_ub,1,img_dims) + call xmp_coarray_bind(rbuf_desc,rbuf) + jsta = commIndexEach(commPhase) sbuf(1:bufsize) = SendBuf(1:bufsize,jsta) - rbuf(1:bufsize)[Jranksend_1+1] = sbuf(1:bufsize) +! rbuf(1:bufsize)[Jranksend_1+1] = sbuf(1:bufsize) + call xmp_coarray_put(img_dims,rbuf_desc,rbuf_sec,sbuf_desc,sbuf_sec) + + call xmp_free_array_sections(sbuf_sec) + call xmp_free_array_sections(rbuf_sec) !! endif CALL CPU_TIME(TimeEnd) @@ -721,7 +773,8 @@ SUBROUTINE RIMP2_RMP2Energy_InCore_V_MPIOMP ! CALL CPU_TIME(TimeBgn) !coarray call mpi_barrier(MPI_COMM_WORLD,ierr) ! not essential, just make measured time meaningful - sync all +! sync all + call xmp_sync_all(status) !! ! free buffer on GPU do id_st = 1, NUM_STREAM @@ -731,7 +784,8 @@ SUBROUTINE RIMP2_RMP2Energy_InCore_V_MPIOMP enddo call cublas_fin() !coarray call mpi_barrier(MPI_COMM_WORLD,ierr) ! not essential, just make measured time meaningful - sync all +! sync all + call xmp_sync_all(status) !! CALL CPU_TIME(TimeEnd) #ifdef DEBUG From 565a1824f760fdc9f153d8afddf776de04e855ea Mon Sep 17 00:00:00 2001 From: Kazuma Tago Date: Mon, 8 Mar 2021 15:51:59 +0900 Subject: [PATCH 31/70] [WIP] Update and add files for temporal running by xmp-api. 
--- NTCHEM-MINI/src/mp2/GNUmakefile | 2 +- ...mpAPI_rimp2_riint2_mdint2_int2c_mpiomp.F90 | 3 +- ...mpAPI_rimp2_rmp2energy_incore_v_mpiomp.F90 | 45 ++++-- .../xmpAPI_rimp2_tran3c2_incore_v_mpiomp.F90 | 152 +++++++++++++++--- NTCHEM-MINI/src/util_lib/GNUmakefile | 13 +- .../src/util_lib/xmpAPI_util_finmpi.f90 | 22 +++ .../src/util_lib/xmpAPI_util_initmpi.F90 | 151 +++++++++++++++++ 7 files changed, 351 insertions(+), 37 deletions(-) create mode 100755 NTCHEM-MINI/src/util_lib/xmpAPI_util_finmpi.f90 create mode 100755 NTCHEM-MINI/src/util_lib/xmpAPI_util_initmpi.F90 diff --git a/NTCHEM-MINI/src/mp2/GNUmakefile b/NTCHEM-MINI/src/mp2/GNUmakefile index 18e6757..456cd5d 100755 --- a/NTCHEM-MINI/src/mp2/GNUmakefile +++ b/NTCHEM-MINI/src/mp2/GNUmakefile @@ -147,7 +147,7 @@ program : $(modules) $(objects) ../util_lib/libutil.a \ ../parallel_mod/*.o \ $(OMPLDFLAGS) $(MPILDFLAGS) $(NVLIB) \ - $(LAPACKLIB) $(BLASLIB) $(ATLASLIB) \ + $(LAPACKLIB) $(BLASLIB) $(ATLASLIB) $(OMNI_LIB) \ -o $(program) mvbin : diff --git a/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_riint2_mdint2_int2c_mpiomp.F90 b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_riint2_mdint2_int2c_mpiomp.F90 index d8d0603..c00d805 100755 --- a/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_riint2_mdint2_int2c_mpiomp.F90 +++ b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_riint2_mdint2_int2c_mpiomp.F90 @@ -232,7 +232,8 @@ SUBROUTINE RIMP2_RIInt2_MDInt2_Int2c_MPIOMP ! CALL MPI_Allreduce(RWork1, RIInt2c, NBC_RI, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_MAT, IErr) !coarray RIInt2c(1:NBC_RI) = RWork1(1:NBC_RI) - call co_sum(RIInt2c(1:NBC_RI)) +! call co_sum(RIInt2c(1:NBC_RI)) + CALL MPI_Allreduce(RWork1, RIInt2c, NBC_RI, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, IErr) !! 
DEALLOCATE(RWork1) CALL CPU_TIME(TimeEnd) diff --git a/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_rmp2energy_incore_v_mpiomp.F90 b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_rmp2energy_incore_v_mpiomp.F90 index 1e24804..dd38865 100755 --- a/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_rmp2energy_incore_v_mpiomp.F90 +++ b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_rmp2energy_incore_v_mpiomp.F90 @@ -103,6 +103,7 @@ SUBROUTINE RIMP2_RMP2Energy_InCore_V_MPIOMP integer bufsize integer, save :: jsta integer(4) :: status + integer(4) :: img_dims(1) !! Time_T3C = Zero @@ -151,7 +152,8 @@ SUBROUTINE RIMP2_RMP2Energy_InCore_V_MPIOMP !coarray ! CALL MPI_Allreduce( nGrp, nGrpMax, 1, MPI_INTEGER, MPI_MAX, MPI_COMM_WORLD, ierr ) nGrpMax = nGrp - call co_max(nGrpMax) + !call co_max(nGrpMax) + CALL MPI_Allreduce( nGrp, nGrpMax, 1, MPI_INTEGER, MPI_MAX, MPI_COMM_WORLD, ierr ) !! nGrp = nGrpMax @@ -435,18 +437,22 @@ SUBROUTINE RIMP2_RMP2Energy_InCore_V_MPIOMP call xmp_new_coarray(rbuf_desc,8,1,rbuf_lb,rbuf_ub,1,img_dims) call xmp_coarray_bind(rbuf_desc,rbuf) - call xmp_new_array_section(sbuf_sec,1) - call xmp_new_array_section(rbuf_sec,1) jsta = commIndexEach(commPhase) sbuf(1:bufsize) = SendBuf(1:bufsize,jsta) !rbuf(1:bufsize)[Jranksend_1+1] = sbuf(1:bufsize) img_dims(1) = Jranksend_1+1 - call xmp_coarray_put(img_dims,rbuf_desc,rbuf_sec,sbuf_desc,sbuf_sec) - call xmp_free_array_sections(sbuf_sec) - call xmp_free_array_sections(rbuf_sec) + call xmp_new_array_section(sbuf_sec,1) + call xmp_new_array_section(rbuf_sec,1) + call xmp_array_section_set_triplet(rbuf_sec,1,int(1,kind=8),int(bufsize,kind=8),1,status) + call xmp_array_section_set_triplet(sbuf_sec,1,int(1,kind=8),int(bufsize,kind=8),1,status) + + call xmp_coarray_put(img_dims,rbuf_desc,rbuf_sec,sbuf_desc,sbuf_sec,status) + + call xmp_free_array_section(sbuf_sec) + call xmp_free_array_section(rbuf_sec) !! endif CALL CPU_TIME(TimeEnd) @@ -609,11 +615,13 @@ SUBROUTINE RIMP2_RMP2Energy_InCore_V_MPIOMP ! if (allocated(sbuf)) deallocate(sbuf) ! 
if (allocated(rbuf)) deallocate(rbuf) ! TODO: check - if (allocated(sbuf)) then - call xmp_coarray_deallocate(sbuf,status) + if (associated(sbuf)) then + !deallocate(sbuf) + call xmp_coarray_deallocate(sbuf_desc,status) endif - if (allocated(rbuf)) then - call xmp_coarray_deallocate(rbuf,status) + if (associated(rbuf)) then + !deallocate(rbuf) + call xmp_coarray_deallocate(rbuf_desc,status) endif !! @@ -641,13 +649,18 @@ SUBROUTINE RIMP2_RMP2Energy_InCore_V_MPIOMP call xmp_new_coarray(rbuf_desc,8,1,rbuf_lb,rbuf_ub,1,img_dims) call xmp_coarray_bind(rbuf_desc,rbuf) + call xmp_new_array_section(sbuf_sec,1) + call xmp_new_array_section(rbuf_sec,1) + call xmp_array_section_set_triplet(rbuf_sec,1,int(1,kind=8),int(bufsize,kind=8),1,status) + call xmp_array_section_set_triplet(sbuf_sec,1,int(1,kind=8),int(bufsize,kind=8),1,status) + jsta = commIndexEach(commPhase) sbuf(1:bufsize) = SendBuf(1:bufsize,jsta) ! rbuf(1:bufsize)[Jranksend_1+1] = sbuf(1:bufsize) - call xmp_coarray_put(img_dims,rbuf_desc,rbuf_sec,sbuf_desc,sbuf_sec) + call xmp_coarray_put(img_dims,rbuf_desc,rbuf_sec,sbuf_desc,sbuf_sec,status) - call xmp_free_array_sections(sbuf_sec) - call xmp_free_array_sections(rbuf_sec) + call xmp_free_array_section(sbuf_sec) + call xmp_free_array_section(rbuf_sec) !! endif CALL CPU_TIME(TimeEnd) @@ -818,8 +831,10 @@ SUBROUTINE RIMP2_RMP2Energy_InCore_V_MPIOMP ! CALL MPI_Allreduce(E2TP, E2T, 1, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, IErr) E2S = E2SP E2T = E2TP - call co_sum(E2S) - call co_sum(E2T) +! call co_sum(E2S) +! call co_sum(E2T) + CALL MPI_Allreduce(E2SP, E2S, 1, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, IErr) + CALL MPI_Allreduce(E2TP, E2T, 1, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, IErr) !! 
CALL CPU_TIME(TimeEnd) WTimeEnd = MPI_WTIME() diff --git a/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_tran3c2_incore_v_mpiomp.F90 b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_tran3c2_incore_v_mpiomp.F90 index 515f34f..4d043e1 100755 --- a/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_tran3c2_incore_v_mpiomp.F90 +++ b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_tran3c2_incore_v_mpiomp.F90 @@ -8,6 +8,7 @@ SUBROUTINE RIMP2_Tran3c2_InCore_V_MPIOMP & NBF_RI_MyRank, IdxBF_RI_MyRank USE MP2_Constant_Module, ONLY : Zero, One USE MPI_Module, ONLY : MPI_COMM_MO, NProcs, MyRank, NProcsMO, MyRankMO, NProcsMat, MyRankMat + USE XMP_API ! IMPLICIT NONE ! @@ -32,7 +33,11 @@ SUBROUTINE RIMP2_Tran3c2_InCore_V_MPIOMP REAL(8) :: TimeBgn, TimeEnd, WTimeBgn, WTimeEnd !coarray ! INTEGER, ALLOCATABLE :: IdxBF_RI_Irank(:) - INTEGER, ALLOCATABLE :: IdxBF_RI_Irank(:)[:] +! INTEGER, ALLOCATABLE :: IdxBF_RI_Irank(:)[:] + INTEGER, pointer :: IdxBF_RI_Irank(:) => null() + INTEGER(8) :: IdxBF_RI_Irank_desc + integer(8), dimension(1) :: IdxBF_RI_Irank_lb,IdxBF_RI_Irank_ub + integer(8) :: IdxBF_RI_Irank_sec !! INTEGER, ALLOCATABLE :: istat1(:), istat2(:), istat3(:), istat4(:) #ifdef USE_GPU @@ -49,7 +54,32 @@ SUBROUTINE RIMP2_Tran3c2_InCore_V_MPIOMP #endif !coarray ! REAL(8), ALLOCATABLE :: T2BufSend(:,:,:), T2BufRecv(:,:,:) - REAL(8), ALLOCATABLE :: T2BufSend(:,:,:), T2BufRecv(:,:,:)[:] +! REAL(8), ALLOCATABLE :: T2BufSend(:,:,:), T2BufRecv(:,:,:)[:] + REAL(8), ALLOCATABLE :: T2BufSend(:,:,:) +!local xmp-api + ! xmp_array_section_t + integer(8) :: T2BufSend_local_sec + ! xmp_desc_t + integer(8) :: T2BufSend_local_desc + ! array size + integer(8),dimension(3) :: T2BufSend_lb, T2BufSend_ub + + REAL(8), pointer :: T2BufRecv(:,:,:) => null() + INTEGER(8) :: T2BufRecv_desc + integer(8), dimension(3) :: T2BufRecv_lb,T2BufRecv_ub + integer(8) :: T2BufRecv_sec +! local xmp-api + ! xmp_array_section_t + integer(8) :: IdxBF_RI_MyRank_local_sec + ! xmp_desc_t + integer(8) :: IdxBF_RI_MyRank_local_desc + ! 
xmp_array_section_t + !integer(8) :: IdxBF_RI_MyRank_local + integer(8),dimension(1) :: IdxBF_RI_MyRank_lb, IdxBF_RI_MyRank_ub + + integer(4) status + integer(4) :: img_dims(1) + !! ! REAL(8) :: Time_T2C, Time_T3, WTime_T2C, WTime_T3 @@ -127,7 +157,14 @@ SUBROUTINE RIMP2_Tran3c2_InCore_V_MPIOMP ALLOCATE(T2Int(NBF_RI,MXNActO,LenOccBat, Num_Buf)) !coarray ! ALLOCATE(IdxBF_RI_Irank(MXNBF_RI_MyRank)) - ALLOCATE(IdxBF_RI_Irank(MXNBF_RI_MyRank)[*]) +! ALLOCATE(IdxBF_RI_Irank(MXNBF_RI_MyRank)[*]) + + IdxBF_RI_Irank_lb(1) = 1 + IdxBF_RI_Irank_ub(1) = MXNBF_RI_MyRank + call xmp_new_coarray(IdxBF_RI_Irank_desc,4,1,IdxBF_RI_Irank_lb, & + IdxBF_RI_Irank_ub,1,img_dims) + call xmp_coarray_bind(IdxBF_RI_Irank_desc,IdxBF_RI_Irank) + !! ALLOCATE(istat1(MPI_STATUS_SIZE)) ALLOCATE(istat2(MPI_STATUS_SIZE)) @@ -135,9 +172,29 @@ SUBROUTINE RIMP2_Tran3c2_InCore_V_MPIOMP ALLOCATE(istat4(MPI_STATUS_SIZE)) ALLOCATE(T2BufSend(MXNActO,MXNBF_RI_MyRank,LenOccBat)) + T2BufSend_lb(1) = 1 + T2BufSend_lb(2) = 1 + T2BufSend_lb(3) = 1 + T2BufSend_ub(1) = LenOccBat + T2BufSend_ub(2) = MXNBF_RI_MyRank + T2BufSend_ub(3) = MXNActO + call xmp_new_local_array(T2BufSend_local_desc,8,3,T2BufSend_lb, & + T2BufSend_ub,T2BufSend) + ! TODO:chack unnecessary? + !call xmp_coarray_bind(T2BufSend_local_desc,T2BufSend) + + !coarray ! ALLOCATE(T2BufRecv(MXNActO,MXNBF_RI_MyRank,LenOccBat)) - ALLOCATE(T2BufRecv(MXNActO,MXNBF_RI_MyRank,LenOccBat)[*]) +! ALLOCATE(T2BufRecv(MXNActO,MXNBF_RI_MyRank,LenOccBat)[*]) + T2BufRecv_lb(1)=1 + T2BufRecv_ub(1)=LenOccBat + T2BufRecv_lb(2)=1 + T2BufRecv_ub(2)=MXNBF_RI_MyRank + T2BufRecv_lb(3)=1 + T2BufRecv_ub(3)=MXNActO + call xmp_new_coarray(T2BufRecv_desc,8,3,T2BufRecv_lb,T2BufRecv_ub,1,img_dims) + call xmp_coarray_bind(T2BufRecv_desc,T2BufRecv) !! ! ! test @@ -183,7 +240,8 @@ SUBROUTINE RIMP2_Tran3c2_InCore_V_MPIOMP ! test ! CALL MPI_Barrier( MPI_COMM_MO, ierr ) - sync all +! sync all + call xmp_sync_all(status) ! ! 
!MPI Parallel @@ -220,7 +278,8 @@ SUBROUTINE RIMP2_Tran3c2_InCore_V_MPIOMP END DO END DO ! - sync all +! sync all + call xmp_sync_all(status) WTimeBgn = MPI_WTIME() CALL CPU_TIME(TimeBgn) @@ -231,9 +290,32 @@ SUBROUTINE RIMP2_Tran3c2_InCore_V_MPIOMP ! & MPI_COMM_MO, ireq(1), IErr) ! CALL MPI_Wait(ireq(1), istat1, IErr) ! CALL MPI_Wait(ireq(2), istat2, IErr) - IdxBF_RI_Irank(1:NBF_RI_MyRank(MyRankMO))[Iranksend+1] = & - IdxBF_RI_MyRank(1:NBF_RI_MyRank(MyRankMO)) - sync all + + IdxBF_RI_MyRank_lb(1) = 1 + IdxBF_RI_MyRank_ub(1) = NBF_RI + call xmp_new_local_array(IdxBF_RI_MyRank_local_desc,4,1,IdxBF_RI_MyRank_lb, & + IdxBF_RI_MyRank_ub,IdxBF_RI_MyRank) + ! TODO:chack unnecessary? + !call xmp_coarray_bind(IdxBF_RI_MyRank_local_desc,IdxBF_RI_MyRank) + + call xmp_new_array_section(IdxBF_RI_MyRank_local_sec,1) + call xmp_new_array_section(IdxBF_RI_Irank_sec,1) + + call xmp_array_section_set_triplet(IdxBF_RI_MyRank_local_sec, & + 1,int(1,kind=8),int(NBF_RI_MyRank(MyRankMO),kind=8),1,status) + call xmp_array_section_set_triplet(IdxBF_RI_Irank_sec, & + 1,int(1,kind=8),int(NBF_RI_MyRank(MyRankMO),kind=8),1,status) + img_dims(1) = Iranksend+1 + +! IdxBF_RI_Irank(1:NBF_RI_MyRank(MyRankMO))[Iranksend+1] = & +! IdxBF_RI_MyRank(1:NBF_RI_MyRank(MyRankMO)) + call xmp_coarray_put_local(img_dims,IdxBF_RI_Irank_desc,IdxBF_RI_Irank_sec, & + IdxBF_RI_MyRank_local_desc,IdxBF_RI_MyRank_local_sec,status) +! sync all + call xmp_sync_all(status) + + call xmp_free_array_section(IdxBF_RI_Irank_sec) + call xmp_free_array_section(IdxBF_RI_MyRank_local_sec) !! !coarray @@ -241,10 +323,36 @@ SUBROUTINE RIMP2_Tran3c2_InCore_V_MPIOMP ! CALL MPI_ISend(T2BufSend, NT2BufSize, MPI_DOUBLE_PRECISION, Iranksend, 1, MPI_COMM_MO, ireq(3), IErr) ! CALL MPI_Wait(ireq(3), istat3, IErr) ! CALL MPI_Wait(ireq(4), istat4, IErr) - T2BufRecv(:,:,:)[Iranksend+1] = T2BufSend(:,:,:) - sync all -!! 
+ call xmp_new_array_section(T2BufRecv_sec,3) + call xmp_new_array_section(T2BufSend_local_sec,3) + + call xmp_array_section_set_triplet(T2BufSend_local_sec, & + 1,int(1,kind=8),int(LenOccBat,kind=8),1,status) + call xmp_array_section_set_triplet(T2BufSend_local_sec, & + 2,int(1,kind=8),int(MXNBF_RI_MyRank,kind=8),1,status) + call xmp_array_section_set_triplet(T2BufSend_local_sec, & + 3,int(1,kind=8),int(MXNActO,kind=8),1,status) + + call xmp_array_section_set_triplet(T2BufRecv_sec, & + 1,int(1,kind=8),int(LenOccBat,kind=8),1,status) + call xmp_array_section_set_triplet(T2BufRecv_sec, & + 2,int(1,kind=8),int(MXNBF_RI_MyRank,kind=8),1,status) + call xmp_array_section_set_triplet(T2BufRecv_sec, & + 3,int(1,kind=8),int(MXNActO,kind=8),1,status) + + !T2BufRecv(:,:,:)[Iranksend+1] = T2BufSend(:,:,:) + !sync all + img_dims(1) = Iranksend+1 + call xmp_coarray_put_local(img_dims, & + T2BufRecv_desc, T2BufRecv_sec, & + T2BufSend_local_desc, T2BufSend_local_sec,status) +! sync all + call xmp_sync_all(status) + + call xmp_free_array_section(T2BufSend_local_sec) + call xmp_free_array_section(T2BufRecv_sec) +!! CALL CPU_TIME(TimeEnd) WTimeEnd = MPI_WTIME() Time_T2C = Time_T2C + TimeEnd - TimeBgn @@ -265,7 +373,8 @@ SUBROUTINE RIMP2_Tran3c2_InCore_V_MPIOMP if ( mod(Irank_diff, 4) == 0 ) then !coarray ! CALL MPI_Barrier( MPI_COMM_MO, ierr ) - sync all +! sync all + call xmp_sync_all(status) !! endif @@ -374,14 +483,21 @@ SUBROUTINE RIMP2_Tran3c2_InCore_V_MPIOMP ! ! o deallocate memory ! - DEALLOCATE(T2BufSend) - DEALLOCATE(T2BufRecv) -! + !DEALLOCATE(T2BufSend) +! call xmp_coarray_deallocate(T2BufSend_local_desc,status) + + !DEALLOCATE(T2BufRecv) + call xmp_coarray_deallocate(T2BufRecv_desc,status) + + !DEALLOCATE(IdxBF_RI_MyRank) +! 
call xmp_coarray_deallocate(IdxBF_RI_MyRank_local_desc,status) + + !DEALLOCATE(IdxBF_RI_Irank) + call xmp_coarray_deallocate(IdxBF_RI_Irank_desc,status) + DEALLOCATE(RI2cInv) DEALLOCATE(T2Int) - DEALLOCATE(IdxBF_RI_MyRank) DEALLOCATE(NBF_RI_MyRank) - DEALLOCATE(IdxBF_RI_Irank) DEALLOCATE(istat1) DEALLOCATE(istat2) DEALLOCATE(istat3) diff --git a/NTCHEM-MINI/src/util_lib/GNUmakefile b/NTCHEM-MINI/src/util_lib/GNUmakefile index 8f95480..2c9bffe 100755 --- a/NTCHEM-MINI/src/util_lib/GNUmakefile +++ b/NTCHEM-MINI/src/util_lib/GNUmakefile @@ -4,6 +4,15 @@ # include ../../config/makeconfig.xmp +ifeq ($(USE_XMP_API),yes) +UTIL_INITMPI=xmpAPI_util_initmpi +UTIL_FINMPI=xmpAPI_util_finmpi +else +UTIL_INITMPI=util_initmpi +UTIL_FINMPI=util_finmpi +endif + + objects_common = \ util_linout.o \ util_matout.o \ @@ -13,8 +22,8 @@ util_lowtrmzero.o \ objects_mpi = \ util_abortmpi.o \ -util_initmpi.o \ -util_finmpi.o \ +$(UTIL_INITMPI).o \ +$(UTIL_FINMPI).o \ objec77 = \ diff --git a/NTCHEM-MINI/src/util_lib/xmpAPI_util_finmpi.f90 b/NTCHEM-MINI/src/util_lib/xmpAPI_util_finmpi.f90 new file mode 100755 index 0000000..bd16228 --- /dev/null +++ b/NTCHEM-MINI/src/util_lib/xmpAPI_util_finmpi.f90 @@ -0,0 +1,22 @@ + SUBROUTINE Util_FinMPI +! + USE MPI_Module, ONLY : MPIMain + USE XMP_API +! +! o Terminate MPI +! + IMPLICIT NONE +! +!coarray +! INCLUDE "mpif.h" +! +! INTEGER :: IErr +! +! CALL MPI_FINALIZE(IErr) +! + CALL xmp_api_finalize + IF (MPIMain) THEN + WRITE(*, '("MPI has been terminated")') + END IF +! + END SUBROUTINE diff --git a/NTCHEM-MINI/src/util_lib/xmpAPI_util_initmpi.F90 b/NTCHEM-MINI/src/util_lib/xmpAPI_util_initmpi.F90 new file mode 100755 index 0000000..cb6cd46 --- /dev/null +++ b/NTCHEM-MINI/src/util_lib/xmpAPI_util_initmpi.F90 @@ -0,0 +1,151 @@ + SUBROUTINE Util_InitMPI(Hybrid) +! 
+!!$ USE OMP_LIB + USE MPI_Module, ONLY : NThreads, NProcs, MyRank, NProcsIO, MyRankIO, MainRank, IORank, & + & MPIMain, MPIIO, NCorePerIO, MPI_COMM_IO, MyGroupIO, NumGroupIO, MPI_COMM_Node, & + & NCorePerMat, MPI_COMM_MAT, NProcsMat, MyRankMat, MyGroupMat, NumGroupMat, & + & MPI_COMM_MO, NProcsMO, MyRankMO, MyGroupMO, NumGroupMO + USE XMP_API +! +! o Initialize MPI +! + IMPLICIT NONE +! + INCLUDE "mpif.h" +! +#ifdef MPIINT8 +#define MPI_INTEGER MPI_INTEGER8 +#endif +! + LOGICAL, INTENT(IN) :: Hybrid +! + INTEGER, PARAMETER :: IO1 = 5 + INTEGER :: MyColor, MyKey + INTEGER :: IErr + INTEGER :: MyRankNode, NProcsNode +! + NAMELIST /Parallel/ NCorePerIO, NCorePerMat +! +!coarray +! CALL MPI_INIT(IErr) +! CALL MPI_COMM_SIZE(MPI_COMM_WORLD, NProcs, IErr) +! CALL MPI_COMM_RANK(MPI_COMM_WORLD, MyRank, IErr) + + call xmp_api_init + + !NProcs = num_images() + CALL MPI_COMM_SIZE(MPI_COMM_WORLD, NProcs, IErr) + !MyRank = this_image()-1 + MyRank = xmp_this_image()-1 +! + MPIMain = (MyRank == MainRank) + NCorePerIO = NProcs + NCorePerMat = 1 +! +! o Read NCorePerIO from INPUT +! + IF (MPIMain) THEN +! + OPEN(UNIT=IO1, FILE='INPUT', STATUS='OLD', ACCESS='SEQUENTIAL', ERR=100) + GO TO 200 + 100 CONTINUE + CALL Util_AbortMPI('Error in opening input file') + 200 CONTINUE +! + REWIND(IO1) + READ(IO1, Parallel, END=300, ERR=300) + GO TO 400 + 300 CONTINUE + CALL Util_AbortMPI('Error: Check NAMELIST PARALLEL') + 400 CONTINUE + CLOSE(IO1) +! + END IF +! +!coarray +! CALL MPI_BCAST(NCorePerIO, 1, MPI_INTEGER, MainRank, MPI_COMM_WORLD, IErr) +! CALL MPI_BCAST(NCorePerMat, 1, MPI_INTEGER, MainRank, MPI_COMM_WORLD, IErr) +! call co_broadcast(NCorePerIO, MainRank+1) +! call co_broadcast(NCorePerMat, MainRank+1) + CALL MPI_BCAST(NCorePerIO, 1, MPI_INTEGER, MainRank, MPI_COMM_WORLD, IErr) + CALL MPI_BCAST(NCorePerMat, 1, MPI_INTEGER, MainRank, MPI_COMM_WORLD, IErr) +! +! o Local Communicator for file I/O +! Definition of MyColor and MyKey may be dependent on hardware +! 
+ MyColor = MyRank / NCorePerIO + MyKey = MOD(MyRank, NCorePerIO) +! + CALL MPI_COMM_SPLIT(MPI_COMM_WORLD, MyColor, MyKey, MPI_COMM_IO, IErr) + CALL MPI_COMM_SIZE(MPI_COMM_IO, NProcsIO, IErr) + CALL MPI_COMM_RANK(MPI_COMM_IO, MyRankIO, IErr) + MyGroupIO = MyColor + NumGroupIO = NProcs / NCorePerIO + IF (MOD(NProcs, NCorePerIO) /= 0) NumGroupIO = NumGroupIO + 1 +! +! o Assign I/O rank +! MainRank needs to be the IORank of its IO communicator +! + MPIIO = (MyRankIO == IORank) +! + IF (MPIMain .AND. (.NOT. MPIIO)) THEN + CALL Util_AbortMPI('Error: MainRank must be IORank') + END IF +! +! o Global Comminicator involving only IORank +! + MyColor = 0 + IF (MPIIO) MyColor = 1 + MyKey = MyRank / NCorePerIO ! May depend on hardware +! + CALL MPI_COMM_SPLIT(MPI_COMM_WORLD, MyColor, MyKey, MPI_COMM_Node, IErr) + IF (MPIIO) THEN + CALL MPI_COMM_SIZE(MPI_COMM_Node, NProcsNode, IErr) + CALL MPI_COMM_RANK(MPI_COMM_Node, MyRankNode, IErr) + END IF +! +! o Local Communicator for matrix multiplication +! Definition of MyColor and MyKey may be dependent on hardware +! + MyColor = MyRank / NCorePerMat + MyKey = MOD(MyRank, NCorePerMat) +! + CALL MPI_COMM_SPLIT(MPI_COMM_WORLD, MyColor, MyKey, MPI_COMM_MAT, IErr) + CALL MPI_COMM_SIZE(MPI_COMM_MAT, NProcsMat, IErr) + CALL MPI_COMM_RANK(MPI_COMM_MAT, MyRankMat, IErr) + MyGroupMat = MyColor + NumGroupMat = NProcs / NCorePerMat + IF (MOD(NProcs, NCorePerMat) /= 0) NumGroupMat = NumGroupMat + 1 +! +! o Local Comminicator for orbital distribution +! + MyColor = MOD(MyRank, NCorePerMat) + MyKey = MyRank / NCorePerMat +! + CALL MPI_COMM_SPLIT(MPI_COMM_WORLD, MyColor, MyKey, MPI_COMM_MO, IErr) + CALL MPI_COMM_SIZE(MPI_COMM_MO, NProcsMO, IErr) + CALL MPI_COMM_RANK(MPI_COMM_MO, MyRankMO, IErr) + MyGroupMO = MyColor + NumGroupMO = NProcs / NCorePerMat + IF (MOD(NProcs, NCorePerMat) /= 0) NumGroupMO = NumGroupMO + 1 +! 
+ IF (MPIMain) THEN + WRITE(*, '("MPI has been initialized successfully")') + WRITE(*, '("The job is running on", I8, " processes")') NProcs + WRITE(*, '("Number of local I/O groups: ", I7)') NumGroupIO + WRITE(*, '("Number of local matrix groups: ", I7)') NumGroupMat + WRITE(*, '("Number of local MO groups: ", I7)') NumGroupMO + END IF +! +! o MPI/OpenMP hybrid +! + IF (Hybrid) THEN +! NThreads = OMP_GET_MAX_THREADS() +! CALL OMP_SET_DYNAMIC(.FALSE.) +! CALL OMP_SET_NUM_THREADS(NThreads) +! IF (MPIMain) THEN +! WRITE(*, '("Number of threads :", I3)') NThreads +! END IF + END IF +! + END SUBROUTINE From 45b7b148d9021b6d67718aea9c4b2f86b3d74e19 Mon Sep 17 00:00:00 2001 From: Kazuma Tago Date: Mon, 8 Mar 2021 16:48:14 +0900 Subject: [PATCH 32/70] [WIP] Add temporarily configuration files. --- NTCHEM-MINI/config/makeconfig_tmp_xmpAPI | 80 +++++++++++++++++++ .../platforms/config_mine.xmpAPI_gfortran | 18 +++++ 2 files changed, 98 insertions(+) create mode 100644 NTCHEM-MINI/config/makeconfig_tmp_xmpAPI create mode 100755 NTCHEM-MINI/platforms/config_mine.xmpAPI_gfortran diff --git a/NTCHEM-MINI/config/makeconfig_tmp_xmpAPI b/NTCHEM-MINI/config/makeconfig_tmp_xmpAPI new file mode 100644 index 0000000..f0177c6 --- /dev/null +++ b/NTCHEM-MINI/config/makeconfig_tmp_xmpAPI @@ -0,0 +1,80 @@ +# + NTQC_TOP = /home/tago.kazuma/data/20210301/XMP-Applications/NTCHEM-MINI + LAPACKLIB = -llapack + BLASLIB = -lblas + ATLASLIB = + HOSTTYPE = linux64_xmp_api_omp_gfortran + + BIN = /home/tago.kazuma/data/20210301/XMP-Applications/NTCHEM-MINI/bin + LIB = /home/tago.kazuma/data/20210301/XMP-Applications/NTCHEM-MINI/lib + INCLUDE = /home/tago.kazuma/data/20210301/XMP-Applications/NTCHEM-MINI/include + TESTS = /home/tago.kazuma/data/20210301/XMP-Applications/NTCHEM-MINI/tests + SCRATCH = /home/tago.kazuma/scr/ntchem + LOCALBIN = . + LOCALLIB = . + LOCALINC = . 
+ PARALLEL = mpiomp + MPIHOME=/usr/local/openmpi-2.1.1.gnu + #MPIHOME=/usr/local/openmpi-1.8.1.gnu +# XMP-API + USE_XMP_API = yes +# TODO + OMNI_HOME=/data/nfsWORK4/omni_gnu + OMNI_INC=$(OMNI_HOME)/include + OMNI_LIB=-L/data/nfsWORK4/omni_gnu/lib -lxmp -std=gnu99 -lm -fopenmp + + + LDFLAGS_NOMAIN = + + TARGET = LINUX64 + + DMACRO = -UDEBUG +# DMACRO+=-DSUPPORT_R16 +# DMACRO+=-DHAVE_ERF + + INC = -I$(INCLUDE) -I$(LOCALINC) -I$(MPIHOME)/include -I$(OMNI_INC) + MOD = -J$(LOCALINC) + INCMOD = $(INC) $(MOD) + +# FCONVERT = + + F77C = mpif90 + F77FLAGS = -cpp -fopenmp $(DMACRO) $(INCMOD) -m64 -O2 -std=legacy -DNOQUAD + F77FLAGSNOOPT = $(DMACRO) $(INCMOD) -m64 -std=legacy -DNOQUAD -Wuninitialized -Wall -Wunderflow -fbounds-check + + F90C = mpif90 + F90FLAGS = -cpp -fopenmp $(DMACRO) $(INCMOD) -m64 -O2 -std=legacy -DNOQUAD $(OMNI_LIB) -I$(OMNI_INC) + F90FLAGSNOOPT = $(DMACRO) $(INCMOD) -m64 -std=legacy -Wuninitialized -Wall -Wunderflow -fbounds-check -DNOQUAD + + MODSUFFIX = mod + + CC = gcc + CFLAGS = $(INC) -O3 + + CXX = g++ + CXXFLAGS = $(INC) -O3 + CXXLIB = -lstdc++ + + MPIFLAGS = -UMPIINT8 + MPILDFLAGS = + + OMPFLAGS = -fopenmp + OMPLDFLAGS = -fopenmp + +# LD = xmpf90 -fc=gfortran + LD = mpif90 + LDFLAGS = -L$(LIB) $(MPILIB) -I$(OMNI_INC) + + AR = ar + ARFLAGS = cr + RANLIB = ranlib + + MAKE = make + + SHELL = /bin/sh + MV = /bin/mv -f + RM = /bin/rm -f + CP = /bin/cp -f + MKDIR = /bin/mkdir + LN = /bin/ln + diff --git a/NTCHEM-MINI/platforms/config_mine.xmpAPI_gfortran b/NTCHEM-MINI/platforms/config_mine.xmpAPI_gfortran new file mode 100755 index 0000000..3a41172 --- /dev/null +++ b/NTCHEM-MINI/platforms/config_mine.xmpAPI_gfortran @@ -0,0 +1,18 @@ +# general PC cluster with GNU (gfortran) based mpif90 + ATLAS library +# Set ATLAS_DIR to the ATLAS library directory path + +# ATLAS_DIR=${HOME}/atlas/lib +ATLAS_DIR=/usr/lib +LDFLAGS= + +./config/configure \ +--lapack='-llapack' \ +--blas='-lblas' \ +--atlas= \ +linux64_xmp_omp_gfortran + +cd ./config +ln -sf makeconfig 
makeconfig.xmp + +# linux64_mpif90_omp + From f6c884056971b0b0e74d7c98cf107d758296c004 Mon Sep 17 00:00:00 2001 From: Kazuma Tago Date: Mon, 8 Mar 2021 18:20:33 +0900 Subject: [PATCH 33/70] Add loc() routines to xmp_new_local_array arg. --- .../src/mp2/xmpAPI_rimp2_tran3c2_incore_v_mpiomp.F90 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_tran3c2_incore_v_mpiomp.F90 b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_tran3c2_incore_v_mpiomp.F90 index 4d043e1..ff4f2d0 100755 --- a/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_tran3c2_incore_v_mpiomp.F90 +++ b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_tran3c2_incore_v_mpiomp.F90 @@ -179,7 +179,7 @@ SUBROUTINE RIMP2_Tran3c2_InCore_V_MPIOMP T2BufSend_ub(2) = MXNBF_RI_MyRank T2BufSend_ub(3) = MXNActO call xmp_new_local_array(T2BufSend_local_desc,8,3,T2BufSend_lb, & - T2BufSend_ub,T2BufSend) + T2BufSend_ub,loc(T2BufSend)) ! TODO:chack unnecessary? !call xmp_coarray_bind(T2BufSend_local_desc,T2BufSend) @@ -294,9 +294,7 @@ SUBROUTINE RIMP2_Tran3c2_InCore_V_MPIOMP IdxBF_RI_MyRank_lb(1) = 1 IdxBF_RI_MyRank_ub(1) = NBF_RI call xmp_new_local_array(IdxBF_RI_MyRank_local_desc,4,1,IdxBF_RI_MyRank_lb, & - IdxBF_RI_MyRank_ub,IdxBF_RI_MyRank) - ! TODO:chack unnecessary? - !call xmp_coarray_bind(IdxBF_RI_MyRank_local_desc,IdxBF_RI_MyRank) + IdxBF_RI_MyRank_ub,loc(IdxBF_RI_MyRank)) call xmp_new_array_section(IdxBF_RI_MyRank_local_sec,1) call xmp_new_array_section(IdxBF_RI_Irank_sec,1) @@ -485,12 +483,14 @@ SUBROUTINE RIMP2_Tran3c2_InCore_V_MPIOMP ! !DEALLOCATE(T2BufSend) ! call xmp_coarray_deallocate(T2BufSend_local_desc,status) + call xmp_free_local_array(T2BufSend_local_desc) !DEALLOCATE(T2BufRecv) call xmp_coarray_deallocate(T2BufRecv_desc,status) !DEALLOCATE(IdxBF_RI_MyRank) ! 
call xmp_coarray_deallocate(IdxBF_RI_MyRank_local_desc,status) + call xmp_free_local_array(IdxBF_RI_MyRank_local_desc) !DEALLOCATE(IdxBF_RI_Irank) call xmp_coarray_deallocate(IdxBF_RI_Irank_desc,status) From ebf5cf0e10a869bd34449d322657fce3ebd57e76 Mon Sep 17 00:00:00 2001 From: tozaki_chisae Date: Mon, 8 Mar 2021 18:39:56 +0900 Subject: [PATCH 34/70] [WIP] modify 5 files. --- FFB-MINI/src/dd_mpi/dd_mpi.F90 | 6 ++++-- FFB-MINI/src/ffb_mini_main.F90 | 12 +++++++++--- FFB-MINI/src/les3x.F | 15 +++++++++++++-- FFB-MINI/src/ma_prof/src/maprof.c | 4 ++++ FFB-MINI/src/make_setting | 3 ++- 5 files changed, 32 insertions(+), 8 deletions(-) diff --git a/FFB-MINI/src/dd_mpi/dd_mpi.F90 b/FFB-MINI/src/dd_mpi/dd_mpi.F90 index c70a584..39fdf40 100755 --- a/FFB-MINI/src/dd_mpi/dd_mpi.F90 +++ b/FFB-MINI/src/dd_mpi/dd_mpi.F90 @@ -413,8 +413,10 @@ SUBROUTINE DDCOM2(SEND,RECV) CALL MPI_ALLREDUCE(SEND,RECV,1,MPI_REAL8,MPI_SUM,MPI_COMM_WORLD,IERR) #else RECV = SEND -! CALL MPI_ALLREDUCE(SEND,RECV,1,MPI_REAL ,MPI_SUM,MPI_COMM_WORLD,IERR) - CALL CO_SUM(SEND,RECV) +! Fujitsu start 202103 + CALL MPI_ALLREDUCE(SEND,RECV,1,MPI_REAL ,MPI_SUM,MPI_COMM_WORLD,IERR) +! CALL CO_SUM(SEND,RECV) +! Fujitsu end 202103 !C$XMP REDUCTION(+:RECV) on PDDCOM2 !C$XMP BCAST (RECV) FROM PDDCOM2(1) diff --git a/FFB-MINI/src/ffb_mini_main.F90 b/FFB-MINI/src/ffb_mini_main.F90 index d3996b4..1a40c72 100755 --- a/FFB-MINI/src/ffb_mini_main.F90 +++ b/FFB-MINI/src/ffb_mini_main.F90 @@ -1,7 +1,9 @@ program ffb_mini - !use mpi +!Fj + use mpi use makemesh - include "mpif.h" +!Fj +! include "mpif.h" !implicit none integer :: ierr @@ -34,7 +36,8 @@ program ffb_mini intrinsic :: command_argument_count - !call MPI_Init(ierr) +!Fj + call MPI_Init(ierr) call MPI_Comm_rank(MPI_COMM_WORLD, myrank, ierr) call MPI_Comm_size(MPI_COMM_WORLD, nprocs, ierr) @@ -115,6 +118,9 @@ program ffb_mini call LES3X(file_parm) +!Fj +! 
call MPI_Finalize(ierr) + contains subroutine print_usage_and_exit() diff --git a/FFB-MINI/src/les3x.F b/FFB-MINI/src/les3x.F index 2637d2c..30878ab 100755 --- a/FFB-MINI/src/les3x.F +++ b/FFB-MINI/src/les3x.F @@ -17,6 +17,7 @@ C* PROGRAM LES3X SUBROUTINE LES3X(FILEIN) use xmp_api + use mpi #include "timing.h" !#include "xmp_coarray.h" IMPLICIT NONE @@ -337,7 +338,8 @@ SUBROUTINE LES3X(FILEIN) INTEGER*8 :: rx_desc, ry_desc INTEGER*8, DIMENSION(2) :: rx_lb, rx_ub, ry_lb, ry_ub INTEGER*4 :: img_dims(1) - INTEGER*4 status + INTEGER*4 :: status +C INTEGER :: ierr, nnn, me CC Fj end 202103 REAL*8,ALLOCATABLE:: * DWRK01(:,:),DWRK02(:),DWRK03(:,:,:), @@ -751,13 +753,20 @@ SUBROUTINE LES3X(FILEIN) C C C +C Fj start 202103 +C call mpi_init(ierr) +C call mpi_comm_size(mpi_comm_world,nnn,ierr) +C call mpi_comm_rank(mpi_comm_world,me,ierr) +C Fj end 202103 +C C NDOM = 0 CALL DDINIT(NPART,IPART) -C IF(IPART.GE.1) NDOM = 1 C C Fj start 202103 call xmp_api_init C Fj end 202103 +C +C IF(IPART.GE.1) NDOM = 1 C IF(IPART.GE.2) THEN IUT6 = IUTLG @@ -2205,6 +2214,8 @@ SUBROUTINE LES3X(FILEIN) call xmp_coarray_deallocate(ry_desc, status) C call xmp_api_finalize +C + call mpi_finalize(ierr) C Fj end 202103 C STOP diff --git a/FFB-MINI/src/ma_prof/src/maprof.c b/FFB-MINI/src/ma_prof/src/maprof.c index 75b9f95..8f07a6f 100755 --- a/FFB-MINI/src/ma_prof/src/maprof.c +++ b/FFB-MINI/src/ma_prof/src/maprof.c @@ -351,6 +351,10 @@ void maprof_set_num_threads(int n) { N_threads = n; } +void maprof_set_num_threads_(int n) +{ + maprof_set_num_threads(n); +} void maprof_flush_stdout_() { diff --git a/FFB-MINI/src/make_setting b/FFB-MINI/src/make_setting index 8ec3020..81d6748 100755 --- a/FFB-MINI/src/make_setting +++ b/FFB-MINI/src/make_setting @@ -3,7 +3,8 @@ CC = mpicc OMNI_HOME = $(shell xmpcc --show-env | grep OMNI_HOME | sed -e 's/OMNI_HOME=//' -e 's/"//g') FC = mpif90 -FFLAGS = -I$(OMNI_HOME)/include -J$(OMNI_HOME)/include -fopenmp +#FFLAGS = -I$(OMNI_HOME)/include -J$(OMNI_HOME)/include 
-fopenmp +FFLAGS = -I$(OMNI_HOME)/include -fopenmp LIBS = $(shell xmpcc --show-env | grep OMNI_LINKER_OPT | sed -e 's/OMNI_LINKER_OPT=//' -e 's/"//g') DEFINE += -DNO_METIS From d35bd8dbc800b4be8fd62cf212ac96e124b0c575 Mon Sep 17 00:00:00 2001 From: Kazuma Tago Date: Mon, 8 Mar 2021 18:59:06 +0900 Subject: [PATCH 35/70] [WIP] Fix index size of coarray in xmpAPI_rimp2_tran3c2_incore_v_mpiomp.F90. --- .../src/mp2/xmpAPI_rimp2_tran3c2_incore_v_mpiomp.F90 | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_tran3c2_incore_v_mpiomp.F90 b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_tran3c2_incore_v_mpiomp.F90 index ff4f2d0..8c62284 100755 --- a/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_tran3c2_incore_v_mpiomp.F90 +++ b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_tran3c2_incore_v_mpiomp.F90 @@ -175,9 +175,9 @@ SUBROUTINE RIMP2_Tran3c2_InCore_V_MPIOMP T2BufSend_lb(1) = 1 T2BufSend_lb(2) = 1 T2BufSend_lb(3) = 1 - T2BufSend_ub(1) = LenOccBat + T2BufSend_ub(1) = MXNActO T2BufSend_ub(2) = MXNBF_RI_MyRank - T2BufSend_ub(3) = MXNActO + T2BufSend_ub(3) = LenOccBat call xmp_new_local_array(T2BufSend_local_desc,8,3,T2BufSend_lb, & T2BufSend_ub,loc(T2BufSend)) ! TODO:chack unnecessary? @@ -188,11 +188,11 @@ SUBROUTINE RIMP2_Tran3c2_InCore_V_MPIOMP ! ALLOCATE(T2BufRecv(MXNActO,MXNBF_RI_MyRank,LenOccBat)) ! ALLOCATE(T2BufRecv(MXNActO,MXNBF_RI_MyRank,LenOccBat)[*]) T2BufRecv_lb(1)=1 - T2BufRecv_ub(1)=LenOccBat T2BufRecv_lb(2)=1 - T2BufRecv_ub(2)=MXNBF_RI_MyRank T2BufRecv_lb(3)=1 - T2BufRecv_ub(3)=MXNActO + T2BufRecv_ub(1)=MXNActO + T2BufRecv_ub(2)=MXNBF_RI_MyRank + T2BufRecv_ub(3)=LenOccBat call xmp_new_coarray(T2BufRecv_desc,8,3,T2BufRecv_lb,T2BufRecv_ub,1,img_dims) call xmp_coarray_bind(T2BufRecv_desc,T2BufRecv) !! 
From 68e33d3a958cd7bb0a13d44c31521e636161da35 Mon Sep 17 00:00:00 2001 From: Yoshikawa Hiroyuki Date: Mon, 8 Mar 2021 19:59:04 +0900 Subject: [PATCH 36/70] [WIP] Add xmp_api codes --- NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 | 877 ++++++---- NICAM-DC-MINI/src/share/mod_debug_xmpAPI.f90 | 537 +++++++ NICAM-DC-MINI/src/share/mod_grd_xmpXPI.f90 | 1420 +++++++++++++++++ .../sysdep/Makedef.Linux64-gnu-ompi-xmp | 38 + .../sysdep/Makedef.Linux64-gnu-ompi-xmpAPI | 38 + .../sysdep/Mkjobshell.Linux64-gnu-ompi-xmp.sh | 100 ++ .../Mkjobshell.Linux64-gnu-ompi-xmpAPI.sh | 100 ++ 7 files changed, 2761 insertions(+), 349 deletions(-) create mode 100755 NICAM-DC-MINI/src/share/mod_debug_xmpAPI.f90 create mode 100755 NICAM-DC-MINI/src/share/mod_grd_xmpXPI.f90 create mode 100644 NICAM-DC-MINI/sysdep/Makedef.Linux64-gnu-ompi-xmp create mode 100644 NICAM-DC-MINI/sysdep/Makedef.Linux64-gnu-ompi-xmpAPI create mode 100644 NICAM-DC-MINI/sysdep/Mkjobshell.Linux64-gnu-ompi-xmp.sh create mode 100644 NICAM-DC-MINI/sysdep/Mkjobshell.Linux64-gnu-ompi-xmpAPI.sh diff --git a/NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 b/NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 index 09b81e2..31c5455 100755 --- a/NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 +++ b/NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 @@ -2001,7 +2001,7 @@ subroutine COMM_setup( & !--- 2020 Fujitsu sendbuf_l_lb(1) = 1; sendbuf_l_ub(1) = maxdatasize_s sendbuf_l_lb(2) = 1; sendbuf_l_ub(2) = somax(halomax) - call xmp_new_local_array(sendbuf_l_desc, 8, 2, sendbuf_l_lb, sendbuf_l_ub, sendbuf) + call xmp_new_local_array(sendbuf_l_desc, 8, 2, sendbuf_l_lb, sendbuf_l_ub, loc(sendbuf)) !--- 2020 Fujitsu end recvbuf(:,:)=0 sendbuf(:,:)=0 @@ -2723,7 +2723,7 @@ subroutine COMM_data_transfer(& if ( opt_comm_barrier ) then call DEBUG_rapstart('Node imbalance adjustment') -!coarray call MPI_BARRIER( ADM_comm_run_world, ierr ) + !coarray call MPI_BARRIER( ADM_comm_run_world, ierr ) !--- 2020 Fujitsu !sync all call xmp_sync_all(ierr) @@ -2738,7 +2738,7 @@ 
subroutine COMM_data_transfer(& ! comm_call_count=comm_call_count+1 ! -!coarray t(0)=mpi_wtime() + !coarray t(0)=mpi_wtime() t(0) = xmp_wtime() ! shp=shape(var) @@ -2746,7 +2746,7 @@ subroutine COMM_data_transfer(& ! if (present(trn)) then varmax=0 -!cdir novector + !cdir novector do n=1,shp(4) if(trn(n)) then varmax=varmax+1 @@ -2755,7 +2755,7 @@ subroutine COMM_data_transfer(& enddo else varmax=shp(4) -!cdir novector + !cdir novector do n=1,varmax clist(n)=n enddo @@ -2781,36 +2781,38 @@ subroutine COMM_data_transfer(& write(ADM_LOG_FID,*) 'cmax=', cmax, 'max_varmax*ADM_kall=', max_varmax*ADM_kall call ADM_proc_stop end if -! equiv_varmax = real( varmax*kmax )/real( ADM_kall ) ! assuming variables are all 3-Dimension -! diag_varmax = max( equiv_varmax, diag_varmax ) ! diagnose max value -! write(ADM_LOG_FID,'(a)' ) ' *** max_varmax, varmax, kmax, equivalent varmax, diagnosed max_varmax ' -! write(ADM_LOG_FID,'(5x, 3i8, 2f18.1)') max_varmax, varmax, kmax, equiv_varmax, diag_varmax + ! equiv_varmax = real( varmax*kmax )/real( ADM_kall ) ! assuming variables are all 3-Dimension + ! diag_varmax = max( equiv_varmax, diag_varmax ) ! diagnose max value + ! write(ADM_LOG_FID,'(a)' ) ' *** max_varmax, varmax, kmax, equivalent varmax, diagnosed max_varmax ' + ! write(ADM_LOG_FID,'(5x, 3i8, 2f18.1)') max_varmax, varmax, kmax, equiv_varmax, diag_varmax ! -> end if ! 
-!coarray t(1)=mpi_wtime() + !coarray t(1)=mpi_wtime() t(1) = xmp_wtime() time_pre=time_pre+(t(1)-t(0)) !call mpi_barrier(ADM_comm_run_world,ierr) -!coarray t(2)=mpi_wtime() + !coarray t(2)=mpi_wtime() t(2) = xmp_wtime() time_bar1=time_bar1+(t(2)-t(1)) -!coarray + !coarray !--- 2020 Fujitsu !allocate(tbl(num_images())[*]) tbl_lb(1) = 1; tbl_ub(1) = ADM_prc_all call xmp_new_coarray(tbl_desc, 4, 1, tbl_lb, tbl_ub, 1, img_dims) call xmp_coarray_bind(tbl_desc, tbl) + !call co_max(maxdatasiaze_r) call MPI_Allreduce(maxdatasiaze_r, max_tmp, 1, MPI_INTEGER, MPI_MAX, MPI_COMM_WORLD, ierr) maxdatasiaze_r = max_tmp - !--- 2020 Fujitsu end + bufsize1 = maxdatasize_r bufsize2 = romax(halomax) - !--- 2020 Fujitsu + !call co_max(bufsize2) call MPI_Allreduce(bufsize2, max_tmp, 1, MPI_INTEGER, MPI_MAX, MPI_COMM_WORLD, ierr) bufsize2 = max_tmp + !allocate(caf_recvbuf(bufsize1,bufsize2)[*]) caf_recvbuf_lb(1) = 1; caf_recvbuf_ub(1) = bufsize1 caf_recvbuf_lb(2) = 1; caf_recvbuf_ub(2) = bufsize2 @@ -2822,10 +2824,10 @@ subroutine COMM_data_transfer(& ! call mpi_isend !----------------------------------------- -!! comm_dbg_recvbuf(:,:,:)=CNST_UNDEF -!! comm_dbg_sendbuf(:,:,:)=CNST_UNDEF -!! comm_dbg_recvbuf(:,:,1)=recvbuf(:,:) -!! comm_dbg_sendbuf(:,:,1)=sendbuf(:,:) + !! comm_dbg_recvbuf(:,:,:)=CNST_UNDEF + !! comm_dbg_sendbuf(:,:,:)=CNST_UNDEF + !! comm_dbg_recvbuf(:,:,1)=recvbuf(:,:) + !! comm_dbg_sendbuf(:,:,1)=sendbuf(:,:) if (opt_comm_dbg) then !iga sendbuf(:,:)= dbg_sendbuf_init @@ -2844,15 +2846,15 @@ subroutine COMM_data_transfer(& endif endif ! -!coarray -! call mpi_irecv(recvbuf(1,ro) & -! ,rsize(ro,halo)*cmax & -! ,mpi_double_precision & -! ,sourcerank(ro,halo) & -! ,recvtag(ro,halo) & -! ,ADM_comm_run_world & -! ,areq(ro) & -! ,ierr) + !coarray + ! call mpi_irecv(recvbuf(1,ro) & + ! ,rsize(ro,halo)*cmax & + ! ,mpi_double_precision & + ! ,sourcerank(ro,halo) & + ! ,recvtag(ro,halo) & + ! ,ADM_comm_run_world & + ! ,areq(ro) & + ! ,ierr) tbl(sourcerank(ro,halo)+1) = ro ! 
@@ -2872,7 +2874,7 @@ subroutine COMM_data_transfer(& endif enddo -!coarray t(3)=mpi_wtime() + !coarray t(3)=mpi_wtime() t(3) = xmp_wtime() time_recv=time_recv+(t(3)-t(2)) !----------------------------------------- @@ -2886,27 +2888,27 @@ subroutine COMM_data_transfer(& -!coarray t(4)=mpi_wtime() + !coarray t(4)=mpi_wtime() t(4) = xmp_wtime() do ns=1,nsmax(so,halo) ss=sendinfo(SIZE_COMM,ns,so,halo) sl=sendinfo(LRGNID_COMM,ns,so,halo) sb=sendinfo(BASE_COMM,ns,so,halo)*cmax -!=org=!cdir novector -!=org= do n=1,ss -!=org=!cdir unroll=3 -!=org= do m=1,varmax -!=org=!!cdir shortloop -!=org= do k=1,kmax -!=org= sendbuf(k+(m-1)*kmax+(n-1)*cmax+sb,so) & -!=org= =var(sendlist(n,ns,so,halo),k,sl,clist(m)) -!=org= enddo -!=org= enddo -!=org=! if (ADM_prc_me==1) write(ADM_log_fid,*) 'n',n,sendlist(n,ns,so,halo) -!=org= enddo + !=org=!cdir novector + !=org= do n=1,ss + !=org=!cdir unroll=3 + !=org= do m=1,varmax + !=org=!!cdir shortloop + !=org= do k=1,kmax + !=org= sendbuf(k+(m-1)*kmax+(n-1)*cmax+sb,so) & + !=org= =var(sendlist(n,ns,so,halo),k,sl,clist(m)) + !=org= enddo + !=org= enddo + !=org=! 
if (ADM_prc_me==1) write(ADM_log_fid,*) 'n',n,sendlist(n,ns,so,halo) + !=org= enddo do m=1,varmax -!cdir outerunroll=8 + !cdir outerunroll=8 do k=1,kmax do n=1,ss sendbuf(n+(k-1)*ss+(m-1)*ss*kmax+sb,so) & @@ -2914,7 +2916,7 @@ subroutine COMM_data_transfer(& enddo enddo enddo - + enddo !----------------------------------------- @@ -2926,19 +2928,19 @@ subroutine COMM_data_transfer(& ss=sendinfo_pl(SIZE_COMM,ns,so,halo) sl=sendinfo_pl(LRGNID_COMM,ns,so,halo) sb=sendinfo_pl(BASE_COMM,ns,so,halo)*cmax -!=org=!cdir novector -!=org= do n=1,ss -!=org=!cdir unroll=3 -!=org= do m=1,varmax -!=org=!!cdir shortloop -!=org= do k=1,kmax -!=org= sendbuf(k+(m-1)*kmax+(n-1)*cmax+sb,so) & -!=org= =var_pl(sendlist_pl(n,ns,so,halo),k,sl,clist(m)) -!=org= enddo -!=org= enddo -!=org= enddo + !=org=!cdir novector + !=org= do n=1,ss + !=org=!cdir unroll=3 + !=org= do m=1,varmax + !=org=!!cdir shortloop + !=org= do k=1,kmax + !=org= sendbuf(k+(m-1)*kmax+(n-1)*cmax+sb,so) & + !=org= =var_pl(sendlist_pl(n,ns,so,halo),k,sl,clist(m)) + !=org= enddo + !=org= enddo + !=org= enddo do m=1,varmax -!cdir outerunroll=8 + !cdir outerunroll=8 do k=1,kmax do n=1,ss sendbuf(n+(k-1)*ss+(m-1)*ss*kmax+sb,so) & @@ -2946,14 +2948,14 @@ subroutine COMM_data_transfer(& enddo enddo enddo - + enddo -!coarray t(5)=mpi_wtime() + !coarray t(5)=mpi_wtime() t(5) = xmp_wtime() time_sbuf=time_sbuf+(t(5)-t(4)) -! write(ADM_log_fid,*) 'send count=',i_dbg,'prc=',adm_prc_me + ! write(ADM_log_fid,*) 'send count=',i_dbg,'prc=',adm_prc_me !----------------------------------------- ! !----------------------------------------- @@ -2966,24 +2968,25 @@ subroutine COMM_data_transfer(& endif endif -! write(*,*) 'me=',ADM_prc_me,'sendtag',sendtag(:,:) -! write(*,*) 'sendbuf',sendbuf(:,:) -! write(*,*) 'me=',ADM_prc_me,'destrank',destrank(:,:) - -!coarray -! call Mpi_isend(sendbuf(1,so) & -! ,ssize(so,halo)*cmax & -! ,mpi_double_precision & -! ,destrank(so,halo) & -! ,sendtag(so,halo) & -! ,ADM_comm_run_world & -! 
,areq(so+romax(halo)) & -! ,ierr) + ! write(*,*) 'me=',ADM_prc_me,'sendtag',sendtag(:,:) + ! write(*,*) 'sendbuf',sendbuf(:,:) + ! write(*,*) 'me=',ADM_prc_me,'destrank',destrank(:,:) + + !coarray + ! call Mpi_isend(sendbuf(1,so) & + ! ,ssize(so,halo)*cmax & + ! ,mpi_double_precision & + ! ,destrank(so,halo) & + ! ,sendtag(so,halo) & + ! ,ADM_comm_run_world & + ! ,areq(so+romax(halo)) & + ! ,ierr) !--- 2020 Fujitsu !dst_img = tbl(this_image())[destrank(so,halo)+1] img_dims(1) = destrank(so,halo)+1 - call xmp_array_section_set_triplet(tbl_sec, 1, destrank(so,halo)+1, destrank(so,halo)+1, 1, ierr) + call xmp_array_section_set_triplet(tbl_sec, 1, ADM_prc_me, ADM_prc_me, 1, ierr) call xmp_coarray_get_scalar(img_dims, tbl_desc, tbl_sec, dst_img, ierr) + !caf_recvbuf(1:ssize(so,halo)*cmax,dst_img)[destrank(so,halo)+1] & ! = sendbuf(1:ssize(so,halo)*cmax,so) call xmp_array_section_set_triplet(caf_recvbuf_sec, 1, 1, ssize(so,halo)*cmax, 1, ierr) @@ -3009,7 +3012,7 @@ subroutine COMM_data_transfer(& endif -!coarray t(6)=mpi_wtime() + !coarray t(6)=mpi_wtime() t(6) = xmp_wtime() time_send=time_send+(t(6)-t(5)) size_total=size_total+ssize(so,halo)*cmax @@ -3017,7 +3020,7 @@ subroutine COMM_data_transfer(& enddo !loop so !----------------------------------------- ! -!coarray t(7)=mpi_wtime() + !coarray t(7)=mpi_wtime() t(7) = xmp_wtime() !--------------------------------------------------- ! 
var -> var (region to region copy in same rank) @@ -3026,19 +3029,19 @@ subroutine COMM_data_transfer(& cs=copyinfo_r2r(SIZE_COPY,nc,halo) cl=copyinfo_r2r(LRGNID_COPY,nc,halo) scl=copyinfo_r2r(SRC_LRGNID_COPY,nc,halo) -!=org=!cdir novector -!=org= do n=1,cs -!=org=!cdir unroll=3 -!=org= do m=1,varmax -!=org=!!cdir shortloop -!=org= do k=1,kmax -!=org= var(recvlist_r2r(n,nc,halo),k,cl ,clist(m)) & -!=org= =var(sendlist_r2r(n,nc,halo),k,scl,clist(m)) -!=org= enddo -!=org= enddo -!=org= enddo + !=org=!cdir novector + !=org= do n=1,cs + !=org=!cdir unroll=3 + !=org= do m=1,varmax + !=org=!!cdir shortloop + !=org= do k=1,kmax + !=org= var(recvlist_r2r(n,nc,halo),k,cl ,clist(m)) & + !=org= =var(sendlist_r2r(n,nc,halo),k,scl,clist(m)) + !=org= enddo + !=org= enddo + !=org= enddo do m=1,varmax -!cdir outerunroll=8 + !cdir outerunroll=8 do k=1,kmax do n=1,cs var(recvlist_r2r(n,nc,halo),k,cl ,clist(m)) & @@ -3046,7 +3049,7 @@ subroutine COMM_data_transfer(& enddo enddo enddo - + enddo !------------------------------------------ ! 
@@ -3057,19 +3060,19 @@ subroutine COMM_data_transfer(& cs=copyinfo_r2p(SIZE_COPY,nc,halo) cl=copyinfo_r2p(LRGNID_COPY,nc,halo) scl=copyinfo_r2p(SRC_LRGNID_COPY,nc,halo) -!=org=!cdir novector -!=org= do n=1,cs -!=org=!cdir unroll=3 -!=org= do m=1,varmax -!=org=!!cdir shortloop -!=org= do k=1,kmax -!=org= var_pl(recvlist_r2p(n,nc,halo),k,cl,clist(m)) & -!=org= =var(sendlist_r2p(n,nc,halo),k,scl,clist(m)) -!=org= enddo -!=org= enddo -!=org= enddo + !=org=!cdir novector + !=org= do n=1,cs + !=org=!cdir unroll=3 + !=org= do m=1,varmax + !=org=!!cdir shortloop + !=org= do k=1,kmax + !=org= var_pl(recvlist_r2p(n,nc,halo),k,cl,clist(m)) & + !=org= =var(sendlist_r2p(n,nc,halo),k,scl,clist(m)) + !=org= enddo + !=org= enddo + !=org= enddo do m=1,varmax -!cdir outerunroll=8 + !cdir outerunroll=8 do k=1,kmax do n=1,cs var_pl(recvlist_r2p(n,nc,halo),k,cl,clist(m)) & @@ -3077,7 +3080,7 @@ subroutine COMM_data_transfer(& enddo enddo enddo - + enddo !----------------------------------------- ! @@ -3088,19 +3091,19 @@ subroutine COMM_data_transfer(& cs=copyinfo_p2r(SIZE_COPY,nc,halo) cl=copyinfo_p2r(LRGNID_COPY,nc,halo) scl=copyinfo_p2r(SRC_LRGNID_COPY,nc,halo) -!=org=!cdir novector -!=org= do n=1,cs -!=org=!cdir unroll=3 -!=org= do m=1,varmax -!=org=!!cdir shortloop -!=org= do k=1,kmax -!=org= var(recvlist_p2r(n,nc,halo),k,cl,clist(m)) & -!=org= =var_pl(sendlist_p2r(n,nc,halo),k,scl,clist(m)) -!=org= enddo -!=org= enddo -!=org= enddo + !=org=!cdir novector + !=org= do n=1,cs + !=org=!cdir unroll=3 + !=org= do m=1,varmax + !=org=!!cdir shortloop + !=org= do k=1,kmax + !=org= var(recvlist_p2r(n,nc,halo),k,cl,clist(m)) & + !=org= =var_pl(sendlist_p2r(n,nc,halo),k,scl,clist(m)) + !=org= enddo + !=org= enddo + !=org= enddo do m=1,varmax -!cdir outerunroll=8 + !cdir outerunroll=8 do k=1,kmax do n=1,cs var(recvlist_p2r(n,nc,halo),k,cl,clist(m)) & @@ -3108,16 +3111,16 @@ subroutine COMM_data_transfer(& enddo enddo enddo - + enddo !----------------------------------------- ! 
!----------------------------------------- -!coarray t(8)=mpi_wtime() + !coarray t(8)=mpi_wtime() t(8) = xmp_wtime() time_copy=time_copy+(t(8)-t(7)) acount=romax(halo)+somax(halo) -!coarray call mpi_waitall(acount,areq,stat,ierr) + !coarray call mpi_waitall(acount,areq,stat,ierr) !--- 2020 Fujitsu !sync all call xmp_sync_all(ierr) @@ -3128,7 +3131,7 @@ subroutine COMM_data_transfer(& end do end do -!coarray t(9)=mpi_wtime() + !coarray t(9)=mpi_wtime() t(9) = xmp_wtime() time_wait=time_wait+(t(9)-t(8)) @@ -3137,7 +3140,7 @@ subroutine COMM_data_transfer(& write(ADM_log_fid,*) 'mpi_wait info start==' write(ADM_log_fid,*) 'ierr=',ierr write(ADM_log_fid,*) 'acount=',acount -! write(ADM_log_fid,*) 'stat=',stat(:,1:acount) + ! write(ADM_log_fid,*) 'stat=',stat(:,1:acount) write(ADM_log_fid,*) 'areq=',areq(1:acount) write(ADM_log_fid,*) 'mpi_wait info end==' endif @@ -3175,11 +3178,11 @@ subroutine COMM_data_transfer(& write(ADM_log_fid,*) 'areq after irecv:',dbg_areq_save(1:acount,1) write(ADM_log_fid,*) 'areq after isend',dbg_areq_save(1:acount,2) write(ADM_log_fid,*) 'areq after waitall',dbg_areq_save(1:acount,3) -! write(ADM_log_fid,*) 'areq after barrier',dbg_areq_save(1:acount,4) + ! write(ADM_log_fid,*) 'areq after barrier',dbg_areq_save(1:acount,4) write(ADM_log_fid,*) 'ierr of mpi_waitall=',ierr write(ADM_log_fid,*) 'acount of mpi_waitall=',acount -! write(ADM_log_fid,*) 'stat of mpi_waitall=',stat(:,1:acount) + ! write(ADM_log_fid,*) 'stat of mpi_waitall=',stat(:,1:acount) endif dbg_tcount=dbg_tcount+1 @@ -3187,7 +3190,7 @@ subroutine COMM_data_transfer(& if (opt_comm_barrier) then -!coarray call mpi_barrier(ADM_comm_run_world,ierr) + !coarray call mpi_barrier(ADM_comm_run_world,ierr) !--- 2020 Fujitsu !sync all call xmp_sync_all(ierr) @@ -3197,15 +3200,15 @@ subroutine COMM_data_transfer(& write(ADM_log_fid,*) 'ierr=',ierr write(ADM_log_fid,*) 'mpi_barrier info end==' endif -! if (opt_comm_dbg) then -! dbg_areq_save(:,4)=areq(:) -! endif + ! 
if (opt_comm_dbg) then + ! dbg_areq_save(:,4)=areq(:) + ! endif endif !----------------------------------------- ! -! i_dbg=0 !iga + ! i_dbg=0 !iga do ro=1,romax(halo) !----------------------------------------- ! recvbuf -> var ( recieve in region ) @@ -3214,29 +3217,29 @@ subroutine COMM_data_transfer(& rs=recvinfo(SIZE_COMM,nr,ro,halo) rl=recvinfo(LRGNID_COMM,nr,ro,halo) rb=recvinfo(BASE_COMM,nr,ro,halo)*cmax -!=org=!cdir novector -!=org= do n=1,rs -!=org=!cdir unroll=3 -!=org= do m=1,varmax -!=org=!!cdir shortloop -!=org= do k=1,kmax -!=org= var(recvlist(n,nr,ro,halo),k,rl,clist(m)) & -!=org= =recvbuf(k+(m-1)*kmax+(n-1)*cmax+rb,ro) -!=org= enddo -!=org= enddo -!=org=! if (ADM_prc_me==2) write(ADM_log_fid,*) 'ro,n,nr,',ro,n,nr,recvlist(n,nr,ro,halo) -!=org= enddo + !=org=!cdir novector + !=org= do n=1,rs + !=org=!cdir unroll=3 + !=org= do m=1,varmax + !=org=!!cdir shortloop + !=org= do k=1,kmax + !=org= var(recvlist(n,nr,ro,halo),k,rl,clist(m)) & + !=org= =recvbuf(k+(m-1)*kmax+(n-1)*cmax+rb,ro) + !=org= enddo + !=org= enddo + !=org=! if (ADM_prc_me==2) write(ADM_log_fid,*) 'ro,n,nr,',ro,n,nr,recvlist(n,nr,ro,halo) + !=org= enddo do m=1,varmax -!cdir outerunroll=8 + !cdir outerunroll=8 do k=1,kmax do n=1,rs var(recvlist(n,nr,ro,halo),k,rl,clist(m)) & =recvbuf(n+(k-1)*rs+(m-1)*rs*kmax+rb,ro) enddo enddo -! if (ADM_prc_me==2) write(ADM_log_fid,*) 'ro,n,nr,',ro,n,nr,recvlist(n,nr,ro,halo) + ! if (ADM_prc_me==2) write(ADM_log_fid,*) 'ro,n,nr,',ro,n,nr,recvlist(n,nr,ro,halo) enddo -! i_dbg=i_dbg+max(rs,0) + ! 
i_dbg=i_dbg+max(rs,0) enddo !----------------------------------------- @@ -3248,19 +3251,19 @@ subroutine COMM_data_transfer(& rs=recvinfo_pl(SIZE_COMM,nr,ro,halo) rl=recvinfo_pl(LRGNID_COMM,nr,ro,halo) rb=recvinfo_pl(BASE_COMM,nr,ro,halo)*cmax -!=org=!cdir novector -!=org= do n=1,rs -!=org=!cdir unroll=3 -!=org= do m=1,varmax -!=org=!!cdir shortloop -!=org= do k=1,kmax -!=org= var_pl(recvlist_pl(n,nr,ro,halo),k,rl,clist(m)) & -!=org= =recvbuf(k+(m-1)*kmax+(n-1)*cmax+rb,ro) -!=org= enddo -!=org= enddo -!=org= enddo + !=org=!cdir novector + !=org= do n=1,rs + !=org=!cdir unroll=3 + !=org= do m=1,varmax + !=org=!!cdir shortloop + !=org= do k=1,kmax + !=org= var_pl(recvlist_pl(n,nr,ro,halo),k,rl,clist(m)) & + !=org= =recvbuf(k+(m-1)*kmax+(n-1)*cmax+rb,ro) + !=org= enddo + !=org= enddo + !=org= enddo do m=1,varmax -!cdir outerunroll=8 + !cdir outerunroll=8 do k=1,kmax do n=1,rs var_pl(recvlist_pl(n,nr,ro,halo),k,rl,clist(m)) & @@ -3268,14 +3271,14 @@ subroutine COMM_data_transfer(& enddo enddo enddo - + enddo enddo !loop ro -!coarray t(10)=mpi_wtime() + !coarray t(10)=mpi_wtime() t(10) = xmp_wtime() time_rbuf=time_rbuf+(t(10)-t(9)) -! write(ADM_log_fid,*) 'recv count=',i_dbg,'prc=',adm_prc_me + ! write(ADM_log_fid,*) 'recv count=',i_dbg,'prc=',adm_prc_me !----------------------------------------- ! 
@@ -3286,20 +3289,20 @@ subroutine COMM_data_transfer(& cs=copyinfo_sgp(SIZE_COPY,nc,halo) cl=copyinfo_sgp(LRGNID_COPY,nc,halo) scl=copyinfo_sgp(SRC_LRGNID_COPY,nc,halo) -!=org=!cdir novector -!=org= do n=1,cs -!=org=!cdir unroll=3 -!=org= do m=1,varmax -!=org=!!cdir shortloop -!=org= do k=1,kmax -!=org= var(recvlist_sgp(n,nc,halo),k,cl ,clist(m)) & -!=org= =var(sendlist_sgp(n,nc,halo),k,scl,clist(m)) -!=org= enddo -!=org= enddo -!=org= enddo + !=org=!cdir novector + !=org= do n=1,cs + !=org=!cdir unroll=3 + !=org= do m=1,varmax + !=org=!!cdir shortloop + !=org= do k=1,kmax + !=org= var(recvlist_sgp(n,nc,halo),k,cl ,clist(m)) & + !=org= =var(sendlist_sgp(n,nc,halo),k,scl,clist(m)) + !=org= enddo + !=org= enddo + !=org= enddo do m=1,varmax -!cdir outerunroll=8 + !cdir outerunroll=8 do k=1,kmax do n=1,cs var(recvlist_sgp(n,nc,halo),k,cl ,clist(m)) & @@ -3307,18 +3310,26 @@ subroutine COMM_data_transfer(& enddo enddo enddo - + enddo -!coarray t(11)=mpi_wtime() + !coarray t(11)=mpi_wtime() t(11) = xmp_wtime() time_copy_sgp=time_copy_sgp+(t(11)-t(10)) !! 
!call mpi_barrier(ADM_comm_run_world,ierr) -!coarray t(12)=mpi_wtime() + !coarray t(12)=mpi_wtime() t(12) = xmp_wtime() time_bar2=time_bar2+(t(12)-t(11)) time_total=time_total+(t(12)-t(0)) + !--- 2020 Fujitsu + call xmp_free_array_section(tbl_sec) + call xmp_free_array_section(caf_recvbuf_sec) + call xmp_free_array_section(sendbuf_l_sec) + + call xmp_coarray_deallocate(tbl_desc, ierr) + call xmp_coarray_deallocate(caf_recvbuf_desc, ierr) + !--- 2020 Fujitsu end call DEBUG_rapend('COMM data_transfer') end subroutine COMM_data_transfer @@ -3330,24 +3341,24 @@ subroutine COMM_data_transfer_rgn2pl( & knum, & nnum ) use mod_adm, only: & - ADM_COMM_RUN_WORLD, & - ADM_prc_tab, & - ADM_rgn2prc, & - ADM_prc_me, & - ADM_NPL, & - ADM_SPL, & - ADM_prc_npl, & - ADM_prc_spl, & - ADM_rgnid_npl_mng, & - ADM_rgnid_spl_mng, & - ADM_gall, & - ADM_gall_pl, & - ADM_lall, & - ADM_lall_pl, & - ADM_gall_1d, & - ADM_gmin, & - ADM_gmax, & - ADM_GSLF_PL + ADM_COMM_RUN_WORLD, & + ADM_prc_tab, & + ADM_rgn2prc, & + ADM_prc_me, & + ADM_NPL, & + ADM_SPL, & + ADM_prc_npl, & + ADM_prc_spl, & + ADM_rgnid_npl_mng, & + ADM_rgnid_spl_mng, & + ADM_gall, & + ADM_gall_pl, & + ADM_lall, & + ADM_lall_pl, & + ADM_gall_1d, & + ADM_gmin, & + ADM_gmax, & + ADM_GSLF_PL implicit none integer, intent(in) :: knum @@ -3357,22 +3368,26 @@ subroutine COMM_data_transfer_rgn2pl( & real(8) :: v_npl_send(knum,nnum) real(8) :: v_spl_send(knum,nnum) + !--- 2020 Fujitsu + integer(8) :: v_npl_send_l_desc + integer(8) :: v_spl_send_l_desc + !--- 2020 Fujitsu end real(8) :: v_npl_recv(knum,nnum) real(8) :: v_spl_recv(knum,nnum) -!coarray + !coarray !--- 2020 Fujitsu !real(8),allocatable :: v_npl_recvc(:,:)[:] !real(8),allocatable :: v_spl_recvc(:,:)[:] integer , POINTER :: v_npl_recvc ( : , : ) => null ( ) integer , POINTER :: v_spl_recvc ( : , : ) => null ( ) integer(8) :: v_npl_recvc_desc, v_spl_recvc_desc - integer(8) :: v_npl_recvc_lb(2), v_npl_recvc_ub(2), v_spl_recvc_lb(2), v_spl_recvc_ub(2) - integer(8) :: 
v_npl_recvc_sec, v_spl_recvc_sec + integer(8) :: v_npl_lb(2), v_npl_ub(2), v_spl_lb(2), v_spl_ub(2) + integer(8) :: v_npl_sec, v_spl_sec integer(4) :: img_dims(1) !--- 2020 Fujitsu end integer :: ireq(4) -! integer :: istat(MPI_STATUS_SIZE) + ! integer :: istat(MPI_STATUS_SIZE) integer :: ierr integer :: k, l, n, rgnid @@ -3382,18 +3397,22 @@ subroutine COMM_data_transfer_rgn2pl( & !--------------------------------------------------------------------------- !--- 2020 Fujitsu !allocate(v_npl_recvc(knum,nnum)[*]) - v_npl_recvc_lb(1) = 1; v_npl_recvc_ub(1) = knum - v_npl_recvc_lb(2) = 1; v_npl_recvc_ub(2) = nnum - call xmp_new_coarray(v_npl_recvc_desc, 8, 2, v_npl_recvc_lb, v_npl_recvc_ub, 1, img_dims) + v_npl_lb(1) = 1; v_npl_ub(1) = knum + v_npl_lb(2) = 1; v_npl_ub(2) = nnum + call xmp_new_coarray(v_npl_recvc_desc, 8, 2, v_npl_lb, v_npl_ub, 1, img_dims) call xmp_coarray_bind(v_npl_recvc_desc, v_npl_recvc) + !allocate(v_spl_recvc(knum,nnum)[*]) - v_spl_recvc_lb(1) = 1; v_spl_recvc_ub(1) = knum - v_spl_recvc_lb(2) = 1; v_spl_recvc_ub(2) = nnum - call xmp_new_coarray(v_spl_recvc_desc, 8, 2, v_spl_recvc_lb, v_spl_recvc_ub, 1, img_dims) + v_spl_lb(1) = 1; v_spl_ub(1) = knum + v_spl_lb(2) = 1; v_spl_ub(2) = nnum + call xmp_new_coarray(v_spl_recvc_desc, 8, 2, v_spl_lb, v_spl_ub, 1, img_dims) call xmp_coarray_bind(v_spl_recvc_desc, v_spl_recvc) - call xmp_new_array_section(v_npl_recvc_sec, 2) - call xmp_new_array_section(v_spl_recvc_sec, 2) + call xmp_new_array_section(v_npl_sec, 2) + call xmp_new_array_section(v_spl_sec, 2) + + call xmp_new_local_array(v_npl_send_l_desc, 8, 2, v_npl_lb, v_npl_ub, v_npl_send) + call xmp_new_local_array(v_spl_send_l_desc, 8, 2, v_spl_lb, v_spl_ub, v_spl_send) !--- 2020 Fujitsu end v_npl_recvc = 0.d0 v_spl_recvc = 0.d0 @@ -3402,59 +3421,68 @@ subroutine COMM_data_transfer_rgn2pl( & !--- recv pole value !--- north pole -!coarray -! if ( ADM_prc_me == ADM_prc_npl ) then -! call MPI_IRECV( v_npl_recv, & -! knum * nnum, & -! 
MPI_DOUBLE_PRECISION, & -! ADM_rgn2prc(ADM_rgnid_npl_mng)-1, & -! ADM_NPL, & -! ADM_COMM_RUN_WORLD, & -! ireq(3), & -! ierr ) -! endif + !coarray + ! if ( ADM_prc_me == ADM_prc_npl ) then + ! call MPI_IRECV( v_npl_recv, & + ! knum * nnum, & + ! MPI_DOUBLE_PRECISION, & + ! ADM_rgn2prc(ADM_rgnid_npl_mng)-1, & + ! ADM_NPL, & + ! ADM_COMM_RUN_WORLD, & + ! ireq(3), & + ! ierr ) + ! endif !--- south pole -!coarray -! if ( ADM_prc_me == ADM_prc_spl ) then -! call MPI_IRECV( v_spl_recv, & -! knum * nnum, & -! MPI_DOUBLE_PRECISION, & -! ADM_rgn2prc(ADM_rgnid_spl_mng)-1, & -! ADM_SPL, & -! ADM_COMM_RUN_WORLD, & -! ireq(4), & -! ierr ) -! endif + !coarray + ! if ( ADM_prc_me == ADM_prc_spl ) then + ! call MPI_IRECV( v_spl_recv, & + ! knum * nnum, & + ! MPI_DOUBLE_PRECISION, & + ! ADM_rgn2prc(ADM_rgnid_spl_mng)-1, & + ! ADM_SPL, & + ! ADM_COMM_RUN_WORLD, & + ! ireq(4), & + ! ierr ) + ! endif !--- send pole value do l = 1, ADM_lall rgnid = ADM_prc_tab(l,ADM_prc_me) !--- north pole - sync all + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end if ( rgnid == ADM_rgnid_npl_mng ) then do n = 1, nnum - do k = 1, knum - v_npl_send(k,n) = var(suf(ADM_gmin,ADM_gmax+1),k,l,n) - enddo + do k = 1, knum + v_npl_send(k,n) = var(suf(ADM_gmin,ADM_gmax+1),k,l,n) + enddo enddo -!coarray -! call MPI_ISEND( v_npl_send, & -! knum * nnum, & -! MPI_DOUBLE_PRECISION, & -! ADM_prc_npl-1, & -! ADM_NPL, & -! ADM_COMM_RUN_WORLD, & -! ireq(1), & -! ierr ) + !coarray + ! call MPI_ISEND( v_npl_send, & + ! knum * nnum, & + ! MPI_DOUBLE_PRECISION, & + ! ADM_prc_npl-1, & + ! ADM_NPL, & + ! ADM_COMM_RUN_WORLD, & + ! ireq(1), & + ! 
ierr ) !--- 2020 Fujitsu !v_npl_recvc(:,:)[ADM_prc_npl] = v_npl_send(:,:) - !XXXX + call xmp_array_section_set_triplet(v_npl_sec, 1, 1, knum, 1, ierr) + call xmp_array_section_set_triplet(v_npl_sec, 2, 1, nnum, 1, ierr) + img_dims(1) = ADM_prc_npl + call xmp_coarray_put_local(img_dims, v_npl_recvc_desc, v_npl_sec, v_npl_send_l_desc, v_npl_sec, ierr) !--- 2020 Fujitsu end endif - sync all + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end !--- south pole if ( rgnid == ADM_rgnid_spl_mng ) then @@ -3473,9 +3501,18 @@ subroutine COMM_data_transfer_rgn2pl( & ! ADM_COMM_RUN_WORLD, & ! ireq(2), & ! ierr ) - v_spl_recvc(:,:)[ADM_prc_spl] = v_spl_send(:,:) + !--- 2020 Fujitsu + !v_spl_recvc(:,:)[ADM_prc_spl] = v_spl_send(:,:) + call xmp_array_section_set_triplet(v_spl_sec, 1, 1, knum, 1, ierr) + call xmp_array_section_set_triplet(v_spl_sec, 2, 1, nnum, 1, ierr) + img_dims(1) = ADM_prc_spl + call xmp_coarray_put_local(img_dims, v_spl_recvc_desc, v_spl_sec, v_spl_send_l_desc, v_spl_sec, ierr) + !--- 2020 Fujitsu end endif - sync all + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end enddo @@ -3513,7 +3550,19 @@ subroutine COMM_data_transfer_rgn2pl( & endif endif - sync all + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + + call xmp_free_array_section(v_npl_sec) + call xmp_free_array_section(v_spl_sec) + + call xmp_free_local_array(v_npl_send_l_desc) + call xmp_free_local_array(v_spl_send_l_desc) + + call xmp_coarray_deallocate(v_npl_recvc_desc, ierr) + call xmp_coarray_deallocate(v_spl_recvc_desc, ierr) + !--- 2020 Fujitsu end return end subroutine COMM_data_transfer_rgn2pl @@ -3562,11 +3611,23 @@ subroutine COMM_var( & ! 
real(8) :: v_npl_send(KNUM,NNUM) real(8) :: v_spl_send(KNUM,NNUM) + !--- 2020 Fujitsu + integer(8) :: v_npl_send_l_desc + integer(8) :: v_spl_send_l_desc + !--- 2020 Fujitsu end real(8) :: v_npl_recv(KNUM,NNUM) real(8) :: v_spl_recv(KNUM,NNUM) !coarray - real(8),allocatable :: v_npl_recvc(:,:)[:] - real(8),allocatable :: v_spl_recvc(:,:)[:] + !--- 2020 Fujitsu + !real(8),allocatable :: v_npl_recvc(:,:)[:] + !real(8),allocatable :: v_spl_recvc(:,:)[:] + integer , POINTER :: v_npl_recvc ( : , : ) => null ( ) + integer , POINTER :: v_spl_recvc ( : , : ) => null ( ) + integer(8) :: v_npl_recvc_desc, v_spl_recvc_desc + integer(8) :: v_npl_lb(2), v_npl_ub(2), v_spl_lb(2), v_spl_ub(2) + integer(8) :: v_npl_sec, v_spl_sec + integer(4) :: img_dims(1), ierr + !--- 2020 Fujitsu integer :: rgnid integer :: l @@ -3575,15 +3636,30 @@ subroutine COMM_var( & suf(i,j) = ADM_gall_1d * ((j)-1) + (i) !--------------------------------------------------------------------------- !coarray - allocate(v_npl_recvc(KNUM,NNUM)[*]) - allocate(v_spl_recvc(KNUM,NNUM)[*]) + !--- 2020 Fujitsu + !allocate(v_npl_recvc(KNUM,NNUM)[*]) + v_npl_lb(1) = 1; v_npl_ub(1) = knum + v_npl_lb(2) = 1; v_npl_ub(2) = nnum + call xmp_new_coarray(v_npl_recvc_desc, 8, 2, v_npl_lb, v_npl_ub, 1, img_dims) + call xmp_coarray_bind(v_npl_recvc_desc, v_npl_recvc) + + !allocate(v_spl_recvc(KNUM,NNUM)[*]) + v_spl_lb(1) = 1; v_spl_ub(1) = knum + v_spl_lb(2) = 1; v_spl_ub(2) = nnum + call xmp_new_coarray(v_spl_recvc_desc, 8, 2, v_spl_lb, v_spl_ub, 1, img_dims) + call xmp_coarray_bind(v_spl_recvc_desc, v_spl_recvc) + + !--- 2020 Fujitsu end v_npl_recvc = 0.d0 v_spl_recvc = 0.d0 if ( opt_comm_barrier ) then call DEBUG_rapstart('Node imbalance adjustment') !coarray call MPI_BARRIER( ADM_comm_run_world, ierr ) - sync all + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end call DEBUG_rapend ('Node imbalance adjustment') endif @@ -3606,7 +3682,10 @@ subroutine COMM_var( & do l = 1, ADM_lall rgnid = 
ADM_prc_tab(l,ADM_prc_me) - sync all + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end if ( rgnid == ADM_rgnid_npl_mng ) then !--- send north pole value v_npl_send(:,:) = var(suf(ADM_gmin,ADM_gmax+1),:,l,:) @@ -3621,9 +3700,18 @@ subroutine COMM_var( & ! ierr ) !--- error id ! ! call MPI_WAIT(ireq(1),istat,ierr) - v_npl_recvc(:,:)[ADM_prc_npl] = v_npl_send(:,:) + !--- 2020 Fujitsu + !v_npl_recvc(:,:)[ADM_prc_npl] = v_npl_send(:,:) + call xmp_array_section_set_triplet(v_npl_sec, 1, 1, knum, 1, ierr) + call xmp_array_section_set_triplet(v_npl_sec, 2, 1, nnum, 1, ierr) + img_dims(1) = ADM_prc_npl + call xmp_coarray_put_local(img_dims, v_npl_recvc_desc, v_npl_sec, v_npl_send_l_desc, v_npl_sec, ierr) + !--- 2020 Fujitsu end endif - sync all + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end enddo if ( ADM_prc_me == ADM_prc_npl ) then @@ -3647,7 +3735,10 @@ subroutine COMM_var( & do l = 1, ADM_lall rgnid = ADM_prc_tab(l,ADM_prc_me) - sync all + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end if ( rgnid == ADM_rgnid_spl_mng ) then !--- send south pole value v_spl_send(:,:) = var(suf(ADM_gmax+1,ADM_gmin),:,l,:) @@ -3662,9 +3753,18 @@ subroutine COMM_var( & ! ierr ) !--- error id ! ! 
call MPI_WAIT(ireq(2),istat,ierr) - v_spl_recvc(:,:)[ADM_prc_spl] = v_spl_send(:,:) + !--- 2020 Fujitsu + !v_spl_recvc(:,:)[ADM_prc_spl] = v_spl_send(:,:) + call xmp_array_section_set_triplet(v_spl_sec, 1, 1, knum, 1, ierr) + call xmp_array_section_set_triplet(v_spl_sec, 2, 1, nnum, 1, ierr) + img_dims(1) = ADM_prc_spl + call xmp_coarray_put_local(img_dims, v_spl_recvc_desc, v_spl_sec, v_spl_send_l_desc, v_spl_sec, ierr) + !--- 2020 Fujitsu end endif - sync all + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end enddo if ( ADM_prc_me == ADM_prc_spl ) then @@ -3690,6 +3790,10 @@ subroutine COMM_var( & var(suf(1,ADM_gall_1d),:,:,:) = var(suf(ADM_gmin,ADM_gmax+1),:,:,:) endif + !--- 2020 Fujitsu + call xmp_coarray_deallocate(v_npl_recvc_desc, ierr) + call xmp_coarray_deallocate(v_spl_recvc_desc, ierr) + !--- 2020 Fujitsu end call DEBUG_rapend('COMM var') return @@ -3703,6 +3807,8 @@ subroutine COMM_data_transfer_nopl(& hallo_num) !--- IN : number of hallo use mod_adm, only: & ADM_proc_stop, & ! [add] C.Kodama 2011.04.26 + ADM_prc_all, & !--- 2020 Fujitsu --- + ADM_prc_me, & !--- 2020 Fujitsu --- ADM_vlink_nmax, & ADM_lall, & ADM_comm_run_world, & @@ -3737,10 +3843,23 @@ subroutine COMM_data_transfer_nopl(& !============================================================================= !coarray integer dst_img - integer,allocatable :: tbl(:)[:] - real(8),allocatable :: caf_recvbuf(:,:)[:] + !--- 2020 Fujitsu + !integer,allocatable :: tbl(:)[:] + !real(8),allocatable :: caf_recvbuf(:,:)[:] + integer , POINTER :: tbl ( : ) => null ( ) + integer , POINTER :: caf_recvbuf ( : , : ) => null ( ) + integer(8) :: tbl_desc, caf_recvbuf_desc + integer(8) :: tbl_lb(1),tbl_ub(1), caf_recvbuf_lb(2), caf_recvbuf_ub(2) + integer(8) :: tbl_sec, caf_recvbuf_sec + integer(4) :: img_dims(1) + integer :: max_tmp + integer bufsize1,bufsize2 + call xmp_new_array_section(tbl_sec, 1) + call xmp_new_array_section(caf_recvbuf_sec, 2) + !--- 2020 Fujitsu end + ! ! 
--- pre-process ! @@ -3799,12 +3918,29 @@ subroutine COMM_data_transfer_nopl(& t(2) = xmp_wtime() time_bar1=time_bar1+(t(2)-t(1)) !coarray - allocate(tbl(num_images())[*]) - call co_max(maxdatasize_r) + !--- 2020 Fujitsu + !allocate(tbl(num_images())[*]) + tbl_lb(1) = 1; tbl_ub(1) = ADM_prc_all + call xmp_new_coarray(tbl_desc, 4, 1, tbl_lb, tbl_ub, 1, img_dims) + call xmp_coarray_bind(tbl_desc, tbl) + + !call co_max(maxdatasize_r) + call MPI_Allreduce(maxdatasiaze_r, max_tmp, 1, MPI_INTEGER, MPI_MAX, MPI_COMM_WORLD, ierr) + maxdatasiaze_r = max_tmp + bufsize1 = maxdatasize_r bufsize2 = romax(halomax) - call co_max(bufsize2) - allocate(caf_recvbuf(bufsize1,bufsize2)[*]) + + !call co_max(bufsize2) + call MPI_Allreduce(bufsize2, max_tmp, 1, MPI_INTEGER, MPI_MAX, MPI_COMM_WORLD, ierr) + bufsize2 = max_tmp + + !allocate(caf_recvbuf(bufsize1,bufsize2)[*]) + caf_recvbuf_lb(1) = 1; caf_recvbuf_ub(1) = bufsize1 + caf_recvbuf_lb(2) = 1; caf_recvbuf_ub(2) = bufsize2 + call xmp_new_coarray(caf_recvbuf_desc, 4, 2, caf_recvbuf_lb, caf_recvbuf_ub, 1, img_dims) + call xmp_coarray_bind(caf_recvbuf_desc, caf_recvbuf) + !--- 2020 Fujitsu end !----------------------------------------- ! call mpi_isend @@ -3866,7 +4002,7 @@ subroutine COMM_data_transfer_nopl(& !coarray t(4)=mpi_wtime() - t(4) = xmp_wtime() + t(4) = xmp_wtime() do ns=1,nsmax(so,halo) ss=sendinfo(SIZE_COMM,ns,so,halo) sl=sendinfo(LRGNID_COMM,ns,so,halo) @@ -3885,7 +4021,7 @@ subroutine COMM_data_transfer_nopl(& enddo !----------------------------------------- !coarray t(5)=mpi_wtime() - t(5) = xmp_wtime() + t(5) = xmp_wtime() time_sbuf=time_sbuf+(t(5)-t(4)) @@ -3910,9 +4046,20 @@ subroutine COMM_data_transfer_nopl(& ! ,ADM_comm_run_world & ! ,areq(so+romax(halo)) & ! 
,ierr) - dst_img = tbl(this_image())[destrank(so,halo)+1] - caf_recvbuf(1:ssize(so,halo)*cmax,dst_img)[destrank(so,halo)+1] & - = sendbuf(1:ssize(so,halo)*cmax,so) + !--- 2020 Fujitsu + !dst_img = tbl(this_image())[destrank(so,halo)+1] + img_dims(1) = destrank(so,halo)+1 + call xmp_array_section_set_triplet(tbl_sec, 1, ADM_prc_me, ADM_prc_me, 1, ierr) + call xmp_coarray_get_scalar(img_dims, tbl_desc, tbl_sec, dst_img, ierr) + + !caf_recvbuf(1:ssize(so,halo)*cmax,dst_img)[destrank(so,halo)+1] & + ! = sendbuf(1:ssize(so,halo)*cmax,so) + call xmp_array_section_set_triplet(caf_recvbuf_sec, 1, 1, ssize(so,halo)*cmax, 1, ierr) + call xmp_array_section_set_triplet(caf_recvbuf_sec, 2, dst_img, dst_img, 1, ierr) + call xmp_array_section_set_triplet(sendbuf_l_sec, 1, 1, ssize(so,halo)*cmax, 1, ierr) + call xmp_array_section_set_triplet(sendbuf_l_sec, 2, so, so, 1, ierr) + call xmp_coarray_put_local(img_dims, caf_recvbuf_desc, caf_recvbuf_sec, sendbuf_l_desc, sendbuf_l_sec, ierr) + !--- 2020 Fujitsu end if (opt_comm_dbg) then if (ierr.ne.0) then @@ -3931,7 +4078,7 @@ subroutine COMM_data_transfer_nopl(& !coarray t(6)=mpi_wtime() - t(6) = xmp_wtime() + t(6) = xmp_wtime() time_send=time_send+(t(6)-t(5)) size_total=size_total+ssize(so,halo)*cmax comm_count=comm_count+1 @@ -3967,7 +4114,10 @@ subroutine COMM_data_transfer_nopl(& acount=romax(halo)+somax(halo) !coarray ! 
call mpi_waitall(acount,areq,stat,ierr) - sync all + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu do ro=1,romax(halo) do n=1,rsize(ro,halo)*cmax @@ -4034,7 +4184,10 @@ subroutine COMM_data_transfer_nopl(& if (opt_comm_barrier) then !coarray call mpi_barrier(ADM_comm_run_world,ierr) - sync all + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end if (ierr.ne.0) then write(ADM_log_fid,*) 'mpi_barriert info start==' write(ADM_log_fid,*) 'ierr=',ierr @@ -4098,6 +4251,14 @@ subroutine COMM_data_transfer_nopl(& time_bar2=time_bar2+(t(12)-t(11)) time_total=time_total+(t(12)-t(0)) ! + + !--- 2020 Fujitsu + call xmp_free_array_section(tbl_sec) + call xmp_free_array_section(caf_recvbuf_sec) + + call xmp_coarray_deallocate(tbl_desc, ierr) + call xmp_coarray_deallocate(caf_recvbuf_desc, ierr) + !--- 2020 Fujitsu end end subroutine COMM_data_transfer_nopl @@ -4113,10 +4274,12 @@ subroutine COMM_Stat_sum( localsum, globalsum ) real(8), intent(out) :: globalsum !coarray -! real(8) :: sendbuf(1) -! real(8) :: recvbuf(ADM_prc_all) -! -! integer :: ierr + !--- 2020 Fujitsu + real(8) :: sendbuf(1) + real(8) :: recvbuf(ADM_prc_all) + + integer :: ierr + !--- 2020 Fujitsu end !20180208 integer localVal !--------------------------------------------------------------------------- @@ -4124,23 +4287,25 @@ subroutine COMM_Stat_sum( localsum, globalsum ) if ( COMM_pl ) then !coarray -! sendbuf(1) = localsum -! -! call MPI_Allgather( sendbuf, & -! 1, & -! MPI_DOUBLE_PRECISION, & -! recvbuf, & -! 1, & -! MPI_DOUBLE_PRECISION, & -! ADM_COMM_RUN_WORLD, & -! ierr ) -! -! 
globalsum = sum( recvbuf(:) ) + !--- 2020 Fujitsu + sendbuf(1) = localsum + + call MPI_Allgather( sendbuf, & + 1, & + MPI_DOUBLE_PRECISION, & + recvbuf, & + 1, & + MPI_DOUBLE_PRECISION, & + ADM_COMM_RUN_WORLD, & + ierr ) + + globalsum = sum( recvbuf(:) ) !20180208 localVal = localsum !20180208 call co_sum(localVal) !20180208 globalsum = localVal - globalsum = localsum - call co_sum(globalsum) + !globalsum = localsum + !call co_sum(globalsum) + !--- 2020 Fujitsu end else globalsum = localsum endif @@ -4180,19 +4345,21 @@ subroutine COMM_Stat_sum_eachlayer( kall, localsum, globalsum ) if ( COMM_pl ) then !coarray -! call MPI_Allgatherv( MPI_IN_PLACE, & -! 1, & -! MPI_DOUBLE_PRECISION, & -! gathersum(1,1), & -! rcounts, & -! displs, & -! MPI_DOUBLE_PRECISION, & -! ADM_COMM_RUN_WORLD, & -! ierr ) -! -! do k = 1, kall -! globalsum(k) = sum( gathersum(:,k) ) -! enddo + !--- 2020 Fujitsu + call MPI_Allgatherv( MPI_IN_PLACE, & + 1, & + MPI_DOUBLE_PRECISION, & + gathersum(1,1), & + rcounts, & + displs, & + MPI_DOUBLE_PRECISION, & + ADM_COMM_RUN_WORLD, & + ierr ) + + do k = 1, kall + globalsum(k) = sum( gathersum(:,k) ) + enddo + !--- 2020 Fujitsu end else do k = 1, kall globalsum = gathersum(ADM_prc_me,k) @@ -4213,33 +4380,37 @@ subroutine COMM_Stat_avg( localavg, globalavg ) real(8), intent(out) :: globalavg !coarray -! real(8) :: sendbuf(1) -! real(8) :: recvbuf(ADM_prc_all) -! -! integer :: ierr + !--- 2020 Fujitsu + real(8) :: sendbuf(1) + real(8) :: recvbuf(ADM_prc_all) + + integer :: ierr + !--- 2020 Fujitsu end !20180208 !20180208 real(8) localVal !--------------------------------------------------------------------------- if ( COMM_pl ) then !coarray -! sendbuf(1) = localavg -! -! call MPI_Allgather( sendbuf, & -! 1, & -! MPI_DOUBLE_PRECISION, & -! recvbuf, & -! 1, & -! MPI_DOUBLE_PRECISION, & -! ADM_COMM_RUN_WORLD, & -! ierr ) -! -! 
globalavg = sum( recvbuf(:) ) / real(ADM_prc_all,kind=8) + !--- 2020 Fujitsu + sendbuf(1) = localavg + + call MPI_Allgather( sendbuf, & + 1, & + MPI_DOUBLE_PRECISION, & + recvbuf, & + 1, & + MPI_DOUBLE_PRECISION, & + ADM_COMM_RUN_WORLD, & + ierr ) + + globalavg = sum( recvbuf(:) ) / real(ADM_prc_all,kind=8) !20280208 localVal = localavg !20280208 call co_sum(localVal) !20280208 globalavg = localVal - globalavg = localavg - call co_sum(globalavg) + !globalavg = localavg + !call co_sum(globalavg) + !--- 2020 Fujitsu end globalavg = globalavg / real(ADM_prc_all,kind=8) else @@ -4264,31 +4435,35 @@ subroutine COMM_Stat_max( localmax, globalmax ) real(8), intent(out) :: globalmax !coarray -! real(8) :: sendbuf(1) -! real(8) :: recvbuf(ADM_prc_all) -! -! integer :: ierr + !--- 2020 Fujitsu + real(8) :: sendbuf(1) + real(8) :: recvbuf(ADM_prc_all) + + integer :: ierr + !--- 2020 Fujitsu end !20180208 real(8) localVal !--------------------------------------------------------------------------- !coarray -! sendbuf(1) = localmax -! -! call MPI_Allgather( sendbuf, & -! 1, & -! MPI_DOUBLE_PRECISION, & -! recvbuf, & -! 1, & -! MPI_DOUBLE_PRECISION, & -! ADM_COMM_RUN_WORLD, & -! ierr ) -! -! globalmax = maxval( recvbuf(:) ) + !--- 2020 Fujitsu + sendbuf(1) = localmax + + call MPI_Allgather( sendbuf, & + 1, & + MPI_DOUBLE_PRECISION, & + recvbuf, & + 1, & + MPI_DOUBLE_PRECISION, & + ADM_COMM_RUN_WORLD, & + ierr ) + + globalmax = maxval( recvbuf(:) ) !20180208 localVal = localmax !20180208 call co_max(localVal) !20180208 globalmax = localVal - globalmax = localmax - call co_max(globalmax) + !globalmax = localmax + !call co_max(globalmax) + !--- 2020 Fujitsu end !write(ADM_LOG_FID,*) 'COMM_Stat_max', sendbuf(1), recvbuf(:) @@ -4308,32 +4483,36 @@ subroutine COMM_Stat_min( localmin, globalmin ) real(8), intent(out) :: globalmin !coarray -! real(8) :: sendbuf(1) -! real(8) :: recvbuf(ADM_prc_all) -! -! 
integer :: ierr + !--- 2020 Fujitsu + real(8) :: sendbuf(1) + real(8) :: recvbuf(ADM_prc_all) + + integer :: ierr + !--- 2020 Fujitsu end !20180208 real(8) localVal !--------------------------------------------------------------------------- !coarray -! sendbuf(1) = localmin -! -! call MPI_Allgather( sendbuf, & -! 1, & -! MPI_DOUBLE_PRECISION, & -! recvbuf, & -! 1, & -! MPI_DOUBLE_PRECISION, & -! ADM_COMM_RUN_WORLD, & -! ierr ) -! -! globalmin = minval( recvbuf(:) ) + !--- 2020 Fujitsu + sendbuf(1) = localmin + + call MPI_Allgather( sendbuf, & + 1, & + MPI_DOUBLE_PRECISION, & + recvbuf, & + 1, & + MPI_DOUBLE_PRECISION, & + ADM_COMM_RUN_WORLD, & + ierr ) + + globalmin = minval( recvbuf(:) ) ! !20180208 localVal = localmin !20180208 call co_min(localVal) !20180208 globalmin = localVal - globalmin = localmin - call co_min(globalmin) + !globalmin = localmin + !call co_min(globalmin) + !--- 2020 Fujitsu end !write(ADM_LOG_FID,*) 'COMM_Stat_min', sendbuf(1), recvbuf(:) diff --git a/NICAM-DC-MINI/src/share/mod_debug_xmpAPI.f90 b/NICAM-DC-MINI/src/share/mod_debug_xmpAPI.f90 new file mode 100755 index 0000000..9381d97 --- /dev/null +++ b/NICAM-DC-MINI/src/share/mod_debug_xmpAPI.f90 @@ -0,0 +1,537 @@ +!------------------------------------------------------------------------------- +!> +!! Debug utility module +!! +!! @par Description +!! This module is for dubug. +!! +!! @author H.Tomita +!! +!! @par History +!! @li 2012-06-29 (H.Yashiro) [NEW] +!< +module mod_debug + !----------------------------------------------------------------------------- + ! + !++ Used modules + ! + !--- 2020 Fujitsu + use mpi + use xmp_api + !--- 2020 Fujitsu end + use mod_coarray + use mod_adm, only: & + ADM_LOG_FID, & + ADM_NSYS, & + ADM_MAXFNAME + !----------------------------------------------------------------------------- + implicit none + private + !----------------------------------------------------------------------------- + ! + !++ Public procedure + ! 
+ public :: DEBUG_dampdata + public :: DEBUG_dampascii4D + public :: DEBUG_dampascii3D + public :: DEBUG_rapstart + public :: DEBUG_rapend + public :: DEBUG_rapreport + + !----------------------------------------------------------------------------- + ! + !++ Public parameters & variables + ! + !----------------------------------------------------------------------------- + ! + !++ Private procedure + ! + private :: DEBUG_rapid + + !----------------------------------------------------------------------------- + ! + !++ Private parameters & variables + ! + integer, private, parameter :: DEBUG_rapnlimit = 100 + integer, private, save :: DEBUG_rapnmax = 0 + character(len=ADM_NSYS), private, save :: DEBUG_rapname(DEBUG_rapnlimit) + real(8), private, save :: DEBUG_raptstr(DEBUG_rapnlimit) + real(8), private, save :: DEBUG_rapttot(DEBUG_rapnlimit) + integer, private, save :: DEBUG_rapnstr(DEBUG_rapnlimit) + integer, private, save :: DEBUG_rapnend(DEBUG_rapnlimit) + +#ifdef PAPI_OPS + ! <-- [add] PAPI R.Yoshida 20121022 + !integer(8),public, save :: papi_flpins !total floating point instructions since the first call + integer(8),public, save :: papi_flpops !total floating point operations since the first call + !real(4), public, save :: papi_real_time_i !total realtime since the first PAPI_flins() call + !real(4), public, save :: papi_proc_time_i !total process time since the first PAPI_flins() call + real(4), public, save :: papi_real_time_o !total realtime since the first PAPI_flops() call + real(4), public, save :: papi_proc_time_o !total process time since the first PAPI_flops() call + !real(4), public, save :: papi_mflins !Mflip/s achieved since the previous call + real(4), public, save :: papi_mflops !Mflop/s achieved since the previous call + integer, public, save :: papi_check +#endif + + !----------------------------------------------------------------------------- +contains + + !----------------------------------------------------------------------------- + !> + 
!> Damp all data + !> + subroutine DEBUG_dampdata( & + basename, & !--- [IN] + var, & !--- [IN] + var_pl ) !--- [IN] + use mod_misc, only: & + MISC_make_idstr, & + MISC_get_available_fid + use mod_adm, only: & + ADM_PRC_PL, & + ADM_prc_me + implicit none + + character(len=*), intent(in) :: basename + real(8), intent(in) :: var (:,:,:,:) + real(8), intent(in) :: var_pl(:,:,:,:) + + integer :: shp(4) + + character(LEN=ADM_MAXFNAME) :: fname + + integer :: fid + !--------------------------------------------------------------------------- + + shp(:) = shape(var) + + call MISC_make_idstr(fname,trim(basename),'pe',ADM_prc_me) + fid = MISC_get_available_fid() + open( unit = fid, & + file = trim(fname), & + form = 'unformatted', & + access = 'direct', & + recl = shp(1)*shp(2)*shp(3)*shp(4)*8, & + status = 'unknown' ) + + write(fid,rec=1) var + + close(fid) + + if ( ADM_prc_me == ADM_prc_pl ) then + shp(:) = shape(var_pl) + + fname = trim(basename)//'.pl' + fid = MISC_get_available_fid() + open( unit = fid, & + file = trim(fname), & + form = 'unformatted', & + access = 'direct', & + recl = shp(1)*shp(2)*shp(3)*shp(4)*8, & + status = 'unknown' ) + + write(fid,rec=1) var_pl + + close(fid) + + endif + + end subroutine DEBUG_dampdata + + !----------------------------------------------------------------------------- + !> + !> Damp all data + !> + subroutine DEBUG_dampascii4D( & + basename, & !--- [IN] + var, & !--- [IN] + var_pl ) !--- [IN] + use mod_misc, only: & + MISC_make_idstr, & + MISC_get_available_fid + use mod_adm, only: & + ADM_prc_pl, & + ADM_prc_me + implicit none + + character(len=*), intent(in) :: basename + real(8), intent(in) :: var (:,:,:,:) + real(8), intent(in) :: var_pl(:,:,:,:) + + integer :: shp(4) + + character(LEN=ADM_MAXFNAME) :: fname + + integer :: fid + integer :: i1,i2,i3,i4 + !--------------------------------------------------------------------------- + + shp(:) = shape(var) + + call MISC_make_idstr(fname,trim(basename),'txt',ADM_prc_me) + fid = 
MISC_get_available_fid() + open( unit = fid, & + file = trim(fname), & + form = 'formatted', & + status = 'unknown' ) + + do i4 = 1, shp(4) + do i3 = 1, shp(3) + do i2 = 1, shp(2) + do i1 = 1, shp(1) + write(fid,*) "(",i1,",",i2,",",i3,",",i4,")=",var(i1,i2,i3,i4) + enddo + enddo + enddo + enddo + + close(fid) + + if ( ADM_prc_me == ADM_prc_pl ) then + shp(:) = shape(var_pl) + + fname = trim(basename)//'.txtpl' + fid = MISC_get_available_fid() + open( unit = fid, & + file = trim(fname), & + form = 'formatted', & + status = 'unknown' ) + + do i4 = 1, shp(4) + do i3 = 1, shp(3) + do i2 = 1, shp(2) + do i1 = 1, shp(1) + write(fid,*) "(",i1,",",i2,",",i3,",",i4,")=",var_pl(i1,i2,i3,i4) + enddo + enddo + enddo + enddo + + close(fid) + + endif + + end subroutine DEBUG_dampascii4D + + !----------------------------------------------------------------------------- + !> + !> Damp all data + !> + subroutine DEBUG_dampascii3D( & + basename, & !--- [IN] + var, & !--- [IN] + var_pl ) !--- [IN] + use mod_misc, only: & + MISC_make_idstr, & + MISC_get_available_fid + use mod_adm, only: & + ADM_prc_pl, & + ADM_prc_me + implicit none + + character(len=*), intent(in) :: basename + real(8), intent(in) :: var (:,:,:) + real(8), intent(in) :: var_pl(:,:,:) + + integer :: shp(3) + + character(LEN=ADM_MAXFNAME) :: fname + + integer :: fid + integer :: i1,i2,i3 + !--------------------------------------------------------------------------- + + shp(:) = shape(var) + + call MISC_make_idstr(fname,trim(basename),'txt',ADM_prc_me) + fid = MISC_get_available_fid() + open( unit = fid, & + file = trim(fname), & + form = 'formatted', & + status = 'unknown' ) + + do i3 = 1, shp(3) + do i2 = 1, shp(2) + do i1 = 1, shp(1) + write(fid,*) "(",i1,",",i2,",",i3,")=",var(i1,i2,i3) + enddo + enddo + enddo + + close(fid) + + if ( ADM_prc_me == ADM_prc_pl ) then + shp(:) = shape(var_pl) + + fname = trim(basename)//'.txtpl' + fid = MISC_get_available_fid() + open( unit = fid, & + file = trim(fname), & + form = 
'formatted', & + status = 'unknown' ) + + do i3 = 1, shp(3) + do i2 = 1, shp(2) + do i1 = 1, shp(1) + write(fid,*) "(",i1,",",i2,",",i3,")=",var_pl(i1,i2,i3) + enddo + enddo + enddo + + close(fid) + + endif + + end subroutine DEBUG_dampascii3D + + !----------------------------------------------------------------------------- + function DEBUG_rapid( rapname ) result(id) + implicit none + + character(len=*), intent(in) :: rapname + + integer :: id + !--------------------------------------------------------------------------- + + if ( DEBUG_rapnmax >= 1 ) then + do id = 1, DEBUG_rapnmax + if( trim(rapname) == trim(DEBUG_rapname(id)) ) return + enddo + endif + + DEBUG_rapnmax = DEBUG_rapnmax + 1 + id = DEBUG_rapnmax + DEBUG_rapname(id) = trim(rapname) + DEBUG_raptstr(id) = 0.D0 + DEBUG_rapttot(id) = 0.D0 + DEBUG_rapnstr(id) = 0 + DEBUG_rapnend(id) = 0 + + end function DEBUG_rapid + + !----------------------------------------------------------------------------- + subroutine DEBUG_rapstart( rapname ) + implicit none + + character(len=*), intent(in) :: rapname + + real(8) :: time + + integer :: id + !--------------------------------------------------------------------------- + + id = DEBUG_rapid( rapname ) + +!coarray time = real(MPI_WTIME(), kind=8) + time = xmp_wtime() + + DEBUG_raptstr(id) = time + DEBUG_rapnstr(id) = DEBUG_rapnstr(id) + 1 + +#ifdef _FAPP_ +call START_COLLECTION( rapname ) +#endif + + return + end subroutine DEBUG_rapstart + + !----------------------------------------------------------------------------- + subroutine DEBUG_rapend( rapname ) + implicit none + + character(len=*), intent(in) :: rapname + + real(8) :: time + + integer :: id + !--------------------------------------------------------------------------- + + id = DEBUG_rapid( rapname ) + +!coarray time = real(MPI_WTIME(), kind=8) + time = xmp_wtime() + + DEBUG_rapttot(id) = DEBUG_rapttot(id) + ( time-DEBUG_raptstr(id) ) + DEBUG_rapnend(id) = DEBUG_rapnend(id) + 1 + +#ifdef _FAPP_ +call 
STOP_COLLECTION( rapname ) +#endif + + return + end subroutine DEBUG_rapend + + !----------------------------------------------------------------------------- + subroutine DEBUG_rapreport + use mod_adm, only: & + ADM_COMM_RUN_WORLD, & + ADM_prc_all, & + ADM_prc_me + implicit none + + real(8) :: sendbuf(1) + real(8) :: recvbuf(ADM_prc_all) + + real(8) :: globalavg, globalmax, globalmin +#ifdef PAPI_OPS + real(8) :: globalsum, total_flops +#endif + + integer :: ierr + integer :: id + real(8) :: localVal + !--------------------------------------------------------------------------- + + if ( DEBUG_rapnmax >= 1 ) then + + do id = 1, DEBUG_rapnmax + if ( DEBUG_rapnstr(id) /= DEBUG_rapnend(id) ) then + write(*,*) '*** Mismatch Report',id,DEBUG_rapname(id),DEBUG_rapnstr(id),DEBUG_rapnend(id) + endif + enddo + + write(ADM_LOG_FID,*) + write(ADM_LOG_FID,*) '*** Computational Time Report' + +! do id = 1, DEBUG_rapnmax +! write(ADM_LOG_FID,'(1x,A,I3.3,A,A,A,F10.3,A,I7)') & +! '*** ID=',id,' : ',DEBUG_rapname(id),' T=',DEBUG_rapttot(id),' N=',DEBUG_rapnstr(id) +! enddo + + do id = 1, DEBUG_rapnmax +!coarray + !--- 2020 Fujitsu + sendbuf(1) = DEBUG_rapttot(id) + + call MPI_Allgather( sendbuf, & + 1, & + MPI_DOUBLE_PRECISION, & + recvbuf, & + 1, & + MPI_DOUBLE_PRECISION, & + ADM_COMM_RUN_WORLD, & + ierr ) + + globalavg = sum( recvbuf(:) ) / real(ADM_prc_all,kind=8) + globalmax = maxval( recvbuf(:) ) + globalmin = minval( recvbuf(:) ) + + !localVal = DEBUG_rapttot(id) + !globalmax = localVal + !globalmin = localVal + !globalavg = localVal + !call co_max(globalmax) + !call co_min(globalmin) + !call co_sum(globalavg) + ! 
+ !globalavg = globalavg / real(ADM_prc_all,kind=8) + !--- 2020 Fujitsu end + + write(ADM_LOG_FID,'(1x,A,I3.3,A,A,A,F10.3,A,F10.3,A,F10.3,A,I7)') & + '*** ID=', id, & + ' : ', DEBUG_rapname(id), & + ' T(avg)=', globalavg, & + ', T(max)=', globalmax, & + ', T(min)=', globalmin, & + ', N=', DEBUG_rapnstr(id) + enddo + else + write(ADM_LOG_FID,*) + write(ADM_LOG_FID,*) '*** Computational Time Report: NO item.' + endif + +#ifdef PAPI_OPS + ! [add] PAPI R.Yoshida 20121022 + !write(ADM_LOG_FID,*) ' *** Type: Instructions' + !write(ADM_LOG_FID,*) ' --- Real Time:',papi_real_time_i*2.0d0,' Proc. Time:',papi_proc_time_i*2.0d0 + !write(ADM_LOG_FID,*) ' --- flop inst:',papi_flpins*2,' Gflins/s:',papi_mflins*2.0d0/1.0d3 !GIGA + write(ADM_LOG_FID,*) + write(ADM_LOG_FID,*) '********* PAPI report *********' + write(ADM_LOG_FID,*) '*** Type: Operations' + write(ADM_LOG_FID,*) '--- Wall clock Time [sec] (this PE):', papi_real_time_o + write(ADM_LOG_FID,*) '--- Processor Time [sec] (this PE):', papi_proc_time_o + write(ADM_LOG_FID,*) '--- Floating Operations [FLOP] (this PE):', papi_flpops + write(ADM_LOG_FID,*) '--- FLOPS by PAPI [MFLOPS] (this PE):', papi_mflops + write(ADM_LOG_FID,*) '--- FLOP / Time [MFLOPS] (this PE):', papi_flpops / papi_proc_time_o / 1024.D0**2 !GIGA + write(ADM_LOG_FID,*) + +!coarray + !--- 2020 Fujitsu + sendbuf(1) = real(papi_proc_time_o,kind=8) + call MPI_Allgather( sendbuf, & + 1, & + MPI_DOUBLE_PRECISION, & + recvbuf, & + 1, & + MPI_DOUBLE_PRECISION, & + ADM_COMM_RUN_WORLD, & + ierr ) + + + globalavg = sum( recvbuf(:) ) / real(ADM_prc_all,kind=8) + globalmax = maxval( recvbuf(:) ) + globalmin = minval( recvbuf(:) ) + !localVal = real(papi_proc_time_o,kind=8) + !globalmax = localVal + !globalmin = localVal + !globalavg = localVal + !call co_max(globalmax) + !call co_min(globalmin) + !call co_sum(globalavg) + ! 
+ !globalavg = globalavg / real(ADM_prc_all,kind=8) + !--- 2020 Fujitsu end + + call COMM_Stat_avg( real(papi_proc_time_o,kind=8), globalavg ) + call COMM_Stat_max( real(papi_proc_time_o,kind=8), globalmax ) + call COMM_Stat_min( real(papi_proc_time_o,kind=8), globalmin ) + + write(ADM_LOG_FID,'(1x,A,F10.3,A,F10.3,A,F10.3)') & + '--- Processor Time [sec] (avg)=', globalavg, & + ', (max)=', globalmax, & + ', (min)=', globalmin + +!coarray + !--- 2020 Fujitsu + sendbuf(1) = real(papi_flpops,kind=8) + call MPI_Allgather( sendbuf, & + 1, & + MPI_DOUBLE_PRECISION, & + recvbuf, & + 1, & + MPI_DOUBLE_PRECISION, & + ADM_COMM_RUN_WORLD, & + ierr ) + + globalsum = sum( recvbuf(:) ) + globalavg = globalsum / real(ADM_prc_all,kind=8) + globalmax = maxval( recvbuf(:) ) + globalmin = minval( recvbuf(:) ) + !localVal = real(papi_flpops,kind=8) + !globalmax = localVal + !globalmin = localVal + !globalsum = localVal + !call co_max(globalmax) + !call co_min(globalmin) + !call co_sum(globalsum) + ! + !globalavg = globalsum / real(ADM_prc_all,kind=8) + !--- 2020 Fujitsu end + + total_flops = globalsum / globalmax / 1024.D0**3 + + write(ADM_LOG_FID,'(1x,A,F10.3,A,F10.3,A,F10.3)') & + '--- Floating Operations [GFLOP] (avg)=', globalavg / 1024.D0**3, & + ', (max)=', globalmax / 1024.D0**3, & + ', (min)=', globalmin / 1024.D0**3 + write(ADM_LOG_FID,'(1x,A,F10.3)') & + '--- Total Flops [GFLOPS] (all PE):',total_flops + + call PAPIF_shutdown +#endif + + return + end subroutine DEBUG_rapreport + +end module mod_debug +!------------------------------------------------------------------------------- diff --git a/NICAM-DC-MINI/src/share/mod_grd_xmpXPI.f90 b/NICAM-DC-MINI/src/share/mod_grd_xmpXPI.f90 new file mode 100755 index 0000000..c87b6b9 --- /dev/null +++ b/NICAM-DC-MINI/src/share/mod_grd_xmpXPI.f90 @@ -0,0 +1,1420 @@ +!------------------------------------------------------------------------------- +!> +!! Grid system module +!! +!! @par Description +!! 
This module is for the management of the icosahedral grid system +!! +!! @author H.Tomita +!! +!! @par History +!! @li 2004-02-17 (H.Tomita) Imported from igdc-4.33 +!! @li 2009-01-23 (H.Tomita) extend the vertical grid method, introducing "hflat". +!! @li 2009-03-10 (H.Tomita) 1. add sub[GRD_gen_plgrid] +!! ( This subroutine generates +!! the pole grids from the regular region grids. ) +!! 2. support direct access of grid file without pole data. +!! sub[GRD_input_hgrid,GRD_output_hgrid]. +!! 3. add 'da_access_hgrid' in the namelist. +!! @li 2009-03-10 (H.Tomita) add error handling in GRD_input_hgrid. +!! @li 2009-05-27 (M.Hara) 1. bug fix of error handling in GRD_input_hgrid. +!! 2. remove "optional" declaration from +!! da_access in GRD_input_hgrid and GRD_output_hgrid. +!! @li 2011-07-22 (T.Ohno) add parameters +!! 1.GRD_grid_type 'ON_SPHERE' / 'ON_PLANE' +!! 2.hgrid_comm_flg +!! the grid data should be communicated or not. ( default:.true. ) +!! 3.triangle_size +!! scale factor when GRD_grid_type is 'ON_PLANE' +!! @li 2011-09-03 (H.Yashiro) New I/O +!! @li 2012-05-25 (H.Yashiro) Avoid irregal ISEND/IRECV comm. +!! @li 2012-10-20 (R.Yoshida) Topography for Jablonowski test +!! +!< +module mod_grd + !----------------------------------------------------------------------------- + ! + !++ Used modules + ! +!! use mpi + !--- 2020 Fujitsu + !use mod_coarray + use xmp_api + !--- 2020 Fujitsu end + use mod_adm, only: & + ADM_LOG_FID, & + ADM_NSYS, & + ADM_MAXFNAME + !----------------------------------------------------------------------------- + implicit none + private + !----------------------------------------------------------------------------- + ! + !++ Public procedure + ! + public :: GRD_setup + public :: GRD_output_hgrid + public :: GRD_input_hgrid + public :: GRD_scaling + public :: GRD_output_vgrid + public :: GRD_input_vgrid + public :: GRD_gen_plgrid + + !----------------------------------------------------------------------------- + ! 
+ !++ Public parameters & variables + ! + + !====== Horizontal direction ====== + ! + !------ Scaling factor for length, e.g., earth's radius. + real(8), public, save :: GRD_rscale + ! + !------ Indentifiers for the directions in the Cartesian coordinate. + integer, public, parameter :: GRD_XDIR=1 + integer, public, parameter :: GRD_YDIR=2 + integer, public, parameter :: GRD_ZDIR=3 + ! + !------ Grid points ( CELL CENTER ) + real(8), public, allocatable, save :: GRD_x (:,:,:,:) + real(8), public, allocatable, save :: GRD_x_pl(:,:,:,:) + !<----- + !<----- GRD_x(1:ADM_gall, & --- horizontal + !<----- 1:ADM_KNONE, & --- vertical + !<----- 1:ADM_lall, & --- local region + !<----- GRD_XDIR:GRD_ZDIR) --- three components + !<----- + !<----- GRD_x_pl(1:ADM_gall_pl, & --- horizontal + !<----- 1:ADM_KNONE, & --- vertical + !<----- 1:ADM_lall_pl, & --- pole regions + !<----- GRD_XDIR:GRD_ZDIR) --- three components + !<----- ___ + !<----- / \ + !<----- < p > + !<----- \ ___ / + !<----- + + !------ Grid points ( CELL CORNER ) + real(8), public, allocatable, save :: GRD_xt (:,:,:,:,:) + real(8), public, allocatable, save :: GRD_xt_pl(:,:,:,:) + !<----- + !<----- GRD_xt(1:ADM_gall, & --- horizontal + !<----- 1:ADM_KNONE, & --- vertical + !<----- 1:ADM_lall, & --- local region + !<----- ADM_TI:ADM_TJ, & --- upper or lower triangle. + !<----- GRD_XDIR:GRD_ZDIR) --- three components + !<----- + !<----- GRD_xt_pl(1:ADM_gall_pl, & --- horizontal + !<----- 1:ADM_KNONE, & --- vertical + !<----- 1:ADM_lall_pl, & --- pole regions + !<----- GRD_XDIR:GRD_ZDIR) --- three components + !<----- p___p + !<----- / \ + !<----- p p + !<----- \ ___ / + !<----- p p + + real(8), public, allocatable, save :: GRD_e (:,:,:) ! unscaled GRD_x (=unit vector) + real(8), public, allocatable, save :: GRD_e_pl(:,:,:) + + !====== Vertical direction ====== + ! + !------ Top height + real(8), public, save :: GRD_htop + !<----- unit : [m] + ! 
+ !------ xi coordinate + real(8), public, allocatable, save :: GRD_gz(:) + ! + !------ xi coordinate at the half point + real(8), public, allocatable, save :: GRD_gzh(:) + ! + !------ d(xi) + real(8), public, allocatable, save :: GRD_dgz(:) + ! + !------ d(xi) at the half point + real(8), public, allocatable, save :: GRD_dgzh(:) + ! + !------ 1/dgz, 1/dgzh ( add by kgoto ) + real(8), public, allocatable, save :: GRD_rdgz (:) + real(8), public, allocatable, save :: GRD_rdgzh(:) + + !------ Topography & vegitation + integer, public, parameter :: GRD_ZSFC = 1 + integer, public, parameter :: GRD_ZSD = 2 + integer, public, parameter :: GRD_VEGINDX = 3 + real(8), public, allocatable, save :: GRD_zs (:,:,:,:) + real(8), public, allocatable, save :: GRD_zs_pl(:,:,:,:) + !<----- + !<----- GRD_zs(1:ADM_gall, & + !<----- ADM_KNONE, & <- one layer data + !<----- 1:ADM_lall, & + !<----- GRD_ZSFC:GRD_VEGINDX)) + !<----- + !<----- GRD_zs_pl(1:ADM_gall_pl, & + !<----- ADM_KNONE, & <- one layer data + !<----- 1:ADM_lall_pl, & + !<----- GRD_ZSFC:GRD_VEGINDX)) + !<----- + ! + !------ z coordinate ( actual height ) + integer, public, parameter :: GRD_Z = 1 + integer, public, parameter :: GRD_ZH = 2 + real(8), public, allocatable, save :: GRD_vz (:,:,:,:) + real(8), public, allocatable, save :: GRD_vz_pl(:,:,:,:) + !<----- + !<----- GRD_vz(1:ADM_gall, & + !<----- 1:ADM_kall, & + !<----- 1:ADM_lall, & + !<----- GRD_Z:GRD_ZH)) + !<----- GRD_vz_pl(1:ADM_gall_pl, & + !<----- 1:ADM_kall, & + !<----- 1:ADM_lall_pl, & + !<----- GRD_Z:GRD_ZH)) + !<----- + ! + !------ Vertical interpolation factors + real(8), public, allocatable, save :: GRD_afac(:) + real(8), public, allocatable, save :: GRD_bfac(:) + real(8), public, allocatable, save :: GRD_cfac(:) + real(8), public, allocatable, save :: GRD_dfac(:) + ! + ! [add] T.Ohno 110722 + character(ADM_NSYS), public, save :: GRD_grid_type = 'ON_SPHERE' + ! 
'ON_PLANE' + + !----------------------------------------------------------------------------- + ! + !++ Private procedure + ! + !----------------------------------------------------------------------------- + ! + !++ Private parameters & variables + ! + character(len=ADM_MAXFNAME), private, save :: hgrid_fname = '' ! Horizontal grid file + + character(len=ADM_MAXFNAME), private, save :: topo_fname = '' ! Topographical data file + character(len=ADM_MAXFNAME), private, save :: toposd_fname = '' ! Standard deviation of topog. data file + character(len=ADM_MAXFNAME), private, save :: vegeindex_fname = '' ! Vegetation index data file + + character(len=ADM_MAXFNAME), private, save :: vgrid_fname = '' ! Vertical grid file + character(len=ADM_NSYS), private, save :: vgrid_scheme = 'LINEAR' ! Vertical coordinate scheme + real(8), private, save :: h_efold = 10000.D0 ! [m] + real(8), private, save :: hflat = -999.D0 ! [m] + + logical, private, save :: hgrid_comm_flg = .true. ! [add] T.Ohno 110722 + real(8), private, save :: triangle_size = 0.D0 ! [add] T.Ohno 110722 length of sides of triangle + + logical, private, save :: da_access_hgrid = .false. + logical, private, save :: topo_direct_access = .false. ! [add] H.Yashiro 20110819 + character(len=ADM_NSYS), private, save :: hgrid_io_mode = 'LEGACY' ! [add] H.Yashiro 20110819 + character(len=ADM_NSYS), private, save :: topo_io_mode = 'LEGACY' ! [add] H.Yashiro 20110819 + + logical, private, save :: output_vgrid = .false. + + !----------------------------------------------------------------------------- +contains + + !----------------------------------------------------------------------------- + !> + !> Setup routine for grid module. + !> 1. set the horizontal grid + !> 2. set the vertical grid + !> 3. 
set the topograph + !> + subroutine GRD_setup + use mod_adm, only : & + ADM_CTL_FID, & + ADM_PRC_PL, & + ADM_lall_pl, & + ADM_gall_pl, & + ADM_TI, & + ADM_TJ, & + ADM_GSLF_PL, & + ADM_KNONE, & + ADM_VNONE, & + ADM_prc_me, & + ADM_lall, & + ADM_kall, & + ADM_gmin, & + ADM_gmax, & + ADM_gall, & + ADM_gall_1d, & + ADM_kmin, & + ADM_kmax, & + ADM_prc_run_master,& + ADM_proc_stop + use mod_cnst, only : & + CNST_ERADIUS + use mod_comm, only : & + COMM_data_transfer, & + COMM_var ! [add] H.Yashiro 20110819 + implicit none + + namelist / GRDPARAM / & + vgrid_fname, & !--- vertical grid file-name + hgrid_fname, & !--- horizontal grid basename + topo_fname, & !--- topography basename + toposd_fname, & !--- standard deviation of topography basename + vegeindex_fname, & !--- vegetation index basename + vgrid_scheme, & !--- verical grid scheme + h_efold, & !--- efolding height for hybrid vertical grid. + hflat, & + output_vgrid, & !--- output verical grid file? + hgrid_comm_flg, & !--- communicate GRD_x ! [add] T.Ohno 110722 + triangle_size, & !--- length of sides of triangle ! [add] T.Ohno 110722 + GRD_grid_type, & !--- grid type ! [add] T.Ohno 110722 + da_access_hgrid, & + hgrid_io_mode, & !--- io type(hgrid) [add] H.Yashiro 20110819 + topo_io_mode !--- io type(topo) [add] H.Yashiro 20110819 + + integer :: n,k,l + integer :: ierr + + integer :: kflat, K0 + real(8) :: htop + + real(8) :: fac_scale ! [add] T.Ohno 110722 + + integer :: nstart,nend + integer :: i,j,suf + suf(i,j) = ADM_gall_1d * ((j)-1) + (i) + !--------------------------------------------------------------------------- + + !--- read parameters + write(ADM_LOG_FID,*) + write(ADM_LOG_FID,*) '+++ Module[grd]/Category[common share]' + rewind(ADM_CTL_FID) + read(ADM_CTL_FID,nml=GRDPARAM,iostat=ierr) + if ( ierr < 0 ) then + write(ADM_LOG_FID,*) '*** GRDPARAM is not specified. use default.' + elseif( ierr > 0 ) then + write(*, *) 'xxx Not appropriate names in namelist GRDPARAM. STOP.' 
+ write(ADM_LOG_FID,*) 'xxx Not appropriate names in namelist GRDPARAM. STOP.' + call ADM_proc_stop + endif + write(ADM_LOG_FID,GRDPARAM) + + K0 = ADM_KNONE + + ! + !--- < setting the horizontal grid > --- + ! + !------ allocation and intitialization of horizontal grid points + !------ ( cell CENTER ) + allocate( GRD_x (ADM_gall, K0,ADM_lall, GRD_XDIR:GRD_ZDIR) ) + allocate( GRD_x_pl(ADM_gall_pl,K0,ADM_lall_pl,GRD_XDIR:GRD_ZDIR) ) + + !------ allocation and intitialization of horizontal grid points + !------ ( cell CORNER ) + allocate( GRD_xt (ADM_gall, K0,ADM_lall, ADM_TI:ADM_TJ,GRD_XDIR:GRD_ZDIR) ) + allocate( GRD_xt_pl(ADM_gall_pl,K0,ADM_lall_pl, GRD_XDIR:GRD_ZDIR) ) + + !--- reading the horzontal grid (unit sphere) and + !--- scaled by earth radius + call GRD_input_hgrid( hgrid_fname, & ![IN] + .true., & ![IN] + hgrid_io_mode ) ![IN] + + !--- data transfer for GRD_x + !--- note : do not communicate GRD_xt + if( hgrid_comm_flg ) call COMM_data_transfer(GRD_x,GRD_x_pl) ! [mod] T.Ohno 110722 + + ! save unscaled grid points as the unit vector + allocate( GRD_e (ADM_gall, ADM_lall, GRD_XDIR:GRD_ZDIR) ) + allocate( GRD_e_pl(ADM_gall_pl,ADM_lall_pl,GRD_XDIR:GRD_ZDIR) ) + GRD_e (:,:,:) = GRD_x (:,K0,:,:) + GRD_e_pl(:,:,:) = GRD_x_pl(:,K0,:,:) + + ! [mod] T.Ohno 110722 ==> + if ( trim(GRD_grid_type) == 'ON_PLANE' ) then + fac_scale = triangle_size + else + fac_scale = CNST_ERADIUS + endif + + call GRD_scaling(fac_scale) + ! [mod] T.Ohno 110722 <== + + + !------ allocation, initialization, and + !------ reading of surface height, standard deviation, vegetation index + allocate(GRD_zs (ADM_gall, K0,ADM_lall, GRD_ZSFC:GRD_VEGINDX)) + allocate(GRD_zs_pl(ADM_gall_pl,K0,ADM_lall_pl,GRD_ZSFC:GRD_VEGINDX)) + GRD_zs (:,:,:,:) = 0.D0 + GRD_zs_pl(:,:,:,:) = 0.D0 + + ! 
-> [add] R.Yoshida 20121020 + if ( trim(topo_fname) == 'Jablonowski' ) then + call GRD_jbw_init_topo + elseif ( trim(topo_fname) == 'Mountainwave' ) then + call GRD_mwave_init_topo + else + call GRD_input_topograph(topo_fname,GRD_ZSFC) + endif + ! <- [add] R.Yoshida 20121020 + + call GRD_input_topograph(toposd_fname, GRD_ZSD) + call GRD_input_topograph(vegeindex_fname,GRD_VEGINDX) + + !--- data transfer for GRD_zs + if (topo_direct_access) then ! [add] H.Yashiro 20110819 + call COMM_var( GRD_zs, GRD_zs_pl, K0, 3, comm_type=2, NSval_fix=.true. ) + else + call COMM_data_transfer(GRD_zs,GRD_zs_pl) + endif + + ! + !--- < setting the vertical coordinate > --- + ! + if( ADM_kall /= ADM_KNONE ) then + + !------ allocation of vertical grid. + allocate( GRD_gz (ADM_kall) ) + allocate( GRD_gzh (ADM_kall) ) + allocate( GRD_dgz (ADM_kall) ) + allocate( GRD_dgzh (ADM_kall) ) + allocate( GRD_rdgz (ADM_kall) ) + allocate( GRD_rdgzh(ADM_kall) ) + + !------ input the vertical grid. + call GRD_input_vgrid(vgrid_fname) + + !------ calculation of grid intervals ( cell center ) + do k = ADM_kmin-1, ADM_kmax + GRD_dgz(k) = GRD_gzh(k+1) - GRD_gzh(k) + enddo + GRD_dgz(ADM_kmax+1) = GRD_dgz(ADM_kmax) + + !------ calculation of grid intervals ( cell wall ) + do k = ADM_kmin, ADM_kmax+1 + GRD_dgzh(k) = GRD_gz(k) - GRD_gz(k-1) + enddo + GRD_dgzh(ADM_kmin-1) = GRD_dgzh(ADM_kmin) + + !------ calculation of 1/dgz and 1/dgzh + do k = 1, ADM_kall + GRD_rdgz (k) = 1.D0 / grd_dgz (k) + GRD_rdgzh(k) = 1.D0 / grd_dgzh(k) + enddo + + !------ hight top + GRD_htop = GRD_gzh(ADM_kmax+1) - GRD_gzh(ADM_kmin) + + !--- < vertical interpolation factor > --- + allocate( GRD_afac(ADM_kall) ) + allocate( GRD_bfac(ADM_kall) ) + allocate( GRD_cfac(ADM_kall) ) + allocate( GRD_dfac(ADM_kall) ) + + !------ From the cell center value to the cell wall value + !------ A(k-1/2) = ( afac(k) A(k) + bfac(k) * A(k-1) ) / 2 + do k = ADM_kmin, ADM_kmax+1 + GRD_afac(k) = 2.D0 * ( GRD_gzh(k) - GRD_gz(k-1) ) & + / ( GRD_gz (k) - 
GRD_gz(k-1) ) + enddo + GRD_afac(ADM_kmin-1) = 2.D0 + + GRD_bfac(:) = 2.D0 - GRD_afac(:) + + !------ From the cell wall value to the cell center value + !------ A(k) = ( cfac(k) A(k+1/2) + dfac(k) * A(k-1/2) ) / 2 + do k = ADM_kmin, ADM_kmax + GRD_cfac(k) = 2.D0 * ( GRD_gz (k ) - GRD_gzh(k) ) & + / ( GRD_gzh(k+1) - GRD_gzh(k) ) + enddo + GRD_cfac(ADM_kmin-1) = 2.D0 + GRD_cfac(ADM_kmax+1) = 0.D0 + + GRD_dfac(:) = 2.D0 - GRD_cfac(:) + + !------ allocation, initilization, and setting the z-coordinate + allocate( GRD_vz ( ADM_gall, ADM_kall,ADM_lall, GRD_Z:GRD_ZH) ) + allocate( GRD_vz_pl( ADM_gall_pl,ADM_kall,ADM_lall_pl,GRD_Z:GRD_ZH) ) + + select case(trim(vgrid_scheme)) + case('LINEAR') + !--- linear transfromation : (Gal-Chen & Sommerville(1975) + !--- gz = H(z-zs)/(H-zs) -> z = (H-zs)/H * gz + zs + kflat = -1 + if ( hflat > 0.D0 ) then !--- default : -999.0 + do k = ADM_kmin+1, ADM_kmax+1 + if ( hflat < GRD_gzh(k) ) then + kflat = k + exit + endif + enddo + endif + + if ( kflat == -1 ) then + kflat = ADM_kmax + 1 + htop = GRD_htop + else + htop = GRD_gzh(kflat) - GRD_gzh(ADM_kmin) + endif + + K0 = ADM_KNONE + nstart = suf(ADM_gmin,ADM_gmin) + nend = suf(ADM_gmax,ADM_gmax) + + do l = 1, ADM_lall + do k = ADM_kmin-1, kflat + do n = nstart,nend + GRD_vz(n,k,l,GRD_Z ) = GRD_zs(n,K0,l,GRD_ZSFC) & + + ( htop - GRD_zs(n,K0,l,GRD_ZSFC) ) / htop * GRD_gz(k) + GRD_vz(n,k,l,GRD_ZH) = GRD_zs(n,K0,l,GRD_ZSFC) & + + ( htop - GRD_zs(n,K0,l,GRD_ZSFC) ) / htop * GRD_gzh(k) + enddo + enddo + + if ( kflat < ADM_kmax+1 ) then + do k = kflat+1, ADM_kmax+1 + do n = nstart, nend + GRD_vz(n,k,l,GRD_Z ) = GRD_gz (k) + GRD_vz(n,k,l,GRD_ZH) = GRD_gzh(k) + enddo + enddo + endif + enddo + + if ( ADM_prc_me == ADM_prc_pl ) then + n = ADM_GSLF_PL + + do l = 1, ADM_lall_pl + do k = ADM_kmin-1, kflat + GRD_vz_pl(n,k,l,GRD_Z) = GRD_zs_pl(n,K0,l,GRD_ZSFC) & + + ( htop - GRD_zs_pl(n,K0,l,GRD_ZSFC) ) / htop * GRD_gz(k) + GRD_vz_pl(n,k,l,GRD_ZH) = GRD_zs_pl(n,K0,l,GRD_ZSFC) & + + ( htop - 
GRD_zs_pl(n,K0,l,GRD_ZSFC) ) / htop * GRD_gzh(k)
+                enddo
+
+                if ( kflat < ADM_kmax+1 ) then
+                   do k = kflat+1, ADM_kmax+1
+                      GRD_vz_pl(n,k,l,GRD_Z ) = GRD_gz (k)
+                      GRD_vz_pl(n,k,l,GRD_ZH) = GRD_gzh(k)
+                   enddo
+                endif
+             enddo
+          endif
+
+       case('HYBRID')
+          !--------- Hybrid transformation : similar to Simmons & Burridge (1981)
+          K0     = ADM_KNONE
+          nstart = suf(ADM_gmin,ADM_gmin)
+          nend   = suf(ADM_gmax,ADM_gmax)
+
+          do l = 1, ADM_lall
+          do k = ADM_kmin-1, ADM_kmax+1
+          do n = nstart,nend
+             GRD_vz(n,k,l,GRD_Z) = GRD_gz(k)                                &
+                                 + GRD_zs(n,K0,l,ADM_VNONE)                 &
+                                 * sinh( (GRD_htop-GRD_gz(k)) / h_efold )   &
+                                 / sinh( GRD_htop / h_efold )
+             GRD_vz(n,k,l,GRD_ZH) = GRD_gzh(k)                              &
+                                  + GRD_zs(n,K0,l,ADM_VNONE)                &
+                                  * sinh( (GRD_htop-GRD_gzh(k)) / h_efold ) &
+                                  / sinh( GRD_htop / h_efold )
+          enddo
+          enddo
+          enddo
+
+          if ( ADM_prc_me == ADM_prc_pl ) then
+             n = ADM_GSLF_PL
+
+             do l = 1, ADM_lall_pl
+             do k = ADM_kmin-1, ADM_kmax+1
+                GRD_vz_pl(n,k,l,GRD_Z) = GRD_gz(k)                                &
+                                       + GRD_zs_pl(n,K0,l,ADM_VNONE)              &
+                                       * sinh( (GRD_htop-GRD_gz(k)) / h_efold )   &
+                                       / sinh( GRD_htop / h_efold )
+                GRD_vz_pl(n,k,l,GRD_ZH) = GRD_gzh(k)                              &
+                                        + GRD_zs_pl(n,K0,l,ADM_VNONE)             &
+                                        * sinh( (GRD_htop-GRD_gzh(k)) / h_efold ) &
+                                        / sinh( GRD_htop / h_efold )
+             enddo
+             enddo
+          endif
+
+       endselect
+
+       call COMM_data_transfer(GRD_vz,GRD_vz_pl)
+
+       GRD_vz(suf(1,ADM_gall_1d),:,:,:) = GRD_vz(suf(ADM_gmin,ADM_gmin),:,:,:)
+       GRD_vz(suf(ADM_gall_1d,1),:,:,:) = GRD_vz(suf(ADM_gmin,ADM_gmin),:,:,:)
+    endif
+
+    !--- output information about grid.
+ if ( ADM_kall /= ADM_KNONE ) then + write(ADM_LOG_FID,*) + write(ADM_LOG_FID,'(5x,A)') '|====== Vertical Coordinate [m] ======|' + write(ADM_LOG_FID,'(5x,A)') '| |' + write(ADM_LOG_FID,'(5x,A)') '| -GRID CENTER- -GRID INTERFACE- |' + write(ADM_LOG_FID,'(5x,A)') '| k gz d(gz) gzh d(gzh) k |' + write(ADM_LOG_FID,'(5x,A)') '| |' + k = ADM_kmax + 1 + write(ADM_LOG_FID,'(5x,A,I3,2F10.1,A)') '|',k,GRD_gz(k),GRD_dgz(k), ' | dummy' + write(ADM_LOG_FID,'(5x,A,2F10.1,I4,A)') '| ',GRD_gzh(k),GRD_dgzh(k),k,' | TOA' + k = ADM_kmax + write(ADM_LOG_FID,'(5x,A,I3,2F10.1,A)') '|',k,GRD_gz(k),GRD_dgz(k), ' | kmax' + write(ADM_LOG_FID,'(5x,A,2F10.1,I4,A)') '| ',GRD_gzh(k),GRD_dgzh(k),k,' |' + do k = ADM_kmax-1, ADM_kmin+1, -1 + write(ADM_LOG_FID,'(5x,A,I3,2F10.1,A)') '|',k,GRD_gz(k),GRD_dgz(k), ' |' + write(ADM_LOG_FID,'(5x,A,2F10.1,I4,A)') '| ',GRD_gzh(k),GRD_dgzh(k),k,' |' + enddo + k = ADM_kmin + write(ADM_LOG_FID,'(5x,A,I3,2F10.1,A)') '|',k,GRD_gz(k),GRD_dgz(k), ' | kmin' + write(ADM_LOG_FID,'(5x,A,2F10.1,I4,A)') '| ',GRD_gzh(k),GRD_dgzh(k),k,' | ground' + k = ADM_kmin-1 + write(ADM_LOG_FID,'(5x,A,I3,2F10.1,A)') '|',k,GRD_gz(k),GRD_dgz(k), ' | dummy' + write(ADM_LOG_FID,'(5x,A)') '|===============================================|' + + write(ADM_LOG_FID,*) + write(ADM_LOG_FID,*) '--- Vertical layer scheme = ', trim(vgrid_scheme) + if ( vgrid_scheme == 'HYBRID' ) then + write(ADM_LOG_FID,*) '--- e-folding height = ', h_efold + endif + + if ( output_vgrid ) then + if ( ADM_prc_me == ADM_prc_run_master ) then + call GRD_output_vgrid('./vgrid_used.dat') + endif + endif + else + write(ADM_LOG_FID,*) + write(ADM_LOG_FID,*) '--- vartical layer = 1' + endif + + return + end subroutine GRD_setup + + !----------------------------------------------------------------------------- + !> + !> Description of the subroutine GRD_scaling + !> + subroutine GRD_scaling( fact ) + implicit none + + real(8), intent(in) :: fact !--- IN : scaling factor + 
!--------------------------------------------------------------------------- + + ! [mod] T.Ohno 110722 ==> + if ( trim(GRD_grid_type) == 'ON_PLANE' ) then + GRD_x (:,:,:,:) = GRD_x (:,:,:,:) * fact + GRD_x_pl (:,:,:,:) = GRD_x_pl (:,:,:,:) * fact + GRD_xt (:,:,:,:,:) = GRD_xt (:,:,:,:,:) * fact + GRD_xt_pl(:,:,:,:) = GRD_xt_pl(:,:,:,:) * fact + else + !--- setting the sphere radius + GRD_rscale = fact + + !--- scaling by using GRD_rscale + GRD_x (:,:,:,:) = GRD_x (:,:,:,:) * GRD_rscale + GRD_x_pl (:,:,:,:) = GRD_x_pl (:,:,:,:) * GRD_rscale + GRD_xt (:,:,:,:,:) = GRD_xt (:,:,:,:,:) * GRD_rscale + GRD_xt_pl(:,:,:,:) = GRD_xt_pl(:,:,:,:) * GRD_rscale + endif + ! [mod] T.Ohno 110722 <== + + return + end subroutine GRD_scaling + + !----------------------------------------------------------------------------- + !> + !> Description of the subroutine GRD_output_hgrid + !> + subroutine GRD_output_hgrid( & + basename, & + output_vertex, & + io_mode ) + use mod_misc, only: & + MISC_make_idstr,& + MISC_get_available_fid + use mod_adm, only: & + ADM_proc_stop, & + ADM_prc_tab, & + ADM_prc_me, & + ADM_TI, & + ADM_TJ, & + ADM_gall, & + ADM_lall, & + ADM_KNONE + use mod_fio, only: & ! [add] H.Yashiro 20110819 + FIO_output, & + FIO_HMID, & + FIO_REAL8 + implicit none + + character(len=*), intent(in) :: basename ! output basename + logical, intent(in) :: output_vertex ! output flag of B-grid + character(len=*), intent(in) :: io_mode ! 
io_mode + + character(len=ADM_MAXFNAME) :: fname + character(len=FIO_HMID) :: desc = 'HORIZONTAL GRID FILE' + + integer :: fid + integer :: rgnid, l, K0 + !--------------------------------------------------------------------------- + + K0 = ADM_KNONE + + if ( io_mode == 'ADVANCED' ) then + + call FIO_output( GRD_x(:,:,:,GRD_XDIR), & + basename, desc, "", & + "grd_x_x", "GRD_x (X_DIR)", "", & + "NIL", FIO_REAL8, "ZSSFC1", K0, K0, 1, 0.D0, 0.D0 ) + call FIO_output( GRD_x(:,:,:,GRD_YDIR), & + basename, desc, '', & + 'grd_x_y', 'GRD_x (Y_DIR)', '', & + 'NIL', FIO_REAL8, 'ZSSFC1', K0, K0, 1, 0.D0, 0.D0 ) + call FIO_output( GRD_x(:,:,:,GRD_ZDIR), & + basename, desc, '', & + 'grd_x_z', 'GRD_x (Z_DIR)', '', & + 'NIL', FIO_REAL8, 'ZSSFC1', K0, K0, 1, 0.D0, 0.D0 ) + + if ( output_vertex ) then + call FIO_output( GRD_xt(:,:,:,ADM_TI,GRD_XDIR), & + basename, desc, '', & + 'grd_xt_ix', 'GRD_xt (TI,X_DIR)', '', & + 'NIL', FIO_REAL8, 'ZSSFC1', K0, K0, 1, 0.D0, 0.D0 ) + call FIO_output( GRD_xt(:,:,:,ADM_TJ,GRD_XDIR), & + basename, desc, '', & + 'grd_xt_jx', 'GRD_xt (TJ,X_DIR)', '', & + 'NIL', FIO_REAL8, 'ZSSFC1', K0, K0, 1, 0.D0, 0.D0 ) + call FIO_output( GRD_xt(:,:,:,ADM_TI,GRD_YDIR), & + basename, desc, '', & + 'grd_xt_iy', 'GRD_xt (TI,Y_DIR)', '', & + 'NIL', FIO_REAL8, 'ZSSFC1', K0, K0, 1, 0.D0, 0.D0 ) + call FIO_output( GRD_xt(:,:,:,ADM_TJ,GRD_YDIR), & + basename, desc, '', & + 'grd_xt_jy', 'GRD_xt (TJ,Y_DIR)', '', & + 'NIL', FIO_REAL8, 'ZSSFC1', K0, K0, 1, 0.D0, 0.D0 ) + call FIO_output( GRD_xt(:,:,:,ADM_TI,GRD_ZDIR), & + basename, desc, '', & + 'grd_xt_iz', 'GRD_xt (TI,Z_DIR)', '', & + 'NIL', FIO_REAL8, 'ZSSFC1', K0, K0, 1, 0.D0, 0.D0 ) + call FIO_output( GRD_xt(:,:,:,ADM_TJ,GRD_ZDIR), & + basename, desc, '', & + 'grd_xt_jz', 'GRD_xt (TJ,Z_DIR)', '', & + 'NIL', FIO_REAL8, 'ZSSFC1', K0, K0, 1, 0.D0, 0.D0 ) + endif + + elseif( io_mode == 'LEGACY' ) then + + do l = 1, ADM_lall + rgnid = ADM_prc_tab(l,ADM_prc_me) + call MISC_make_idstr(fname,trim(basename),'rgn',rgnid) + + fid = 
MISC_get_available_fid() + open( unit = fid, & + file=trim(fname), & + form='unformatted', & + access='direct', & + recl=ADM_gall*8 ) + + write(fid,rec=1) GRD_x(:,K0,l,GRD_XDIR) + write(fid,rec=2) GRD_x(:,K0,l,GRD_YDIR) + write(fid,rec=3) GRD_x(:,K0,l,GRD_ZDIR) + if ( output_vertex ) then + write(fid,rec=4) GRD_xt(:,K0,l,ADM_TI,GRD_XDIR) + write(fid,rec=5) GRD_xt(:,K0,l,ADM_TI,GRD_YDIR) + write(fid,rec=6) GRD_xt(:,K0,l,ADM_TI,GRD_ZDIR) + write(fid,rec=7) GRD_xt(:,K0,l,ADM_TJ,GRD_XDIR) + write(fid,rec=8) GRD_xt(:,K0,l,ADM_TJ,GRD_YDIR) + write(fid,rec=9) GRD_xt(:,K0,l,ADM_TJ,GRD_ZDIR) + endif + close(fid) + enddo + else + write(ADM_LOG_FID,*) 'Invalid io_mode!' + call ADM_proc_stop + endif + + return + end subroutine GRD_output_hgrid + + !----------------------------------------------------------------------------- + !> + !> Description of the subroutine GRD_input_hgrid + !> + subroutine GRD_input_hgrid( & + basename, & + input_vertex, & + io_mode ) + use mod_misc, only: & + MISC_make_idstr, & + MISC_get_available_fid + use mod_adm, only: & + ADM_proc_stop, & + ADM_prc_tab, & + ADM_prc_me, & + ADM_TI, & + ADM_TJ, & + ADM_gall, & + ADM_lall, & + ADM_KNONE + use mod_fio, only : & ! [add] H.Yashiro 20110819 + FIO_input + implicit none + + character(len=*), intent(in) :: basename ! input basename + logical, intent(in) :: input_vertex ! flag of B-grid input + character(len=*), intent(in) :: io_mode ! 
io_mode + + character(len=ADM_MAXFNAME) :: fname + + integer :: fid, ierr + integer :: rgnid, l, K0 + !--------------------------------------------------------------------------- + + K0 = ADM_KNONE + + if ( io_mode == 'ADVANCED' ) then + + call FIO_input(GRD_x(:,:,:,GRD_XDIR),basename,'grd_x_x','ZSSFC1',K0,K0,1) + call FIO_input(GRD_x(:,:,:,GRD_YDIR),basename,'grd_x_y','ZSSFC1',K0,K0,1) + call FIO_input(GRD_x(:,:,:,GRD_ZDIR),basename,'grd_x_z','ZSSFC1',K0,K0,1) + if ( input_vertex ) then + call FIO_input(GRD_xt(:,:,:,ADM_TI,GRD_XDIR),basename, & + 'grd_xt_ix','ZSSFC1',K0,K0,1 ) + call FIO_input(GRD_xt(:,:,:,ADM_TJ,GRD_XDIR),basename, & + 'grd_xt_jx','ZSSFC1',K0,K0,1 ) + call FIO_input(GRD_xt(:,:,:,ADM_TI,GRD_YDIR),basename, & + 'grd_xt_iy','ZSSFC1',K0,K0,1 ) + call FIO_input(GRD_xt(:,:,:,ADM_TJ,GRD_YDIR),basename, & + 'grd_xt_jy','ZSSFC1',K0,K0,1 ) + call FIO_input(GRD_xt(:,:,:,ADM_TI,GRD_ZDIR),basename, & + 'grd_xt_iz','ZSSFC1',K0,K0,1 ) + call FIO_input(GRD_xt(:,:,:,ADM_TJ,GRD_ZDIR),basename, & + 'grd_xt_jz','ZSSFC1',K0,K0,1 ) + endif + + elseif( io_mode == 'LEGACY' ) then + + do l = 1, ADM_lall + rgnid = ADM_prc_tab(l,ADM_prc_me) + call MISC_make_idstr(fname,trim(basename),'rgn',rgnid) + + fid = MISC_get_available_fid() + open( unit = fid, & + file = trim(fname), & + form = 'unformatted', & + access = 'direct', & + recl = ADM_gall*8, & + status = 'old', & + iostat = ierr ) + + if ( ierr /= 0 ) then + write(ADM_LOG_FID,*) 'xxx Error occured in reading grid file.', trim(fname) + call ADM_proc_stop + endif + + read(fid,rec=1) GRD_x(:,K0,l,GRD_XDIR) + read(fid,rec=2) GRD_x(:,K0,l,GRD_YDIR) + read(fid,rec=3) GRD_x(:,K0,l,GRD_ZDIR) + if ( input_vertex ) then + read(fid,rec=4) GRD_xt(:,K0,l,ADM_TI,GRD_XDIR) + read(fid,rec=5) GRD_xt(:,K0,l,ADM_TI,GRD_YDIR) + read(fid,rec=6) GRD_xt(:,K0,l,ADM_TI,GRD_ZDIR) + read(fid,rec=7) GRD_xt(:,K0,l,ADM_TJ,GRD_XDIR) + read(fid,rec=8) GRD_xt(:,K0,l,ADM_TJ,GRD_YDIR) + read(fid,rec=9) GRD_xt(:,K0,l,ADM_TJ,GRD_ZDIR) + endif + close(fid) 
+ enddo + + else + write(ADM_LOG_FID,*) 'Invalid io_mode!' + call ADM_proc_stop + endif + + call GRD_gen_plgrid + + return + end subroutine GRD_input_hgrid + + !----------------------------------------------------------------------------- + !> + !> Description of the subroutine GRD_input_vgrid + !> + subroutine GRD_input_vgrid( fname ) + use mod_misc, only: & + MISC_get_available_fid + use mod_adm, only: & + ADM_LOG_FID, & + ADM_vlayer, & + ADM_proc_stop + implicit none + + character(len=ADM_MAXFNAME), intent(in) :: fname ! vertical grid file name + + integer :: num_of_layer + integer :: fid, ierr + !--------------------------------------------------------------------------- + + fid = MISC_get_available_fid() + open( unit = fid, & + file = trim(fname), & + status = 'old', & + form = 'unformatted', & + iostat = ierr ) + + if ( ierr /= 0 ) then + write(ADM_LOG_FID,*) 'xxx No vertical grid file.' + call ADM_proc_stop + endif + + read(fid) num_of_layer + + if ( num_of_layer /= ADM_vlayer ) then + write(ADM_LOG_FID,*) 'xxx inconsistency in number of vertical layers.' 
+ call ADM_proc_stop + endif + + read(fid) GRD_gz + read(fid) GRD_gzh + + close(fid) + + return + end subroutine GRD_input_vgrid + + !----------------------------------------------------------------------------- + !> + !> Description of the subroutine GRD_output_vgrid + !> + subroutine GRD_output_vgrid( fname ) + use mod_misc, only: & + MISC_get_available_fid + use mod_adm, only: & + ADM_vlayer + implicit none + + character(len=*), intent(in) :: fname + + integer :: fid + !--------------------------------------------------------------------------- + + fid = MISC_get_available_fid() + open(fid,file=trim(fname),form='unformatted') + write(fid) ADM_vlayer + write(fid) GRD_gz + write(fid) GRD_gzh + close(fid) + + return + end subroutine GRD_output_vgrid + + !----------------------------------------------------------------------------- + !> + !> Description of the subroutine GRD_input_topograph + !> + subroutine GRD_input_topograph( & + basename, & + i_var ) + use mod_misc, only: & + MISC_make_idstr,& + MISC_get_available_fid + use mod_adm, only: & + ADM_LOG_FID, & + ADM_prc_tab, & + ADM_prc_me, & + ADM_PRC_PL, & + ADM_lall, & + ADM_gall, & + ADM_KNONE + use mod_fio, only: & + FIO_input + implicit none + + character(len=*), intent(in) :: basename + integer, intent(in) :: i_var + + character(len=16) :: varname(3) + data varname / 'topo', 'topo_stddev', 'vegeindex' / + + character(len=128) :: fname + integer :: ierr + integer :: l, rgnid, fid + !--------------------------------------------------------------------------- + + if ( topo_io_mode == 'ADVANCED' ) then + topo_direct_access = .true. 
+ + call FIO_input(GRD_zs(:,:,:,i_var),basename,varname(i_var),'ZSSFC1',1,1,1) + + elseif( topo_io_mode == 'LEGACY' ) then + + if ( topo_direct_access ) then !--- direct access ( defalut ) + do l = 1, ADM_lall + rgnid = ADM_prc_tab(l,ADM_prc_me) + call MISC_make_idstr(fname,trim(basename),'rgn',rgnid) + fid = MISC_get_available_fid() + + open( fid, & + file = trim(fname), & + form = 'unformatted', & + access = 'direct', & + recl = ADM_gall*8, & + status = 'old' ) + + read(fid,rec=1) GRD_zs(:,ADM_KNONE,l,i_var) + + close(fid) + enddo + else !--- sequential access + do l = 1, ADM_lall + rgnid = ADM_prc_tab(l,ADM_prc_me) + call MISC_make_idstr(fname,trim(basename),'rgn',rgnid) + fid = MISC_get_available_fid() + + open(fid,file=trim(fname),status='old',form='unformatted',iostat=ierr) + if ( ierr /= 0 ) then + write(ADM_LOG_FID,*) 'Msg : Sub[GRD_input_topograph]/Mod[grid]' + write(ADM_LOG_FID,*) ' *** No topographical file. Number :', i_var + return + endif + + read(fid) GRD_zs(:,ADM_KNONE,l,i_var) + close(fid) + enddo + + if ( ADM_prc_me == ADM_prc_pl ) then + fname = trim(basename)//'.pl' + fid = MISC_get_available_fid() + + open(fid,file=trim(fname),status='old',form='unformatted') + read(fid) GRD_zs_pl(:,:,:,i_var) + close(fid) + endif + endif !--- direct/sequencial + + endif !--- io_mode + + return + end subroutine GRD_input_topograph + + !----------------------------------------------------------------------------- + !> + !> Description of the subroutine GRD_gen_plgrid + !> + subroutine GRD_gen_plgrid + use mod_adm, only: & + ADM_rgn_nmax, & + ADM_rgn_vnum, & + ADM_rgn_vtab, & + ADM_rgn2prc, & + ADM_RID, & + ADM_VLINK_NMAX, & + ADM_COMM_RUN_WORLD, & + ADM_prc_tab, & + ADM_prc_me, & + ADM_prc_npl, & + ADM_prc_spl, & + ADM_TI, & + ADM_TJ, & + ADM_N, & + ADM_S, & + ADM_NPL, & + ADM_SPL, & + ADM_lall, & + ADM_gall_1d, & + ADM_gmax, & + ADM_gmin, & + ADM_KNONE, & + ADM_GSLF_PL + use mod_comm, only: & + COMM_var + implicit none + + integer :: prctab (ADM_VLINK_NMAX) + 
integer :: rgntab (ADM_VLINK_NMAX) + integer :: sreq (ADM_VLINK_NMAX) + integer :: rreq (ADM_VLINK_NMAX) + logical :: send_flag(ADM_VLINK_NMAX) + +! real(8) :: v_pl(GRD_XDIR:GRD_ZDIR,ADM_VLINK_NMAX) + real(8) :: vsend_pl(GRD_XDIR:GRD_ZDIR,ADM_VLINK_NMAX) ! [mod] H.Yashiro 20120525 + !--- 2020 Fujitsu + integer(8) :: vsend_pl_l_desc + integer(8) :: vsend_pl_l_lb(2), vsend_pl_l_ub(2) + integer(8) :: vsend_pl_l_sec + !--- 2020 Fujitsu end + real(8) :: vrecv_pl(GRD_XDIR:GRD_ZDIR,ADM_VLINK_NMAX) ! [mod] H.Yashiro 20120525 +!coarray + !--- 2020 Fujitsu + !real(8) :: vrecv_plc(GRD_XDIR:GRD_ZDIR,5)[*] !! not used + !real(8),allocatable :: vrecv_plA(:,:)[:] + integer , POINTER :: vrecv_plA ( : , : ) => null ( ) + integer(8) :: vrecv_plA_desc + integer(8) :: vrecv_plA_lb, vrecv_plA_ub + integer(8) :: vrecv_plA_sec + integer(4) :: img_dims(1) + !--- 2020 Fujitsu end + +!coarray integer :: istat(MPI_STATUS_SIZE) + integer :: n, l, ierr + + integer :: suf, i, j + suf(i,j) = ADM_gall_1d * ((j)-1) + (i) + !--------------------------------------------------------------------------- +!coarray + !--- 2020 Fujitsu + !allocate(vrecv_plA(GRD_XDIR:GRD_ZDIR,5)[*]) + vrecv_plA_lb(1) = GRD_XDIR; vrecv_plA_ub(1) = GRD_ZDIR + vrecv_plA_lb(2) = 1; vrecv_plA_ub(2) = 5 + call xmp_new_coarray(vrecv_plA_desc, 8, 2, vrecv_plA_lb, vrecv_plA_ub, 1, img_dims) + call xmp_coarray_bind(vrecv_plA_desc, vrecv_plA) + + vsend_pl_l_lb(1) = GRD_XDIR; vsend_pl_l_ub(1) = GRD_ZDIR + vsend_pl_l_lb(2) = 1; vsend_pl_l_ub(2) = ADM_VLINK_NMAX + call xmp_new_local_array(vsend_pl_l_desc, 8, 2, vsend_pl_l_lb, vsend_pl_l_ub, loc(vsend_pl)) + + call xmp_new_array_section(vsend_pl_l_sec, 2) + call xmp_new_array_section(vrecv_plA_sec, 2) + !--- 2020 Fujitsu end + vrecv_pl (:,:) = 0.d0 + !vrecv_plc(:,:) = 0.d0 !--- 2020 Fujitsu --- + vrecv_plA(:,:) = 0.d0 + + !--- control volume points at the north pole + do l = ADM_rgn_nmax, 1, -1 + if ( ADM_rgn_vnum(ADM_N,l) == ADM_VLINK_NMAX ) then + do n = 1, ADM_VLINK_NMAX + rgntab(n) = 
ADM_rgn_vtab(ADM_RID,ADM_N,l,n) + prctab(n) = ADM_rgn2prc(rgntab(n)) + enddo + exit + endif + enddo + + send_flag(:) = .false. + + do n = 1, ADM_VLINK_NMAX + do l = 1, ADM_lall + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end + if ( ADM_prc_tab(l,ADM_prc_me) == rgntab(n) ) then + vsend_pl(:,n) = GRD_xt(suf(ADM_gmin,ADM_gmax),ADM_KNONE,l,ADM_TJ,:) ! [mod] H.Yashiro 20120525 + +!coarray +! call MPI_ISEND( vsend_pl(:,n), & ! [mod] H.Yashiro 20120525 +! 3, & +! MPI_DOUBLE_PRECISION, & +! ADM_prc_npl-1, & +! rgntab(n), & +! ADM_COMM_RUN_WORLD, & +! sreq(n), & +! ierr ) + !--- 2020 Fujitsu + !vrecv_plA(:,n)[ADM_prc_npl] = vsend_pl(:,n) + call xmp_array_section_set_triplet(vrecv_plA_sec, 1, GRD_XDIR, GRD_ZDIR, 1, ierr) + call xmp_array_section_set_triplet(vrecv_plA_sec, 2, n, n, 1, ierr) + call xmp_array_section_set_triplet(vsend_pl_l_sec, 1, GRD_XDIR, GRD_ZDIR, 1, ierr) + call xmp_array_section_set_triplet(vsend_pl_l_sec, 2, n, n, 1, ierr) + img_dims(1) = ADM_prc_npl + call xmp_coarray_put_local(img_dims, vrecv_plA_desc, vrecv_plA_sec, vsend_pl_l_desc, vsend_pl_l_sec, ierr) + !--- 2020 Fujitsu end + + send_flag(n) = .true. + endif + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end + enddo + enddo + +! if ( ADM_prc_me == ADM_prc_npl ) then +! do n = 1, ADM_VLINK_NMAX +! call MPI_IRECV( vrecv_pl(:,n), & ! [mod] H.Yashiro 20120525 +! 3, & +! MPI_DOUBLE_PRECISION, & +! prctab(n)-1, & +! rgntab(n), & +! ADM_COMM_RUN_WORLD, & +! rreq(n), & +! ierr ) +! enddo +! endif + +! do n = 1, ADM_VLINK_NMAX +! if ( send_flag(n) ) then +! call MPI_WAIT(sreq(n),istat,ierr) +! endif +! enddo + + if ( ADM_prc_me == ADM_prc_npl ) then + do n = 1, ADM_VLINK_NMAX +! call MPI_WAIT(rreq(n),istat,ierr) +! GRD_xt_pl(n+1,ADM_KNONE,ADM_NPL,:) = vrecv_pl(:,n) ! 
[mod] H.Yashiro 20120525 +!coarray + GRD_xt_pl(n+1,ADM_KNONE,ADM_NPL,:) = vrecv_plA(:,n) + enddo + endif + +!----------------------------------------------------------------------------------------- + + !--- control volume points at the sourth pole + do l = 1, ADM_rgn_nmax + if ( ADM_rgn_vnum(ADM_S,l) == ADM_VLINK_NMAX ) then + do n = 1, ADM_VLINK_NMAX + rgntab(n) = ADM_rgn_vtab(ADM_RID,ADM_S,l,n) + prctab(n) = ADM_rgn2prc(rgntab(n)) + enddo + exit + endif + enddo + + send_flag(:) = .false. + + do n = 1, ADM_VLINK_NMAX + do l =1, ADM_lall + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end + if (ADM_prc_tab(l,ADM_prc_me) == rgntab(n) ) then + vsend_pl(:,n) = GRD_xt(suf(ADM_gmax,ADM_gmin),ADM_KNONE,l,ADM_TI,:) ! [mod] H.Yashiro 20120525 +!coarray +! call MPI_ISEND( vsend_pl(:,n), & ! [mod] H.Yashiro 20120525 +! 3, & +! MPI_DOUBLE_PRECISION, & +! ADM_prc_spl-1, & +! rgntab(n), & +! ADM_COMM_RUN_WORLD, & +! sreq(n), & +! ierr ) + !--- 2020 Fujitsu + !vrecv_plA(:,n)[ADM_prc_npl] = vsend_pl(:,n) + call xmp_array_section_set_triplet(vrecv_plA_sec, 1, GRD_XDIR, GRD_ZDIR, 1, ierr) + call xmp_array_section_set_triplet(vrecv_plA_sec, 2, n, n, 1, ierr) + call xmp_array_section_set_triplet(vsend_pl_l_sec, 1, GRD_XDIR, GRD_ZDIR, 1, ierr) + call xmp_array_section_set_triplet(vsend_pl_l_sec, 2, n, n, 1, ierr) + img_dims(1) = ADM_prc_npl + call xmp_coarray_put_local(img_dims, vrecv_plA_desc, vrecv_plA_sec, vsend_pl_l_desc, vsend_pl_l_sec, ierr) + !--- 2020 Fujitsu end + + send_flag(n) = .true. + endif + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end + enddo + enddo + +!coarray +! if ( ADM_prc_me == ADM_prc_spl ) then +! do n = 1, ADM_VLINK_NMAX +! call MPI_IRECV( vrecv_pl(:,n), & ! [mod] H.Yashiro 20120525 +! 3, & +! MPI_DOUBLE_PRECISION, & +! prctab(n)-1, & +! rgntab(n), & +! ADM_COMM_RUN_WORLD, & +! rreq(n), & +! ierr ) +! enddo +! endif + +! do n = 1, ADM_VLINK_NMAX +! if ( send_flag(n) ) then +! 
call MPI_WAIT(sreq(n),istat,ierr) +! endif +! enddo + + if ( ADM_prc_me == ADM_prc_spl ) then + do n = 1, ADM_VLINK_NMAX +!coarray +! call MPI_WAIT(rreq(n),istat,ierr) +! GRD_xt_pl(n+1,ADM_KNONE,ADM_SPL,:) = vrecv_pl(:,n) ! [mod] H.Yashiro 20120525 + GRD_xt_pl(n+1,ADM_KNONE,ADM_SPL,:) = vrecv_plA(:,n) + enddo + endif + + !--- grid point communication + call COMM_var(GRD_x,GRD_x_pl,ADM_KNONE,3,comm_type=2,NSval_fix=.false.) + GRD_xt_pl(ADM_GSLF_PL,:,:,:) = GRD_x_pl(ADM_GSLF_PL,:,:,:) + + !--- 2020 Fujitsu + call xmp_free_array_section(vsend_pl_l_sec) + call xmp_free_array_section(vrecv_plA_sec) + + call xmp_coarray_deallocate(vrecv_plA_desc, ierr) + call xmp_free_local_array(vsend_pl_l_desc) + !--- 2020 Fujitsu end + return + end subroutine GRD_gen_plgrid + + !----------------------------------------------------------------------------- + ! [ADD] R.Yoshida 20121020 + ! imported from ENDGame UK Met.office. + !----------------------------------------------------------------------------- + subroutine GRD_jbw_init_topo() + use mod_misc, only : & + MISC_get_latlon + use mod_adm, only : & + ADM_lall, & + ADM_gall, & + ADM_gall_pl, & + ADM_lall_pl, & + ADM_KNONE, & + ADM_prc_me, & + ADM_prc_pl, & + ADM_LOG_FID + use mod_cnst, only: & + CNST_PI, & + CNST_ERADIUS, & + CNST_EOHM, & + CNST_EGRAV, & + CNST_RAIR + implicit none + + real(8), parameter :: u00 = 35.D0 + + real(8) :: cs32ev, f1, f2 + real(8) :: lat, lon + real(8) :: rsurf (ADM_gall ,ADM_lall ) ! surface height in ICO-grid + real(8) :: rsurf_p(ADM_gall_pl,ADM_lall_pl) ! surface height in ICO-grid for pole region + + integer :: n, l, k0 + !--------------------------------------------------------------------------- + + k0 = ADM_KNONE + + cs32ev = ( cos( (1.D0-0.252D0) * CNST_PI * 0.5D0 ) )**1.5D0 + + ! 
for globe + do l = 1, ADM_lall + do n = 1, ADM_gall + call MISC_get_latlon( lat, lon, & + GRD_x(n,k0,l,GRD_XDIR), & + GRD_x(n,k0,l,GRD_YDIR), & + GRD_x(n,k0,l,GRD_ZDIR) ) + + f1 = 10.D0/63.D0 - 2.D0 * sin(lat)**6 * ( cos(lat)**2 + 1.D0/3.D0 ) + f2 = 1.6D0 * cos(lat)**3 * ( sin(lat)**2 + 2.D0/3.D0 ) - 0.25D0 * CNST_PI + + rsurf(n,l) = u00 * cs32ev * ( f1*u00*cs32ev + f2*CNST_ERADIUS*CNST_EOHM ) / CNST_EGRAV + enddo + enddo + + do l=1, ADM_lall + do n=1, ADM_gall + GRD_zs(n,k0,l,GRD_ZSFC) = rsurf(n,l) + enddo + enddo + + ! for pole region + if ( ADM_prc_me == ADM_prc_pl ) then + do l = 1, ADM_lall_pl + do n = 1, ADM_gall_pl + call MISC_get_latlon( lat, lon, & + GRD_x_pl(n,k0,l,GRD_XDIR), & + GRD_x_pl(n,k0,l,GRD_YDIR), & + GRD_x_pl(n,k0,l,GRD_ZDIR) ) + + f1 = 10.D0/63.D0 - 2.D0 * sin(lat)**6 * ( cos(lat)**2 + 1.D0/3.D0 ) + f2 = 1.6D0 * cos(lat)**3 * ( sin(lat)**2 + 2.D0/3.D0 ) - 0.25D0 * CNST_PI + + rsurf_p(n,l) = u00 * cs32ev * ( f1*u00*cs32ev + f2*CNST_ERADIUS*CNST_EOHM ) / CNST_EGRAV + enddo + enddo + + do l=1, ADM_lall_pl + do n=1, ADM_gall_pl + GRD_zs_pl(n,k0,l,GRD_ZSFC) = rsurf_p(n,l) + enddo + enddo + endif + + write(ADM_LOG_FID,*) 'Msg : Sub[GRD_input_topograph]/Mod[grid]' + write(ADM_LOG_FID, '(" *** Topography for JBW: -- MAX: ",F9.3,2X,"MIN: ",F9.3)') & + maxval(GRD_zs(:,:,:,GRD_ZSFC)), minval(GRD_zs(:,:,:,GRD_ZSFC)) + + return + end subroutine GRD_jbw_init_topo + + !----------------------------------------------------------------------------- + ! [ADD] R.Yoshida 20130328 + ! mountain of dcmip 2012 setting + !----------------------------------------------------------------------------- + subroutine GRD_mwave_init_topo() + use mod_misc, only : & + MISC_get_latlon + use mod_adm, only : & + ADM_lall, & + ADM_gall, & + ADM_gall_pl, & + ADM_lall_pl, & + ADM_KNONE, & + ADM_prc_me, & + ADM_prc_pl, & + ADM_LOG_FID + use mod_cnst, only: & + CNST_PI + implicit none + + ! 
+ real(8),parameter :: FAI_M =0.d0 + real(8),parameter :: H_ZERO = 250.d0 + real(8),parameter :: QSI = 4000.d0 + real(8),parameter :: a_ref = 6371220.0D0 + real(8),parameter :: X_reduce = 500.d0 + real(8),parameter :: HALF_WIDTH = 5000.0d0 + + real(8) :: dist_m, aa, bb, LAMBDA_M + real(8) :: lat, lon + integer :: n, l, K0 + !--------------------------------------------------------------------------- + + LAMBDA_M=CNST_PI/4.d0 + K0 = ADM_KNONE + + ! for globe + do l=1, ADM_lall + do n=1, ADM_gall + call MISC_get_latlon( lat, lon, & + GRD_x(n,K0,l,GRD_XDIR), & + GRD_x(n,K0,l,GRD_YDIR), & + GRD_x(n,K0,l,GRD_ZDIR) ) + + dist_m = (a_ref/X_reduce)*acos (sin (FAI_M)*sin (lat) & + +cos (FAI_M)*cos (lat)*cos (lon-LAMBDA_M)) + + aa = exp(- (dist_m)**2.0 / HALF_WIDTH**2.0d0) + bb = cos(CNST_PI*dist_m/QSI)**2.0d0 + GRD_zs(n,ADM_KNONE,l,GRD_ZSFC) = H_ZERO * aa * bb ! equation (76) in dcmip reference + enddo + enddo + + ! for pole region + if ( ADM_prc_me==ADM_prc_pl ) then + do l=1, ADM_lall_pl + do n=1, ADM_gall_pl + call MISC_get_latlon( lat, lon, & + GRD_x(n,K0,l,GRD_XDIR), & + GRD_x(n,K0,l,GRD_YDIR), & + GRD_x(n,K0,l,GRD_ZDIR) ) + + dist_m = (a_ref/X_reduce)*acos (sin (FAI_M)*sin (lat)& + +cos (FAI_M)*cos (lat)*cos (lon-LAMBDA_M)) + + aa = exp(- (dist_m)**2.0 / HALF_WIDTH**2.0d0) + bb = cos(CNST_PI*dist_m/QSI)**2.0d0 + GRD_zs_pl(n,ADM_KNONE,l,GRD_ZSFC) = H_ZERO * aa * bb ! 
equation (76) in dcmip reference + enddo + enddo + endif + + write(ADM_LOG_FID,*) 'Msg : Sub[GRD_input_topograph]/Mod[grid]' + write (ADM_LOG_FID, '(" *** Topography for mwave: -- MAX: ",F9.3,2X,"MIN: ",F9.3)') & + maxval(GRD_zs(:,:,:,GRD_ZSFC)), minval(GRD_zs(:,:,:,GRD_ZSFC)) + return + end subroutine GRD_mwave_init_topo + +end module mod_grd +!------------------------------------------------------------------------------- diff --git a/NICAM-DC-MINI/sysdep/Makedef.Linux64-gnu-ompi-xmp b/NICAM-DC-MINI/sysdep/Makedef.Linux64-gnu-ompi-xmp new file mode 100644 index 0000000..8b3099d --- /dev/null +++ b/NICAM-DC-MINI/sysdep/Makedef.Linux64-gnu-ompi-xmp @@ -0,0 +1,38 @@ +# +# ------ FOR Linux 64bit & gfortran4.3 & OpenMPI1.3 ----- +# + +##### for computation + +FFLAGS_FAST = -O2 -m64 + +FFLAGS_DEBUG = -O0 -m64 \ + -std=f2003 -pedantic-errors -fimplicit-none -fmodule-private \ + -fconvert=big-endian -frecord-marker=4 \ + -ffpe-trap=invalid,zero,overflow -finit-integer=-32768 \ + -finit-real=nan -finit-logical=false -finit-character=9 \ + -Wall -Wextra -Wcharacter-truncation -Wunderflow \ + -g -fbacktrace -fbounds-check -fall-intrinsics + +FC = xmpf90 +FFLAGS = -cpp --Wn -fconvert=big-endian $(FFLAGS_FAST) +#FFLAGS = $(FFLAGS_DEBUG) +FFLAGS += -x f95-cpp-input + +# if gcc < 4.5: -M, else if gcc >= 4.6: -J +MODDIROPT ?= -J + +CC = mpicc +CFLAGS = -O2 -m64 + +LD = $(FC) +# to avoid "-x f95-cpp-input" option +LFLAGS = $(FFLAGS_FAST) +#LFLAGS = $(FFLAGS_DEBUG) + +##### for frontend +INSTALL = install +AR = ar +ARFLAGS = r +RANLIB = ranlib +JOBSUB = sh diff --git a/NICAM-DC-MINI/sysdep/Makedef.Linux64-gnu-ompi-xmpAPI b/NICAM-DC-MINI/sysdep/Makedef.Linux64-gnu-ompi-xmpAPI new file mode 100644 index 0000000..60d78cc --- /dev/null +++ b/NICAM-DC-MINI/sysdep/Makedef.Linux64-gnu-ompi-xmpAPI @@ -0,0 +1,38 @@ +# +# ------ FOR Linux 64bit & gfortran4.3 & OpenMPI1.3 ----- +# + +##### for computation + +FFLAGS_FAST = -O2 -m64 + +FFLAGS_DEBUG = -O0 -m64 \ + -std=f2003 -pedantic-errors 
-fimplicit-none -fmodule-private \ + -fconvert=big-endian -frecord-marker=4 \ + -ffpe-trap=invalid,zero,overflow -finit-integer=-32768 \ + -finit-real=nan -finit-logical=false -finit-character=9 \ + -Wall -Wextra -Wcharacter-truncation -Wunderflow \ + -g -fbacktrace -fbounds-check -fall-intrinsics + +FC = mpif90 +FFLAGS = -cpp --Wn -fconvert=big-endian $(FFLAGS_FAST) +#FFLAGS = $(FFLAGS_DEBUG) +FFLAGS += -x f95-cpp-input + +# if gcc < 4.5: -M, else if gcc >= 4.6: -J +MODDIROPT ?= -J + +CC = mpicc +CFLAGS = -O2 -m64 + +LD = $(FC) +# to avoid "-x f95-cpp-input" option +LFLAGS = $(FFLAGS_FAST) +#LFLAGS = $(FFLAGS_DEBUG) + +##### for frontend +INSTALL = install +AR = ar +ARFLAGS = r +RANLIB = ranlib +JOBSUB = sh diff --git a/NICAM-DC-MINI/sysdep/Mkjobshell.Linux64-gnu-ompi-xmp.sh b/NICAM-DC-MINI/sysdep/Mkjobshell.Linux64-gnu-ompi-xmp.sh new file mode 100644 index 0000000..a67bd94 --- /dev/null +++ b/NICAM-DC-MINI/sysdep/Mkjobshell.Linux64-gnu-ompi-xmp.sh @@ -0,0 +1,100 @@ +#! /bin/bash -x + +GLEV=${1} +RLEV=${2} +NMPI=${3} +ZL=${4} +VGRID=${5} +TOPDIR=${6} +BINNAME=${7} +RUNCONF=${8} + +# System specific +MPIEXEC="mpirun -np ${NMPI}" + +GL=`printf %02d ${GLEV}` +RL=`printf %02d ${RLEV}` +if [ ${NMPI} -ge 10000 ]; then + NP=`printf %05d ${NMPI}` +elif [ ${NMPI} -ge 1000 ]; then + NP=`printf %04d ${NMPI}` +elif [ ${NMPI} -ge 100 ]; then + NP=`printf %03d ${NMPI}` +else + NP=`printf %02d ${NMPI}` +fi + +dir2d=gl${GL}rl${RL}pe${NP} +dir3d=gl${GL}rl${RL}z${ZL}pe${NP} +res2d=GL${GL}RL${RL} +res3d=GL${GL}RL${RL}z${ZL} + +MNGINFO=rl${RL}-prc${NP}.info + +outdir=${dir3d} +cd ${outdir} + +cat << EOF1 > run.sh +#! /bin/bash -x +################################################################################ +# +# ------ FOR Linux64 & gnu C&fortran & openmpi ----- +# +################################################################################ +export FORT_FMT_RECL=400 + + +ln -s ${TOPDIR}/bin/${BINNAME} . +ln -s ${TOPDIR}/data/mnginfo/${MNGINFO} . 
+ln -s ${TOPDIR}/data/grid/vgrid/${VGRID} . +EOF1 + +for f in $( ls ${TOPDIR}/data/grid/boundary/${dir2d} ) +do + echo "ln -s ${TOPDIR}/data/grid/boundary/${dir2d}/${f} ." >> run.sh +done + +cat << EOF2 >> run.sh + +# run +${MPIEXEC} ./${BINNAME} || exit + +################################################################################ +EOF2 + +exit +cat << EOFICO2LL1 > ico2ll.sh +#! /bin/bash -x +################################################################################ +# +# ------ FOR Linux64 & gnu C&fortran & openmpi ----- +# +################################################################################ +export FORT_FMT_RECL=400 + + +ln -s ${TOPDIR}/bin/fio_ico2ll_mpi . +ln -s ${TOPDIR}/data/mnginfo/${MNGINFO} . +ln -s ${TOPDIR}/data/zaxis . +EOFICO2LL1 + +for f in $( ls ${TOPDIR}/data/grid/llmap/gl${GL}/rl${RL}/ ) +do + echo "ln -s ${TOPDIR}/data/grid/llmap/gl${GL}/rl${RL}/${f} ." >> ico2ll.sh +done + +cat << EOFICO2LL2 >> ico2ll.sh + +# run +${MPIEXEC} ./fio_ico2ll_mpi \ +history \ +glevel=${GLEV} \ +rlevel=${RLEV} \ +mnginfo="./${MNGINFO}" \ +layerfile_dir="./zaxis" \ +llmap_base="./llmap" \ +-lon_swap \ +-comm_smallchunk + +################################################################################ +EOFICO2LL2 diff --git a/NICAM-DC-MINI/sysdep/Mkjobshell.Linux64-gnu-ompi-xmpAPI.sh b/NICAM-DC-MINI/sysdep/Mkjobshell.Linux64-gnu-ompi-xmpAPI.sh new file mode 100644 index 0000000..a67bd94 --- /dev/null +++ b/NICAM-DC-MINI/sysdep/Mkjobshell.Linux64-gnu-ompi-xmpAPI.sh @@ -0,0 +1,100 @@ +#! 
/bin/bash -x + +GLEV=${1} +RLEV=${2} +NMPI=${3} +ZL=${4} +VGRID=${5} +TOPDIR=${6} +BINNAME=${7} +RUNCONF=${8} + +# System specific +MPIEXEC="mpirun -np ${NMPI}" + +GL=`printf %02d ${GLEV}` +RL=`printf %02d ${RLEV}` +if [ ${NMPI} -ge 10000 ]; then + NP=`printf %05d ${NMPI}` +elif [ ${NMPI} -ge 1000 ]; then + NP=`printf %04d ${NMPI}` +elif [ ${NMPI} -ge 100 ]; then + NP=`printf %03d ${NMPI}` +else + NP=`printf %02d ${NMPI}` +fi + +dir2d=gl${GL}rl${RL}pe${NP} +dir3d=gl${GL}rl${RL}z${ZL}pe${NP} +res2d=GL${GL}RL${RL} +res3d=GL${GL}RL${RL}z${ZL} + +MNGINFO=rl${RL}-prc${NP}.info + +outdir=${dir3d} +cd ${outdir} + +cat << EOF1 > run.sh +#! /bin/bash -x +################################################################################ +# +# ------ FOR Linux64 & gnu C&fortran & openmpi ----- +# +################################################################################ +export FORT_FMT_RECL=400 + + +ln -s ${TOPDIR}/bin/${BINNAME} . +ln -s ${TOPDIR}/data/mnginfo/${MNGINFO} . +ln -s ${TOPDIR}/data/grid/vgrid/${VGRID} . +EOF1 + +for f in $( ls ${TOPDIR}/data/grid/boundary/${dir2d} ) +do + echo "ln -s ${TOPDIR}/data/grid/boundary/${dir2d}/${f} ." >> run.sh +done + +cat << EOF2 >> run.sh + +# run +${MPIEXEC} ./${BINNAME} || exit + +################################################################################ +EOF2 + +exit +cat << EOFICO2LL1 > ico2ll.sh +#! /bin/bash -x +################################################################################ +# +# ------ FOR Linux64 & gnu C&fortran & openmpi ----- +# +################################################################################ +export FORT_FMT_RECL=400 + + +ln -s ${TOPDIR}/bin/fio_ico2ll_mpi . +ln -s ${TOPDIR}/data/mnginfo/${MNGINFO} . +ln -s ${TOPDIR}/data/zaxis . +EOFICO2LL1 + +for f in $( ls ${TOPDIR}/data/grid/llmap/gl${GL}/rl${RL}/ ) +do + echo "ln -s ${TOPDIR}/data/grid/llmap/gl${GL}/rl${RL}/${f} ." 
>> ico2ll.sh +done + +cat << EOFICO2LL2 >> ico2ll.sh + +# run +${MPIEXEC} ./fio_ico2ll_mpi \ +history \ +glevel=${GLEV} \ +rlevel=${RLEV} \ +mnginfo="./${MNGINFO}" \ +layerfile_dir="./zaxis" \ +llmap_base="./llmap" \ +-lon_swap \ +-comm_smallchunk + +################################################################################ +EOFICO2LL2 From 103e723bf3054a1b997883b3a2f672341d1c5292 Mon Sep 17 00:00:00 2001 From: Kazuma Tago Date: Mon, 8 Mar 2021 20:05:27 +0900 Subject: [PATCH 37/70] Move xmp_new_local_array,xmp_new_array_section and xmp_free_array_section to out of loop. --- .../xmpAPI_rimp2_tran3c2_incore_v_mpiomp.F90 | 45 ++++++++----------- 1 file changed, 19 insertions(+), 26 deletions(-) diff --git a/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_tran3c2_incore_v_mpiomp.F90 b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_tran3c2_incore_v_mpiomp.F90 index 8c62284..67d5dd8 100755 --- a/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_tran3c2_incore_v_mpiomp.F90 +++ b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_tran3c2_incore_v_mpiomp.F90 @@ -180,9 +180,17 @@ SUBROUTINE RIMP2_Tran3c2_InCore_V_MPIOMP T2BufSend_ub(3) = LenOccBat call xmp_new_local_array(T2BufSend_local_desc,8,3,T2BufSend_lb, & T2BufSend_ub,loc(T2BufSend)) - ! TODO:chack unnecessary? - !call xmp_coarray_bind(T2BufSend_local_desc,T2BufSend) + IdxBF_RI_MyRank_lb(1) = 1 + IdxBF_RI_MyRank_ub(1) = NBF_RI + call xmp_new_local_array(IdxBF_RI_MyRank_local_desc,4,1,IdxBF_RI_MyRank_lb, & + IdxBF_RI_MyRank_ub,loc(IdxBF_RI_MyRank)) + + call xmp_new_array_section(IdxBF_RI_MyRank_local_sec,1) + call xmp_new_array_section(IdxBF_RI_Irank_sec,1) + call xmp_new_array_section(T2BufRecv_sec,3) + call xmp_new_array_section(T2BufSend_local_sec,3) + !coarray ! ALLOCATE(T2BufRecv(MXNActO,MXNBF_RI_MyRank,LenOccBat)) @@ -283,22 +291,6 @@ SUBROUTINE RIMP2_Tran3c2_InCore_V_MPIOMP WTimeBgn = MPI_WTIME() CALL CPU_TIME(TimeBgn) -!coarray -! CALL MPI_IRecv(IdxBF_RI_Irank, NBF_RI_MyRank(Irankrecv), MPI_INTEGER, Irankrecv, 0, & -! & MPI_COMM_MO, ireq(2), IErr) -! 
CALL MPI_ISend(IdxBF_RI_MyRank, NBF_RI_MyRank(MyRankMO), MPI_INTEGER, Iranksend, 0, & -! & MPI_COMM_MO, ireq(1), IErr) -! CALL MPI_Wait(ireq(1), istat1, IErr) -! CALL MPI_Wait(ireq(2), istat2, IErr) - - IdxBF_RI_MyRank_lb(1) = 1 - IdxBF_RI_MyRank_ub(1) = NBF_RI - call xmp_new_local_array(IdxBF_RI_MyRank_local_desc,4,1,IdxBF_RI_MyRank_lb, & - IdxBF_RI_MyRank_ub,loc(IdxBF_RI_MyRank)) - - call xmp_new_array_section(IdxBF_RI_MyRank_local_sec,1) - call xmp_new_array_section(IdxBF_RI_Irank_sec,1) - call xmp_array_section_set_triplet(IdxBF_RI_MyRank_local_sec, & 1,int(1,kind=8),int(NBF_RI_MyRank(MyRankMO),kind=8),1,status) call xmp_array_section_set_triplet(IdxBF_RI_Irank_sec, & @@ -311,9 +303,6 @@ SUBROUTINE RIMP2_Tran3c2_InCore_V_MPIOMP IdxBF_RI_MyRank_local_desc,IdxBF_RI_MyRank_local_sec,status) ! sync all call xmp_sync_all(status) - - call xmp_free_array_section(IdxBF_RI_Irank_sec) - call xmp_free_array_section(IdxBF_RI_MyRank_local_sec) !! !coarray @@ -322,9 +311,6 @@ SUBROUTINE RIMP2_Tran3c2_InCore_V_MPIOMP ! CALL MPI_Wait(ireq(3), istat3, IErr) ! CALL MPI_Wait(ireq(4), istat4, IErr) - call xmp_new_array_section(T2BufRecv_sec,3) - call xmp_new_array_section(T2BufSend_local_sec,3) - call xmp_array_section_set_triplet(T2BufSend_local_sec, & 1,int(1,kind=8),int(LenOccBat,kind=8),1,status) call xmp_array_section_set_triplet(T2BufSend_local_sec, & @@ -348,8 +334,6 @@ SUBROUTINE RIMP2_Tran3c2_InCore_V_MPIOMP ! sync all call xmp_sync_all(status) - call xmp_free_array_section(T2BufSend_local_sec) - call xmp_free_array_section(T2BufRecv_sec) !! CALL CPU_TIME(TimeEnd) WTimeEnd = MPI_WTIME() @@ -478,6 +462,15 @@ SUBROUTINE RIMP2_Tran3c2_InCore_V_MPIOMP PRINT '(" ..... WALL time (2/3 tran3c2 comm) :", F12.2)', WTime_T2C PRINT '(" ..... WALL time (3/3 tran3c2 tran) :", F12.2)', WTime_T3 END IF + + + ! 
allocate section + call xmp_free_array_section(IdxBF_RI_Irank_sec) + call xmp_free_array_section(IdxBF_RI_MyRank_local_sec) + call xmp_free_array_section(T2BufSend_local_sec) + call xmp_free_array_section(T2BufRecv_sec) + + ! ! o deallocate memory ! From 0ea8a4ca55e394491658ee7ade7dff0f94fbeff2 Mon Sep 17 00:00:00 2001 From: Yoshikawa Hiroyuki Date: Mon, 8 Mar 2021 20:19:26 +0900 Subject: [PATCH 38/70] Rename file (typo) --- NICAM-DC-MINI/src/share/mod_grd_xmpAPI.f90 | 1420 ++++++++++++++++++++ 1 file changed, 1420 insertions(+) create mode 100755 NICAM-DC-MINI/src/share/mod_grd_xmpAPI.f90 diff --git a/NICAM-DC-MINI/src/share/mod_grd_xmpAPI.f90 b/NICAM-DC-MINI/src/share/mod_grd_xmpAPI.f90 new file mode 100755 index 0000000..c87b6b9 --- /dev/null +++ b/NICAM-DC-MINI/src/share/mod_grd_xmpAPI.f90 @@ -0,0 +1,1420 @@ +!------------------------------------------------------------------------------- +!> +!! Grid system module +!! +!! @par Description +!! This module is for the management of the icosahedral grid system +!! +!! @author H.Tomita +!! +!! @par History +!! @li 2004-02-17 (H.Tomita) Imported from igdc-4.33 +!! @li 2009-01-23 (H.Tomita) extend the vertical grid method, introducing "hflat". +!! @li 2009-03-10 (H.Tomita) 1. add sub[GRD_gen_plgrid] +!! ( This subroutine generates +!! the pole grids from the regular region grids. ) +!! 2. support direct access of grid file without pole data. +!! sub[GRD_input_hgrid,GRD_output_hgrid]. +!! 3. add 'da_access_hgrid' in the namelist. +!! @li 2009-03-10 (H.Tomita) add error handling in GRD_input_hgrid. +!! @li 2009-05-27 (M.Hara) 1. bug fix of error handling in GRD_input_hgrid. +!! 2. remove "optional" declaration from +!! da_access in GRD_input_hgrid and GRD_output_hgrid. +!! @li 2011-07-22 (T.Ohno) add parameters +!! 1.GRD_grid_type 'ON_SPHERE' / 'ON_PLANE' +!! 2.hgrid_comm_flg +!! the grid data should be communicated or not. ( default:.true. ) +!! 3.triangle_size +!! scale factor when GRD_grid_type is 'ON_PLANE' +!! 
@li 2011-09-03 (H.Yashiro) New I/O +!! @li 2012-05-25 (H.Yashiro) Avoid irregal ISEND/IRECV comm. +!! @li 2012-10-20 (R.Yoshida) Topography for Jablonowski test +!! +!< +module mod_grd + !----------------------------------------------------------------------------- + ! + !++ Used modules + ! +!! use mpi + !--- 2020 Fujitsu + !use mod_coarray + use xmp_api + !--- 2020 Fujitsu end + use mod_adm, only: & + ADM_LOG_FID, & + ADM_NSYS, & + ADM_MAXFNAME + !----------------------------------------------------------------------------- + implicit none + private + !----------------------------------------------------------------------------- + ! + !++ Public procedure + ! + public :: GRD_setup + public :: GRD_output_hgrid + public :: GRD_input_hgrid + public :: GRD_scaling + public :: GRD_output_vgrid + public :: GRD_input_vgrid + public :: GRD_gen_plgrid + + !----------------------------------------------------------------------------- + ! + !++ Public parameters & variables + ! + + !====== Horizontal direction ====== + ! + !------ Scaling factor for length, e.g., earth's radius. + real(8), public, save :: GRD_rscale + ! + !------ Indentifiers for the directions in the Cartesian coordinate. + integer, public, parameter :: GRD_XDIR=1 + integer, public, parameter :: GRD_YDIR=2 + integer, public, parameter :: GRD_ZDIR=3 + ! 
+ !------ Grid points ( CELL CENTER ) + real(8), public, allocatable, save :: GRD_x (:,:,:,:) + real(8), public, allocatable, save :: GRD_x_pl(:,:,:,:) + !<----- + !<----- GRD_x(1:ADM_gall, & --- horizontal + !<----- 1:ADM_KNONE, & --- vertical + !<----- 1:ADM_lall, & --- local region + !<----- GRD_XDIR:GRD_ZDIR) --- three components + !<----- + !<----- GRD_x_pl(1:ADM_gall_pl, & --- horizontal + !<----- 1:ADM_KNONE, & --- vertical + !<----- 1:ADM_lall_pl, & --- pole regions + !<----- GRD_XDIR:GRD_ZDIR) --- three components + !<----- ___ + !<----- / \ + !<----- < p > + !<----- \ ___ / + !<----- + + !------ Grid points ( CELL CORNER ) + real(8), public, allocatable, save :: GRD_xt (:,:,:,:,:) + real(8), public, allocatable, save :: GRD_xt_pl(:,:,:,:) + !<----- + !<----- GRD_xt(1:ADM_gall, & --- horizontal + !<----- 1:ADM_KNONE, & --- vertical + !<----- 1:ADM_lall, & --- local region + !<----- ADM_TI:ADM_TJ, & --- upper or lower triangle. + !<----- GRD_XDIR:GRD_ZDIR) --- three components + !<----- + !<----- GRD_xt_pl(1:ADM_gall_pl, & --- horizontal + !<----- 1:ADM_KNONE, & --- vertical + !<----- 1:ADM_lall_pl, & --- pole regions + !<----- GRD_XDIR:GRD_ZDIR) --- three components + !<----- p___p + !<----- / \ + !<----- p p + !<----- \ ___ / + !<----- p p + + real(8), public, allocatable, save :: GRD_e (:,:,:) ! unscaled GRD_x (=unit vector) + real(8), public, allocatable, save :: GRD_e_pl(:,:,:) + + !====== Vertical direction ====== + ! + !------ Top height + real(8), public, save :: GRD_htop + !<----- unit : [m] + ! + !------ xi coordinate + real(8), public, allocatable, save :: GRD_gz(:) + ! + !------ xi coordinate at the half point + real(8), public, allocatable, save :: GRD_gzh(:) + ! + !------ d(xi) + real(8), public, allocatable, save :: GRD_dgz(:) + ! + !------ d(xi) at the half point + real(8), public, allocatable, save :: GRD_dgzh(:) + ! 
+ !------ 1/dgz, 1/dgzh ( add by kgoto ) + real(8), public, allocatable, save :: GRD_rdgz (:) + real(8), public, allocatable, save :: GRD_rdgzh(:) + + !------ Topography & vegitation + integer, public, parameter :: GRD_ZSFC = 1 + integer, public, parameter :: GRD_ZSD = 2 + integer, public, parameter :: GRD_VEGINDX = 3 + real(8), public, allocatable, save :: GRD_zs (:,:,:,:) + real(8), public, allocatable, save :: GRD_zs_pl(:,:,:,:) + !<----- + !<----- GRD_zs(1:ADM_gall, & + !<----- ADM_KNONE, & <- one layer data + !<----- 1:ADM_lall, & + !<----- GRD_ZSFC:GRD_VEGINDX)) + !<----- + !<----- GRD_zs_pl(1:ADM_gall_pl, & + !<----- ADM_KNONE, & <- one layer data + !<----- 1:ADM_lall_pl, & + !<----- GRD_ZSFC:GRD_VEGINDX)) + !<----- + ! + !------ z coordinate ( actual height ) + integer, public, parameter :: GRD_Z = 1 + integer, public, parameter :: GRD_ZH = 2 + real(8), public, allocatable, save :: GRD_vz (:,:,:,:) + real(8), public, allocatable, save :: GRD_vz_pl(:,:,:,:) + !<----- + !<----- GRD_vz(1:ADM_gall, & + !<----- 1:ADM_kall, & + !<----- 1:ADM_lall, & + !<----- GRD_Z:GRD_ZH)) + !<----- GRD_vz_pl(1:ADM_gall_pl, & + !<----- 1:ADM_kall, & + !<----- 1:ADM_lall_pl, & + !<----- GRD_Z:GRD_ZH)) + !<----- + ! + !------ Vertical interpolation factors + real(8), public, allocatable, save :: GRD_afac(:) + real(8), public, allocatable, save :: GRD_bfac(:) + real(8), public, allocatable, save :: GRD_cfac(:) + real(8), public, allocatable, save :: GRD_dfac(:) + ! + ! [add] T.Ohno 110722 + character(ADM_NSYS), public, save :: GRD_grid_type = 'ON_SPHERE' + ! 'ON_PLANE' + + !----------------------------------------------------------------------------- + ! + !++ Private procedure + ! + !----------------------------------------------------------------------------- + ! + !++ Private parameters & variables + ! + character(len=ADM_MAXFNAME), private, save :: hgrid_fname = '' ! Horizontal grid file + + character(len=ADM_MAXFNAME), private, save :: topo_fname = '' ! 
Topographical data file + character(len=ADM_MAXFNAME), private, save :: toposd_fname = '' ! Standard deviation of topog. data file + character(len=ADM_MAXFNAME), private, save :: vegeindex_fname = '' ! Vegetation index data file + + character(len=ADM_MAXFNAME), private, save :: vgrid_fname = '' ! Vertical grid file + character(len=ADM_NSYS), private, save :: vgrid_scheme = 'LINEAR' ! Vertical coordinate scheme + real(8), private, save :: h_efold = 10000.D0 ! [m] + real(8), private, save :: hflat = -999.D0 ! [m] + + logical, private, save :: hgrid_comm_flg = .true. ! [add] T.Ohno 110722 + real(8), private, save :: triangle_size = 0.D0 ! [add] T.Ohno 110722 length of sides of triangle + + logical, private, save :: da_access_hgrid = .false. + logical, private, save :: topo_direct_access = .false. ! [add] H.Yashiro 20110819 + character(len=ADM_NSYS), private, save :: hgrid_io_mode = 'LEGACY' ! [add] H.Yashiro 20110819 + character(len=ADM_NSYS), private, save :: topo_io_mode = 'LEGACY' ! [add] H.Yashiro 20110819 + + logical, private, save :: output_vgrid = .false. + + !----------------------------------------------------------------------------- +contains + + !----------------------------------------------------------------------------- + !> + !> Setup routine for grid module. + !> 1. set the horizontal grid + !> 2. set the vertical grid + !> 3. set the topograph + !> + subroutine GRD_setup + use mod_adm, only : & + ADM_CTL_FID, & + ADM_PRC_PL, & + ADM_lall_pl, & + ADM_gall_pl, & + ADM_TI, & + ADM_TJ, & + ADM_GSLF_PL, & + ADM_KNONE, & + ADM_VNONE, & + ADM_prc_me, & + ADM_lall, & + ADM_kall, & + ADM_gmin, & + ADM_gmax, & + ADM_gall, & + ADM_gall_1d, & + ADM_kmin, & + ADM_kmax, & + ADM_prc_run_master,& + ADM_proc_stop + use mod_cnst, only : & + CNST_ERADIUS + use mod_comm, only : & + COMM_data_transfer, & + COMM_var ! 
[add] H.Yashiro 20110819 + implicit none + + namelist / GRDPARAM / & + vgrid_fname, & !--- vertical grid file-name + hgrid_fname, & !--- horizontal grid basename + topo_fname, & !--- topography basename + toposd_fname, & !--- standard deviation of topography basename + vegeindex_fname, & !--- vegetation index basename + vgrid_scheme, & !--- verical grid scheme + h_efold, & !--- efolding height for hybrid vertical grid. + hflat, & + output_vgrid, & !--- output verical grid file? + hgrid_comm_flg, & !--- communicate GRD_x ! [add] T.Ohno 110722 + triangle_size, & !--- length of sides of triangle ! [add] T.Ohno 110722 + GRD_grid_type, & !--- grid type ! [add] T.Ohno 110722 + da_access_hgrid, & + hgrid_io_mode, & !--- io type(hgrid) [add] H.Yashiro 20110819 + topo_io_mode !--- io type(topo) [add] H.Yashiro 20110819 + + integer :: n,k,l + integer :: ierr + + integer :: kflat, K0 + real(8) :: htop + + real(8) :: fac_scale ! [add] T.Ohno 110722 + + integer :: nstart,nend + integer :: i,j,suf + suf(i,j) = ADM_gall_1d * ((j)-1) + (i) + !--------------------------------------------------------------------------- + + !--- read parameters + write(ADM_LOG_FID,*) + write(ADM_LOG_FID,*) '+++ Module[grd]/Category[common share]' + rewind(ADM_CTL_FID) + read(ADM_CTL_FID,nml=GRDPARAM,iostat=ierr) + if ( ierr < 0 ) then + write(ADM_LOG_FID,*) '*** GRDPARAM is not specified. use default.' + elseif( ierr > 0 ) then + write(*, *) 'xxx Not appropriate names in namelist GRDPARAM. STOP.' + write(ADM_LOG_FID,*) 'xxx Not appropriate names in namelist GRDPARAM. STOP.' + call ADM_proc_stop + endif + write(ADM_LOG_FID,GRDPARAM) + + K0 = ADM_KNONE + + ! + !--- < setting the horizontal grid > --- + ! 
+ !------ allocation and intitialization of horizontal grid points + !------ ( cell CENTER ) + allocate( GRD_x (ADM_gall, K0,ADM_lall, GRD_XDIR:GRD_ZDIR) ) + allocate( GRD_x_pl(ADM_gall_pl,K0,ADM_lall_pl,GRD_XDIR:GRD_ZDIR) ) + + !------ allocation and intitialization of horizontal grid points + !------ ( cell CORNER ) + allocate( GRD_xt (ADM_gall, K0,ADM_lall, ADM_TI:ADM_TJ,GRD_XDIR:GRD_ZDIR) ) + allocate( GRD_xt_pl(ADM_gall_pl,K0,ADM_lall_pl, GRD_XDIR:GRD_ZDIR) ) + + !--- reading the horzontal grid (unit sphere) and + !--- scaled by earth radius + call GRD_input_hgrid( hgrid_fname, & ![IN] + .true., & ![IN] + hgrid_io_mode ) ![IN] + + !--- data transfer for GRD_x + !--- note : do not communicate GRD_xt + if( hgrid_comm_flg ) call COMM_data_transfer(GRD_x,GRD_x_pl) ! [mod] T.Ohno 110722 + + ! save unscaled grid points as the unit vector + allocate( GRD_e (ADM_gall, ADM_lall, GRD_XDIR:GRD_ZDIR) ) + allocate( GRD_e_pl(ADM_gall_pl,ADM_lall_pl,GRD_XDIR:GRD_ZDIR) ) + GRD_e (:,:,:) = GRD_x (:,K0,:,:) + GRD_e_pl(:,:,:) = GRD_x_pl(:,K0,:,:) + + ! [mod] T.Ohno 110722 ==> + if ( trim(GRD_grid_type) == 'ON_PLANE' ) then + fac_scale = triangle_size + else + fac_scale = CNST_ERADIUS + endif + + call GRD_scaling(fac_scale) + ! [mod] T.Ohno 110722 <== + + + !------ allocation, initialization, and + !------ reading of surface height, standard deviation, vegetation index + allocate(GRD_zs (ADM_gall, K0,ADM_lall, GRD_ZSFC:GRD_VEGINDX)) + allocate(GRD_zs_pl(ADM_gall_pl,K0,ADM_lall_pl,GRD_ZSFC:GRD_VEGINDX)) + GRD_zs (:,:,:,:) = 0.D0 + GRD_zs_pl(:,:,:,:) = 0.D0 + + ! -> [add] R.Yoshida 20121020 + if ( trim(topo_fname) == 'Jablonowski' ) then + call GRD_jbw_init_topo + elseif ( trim(topo_fname) == 'Mountainwave' ) then + call GRD_mwave_init_topo + else + call GRD_input_topograph(topo_fname,GRD_ZSFC) + endif + ! 
<- [add] R.Yoshida 20121020 + + call GRD_input_topograph(toposd_fname, GRD_ZSD) + call GRD_input_topograph(vegeindex_fname,GRD_VEGINDX) + + !--- data transfer for GRD_zs + if (topo_direct_access) then ! [add] H.Yashiro 20110819 + call COMM_var( GRD_zs, GRD_zs_pl, K0, 3, comm_type=2, NSval_fix=.true. ) + else + call COMM_data_transfer(GRD_zs,GRD_zs_pl) + endif + + ! + !--- < setting the vertical coordinate > --- + ! + if( ADM_kall /= ADM_KNONE ) then + + !------ allocation of vertical grid. + allocate( GRD_gz (ADM_kall) ) + allocate( GRD_gzh (ADM_kall) ) + allocate( GRD_dgz (ADM_kall) ) + allocate( GRD_dgzh (ADM_kall) ) + allocate( GRD_rdgz (ADM_kall) ) + allocate( GRD_rdgzh(ADM_kall) ) + + !------ input the vertical grid. + call GRD_input_vgrid(vgrid_fname) + + !------ calculation of grid intervals ( cell center ) + do k = ADM_kmin-1, ADM_kmax + GRD_dgz(k) = GRD_gzh(k+1) - GRD_gzh(k) + enddo + GRD_dgz(ADM_kmax+1) = GRD_dgz(ADM_kmax) + + !------ calculation of grid intervals ( cell wall ) + do k = ADM_kmin, ADM_kmax+1 + GRD_dgzh(k) = GRD_gz(k) - GRD_gz(k-1) + enddo + GRD_dgzh(ADM_kmin-1) = GRD_dgzh(ADM_kmin) + + !------ calculation of 1/dgz and 1/dgzh + do k = 1, ADM_kall + GRD_rdgz (k) = 1.D0 / grd_dgz (k) + GRD_rdgzh(k) = 1.D0 / grd_dgzh(k) + enddo + + !------ hight top + GRD_htop = GRD_gzh(ADM_kmax+1) - GRD_gzh(ADM_kmin) + + !--- < vertical interpolation factor > --- + allocate( GRD_afac(ADM_kall) ) + allocate( GRD_bfac(ADM_kall) ) + allocate( GRD_cfac(ADM_kall) ) + allocate( GRD_dfac(ADM_kall) ) + + !------ From the cell center value to the cell wall value + !------ A(k-1/2) = ( afac(k) A(k) + bfac(k) * A(k-1) ) / 2 + do k = ADM_kmin, ADM_kmax+1 + GRD_afac(k) = 2.D0 * ( GRD_gzh(k) - GRD_gz(k-1) ) & + / ( GRD_gz (k) - GRD_gz(k-1) ) + enddo + GRD_afac(ADM_kmin-1) = 2.D0 + + GRD_bfac(:) = 2.D0 - GRD_afac(:) + + !------ From the cell wall value to the cell center value + !------ A(k) = ( cfac(k) A(k+1/2) + dfac(k) * A(k-1/2) ) / 2 + do k = ADM_kmin, ADM_kmax + 
GRD_cfac(k) = 2.D0 * ( GRD_gz (k ) - GRD_gzh(k) ) & + / ( GRD_gzh(k+1) - GRD_gzh(k) ) + enddo + GRD_cfac(ADM_kmin-1) = 2.D0 + GRD_cfac(ADM_kmax+1) = 0.D0 + + GRD_dfac(:) = 2.D0 - GRD_cfac(:) + + !------ allocation, initilization, and setting the z-coordinate + allocate( GRD_vz ( ADM_gall, ADM_kall,ADM_lall, GRD_Z:GRD_ZH) ) + allocate( GRD_vz_pl( ADM_gall_pl,ADM_kall,ADM_lall_pl,GRD_Z:GRD_ZH) ) + + select case(trim(vgrid_scheme)) + case('LINEAR') + !--- linear transfromation : (Gal-Chen & Sommerville(1975) + !--- gz = H(z-zs)/(H-zs) -> z = (H-zs)/H * gz + zs + kflat = -1 + if ( hflat > 0.D0 ) then !--- default : -999.0 + do k = ADM_kmin+1, ADM_kmax+1 + if ( hflat < GRD_gzh(k) ) then + kflat = k + exit + endif + enddo + endif + + if ( kflat == -1 ) then + kflat = ADM_kmax + 1 + htop = GRD_htop + else + htop = GRD_gzh(kflat) - GRD_gzh(ADM_kmin) + endif + + K0 = ADM_KNONE + nstart = suf(ADM_gmin,ADM_gmin) + nend = suf(ADM_gmax,ADM_gmax) + + do l = 1, ADM_lall + do k = ADM_kmin-1, kflat + do n = nstart,nend + GRD_vz(n,k,l,GRD_Z ) = GRD_zs(n,K0,l,GRD_ZSFC) & + + ( htop - GRD_zs(n,K0,l,GRD_ZSFC) ) / htop * GRD_gz(k) + GRD_vz(n,k,l,GRD_ZH) = GRD_zs(n,K0,l,GRD_ZSFC) & + + ( htop - GRD_zs(n,K0,l,GRD_ZSFC) ) / htop * GRD_gzh(k) + enddo + enddo + + if ( kflat < ADM_kmax+1 ) then + do k = kflat+1, ADM_kmax+1 + do n = nstart, nend + GRD_vz(n,k,l,GRD_Z ) = GRD_gz (k) + GRD_vz(n,k,l,GRD_ZH) = GRD_gzh(k) + enddo + enddo + endif + enddo + + if ( ADM_prc_me == ADM_prc_pl ) then + n = ADM_GSLF_PL + + do l = 1, ADM_lall_pl + do k = ADM_kmin-1, kflat + GRD_vz_pl(n,k,l,GRD_Z) = GRD_zs_pl(n,K0,l,GRD_ZSFC) & + + ( htop - GRD_zs_pl(n,K0,l,GRD_ZSFC) ) / htop * GRD_gz(k) + GRD_vz_pl(n,k,l,GRD_ZH) = GRD_zs_pl(n,K0,l,GRD_ZSFC) & + + ( htop - GRD_zs_pl(n,K0,l,GRD_ZSFC) ) / htop * GRD_gzh(k) + enddo + + if ( kflat < ADM_kmax+1 ) then + do k = kflat+1, ADM_kmax+1 + GRD_vz_pl(n,k,l,GRD_Z ) = GRD_gz (k) + GRD_vz_pl(n,k,l,GRD_ZH) = GRD_gzh(k) + enddo + endif + enddo + endif + + case('HYBRID') + 
!--------- Hybrid transformation : like as Simmons & Buridge(1981) + K0 = ADM_KNONE + nstart = suf(ADM_gmin,ADM_gmin) + nend = suf(ADM_gmax,ADM_gmax) + + do l = 1, ADM_lall + do k = ADM_kmin-1, ADM_kmax+1 + do n = nstart,nend + GRD_vz(n,k,l,GRD_Z) = GRD_gz(k) & + + GRD_zs(n,K0,l,ADM_VNONE) & + * sinh( (GRD_htop-GRD_gz(k)) / h_efold ) & + / sinh( GRD_htop / h_efold ) + GRD_vz(n,k,l,GRD_ZH) = GRD_gzh(k) & + + GRD_zs(n,K0,l,ADM_VNONE) & + * sinh( (GRD_htop-GRD_gzh(k)) / h_efold ) & + / sinh( GRD_htop / h_efold ) + enddo + enddo + enddo + + if ( ADM_prc_me == ADM_prc_pl ) then + n = ADM_GSLF_PL + + do l = 1, ADM_lall_pl + do k = ADM_kmin-1, ADM_kmax+1 + GRD_vz_pl(n,k,l,GRD_Z) = GRD_gz(k) & + + GRD_zs_pl(n,K0,l,ADM_VNONE) & + * sinh( (GRD_htop-GRD_gz(k)) / h_efold ) & + / sinh( GRD_htop / h_efold ) + GRD_vz_pl(n,k,l,GRD_ZH) = GRD_gzh(k) & + + GRD_zs_pl(n,K0,l,ADM_VNONE) & + * sinh( (GRD_htop-GRD_gzh(k)) / h_efold ) & + / sinh( GRD_htop / h_efold ) + enddo + enddo + endif + + endselect + + call COMM_data_transfer(GRD_vz,GRD_vz_pl) + + GRD_vz(suf(1,ADM_gall_1d),:,:,:) = GRD_vz(suf(ADM_gmin,ADM_gmin),:,:,:) + GRD_vz(suf(ADM_gall_1d,1),:,:,:) = GRD_vz(suf(ADM_gmin,ADM_gmin),:,:,:) + endif + + !--- output information about grid. 
+ if ( ADM_kall /= ADM_KNONE ) then + write(ADM_LOG_FID,*) + write(ADM_LOG_FID,'(5x,A)') '|====== Vertical Coordinate [m] ======|' + write(ADM_LOG_FID,'(5x,A)') '| |' + write(ADM_LOG_FID,'(5x,A)') '| -GRID CENTER- -GRID INTERFACE- |' + write(ADM_LOG_FID,'(5x,A)') '| k gz d(gz) gzh d(gzh) k |' + write(ADM_LOG_FID,'(5x,A)') '| |' + k = ADM_kmax + 1 + write(ADM_LOG_FID,'(5x,A,I3,2F10.1,A)') '|',k,GRD_gz(k),GRD_dgz(k), ' | dummy' + write(ADM_LOG_FID,'(5x,A,2F10.1,I4,A)') '| ',GRD_gzh(k),GRD_dgzh(k),k,' | TOA' + k = ADM_kmax + write(ADM_LOG_FID,'(5x,A,I3,2F10.1,A)') '|',k,GRD_gz(k),GRD_dgz(k), ' | kmax' + write(ADM_LOG_FID,'(5x,A,2F10.1,I4,A)') '| ',GRD_gzh(k),GRD_dgzh(k),k,' |' + do k = ADM_kmax-1, ADM_kmin+1, -1 + write(ADM_LOG_FID,'(5x,A,I3,2F10.1,A)') '|',k,GRD_gz(k),GRD_dgz(k), ' |' + write(ADM_LOG_FID,'(5x,A,2F10.1,I4,A)') '| ',GRD_gzh(k),GRD_dgzh(k),k,' |' + enddo + k = ADM_kmin + write(ADM_LOG_FID,'(5x,A,I3,2F10.1,A)') '|',k,GRD_gz(k),GRD_dgz(k), ' | kmin' + write(ADM_LOG_FID,'(5x,A,2F10.1,I4,A)') '| ',GRD_gzh(k),GRD_dgzh(k),k,' | ground' + k = ADM_kmin-1 + write(ADM_LOG_FID,'(5x,A,I3,2F10.1,A)') '|',k,GRD_gz(k),GRD_dgz(k), ' | dummy' + write(ADM_LOG_FID,'(5x,A)') '|===============================================|' + + write(ADM_LOG_FID,*) + write(ADM_LOG_FID,*) '--- Vertical layer scheme = ', trim(vgrid_scheme) + if ( vgrid_scheme == 'HYBRID' ) then + write(ADM_LOG_FID,*) '--- e-folding height = ', h_efold + endif + + if ( output_vgrid ) then + if ( ADM_prc_me == ADM_prc_run_master ) then + call GRD_output_vgrid('./vgrid_used.dat') + endif + endif + else + write(ADM_LOG_FID,*) + write(ADM_LOG_FID,*) '--- vartical layer = 1' + endif + + return + end subroutine GRD_setup + + !----------------------------------------------------------------------------- + !> + !> Description of the subroutine GRD_scaling + !> + subroutine GRD_scaling( fact ) + implicit none + + real(8), intent(in) :: fact !--- IN : scaling factor + 
!--------------------------------------------------------------------------- + + ! [mod] T.Ohno 110722 ==> + if ( trim(GRD_grid_type) == 'ON_PLANE' ) then + GRD_x (:,:,:,:) = GRD_x (:,:,:,:) * fact + GRD_x_pl (:,:,:,:) = GRD_x_pl (:,:,:,:) * fact + GRD_xt (:,:,:,:,:) = GRD_xt (:,:,:,:,:) * fact + GRD_xt_pl(:,:,:,:) = GRD_xt_pl(:,:,:,:) * fact + else + !--- setting the sphere radius + GRD_rscale = fact + + !--- scaling by using GRD_rscale + GRD_x (:,:,:,:) = GRD_x (:,:,:,:) * GRD_rscale + GRD_x_pl (:,:,:,:) = GRD_x_pl (:,:,:,:) * GRD_rscale + GRD_xt (:,:,:,:,:) = GRD_xt (:,:,:,:,:) * GRD_rscale + GRD_xt_pl(:,:,:,:) = GRD_xt_pl(:,:,:,:) * GRD_rscale + endif + ! [mod] T.Ohno 110722 <== + + return + end subroutine GRD_scaling + + !----------------------------------------------------------------------------- + !> + !> Description of the subroutine GRD_output_hgrid + !> + subroutine GRD_output_hgrid( & + basename, & + output_vertex, & + io_mode ) + use mod_misc, only: & + MISC_make_idstr,& + MISC_get_available_fid + use mod_adm, only: & + ADM_proc_stop, & + ADM_prc_tab, & + ADM_prc_me, & + ADM_TI, & + ADM_TJ, & + ADM_gall, & + ADM_lall, & + ADM_KNONE + use mod_fio, only: & ! [add] H.Yashiro 20110819 + FIO_output, & + FIO_HMID, & + FIO_REAL8 + implicit none + + character(len=*), intent(in) :: basename ! output basename + logical, intent(in) :: output_vertex ! output flag of B-grid + character(len=*), intent(in) :: io_mode ! 
io_mode + + character(len=ADM_MAXFNAME) :: fname + character(len=FIO_HMID) :: desc = 'HORIZONTAL GRID FILE' + + integer :: fid + integer :: rgnid, l, K0 + !--------------------------------------------------------------------------- + + K0 = ADM_KNONE + + if ( io_mode == 'ADVANCED' ) then + + call FIO_output( GRD_x(:,:,:,GRD_XDIR), & + basename, desc, "", & + "grd_x_x", "GRD_x (X_DIR)", "", & + "NIL", FIO_REAL8, "ZSSFC1", K0, K0, 1, 0.D0, 0.D0 ) + call FIO_output( GRD_x(:,:,:,GRD_YDIR), & + basename, desc, '', & + 'grd_x_y', 'GRD_x (Y_DIR)', '', & + 'NIL', FIO_REAL8, 'ZSSFC1', K0, K0, 1, 0.D0, 0.D0 ) + call FIO_output( GRD_x(:,:,:,GRD_ZDIR), & + basename, desc, '', & + 'grd_x_z', 'GRD_x (Z_DIR)', '', & + 'NIL', FIO_REAL8, 'ZSSFC1', K0, K0, 1, 0.D0, 0.D0 ) + + if ( output_vertex ) then + call FIO_output( GRD_xt(:,:,:,ADM_TI,GRD_XDIR), & + basename, desc, '', & + 'grd_xt_ix', 'GRD_xt (TI,X_DIR)', '', & + 'NIL', FIO_REAL8, 'ZSSFC1', K0, K0, 1, 0.D0, 0.D0 ) + call FIO_output( GRD_xt(:,:,:,ADM_TJ,GRD_XDIR), & + basename, desc, '', & + 'grd_xt_jx', 'GRD_xt (TJ,X_DIR)', '', & + 'NIL', FIO_REAL8, 'ZSSFC1', K0, K0, 1, 0.D0, 0.D0 ) + call FIO_output( GRD_xt(:,:,:,ADM_TI,GRD_YDIR), & + basename, desc, '', & + 'grd_xt_iy', 'GRD_xt (TI,Y_DIR)', '', & + 'NIL', FIO_REAL8, 'ZSSFC1', K0, K0, 1, 0.D0, 0.D0 ) + call FIO_output( GRD_xt(:,:,:,ADM_TJ,GRD_YDIR), & + basename, desc, '', & + 'grd_xt_jy', 'GRD_xt (TJ,Y_DIR)', '', & + 'NIL', FIO_REAL8, 'ZSSFC1', K0, K0, 1, 0.D0, 0.D0 ) + call FIO_output( GRD_xt(:,:,:,ADM_TI,GRD_ZDIR), & + basename, desc, '', & + 'grd_xt_iz', 'GRD_xt (TI,Z_DIR)', '', & + 'NIL', FIO_REAL8, 'ZSSFC1', K0, K0, 1, 0.D0, 0.D0 ) + call FIO_output( GRD_xt(:,:,:,ADM_TJ,GRD_ZDIR), & + basename, desc, '', & + 'grd_xt_jz', 'GRD_xt (TJ,Z_DIR)', '', & + 'NIL', FIO_REAL8, 'ZSSFC1', K0, K0, 1, 0.D0, 0.D0 ) + endif + + elseif( io_mode == 'LEGACY' ) then + + do l = 1, ADM_lall + rgnid = ADM_prc_tab(l,ADM_prc_me) + call MISC_make_idstr(fname,trim(basename),'rgn',rgnid) + + fid = 
MISC_get_available_fid() + open( unit = fid, & + file=trim(fname), & + form='unformatted', & + access='direct', & + recl=ADM_gall*8 ) + + write(fid,rec=1) GRD_x(:,K0,l,GRD_XDIR) + write(fid,rec=2) GRD_x(:,K0,l,GRD_YDIR) + write(fid,rec=3) GRD_x(:,K0,l,GRD_ZDIR) + if ( output_vertex ) then + write(fid,rec=4) GRD_xt(:,K0,l,ADM_TI,GRD_XDIR) + write(fid,rec=5) GRD_xt(:,K0,l,ADM_TI,GRD_YDIR) + write(fid,rec=6) GRD_xt(:,K0,l,ADM_TI,GRD_ZDIR) + write(fid,rec=7) GRD_xt(:,K0,l,ADM_TJ,GRD_XDIR) + write(fid,rec=8) GRD_xt(:,K0,l,ADM_TJ,GRD_YDIR) + write(fid,rec=9) GRD_xt(:,K0,l,ADM_TJ,GRD_ZDIR) + endif + close(fid) + enddo + else + write(ADM_LOG_FID,*) 'Invalid io_mode!' + call ADM_proc_stop + endif + + return + end subroutine GRD_output_hgrid + + !----------------------------------------------------------------------------- + !> + !> Description of the subroutine GRD_input_hgrid + !> + subroutine GRD_input_hgrid( & + basename, & + input_vertex, & + io_mode ) + use mod_misc, only: & + MISC_make_idstr, & + MISC_get_available_fid + use mod_adm, only: & + ADM_proc_stop, & + ADM_prc_tab, & + ADM_prc_me, & + ADM_TI, & + ADM_TJ, & + ADM_gall, & + ADM_lall, & + ADM_KNONE + use mod_fio, only : & ! [add] H.Yashiro 20110819 + FIO_input + implicit none + + character(len=*), intent(in) :: basename ! input basename + logical, intent(in) :: input_vertex ! flag of B-grid input + character(len=*), intent(in) :: io_mode ! 
io_mode + + character(len=ADM_MAXFNAME) :: fname + + integer :: fid, ierr + integer :: rgnid, l, K0 + !--------------------------------------------------------------------------- + + K0 = ADM_KNONE + + if ( io_mode == 'ADVANCED' ) then + + call FIO_input(GRD_x(:,:,:,GRD_XDIR),basename,'grd_x_x','ZSSFC1',K0,K0,1) + call FIO_input(GRD_x(:,:,:,GRD_YDIR),basename,'grd_x_y','ZSSFC1',K0,K0,1) + call FIO_input(GRD_x(:,:,:,GRD_ZDIR),basename,'grd_x_z','ZSSFC1',K0,K0,1) + if ( input_vertex ) then + call FIO_input(GRD_xt(:,:,:,ADM_TI,GRD_XDIR),basename, & + 'grd_xt_ix','ZSSFC1',K0,K0,1 ) + call FIO_input(GRD_xt(:,:,:,ADM_TJ,GRD_XDIR),basename, & + 'grd_xt_jx','ZSSFC1',K0,K0,1 ) + call FIO_input(GRD_xt(:,:,:,ADM_TI,GRD_YDIR),basename, & + 'grd_xt_iy','ZSSFC1',K0,K0,1 ) + call FIO_input(GRD_xt(:,:,:,ADM_TJ,GRD_YDIR),basename, & + 'grd_xt_jy','ZSSFC1',K0,K0,1 ) + call FIO_input(GRD_xt(:,:,:,ADM_TI,GRD_ZDIR),basename, & + 'grd_xt_iz','ZSSFC1',K0,K0,1 ) + call FIO_input(GRD_xt(:,:,:,ADM_TJ,GRD_ZDIR),basename, & + 'grd_xt_jz','ZSSFC1',K0,K0,1 ) + endif + + elseif( io_mode == 'LEGACY' ) then + + do l = 1, ADM_lall + rgnid = ADM_prc_tab(l,ADM_prc_me) + call MISC_make_idstr(fname,trim(basename),'rgn',rgnid) + + fid = MISC_get_available_fid() + open( unit = fid, & + file = trim(fname), & + form = 'unformatted', & + access = 'direct', & + recl = ADM_gall*8, & + status = 'old', & + iostat = ierr ) + + if ( ierr /= 0 ) then + write(ADM_LOG_FID,*) 'xxx Error occured in reading grid file.', trim(fname) + call ADM_proc_stop + endif + + read(fid,rec=1) GRD_x(:,K0,l,GRD_XDIR) + read(fid,rec=2) GRD_x(:,K0,l,GRD_YDIR) + read(fid,rec=3) GRD_x(:,K0,l,GRD_ZDIR) + if ( input_vertex ) then + read(fid,rec=4) GRD_xt(:,K0,l,ADM_TI,GRD_XDIR) + read(fid,rec=5) GRD_xt(:,K0,l,ADM_TI,GRD_YDIR) + read(fid,rec=6) GRD_xt(:,K0,l,ADM_TI,GRD_ZDIR) + read(fid,rec=7) GRD_xt(:,K0,l,ADM_TJ,GRD_XDIR) + read(fid,rec=8) GRD_xt(:,K0,l,ADM_TJ,GRD_YDIR) + read(fid,rec=9) GRD_xt(:,K0,l,ADM_TJ,GRD_ZDIR) + endif + close(fid) 
+ enddo + + else + write(ADM_LOG_FID,*) 'Invalid io_mode!' + call ADM_proc_stop + endif + + call GRD_gen_plgrid + + return + end subroutine GRD_input_hgrid + + !----------------------------------------------------------------------------- + !> + !> Description of the subroutine GRD_input_vgrid + !> + subroutine GRD_input_vgrid( fname ) + use mod_misc, only: & + MISC_get_available_fid + use mod_adm, only: & + ADM_LOG_FID, & + ADM_vlayer, & + ADM_proc_stop + implicit none + + character(len=ADM_MAXFNAME), intent(in) :: fname ! vertical grid file name + + integer :: num_of_layer + integer :: fid, ierr + !--------------------------------------------------------------------------- + + fid = MISC_get_available_fid() + open( unit = fid, & + file = trim(fname), & + status = 'old', & + form = 'unformatted', & + iostat = ierr ) + + if ( ierr /= 0 ) then + write(ADM_LOG_FID,*) 'xxx No vertical grid file.' + call ADM_proc_stop + endif + + read(fid) num_of_layer + + if ( num_of_layer /= ADM_vlayer ) then + write(ADM_LOG_FID,*) 'xxx inconsistency in number of vertical layers.' 
+ call ADM_proc_stop + endif + + read(fid) GRD_gz + read(fid) GRD_gzh + + close(fid) + + return + end subroutine GRD_input_vgrid + + !----------------------------------------------------------------------------- + !> + !> Description of the subroutine GRD_output_vgrid + !> + subroutine GRD_output_vgrid( fname ) + use mod_misc, only: & + MISC_get_available_fid + use mod_adm, only: & + ADM_vlayer + implicit none + + character(len=*), intent(in) :: fname + + integer :: fid + !--------------------------------------------------------------------------- + + fid = MISC_get_available_fid() + open(fid,file=trim(fname),form='unformatted') + write(fid) ADM_vlayer + write(fid) GRD_gz + write(fid) GRD_gzh + close(fid) + + return + end subroutine GRD_output_vgrid + + !----------------------------------------------------------------------------- + !> + !> Description of the subroutine GRD_input_topograph + !> + subroutine GRD_input_topograph( & + basename, & + i_var ) + use mod_misc, only: & + MISC_make_idstr,& + MISC_get_available_fid + use mod_adm, only: & + ADM_LOG_FID, & + ADM_prc_tab, & + ADM_prc_me, & + ADM_PRC_PL, & + ADM_lall, & + ADM_gall, & + ADM_KNONE + use mod_fio, only: & + FIO_input + implicit none + + character(len=*), intent(in) :: basename + integer, intent(in) :: i_var + + character(len=16) :: varname(3) + data varname / 'topo', 'topo_stddev', 'vegeindex' / + + character(len=128) :: fname + integer :: ierr + integer :: l, rgnid, fid + !--------------------------------------------------------------------------- + + if ( topo_io_mode == 'ADVANCED' ) then + topo_direct_access = .true. 
+ + call FIO_input(GRD_zs(:,:,:,i_var),basename,varname(i_var),'ZSSFC1',1,1,1) + + elseif( topo_io_mode == 'LEGACY' ) then + + if ( topo_direct_access ) then !--- direct access ( defalut ) + do l = 1, ADM_lall + rgnid = ADM_prc_tab(l,ADM_prc_me) + call MISC_make_idstr(fname,trim(basename),'rgn',rgnid) + fid = MISC_get_available_fid() + + open( fid, & + file = trim(fname), & + form = 'unformatted', & + access = 'direct', & + recl = ADM_gall*8, & + status = 'old' ) + + read(fid,rec=1) GRD_zs(:,ADM_KNONE,l,i_var) + + close(fid) + enddo + else !--- sequential access + do l = 1, ADM_lall + rgnid = ADM_prc_tab(l,ADM_prc_me) + call MISC_make_idstr(fname,trim(basename),'rgn',rgnid) + fid = MISC_get_available_fid() + + open(fid,file=trim(fname),status='old',form='unformatted',iostat=ierr) + if ( ierr /= 0 ) then + write(ADM_LOG_FID,*) 'Msg : Sub[GRD_input_topograph]/Mod[grid]' + write(ADM_LOG_FID,*) ' *** No topographical file. Number :', i_var + return + endif + + read(fid) GRD_zs(:,ADM_KNONE,l,i_var) + close(fid) + enddo + + if ( ADM_prc_me == ADM_prc_pl ) then + fname = trim(basename)//'.pl' + fid = MISC_get_available_fid() + + open(fid,file=trim(fname),status='old',form='unformatted') + read(fid) GRD_zs_pl(:,:,:,i_var) + close(fid) + endif + endif !--- direct/sequencial + + endif !--- io_mode + + return + end subroutine GRD_input_topograph + + !----------------------------------------------------------------------------- + !> + !> Description of the subroutine GRD_gen_plgrid + !> + subroutine GRD_gen_plgrid + use mod_adm, only: & + ADM_rgn_nmax, & + ADM_rgn_vnum, & + ADM_rgn_vtab, & + ADM_rgn2prc, & + ADM_RID, & + ADM_VLINK_NMAX, & + ADM_COMM_RUN_WORLD, & + ADM_prc_tab, & + ADM_prc_me, & + ADM_prc_npl, & + ADM_prc_spl, & + ADM_TI, & + ADM_TJ, & + ADM_N, & + ADM_S, & + ADM_NPL, & + ADM_SPL, & + ADM_lall, & + ADM_gall_1d, & + ADM_gmax, & + ADM_gmin, & + ADM_KNONE, & + ADM_GSLF_PL + use mod_comm, only: & + COMM_var + implicit none + + integer :: prctab (ADM_VLINK_NMAX) + 
integer :: rgntab (ADM_VLINK_NMAX) + integer :: sreq (ADM_VLINK_NMAX) + integer :: rreq (ADM_VLINK_NMAX) + logical :: send_flag(ADM_VLINK_NMAX) + +! real(8) :: v_pl(GRD_XDIR:GRD_ZDIR,ADM_VLINK_NMAX) + real(8) :: vsend_pl(GRD_XDIR:GRD_ZDIR,ADM_VLINK_NMAX) ! [mod] H.Yashiro 20120525 + !--- 2020 Fujitsu + integer(8) :: vsend_pl_l_desc + integer(8) :: vsend_pl_l_lb(2), vsend_pl_l_ub(2) + integer(8) :: vsend_pl_l_sec + !--- 2020 Fujitsu end + real(8) :: vrecv_pl(GRD_XDIR:GRD_ZDIR,ADM_VLINK_NMAX) ! [mod] H.Yashiro 20120525 +!coarray + !--- 2020 Fujitsu + !real(8) :: vrecv_plc(GRD_XDIR:GRD_ZDIR,5)[*] !! not used + !real(8),allocatable :: vrecv_plA(:,:)[:] + integer , POINTER :: vrecv_plA ( : , : ) => null ( ) + integer(8) :: vrecv_plA_desc + integer(8) :: vrecv_plA_lb, vrecv_plA_ub + integer(8) :: vrecv_plA_sec + integer(4) :: img_dims(1) + !--- 2020 Fujitsu end + +!coarray integer :: istat(MPI_STATUS_SIZE) + integer :: n, l, ierr + + integer :: suf, i, j + suf(i,j) = ADM_gall_1d * ((j)-1) + (i) + !--------------------------------------------------------------------------- +!coarray + !--- 2020 Fujitsu + !allocate(vrecv_plA(GRD_XDIR:GRD_ZDIR,5)[*]) + vrecv_plA_lb(1) = GRD_XDIR; vrecv_plA_ub(1) = GRD_ZDIR + vrecv_plA_lb(2) = 1; vrecv_plA_ub(2) = 5 + call xmp_new_coarray(vrecv_plA_desc, 8, 2, vrecv_plA_lb, vrecv_plA_ub, 1, img_dims) + call xmp_coarray_bind(vrecv_plA_desc, vrecv_plA) + + vsend_pl_l_lb(1) = GRD_XDIR; vsend_pl_l_ub(1) = GRD_ZDIR + vsend_pl_l_lb(2) = 1; vsend_pl_l_ub(2) = ADM_VLINK_NMAX + call xmp_new_local_array(vsend_pl_l_desc, 8, 2, vsend_pl_l_lb, vsend_pl_l_ub, loc(vsend_pl)) + + call xmp_new_array_section(vsend_pl_l_sec, 2) + call xmp_new_array_section(vrecv_plA_sec, 2) + !--- 2020 Fujitsu end + vrecv_pl (:,:) = 0.d0 + !vrecv_plc(:,:) = 0.d0 !--- 2020 Fujitsu --- + vrecv_plA(:,:) = 0.d0 + + !--- control volume points at the north pole + do l = ADM_rgn_nmax, 1, -1 + if ( ADM_rgn_vnum(ADM_N,l) == ADM_VLINK_NMAX ) then + do n = 1, ADM_VLINK_NMAX + rgntab(n) = 
ADM_rgn_vtab(ADM_RID,ADM_N,l,n) + prctab(n) = ADM_rgn2prc(rgntab(n)) + enddo + exit + endif + enddo + + send_flag(:) = .false. + + do n = 1, ADM_VLINK_NMAX + do l = 1, ADM_lall + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end + if ( ADM_prc_tab(l,ADM_prc_me) == rgntab(n) ) then + vsend_pl(:,n) = GRD_xt(suf(ADM_gmin,ADM_gmax),ADM_KNONE,l,ADM_TJ,:) ! [mod] H.Yashiro 20120525 + +!coarray +! call MPI_ISEND( vsend_pl(:,n), & ! [mod] H.Yashiro 20120525 +! 3, & +! MPI_DOUBLE_PRECISION, & +! ADM_prc_npl-1, & +! rgntab(n), & +! ADM_COMM_RUN_WORLD, & +! sreq(n), & +! ierr ) + !--- 2020 Fujitsu + !vrecv_plA(:,n)[ADM_prc_npl] = vsend_pl(:,n) + call xmp_array_section_set_triplet(vrecv_plA_sec, 1, GRD_XDIR, GRD_ZDIR, 1, ierr) + call xmp_array_section_set_triplet(vrecv_plA_sec, 2, n, n, 1, ierr) + call xmp_array_section_set_triplet(vsend_pl_l_sec, 1, GRD_XDIR, GRD_ZDIR, 1, ierr) + call xmp_array_section_set_triplet(vsend_pl_l_sec, 2, n, n, 1, ierr) + img_dims(1) = ADM_prc_npl + call xmp_coarray_put_local(img_dims, vrecv_plA_desc, vrecv_plA_sec, vsend_pl_l_desc, vsend_pl_l_sec, ierr) + !--- 2020 Fujitsu end + + send_flag(n) = .true. + endif + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end + enddo + enddo + +! if ( ADM_prc_me == ADM_prc_npl ) then +! do n = 1, ADM_VLINK_NMAX +! call MPI_IRECV( vrecv_pl(:,n), & ! [mod] H.Yashiro 20120525 +! 3, & +! MPI_DOUBLE_PRECISION, & +! prctab(n)-1, & +! rgntab(n), & +! ADM_COMM_RUN_WORLD, & +! rreq(n), & +! ierr ) +! enddo +! endif + +! do n = 1, ADM_VLINK_NMAX +! if ( send_flag(n) ) then +! call MPI_WAIT(sreq(n),istat,ierr) +! endif +! enddo + + if ( ADM_prc_me == ADM_prc_npl ) then + do n = 1, ADM_VLINK_NMAX +! call MPI_WAIT(rreq(n),istat,ierr) +! GRD_xt_pl(n+1,ADM_KNONE,ADM_NPL,:) = vrecv_pl(:,n) ! 
[mod] H.Yashiro 20120525 +!coarray + GRD_xt_pl(n+1,ADM_KNONE,ADM_NPL,:) = vrecv_plA(:,n) + enddo + endif + +!----------------------------------------------------------------------------------------- + + !--- control volume points at the sourth pole + do l = 1, ADM_rgn_nmax + if ( ADM_rgn_vnum(ADM_S,l) == ADM_VLINK_NMAX ) then + do n = 1, ADM_VLINK_NMAX + rgntab(n) = ADM_rgn_vtab(ADM_RID,ADM_S,l,n) + prctab(n) = ADM_rgn2prc(rgntab(n)) + enddo + exit + endif + enddo + + send_flag(:) = .false. + + do n = 1, ADM_VLINK_NMAX + do l =1, ADM_lall + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end + if (ADM_prc_tab(l,ADM_prc_me) == rgntab(n) ) then + vsend_pl(:,n) = GRD_xt(suf(ADM_gmax,ADM_gmin),ADM_KNONE,l,ADM_TI,:) ! [mod] H.Yashiro 20120525 +!coarray +! call MPI_ISEND( vsend_pl(:,n), & ! [mod] H.Yashiro 20120525 +! 3, & +! MPI_DOUBLE_PRECISION, & +! ADM_prc_spl-1, & +! rgntab(n), & +! ADM_COMM_RUN_WORLD, & +! sreq(n), & +! ierr ) + !--- 2020 Fujitsu + !vrecv_plA(:,n)[ADM_prc_npl] = vsend_pl(:,n) + call xmp_array_section_set_triplet(vrecv_plA_sec, 1, GRD_XDIR, GRD_ZDIR, 1, ierr) + call xmp_array_section_set_triplet(vrecv_plA_sec, 2, n, n, 1, ierr) + call xmp_array_section_set_triplet(vsend_pl_l_sec, 1, GRD_XDIR, GRD_ZDIR, 1, ierr) + call xmp_array_section_set_triplet(vsend_pl_l_sec, 2, n, n, 1, ierr) + img_dims(1) = ADM_prc_npl + call xmp_coarray_put_local(img_dims, vrecv_plA_desc, vrecv_plA_sec, vsend_pl_l_desc, vsend_pl_l_sec, ierr) + !--- 2020 Fujitsu end + + send_flag(n) = .true. + endif + !--- 2020 Fujitsu + !sync all + call xmp_sync_all(ierr) + !--- 2020 Fujitsu end + enddo + enddo + +!coarray +! if ( ADM_prc_me == ADM_prc_spl ) then +! do n = 1, ADM_VLINK_NMAX +! call MPI_IRECV( vrecv_pl(:,n), & ! [mod] H.Yashiro 20120525 +! 3, & +! MPI_DOUBLE_PRECISION, & +! prctab(n)-1, & +! rgntab(n), & +! ADM_COMM_RUN_WORLD, & +! rreq(n), & +! ierr ) +! enddo +! endif + +! do n = 1, ADM_VLINK_NMAX +! if ( send_flag(n) ) then +! 
call MPI_WAIT(sreq(n),istat,ierr) +! endif +! enddo + + if ( ADM_prc_me == ADM_prc_spl ) then + do n = 1, ADM_VLINK_NMAX +!coarray +! call MPI_WAIT(rreq(n),istat,ierr) +! GRD_xt_pl(n+1,ADM_KNONE,ADM_SPL,:) = vrecv_pl(:,n) ! [mod] H.Yashiro 20120525 + GRD_xt_pl(n+1,ADM_KNONE,ADM_SPL,:) = vrecv_plA(:,n) + enddo + endif + + !--- grid point communication + call COMM_var(GRD_x,GRD_x_pl,ADM_KNONE,3,comm_type=2,NSval_fix=.false.) + GRD_xt_pl(ADM_GSLF_PL,:,:,:) = GRD_x_pl(ADM_GSLF_PL,:,:,:) + + !--- 2020 Fujitsu + call xmp_free_array_section(vsend_pl_l_sec) + call xmp_free_array_section(vrecv_plA_sec) + + call xmp_coarray_deallocate(vrecv_plA_desc, ierr) + call xmp_free_local_array(vsend_pl_l_desc) + !--- 2020 Fujitsu end + return + end subroutine GRD_gen_plgrid + + !----------------------------------------------------------------------------- + ! [ADD] R.Yoshida 20121020 + ! imported from ENDGame UK Met.office. + !----------------------------------------------------------------------------- + subroutine GRD_jbw_init_topo() + use mod_misc, only : & + MISC_get_latlon + use mod_adm, only : & + ADM_lall, & + ADM_gall, & + ADM_gall_pl, & + ADM_lall_pl, & + ADM_KNONE, & + ADM_prc_me, & + ADM_prc_pl, & + ADM_LOG_FID + use mod_cnst, only: & + CNST_PI, & + CNST_ERADIUS, & + CNST_EOHM, & + CNST_EGRAV, & + CNST_RAIR + implicit none + + real(8), parameter :: u00 = 35.D0 + + real(8) :: cs32ev, f1, f2 + real(8) :: lat, lon + real(8) :: rsurf (ADM_gall ,ADM_lall ) ! surface height in ICO-grid + real(8) :: rsurf_p(ADM_gall_pl,ADM_lall_pl) ! surface height in ICO-grid for pole region + + integer :: n, l, k0 + !--------------------------------------------------------------------------- + + k0 = ADM_KNONE + + cs32ev = ( cos( (1.D0-0.252D0) * CNST_PI * 0.5D0 ) )**1.5D0 + + ! 
for globe + do l = 1, ADM_lall + do n = 1, ADM_gall + call MISC_get_latlon( lat, lon, & + GRD_x(n,k0,l,GRD_XDIR), & + GRD_x(n,k0,l,GRD_YDIR), & + GRD_x(n,k0,l,GRD_ZDIR) ) + + f1 = 10.D0/63.D0 - 2.D0 * sin(lat)**6 * ( cos(lat)**2 + 1.D0/3.D0 ) + f2 = 1.6D0 * cos(lat)**3 * ( sin(lat)**2 + 2.D0/3.D0 ) - 0.25D0 * CNST_PI + + rsurf(n,l) = u00 * cs32ev * ( f1*u00*cs32ev + f2*CNST_ERADIUS*CNST_EOHM ) / CNST_EGRAV + enddo + enddo + + do l=1, ADM_lall + do n=1, ADM_gall + GRD_zs(n,k0,l,GRD_ZSFC) = rsurf(n,l) + enddo + enddo + + ! for pole region + if ( ADM_prc_me == ADM_prc_pl ) then + do l = 1, ADM_lall_pl + do n = 1, ADM_gall_pl + call MISC_get_latlon( lat, lon, & + GRD_x_pl(n,k0,l,GRD_XDIR), & + GRD_x_pl(n,k0,l,GRD_YDIR), & + GRD_x_pl(n,k0,l,GRD_ZDIR) ) + + f1 = 10.D0/63.D0 - 2.D0 * sin(lat)**6 * ( cos(lat)**2 + 1.D0/3.D0 ) + f2 = 1.6D0 * cos(lat)**3 * ( sin(lat)**2 + 2.D0/3.D0 ) - 0.25D0 * CNST_PI + + rsurf_p(n,l) = u00 * cs32ev * ( f1*u00*cs32ev + f2*CNST_ERADIUS*CNST_EOHM ) / CNST_EGRAV + enddo + enddo + + do l=1, ADM_lall_pl + do n=1, ADM_gall_pl + GRD_zs_pl(n,k0,l,GRD_ZSFC) = rsurf_p(n,l) + enddo + enddo + endif + + write(ADM_LOG_FID,*) 'Msg : Sub[GRD_input_topograph]/Mod[grid]' + write(ADM_LOG_FID, '(" *** Topography for JBW: -- MAX: ",F9.3,2X,"MIN: ",F9.3)') & + maxval(GRD_zs(:,:,:,GRD_ZSFC)), minval(GRD_zs(:,:,:,GRD_ZSFC)) + + return + end subroutine GRD_jbw_init_topo + + !----------------------------------------------------------------------------- + ! [ADD] R.Yoshida 20130328 + ! mountain of dcmip 2012 setting + !----------------------------------------------------------------------------- + subroutine GRD_mwave_init_topo() + use mod_misc, only : & + MISC_get_latlon + use mod_adm, only : & + ADM_lall, & + ADM_gall, & + ADM_gall_pl, & + ADM_lall_pl, & + ADM_KNONE, & + ADM_prc_me, & + ADM_prc_pl, & + ADM_LOG_FID + use mod_cnst, only: & + CNST_PI + implicit none + + ! 
+ real(8),parameter :: FAI_M =0.d0 + real(8),parameter :: H_ZERO = 250.d0 + real(8),parameter :: QSI = 4000.d0 + real(8),parameter :: a_ref = 6371220.0D0 + real(8),parameter :: X_reduce = 500.d0 + real(8),parameter :: HALF_WIDTH = 5000.0d0 + + real(8) :: dist_m, aa, bb, LAMBDA_M + real(8) :: lat, lon + integer :: n, l, K0 + !--------------------------------------------------------------------------- + + LAMBDA_M=CNST_PI/4.d0 + K0 = ADM_KNONE + + ! for globe + do l=1, ADM_lall + do n=1, ADM_gall + call MISC_get_latlon( lat, lon, & + GRD_x(n,K0,l,GRD_XDIR), & + GRD_x(n,K0,l,GRD_YDIR), & + GRD_x(n,K0,l,GRD_ZDIR) ) + + dist_m = (a_ref/X_reduce)*acos (sin (FAI_M)*sin (lat) & + +cos (FAI_M)*cos (lat)*cos (lon-LAMBDA_M)) + + aa = exp(- (dist_m)**2.0 / HALF_WIDTH**2.0d0) + bb = cos(CNST_PI*dist_m/QSI)**2.0d0 + GRD_zs(n,ADM_KNONE,l,GRD_ZSFC) = H_ZERO * aa * bb ! equation (76) in dcmip reference + enddo + enddo + + ! for pole region + if ( ADM_prc_me==ADM_prc_pl ) then + do l=1, ADM_lall_pl + do n=1, ADM_gall_pl + call MISC_get_latlon( lat, lon, & + GRD_x(n,K0,l,GRD_XDIR), & + GRD_x(n,K0,l,GRD_YDIR), & + GRD_x(n,K0,l,GRD_ZDIR) ) + + dist_m = (a_ref/X_reduce)*acos (sin (FAI_M)*sin (lat)& + +cos (FAI_M)*cos (lat)*cos (lon-LAMBDA_M)) + + aa = exp(- (dist_m)**2.0 / HALF_WIDTH**2.0d0) + bb = cos(CNST_PI*dist_m/QSI)**2.0d0 + GRD_zs_pl(n,ADM_KNONE,l,GRD_ZSFC) = H_ZERO * aa * bb ! 
equation (76) in dcmip reference + enddo + enddo + endif + + write(ADM_LOG_FID,*) 'Msg : Sub[GRD_input_topograph]/Mod[grid]' + write (ADM_LOG_FID, '(" *** Topography for mwave: -- MAX: ",F9.3,2X,"MIN: ",F9.3)') & + maxval(GRD_zs(:,:,:,GRD_ZSFC)), minval(GRD_zs(:,:,:,GRD_ZSFC)) + return + end subroutine GRD_mwave_init_topo + +end module mod_grd +!------------------------------------------------------------------------------- From 51a29ac56c6911ce0b05bb558c703e0b5e785a1b Mon Sep 17 00:00:00 2001 From: Yoshikawa Hiroyuki Date: Mon, 8 Mar 2021 21:04:45 +0900 Subject: [PATCH 39/70] [WIP] Modify for xmpAPI code --- NICAM-DC-MINI/src/Makefile | 94 +++++++++++-------- NICAM-DC-MINI/src/share/mod_adm_xmpAPI.f90 | 7 +- .../sysdep/Makedef.Linux64-gnu-ompi-xmpAPI | 10 +- 3 files changed, 65 insertions(+), 46 deletions(-) diff --git a/NICAM-DC-MINI/src/Makefile b/NICAM-DC-MINI/src/Makefile index b63c83e..244ca79 100644 --- a/NICAM-DC-MINI/src/Makefile +++ b/NICAM-DC-MINI/src/Makefile @@ -19,6 +19,18 @@ VPATH = share:nhm/share:nhm/dynamics:nhm/forcing:nhm mod_misc := mod_misc # endif +ifneq (,$(findstring xmpAPI,$(NICAM_SYS))) + mod_adm = mod_adm_xmpAPI + mod_comm = mod_comm_xmpAPI + mod_debug = mod_debug_xmpAPI + mod_grd = mod_grd_xmpAPI +else + mod_adm = mod_adm + mod_comm = mod_comm + mod_debug = mod_debug + mod_grd = mod_grd +endif + PRJ1 = driver-dc # PRJ2 = mkmnginfo # PRJ3 = mkrawgrid @@ -50,16 +62,16 @@ maketool: $(LIBDIR)/$(LIBNAME) MODS = \ mod_coarray.o \ - mod_adm.o \ + $(mod_adm).o \ $(mod_misc).o \ - mod_debug.o \ + $(mod_debug).o \ mod_cnst.o \ mod_fio.o \ mod_calendar.o \ fio.o \ fiof.o \ - mod_comm.o \ - mod_grd.o \ + $(mod_comm).o \ + $(mod_grd).o \ mod_gmtr.o \ mod_oprt.o \ mod_vmtr.o \ @@ -109,47 +121,47 @@ $(LIBNAME): $(MODS) mod_coarray.o : mod_coarray.f90 $(mod_misc).o : $(mod_misc).f90 -mod_adm.o : mod_adm.f90 $(mod_misc).o -mod_debug.o : mod_debug.f90 mod_adm.o $(mod_misc).o -mod_cnst.o : mod_cnst.f90 mod_adm.o -mod_calendar.o : mod_calendar.f90 
mod_adm.o +$(mod_adm).o : $(mod_adm).f90 $(mod_misc).o +$(mod_debug).o : $(mod_debug).f90 $(mod_adm).o $(mod_misc).o +mod_cnst.o : mod_cnst.f90 $(mod_adm).o +mod_calendar.o : mod_calendar.f90 $(mod_adm).o fio.o : fio.c fio.h fio_def.h fiof.o : fiof.c fiof.h fio.h fio_def.h -mod_fio.o : mod_fio.f90 mod_adm.o mod_calendar.o mod_cnst.o fio.o fiof.o mod_debug.o -mod_comm.o : mod_comm.f90 mod_adm.o mod_cnst.o mod_debug.o -mod_grd.o : mod_grd.f90 mod_adm.o mod_cnst.o mod_comm.o $(mod_misc).o mod_fio.o mod_coarray.o -mod_gmtr.o : mod_gmtr.f90 mod_adm.o mod_comm.o $(mod_misc).o mod_grd.o mod_cnst.o -mod_oprt.o : mod_oprt.f90 mod_adm.o mod_gmtr.o mod_grd.o mod_comm.o mod_cnst.o -mod_vmtr.o : mod_vmtr.f90 mod_adm.o mod_grd.o mod_comm.o mod_gmtr.o mod_oprt.o mod_cnst.o -mod_time.o : mod_time.f90 mod_adm.o mod_calendar.o -mod_latlon.o : mod_latlon.f90 mod_adm.o mod_cnst.o $(mod_misc).o mod_grd.o mod_oprt.o mod_comm.o mod_gmtr.o -mod_gtl.o : mod_gtl.f90 mod_adm.o mod_comm.o mod_gmtr.o mod_vmtr.o mod_cnst.o $(mod_misc).o mod_grd.o -mod_vintrpl.o : mod_vintrpl.f90 mod_adm.o mod_grd.o mod_cnst.o -mod_oprt3d.o : mod_oprt3d.f90 mod_adm.o mod_grd.o mod_gmtr.o mod_vmtr.o -mod_chemvar.o : mod_chemvar.f90 mod_adm.o -mod_runconf.o : mod_runconf.f90 mod_adm.o mod_cnst.o mod_chemvar.o -mod_thrmdyn.o : mod_thrmdyn.f90 mod_adm.o mod_cnst.o mod_runconf.o -mod_bndcnd.o : mod_bndcnd.f90 mod_adm.o mod_cnst.o mod_grd.o mod_runconf.o -mod_cnvvar.o : mod_cnvvar.f90 mod_adm.o mod_grd.o mod_thrmdyn.o mod_runconf.o mod_bndcnd.o mod_vmtr.o -mod_sfcvar.o : mod_sfcvar.f90 mod_adm.o mod_runconf.o mod_comm.o -mod_prgvar.o : mod_prgvar.f90 mod_adm.o mod_runconf.o mod_comm.o mod_dycoretest.o -mod_history.o : mod_history.f90 mod_adm.o mod_time.o mod_runconf.o mod_grd.o mod_cnst.o mod_calendar.o mod_fio.o mod_comm.o mod_gtl.o mod_vintrpl.o $(mod_misc).o mod_cnvvar.o mod_thrmdyn.o mod_vmtr.o mod_sfcvar.o mod_prgvar.o -mod_bsstate.o : mod_bsstate.f90 mod_adm.o mod_cnst.o mod_grd.o $(mod_misc).o mod_runconf.o 
mod_vintrpl.o mod_bndcnd.o mod_thrmdyn.o -mod_diagvar.o : mod_diagvar.f90 mod_adm.o mod_comm.o $(mod_misc).o mod_gtl.o mod_runconf.o mod_fio.o mod_time.o mod_grd.o -mod_history_vars.o : mod_history_vars.f90 mod_history.o mod_adm.o mod_runconf.o mod_gmtr.o mod_gtl.o mod_vmtr.o mod_prgvar.o mod_sfcvar.o mod_bsstate.o mod_cnvvar.o mod_thrmdyn.o mod_cnst.o mod_bndcnd.o mod_diagvar.o mod_grd.o -mod_embudget.o : mod_embudget.f90 mod_adm.o $(mod_misc).o mod_time.o mod_sfcvar.o mod_cnst.o mod_grd.o mod_vmtr.o mod_gtl.o mod_runconf.o mod_cnvvar.o mod_bsstate.o mod_thrmdyn.o mod_prgvar.o +mod_fio.o : mod_fio.f90 $(mod_adm).o mod_calendar.o mod_cnst.o fio.o fiof.o $(mod_debug).o +$(mod_comm).o : $(mod_comm).f90 $(mod_adm).o mod_cnst.o $(mod_debug).o +$(mod_grd).o : $(mod_grd).f90 $(mod_adm).o mod_cnst.o $(mod_comm).o $(mod_misc).o mod_fio.o mod_coarray.o +mod_gmtr.o : mod_gmtr.f90 $(mod_adm).o $(mod_comm).o $(mod_misc).o $(mod_grd).o mod_cnst.o +mod_oprt.o : mod_oprt.f90 $(mod_adm).o mod_gmtr.o $(mod_grd).o $(mod_comm).o mod_cnst.o +mod_vmtr.o : mod_vmtr.f90 $(mod_adm).o $(mod_grd).o $(mod_comm).o mod_gmtr.o mod_oprt.o mod_cnst.o +mod_time.o : mod_time.f90 $(mod_adm).o mod_calendar.o +mod_latlon.o : mod_latlon.f90 $(mod_adm).o mod_cnst.o $(mod_misc).o $(mod_grd).o mod_oprt.o $(mod_comm).o mod_gmtr.o +mod_gtl.o : mod_gtl.f90 $(mod_adm).o $(mod_comm).o mod_gmtr.o mod_vmtr.o mod_cnst.o $(mod_misc).o $(mod_grd).o +mod_vintrpl.o : mod_vintrpl.f90 $(mod_adm).o $(mod_grd).o mod_cnst.o +mod_oprt3d.o : mod_oprt3d.f90 $(mod_adm).o $(mod_grd).o mod_gmtr.o mod_vmtr.o +mod_chemvar.o : mod_chemvar.f90 $(mod_adm).o +mod_runconf.o : mod_runconf.f90 $(mod_adm).o mod_cnst.o mod_chemvar.o +mod_thrmdyn.o : mod_thrmdyn.f90 $(mod_adm).o mod_cnst.o mod_runconf.o +mod_bndcnd.o : mod_bndcnd.f90 $(mod_adm).o mod_cnst.o $(mod_grd).o mod_runconf.o +mod_cnvvar.o : mod_cnvvar.f90 $(mod_adm).o $(mod_grd).o mod_thrmdyn.o mod_runconf.o mod_bndcnd.o mod_vmtr.o +mod_sfcvar.o : mod_sfcvar.f90 $(mod_adm).o 
mod_runconf.o $(mod_comm).o +mod_prgvar.o : mod_prgvar.f90 $(mod_adm).o mod_runconf.o $(mod_comm).o mod_dycoretest.o +mod_history.o : mod_history.f90 $(mod_adm).o mod_time.o mod_runconf.o $(mod_grd).o mod_cnst.o mod_calendar.o mod_fio.o $(mod_comm).o mod_gtl.o mod_vintrpl.o $(mod_misc).o mod_cnvvar.o mod_thrmdyn.o mod_vmtr.o mod_sfcvar.o mod_prgvar.o +mod_bsstate.o : mod_bsstate.f90 $(mod_adm).o mod_cnst.o $(mod_grd).o $(mod_misc).o mod_runconf.o mod_vintrpl.o mod_bndcnd.o mod_thrmdyn.o +mod_diagvar.o : mod_diagvar.f90 $(mod_adm).o $(mod_comm).o $(mod_misc).o mod_gtl.o mod_runconf.o mod_fio.o mod_time.o $(mod_grd).o +mod_history_vars.o : mod_history_vars.f90 mod_history.o $(mod_adm).o mod_runconf.o mod_gmtr.o mod_gtl.o mod_vmtr.o mod_prgvar.o mod_sfcvar.o mod_bsstate.o mod_cnvvar.o mod_thrmdyn.o mod_cnst.o mod_bndcnd.o mod_diagvar.o $(mod_grd).o +mod_embudget.o : mod_embudget.f90 $(mod_adm).o $(mod_misc).o mod_time.o mod_sfcvar.o mod_cnst.o $(mod_grd).o mod_vmtr.o mod_gtl.o mod_runconf.o mod_cnvvar.o mod_bsstate.o mod_thrmdyn.o mod_prgvar.o mod_dcmip.o : mod_dcmip.f90 -mod_dycoretest.o : mod_dycoretest.f90 mod_adm.o mod_runconf.o $(mod_misc).o mod_vmtr.o mod_fio.o mod_time.o mod_cnvvar.o mod_vintrpl.o mod_comm.o mod_cnst.o mod_thrmdyn.o mod_dcmip.o mod_vmtr.o mod_src.o -mod_trcadv_thuburn.o : mod_trcadv_thuburn.f90 mod_adm.o mod_cnst.o mod_comm.o mod_grd.o mod_gmtr.o mod_vmtr.o -mod_src.o : mod_src.f90 mod_adm.o mod_grd.o mod_vmtr.o mod_oprt.o mod_cnst.o mod_runconf.o mod_gtl.o -mod_numfilter.o : mod_numfilter.f90 mod_adm.o mod_time.o mod_grd.o mod_cnst.o mod_gtl.o mod_gmtr.o mod_oprt.o mod_oprt3d.o mod_comm.o mod_src.o mod_vmtr.o mod_runconf.o mod_bsstate.o +mod_dycoretest.o : mod_dycoretest.f90 $(mod_adm).o mod_runconf.o $(mod_misc).o mod_vmtr.o mod_fio.o mod_time.o mod_cnvvar.o mod_vintrpl.o $(mod_comm).o mod_cnst.o mod_thrmdyn.o mod_dcmip.o mod_vmtr.o mod_src.o +mod_trcadv_thuburn.o : mod_trcadv_thuburn.f90 $(mod_adm).o mod_cnst.o $(mod_comm).o $(mod_grd).o 
mod_gmtr.o mod_vmtr.o +mod_src.o : mod_src.f90 $(mod_adm).o $(mod_grd).o mod_vmtr.o mod_oprt.o mod_cnst.o mod_runconf.o mod_gtl.o +mod_numfilter.o : mod_numfilter.f90 $(mod_adm).o mod_time.o $(mod_grd).o mod_cnst.o mod_gtl.o mod_gmtr.o mod_oprt.o mod_oprt3d.o $(mod_comm).o mod_src.o mod_vmtr.o mod_runconf.o mod_bsstate.o mod_af_trcadv.o : mod_af_trcadv.f90 -mod_af_heldsuarez.o : mod_af_heldsuarez.f90 mod_adm.o mod_cnst.o -mod_forcing_driver.o : mod_forcing_driver.f90 mod_adm.o mod_grd.o mod_gmtr.o mod_vmtr.o mod_time.o mod_runconf.o mod_cnst.o mod_cnvvar.o mod_prgvar.o mod_sfcvar.o mod_thrmdyn.o mod_gtl.o mod_diagvar.o mod_bsstate.o mod_bndcnd.o $(mod_misc).o mod_af_heldsuarez.o mod_history.o mod_af_trcadv.o -mod_vi.o : mod_vi.f90 mod_adm.o mod_comm.o mod_time.o mod_cnst.o mod_grd.o mod_oprt.o mod_vmtr.o mod_runconf.o mod_bndcnd.o mod_numfilter.o mod_src.o mod_cnvvar.o mod_bsstate.o -mod_ndg.o : mod_ndg.f90 mod_adm.o $(mod_misc).o mod_time.o mod_calendar.o mod_grd.o mod_cnst.o mod_vmtr.o mod_history.o mod_gmtr.o mod_oprt.o mod_runconf.o mod_gtl.o mod_comm.o -mod_tb_smg.o : mod_tb_smg.f90 mod_adm.o mod_vmtr.o mod_history.o mod_gmtr.o mod_grd.o mod_cnst.o mod_runconf.o mod_oprt.o mod_comm.o mod_bsstate.o mod_time.o -mod_dynstep.o : mod_dynstep.f90 mod_adm.o mod_cnst.o mod_time.o mod_grd.o mod_vmtr.o mod_runconf.o mod_bsstate.o mod_bndcnd.o mod_prgvar.o mod_diagvar.o mod_thrmdyn.o mod_numfilter.o mod_vi.o mod_src.o mod_trcadv_thuburn.o mod_ndg.o mod_tb_smg.o mod_oprt.o mod_debug.o +mod_af_heldsuarez.o : mod_af_heldsuarez.f90 $(mod_adm).o mod_cnst.o +mod_forcing_driver.o : mod_forcing_driver.f90 $(mod_adm).o $(mod_grd).o mod_gmtr.o mod_vmtr.o mod_time.o mod_runconf.o mod_cnst.o mod_cnvvar.o mod_prgvar.o mod_sfcvar.o mod_thrmdyn.o mod_gtl.o mod_diagvar.o mod_bsstate.o mod_bndcnd.o $(mod_misc).o mod_af_heldsuarez.o mod_history.o mod_af_trcadv.o +mod_vi.o : mod_vi.f90 $(mod_adm).o $(mod_comm).o mod_time.o mod_cnst.o $(mod_grd).o mod_oprt.o mod_vmtr.o mod_runconf.o 
mod_bndcnd.o mod_numfilter.o mod_src.o mod_cnvvar.o mod_bsstate.o +mod_ndg.o : mod_ndg.f90 $(mod_adm).o $(mod_misc).o mod_time.o mod_calendar.o $(mod_grd).o mod_cnst.o mod_vmtr.o mod_history.o mod_gmtr.o mod_oprt.o mod_runconf.o mod_gtl.o $(mod_comm).o +mod_tb_smg.o : mod_tb_smg.f90 $(mod_adm).o mod_vmtr.o mod_history.o mod_gmtr.o $(mod_grd).o mod_cnst.o mod_runconf.o mod_oprt.o $(mod_comm).o mod_bsstate.o mod_time.o +mod_dynstep.o : mod_dynstep.f90 $(mod_adm).o mod_cnst.o mod_time.o $(mod_grd).o mod_vmtr.o mod_runconf.o mod_bsstate.o mod_bndcnd.o mod_prgvar.o mod_diagvar.o mod_thrmdyn.o mod_numfilter.o mod_vi.o mod_src.o mod_trcadv_thuburn.o mod_ndg.o mod_tb_smg.o mod_oprt.o $(mod_debug).o .SUFFIXES: .o .f90 .c diff --git a/NICAM-DC-MINI/src/share/mod_adm_xmpAPI.f90 b/NICAM-DC-MINI/src/share/mod_adm_xmpAPI.f90 index 13dacca..673d8dc 100755 --- a/NICAM-DC-MINI/src/share/mod_adm_xmpAPI.f90 +++ b/NICAM-DC-MINI/src/share/mod_adm_xmpAPI.f90 @@ -27,11 +27,14 @@ module mod_adm !++ used modules ! !----------------------------------------------------------------------------- - implicit none - private !--- 2020 Fujitsu + !implicit none + !private !use mod_coarray use xmp_api + use mpi + implicit none + private !--- 2020 Fujitsu end !----------------------------------------------------------------------------- ! 
diff --git a/NICAM-DC-MINI/sysdep/Makedef.Linux64-gnu-ompi-xmpAPI b/NICAM-DC-MINI/sysdep/Makedef.Linux64-gnu-ompi-xmpAPI index 60d78cc..99d45d0 100644 --- a/NICAM-DC-MINI/sysdep/Makedef.Linux64-gnu-ompi-xmpAPI +++ b/NICAM-DC-MINI/sysdep/Makedef.Linux64-gnu-ompi-xmpAPI @@ -3,8 +3,10 @@ # ##### for computation +OMNI_HOME=$(shell xmpcc --show-env | grep OMNI_HOME | sed -e 's/OMNI_HOME=//' -e 's/"//g') -FFLAGS_FAST = -O2 -m64 +FFLAGS_FAST = -O2 -m64 \ + -I$(OMNI_HOME)/include FFLAGS_DEBUG = -O0 -m64 \ -std=f2003 -pedantic-errors -fimplicit-none -fmodule-private \ @@ -12,10 +14,11 @@ FFLAGS_DEBUG = -O0 -m64 \ -ffpe-trap=invalid,zero,overflow -finit-integer=-32768 \ -finit-real=nan -finit-logical=false -finit-character=9 \ -Wall -Wextra -Wcharacter-truncation -Wunderflow \ - -g -fbacktrace -fbounds-check -fall-intrinsics + -g -fbacktrace -fbounds-check -fall-intrinsics \ + -I$(OMNI_HOME)/include FC = mpif90 -FFLAGS = -cpp --Wn -fconvert=big-endian $(FFLAGS_FAST) +FFLAGS = -cpp -fconvert=big-endian $(FFLAGS_FAST) #FFLAGS = $(FFLAGS_DEBUG) FFLAGS += -x f95-cpp-input @@ -29,6 +32,7 @@ LD = $(FC) # to avoid "-x f95-cpp-input" option LFLAGS = $(FFLAGS_FAST) #LFLAGS = $(FFLAGS_DEBUG) +LFLAGS += -L$(OMNI_HOME)/lib -lxmp ##### for frontend INSTALL = install From 55aa750f795bcabc4fd8ac136639df79a9885f23 Mon Sep 17 00:00:00 2001 From: Yoshikawa Hiroyuki Date: Tue, 9 Mar 2021 11:16:48 +0900 Subject: [PATCH 40/70] Modify with xmpAPI --- NICAM-DC-MINI/src/Makefile | 2 +- NICAM-DC-MINI/src/share/mod_adm_xmpAPI.f90 | 5 +- NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 | 229 ++- NICAM-DC-MINI/src/share/mod_debug_xmpAPI.f90 | 14 +- NICAM-DC-MINI/src/share/mod_grd_xmpAPI.f90 | 18 +- NICAM-DC-MINI/src/share/mod_grd_xmpXPI.f90 | 1420 ----------------- .../sysdep/Makedef.Linux64-gnu-ompi-xmpAPI | 4 +- 7 files changed, 168 insertions(+), 1524 deletions(-) delete mode 100755 NICAM-DC-MINI/src/share/mod_grd_xmpXPI.f90 diff --git a/NICAM-DC-MINI/src/Makefile b/NICAM-DC-MINI/src/Makefile index 
244ca79..b40dae2 100644 --- a/NICAM-DC-MINI/src/Makefile +++ b/NICAM-DC-MINI/src/Makefile @@ -107,7 +107,7 @@ MODS = \ $(PRJ1): prg_$(PRJ1).o $(LIBDIR)/$(LIBNAME) - $(LD) $(LFLAGS) -o $@ $^ + $(LD) -o $@ $^ $(LFLAGS) prg_$(PRJ1).o : prg_$(PRJ1).f90 $(LIBNAME) diff --git a/NICAM-DC-MINI/src/share/mod_adm_xmpAPI.f90 b/NICAM-DC-MINI/src/share/mod_adm_xmpAPI.f90 index 673d8dc..66bc9fb 100755 --- a/NICAM-DC-MINI/src/share/mod_adm_xmpAPI.f90 +++ b/NICAM-DC-MINI/src/share/mod_adm_xmpAPI.f90 @@ -453,7 +453,8 @@ subroutine ADM_proc_stop write(ADM_LOG_FID,*) write(ADM_LOG_FID,*) 'MPI process going to STOP...' - request='STOP' + !!!request='STOP' + request=1 !coarray ! call MPI_BCAST( request, & !--- starting address ! ADM_NSYS, & !--- number of array @@ -466,7 +467,7 @@ subroutine ADM_proc_stop if(ll /= ADM_prc_me) then !--- 2020 Fujitsu !request[ll] = request - call xmp_array_section_set_triplet(request_sec, 1, 1,ADM_NSYS,1, ierr) + call xmp_array_section_set_triplet(request_sec, 1, int(1,kind=8),int(ADM_NSYS,kind=8),1, ierr) img_dims(1) = ll call xmp_coarray_put(img_dims, request_desc,request_sec, request_desc,request_sec, ierr) !--- 2020 Fujitsu end diff --git a/NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 b/NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 index 31c5455..dd2c103 100755 --- a/NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 +++ b/NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 @@ -2738,8 +2738,10 @@ subroutine COMM_data_transfer(& ! comm_call_count=comm_call_count+1 ! - !coarray t(0)=mpi_wtime() - t(0) = xmp_wtime() + !--- 2020 Fujitsu + t(0)=mpi_wtime() + !t(0) = xmp_wtime() + !--- 2020 Fujitsu end ! shp=shape(var) kmax = shp(2) @@ -2788,12 +2790,16 @@ subroutine COMM_data_transfer(& ! -> end if ! 
- !coarray t(1)=mpi_wtime() - t(1) = xmp_wtime() + !--- 2020 Fujitsu + t(1)=mpi_wtime() + !t(1) = xmp_wtime() + !--- 2020 Fujitsu end time_pre=time_pre+(t(1)-t(0)) !call mpi_barrier(ADM_comm_run_world,ierr) - !coarray t(2)=mpi_wtime() - t(2) = xmp_wtime() + !--- 2020 Fujitsu + t(2)=mpi_wtime() + !t(2) = xmp_wtime() + !--- 2020 Fujitsu end time_bar1=time_bar1+(t(2)-t(1)) !coarray !--- 2020 Fujitsu @@ -2802,9 +2808,9 @@ subroutine COMM_data_transfer(& call xmp_new_coarray(tbl_desc, 4, 1, tbl_lb, tbl_ub, 1, img_dims) call xmp_coarray_bind(tbl_desc, tbl) - !call co_max(maxdatasiaze_r) - call MPI_Allreduce(maxdatasiaze_r, max_tmp, 1, MPI_INTEGER, MPI_MAX, MPI_COMM_WORLD, ierr) - maxdatasiaze_r = max_tmp + !call co_max(maxdatasize_r) + call MPI_Allreduce(maxdatasize_r, max_tmp, 1, MPI_INTEGER, MPI_MAX, MPI_COMM_WORLD, ierr) + maxdatasize_r = max_tmp bufsize1 = maxdatasize_r bufsize2 = romax(halomax) @@ -2874,8 +2880,10 @@ subroutine COMM_data_transfer(& endif enddo - !coarray t(3)=mpi_wtime() - t(3) = xmp_wtime() + !--- 2020 Fujitsu + t(3)=mpi_wtime() + !t(3) = xmp_wtime() + !--- 2020 Fujitsu end time_recv=time_recv+(t(3)-t(2)) !----------------------------------------- ! 
var -> sendbuf @@ -2888,8 +2896,10 @@ subroutine COMM_data_transfer(& - !coarray t(4)=mpi_wtime() - t(4) = xmp_wtime() + !--- 2020 Fujitsu + t(4)=mpi_wtime() + !t(4) = xmp_wtime() + !--- 2020 Fujitsu end do ns=1,nsmax(so,halo) ss=sendinfo(SIZE_COMM,ns,so,halo) sl=sendinfo(LRGNID_COMM,ns,so,halo) @@ -2950,8 +2960,10 @@ subroutine COMM_data_transfer(& enddo enddo - !coarray t(5)=mpi_wtime() - t(5) = xmp_wtime() + !--- 2020 Fujitsu + t(5)=mpi_wtime() + !t(5) = xmp_wtime() + !--- 2020 Fujitsu end time_sbuf=time_sbuf+(t(5)-t(4)) @@ -2984,15 +2996,15 @@ subroutine COMM_data_transfer(& !--- 2020 Fujitsu !dst_img = tbl(this_image())[destrank(so,halo)+1] img_dims(1) = destrank(so,halo)+1 - call xmp_array_section_set_triplet(tbl_sec, 1, ADM_prc_me, ADM_prc_me, 1, ierr) + call xmp_array_section_set_triplet(tbl_sec, 1, int(ADM_prc_me,kind=8), int(ADM_prc_me,kind=8), 1, ierr) call xmp_coarray_get_scalar(img_dims, tbl_desc, tbl_sec, dst_img, ierr) !caf_recvbuf(1:ssize(so,halo)*cmax,dst_img)[destrank(so,halo)+1] & ! 
= sendbuf(1:ssize(so,halo)*cmax,so) - call xmp_array_section_set_triplet(caf_recvbuf_sec, 1, 1, ssize(so,halo)*cmax, 1, ierr) - call xmp_array_section_set_triplet(caf_recvbuf_sec, 2, dst_img, dst_img, 1, ierr) - call xmp_array_section_set_triplet(sendbuf_l_sec, 1, 1, ssize(so,halo)*cmax, 1, ierr) - call xmp_array_section_set_triplet(sendbuf_l_sec, 2, so, so, 1, ierr) + call xmp_array_section_set_triplet(caf_recvbuf_sec, 1, int(1,kind=8), int(ssize(so,halo)*cmax,kind=8), 1, ierr) + call xmp_array_section_set_triplet(caf_recvbuf_sec, 2, int(dst_img,kind=8), int(dst_img,kind=8), 1, ierr) + call xmp_array_section_set_triplet(sendbuf_l_sec, 1, int(1,kind=8), int(ssize(so,halo)*cmax,kind=8), 1, ierr) + call xmp_array_section_set_triplet(sendbuf_l_sec, 2, int(so,kind=8), int(so,kind=8), 1, ierr) call xmp_coarray_put_local(img_dims, caf_recvbuf_desc, caf_recvbuf_sec, sendbuf_l_desc, sendbuf_l_sec, ierr) !--- 2020 Fujitsu @@ -3012,16 +3024,20 @@ subroutine COMM_data_transfer(& endif - !coarray t(6)=mpi_wtime() - t(6) = xmp_wtime() + !--- 2020 Fujitsu + t(6)=mpi_wtime() + !t(6) = xmp_wtime() + !--- 2020 Fujitsu end time_send=time_send+(t(6)-t(5)) size_total=size_total+ssize(so,halo)*cmax comm_count=comm_count+1 enddo !loop so !----------------------------------------- ! - !coarray t(7)=mpi_wtime() - t(7) = xmp_wtime() + !--- 2020 Fujitsu + t(7)=mpi_wtime() + !t(7) = xmp_wtime() + !--- 2020 Fujitsu end !--------------------------------------------------- ! var -> var (region to region copy in same rank) !--------------------------------------------------- @@ -3116,8 +3132,10 @@ subroutine COMM_data_transfer(& !----------------------------------------- ! 
!----------------------------------------- - !coarray t(8)=mpi_wtime() - t(8) = xmp_wtime() + !--- 2020 Fujitsu + t(8)=mpi_wtime() + !t(8) = xmp_wtime() + !--- 2020 Fujitsu end time_copy=time_copy+(t(8)-t(7)) acount=romax(halo)+somax(halo) !coarray call mpi_waitall(acount,areq,stat,ierr) @@ -3131,8 +3149,10 @@ subroutine COMM_data_transfer(& end do end do - !coarray t(9)=mpi_wtime() - t(9) = xmp_wtime() + !--- 2020 Fujitsu + t(9)=mpi_wtime() + !t(9) = xmp_wtime() + !--- 2020 Fujitsu end time_wait=time_wait+(t(9)-t(8)) if (opt_comm_dbg) then !================== dbg start @@ -3274,8 +3294,10 @@ subroutine COMM_data_transfer(& enddo enddo !loop ro - !coarray t(10)=mpi_wtime() - t(10) = xmp_wtime() + !--- 2020 Fujitsu + t(10)=mpi_wtime() + !t(10) = xmp_wtime() + !--- 2020 Fujitsu end time_rbuf=time_rbuf+(t(10)-t(9)) ! write(ADM_log_fid,*) 'recv count=',i_dbg,'prc=',adm_prc_me @@ -3312,13 +3334,17 @@ subroutine COMM_data_transfer(& enddo enddo - !coarray t(11)=mpi_wtime() - t(11) = xmp_wtime() + !--- 2020 Fujitsu + t(11)=mpi_wtime() + !t(11) = xmp_wtime() + !--- 2020 Fujitsu end time_copy_sgp=time_copy_sgp+(t(11)-t(10)) !! !call mpi_barrier(ADM_comm_run_world,ierr) - !coarray t(12)=mpi_wtime() - t(12) = xmp_wtime() + !--- 2020 Fujitsu + t(12)=mpi_wtime() + !t(12) = xmp_wtime() + !--- 2020 Fujitsu end time_bar2=time_bar2+(t(12)-t(11)) time_total=time_total+(t(12)-t(0)) @@ -3411,8 +3437,8 @@ subroutine COMM_data_transfer_rgn2pl( & call xmp_new_array_section(v_npl_sec, 2) call xmp_new_array_section(v_spl_sec, 2) - call xmp_new_local_array(v_npl_send_l_desc, 8, 2, v_npl_lb, v_npl_ub, v_npl_send) - call xmp_new_local_array(v_spl_send_l_desc, 8, 2, v_spl_lb, v_spl_ub, v_spl_send) + call xmp_new_local_array(v_npl_send_l_desc, 8, 2, v_npl_lb, v_npl_ub, loc(v_npl_send)) + call xmp_new_local_array(v_spl_send_l_desc, 8, 2, v_spl_lb, v_spl_ub, loc(v_spl_send)) !--- 2020 Fujitsu end v_npl_recvc = 0.d0 v_spl_recvc = 0.d0 @@ -3473,8 +3499,8 @@ subroutine COMM_data_transfer_rgn2pl( & ! 
ierr ) !--- 2020 Fujitsu !v_npl_recvc(:,:)[ADM_prc_npl] = v_npl_send(:,:) - call xmp_array_section_set_triplet(v_npl_sec, 1, 1, knum, 1, ierr) - call xmp_array_section_set_triplet(v_npl_sec, 2, 1, nnum, 1, ierr) + call xmp_array_section_set_triplet(v_npl_sec, 1, int(1,kind=8), int(knum,kind=8), 1, ierr) + call xmp_array_section_set_triplet(v_npl_sec, 2, int(1,kind=8), int(nnum,kind=8), 1, ierr) img_dims(1) = ADM_prc_npl call xmp_coarray_put_local(img_dims, v_npl_recvc_desc, v_npl_sec, v_npl_send_l_desc, v_npl_sec, ierr) !--- 2020 Fujitsu end @@ -3503,8 +3529,8 @@ subroutine COMM_data_transfer_rgn2pl( & ! ierr ) !--- 2020 Fujitsu !v_spl_recvc(:,:)[ADM_prc_spl] = v_spl_send(:,:) - call xmp_array_section_set_triplet(v_spl_sec, 1, 1, knum, 1, ierr) - call xmp_array_section_set_triplet(v_spl_sec, 2, 1, nnum, 1, ierr) + call xmp_array_section_set_triplet(v_spl_sec, 1, int(1,kind=8), int(knum,kind=8), 1, ierr) + call xmp_array_section_set_triplet(v_spl_sec, 2, int(1,kind=8), int(nnum,kind=8), 1, ierr) img_dims(1) = ADM_prc_spl call xmp_coarray_put_local(img_dims, v_spl_recvc_desc, v_spl_sec, v_spl_send_l_desc, v_spl_sec, ierr) !--- 2020 Fujitsu end @@ -3702,8 +3728,8 @@ subroutine COMM_var( & ! call MPI_WAIT(ireq(1),istat,ierr) !--- 2020 Fujitsu !v_npl_recvc(:,:)[ADM_prc_npl] = v_npl_send(:,:) - call xmp_array_section_set_triplet(v_npl_sec, 1, 1, knum, 1, ierr) - call xmp_array_section_set_triplet(v_npl_sec, 2, 1, nnum, 1, ierr) + call xmp_array_section_set_triplet(v_npl_sec, 1, int(1,kind=8), int(knum,kind=8), 1, ierr) + call xmp_array_section_set_triplet(v_npl_sec, 2, int(1,kind=8), int(nnum,kind=8), 1, ierr) img_dims(1) = ADM_prc_npl call xmp_coarray_put_local(img_dims, v_npl_recvc_desc, v_npl_sec, v_npl_send_l_desc, v_npl_sec, ierr) !--- 2020 Fujitsu end @@ -3755,8 +3781,8 @@ subroutine COMM_var( & ! 
call MPI_WAIT(ireq(2),istat,ierr) !--- 2020 Fujitsu !v_spl_recvc(:,:)[ADM_prc_spl] = v_spl_send(:,:) - call xmp_array_section_set_triplet(v_spl_sec, 1, 1, knum, 1, ierr) - call xmp_array_section_set_triplet(v_spl_sec, 2, 1, nnum, 1, ierr) + call xmp_array_section_set_triplet(v_spl_sec, 1, int(1,kind=8), int(knum,kind=8), 1, ierr) + call xmp_array_section_set_triplet(v_spl_sec, 2, int(1,kind=8), int(nnum,kind=8), 1, ierr) img_dims(1) = ADM_prc_spl call xmp_coarray_put_local(img_dims, v_spl_recvc_desc, v_spl_sec, v_spl_send_l_desc, v_spl_sec, ierr) !--- 2020 Fujitsu end @@ -3853,11 +3879,13 @@ subroutine COMM_data_transfer_nopl(& integer(8) :: tbl_sec, caf_recvbuf_sec integer(4) :: img_dims(1) integer :: max_tmp + integer(8) :: sendbuf_l_sec ! for senbuf(:,:) integer bufsize1,bufsize2 call xmp_new_array_section(tbl_sec, 1) call xmp_new_array_section(caf_recvbuf_sec, 2) + call xmp_new_array_section(sendbuf_l_sec, 2) !--- 2020 Fujitsu end ! @@ -3865,8 +3893,10 @@ subroutine COMM_data_transfer_nopl(& ! comm_call_count=comm_call_count+1 ! -!coarray t(0)=mpi_wtime() - t(0) = xmp_wtime() + !--- 2020 Fujitsu + t(0)=mpi_wtime() + !t(0) = xmp_wtime() + !--- 2020 Fujitsu end ! shp=shape(var) kmax = shp(2) @@ -3911,11 +3941,15 @@ subroutine COMM_data_transfer_nopl(& ! -> end if ! 
-!coarray t(1)=mpi_wtime() - t(1) = xmp_wtime() + !--- 2020 Fujitsu + t(1)=mpi_wtime() + !t(1) = xmp_wtime() + !--- 2020 Fujitsu end time_pre=time_pre+(t(1)-t(0)) -!coarray t(2)=mpi_wtime() - t(2) = xmp_wtime() + !--- 2020 Fujitsu + t(2)=mpi_wtime() + !t(2) = xmp_wtime() + !--- 2020 Fujitsu end time_bar1=time_bar1+(t(2)-t(1)) !coarray !--- 2020 Fujitsu @@ -3925,8 +3959,8 @@ subroutine COMM_data_transfer_nopl(& call xmp_coarray_bind(tbl_desc, tbl) !call co_max(maxdatasize_r) - call MPI_Allreduce(maxdatasiaze_r, max_tmp, 1, MPI_INTEGER, MPI_MAX, MPI_COMM_WORLD, ierr) - maxdatasiaze_r = max_tmp + call MPI_Allreduce(maxdatasize_r, max_tmp, 1, MPI_INTEGER, MPI_MAX, MPI_COMM_WORLD, ierr) + maxdatasize_r = max_tmp bufsize1 = maxdatasize_r bufsize2 = romax(halomax) @@ -3991,8 +4025,10 @@ subroutine COMM_data_transfer_nopl(& endif enddo -!coarray t(3)=mpi_wtime() - t(3) = xmp_wtime() + !--- 2020 Fujitsu + t(3)=mpi_wtime() + !t(3) = xmp_wtime() + !--- 2020 Fujitsu end time_recv=time_recv+(t(3)-t(2)) !----------------------------------------- ! 
var -> sendbuf @@ -4001,8 +4037,10 @@ subroutine COMM_data_transfer_nopl(& -!coarray t(4)=mpi_wtime() - t(4) = xmp_wtime() + !--- 2020 Fujitsu + t(4)=mpi_wtime() + !t(4) = xmp_wtime() + !--- 2020 Fujitsu end do ns=1,nsmax(so,halo) ss=sendinfo(SIZE_COMM,ns,so,halo) sl=sendinfo(LRGNID_COMM,ns,so,halo) @@ -4020,8 +4058,10 @@ subroutine COMM_data_transfer_nopl(& enddo !----------------------------------------- -!coarray t(5)=mpi_wtime() - t(5) = xmp_wtime() + !--- 2020 Fujitsu + t(5)=mpi_wtime() + !t(5) = xmp_wtime() + !--- 2020 Fujitsu end time_sbuf=time_sbuf+(t(5)-t(4)) @@ -4049,15 +4089,15 @@ subroutine COMM_data_transfer_nopl(& !--- 2020 Fujitsu !dst_img = tbl(this_image())[destrank(so,halo)+1] img_dims(1) = destrank(so,halo)+1 - call xmp_array_section_set_triplet(tbl_sec, 1, ADM_prc_me, ADM_prc_me, 1, ierr) + call xmp_array_section_set_triplet(tbl_sec, 1, int(ADM_prc_me,kind=8), int(ADM_prc_me,kind=8), 1, ierr) call xmp_coarray_get_scalar(img_dims, tbl_desc, tbl_sec, dst_img, ierr) !caf_recvbuf(1:ssize(so,halo)*cmax,dst_img)[destrank(so,halo)+1] & ! 
= sendbuf(1:ssize(so,halo)*cmax,so) - call xmp_array_section_set_triplet(caf_recvbuf_sec, 1, 1, ssize(so,halo)*cmax, 1, ierr) - call xmp_array_section_set_triplet(caf_recvbuf_sec, 2, dst_img, dst_img, 1, ierr) - call xmp_array_section_set_triplet(sendbuf_l_sec, 1, 1, ssize(so,halo)*cmax, 1, ierr) - call xmp_array_section_set_triplet(sendbuf_l_sec, 2, so, so, 1, ierr) + call xmp_array_section_set_triplet(caf_recvbuf_sec, 1, int(1,kind=8), int(ssize(so,halo)*cmax,kind=8), 1, ierr) + call xmp_array_section_set_triplet(caf_recvbuf_sec, 2, int(dst_img,kind=8), int(dst_img,kind=8), 1, ierr) + call xmp_array_section_set_triplet(sendbuf_l_sec, 1, int(1,kind=8), int(ssize(so,halo)*cmax,kind=8), 1, ierr) + call xmp_array_section_set_triplet(sendbuf_l_sec, 2, int(so,kind=8), int(so,kind=8), 1, ierr) call xmp_coarray_put_local(img_dims, caf_recvbuf_desc, caf_recvbuf_sec, sendbuf_l_desc, sendbuf_l_sec, ierr) !--- 2020 Fujitsu end @@ -4077,16 +4117,20 @@ subroutine COMM_data_transfer_nopl(& endif -!coarray t(6)=mpi_wtime() - t(6) = xmp_wtime() + !--- 2020 Fujitsu + t(6)=mpi_wtime() + !t(6) = xmp_wtime() + !--- 2020 Fujitsu end time_send=time_send+(t(6)-t(5)) size_total=size_total+ssize(so,halo)*cmax comm_count=comm_count+1 enddo !loop so !----------------------------------------- ! -!coarray t(7)=mpi_wtime() - t(7) = xmp_wtime() + !--- 2020 Fujitsu + t(7)=mpi_wtime() + !t(7) = xmp_wtime() + !--- 2020 Fujitsu end !--------------------------------------------------- ! var -> var (region to region copy in same rank) !--------------------------------------------------- @@ -4108,8 +4152,10 @@ subroutine COMM_data_transfer_nopl(& !----------------------------------------- ! 
!----------------------------------------- -!coarray t(8)=mpi_wtime() - t(8) = xmp_wtime() + !--- 2020 Fujitsu + t(8)=mpi_wtime() + !t(8) = xmp_wtime() + !--- 2020 Fujitsu end time_copy=time_copy+(t(8)-t(7)) acount=romax(halo)+somax(halo) !coarray @@ -4125,8 +4171,10 @@ subroutine COMM_data_transfer_nopl(& end do end do -!coarray t(9)=mpi_wtime() - t(9) = xmp_wtime() + !--- 2020 Fujitsu + t(9)=mpi_wtime() + !t(9) = xmp_wtime() + !--- 2020 Fujitsu end time_wait=time_wait+(t(9)-t(8)) if (opt_comm_dbg) then !================== dbg start @@ -4217,8 +4265,10 @@ subroutine COMM_data_transfer_nopl(& enddo enddo enddo !loop ro -!coarray t(10)=mpi_wtime() - t(10) = xmp_wtime() + !--- 2020 Fujitsu + t(10)=mpi_wtime() + !t(10) = xmp_wtime() + !--- 2020 Fujitsu end time_rbuf=time_rbuf+(t(10)-t(9)) !----------------------------------------- @@ -4241,13 +4291,17 @@ subroutine COMM_data_transfer_nopl(& enddo enddo -!coarray t(11)=mpi_wtime() - t(11) = xmp_wtime() + !--- 2020 Fujitsu + t(11)=mpi_wtime() + !t(11) = xmp_wtime() + !--- 2020 Fujitsu end time_copy_sgp=time_copy_sgp+(t(11)-t(10)) !! !call mpi_barrier(ADM_comm_run_world,ierr) -!coarray t(12)=mpi_wtime() - t(12) = xmp_wtime() + !--- 2020 Fujitsu + t(12)=mpi_wtime() + !t(12) = xmp_wtime() + !--- 2020 Fujitsu end time_bar2=time_bar2+(t(12)-t(11)) time_total=time_total+(t(12)-t(0)) ! @@ -4255,6 +4309,7 @@ subroutine COMM_data_transfer_nopl(& !--- 2020 Fujitsu call xmp_free_array_section(tbl_sec) call xmp_free_array_section(caf_recvbuf_sec) + call xmp_free_array_section(sendbuf_l_sec) call xmp_coarray_deallocate(tbl_desc, ierr) call xmp_coarray_deallocate(caf_recvbuf_desc, ierr) @@ -4425,10 +4480,12 @@ end subroutine COMM_Stat_avg !----------------------------------------------------------------------------- subroutine COMM_Stat_max( localmax, globalmax ) !coarray -! use mod_adm, only: & -! ADM_COMM_RUN_WORLD, & -! ADM_prc_all, & -! 
ADM_prc_me + !--- 2020 Fujitsu + use mod_adm, only: & + ADM_COMM_RUN_WORLD, & + ADM_prc_all, & + ADM_prc_me + !--- 2020 Fujitsu end implicit none real(8), intent(in) :: localmax @@ -4473,10 +4530,12 @@ end subroutine COMM_Stat_max !----------------------------------------------------------------------------- subroutine COMM_Stat_min( localmin, globalmin ) !coarray -! use mod_adm, only: & -! ADM_COMM_RUN_WORLD, & -! ADM_prc_all, & -! ADM_prc_me + !--- 2020 Fujitsu + use mod_adm, only: & + ADM_COMM_RUN_WORLD, & + ADM_prc_all, & + ADM_prc_me + !--- 2020 Fujitsu end implicit none real(8), intent(in) :: localmin diff --git a/NICAM-DC-MINI/src/share/mod_debug_xmpAPI.f90 b/NICAM-DC-MINI/src/share/mod_debug_xmpAPI.f90 index 9381d97..d8bc1a0 100755 --- a/NICAM-DC-MINI/src/share/mod_debug_xmpAPI.f90 +++ b/NICAM-DC-MINI/src/share/mod_debug_xmpAPI.f90 @@ -18,8 +18,8 @@ module mod_debug !--- 2020 Fujitsu use mpi use xmp_api + !use mod_coarray !--- 2020 Fujitsu end - use mod_coarray use mod_adm, only: & ADM_LOG_FID, & ADM_NSYS, & @@ -322,8 +322,10 @@ subroutine DEBUG_rapstart( rapname ) id = DEBUG_rapid( rapname ) -!coarray time = real(MPI_WTIME(), kind=8) - time = xmp_wtime() + !--- 2020 Fujitsu + time = real(MPI_WTIME(), kind=8) + !time = xmp_wtime() + !--- 2020 Fujitsu end DEBUG_raptstr(id) = time DEBUG_rapnstr(id) = DEBUG_rapnstr(id) + 1 @@ -348,8 +350,10 @@ subroutine DEBUG_rapend( rapname ) id = DEBUG_rapid( rapname ) -!coarray time = real(MPI_WTIME(), kind=8) - time = xmp_wtime() + !--- 2020 Fujitsu + time = real(MPI_WTIME(), kind=8) + !time = xmp_wtime() + !--- 2020 Fujitsu end DEBUG_rapttot(id) = DEBUG_rapttot(id) + ( time-DEBUG_raptstr(id) ) DEBUG_rapnend(id) = DEBUG_rapnend(id) + 1 diff --git a/NICAM-DC-MINI/src/share/mod_grd_xmpAPI.f90 b/NICAM-DC-MINI/src/share/mod_grd_xmpAPI.f90 index c87b6b9..67bd5f8 100755 --- a/NICAM-DC-MINI/src/share/mod_grd_xmpAPI.f90 +++ b/NICAM-DC-MINI/src/share/mod_grd_xmpAPI.f90 @@ -1047,7 +1047,7 @@ subroutine GRD_gen_plgrid 
!real(8),allocatable :: vrecv_plA(:,:)[:] integer , POINTER :: vrecv_plA ( : , : ) => null ( ) integer(8) :: vrecv_plA_desc - integer(8) :: vrecv_plA_lb, vrecv_plA_ub + integer(8) :: vrecv_plA_lb(2), vrecv_plA_ub(2) integer(8) :: vrecv_plA_sec integer(4) :: img_dims(1) !--- 2020 Fujitsu end @@ -1110,10 +1110,10 @@ subroutine GRD_gen_plgrid ! ierr ) !--- 2020 Fujitsu !vrecv_plA(:,n)[ADM_prc_npl] = vsend_pl(:,n) - call xmp_array_section_set_triplet(vrecv_plA_sec, 1, GRD_XDIR, GRD_ZDIR, 1, ierr) - call xmp_array_section_set_triplet(vrecv_plA_sec, 2, n, n, 1, ierr) - call xmp_array_section_set_triplet(vsend_pl_l_sec, 1, GRD_XDIR, GRD_ZDIR, 1, ierr) - call xmp_array_section_set_triplet(vsend_pl_l_sec, 2, n, n, 1, ierr) + call xmp_array_section_set_triplet(vrecv_plA_sec, 1, int(GRD_XDIR,kind=8), int(GRD_ZDIR,kind=8), 1, ierr) + call xmp_array_section_set_triplet(vrecv_plA_sec, 2, int(n,kind=8), int(n,kind=8), 1, ierr) + call xmp_array_section_set_triplet(vsend_pl_l_sec, 1, int(GRD_XDIR,kind=8), int(GRD_ZDIR,kind=8), 1, ierr) + call xmp_array_section_set_triplet(vsend_pl_l_sec, 2, int(n,kind=8), int(n,kind=8), 1, ierr) img_dims(1) = ADM_prc_npl call xmp_coarray_put_local(img_dims, vrecv_plA_desc, vrecv_plA_sec, vsend_pl_l_desc, vsend_pl_l_sec, ierr) !--- 2020 Fujitsu end @@ -1189,10 +1189,10 @@ subroutine GRD_gen_plgrid ! 
ierr ) !--- 2020 Fujitsu !vrecv_plA(:,n)[ADM_prc_npl] = vsend_pl(:,n) - call xmp_array_section_set_triplet(vrecv_plA_sec, 1, GRD_XDIR, GRD_ZDIR, 1, ierr) - call xmp_array_section_set_triplet(vrecv_plA_sec, 2, n, n, 1, ierr) - call xmp_array_section_set_triplet(vsend_pl_l_sec, 1, GRD_XDIR, GRD_ZDIR, 1, ierr) - call xmp_array_section_set_triplet(vsend_pl_l_sec, 2, n, n, 1, ierr) + call xmp_array_section_set_triplet(vrecv_plA_sec, 1, int(GRD_XDIR,kind=8), int(GRD_ZDIR,kind=8), 1, ierr) + call xmp_array_section_set_triplet(vrecv_plA_sec, 2, int(n,kind=8), int(n,kind=8), 1, ierr) + call xmp_array_section_set_triplet(vsend_pl_l_sec, 1, int(GRD_XDIR,kind=8), int(GRD_ZDIR,kind=8), 1, ierr) + call xmp_array_section_set_triplet(vsend_pl_l_sec, 2, int(n,kind=8), int(n,kind=8), 1, ierr) img_dims(1) = ADM_prc_npl call xmp_coarray_put_local(img_dims, vrecv_plA_desc, vrecv_plA_sec, vsend_pl_l_desc, vsend_pl_l_sec, ierr) !--- 2020 Fujitsu end diff --git a/NICAM-DC-MINI/src/share/mod_grd_xmpXPI.f90 b/NICAM-DC-MINI/src/share/mod_grd_xmpXPI.f90 deleted file mode 100755 index c87b6b9..0000000 --- a/NICAM-DC-MINI/src/share/mod_grd_xmpXPI.f90 +++ /dev/null @@ -1,1420 +0,0 @@ -!------------------------------------------------------------------------------- -!> -!! Grid system module -!! -!! @par Description -!! This module is for the management of the icosahedral grid system -!! -!! @author H.Tomita -!! -!! @par History -!! @li 2004-02-17 (H.Tomita) Imported from igdc-4.33 -!! @li 2009-01-23 (H.Tomita) extend the vertical grid method, introducing "hflat". -!! @li 2009-03-10 (H.Tomita) 1. add sub[GRD_gen_plgrid] -!! ( This subroutine generates -!! the pole grids from the regular region grids. ) -!! 2. support direct access of grid file without pole data. -!! sub[GRD_input_hgrid,GRD_output_hgrid]. -!! 3. add 'da_access_hgrid' in the namelist. -!! @li 2009-03-10 (H.Tomita) add error handling in GRD_input_hgrid. -!! @li 2009-05-27 (M.Hara) 1. bug fix of error handling in GRD_input_hgrid. -!! 
2. remove "optional" declaration from -!! da_access in GRD_input_hgrid and GRD_output_hgrid. -!! @li 2011-07-22 (T.Ohno) add parameters -!! 1.GRD_grid_type 'ON_SPHERE' / 'ON_PLANE' -!! 2.hgrid_comm_flg -!! the grid data should be communicated or not. ( default:.true. ) -!! 3.triangle_size -!! scale factor when GRD_grid_type is 'ON_PLANE' -!! @li 2011-09-03 (H.Yashiro) New I/O -!! @li 2012-05-25 (H.Yashiro) Avoid irregal ISEND/IRECV comm. -!! @li 2012-10-20 (R.Yoshida) Topography for Jablonowski test -!! -!< -module mod_grd - !----------------------------------------------------------------------------- - ! - !++ Used modules - ! -!! use mpi - !--- 2020 Fujitsu - !use mod_coarray - use xmp_api - !--- 2020 Fujitsu end - use mod_adm, only: & - ADM_LOG_FID, & - ADM_NSYS, & - ADM_MAXFNAME - !----------------------------------------------------------------------------- - implicit none - private - !----------------------------------------------------------------------------- - ! - !++ Public procedure - ! - public :: GRD_setup - public :: GRD_output_hgrid - public :: GRD_input_hgrid - public :: GRD_scaling - public :: GRD_output_vgrid - public :: GRD_input_vgrid - public :: GRD_gen_plgrid - - !----------------------------------------------------------------------------- - ! - !++ Public parameters & variables - ! - - !====== Horizontal direction ====== - ! - !------ Scaling factor for length, e.g., earth's radius. - real(8), public, save :: GRD_rscale - ! - !------ Indentifiers for the directions in the Cartesian coordinate. - integer, public, parameter :: GRD_XDIR=1 - integer, public, parameter :: GRD_YDIR=2 - integer, public, parameter :: GRD_ZDIR=3 - ! 
- !------ Grid points ( CELL CENTER ) - real(8), public, allocatable, save :: GRD_x (:,:,:,:) - real(8), public, allocatable, save :: GRD_x_pl(:,:,:,:) - !<----- - !<----- GRD_x(1:ADM_gall, & --- horizontal - !<----- 1:ADM_KNONE, & --- vertical - !<----- 1:ADM_lall, & --- local region - !<----- GRD_XDIR:GRD_ZDIR) --- three components - !<----- - !<----- GRD_x_pl(1:ADM_gall_pl, & --- horizontal - !<----- 1:ADM_KNONE, & --- vertical - !<----- 1:ADM_lall_pl, & --- pole regions - !<----- GRD_XDIR:GRD_ZDIR) --- three components - !<----- ___ - !<----- / \ - !<----- < p > - !<----- \ ___ / - !<----- - - !------ Grid points ( CELL CORNER ) - real(8), public, allocatable, save :: GRD_xt (:,:,:,:,:) - real(8), public, allocatable, save :: GRD_xt_pl(:,:,:,:) - !<----- - !<----- GRD_xt(1:ADM_gall, & --- horizontal - !<----- 1:ADM_KNONE, & --- vertical - !<----- 1:ADM_lall, & --- local region - !<----- ADM_TI:ADM_TJ, & --- upper or lower triangle. - !<----- GRD_XDIR:GRD_ZDIR) --- three components - !<----- - !<----- GRD_xt_pl(1:ADM_gall_pl, & --- horizontal - !<----- 1:ADM_KNONE, & --- vertical - !<----- 1:ADM_lall_pl, & --- pole regions - !<----- GRD_XDIR:GRD_ZDIR) --- three components - !<----- p___p - !<----- / \ - !<----- p p - !<----- \ ___ / - !<----- p p - - real(8), public, allocatable, save :: GRD_e (:,:,:) ! unscaled GRD_x (=unit vector) - real(8), public, allocatable, save :: GRD_e_pl(:,:,:) - - !====== Vertical direction ====== - ! - !------ Top height - real(8), public, save :: GRD_htop - !<----- unit : [m] - ! - !------ xi coordinate - real(8), public, allocatable, save :: GRD_gz(:) - ! - !------ xi coordinate at the half point - real(8), public, allocatable, save :: GRD_gzh(:) - ! - !------ d(xi) - real(8), public, allocatable, save :: GRD_dgz(:) - ! - !------ d(xi) at the half point - real(8), public, allocatable, save :: GRD_dgzh(:) - ! 
- !------ 1/dgz, 1/dgzh ( add by kgoto ) - real(8), public, allocatable, save :: GRD_rdgz (:) - real(8), public, allocatable, save :: GRD_rdgzh(:) - - !------ Topography & vegitation - integer, public, parameter :: GRD_ZSFC = 1 - integer, public, parameter :: GRD_ZSD = 2 - integer, public, parameter :: GRD_VEGINDX = 3 - real(8), public, allocatable, save :: GRD_zs (:,:,:,:) - real(8), public, allocatable, save :: GRD_zs_pl(:,:,:,:) - !<----- - !<----- GRD_zs(1:ADM_gall, & - !<----- ADM_KNONE, & <- one layer data - !<----- 1:ADM_lall, & - !<----- GRD_ZSFC:GRD_VEGINDX)) - !<----- - !<----- GRD_zs_pl(1:ADM_gall_pl, & - !<----- ADM_KNONE, & <- one layer data - !<----- 1:ADM_lall_pl, & - !<----- GRD_ZSFC:GRD_VEGINDX)) - !<----- - ! - !------ z coordinate ( actual height ) - integer, public, parameter :: GRD_Z = 1 - integer, public, parameter :: GRD_ZH = 2 - real(8), public, allocatable, save :: GRD_vz (:,:,:,:) - real(8), public, allocatable, save :: GRD_vz_pl(:,:,:,:) - !<----- - !<----- GRD_vz(1:ADM_gall, & - !<----- 1:ADM_kall, & - !<----- 1:ADM_lall, & - !<----- GRD_Z:GRD_ZH)) - !<----- GRD_vz_pl(1:ADM_gall_pl, & - !<----- 1:ADM_kall, & - !<----- 1:ADM_lall_pl, & - !<----- GRD_Z:GRD_ZH)) - !<----- - ! - !------ Vertical interpolation factors - real(8), public, allocatable, save :: GRD_afac(:) - real(8), public, allocatable, save :: GRD_bfac(:) - real(8), public, allocatable, save :: GRD_cfac(:) - real(8), public, allocatable, save :: GRD_dfac(:) - ! - ! [add] T.Ohno 110722 - character(ADM_NSYS), public, save :: GRD_grid_type = 'ON_SPHERE' - ! 'ON_PLANE' - - !----------------------------------------------------------------------------- - ! - !++ Private procedure - ! - !----------------------------------------------------------------------------- - ! - !++ Private parameters & variables - ! - character(len=ADM_MAXFNAME), private, save :: hgrid_fname = '' ! Horizontal grid file - - character(len=ADM_MAXFNAME), private, save :: topo_fname = '' ! 
Topographical data file - character(len=ADM_MAXFNAME), private, save :: toposd_fname = '' ! Standard deviation of topog. data file - character(len=ADM_MAXFNAME), private, save :: vegeindex_fname = '' ! Vegetation index data file - - character(len=ADM_MAXFNAME), private, save :: vgrid_fname = '' ! Vertical grid file - character(len=ADM_NSYS), private, save :: vgrid_scheme = 'LINEAR' ! Vertical coordinate scheme - real(8), private, save :: h_efold = 10000.D0 ! [m] - real(8), private, save :: hflat = -999.D0 ! [m] - - logical, private, save :: hgrid_comm_flg = .true. ! [add] T.Ohno 110722 - real(8), private, save :: triangle_size = 0.D0 ! [add] T.Ohno 110722 length of sides of triangle - - logical, private, save :: da_access_hgrid = .false. - logical, private, save :: topo_direct_access = .false. ! [add] H.Yashiro 20110819 - character(len=ADM_NSYS), private, save :: hgrid_io_mode = 'LEGACY' ! [add] H.Yashiro 20110819 - character(len=ADM_NSYS), private, save :: topo_io_mode = 'LEGACY' ! [add] H.Yashiro 20110819 - - logical, private, save :: output_vgrid = .false. - - !----------------------------------------------------------------------------- -contains - - !----------------------------------------------------------------------------- - !> - !> Setup routine for grid module. - !> 1. set the horizontal grid - !> 2. set the vertical grid - !> 3. set the topograph - !> - subroutine GRD_setup - use mod_adm, only : & - ADM_CTL_FID, & - ADM_PRC_PL, & - ADM_lall_pl, & - ADM_gall_pl, & - ADM_TI, & - ADM_TJ, & - ADM_GSLF_PL, & - ADM_KNONE, & - ADM_VNONE, & - ADM_prc_me, & - ADM_lall, & - ADM_kall, & - ADM_gmin, & - ADM_gmax, & - ADM_gall, & - ADM_gall_1d, & - ADM_kmin, & - ADM_kmax, & - ADM_prc_run_master,& - ADM_proc_stop - use mod_cnst, only : & - CNST_ERADIUS - use mod_comm, only : & - COMM_data_transfer, & - COMM_var ! 
[add] H.Yashiro 20110819 - implicit none - - namelist / GRDPARAM / & - vgrid_fname, & !--- vertical grid file-name - hgrid_fname, & !--- horizontal grid basename - topo_fname, & !--- topography basename - toposd_fname, & !--- standard deviation of topography basename - vegeindex_fname, & !--- vegetation index basename - vgrid_scheme, & !--- verical grid scheme - h_efold, & !--- efolding height for hybrid vertical grid. - hflat, & - output_vgrid, & !--- output verical grid file? - hgrid_comm_flg, & !--- communicate GRD_x ! [add] T.Ohno 110722 - triangle_size, & !--- length of sides of triangle ! [add] T.Ohno 110722 - GRD_grid_type, & !--- grid type ! [add] T.Ohno 110722 - da_access_hgrid, & - hgrid_io_mode, & !--- io type(hgrid) [add] H.Yashiro 20110819 - topo_io_mode !--- io type(topo) [add] H.Yashiro 20110819 - - integer :: n,k,l - integer :: ierr - - integer :: kflat, K0 - real(8) :: htop - - real(8) :: fac_scale ! [add] T.Ohno 110722 - - integer :: nstart,nend - integer :: i,j,suf - suf(i,j) = ADM_gall_1d * ((j)-1) + (i) - !--------------------------------------------------------------------------- - - !--- read parameters - write(ADM_LOG_FID,*) - write(ADM_LOG_FID,*) '+++ Module[grd]/Category[common share]' - rewind(ADM_CTL_FID) - read(ADM_CTL_FID,nml=GRDPARAM,iostat=ierr) - if ( ierr < 0 ) then - write(ADM_LOG_FID,*) '*** GRDPARAM is not specified. use default.' - elseif( ierr > 0 ) then - write(*, *) 'xxx Not appropriate names in namelist GRDPARAM. STOP.' - write(ADM_LOG_FID,*) 'xxx Not appropriate names in namelist GRDPARAM. STOP.' - call ADM_proc_stop - endif - write(ADM_LOG_FID,GRDPARAM) - - K0 = ADM_KNONE - - ! - !--- < setting the horizontal grid > --- - ! 
- !------ allocation and intitialization of horizontal grid points - !------ ( cell CENTER ) - allocate( GRD_x (ADM_gall, K0,ADM_lall, GRD_XDIR:GRD_ZDIR) ) - allocate( GRD_x_pl(ADM_gall_pl,K0,ADM_lall_pl,GRD_XDIR:GRD_ZDIR) ) - - !------ allocation and intitialization of horizontal grid points - !------ ( cell CORNER ) - allocate( GRD_xt (ADM_gall, K0,ADM_lall, ADM_TI:ADM_TJ,GRD_XDIR:GRD_ZDIR) ) - allocate( GRD_xt_pl(ADM_gall_pl,K0,ADM_lall_pl, GRD_XDIR:GRD_ZDIR) ) - - !--- reading the horzontal grid (unit sphere) and - !--- scaled by earth radius - call GRD_input_hgrid( hgrid_fname, & ![IN] - .true., & ![IN] - hgrid_io_mode ) ![IN] - - !--- data transfer for GRD_x - !--- note : do not communicate GRD_xt - if( hgrid_comm_flg ) call COMM_data_transfer(GRD_x,GRD_x_pl) ! [mod] T.Ohno 110722 - - ! save unscaled grid points as the unit vector - allocate( GRD_e (ADM_gall, ADM_lall, GRD_XDIR:GRD_ZDIR) ) - allocate( GRD_e_pl(ADM_gall_pl,ADM_lall_pl,GRD_XDIR:GRD_ZDIR) ) - GRD_e (:,:,:) = GRD_x (:,K0,:,:) - GRD_e_pl(:,:,:) = GRD_x_pl(:,K0,:,:) - - ! [mod] T.Ohno 110722 ==> - if ( trim(GRD_grid_type) == 'ON_PLANE' ) then - fac_scale = triangle_size - else - fac_scale = CNST_ERADIUS - endif - - call GRD_scaling(fac_scale) - ! [mod] T.Ohno 110722 <== - - - !------ allocation, initialization, and - !------ reading of surface height, standard deviation, vegetation index - allocate(GRD_zs (ADM_gall, K0,ADM_lall, GRD_ZSFC:GRD_VEGINDX)) - allocate(GRD_zs_pl(ADM_gall_pl,K0,ADM_lall_pl,GRD_ZSFC:GRD_VEGINDX)) - GRD_zs (:,:,:,:) = 0.D0 - GRD_zs_pl(:,:,:,:) = 0.D0 - - ! -> [add] R.Yoshida 20121020 - if ( trim(topo_fname) == 'Jablonowski' ) then - call GRD_jbw_init_topo - elseif ( trim(topo_fname) == 'Mountainwave' ) then - call GRD_mwave_init_topo - else - call GRD_input_topograph(topo_fname,GRD_ZSFC) - endif - ! 
<- [add] R.Yoshida 20121020 - - call GRD_input_topograph(toposd_fname, GRD_ZSD) - call GRD_input_topograph(vegeindex_fname,GRD_VEGINDX) - - !--- data transfer for GRD_zs - if (topo_direct_access) then ! [add] H.Yashiro 20110819 - call COMM_var( GRD_zs, GRD_zs_pl, K0, 3, comm_type=2, NSval_fix=.true. ) - else - call COMM_data_transfer(GRD_zs,GRD_zs_pl) - endif - - ! - !--- < setting the vertical coordinate > --- - ! - if( ADM_kall /= ADM_KNONE ) then - - !------ allocation of vertical grid. - allocate( GRD_gz (ADM_kall) ) - allocate( GRD_gzh (ADM_kall) ) - allocate( GRD_dgz (ADM_kall) ) - allocate( GRD_dgzh (ADM_kall) ) - allocate( GRD_rdgz (ADM_kall) ) - allocate( GRD_rdgzh(ADM_kall) ) - - !------ input the vertical grid. - call GRD_input_vgrid(vgrid_fname) - - !------ calculation of grid intervals ( cell center ) - do k = ADM_kmin-1, ADM_kmax - GRD_dgz(k) = GRD_gzh(k+1) - GRD_gzh(k) - enddo - GRD_dgz(ADM_kmax+1) = GRD_dgz(ADM_kmax) - - !------ calculation of grid intervals ( cell wall ) - do k = ADM_kmin, ADM_kmax+1 - GRD_dgzh(k) = GRD_gz(k) - GRD_gz(k-1) - enddo - GRD_dgzh(ADM_kmin-1) = GRD_dgzh(ADM_kmin) - - !------ calculation of 1/dgz and 1/dgzh - do k = 1, ADM_kall - GRD_rdgz (k) = 1.D0 / grd_dgz (k) - GRD_rdgzh(k) = 1.D0 / grd_dgzh(k) - enddo - - !------ hight top - GRD_htop = GRD_gzh(ADM_kmax+1) - GRD_gzh(ADM_kmin) - - !--- < vertical interpolation factor > --- - allocate( GRD_afac(ADM_kall) ) - allocate( GRD_bfac(ADM_kall) ) - allocate( GRD_cfac(ADM_kall) ) - allocate( GRD_dfac(ADM_kall) ) - - !------ From the cell center value to the cell wall value - !------ A(k-1/2) = ( afac(k) A(k) + bfac(k) * A(k-1) ) / 2 - do k = ADM_kmin, ADM_kmax+1 - GRD_afac(k) = 2.D0 * ( GRD_gzh(k) - GRD_gz(k-1) ) & - / ( GRD_gz (k) - GRD_gz(k-1) ) - enddo - GRD_afac(ADM_kmin-1) = 2.D0 - - GRD_bfac(:) = 2.D0 - GRD_afac(:) - - !------ From the cell wall value to the cell center value - !------ A(k) = ( cfac(k) A(k+1/2) + dfac(k) * A(k-1/2) ) / 2 - do k = ADM_kmin, ADM_kmax - 
GRD_cfac(k) = 2.D0 * ( GRD_gz (k ) - GRD_gzh(k) ) & - / ( GRD_gzh(k+1) - GRD_gzh(k) ) - enddo - GRD_cfac(ADM_kmin-1) = 2.D0 - GRD_cfac(ADM_kmax+1) = 0.D0 - - GRD_dfac(:) = 2.D0 - GRD_cfac(:) - - !------ allocation, initilization, and setting the z-coordinate - allocate( GRD_vz ( ADM_gall, ADM_kall,ADM_lall, GRD_Z:GRD_ZH) ) - allocate( GRD_vz_pl( ADM_gall_pl,ADM_kall,ADM_lall_pl,GRD_Z:GRD_ZH) ) - - select case(trim(vgrid_scheme)) - case('LINEAR') - !--- linear transfromation : (Gal-Chen & Sommerville(1975) - !--- gz = H(z-zs)/(H-zs) -> z = (H-zs)/H * gz + zs - kflat = -1 - if ( hflat > 0.D0 ) then !--- default : -999.0 - do k = ADM_kmin+1, ADM_kmax+1 - if ( hflat < GRD_gzh(k) ) then - kflat = k - exit - endif - enddo - endif - - if ( kflat == -1 ) then - kflat = ADM_kmax + 1 - htop = GRD_htop - else - htop = GRD_gzh(kflat) - GRD_gzh(ADM_kmin) - endif - - K0 = ADM_KNONE - nstart = suf(ADM_gmin,ADM_gmin) - nend = suf(ADM_gmax,ADM_gmax) - - do l = 1, ADM_lall - do k = ADM_kmin-1, kflat - do n = nstart,nend - GRD_vz(n,k,l,GRD_Z ) = GRD_zs(n,K0,l,GRD_ZSFC) & - + ( htop - GRD_zs(n,K0,l,GRD_ZSFC) ) / htop * GRD_gz(k) - GRD_vz(n,k,l,GRD_ZH) = GRD_zs(n,K0,l,GRD_ZSFC) & - + ( htop - GRD_zs(n,K0,l,GRD_ZSFC) ) / htop * GRD_gzh(k) - enddo - enddo - - if ( kflat < ADM_kmax+1 ) then - do k = kflat+1, ADM_kmax+1 - do n = nstart, nend - GRD_vz(n,k,l,GRD_Z ) = GRD_gz (k) - GRD_vz(n,k,l,GRD_ZH) = GRD_gzh(k) - enddo - enddo - endif - enddo - - if ( ADM_prc_me == ADM_prc_pl ) then - n = ADM_GSLF_PL - - do l = 1, ADM_lall_pl - do k = ADM_kmin-1, kflat - GRD_vz_pl(n,k,l,GRD_Z) = GRD_zs_pl(n,K0,l,GRD_ZSFC) & - + ( htop - GRD_zs_pl(n,K0,l,GRD_ZSFC) ) / htop * GRD_gz(k) - GRD_vz_pl(n,k,l,GRD_ZH) = GRD_zs_pl(n,K0,l,GRD_ZSFC) & - + ( htop - GRD_zs_pl(n,K0,l,GRD_ZSFC) ) / htop * GRD_gzh(k) - enddo - - if ( kflat < ADM_kmax+1 ) then - do k = kflat+1, ADM_kmax+1 - GRD_vz_pl(n,k,l,GRD_Z ) = GRD_gz (k) - GRD_vz_pl(n,k,l,GRD_ZH) = GRD_gzh(k) - enddo - endif - enddo - endif - - case('HYBRID') - 
!--------- Hybrid transformation : like as Simmons & Buridge(1981) - K0 = ADM_KNONE - nstart = suf(ADM_gmin,ADM_gmin) - nend = suf(ADM_gmax,ADM_gmax) - - do l = 1, ADM_lall - do k = ADM_kmin-1, ADM_kmax+1 - do n = nstart,nend - GRD_vz(n,k,l,GRD_Z) = GRD_gz(k) & - + GRD_zs(n,K0,l,ADM_VNONE) & - * sinh( (GRD_htop-GRD_gz(k)) / h_efold ) & - / sinh( GRD_htop / h_efold ) - GRD_vz(n,k,l,GRD_ZH) = GRD_gzh(k) & - + GRD_zs(n,K0,l,ADM_VNONE) & - * sinh( (GRD_htop-GRD_gzh(k)) / h_efold ) & - / sinh( GRD_htop / h_efold ) - enddo - enddo - enddo - - if ( ADM_prc_me == ADM_prc_pl ) then - n = ADM_GSLF_PL - - do l = 1, ADM_lall_pl - do k = ADM_kmin-1, ADM_kmax+1 - GRD_vz_pl(n,k,l,GRD_Z) = GRD_gz(k) & - + GRD_zs_pl(n,K0,l,ADM_VNONE) & - * sinh( (GRD_htop-GRD_gz(k)) / h_efold ) & - / sinh( GRD_htop / h_efold ) - GRD_vz_pl(n,k,l,GRD_ZH) = GRD_gzh(k) & - + GRD_zs_pl(n,K0,l,ADM_VNONE) & - * sinh( (GRD_htop-GRD_gzh(k)) / h_efold ) & - / sinh( GRD_htop / h_efold ) - enddo - enddo - endif - - endselect - - call COMM_data_transfer(GRD_vz,GRD_vz_pl) - - GRD_vz(suf(1,ADM_gall_1d),:,:,:) = GRD_vz(suf(ADM_gmin,ADM_gmin),:,:,:) - GRD_vz(suf(ADM_gall_1d,1),:,:,:) = GRD_vz(suf(ADM_gmin,ADM_gmin),:,:,:) - endif - - !--- output information about grid. 
- if ( ADM_kall /= ADM_KNONE ) then - write(ADM_LOG_FID,*) - write(ADM_LOG_FID,'(5x,A)') '|====== Vertical Coordinate [m] ======|' - write(ADM_LOG_FID,'(5x,A)') '| |' - write(ADM_LOG_FID,'(5x,A)') '| -GRID CENTER- -GRID INTERFACE- |' - write(ADM_LOG_FID,'(5x,A)') '| k gz d(gz) gzh d(gzh) k |' - write(ADM_LOG_FID,'(5x,A)') '| |' - k = ADM_kmax + 1 - write(ADM_LOG_FID,'(5x,A,I3,2F10.1,A)') '|',k,GRD_gz(k),GRD_dgz(k), ' | dummy' - write(ADM_LOG_FID,'(5x,A,2F10.1,I4,A)') '| ',GRD_gzh(k),GRD_dgzh(k),k,' | TOA' - k = ADM_kmax - write(ADM_LOG_FID,'(5x,A,I3,2F10.1,A)') '|',k,GRD_gz(k),GRD_dgz(k), ' | kmax' - write(ADM_LOG_FID,'(5x,A,2F10.1,I4,A)') '| ',GRD_gzh(k),GRD_dgzh(k),k,' |' - do k = ADM_kmax-1, ADM_kmin+1, -1 - write(ADM_LOG_FID,'(5x,A,I3,2F10.1,A)') '|',k,GRD_gz(k),GRD_dgz(k), ' |' - write(ADM_LOG_FID,'(5x,A,2F10.1,I4,A)') '| ',GRD_gzh(k),GRD_dgzh(k),k,' |' - enddo - k = ADM_kmin - write(ADM_LOG_FID,'(5x,A,I3,2F10.1,A)') '|',k,GRD_gz(k),GRD_dgz(k), ' | kmin' - write(ADM_LOG_FID,'(5x,A,2F10.1,I4,A)') '| ',GRD_gzh(k),GRD_dgzh(k),k,' | ground' - k = ADM_kmin-1 - write(ADM_LOG_FID,'(5x,A,I3,2F10.1,A)') '|',k,GRD_gz(k),GRD_dgz(k), ' | dummy' - write(ADM_LOG_FID,'(5x,A)') '|===============================================|' - - write(ADM_LOG_FID,*) - write(ADM_LOG_FID,*) '--- Vertical layer scheme = ', trim(vgrid_scheme) - if ( vgrid_scheme == 'HYBRID' ) then - write(ADM_LOG_FID,*) '--- e-folding height = ', h_efold - endif - - if ( output_vgrid ) then - if ( ADM_prc_me == ADM_prc_run_master ) then - call GRD_output_vgrid('./vgrid_used.dat') - endif - endif - else - write(ADM_LOG_FID,*) - write(ADM_LOG_FID,*) '--- vartical layer = 1' - endif - - return - end subroutine GRD_setup - - !----------------------------------------------------------------------------- - !> - !> Description of the subroutine GRD_scaling - !> - subroutine GRD_scaling( fact ) - implicit none - - real(8), intent(in) :: fact !--- IN : scaling factor - 
!--------------------------------------------------------------------------- - - ! [mod] T.Ohno 110722 ==> - if ( trim(GRD_grid_type) == 'ON_PLANE' ) then - GRD_x (:,:,:,:) = GRD_x (:,:,:,:) * fact - GRD_x_pl (:,:,:,:) = GRD_x_pl (:,:,:,:) * fact - GRD_xt (:,:,:,:,:) = GRD_xt (:,:,:,:,:) * fact - GRD_xt_pl(:,:,:,:) = GRD_xt_pl(:,:,:,:) * fact - else - !--- setting the sphere radius - GRD_rscale = fact - - !--- scaling by using GRD_rscale - GRD_x (:,:,:,:) = GRD_x (:,:,:,:) * GRD_rscale - GRD_x_pl (:,:,:,:) = GRD_x_pl (:,:,:,:) * GRD_rscale - GRD_xt (:,:,:,:,:) = GRD_xt (:,:,:,:,:) * GRD_rscale - GRD_xt_pl(:,:,:,:) = GRD_xt_pl(:,:,:,:) * GRD_rscale - endif - ! [mod] T.Ohno 110722 <== - - return - end subroutine GRD_scaling - - !----------------------------------------------------------------------------- - !> - !> Description of the subroutine GRD_output_hgrid - !> - subroutine GRD_output_hgrid( & - basename, & - output_vertex, & - io_mode ) - use mod_misc, only: & - MISC_make_idstr,& - MISC_get_available_fid - use mod_adm, only: & - ADM_proc_stop, & - ADM_prc_tab, & - ADM_prc_me, & - ADM_TI, & - ADM_TJ, & - ADM_gall, & - ADM_lall, & - ADM_KNONE - use mod_fio, only: & ! [add] H.Yashiro 20110819 - FIO_output, & - FIO_HMID, & - FIO_REAL8 - implicit none - - character(len=*), intent(in) :: basename ! output basename - logical, intent(in) :: output_vertex ! output flag of B-grid - character(len=*), intent(in) :: io_mode ! 
io_mode - - character(len=ADM_MAXFNAME) :: fname - character(len=FIO_HMID) :: desc = 'HORIZONTAL GRID FILE' - - integer :: fid - integer :: rgnid, l, K0 - !--------------------------------------------------------------------------- - - K0 = ADM_KNONE - - if ( io_mode == 'ADVANCED' ) then - - call FIO_output( GRD_x(:,:,:,GRD_XDIR), & - basename, desc, "", & - "grd_x_x", "GRD_x (X_DIR)", "", & - "NIL", FIO_REAL8, "ZSSFC1", K0, K0, 1, 0.D0, 0.D0 ) - call FIO_output( GRD_x(:,:,:,GRD_YDIR), & - basename, desc, '', & - 'grd_x_y', 'GRD_x (Y_DIR)', '', & - 'NIL', FIO_REAL8, 'ZSSFC1', K0, K0, 1, 0.D0, 0.D0 ) - call FIO_output( GRD_x(:,:,:,GRD_ZDIR), & - basename, desc, '', & - 'grd_x_z', 'GRD_x (Z_DIR)', '', & - 'NIL', FIO_REAL8, 'ZSSFC1', K0, K0, 1, 0.D0, 0.D0 ) - - if ( output_vertex ) then - call FIO_output( GRD_xt(:,:,:,ADM_TI,GRD_XDIR), & - basename, desc, '', & - 'grd_xt_ix', 'GRD_xt (TI,X_DIR)', '', & - 'NIL', FIO_REAL8, 'ZSSFC1', K0, K0, 1, 0.D0, 0.D0 ) - call FIO_output( GRD_xt(:,:,:,ADM_TJ,GRD_XDIR), & - basename, desc, '', & - 'grd_xt_jx', 'GRD_xt (TJ,X_DIR)', '', & - 'NIL', FIO_REAL8, 'ZSSFC1', K0, K0, 1, 0.D0, 0.D0 ) - call FIO_output( GRD_xt(:,:,:,ADM_TI,GRD_YDIR), & - basename, desc, '', & - 'grd_xt_iy', 'GRD_xt (TI,Y_DIR)', '', & - 'NIL', FIO_REAL8, 'ZSSFC1', K0, K0, 1, 0.D0, 0.D0 ) - call FIO_output( GRD_xt(:,:,:,ADM_TJ,GRD_YDIR), & - basename, desc, '', & - 'grd_xt_jy', 'GRD_xt (TJ,Y_DIR)', '', & - 'NIL', FIO_REAL8, 'ZSSFC1', K0, K0, 1, 0.D0, 0.D0 ) - call FIO_output( GRD_xt(:,:,:,ADM_TI,GRD_ZDIR), & - basename, desc, '', & - 'grd_xt_iz', 'GRD_xt (TI,Z_DIR)', '', & - 'NIL', FIO_REAL8, 'ZSSFC1', K0, K0, 1, 0.D0, 0.D0 ) - call FIO_output( GRD_xt(:,:,:,ADM_TJ,GRD_ZDIR), & - basename, desc, '', & - 'grd_xt_jz', 'GRD_xt (TJ,Z_DIR)', '', & - 'NIL', FIO_REAL8, 'ZSSFC1', K0, K0, 1, 0.D0, 0.D0 ) - endif - - elseif( io_mode == 'LEGACY' ) then - - do l = 1, ADM_lall - rgnid = ADM_prc_tab(l,ADM_prc_me) - call MISC_make_idstr(fname,trim(basename),'rgn',rgnid) - - fid = 
MISC_get_available_fid() - open( unit = fid, & - file=trim(fname), & - form='unformatted', & - access='direct', & - recl=ADM_gall*8 ) - - write(fid,rec=1) GRD_x(:,K0,l,GRD_XDIR) - write(fid,rec=2) GRD_x(:,K0,l,GRD_YDIR) - write(fid,rec=3) GRD_x(:,K0,l,GRD_ZDIR) - if ( output_vertex ) then - write(fid,rec=4) GRD_xt(:,K0,l,ADM_TI,GRD_XDIR) - write(fid,rec=5) GRD_xt(:,K0,l,ADM_TI,GRD_YDIR) - write(fid,rec=6) GRD_xt(:,K0,l,ADM_TI,GRD_ZDIR) - write(fid,rec=7) GRD_xt(:,K0,l,ADM_TJ,GRD_XDIR) - write(fid,rec=8) GRD_xt(:,K0,l,ADM_TJ,GRD_YDIR) - write(fid,rec=9) GRD_xt(:,K0,l,ADM_TJ,GRD_ZDIR) - endif - close(fid) - enddo - else - write(ADM_LOG_FID,*) 'Invalid io_mode!' - call ADM_proc_stop - endif - - return - end subroutine GRD_output_hgrid - - !----------------------------------------------------------------------------- - !> - !> Description of the subroutine GRD_input_hgrid - !> - subroutine GRD_input_hgrid( & - basename, & - input_vertex, & - io_mode ) - use mod_misc, only: & - MISC_make_idstr, & - MISC_get_available_fid - use mod_adm, only: & - ADM_proc_stop, & - ADM_prc_tab, & - ADM_prc_me, & - ADM_TI, & - ADM_TJ, & - ADM_gall, & - ADM_lall, & - ADM_KNONE - use mod_fio, only : & ! [add] H.Yashiro 20110819 - FIO_input - implicit none - - character(len=*), intent(in) :: basename ! input basename - logical, intent(in) :: input_vertex ! flag of B-grid input - character(len=*), intent(in) :: io_mode ! 
io_mode - - character(len=ADM_MAXFNAME) :: fname - - integer :: fid, ierr - integer :: rgnid, l, K0 - !--------------------------------------------------------------------------- - - K0 = ADM_KNONE - - if ( io_mode == 'ADVANCED' ) then - - call FIO_input(GRD_x(:,:,:,GRD_XDIR),basename,'grd_x_x','ZSSFC1',K0,K0,1) - call FIO_input(GRD_x(:,:,:,GRD_YDIR),basename,'grd_x_y','ZSSFC1',K0,K0,1) - call FIO_input(GRD_x(:,:,:,GRD_ZDIR),basename,'grd_x_z','ZSSFC1',K0,K0,1) - if ( input_vertex ) then - call FIO_input(GRD_xt(:,:,:,ADM_TI,GRD_XDIR),basename, & - 'grd_xt_ix','ZSSFC1',K0,K0,1 ) - call FIO_input(GRD_xt(:,:,:,ADM_TJ,GRD_XDIR),basename, & - 'grd_xt_jx','ZSSFC1',K0,K0,1 ) - call FIO_input(GRD_xt(:,:,:,ADM_TI,GRD_YDIR),basename, & - 'grd_xt_iy','ZSSFC1',K0,K0,1 ) - call FIO_input(GRD_xt(:,:,:,ADM_TJ,GRD_YDIR),basename, & - 'grd_xt_jy','ZSSFC1',K0,K0,1 ) - call FIO_input(GRD_xt(:,:,:,ADM_TI,GRD_ZDIR),basename, & - 'grd_xt_iz','ZSSFC1',K0,K0,1 ) - call FIO_input(GRD_xt(:,:,:,ADM_TJ,GRD_ZDIR),basename, & - 'grd_xt_jz','ZSSFC1',K0,K0,1 ) - endif - - elseif( io_mode == 'LEGACY' ) then - - do l = 1, ADM_lall - rgnid = ADM_prc_tab(l,ADM_prc_me) - call MISC_make_idstr(fname,trim(basename),'rgn',rgnid) - - fid = MISC_get_available_fid() - open( unit = fid, & - file = trim(fname), & - form = 'unformatted', & - access = 'direct', & - recl = ADM_gall*8, & - status = 'old', & - iostat = ierr ) - - if ( ierr /= 0 ) then - write(ADM_LOG_FID,*) 'xxx Error occured in reading grid file.', trim(fname) - call ADM_proc_stop - endif - - read(fid,rec=1) GRD_x(:,K0,l,GRD_XDIR) - read(fid,rec=2) GRD_x(:,K0,l,GRD_YDIR) - read(fid,rec=3) GRD_x(:,K0,l,GRD_ZDIR) - if ( input_vertex ) then - read(fid,rec=4) GRD_xt(:,K0,l,ADM_TI,GRD_XDIR) - read(fid,rec=5) GRD_xt(:,K0,l,ADM_TI,GRD_YDIR) - read(fid,rec=6) GRD_xt(:,K0,l,ADM_TI,GRD_ZDIR) - read(fid,rec=7) GRD_xt(:,K0,l,ADM_TJ,GRD_XDIR) - read(fid,rec=8) GRD_xt(:,K0,l,ADM_TJ,GRD_YDIR) - read(fid,rec=9) GRD_xt(:,K0,l,ADM_TJ,GRD_ZDIR) - endif - close(fid) 
- enddo - - else - write(ADM_LOG_FID,*) 'Invalid io_mode!' - call ADM_proc_stop - endif - - call GRD_gen_plgrid - - return - end subroutine GRD_input_hgrid - - !----------------------------------------------------------------------------- - !> - !> Description of the subroutine GRD_input_vgrid - !> - subroutine GRD_input_vgrid( fname ) - use mod_misc, only: & - MISC_get_available_fid - use mod_adm, only: & - ADM_LOG_FID, & - ADM_vlayer, & - ADM_proc_stop - implicit none - - character(len=ADM_MAXFNAME), intent(in) :: fname ! vertical grid file name - - integer :: num_of_layer - integer :: fid, ierr - !--------------------------------------------------------------------------- - - fid = MISC_get_available_fid() - open( unit = fid, & - file = trim(fname), & - status = 'old', & - form = 'unformatted', & - iostat = ierr ) - - if ( ierr /= 0 ) then - write(ADM_LOG_FID,*) 'xxx No vertical grid file.' - call ADM_proc_stop - endif - - read(fid) num_of_layer - - if ( num_of_layer /= ADM_vlayer ) then - write(ADM_LOG_FID,*) 'xxx inconsistency in number of vertical layers.' 
- call ADM_proc_stop - endif - - read(fid) GRD_gz - read(fid) GRD_gzh - - close(fid) - - return - end subroutine GRD_input_vgrid - - !----------------------------------------------------------------------------- - !> - !> Description of the subroutine GRD_output_vgrid - !> - subroutine GRD_output_vgrid( fname ) - use mod_misc, only: & - MISC_get_available_fid - use mod_adm, only: & - ADM_vlayer - implicit none - - character(len=*), intent(in) :: fname - - integer :: fid - !--------------------------------------------------------------------------- - - fid = MISC_get_available_fid() - open(fid,file=trim(fname),form='unformatted') - write(fid) ADM_vlayer - write(fid) GRD_gz - write(fid) GRD_gzh - close(fid) - - return - end subroutine GRD_output_vgrid - - !----------------------------------------------------------------------------- - !> - !> Description of the subroutine GRD_input_topograph - !> - subroutine GRD_input_topograph( & - basename, & - i_var ) - use mod_misc, only: & - MISC_make_idstr,& - MISC_get_available_fid - use mod_adm, only: & - ADM_LOG_FID, & - ADM_prc_tab, & - ADM_prc_me, & - ADM_PRC_PL, & - ADM_lall, & - ADM_gall, & - ADM_KNONE - use mod_fio, only: & - FIO_input - implicit none - - character(len=*), intent(in) :: basename - integer, intent(in) :: i_var - - character(len=16) :: varname(3) - data varname / 'topo', 'topo_stddev', 'vegeindex' / - - character(len=128) :: fname - integer :: ierr - integer :: l, rgnid, fid - !--------------------------------------------------------------------------- - - if ( topo_io_mode == 'ADVANCED' ) then - topo_direct_access = .true. 
- - call FIO_input(GRD_zs(:,:,:,i_var),basename,varname(i_var),'ZSSFC1',1,1,1) - - elseif( topo_io_mode == 'LEGACY' ) then - - if ( topo_direct_access ) then !--- direct access ( defalut ) - do l = 1, ADM_lall - rgnid = ADM_prc_tab(l,ADM_prc_me) - call MISC_make_idstr(fname,trim(basename),'rgn',rgnid) - fid = MISC_get_available_fid() - - open( fid, & - file = trim(fname), & - form = 'unformatted', & - access = 'direct', & - recl = ADM_gall*8, & - status = 'old' ) - - read(fid,rec=1) GRD_zs(:,ADM_KNONE,l,i_var) - - close(fid) - enddo - else !--- sequential access - do l = 1, ADM_lall - rgnid = ADM_prc_tab(l,ADM_prc_me) - call MISC_make_idstr(fname,trim(basename),'rgn',rgnid) - fid = MISC_get_available_fid() - - open(fid,file=trim(fname),status='old',form='unformatted',iostat=ierr) - if ( ierr /= 0 ) then - write(ADM_LOG_FID,*) 'Msg : Sub[GRD_input_topograph]/Mod[grid]' - write(ADM_LOG_FID,*) ' *** No topographical file. Number :', i_var - return - endif - - read(fid) GRD_zs(:,ADM_KNONE,l,i_var) - close(fid) - enddo - - if ( ADM_prc_me == ADM_prc_pl ) then - fname = trim(basename)//'.pl' - fid = MISC_get_available_fid() - - open(fid,file=trim(fname),status='old',form='unformatted') - read(fid) GRD_zs_pl(:,:,:,i_var) - close(fid) - endif - endif !--- direct/sequencial - - endif !--- io_mode - - return - end subroutine GRD_input_topograph - - !----------------------------------------------------------------------------- - !> - !> Description of the subroutine GRD_gen_plgrid - !> - subroutine GRD_gen_plgrid - use mod_adm, only: & - ADM_rgn_nmax, & - ADM_rgn_vnum, & - ADM_rgn_vtab, & - ADM_rgn2prc, & - ADM_RID, & - ADM_VLINK_NMAX, & - ADM_COMM_RUN_WORLD, & - ADM_prc_tab, & - ADM_prc_me, & - ADM_prc_npl, & - ADM_prc_spl, & - ADM_TI, & - ADM_TJ, & - ADM_N, & - ADM_S, & - ADM_NPL, & - ADM_SPL, & - ADM_lall, & - ADM_gall_1d, & - ADM_gmax, & - ADM_gmin, & - ADM_KNONE, & - ADM_GSLF_PL - use mod_comm, only: & - COMM_var - implicit none - - integer :: prctab (ADM_VLINK_NMAX) - 
integer :: rgntab (ADM_VLINK_NMAX) - integer :: sreq (ADM_VLINK_NMAX) - integer :: rreq (ADM_VLINK_NMAX) - logical :: send_flag(ADM_VLINK_NMAX) - -! real(8) :: v_pl(GRD_XDIR:GRD_ZDIR,ADM_VLINK_NMAX) - real(8) :: vsend_pl(GRD_XDIR:GRD_ZDIR,ADM_VLINK_NMAX) ! [mod] H.Yashiro 20120525 - !--- 2020 Fujitsu - integer(8) :: vsend_pl_l_desc - integer(8) :: vsend_pl_l_lb(2), vsend_pl_l_ub(2) - integer(8) :: vsend_pl_l_sec - !--- 2020 Fujitsu end - real(8) :: vrecv_pl(GRD_XDIR:GRD_ZDIR,ADM_VLINK_NMAX) ! [mod] H.Yashiro 20120525 -!coarray - !--- 2020 Fujitsu - !real(8) :: vrecv_plc(GRD_XDIR:GRD_ZDIR,5)[*] !! not used - !real(8),allocatable :: vrecv_plA(:,:)[:] - integer , POINTER :: vrecv_plA ( : , : ) => null ( ) - integer(8) :: vrecv_plA_desc - integer(8) :: vrecv_plA_lb, vrecv_plA_ub - integer(8) :: vrecv_plA_sec - integer(4) :: img_dims(1) - !--- 2020 Fujitsu end - -!coarray integer :: istat(MPI_STATUS_SIZE) - integer :: n, l, ierr - - integer :: suf, i, j - suf(i,j) = ADM_gall_1d * ((j)-1) + (i) - !--------------------------------------------------------------------------- -!coarray - !--- 2020 Fujitsu - !allocate(vrecv_plA(GRD_XDIR:GRD_ZDIR,5)[*]) - vrecv_plA_lb(1) = GRD_XDIR; vrecv_plA_ub(1) = GRD_ZDIR - vrecv_plA_lb(2) = 1; vrecv_plA_ub(2) = 5 - call xmp_new_coarray(vrecv_plA_desc, 8, 2, vrecv_plA_lb, vrecv_plA_ub, 1, img_dims) - call xmp_coarray_bind(vrecv_plA_desc, vrecv_plA) - - vsend_pl_l_lb(1) = GRD_XDIR; vsend_pl_l_ub(1) = GRD_ZDIR - vsend_pl_l_lb(2) = 1; vsend_pl_l_ub(2) = ADM_VLINK_NMAX - call xmp_new_local_array(vsend_pl_l_desc, 8, 2, vsend_pl_l_lb, vsend_pl_l_ub, loc(vsend_pl)) - - call xmp_new_array_section(vsend_pl_l_sec, 2) - call xmp_new_array_section(vrecv_plA_sec, 2) - !--- 2020 Fujitsu end - vrecv_pl (:,:) = 0.d0 - !vrecv_plc(:,:) = 0.d0 !--- 2020 Fujitsu --- - vrecv_plA(:,:) = 0.d0 - - !--- control volume points at the north pole - do l = ADM_rgn_nmax, 1, -1 - if ( ADM_rgn_vnum(ADM_N,l) == ADM_VLINK_NMAX ) then - do n = 1, ADM_VLINK_NMAX - rgntab(n) = 
ADM_rgn_vtab(ADM_RID,ADM_N,l,n) - prctab(n) = ADM_rgn2prc(rgntab(n)) - enddo - exit - endif - enddo - - send_flag(:) = .false. - - do n = 1, ADM_VLINK_NMAX - do l = 1, ADM_lall - !--- 2020 Fujitsu - !sync all - call xmp_sync_all(ierr) - !--- 2020 Fujitsu end - if ( ADM_prc_tab(l,ADM_prc_me) == rgntab(n) ) then - vsend_pl(:,n) = GRD_xt(suf(ADM_gmin,ADM_gmax),ADM_KNONE,l,ADM_TJ,:) ! [mod] H.Yashiro 20120525 - -!coarray -! call MPI_ISEND( vsend_pl(:,n), & ! [mod] H.Yashiro 20120525 -! 3, & -! MPI_DOUBLE_PRECISION, & -! ADM_prc_npl-1, & -! rgntab(n), & -! ADM_COMM_RUN_WORLD, & -! sreq(n), & -! ierr ) - !--- 2020 Fujitsu - !vrecv_plA(:,n)[ADM_prc_npl] = vsend_pl(:,n) - call xmp_array_section_set_triplet(vrecv_plA_sec, 1, GRD_XDIR, GRD_ZDIR, 1, ierr) - call xmp_array_section_set_triplet(vrecv_plA_sec, 2, n, n, 1, ierr) - call xmp_array_section_set_triplet(vsend_pl_l_sec, 1, GRD_XDIR, GRD_ZDIR, 1, ierr) - call xmp_array_section_set_triplet(vsend_pl_l_sec, 2, n, n, 1, ierr) - img_dims(1) = ADM_prc_npl - call xmp_coarray_put_local(img_dims, vrecv_plA_desc, vrecv_plA_sec, vsend_pl_l_desc, vsend_pl_l_sec, ierr) - !--- 2020 Fujitsu end - - send_flag(n) = .true. - endif - !--- 2020 Fujitsu - !sync all - call xmp_sync_all(ierr) - !--- 2020 Fujitsu end - enddo - enddo - -! if ( ADM_prc_me == ADM_prc_npl ) then -! do n = 1, ADM_VLINK_NMAX -! call MPI_IRECV( vrecv_pl(:,n), & ! [mod] H.Yashiro 20120525 -! 3, & -! MPI_DOUBLE_PRECISION, & -! prctab(n)-1, & -! rgntab(n), & -! ADM_COMM_RUN_WORLD, & -! rreq(n), & -! ierr ) -! enddo -! endif - -! do n = 1, ADM_VLINK_NMAX -! if ( send_flag(n) ) then -! call MPI_WAIT(sreq(n),istat,ierr) -! endif -! enddo - - if ( ADM_prc_me == ADM_prc_npl ) then - do n = 1, ADM_VLINK_NMAX -! call MPI_WAIT(rreq(n),istat,ierr) -! GRD_xt_pl(n+1,ADM_KNONE,ADM_NPL,:) = vrecv_pl(:,n) ! 
[mod] H.Yashiro 20120525 -!coarray - GRD_xt_pl(n+1,ADM_KNONE,ADM_NPL,:) = vrecv_plA(:,n) - enddo - endif - -!----------------------------------------------------------------------------------------- - - !--- control volume points at the sourth pole - do l = 1, ADM_rgn_nmax - if ( ADM_rgn_vnum(ADM_S,l) == ADM_VLINK_NMAX ) then - do n = 1, ADM_VLINK_NMAX - rgntab(n) = ADM_rgn_vtab(ADM_RID,ADM_S,l,n) - prctab(n) = ADM_rgn2prc(rgntab(n)) - enddo - exit - endif - enddo - - send_flag(:) = .false. - - do n = 1, ADM_VLINK_NMAX - do l =1, ADM_lall - !--- 2020 Fujitsu - !sync all - call xmp_sync_all(ierr) - !--- 2020 Fujitsu end - if (ADM_prc_tab(l,ADM_prc_me) == rgntab(n) ) then - vsend_pl(:,n) = GRD_xt(suf(ADM_gmax,ADM_gmin),ADM_KNONE,l,ADM_TI,:) ! [mod] H.Yashiro 20120525 -!coarray -! call MPI_ISEND( vsend_pl(:,n), & ! [mod] H.Yashiro 20120525 -! 3, & -! MPI_DOUBLE_PRECISION, & -! ADM_prc_spl-1, & -! rgntab(n), & -! ADM_COMM_RUN_WORLD, & -! sreq(n), & -! ierr ) - !--- 2020 Fujitsu - !vrecv_plA(:,n)[ADM_prc_npl] = vsend_pl(:,n) - call xmp_array_section_set_triplet(vrecv_plA_sec, 1, GRD_XDIR, GRD_ZDIR, 1, ierr) - call xmp_array_section_set_triplet(vrecv_plA_sec, 2, n, n, 1, ierr) - call xmp_array_section_set_triplet(vsend_pl_l_sec, 1, GRD_XDIR, GRD_ZDIR, 1, ierr) - call xmp_array_section_set_triplet(vsend_pl_l_sec, 2, n, n, 1, ierr) - img_dims(1) = ADM_prc_npl - call xmp_coarray_put_local(img_dims, vrecv_plA_desc, vrecv_plA_sec, vsend_pl_l_desc, vsend_pl_l_sec, ierr) - !--- 2020 Fujitsu end - - send_flag(n) = .true. - endif - !--- 2020 Fujitsu - !sync all - call xmp_sync_all(ierr) - !--- 2020 Fujitsu end - enddo - enddo - -!coarray -! if ( ADM_prc_me == ADM_prc_spl ) then -! do n = 1, ADM_VLINK_NMAX -! call MPI_IRECV( vrecv_pl(:,n), & ! [mod] H.Yashiro 20120525 -! 3, & -! MPI_DOUBLE_PRECISION, & -! prctab(n)-1, & -! rgntab(n), & -! ADM_COMM_RUN_WORLD, & -! rreq(n), & -! ierr ) -! enddo -! endif - -! do n = 1, ADM_VLINK_NMAX -! if ( send_flag(n) ) then -! 
call MPI_WAIT(sreq(n),istat,ierr) -! endif -! enddo - - if ( ADM_prc_me == ADM_prc_spl ) then - do n = 1, ADM_VLINK_NMAX -!coarray -! call MPI_WAIT(rreq(n),istat,ierr) -! GRD_xt_pl(n+1,ADM_KNONE,ADM_SPL,:) = vrecv_pl(:,n) ! [mod] H.Yashiro 20120525 - GRD_xt_pl(n+1,ADM_KNONE,ADM_SPL,:) = vrecv_plA(:,n) - enddo - endif - - !--- grid point communication - call COMM_var(GRD_x,GRD_x_pl,ADM_KNONE,3,comm_type=2,NSval_fix=.false.) - GRD_xt_pl(ADM_GSLF_PL,:,:,:) = GRD_x_pl(ADM_GSLF_PL,:,:,:) - - !--- 2020 Fujitsu - call xmp_free_array_section(vsend_pl_l_sec) - call xmp_free_array_section(vrecv_plA_sec) - - call xmp_coarray_deallocate(vrecv_plA_desc, ierr) - call xmp_free_local_array(vsend_pl_l_desc) - !--- 2020 Fujitsu end - return - end subroutine GRD_gen_plgrid - - !----------------------------------------------------------------------------- - ! [ADD] R.Yoshida 20121020 - ! imported from ENDGame UK Met.office. - !----------------------------------------------------------------------------- - subroutine GRD_jbw_init_topo() - use mod_misc, only : & - MISC_get_latlon - use mod_adm, only : & - ADM_lall, & - ADM_gall, & - ADM_gall_pl, & - ADM_lall_pl, & - ADM_KNONE, & - ADM_prc_me, & - ADM_prc_pl, & - ADM_LOG_FID - use mod_cnst, only: & - CNST_PI, & - CNST_ERADIUS, & - CNST_EOHM, & - CNST_EGRAV, & - CNST_RAIR - implicit none - - real(8), parameter :: u00 = 35.D0 - - real(8) :: cs32ev, f1, f2 - real(8) :: lat, lon - real(8) :: rsurf (ADM_gall ,ADM_lall ) ! surface height in ICO-grid - real(8) :: rsurf_p(ADM_gall_pl,ADM_lall_pl) ! surface height in ICO-grid for pole region - - integer :: n, l, k0 - !--------------------------------------------------------------------------- - - k0 = ADM_KNONE - - cs32ev = ( cos( (1.D0-0.252D0) * CNST_PI * 0.5D0 ) )**1.5D0 - - ! 
for globe - do l = 1, ADM_lall - do n = 1, ADM_gall - call MISC_get_latlon( lat, lon, & - GRD_x(n,k0,l,GRD_XDIR), & - GRD_x(n,k0,l,GRD_YDIR), & - GRD_x(n,k0,l,GRD_ZDIR) ) - - f1 = 10.D0/63.D0 - 2.D0 * sin(lat)**6 * ( cos(lat)**2 + 1.D0/3.D0 ) - f2 = 1.6D0 * cos(lat)**3 * ( sin(lat)**2 + 2.D0/3.D0 ) - 0.25D0 * CNST_PI - - rsurf(n,l) = u00 * cs32ev * ( f1*u00*cs32ev + f2*CNST_ERADIUS*CNST_EOHM ) / CNST_EGRAV - enddo - enddo - - do l=1, ADM_lall - do n=1, ADM_gall - GRD_zs(n,k0,l,GRD_ZSFC) = rsurf(n,l) - enddo - enddo - - ! for pole region - if ( ADM_prc_me == ADM_prc_pl ) then - do l = 1, ADM_lall_pl - do n = 1, ADM_gall_pl - call MISC_get_latlon( lat, lon, & - GRD_x_pl(n,k0,l,GRD_XDIR), & - GRD_x_pl(n,k0,l,GRD_YDIR), & - GRD_x_pl(n,k0,l,GRD_ZDIR) ) - - f1 = 10.D0/63.D0 - 2.D0 * sin(lat)**6 * ( cos(lat)**2 + 1.D0/3.D0 ) - f2 = 1.6D0 * cos(lat)**3 * ( sin(lat)**2 + 2.D0/3.D0 ) - 0.25D0 * CNST_PI - - rsurf_p(n,l) = u00 * cs32ev * ( f1*u00*cs32ev + f2*CNST_ERADIUS*CNST_EOHM ) / CNST_EGRAV - enddo - enddo - - do l=1, ADM_lall_pl - do n=1, ADM_gall_pl - GRD_zs_pl(n,k0,l,GRD_ZSFC) = rsurf_p(n,l) - enddo - enddo - endif - - write(ADM_LOG_FID,*) 'Msg : Sub[GRD_input_topograph]/Mod[grid]' - write(ADM_LOG_FID, '(" *** Topography for JBW: -- MAX: ",F9.3,2X,"MIN: ",F9.3)') & - maxval(GRD_zs(:,:,:,GRD_ZSFC)), minval(GRD_zs(:,:,:,GRD_ZSFC)) - - return - end subroutine GRD_jbw_init_topo - - !----------------------------------------------------------------------------- - ! [ADD] R.Yoshida 20130328 - ! mountain of dcmip 2012 setting - !----------------------------------------------------------------------------- - subroutine GRD_mwave_init_topo() - use mod_misc, only : & - MISC_get_latlon - use mod_adm, only : & - ADM_lall, & - ADM_gall, & - ADM_gall_pl, & - ADM_lall_pl, & - ADM_KNONE, & - ADM_prc_me, & - ADM_prc_pl, & - ADM_LOG_FID - use mod_cnst, only: & - CNST_PI - implicit none - - ! 
- real(8),parameter :: FAI_M =0.d0 - real(8),parameter :: H_ZERO = 250.d0 - real(8),parameter :: QSI = 4000.d0 - real(8),parameter :: a_ref = 6371220.0D0 - real(8),parameter :: X_reduce = 500.d0 - real(8),parameter :: HALF_WIDTH = 5000.0d0 - - real(8) :: dist_m, aa, bb, LAMBDA_M - real(8) :: lat, lon - integer :: n, l, K0 - !--------------------------------------------------------------------------- - - LAMBDA_M=CNST_PI/4.d0 - K0 = ADM_KNONE - - ! for globe - do l=1, ADM_lall - do n=1, ADM_gall - call MISC_get_latlon( lat, lon, & - GRD_x(n,K0,l,GRD_XDIR), & - GRD_x(n,K0,l,GRD_YDIR), & - GRD_x(n,K0,l,GRD_ZDIR) ) - - dist_m = (a_ref/X_reduce)*acos (sin (FAI_M)*sin (lat) & - +cos (FAI_M)*cos (lat)*cos (lon-LAMBDA_M)) - - aa = exp(- (dist_m)**2.0 / HALF_WIDTH**2.0d0) - bb = cos(CNST_PI*dist_m/QSI)**2.0d0 - GRD_zs(n,ADM_KNONE,l,GRD_ZSFC) = H_ZERO * aa * bb ! equation (76) in dcmip reference - enddo - enddo - - ! for pole region - if ( ADM_prc_me==ADM_prc_pl ) then - do l=1, ADM_lall_pl - do n=1, ADM_gall_pl - call MISC_get_latlon( lat, lon, & - GRD_x(n,K0,l,GRD_XDIR), & - GRD_x(n,K0,l,GRD_YDIR), & - GRD_x(n,K0,l,GRD_ZDIR) ) - - dist_m = (a_ref/X_reduce)*acos (sin (FAI_M)*sin (lat)& - +cos (FAI_M)*cos (lat)*cos (lon-LAMBDA_M)) - - aa = exp(- (dist_m)**2.0 / HALF_WIDTH**2.0d0) - bb = cos(CNST_PI*dist_m/QSI)**2.0d0 - GRD_zs_pl(n,ADM_KNONE,l,GRD_ZSFC) = H_ZERO * aa * bb ! 
equation (76) in dcmip reference - enddo - enddo - endif - - write(ADM_LOG_FID,*) 'Msg : Sub[GRD_input_topograph]/Mod[grid]' - write (ADM_LOG_FID, '(" *** Topography for mwave: -- MAX: ",F9.3,2X,"MIN: ",F9.3)') & - maxval(GRD_zs(:,:,:,GRD_ZSFC)), minval(GRD_zs(:,:,:,GRD_ZSFC)) - return - end subroutine GRD_mwave_init_topo - -end module mod_grd -!------------------------------------------------------------------------------- diff --git a/NICAM-DC-MINI/sysdep/Makedef.Linux64-gnu-ompi-xmpAPI b/NICAM-DC-MINI/sysdep/Makedef.Linux64-gnu-ompi-xmpAPI index 99d45d0..847d9f8 100644 --- a/NICAM-DC-MINI/sysdep/Makedef.Linux64-gnu-ompi-xmpAPI +++ b/NICAM-DC-MINI/sysdep/Makedef.Linux64-gnu-ompi-xmpAPI @@ -20,7 +20,7 @@ FFLAGS_DEBUG = -O0 -m64 \ FC = mpif90 FFLAGS = -cpp -fconvert=big-endian $(FFLAGS_FAST) #FFLAGS = $(FFLAGS_DEBUG) -FFLAGS += -x f95-cpp-input +FFLAGS += -x f95-cpp-input -fopenmp # if gcc < 4.5: -M, else if gcc >= 4.6: -J MODDIROPT ?= -J @@ -32,7 +32,7 @@ LD = $(FC) # to avoid "-x f95-cpp-input" option LFLAGS = $(FFLAGS_FAST) #LFLAGS = $(FFLAGS_DEBUG) -LFLAGS += -L$(OMNI_HOME)/lib -lxmp +LFLAGS += -fopenmp -L$(OMNI_HOME)/lib -lxmp ##### for frontend INSTALL = install From 19f894c0914a6c00be61073edc720925a6f701a9 Mon Sep 17 00:00:00 2001 From: Yoshikawa Hiroyuki Date: Tue, 9 Mar 2021 15:56:33 +0900 Subject: [PATCH 41/70] modify POINTER type --- NICAM-DC-MINI/src/share/mod_adm_xmpAPI.f90 | 4 ++-- NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 | 12 ++++++------ NICAM-DC-MINI/src/share/mod_grd_xmpAPI.f90 | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/NICAM-DC-MINI/src/share/mod_adm_xmpAPI.f90 b/NICAM-DC-MINI/src/share/mod_adm_xmpAPI.f90 index 66bc9fb..45773f2 100755 --- a/NICAM-DC-MINI/src/share/mod_adm_xmpAPI.f90 +++ b/NICAM-DC-MINI/src/share/mod_adm_xmpAPI.f90 @@ -396,7 +396,7 @@ subroutine ADM_proc_init( rtype ) !--- 2020 Fujitsu call xmp_api_init - call MPI_Init(ierr) + !!!call MPI_Init(ierr) call MPI_Comm_size(MPI_COMM_WORLD, ADM_prc_all, ierr) 
!ADM_prc_all = num_images() !coarray call MPI_Comm_rank(MPI_COMM_WORLD, my_rank, ierr) @@ -433,7 +433,7 @@ subroutine ADM_proc_stop !coarray character(len=ADM_NSYS) :: request !--- 2020 Fujitsu !character(len=ADM_NSYS) :: request[*] - integer, pointer :: request(:) => null() + integer, POINTER :: request(:) => null() integer(8) :: request_desc integer(8), dimension(1) :: request_lb, request_ub integer(8) :: request_sec diff --git a/NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 b/NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 index dd2c103..58f9be1 100755 --- a/NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 +++ b/NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 @@ -2708,7 +2708,7 @@ subroutine COMM_data_transfer(& !real(8),allocatable :: caf_recvbuf(:,:)[:] integer bufsize1,bufsize2 integer , POINTER :: tbl ( : ) => null ( ) - integer , POINTER :: caf_recvbuf ( : , : ) => null ( ) + real(8) , POINTER :: caf_recvbuf ( : , : ) => null ( ) integer(8) :: tbl_desc, caf_recvbuf_desc integer(8) :: tbl_lb(1),tbl_ub(1), caf_recvbuf_lb(2), caf_recvbuf_ub(2) integer(8) :: tbl_sec, caf_recvbuf_sec @@ -3404,8 +3404,8 @@ subroutine COMM_data_transfer_rgn2pl( & !--- 2020 Fujitsu !real(8),allocatable :: v_npl_recvc(:,:)[:] !real(8),allocatable :: v_spl_recvc(:,:)[:] - integer , POINTER :: v_npl_recvc ( : , : ) => null ( ) - integer , POINTER :: v_spl_recvc ( : , : ) => null ( ) + real(8) , POINTER :: v_npl_recvc ( : , : ) => null ( ) + real(8) , POINTER :: v_spl_recvc ( : , : ) => null ( ) integer(8) :: v_npl_recvc_desc, v_spl_recvc_desc integer(8) :: v_npl_lb(2), v_npl_ub(2), v_spl_lb(2), v_spl_ub(2) integer(8) :: v_npl_sec, v_spl_sec @@ -3647,8 +3647,8 @@ subroutine COMM_var( & !--- 2020 Fujitsu !real(8),allocatable :: v_npl_recvc(:,:)[:] !real(8),allocatable :: v_spl_recvc(:,:)[:] - integer , POINTER :: v_npl_recvc ( : , : ) => null ( ) - integer , POINTER :: v_spl_recvc ( : , : ) => null ( ) + real(8) , POINTER :: v_npl_recvc ( : , : ) => null ( ) + real(8) , POINTER :: v_spl_recvc ( : , : 
) => null ( ) integer(8) :: v_npl_recvc_desc, v_spl_recvc_desc integer(8) :: v_npl_lb(2), v_npl_ub(2), v_spl_lb(2), v_spl_ub(2) integer(8) :: v_npl_sec, v_spl_sec @@ -3873,7 +3873,7 @@ subroutine COMM_data_transfer_nopl(& !integer,allocatable :: tbl(:)[:] !real(8),allocatable :: caf_recvbuf(:,:)[:] integer , POINTER :: tbl ( : ) => null ( ) - integer , POINTER :: caf_recvbuf ( : , : ) => null ( ) + real(8) , POINTER :: caf_recvbuf ( : , : ) => null ( ) integer(8) :: tbl_desc, caf_recvbuf_desc integer(8) :: tbl_lb(1),tbl_ub(1), caf_recvbuf_lb(2), caf_recvbuf_ub(2) integer(8) :: tbl_sec, caf_recvbuf_sec diff --git a/NICAM-DC-MINI/src/share/mod_grd_xmpAPI.f90 b/NICAM-DC-MINI/src/share/mod_grd_xmpAPI.f90 index 67bd5f8..552e0e0 100755 --- a/NICAM-DC-MINI/src/share/mod_grd_xmpAPI.f90 +++ b/NICAM-DC-MINI/src/share/mod_grd_xmpAPI.f90 @@ -1045,7 +1045,7 @@ subroutine GRD_gen_plgrid !--- 2020 Fujitsu !real(8) :: vrecv_plc(GRD_XDIR:GRD_ZDIR,5)[*] !! not used !real(8),allocatable :: vrecv_plA(:,:)[:] - integer , POINTER :: vrecv_plA ( : , : ) => null ( ) + real(8) , POINTER :: vrecv_plA ( : , : ) => null ( ) integer(8) :: vrecv_plA_desc integer(8) :: vrecv_plA_lb(2), vrecv_plA_ub(2) integer(8) :: vrecv_plA_sec From cd3c8aa52943c6ad4f18837e5aac265faeb7320b Mon Sep 17 00:00:00 2001 From: Yoshikawa Hiroyuki Date: Tue, 9 Mar 2021 19:05:57 +0900 Subject: [PATCH 42/70] add xmpAPI configs for NICAM on Fugaku --- NICAM-DC-MINI/sysdep/Makedef.Fugaku-xmpAPI | 47 +++++++++++ .../sysdep/Mkjobshell.Fugaku-xmpAPI.sh | 78 +++++++++++++++++++ 2 files changed, 125 insertions(+) create mode 100644 NICAM-DC-MINI/sysdep/Makedef.Fugaku-xmpAPI create mode 100644 NICAM-DC-MINI/sysdep/Mkjobshell.Fugaku-xmpAPI.sh diff --git a/NICAM-DC-MINI/sysdep/Makedef.Fugaku-xmpAPI b/NICAM-DC-MINI/sysdep/Makedef.Fugaku-xmpAPI new file mode 100644 index 0000000..e6f09da --- /dev/null +++ b/NICAM-DC-MINI/sysdep/Makedef.Fugaku-xmpAPI @@ -0,0 +1,47 @@ +# +# ------ FOR Fugaku computer ----- +# +OMNI_HOME=$(shell xmpcc 
--show-env | grep OMNI_HOME | sed -e 's/OMNI_HOME=//' -e 's/"//g') +OMNI_INC = -I$(OMNI_HOME)/include +OMNI_LIB = $(shell xmpcc --show-env | grep OMNI_LINKER_OPT | sed -e 's/OMNI_LINKER_OPT=//' -e 's/"//g') + +##### for computation + +FFLAGS_FAST = -Kfast,parallel,auto,ocl,preex,array_private,noalias=s,mfunc=2 \ + -Kparallel_iteration=8,instance=8,dynamic_iteration \ + -Qi -Qt -X03 -Ncompdisp -Koptmsg=1 -Cpp \ + -Kprefetch_cache_level=all,prefetch_iteration_L2=50 -Ksimd \ + $(OMNI_INC) + +FFLAGS_DEBUG = -O0 \ + -Qi -Qt -X03 -v03s -v03d -v03o -Ncompdisp -Koptmsg=1 -Cpp \ + -Ec -Eg -Ha -He -Hf -Ho -Hs -Hu -Hx -Ncheck_global \ + $(OMNI_INC) +# -DDEBUG + +# Performance monitor +# disable +PERF_MONIT = -Ntl_notrt -U_FIPP_ -U_FAPP_ +# fipp +#PERF_MONIT = -Ntl_trt -D_FIPP_ -U_FAPP_ +# fapp +#PERF_MONIT = -Ntl_trt -U_FIPP_ -D_FAPP_ + +FC = mpifrtpx +FFLAGS = -cpp $(FFLAGS_FAST) $(PERF_MONIT) $(RDMA) +#FFLAGS = $(FFLAGS_DEBUG) $(PERF_MONIT) $(RDMA) + +MODDIROPT ?= -M + +CC = mpifccpx +CFLAGS = -Kfast,parallel,ocl,preex,array_private,region_extension,restp=all -Qt -Ksimd $(PERF_MONIT) + +LD = $(FC) +LFLAGS = $(FFLAGS) $(OMNI_LIB) + +##### for frontend +INSTALL = install +AR = ar +ARFLAGS = r +RANLIB = ranlib +JOBSUB = pjsub diff --git a/NICAM-DC-MINI/sysdep/Mkjobshell.Fugaku-xmpAPI.sh b/NICAM-DC-MINI/sysdep/Mkjobshell.Fugaku-xmpAPI.sh new file mode 100644 index 0000000..0c614f4 --- /dev/null +++ b/NICAM-DC-MINI/sysdep/Mkjobshell.Fugaku-xmpAPI.sh @@ -0,0 +1,78 @@ +#! 
/bin/bash -x + +GLEV=${1} +RLEV=${2} +NMPI=${3} +ZL=${4} +VGRID=${5} +TOPDIR=${6} +BINNAME=${7} +RUNCONF=${8} + +# System specific +MPIEXEC="mpiexec" + +GL=`printf %02d ${GLEV}` +RL=`printf %02d ${RLEV}` +if [ ${NMPI} -ge 10000 ]; then + NP=`printf %05d ${NMPI}` +elif [ ${NMPI} -ge 1000 ]; then + NP=`printf %04d ${NMPI}` +elif [ ${NMPI} -ge 100 ]; then + NP=`printf %03d ${NMPI}` +else + NP=`printf %02d ${NMPI}` +fi + +dir2d=gl${GL}rl${RL}pe${NP} +dir3d=gl${GL}rl${RL}z${ZL}pe${NP} +res2d=GL${GL}RL${RL} +res3d=GL${GL}RL${RL}z${ZL} + +MNGINFO=rl${RL}-prc${NP}.info + +if [ ${NMPI} -gt 36864 ]; then + rscgrp="huge" +elif [ ${NMPI} -gt 384 ]; then + rscgrp="large" +else + rscgrp="small" +fi + +outdir=${dir3d} +cd ${outdir} +HERE=${PWD} + +ln -s ${TOPDIR}/bin/${BINNAME} . +ln -s ${TOPDIR}/data/mnginfo/${MNGINFO} . +ln -s ${TOPDIR}/data/grid/vgrid/${VGRID} . + +for f in $( ls ${TOPDIR}/data/grid/boundary/${dir2d} ) +do + ln -s ${TOPDIR}/data/grid/boundary/${dir2d}/${f} . +done + + +cat << EOF1 > run.sh +#! /bin/bash -x +################################################################################ +# +# for Fugaku computer +# +################################################################################ +#PJM --rsc-list "node=${NMPI}" +#PJM --rsc-list "elapse=02:00:00" +#PJM --mpi "use-rankdir" +#PJM -j +#PJM -s +# +export PARALLEL=8 +export OMP_NUM_THREADS=8 + +# run +${MPIEXEC} ./${BINNAME} || exit + +################################################################################ +EOF1 + +exit From d325e8b9e902e87109e22d37f84a63d13bcc0973 Mon Sep 17 00:00:00 2001 From: tozaki_chisae Date: Tue, 9 Mar 2021 19:14:14 +0900 Subject: [PATCH 43/70] [WIP] modify 3 files. 
--- FFB-MINI/src/dd_mpi/Makefile | 2 +- FFB-MINI/src/dd_mpi/Makefile.coarray | 28 + FFB-MINI/src/dd_mpi/dd_mpi.F90 | 127 +-- FFB-MINI/src/dd_mpi/xmpAPI_dd_mpi.F90 | 1379 +++++++++++++++++++++++++ FFB-MINI/src/les3x.F | 10 +- 5 files changed, 1426 insertions(+), 120 deletions(-) create mode 100755 FFB-MINI/src/dd_mpi/Makefile.coarray create mode 100755 FFB-MINI/src/dd_mpi/xmpAPI_dd_mpi.F90 diff --git a/FFB-MINI/src/dd_mpi/Makefile b/FFB-MINI/src/dd_mpi/Makefile index d270a26..971ccc4 100755 --- a/FFB-MINI/src/dd_mpi/Makefile +++ b/FFB-MINI/src/dd_mpi/Makefile @@ -6,7 +6,7 @@ RANLIB ?= ranlib all: libdd_mpi.a -OBJS = dd_mpi.o ddcom4.o +OBJS = xmpAPI_dd_mpi.o ddcom4.o libdd_mpi.a: $(OBJS) $(AR) $(ARFLAGS) $@ $(OBJS) diff --git a/FFB-MINI/src/dd_mpi/Makefile.coarray b/FFB-MINI/src/dd_mpi/Makefile.coarray new file mode 100755 index 0000000..d270a26 --- /dev/null +++ b/FFB-MINI/src/dd_mpi/Makefile.coarray @@ -0,0 +1,28 @@ +include ../make_setting + +AR ?= ar +ARFLAGS ?= rv +RANLIB ?= ranlib + +all: libdd_mpi.a + +OBJS = dd_mpi.o ddcom4.o + +libdd_mpi.a: $(OBJS) + $(AR) $(ARFLAGS) $@ $(OBJS) + $(RANLIB) $@ + +.SUFFIXES: +.SUFFIXES: .f .F .c .F90 .o + +.c.o: + $(CC) $(CFLAGS) -c $< +.f.o: + $(FC) $(FFLAGS) -c $< +.F.o: + $(FC) $(FFLAGS) -c $< +.F90.o: + $(FC) $(FFLAGS) -c $< + +clean: + rm -rf *.o *.a diff --git a/FFB-MINI/src/dd_mpi/dd_mpi.F90 b/FFB-MINI/src/dd_mpi/dd_mpi.F90 index 39fdf40..f317230 100755 --- a/FFB-MINI/src/dd_mpi/dd_mpi.F90 +++ b/FFB-MINI/src/dd_mpi/dd_mpi.F90 @@ -367,7 +367,7 @@ SUBROUTINE DDCOM2(SEND,RECV) IMPLICIT REAL*4(A-H,O-Z) ! INCLUDE 'mpif.h' -! INCLUDE 'xmp_coarray.h' + INCLUDE 'xmp_coarray.h' !C$XMP NODES PDDCOM2(*) ! ! @@ -413,10 +413,8 @@ SUBROUTINE DDCOM2(SEND,RECV) CALL MPI_ALLREDUCE(SEND,RECV,1,MPI_REAL8,MPI_SUM,MPI_COMM_WORLD,IERR) #else RECV = SEND -! Fujitsu start 202103 - CALL MPI_ALLREDUCE(SEND,RECV,1,MPI_REAL ,MPI_SUM,MPI_COMM_WORLD,IERR) -! CALL CO_SUM(SEND,RECV) -! Fujitsu end 202103 +! 
CALL MPI_ALLREDUCE(SEND,RECV,1,MPI_REAL ,MPI_SUM,MPI_COMM_WORLD,IERR) + CALL CO_SUM(SEND,RECV) !C$XMP REDUCTION(+:RECV) on PDDCOM2 !C$XMP BCAST (RECV) FROM PDDCOM2(1) @@ -793,43 +791,21 @@ SUBROUTINE DDCOM3(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT END ! SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT0,IERR,& - snd_desc, rcv_desc, MAXBUF) -!fj BUFSND, BUFRCV, MAXBUF) -! Fujitsu start 202103 - use xmp_api - use mpi -! Fujitsu end 202103 -! INCLUDE 'mpif.h' -! INCLUDE 'xmp_coarray.h' + BUFSND, BUFRCV, MAXBUF) + INCLUDE 'mpif.h' + INCLUDE 'xmp_coarray.h' IMPLICIT REAL*4(A-H,O-Z) !CTTDEBG REAL*8 DFX(NP),DFY(NP),DFZ(NP) !CTTDEBG DIMENSION LDOM(NDOM),NBPDOM(NDOM),IPSLF(MBPDOM,NDOM),IPSND(MBPDOM,NDOM), FX(NP),FY(NP),FZ(NP) -! Fujitsu start 202103 -! DIMENSION BUFSND(MAXBUF)[*], BUFRCV(MAXBUF)[*] - REAL*4 , POINTER :: BUFSND ( : ) => null ( ) - REAL*4 , POINTER :: BUFRCV ( : ) => null ( ) - INTEGER*8 :: snd_desc, rcv_desc - INTEGER*8 :: snd_sec, rcv_sec - INTEGER*8, DIMENSION(1) :: snd_lb, snd_ub, rcv_lb, rcv_ub - INTEGER*8 :: st_desc, st_l_desc - INTEGER*8 :: st_sec, st_l_sec - INTEGER*8, DIMENSION(1) :: st_lb, st_ub, st_l_lb, st_l_ub - INTEGER*8 :: start1, end1, start2, end2 - INTEGER*4 :: img_dims(1) - INTEGER*4 status -! Fujitsu end 202103 + DIMENSION BUFSND(MAXBUF)[*], BUFRCV(MAXBUF)[*] ! PARAMETER ( MAXDOM = 10000 ) INTEGER*4 MSGIDS(MAXDOM),MSGSTS(MPI_STATUS_SIZE,MAXDOM) ! INTEGER MAX_RECV_LEN -! Fujitsu start 202103 -! INTEGER ,ALLOCATABLE :: START_R(:)[:] - INTEGER*4 , POINTER :: START_R ( : ) => null ( ) - INTEGER*4 , POINTER :: start_rr_p ( : ) => null ( ) -! Fujitsu end 202103 + INTEGER ,ALLOCATABLE :: START_R(:)[:] ! INTEGER ,ALLOCATABLE :: END_R(:)[:] INTEGER ,ALLOCATABLE :: START_S(:) INTEGER ,ALLOCATABLE :: END_S(:) @@ -925,30 +901,7 @@ SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT ! THE RESIDUALS FROM THE NEIGHBORING ! 
SUB-DOMAINS CALL MPI_COMM_RANK(MPI_COMM_WORLD,ITASK,IERR) -! Fujitsu start 202103 - call xmp_api_init -! - snd_lb(1) = 1 - snd_ub(1) = MAXBUF - rcv_lb(1) = 1 - rcv_ub(1) = MAXBUF -! call xmp_new_coarray(snd_desc, 4, 1, snd_lb, snd_ub, 1, img_dims) -! call xmp_new_coarray(rcv_desc, 4, 1, rcv_lb, rcv_ub, 1, img_dims) -! - call xmp_coarray_bind(snd_desc,BUFSND) - call xmp_coarray_bind(rcv_desc,BUFRCV) -! -! allocate(START_R(1:NP)[*]) - st_lb(1) = 1 - st_ub(1) = NP - st_l_lb(1) = 1 - st_l_ub(1) = 1 - call xmp_new_coarray(st_desc, 4, 1, st_lb, st_ub, 1, img_dims) -! call xmp_new_local_array(st_l_desc, 4, 1, st_l_lb, st_l_ub) - call xmp_new_coarray(st_l_desc, 4, 1, st_l_lb, st_l_ub, 1, img_dims) - call xmp_coarray_bind(st_desc,START_R) - call xmp_coarray_bind(st_l_desc,start_rr_p) -! Fujitsu end 202103 + allocate(START_R(1:NP)[*]) ! allocate(END_R(1:NP)[*]) allocate(START_S(1:NP)) allocate(END_S(1:NP)) @@ -993,10 +946,7 @@ SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT ! ! PRINT *,"NDOM:",NDOM ! -! Fujitsu start 202103 -! ME=THIS_IMAGE() - ME=xmp_this_image() -! Fujitsu end 202103 + ME=THIS_IMAGE() START_DASH=0 MAX_RECV_LEN = 0 NSTART = 1 @@ -1115,54 +1065,19 @@ SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT 220 CONTINUE ! ! -! Fujitsu start 202103 -! SYNC ALL - call xmp_sync_all(status) -! - call xmp_new_array_section(snd_sec,1) - call xmp_new_array_section(rcv_sec,1) - call xmp_new_array_section(st_sec,1) - call xmp_new_array_section(st_l_sec,1) -! Fujitsu start 202103 -! + SYNC ALL + DO IDOM = 1, NDOM ! PRINT *,ME,"->",LDOM(IDOM)," BUFRECV(",START_R(ME)[LDOM(IDOM)],":",END_R(ME)[LDOM(IDOM)],")[",LDOM(IDOM),"]=BUFSND(",START_S(LDOM(IDOM)),":",END_S(LDOM(IDOM)),")" ! BUFRCV(START_R(ME)[LDOM(IDOM)]:END_R(ME)[LDOM(IDOM)])[LDOM(IDOM)] = & ! BUFSND(START_S(LDOM(IDOM)):END_S(LDOM(IDOM))) -! Fujitsu start 202103 -! 
START_RR = START_R(ME)[LDOM(IDOM)] - start1 = ME - end1 = ME - start2 = 1 - end2 = 1 - call xmp_array_section_set_triplet(st_sec,1,start1,end1,1,status) - call xmp_array_section_set_triplet(st_l_sec,1,start2,end2,1,status) - img_dims = LDOM(IDOM) - call xmp_coarray_get(img_dims,st_desc,st_sec, & - st_l_desc,st_l_sec,status) - START_RR = start_rr_p(1) -! Fujitsu end 202103 + START_RR = START_R(ME)[LDOM(IDOM)] END_RR = START_RR + (END_S(LDOM(IDOM)) - START_S(LDOM(IDOM))) -! Fujitsu start 202103 -! BUFRCV(START_RR:END_RR)[LDOM(IDOM)] = & -! BUFSND(START_S(LDOM(IDOM)):END_S(LDOM(IDOM))) - start1 = START_RR - end1 = END_RR - call xmp_array_section_set_triplet(rcv_sec,1,start1,end1,1,status) - START_RR = START_S(LDOM(IDOM)) - END_RR = END_S(LDOM(IDOM)) - start1 = START_RR - end1 = END_RR - call xmp_array_section_set_triplet(snd_sec,1,start1,end1,1,status) - img_dims = LDOM(IDOM) - call xmp_coarray_put(img_dims,rcv_desc,rcv_sec,snd_desc,snd_sec,status); -! Fujitsu end 202103 + BUFRCV(START_RR:END_RR)[LDOM(IDOM)] = & + BUFSND(START_S(LDOM(IDOM)):END_S(LDOM(IDOM))) END DO -! Fujitsu start 202103 -! SYNC ALL - call xmp_sync_all(status) -! Fujitsu end 202103 + SYNC ALL ! SYNC ALL ! DO 231 IDOM = 1 , NDOM @@ -1357,16 +1272,6 @@ SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT RETURN ENDIF ! -! Fujitsu start 202103 - call xmp_free_array_section(snd_sec) - call xmp_free_array_section(rcv_sec) -! -! call xmp_coarray_deallocate(snd_desc, status) -! call xmp_coarray_deallocate(rcv_desc, status) -! - call xmp_api_finalize -! Fujitsu end 202103 -! ! IPART = IPART RETURN END diff --git a/FFB-MINI/src/dd_mpi/xmpAPI_dd_mpi.F90 b/FFB-MINI/src/dd_mpi/xmpAPI_dd_mpi.F90 new file mode 100755 index 0000000..7f5324f --- /dev/null +++ b/FFB-MINI/src/dd_mpi/xmpAPI_dd_mpi.F90 @@ -0,0 +1,1379 @@ + +!======================================================================C +! C +! SOFTWARE NAME : FRONTFLOW_BLUE.1.0 C +! C +! SUB ROUTINE DD_MPI C +! C +! 
WRITTEN BY C.KATO C +! C +! C +! Contact address: The University of Tokyo, FSIS project C +! C +!======================================================================C +! +! GENERIC TO MPI FORTRAN INTERFACE FOR DOMAIN-DECOMPOSITION +! PROGRAMMING MODEL +! AUTHOR: C. KATO, MERL, HITACHI, LTD. +! DATE FIRST WRITTEN : FEBRUARY 20TH, 1998 +! DATE MODIFIED: APRIL 14TH, 2001 +! (-PRECEXP OPTION IMPLEMENTED) +! DATE LAST MODIFIED: MARCH 7TH, 2003 +! (ENTRIES DDCOM1 AND DDCOM2 ADDED) +! +! + SUBROUTINE DDINIT(NPART,IPART) + IMPLICIT REAL*4(A-H,O-Z) +! + INCLUDE 'mpif.h' +! +! +! INITIALIZE A PARALLEL TASK FOR DOMAIN-DECOMPOSITION PROGRAMMING +! MODEL +! ( MPI VERSION ) +! +! +! NOTE 1; THIS SUBROUTINE QUERIES THE TASK NUMBER (I.E. TASK ID) OF THE +! CALLING TASK, THE NUMBER OF TASKS WHICH ARE SHARING THE PROGRAM +! RUN. THE NUMBER OF SUB-DOMAINS AND THE SUB-DOMAIN NUMBER +! THAT THE CALLING TASK SHOULD TAKE CARE OF, WILL BE RETURNED TO +! THE CALLING TASK, AFTER BEING SET TO THE NUMBER OF TASKS, AND +! THE TASK NUMBER PLUS ONE, RESPECTIVELY. +! +! NOTE 2; THE NUMBER OF TASKS TO SHARE A PARTICULAR RUN IS DETERMINED AT +! RUN TIME BY THE PARALLEL ENVIRONMENT, WHILE THE NUMBER OF +! SUB-DOMAINS IS THE SAME AS THE NUMBER OF SUB-DOMAIN FILES WHICH +! HAVE BEEN PREPARED BEFORE THE RUN. THEREFORE, IN SOME CASES, +! THE NUMBER OF TASKS MIGHT DIFFER FROM THE ACTUAL NUMBER OF +! SUB-DOMAINS, ALTHOUGH THEY MUST BE THE SAME TO RUN A MEANINGFUL +! COMPUTATION. NOTE THAT THIS SUBROUTINE DOES NOT CHECK THE +! CONSISTENCY OF THESE VALUES. +! +! NOTE 3; A TASK NUMBER (I.E. TASK ID) IS A UNIQUE NUMBER FROM 0 TO ONE +! MINUS THE TOTAL NUMBER OF TASKS, ASSIGNED BY THE SYSTEM AT RUN +! TIME. THUS, THE SUB-DOMAIN NUMBER IS ALSO A UNIQUE NUMBER +! FROM 1 TO THE NUMBER OF SUB-DOMAINS. +! +! NOTE 4; ALL 'MPI' ROUTINES RETURN AN ERROR CODE 'IERR' WHICH INDICATES +! THE STATUS OF ITS EXECUTION. THIS SUBROUTINE IGNORES SUCH ERROR +! CODE AND RETURNS THE SEQUENCE TO THE CALLING PROGRAM UNIT, +! 
REGARDLESS OF THE VALUE OF THE 'MPI' RETURN CODE. +! +! +! ARGUMENT LISTINGS +! (1) INPUT +! ( NONE ) +! +! (2) OUTPUT +! INT *4 NPART ; TOTAL NUMBER OF SUB-DOMAINS +! INT *4 IPART ; SUB-DOMAIN NUMBER THAT THE CALLING TASK SHOULD +! TAKE CARE OF +! +! + logical flag +! call MPI_Initialized(flag, IERR) +! if (.not. flag) call MPI_Init(IERR) +!* CALL MPI_INIT(IERR) + CALL MPI_COMM_SIZE(MPI_COMM_WORLD,NTASK,IERR) + CALL MPI_COMM_RANK(MPI_COMM_WORLD,ITASK,IERR) +! +! IF (ITASK.eq.0) PRINT *,"CALL DDINIT" + NPART = NTASK + IPART = ITASK+1 +! +! + RETURN + END + SUBROUTINE DDEXIT + IMPLICIT REAL*4(A-H,O-Z) +! + INCLUDE 'mpif.h' +! +! +! EXIT FROM PARALLEL EXECUTIONS +! ( MPI VERSION ) +! +! +! NOTE ; ALL 'MPI' ROUTINES RETURN AN ERROR CODE 'IERR' WHICH INDICATES +! THE STATUS OF ITS EXECUTION. THIS SUBROUTINE IGNORES SUCH ERROR +! CODE AND RETURNS THE SEQUENCE TO THE CALLING PROGRAM UNIT, +! REGARDLESS OF THE VALUE OF THE 'MPI' RETURN CODE. +! +! +! ARGUMENT LISTINGS +! (1) INPUT +! ( NONE ) +! +! (2) OUTPUT +! ( NONE ) +! +! +! CALL MPI_FINALIZE(IERR) +! +! + CALL MPI_COMM_RANK(MPI_COMM_WORLD,ITASK,IERR) +! IF (ITASK.eq.0) PRINT *,"CALL DDEXIT" + RETURN + END + SUBROUTINE DDSYNC + IMPLICIT REAL*4(A-H,O-Z) +! + INCLUDE 'mpif.h' +! +! +! IMPLEMENT BARRIER SYNCHRONIZATION AMONG THE GROUP OF ALL TASKS +! ( MPI VERSION ) +! +! +! NOTE ; ALL 'MPI' ROUTINES RETURN AN ERROR CODE 'IERR' WHICH INDICATES +! THE STATUS OF ITS EXECUTION. THIS SUBROUTINE IGNORES SUCH ERROR +! CODE AND RETURNS THE SEQUENCE TO THE CALLING PROGRAM UNIT, +! REGARDLESS OF THE VALUE OF THE 'MPI' RETURN CODE. +! +! +! ARGUMENT LISTINGS +! (1) INPUT +! ( NONE ) +! +! (2) OUTPUT +! ( NONE ) +! +! + CALL MPI_COMM_RANK(MPI_COMM_WORLD,ITASK,IERR) +! IF (ITASK.eq.0) PRINT *,"CALL DDSYNC" + CALL MPI_BARRIER(MPI_COMM_WORLD,IERR) +! +! + RETURN + END +! +! + SUBROUTINE DDSTOP(IPART,IUT0) + IMPLICIT REAL*4(A-H,O-Z) +! + INCLUDE 'mpif.h' +! 
+ CHARACTER*60 ERMSGB / ' ## SUBROUTINE DDSTOP: FATAL ERROR REPORTED ; STOPPING' / + CHARACTER*60 EREXP1 / ' A SUB-DOMAIN COMPUTATION HAS BEEN ABNORMALLY TERMINATED AT' / +! +! +! STOP ALL THE RUNNING PARALLEL TASKS FOR DOMAIN-DECOMPOSITION +! PROGRAMMING MODEL +! ( MPI VERSION ) +! +! +! NOTE 1; IF AN ERROR CONDITION HAS BEEN DETECTED IN SOME TASK RUNNING +! IN PARALLEL, ALL THE TASKS SHARING THAT PARTICULAR RUN SHOULD BE +! APPROPRIATELY STOPPED. THIS SUBROUTINE TERMINATES ALL THE +! RUNNING TASKS AND CANCELS THE PARALLEL JOB. +! +! NOTE 2; ALL 'MPI' ROUTINES RETURN AN ERROR CODE 'IERR' WHICH INDICATES +! THE STATUS OF ITS EXECUTION. THIS SUBROUTINE IGNORES SUCH ERROR +! CODE AND RETURNS THE SEQUENCE TO THE CALLING PROGRAM UNIT, +! REGARDLESS OF THE VALUE OF THE 'MPI' RETURN CODE. +! +! +! ARGUMENT LISTINGS +! (1) INPUT +! INT *4 IPART ; SUB-DOMAIN NUMBER THAT THE CALLING TASK IS +! TAKING CARE OF +! INT *4 IUT0 ; FILE NUMBER TO WRITE ERROR MESSAGE +! +! (2) OUTPUT +! ( NONE ) +! +! + CALL MPI_COMM_RANK(MPI_COMM_WORLD,ITASK,IERR) +! IF (ITASK.eq.0) PRINT *,"CALL DDSTOP" + WRITE(IUT0,*) ERMSGB + WRITE(IUT0,*) EREXP1, IPART + CALL MPI_ABORT(MPI_COMM_WORLD,IPART,IERR) +! + RETURN + END +! +! + SUBROUTINE DDCOM0(LPINT1,LPINT2,LPINT3,NPINT,MDOM,MBPDOM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,IUT0,IERR) + IMPLICIT REAL*4(A-H,O-Z) + DIMENSION LPINT1(NPINT),LPINT2(NPINT),LPINT3(NPINT),LDOM(MDOM),NBPDOM(MDOM),IPSLF(MBPDOM,MDOM),IPSND(MBPDOM,MDOM) +! +! + CHARACTER*60 ERMSGB / ' ## SUBROUTINE DDCOM0: FATAL ERROR OCCURRENCE; RETURNED' / + CHARACTER*60 EREXP1 / ' DIMENSION SIZE OF PASSED LIST ARRAYS IS NOT SUFFICIENT ' / +! +! +! SET UP NEIGHBORING DOMAIN LISTS FOR DOMAIN-DECOMPOSITION +! PROGRAMMING MODEL +! +! +! ARGUMENT LISTINGS +! (1) INPUT +! INT *4 LPINT1 (IBP) ; INTER-CONNECT BOUNDARY NODES +! INT *4 LPINT2 (IBP) ; NEIGHBORING SUB-DOMAIN NUMBERS +! INT *4 LPINT3 (IBP) ; NODE NUMBER IN THE NEIGHBORING SUB-DOMAINS +! INT *4 NPINT ; NUMBER OF INTER-CONNECT BOUNDARY NODES +! +! 
INT *4 MDOM ; MAX. NUMBER OF THE NEIGHBORING SUB-DOMAINS +! INT *4 MBPDOM ; THE DIMENSION SIZE OF THE FIRST ELEMENTS +! OF THE PASSED ARRAYS 'IPSLF' AND 'IPSND' +! (I.E. THE MAXIMUM NUMBER OF THE +! INTER-CONNECT BOUNDARY NODES FOR A +! NEIGHBORING SUB-DOMAIN) +! INT *4 IUT0 ; FILE NUMBER TO WRITE ERROR MESSAGE +! +! (2) OUTPUT +! INT *4 LDOM (IDOM) ; NEIGHBORING SUB-DOMAIN NUMBER +! INT *4 NBPDOM (IDOM) ; NUMBER OF INTER-CONNECT BOUNDARY NODES +! SHARING WITH THE IDOM'TH NEIGHBORING +! SUB-DOMAIN, LDOM(IDOM) +! INT *4 NDOM ; NUMBER OF THE NEIGHBORING SUB-DOMAINS +! INT *4 IPSLF (IBP,IDOM) ; INTER-CONNECT BOUNDARY NODE NUMBER IN THE +! CALLING TASK'S SUB-DOMAIN, FOR THE IDOM'TH +! NEIGHBORING SUB-DOMAIN, LDOM(IDOM) +! INT *4 IPSND (IBP,IDOM) ; INTER-CONNECT BOUNDARY NODE NUMBER IN THE +! SUB-DOMAIN THAT IS RECEIVING THE CALLING +! TASK'S RESIDUALS. +! INT *4 IERR ; RETURN CODE WHOSE VALUE WILL BE EITHER +! 0 --- INDICATING SUCCESSFUL TERMINATION +! OR 1 --- INDICATING OCCURRENCE OF SOME ERROR CONDITIONS +! +! +! CALL MPI_COMM_RANK(MPI_COMM_WORLD,ITASK,IERR) +! IF (ITASK.eq.0) PRINT *,"CALL DDCOM0" + IERR = 0 +! +! +! +! SET INITIAL VALUES +! +! +! + NDOM = 0 + DO 100 IDOM = 1 , MDOM + NBPDOM(IDOM) = 0 + 100 CONTINUE +! +! +! +! SET NEIGHBORING DOMAIN LISTS +! +! +! + DO 230 IPINT = 1 , NPINT + IFNEW = LPINT2(IPINT) + DO 210 ICHK = 1 , NDOM + IF(LDOM(ICHK).EQ.IFNEW) THEN + IDOM = ICHK + GO TO 220 + ENDIF + 210 CONTINUE + NDOM = NDOM+1 + IDOM = NDOM +! + IF(NDOM.GT.MDOM) THEN + WRITE(IUT0,*) ERMSGB + WRITE(IUT0,*) EREXP1 + IERR = 1 + RETURN + ENDIF +! + LDOM(NDOM) = IFNEW +! + 220 CONTINUE + NBPDOM(IDOM) = NBPDOM(IDOM)+1 +! + IF(NBPDOM(IDOM).GT.MBPDOM) THEN + WRITE(IUT0,*) ERMSGB + WRITE(IUT0,*) EREXP1 + IERR = 1 + RETURN + ENDIF +! + IPSLF(NBPDOM(IDOM),IDOM) = LPINT1(IPINT) + IPSND(NBPDOM(IDOM),IDOM) = LPINT3(IPINT) + 230 CONTINUE +! +! + RETURN + END +! +! 
+ SUBROUTINE DDCOM1(LPINT1,NPINT,NUMIP,NP,IUT0,IERR) + IMPLICIT REAL*4(A-H,O-Z) + DIMENSION LPINT1(NPINT),NUMIP(NP) +! +! + CHARACTER*60 ERMSGB / ' ## SUBROUTINE DDCOM1: FATAL ERROR OCCURRENCE; RETURNED' / + CHARACTER*60 EREXP1 / ' SPECIFIED NODE NUMBER IS OUT OF RANGE ' / +! +! +! CALCULATE NUMBER OF NEIGHBORING DOMAINS THAT EACH NODE BELONGS TO +! FOR DOMAIN-DECOMPOSITION PROGRAMMING MODEL +! +! +! ARGUMENT LISTINGS +! (1) INPUT +! INT *4 LPINT1 (IBP) ; INTER-CONNECT BOUNDARY NODES +! INT *4 NPINT ; NUMBER OF INTER-CONNECT BOUNDARY NODES +! INT *4 NP ; NUMBER OF TOTAL NODES +! INT *4 IUT0 ; FILE NUMBER TO WRITE ERROR MESSAGE +! +! (2) OUTPUT +! INT *4 NUMIP (IP) ; NUMBER OF NEIGHBORING DOMAINS THAT NODE +! 'IP' BELONG TO +! INT *4 IERR ; RETURN CODE WHOSE VALUE WILL BE EITHER +! 0 --- INDICATING SUCCESSFUL TERMINATION +! OR 1 --- INDICATING OCCURRENCE OF SOME ERROR CONDITIONS +! +! + + IERR = 0 +! +! +! +! SET INITIAL VALUES +! +! +! + DO 100 IP = 1 , NP + NUMIP(IP) = 0 + 100 CONTINUE +! +! +! +! CALCULATE NUMBER OF NEIGHBORING DOMAINS THAT NODE 'IP' BELONGS TO +! +! +! + DO 110 IPINT = 1 , NPINT + IP = LPINT1(IPINT) +! + IF(IP.LT.1 .OR. IP.GT.NP) THEN + WRITE(IUT0,*) ERMSGB + WRITE(IUT0,*) EREXP1 + IERR = 1 + RETURN + ENDIF +! + NUMIP(IP) = NUMIP(IP)+1 + 110 CONTINUE +! +! + RETURN + END +! +! + SUBROUTINE DDCOM2(SEND,RECV) + IMPLICIT REAL*4(A-H,O-Z) +! + INCLUDE 'mpif.h' +! INCLUDE 'xmp_coarray.h' +!C$XMP NODES PDDCOM2(*) +! +! +! SUM UP A SINGLE SCALAR AMONG ALL THE PARTICIPATING TASKS FOR +! DOMAIN-DECOMPOSITION PROGRAMMING MODEL +! +! ( MPI VERSION ) +! +! +! NOTE 1; ALL 'MPI' ROUTINES RETURN AN ERROR CODE 'IERR' WHICH INDICATES +! THE STATUS OF ITS EXECUTION. THIS SUBROUTINE IGNORES SUCH ERROR +! CODE AND RETURNS THE SEQUENCE TO THE CALLING PROGRAM UNIT, +! REGARDLESS OF THE VALUE OF THE 'MPI' RETURN CODE. +! +! NOTE 2; SOME COMPILERS, SUCH AS OFORT90 IN HI-UXMPP, SUPPORT AUTOMATIC +! PRECISION EXPANSION, WHERE ALL THE CONSTANTS, VARIABLES AND +! 
ARRAYS OF 4-BYTE PRECISION (REAL*4) ARE AUTOMATICALLY CONVERTED +! TO THOSE OF 8-BYTE PRECISION (REAL*8) WITH UNFORMATTED I/O DATA +! BEING KEPT AS THEY ARE (IF SO SPECIFIED). WHEN USING SUCH +! FEATURES (FUNCTIONS) OF A COMPILER, SPECIAL CARE IS NEEDED +! BECAUSE A COUPLE OF MPI SUBROUTINES CALLED IN THIS SUBPROGRAM +! ACCEPT THE DATA TYPE (DATA PRECISION) AS THEIR INPUT AND +! PERFORM THE OPERATIONS ACCORDING TO THIS INPUT VALUE. THIS +! INTERFACE SUPPORTS THE AUTOMATIC PRECISION EXPANSION MENTIONED +! ABOVE. IF YOU WISH TO USE SUCH FEATURE, ADD '-DPRECEXP' OPTION +! WHEN INVOKING 'cpp' FOR PRI-PROCESSING THIS SOURCE PROGRAM FILE. +! +! +! ARGUMENT LISTINGS +! (1) INPUT +! REAL*4 SEND ; SCALAR VARIABLE TO SUM UP +! +! (2) OUTPUT +! REAL*4 RECV ; SCALAR VARIABLE SUMMED UP AMONG ALL TASKS +! +! +! +#ifdef USE_BARRIER + CALL MPI_BARRIER(MPI_COMM_WORLD,IERR) +#endif + +#ifdef PRECEXP + CALL MPI_ALLREDUCE(SEND,RECV,1,MPI_REAL8,MPI_SUM,MPI_COMM_WORLD,IERR) +#else + RECV = SEND +! Fujitsu start 202103 + CALL MPI_ALLREDUCE(SEND,RECV,1,MPI_REAL ,MPI_SUM,MPI_COMM_WORLD,IERR) +! CALL CO_SUM(SEND,RECV) +! Fujitsu end 202103 +!C$XMP REDUCTION(+:RECV) on PDDCOM2 +!C$XMP BCAST (RECV) FROM PDDCOM2(1) + +! CALL MPI_COMM_RANK(MPI_COMM_WORLD,ITASK,IERR) +! IF(ITASK.EQ.0)PRINT *,"ALLREDUCE=",RECV,"REDUCTION=",RECV2 +#endif + +#ifdef USE_BARRIER + CALL MPI_BARRIER(MPI_COMM_WORLD,IERR) +#endif + +! + RETURN + END +! +!CTT SUBROUTINE DDCOM2 END +! +!CTT SUBROUTINE DDALLD BEGIN +! + SUBROUTINE DDALLD(SEND,RECV,IFLAG,IUT0,IERR) + IMPLICIT REAL*4(A-H,O-Z) +! + INCLUDE 'mpif.h' +! +! + CHARACTER*60 ERMSGB / ' ## SUBROUTINE DDALLD: FATAL ERROR OCCURENCE; RETURNED ' / + CHARACTER*60 ERMSG1 / ' ILLIGAL OPERATION NUMBER IS GIVEN ' / +! +! +! SUM UP A SINGLE SCALAR AMONG ALL THE PARTICIPATING TASKS FOR +! DOMAIN-DECOMPOSITION PROGRAMMING MODEL +! +! ( MPI VERSION ) +! +! +! NOTE 1; ALL 'MPI' ROUTINES RETURN AN ERROR CODE 'IERR' WHICH INDICATES +! THE STATUS OF ITS EXECUTION. 
THIS SUBROUTINE IGNORES SUCH ERROR +! CODE AND RETURNS THE SEQUENCE TO THE CALLING PROGRAM UNIT, +! REGARDLESS OF THE VALUE OF THE 'MPI' RETURN CODE. +! +! NOTE 2; SOME COMPILERS, SUCH AS OFORT90 IN HI-UXMPP, SUPPORT AUTOMATIC +! PRECISION EXPANSION, WHERE ALL THE CONSTANTS, VARIABLES AND +! ARRAYS OF 4-BYTE PRECISION (REAL*4) ARE AUTOMATICALLY CONVERTED +! TO THOSE OF 8-BYTE PRECISION (REAL*8) WITH UNFORMATTED I/O DATA +! BEING KEPT AS THEY ARE (IF SO SPECIFIED). WHEN USING SUCH +! FEATURES (FUNCTIONS) OF A COMPILER, SPECIAL CARE IS NEEDED +! BECAUSE A COUPLE OF MPI SUBROUTINES CALLED IN THIS SUBPROGRAM +! ACCEPT THE DATA TYPE (DATA PRECISION) AS THEIR INPUT AND +! PERFORM THE OPERATIONS ACCORDING TO THIS INPUT VALUE. THIS +! INTERFACE SUPPORTS THE AUTOMATIC PRECISION EXPANSION MENTIONED +! ABOVE. IF YOU WISH TO USE SUCH FEATURE, ADD '-DPRECEXP' OPTION +! WHEN INVOKING 'cpp' FOR PRI-PROCESSING THIS SOURCE PROGRAM FILE. +! +! +! ARGUMENT LISTINGS +! (1) INPUT +! REAL*4 SEND ; SCALAR VARIABLE TO SUM UP +! INTEGER IFLAG ; OPERATION DISCRIMINATER +! 1: OP=MPI_SUM +! 2: OP=MPI_MAX +! 3: OP=MPI_MIN +! +! (2) OUTPUT +! REAL*4 RECV ; SCALAR VARIABLE SUMMED UP AMONG ALL TASKS +! +! + CALL MPI_COMM_RANK(MPI_COMM_WORLD,ITASK,IERR) +! IF (ITASK.eq.0) PRINT *,"CALL ALLD" + IF(IFLAG.EQ.1) THEN +! +#ifdef PRECEXP + CALL MPI_ALLREDUCE(SEND,RECV,1,MPI_REAL8,MPI_SUM,MPI_COMM_WORLD,IERR) +#else + CALL MPI_ALLREDUCE(SEND,RECV,1,MPI_REAL ,MPI_SUM,MPI_COMM_WORLD,IERR) +#endif +! + ELSE IF (IFLAG.EQ.2) THEN +! +#ifdef PRECEXP + CALL MPI_ALLREDUCE(SEND,RECV,1,MPI_REAL8,MPI_MAX,MPI_COMM_WORLD,IERR) +#else + CALL MPI_ALLREDUCE(SEND,RECV,1,MPI_REAL ,MPI_MAX,MPI_COMM_WORLD,IERR) +#endif +! + ELSE IF (IFLAG.EQ.3) THEN +! +#ifdef PRECEXP + CALL MPI_ALLREDUCE(SEND,RECV,1,MPI_REAL8,MPI_MIN,MPI_COMM_WORLD,IERR) +#else + CALL MPI_ALLREDUCE(SEND,RECV,1,MPI_REAL ,MPI_MIN,MPI_COMM_WORLD,IERR) +#endif +! + ELSE + WRITE(IUT0,*) ERMSGB + WRITE(IUT0,*) ERMSG1 + IERR=1 + RETURN + ENDIF +! 
+ RETURN + END +! +!CTT SUBROUTINE DDALLD END +! +!CTT SUBROUTINE DDCOM3 BEGIN +! + SUBROUTINE DDCOM3(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT0,IERR,BUFSND,BUFRCV,MAXBUF) + IMPLICIT REAL*4(A-H,O-Z) + DIMENSION LDOM(NDOM),NBPDOM(NDOM),IPSLF(MBPDOM,NDOM),IPSND(MBPDOM,NDOM), FX(NP),FY(NP),FZ(NP),BUFSND(MAXBUF),BUFRCV(MAXBUF) +! + INCLUDE 'mpif.h' +! + PARAMETER ( MAXDOM = 10000 ) + INTEGER*4 MSGIDS(MAXDOM),MSGSTS(MPI_STATUS_SIZE,MAXDOM) +! +! + CHARACTER*60 ERMSGB / ' ## SUBROUTINE DDCOM3: FATAL ERROR OCCURRENCE; RETURNED' / + CHARACTER*60 EREXP1 / ' DIMENSION SIZE OF INTERNAL ARRAYS IS NOT SUFFICIENT ' / + CHARACTER*60 EREXP2 / ' DIMENSION SIZE OF PASSED BUFFER ARRAYS IS NOT SUFFICIENT ' / + CHARACTER*60 EREXP3 / ' RECEIVED NODE NUMBER IS OUT OF THE GLOBAL NODE NUMBER ' / +! +! +! EXCHANGE X, Y, AND Z RESIDUALS AMONG THE NEIGHBORING SUB-DOMAINS +! AND SUPERIMPOSE THE EXCHANGED RESIDUALS TO THE CALLING TASK'S +! RESIDUALS, FOR DOMAIN-DECOMPOSITION PROGRAMMING MODEL +! +! ( MPI VERSION ) +! +! +! NOTE 1; ALL 'MPI' ROUTINES RETURN AN ERROR CODE 'IERR' WHICH INDICATES +! THE STATUS OF ITS EXECUTION. THIS SUBROUTINE IGNORES SUCH ERROR +! CODE AND RETURNS THE SEQUENCE TO THE CALLING PROGRAM UNIT, +! REGARDLESS OF THE VALUE OF THE 'MPI' RETURN CODE. +! +! NOTE 2; SOME COMPILERS, SUCH AS OFORT90 IN HI-UXMPP, SUPPORT AUTOMATIC +! PRECISION EXPANSION, WHERE ALL THE CONSTANTS, VARIABLES AND +! ARRAYS OF 4-BYTE PRECISION (REAL*4) ARE AUTOMATICALLY CONVERTED +! TO THOSE OF 8-BYTE PRECISION (REAL*8) WITH UNFORMATTED I/O DATA +! BEING KEPT AS THEY ARE (IF SO SPECIFIED). WHEN USING SUCH +! FEATURES (FUNCTIONS) OF A COMPILER, SPECIAL CARE IS NEEDED +! BECAUSE A COUPLE OF MPI SUBROUTINES CALLED IN THIS SUBPROGRAM +! ACCEPT THE DATA TYPE (DATA PRECISION) AS THEIR INPUT AND +! PERFORM THE OPERATIONS ACCORDING TO THIS INPUT VALUE. THIS +! INTERFACE SUPPORTS THE AUTOMATIC PRECISION EXPANSION MENTIONED +! ABOVE. 
IF YOU WISH TO USE SUCH FEATURE, ADD '-DPRECEXP' OPTION +! WHEN INVOKING 'cpp' FOR PRI-PROCESSING THIS SOURCE PROGRAM FILE. +! +! +! ARGUMENT LISTINGS +! (1) INPUT +! INT *4 IPART ; SUB-DOMAIN NUMBER THAT THE CALLING TASK IS +! TAKING CARE OF +! NOTES ; ARGUMENT 'IPART' IS NOT CURRENTLY USED. IT IS +! RETAINED FOR A POSSIBLE FUTURE USE. +! INT *4 IDIM ; SPACE DIMENSION ( 1, 2, OR 3 ) +! INT *4 LDOM (IDOM) ; NEIGHBORING SUB-DOMAIN NUMBER +! INT *4 NBPDOM (IDOM) ; NUMBER OF INTER-CONNECT BOUNDARY NODES +! SHARING WITH THE IDOM'TH NEIGHBORING +! SUB-DOMAIN, LDOM(IDOM) +! INT *4 NDOM ; NUMBER OF THE NEIGHBORING SUB-DOMAINS +! INT *4 IPSLF (IBP,IDOM) ; INTER-CONNECT BOUNDARY NODE NUMBER IN THE +! CALLING TASK'S SUB-DOMAIN, FOR THE IDOM'TH +! NEIGHBORING SUB-DOMAIN, LDOM(IDOM) +! INT *4 IPSND (IBP,IDOM) ; INTER-CONNECT BOUNDARY NODE NUMBER IN THE +! SUB-DOMAIN THAT IS RECEIVING THE CALLING +! TASK'S RESIDUALS. +! INT *4 MBPDOM ; THE DIMENSION SIZE OF THE FIRST ELEMENTS +! OF THE PASSED ARRAYS 'IPSLF' AND 'IPSND' +! (I.E. THE MAXIMUM NUMBER OF THE +! INTER-CONNECT BOUNDARY NODES FOR A +! NEIGHBORING SUB-DOMAIN) +! INT *4 NP ; NUMBER OF THE TOTAL NODES IN THE CALLING +! TASK'S SUB-DOMAIN +! INT *4 IUT0 ; FILE NUMBER TO WRITE ERROR MESSAGE +! INT *4 MAXBUF ; LENGTH OF THE PASSED COMMUNICATION BUFFERS +! 'BUFSND' AND 'BUFRCV' IN WORDS. 'MAXBUF' +! MUST BE NO SMALLER THAN 4 TIMES THE TOTAL +! NUMBER OF INTER-CONNECT BOUNDARY NODES IN +! THE CALLING TASK +! +! (2) OUTPUT +! INT *4 IERR ; RETURN CODE WHOSE VALUE WILL BE EITHER +! 0 --- INDICATING SUCCESSFUL TERMINATION +! OR 1 --- INDICATING OCCURRENCE OF SOME ERROR CONDITIONS +! +! (3) INPUT-OUTPUT +! REAL*4 FX(IP) ; X-DIRECTION RESIDUAL VECTOR +! REAL*4 FY(IP) ; Y-DIRECTION RESIDUAL VECTOR +! REAL*4 FZ(IP) ; Z-DIRECTION RESIDUAL VECTOR +! +! (4) WORK +! REAL*4 BUFSND(IBUF) ; HOLDS THE VALUES OF THE INTER-CONNECT +! BOUNDARY NODE NUMBER IN THE NEIGHBORING +! SUB-DOMAINS AND THE RESIDUALS OF THE +! 
CALLING TASK'S SUB-DOMAIN WHEN SENDING +! THE RESIDUALS +! +! REAL*4 BUFRCV(IBUF) ; HOLDS THE VALUES OF THE INTER-CONNECT +! BOUNDARY NODE NUMBER IN THE CALLING TASK'S +! SUB-DOMAIN AND THE RESIDUALS OF THE +! NEIGHBORING SUB-DOMAINS AT THE RECEIPT OF +! THE RESIDUALS FROM THE NEIGHBORING +! SUB-DOMAINS +! +! + CALL MPI_COMM_RANK(MPI_COMM_WORLD,ITASK,IERR) +! IF (ITASK.eq.0) PRINT *,"CALL DDCOM3" + IERR = 0 +! +! +! +! CHECK THE INTERNAL ARRAY SIZE +! +! +! + IF(2*NDOM.GT.MAXDOM) THEN + WRITE(IUT0,*) ERMSGB + WRITE(IUT0,*) EREXP1 + IERR = 1 + RETURN + ENDIF +! +! +! +! POST ALL THE EXPECTED RECEIVES +! +! +! + NSTART = 1 + DO 110 IDOM = 1 , NDOM + MSGTYP = 1 + IRECV = LDOM(IDOM)-1 + MSGLEN = 4*NBPDOM(IDOM) +! + IF(NSTART+MSGLEN-1.GT.MAXBUF) THEN + WRITE(IUT0,*) ERMSGB + WRITE(IUT0,*) EREXP2 + IERR = 1 + RETURN + ENDIF +! +#ifdef PRECEXP + CALL MPI_IRECV(BUFRCV(NSTART),MSGLEN,MPI_REAL8,IRECV,MSGTYP,MPI_COMM_WORLD,MSGIDS(IDOM),IERR) + #else + CALL MPI_IRECV(BUFRCV(NSTART),MSGLEN,MPI_REAL ,IRECV,MSGTYP,MPI_COMM_WORLD,MSGIDS(IDOM),IERR) +#endif +! + NSTART = NSTART+MSGLEN + 110 CONTINUE +! +! CALL MPI_BARRIER(MPI_COMM_WORLD,IERR) +! +! +! +! SET UP THE SEND BUFFER +! +! +!C CALL FTRACE_REGION_BEGIN("ddcom3:200-210") +!CDIR PARALLEL DO PRIVATE(NSTART,IP,IPS) + DO 210 IDOM = 1 , NDOM + NSTART = 0 + DO 205 ITMP = 2 , IDOM + NSTART = NSTART + NBPDOM(ITMP-1)*4 + 205 CONTINUE +!CDIR NOINNER + DO 200 IBP = 1 , NBPDOM(IDOM) + IP = IPSLF(IBP,IDOM) + IPS = IPSND(IBP,IDOM) + BUFSND(NSTART+1) = IPS + BUFSND(NSTART+2) = FX(IP) + BUFSND(NSTART+3) = FY(IP) + BUFSND(NSTART+4) = FZ(IP) + NSTART = NSTART + 4 + 200 CONTINUE + 210 CONTINUE +!C CALL FTRACE_REGION_END("ddcom3:200-210") +! +! +! +! SEND THE RESIDUALS +! +! +! 
+ NSTART = 1 + DO 220 IDOM = 1 , NDOM + MSGTYP = 1 + ISEND = LDOM(IDOM)-1 + MSGLEN = 4*NBPDOM(IDOM) + +#ifdef PRECEXP + CALL MPI_ISEND(BUFSND(NSTART),MSGLEN,MPI_REAL8,ISEND,MSGTYP,MPI_COMM_WORLD,MSGIDS(NDOM+IDOM),IERR) +#else + CALL MPI_ISEND(BUFSND(NSTART),MSGLEN,MPI_REAL ,ISEND,MSGTYP,MPI_COMM_WORLD,MSGIDS(NDOM+IDOM),IERR) +#endif +! + NSTART = NSTART+MSGLEN + 220 CONTINUE +! +! +! +! WAIT FOR THE COMPLETION OF ALL THE REQUESTED COMMUNICATIONS +! +! +! + CALL MPI_WAITALL(2*NDOM,MSGIDS,MSGSTS,IERR) +! +! +! IMPORTANT NOTES! +! AFTER A NON-BLOCKING SEND/RECEIVE ROUTINE, SUCH AS 'MPI_ISEND' +! OR 'MPI_IRECV', IS CALLED, THE COMMUNICATION REQUEST CREATED BY +! THESE ROUTINES MUST BE FREED EITHER BY EXPLICITLY OR IMPLICITLY. +! 'MPI_REQUEST_FREE' FREES SUCH REQUEST EXPLICITLY, WHILE A ROUTINE +! WHICH IDENTIFIES COMPLETION OF THE REQUEST, SUCH AS 'MPI_WAIT', +! 'MPI_WAITANY', OR 'MPI_WAITALL' IMPLICITLY FREES THE REQUEST. +! THIS INTERFACE PROGRAM USES 'MPI_WAITALL' ROUTINES TO FREE SUCH +! REQUESTS. PAY PARTICULAR ATTENTION IF YOU WISH TO, INSTEAD, USE +! 'MPI_REQUEST_FREE', BECAUSE 'MPI_REQUEST_FREE' FREES THE REQUESTS +! REGARDLESS OF THE STATE OF THE PREVIOUSLY CALLED COMMUNICATION +! ROUTINES, THUS SOMETIMES FREES REQUESTS WHICH HAVE NOT BEEN +! COMPLETED. +! +! +! SUPERIMPOSE THE RECEIVED RESIDUALS +! +! +!C CALL FTRACE_REGION_BEGIN("ddcom3:300-310") + NSTARTMP = 0 + DO IDOM = 1 , NDOM +!CDIR NOINNER + DO IBP = 1, NBPDOM(IDOM) + IP = BUFRCV(NSTARTMP+1)+0.1 + IF(IP.LT.1 .OR. IP.GT.NP) THEN + IERR = 1 + ENDIF + NSTARTMP = NSTARTMP + 4 + ENDDO + ENDDO +! + IF(IERR .eq. 1) THEN + WRITE(IUT0,*) ERMSGB + WRITE(IUT0,*) EREXP3 + RETURN + ENDIF +! + IF(IDIM .GE. 3) THEN +!CDIR LISTVEC + DO NSTART = 0, NSTARTMP-4, 4 + IP = BUFRCV(NSTART+1)+0.1 + FX(IP) = FX(IP)+BUFRCV(NSTART+2) + FY(IP) = FY(IP)+BUFRCV(NSTART+3) + FZ(IP) = FZ(IP)+BUFRCV(NSTART+4) + ENDDO + ELSE IF(IDIM .GE. 
2) THEN +!CDIR LISTVEC + DO NSTART = 0, NSTARTMP-4, 4 + IP = BUFRCV(NSTART+1)+0.1 + FX(IP) = FX(IP)+BUFRCV(NSTART+2) + FY(IP) = FY(IP)+BUFRCV(NSTART+3) + ENDDO + ELSE +!CDIR LISTVEC + DO NSTART = 0, NSTARTMP-4, 4 + IP = BUFRCV(NSTART+1)+0.1 + FX(IP) = FX(IP)+BUFRCV(NSTART+2) + ENDDO + ENDIF +! +!C CALL FTRACE_REGION_END("ddcom3:300-310") +! + IPART = IPART +! +! + RETURN + END +! + SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT0,IERR,& + snd_desc, rcv_desc, MAXBUF) +!fj BUFSND, BUFRCV, MAXBUF) +! Fujitsu start 202103 + use xmp_api + use mpi +! Fujitsu end 202103 +! INCLUDE 'mpif.h' +! INCLUDE 'xmp_coarray.h' + IMPLICIT REAL*4(A-H,O-Z) +!CTTDEBG + REAL*8 DFX(NP),DFY(NP),DFZ(NP) +!CTTDEBG + DIMENSION LDOM(NDOM),NBPDOM(NDOM),IPSLF(MBPDOM,NDOM),IPSND(MBPDOM,NDOM), FX(NP),FY(NP),FZ(NP) +! Fujitsu start 202103 +! DIMENSION BUFSND(MAXBUF)[*], BUFRCV(MAXBUF)[*] + REAL*4 , POINTER :: BUFSND ( : ) => null ( ) + REAL*4 , POINTER :: BUFRCV ( : ) => null ( ) + INTEGER*8 :: snd_desc, rcv_desc + INTEGER*8 :: snd_sec, rcv_sec + INTEGER*8, DIMENSION(1) :: snd_lb, snd_ub, rcv_lb, rcv_ub + INTEGER*8 :: st_desc, st_l_desc + INTEGER*8 :: st_sec, st_l_sec + INTEGER*8, DIMENSION(1) :: st_lb, st_ub, st_l_lb, st_l_ub + INTEGER*8 :: start1, end1, start2, end2 + INTEGER*4 :: img_dims(1) + INTEGER*4 status +! Fujitsu end 202103 +! + PARAMETER ( MAXDOM = 10000 ) + INTEGER*4 MSGIDS(MAXDOM),MSGSTS(MPI_STATUS_SIZE,MAXDOM) +! + INTEGER MAX_RECV_LEN +! Fujitsu start 202103 +! INTEGER ,ALLOCATABLE :: START_R(:)[:] + INTEGER*4 , POINTER :: START_R ( : ) => null ( ) + INTEGER*4 , POINTER :: start_rr_p ( : ) => null ( ) +! Fujitsu end 202103 +! INTEGER ,ALLOCATABLE :: END_R(:)[:] + INTEGER ,ALLOCATABLE :: START_S(:) + INTEGER ,ALLOCATABLE :: END_S(:) + INTEGER START_RR, END_RR +! REAL*4, ALLOCATABLE :: BUFRCV2(:)[:] +! 
+ CHARACTER*60 ERMSGB / ' ## SUBROUTINE DDCOMX: FATAL ERROR OCCURRENCE; RETURNED' / + CHARACTER*60 EREXP1 / ' DIMENSION SIZE OF INTERNAL ARRAYS IS NOT SUFFICIENT ' / + CHARACTER*60 EREXP2 / ' DIMENSION SIZE OF PASSED BUFFER ARRAYS IS NOT SUFFICIENT ' / + CHARACTER*60 EREXP3 / ' RECEIVED NODE NUMBER IS OUT OF THE GLOBAL NODE NUMBER ' / +! PRINT *,"MAXBUF=",MAXBUF +! +! +! EXCHANGE X, Y, AND Z RESIDUALS AMONG THE NEIGHBORING SUB-DOMAINS +! AND SUPERIMPOSE THE EXCHANGED RESIDUALS TO THE CALLING TASK'S +! RESIDUALS, FOR DOMAIN-DECOMPOSITION PROGRAMMING MODEL +! +! ( MPI VERSION ) +! +! +! NOTE 1; ALL 'MPI' ROUTINES RETURN AN ERROR CODE 'IERR' WHICH INDICATES +! THE STATUS OF ITS EXECUTION. THIS SUBROUTINE IGNORES SUCH ERROR +! CODE AND RETURNS THE SEQUENCE TO THE CALLING PROGRAM UNIT, +! REGARDLESS OF THE VALUE OF THE 'MPI' RETURN CODE. +! +! NOTE 2; SOME COMPILERS, SUCH AS OFORT90 IN HI-UXMPP, SUPPORT AUTOMATIC +! PRECISION EXPANSION, WHERE ALL THE CONSTANTS, VARIABLES AND +! ARRAYS OF 4-BYTE PRECISION (REAL*4) ARE AUTOMATICALLY CONVERTED +! TO THOSE OF 8-BYTE PRECISION (REAL*8) WITH UNFORMATTED I/O DATA +! BEING KEPT AS THEY ARE (IF SO SPECIFIED). WHEN USING SUCH +! FEATURES (FUNCTIONS) OF A COMPILER, SPECIAL CARE IS NEEDED +! BECAUSE A COUPLE OF MPI SUBROUTINES CALLED IN THIS SUBPROGRAM +! ACCEPT THE DATA TYPE (DATA PRECISION) AS THEIR INPUT AND +! PERFORM THE OPERATIONS ACCORDING TO THIS INPUT VALUE. THIS +! INTERFACE SUPPORTS THE AUTOMATIC PRECISION EXPANSION MENTIONED +! ABOVE. IF YOU WISH TO USE SUCH FEATURE, ADD '-DPRECEXP' OPTION +! WHEN INVOKING 'cpp' FOR PRI-PROCESSING THIS SOURCE PROGRAM FILE. +! +! +! ARGUMENT LISTINGS +! (1) INPUT +! INT *4 IPART ; SUB-DOMAIN NUMBER THAT THE CALLING TASK IS +! TAKING CARE OF +! NOTES ; ARGUMENT 'IPART' IS NOT CURRENTLY USED. IT IS +! RETAINED FOR A POSSIBLE FUTURE USE. +! INT *4 IDIM ; SPACE DIMENSION ( 1, 2, OR 3 ) +! INT *4 LDOM (IDOM) ; NEIGHBORING SUB-DOMAIN NUMBER +! 
INT *4 NBPDOM (IDOM) ; NUMBER OF INTER-CONNECT BOUNDARY NODES +! SHARING WITH THE IDOM'TH NEIGHBORING +! SUB-DOMAIN, LDOM(IDOM) +! INT *4 NDOM ; NUMBER OF THE NEIGHBORING SUB-DOMAINS +! INT *4 IPSLF (IBP,IDOM) ; INTER-CONNECT BOUNDARY NODE NUMBER IN THE +! CALLING TASK'S SUB-DOMAIN, FOR THE IDOM'TH +! NEIGHBORING SUB-DOMAIN, LDOM(IDOM) +! INT *4 IPSND (IBP,IDOM) ; INTER-CONNECT BOUNDARY NODE NUMBER IN THE +! SUB-DOMAIN THAT IS RECEIVING THE CALLING +! TASK'S RESIDUALS. +! INT *4 MBPDOM ; THE DIMENSION SIZE OF THE FIRST ELEMENTS +! OF THE PASSED ARRAYS 'IPSLF' AND 'IPSND' +! (I.E. THE MAXIMUM NUMBER OF THE +! INTER-CONNECT BOUNDARY NODES FOR A +! NEIGHBORING SUB-DOMAIN) +! INT *4 NP ; NUMBER OF THE TOTAL NODES IN THE CALLING +! TASK'S SUB-DOMAIN +! INT *4 IUT0 ; FILE NUMBER TO WRITE ERROR MESSAGE +! INT *4 MAXBUF ; LENGTH OF THE PASSED COMMUNICATION BUFFERS +! 'BUFSND' AND 'BUFRCV' IN WORDS. 'MAXBUF' +! MUST BE NO SMALLER THAN 4 TIMES THE TOTAL +! NUMBER OF INTER-CONNECT BOUNDARY NODES IN +! THE CALLING TASK +! +! (2) OUTPUT +! INT *4 IERR ; RETURN CODE WHOSE VALUE WILL BE EITHER +! 0 --- INDICATING SUCCESSFUL TERMINATION +! OR 1 --- INDICATING OCCURRENCE OF SOME ERROR CONDITIONS +! +! (3) INPUT-OUTPUT +! REAL*4 FX(IP) ; X-DIRECTION RESIDUAL VECTOR +! REAL*4 FY(IP) ; Y-DIRECTION RESIDUAL VECTOR +! REAL*4 FZ(IP) ; Z-DIRECTION RESIDUAL VECTOR +! +! (4) WORK +! REAL*4 BUFSND(IBUF) ; HOLDS THE VALUES OF THE INTER-CONNECT +! BOUNDARY NODE NUMBER IN THE NEIGHBORING +! SUB-DOMAINS AND THE RESIDUALS OF THE +! CALLING TASK'S SUB-DOMAIN WHEN SENDING +! THE RESIDUALS +! +! REAL*4 BUFRCV(IBUF) ; HOLDS THE VALUES OF THE INTER-CONNECT +! BOUNDARY NODE NUMBER IN THE CALLING TASK'S +! SUB-DOMAIN AND THE RESIDUALS OF THE +! NEIGHBORING SUB-DOMAINS AT THE RECEIPT OF +! THE RESIDUALS FROM THE NEIGHBORING +! SUB-DOMAINS + CALL MPI_COMM_RANK(MPI_COMM_WORLD,ITASK,IERR) +! Fujitsu start 202103 +! call xmp_api_init +! +! 
write(*, '("DBG1 : snd_desc = ", i8)') snd_desc + snd_lb(1) = 1 + snd_ub(1) = MAXBUF + rcv_lb(1) = 1 + rcv_ub(1) = MAXBUF +! call xmp_new_coarray(snd_desc, 4, 1, snd_lb, snd_ub, 1, img_dims) +! call xmp_new_coarray(rcv_desc, 4, 1, rcv_lb, rcv_ub, 1, img_dims) +! + call xmp_coarray_bind(snd_desc,BUFSND) + call xmp_coarray_bind(rcv_desc,BUFRCV) +! +! allocate(START_R(1:NP)[*]) + st_lb(1) = 1 + st_ub(1) = NP + st_l_lb(1) = 1 + st_l_ub(1) = 1 + call xmp_new_coarray(st_desc, 4, 1, st_lb, st_ub, 1, img_dims) +! call xmp_new_local_array(st_l_desc, 4, 1, st_l_lb, st_l_ub) + call xmp_new_coarray(st_l_desc, 4, 1, st_l_lb, st_l_ub, 1, img_dims) + call xmp_coarray_bind(st_desc,START_R) + call xmp_coarray_bind(st_l_desc,start_rr_p) +! Fujitsu end 202103 +! allocate(END_R(1:NP)[*]) + allocate(START_S(1:NP)) + allocate(END_S(1:NP)) +! allocate(BUFRCV2(1:MAXBUF)[*]) +! IF (ITASK.eq.0) PRINT *,"CALL DDCOMX" +#ifdef USE_BARRIER + call MPI_BARRIER(MPI_COMM_WORLD, IERR) +#endif + IERR = 0 + + IF(IDIM.EQ.0) THEN + NSKIP=1 + ELSE IF(IDIM.EQ.1) THEN + NSKIP=1 + ELSE IF(IDIM.EQ.2) THEN + NSKIP=2 + ELSE IF(IDIM.EQ.3) THEN + NSKIP=3 + ELSE + WRITE(IUT0,*) EREXP1 + IERR = 1 + RETURN + ENDIF +! +! +! CHECK THE INTERNAL ARRAY SIZE +! +! +! + IF(2*NDOM.GT.MAXDOM) THEN + WRITE(IUT0,*) ERMSGB + WRITE(IUT0,*) EREXP1 + IERR = 1 + RETURN + ENDIF +! +! +! +! POST ALL THE EXPECTED RECEIVES +! +! +! +! PRINT *,"NDOM:",NDOM +! +! Fujitsu start 202103 +! ME=THIS_IMAGE() + ME=xmp_this_image() +! Fujitsu end 202103 + START_DASH=0 + MAX_RECV_LEN = 0 + NSTART = 1 + IF (NDOM > 1) START_R(LDOM(1))=NSTART + DO 110 IDOM = 1 , NDOM + MSGTYP = 1 + IRECV = LDOM(IDOM)-1 + MSGLEN = NSKIP*NBPDOM(IDOM) +! END_R(LDOM(IDOM))=START_R(LDOM(IDOM))+MSGLEN-1 +! IF(ITASK.eq.7) PRINT *,"(Recv)ITASK=",ITASK,"LDOM(",IDOM,")-1=",IRECV + IF(NSTART+MSGLEN-1.GT.MAXBUF) THEN + WRITE(IUT0,*) ERMSGB + WRITE(IUT0,*) EREXP2 + IERR = 1 + RETURN + ENDIF + if(MAX_RECV_LEN.lt.MSGLEN)MAX_RECV_LEN=MSGLEN +! +#ifdef PRECEXP +! 
CALL MPI_IRECV(BUFRCV(NSTART),MSGLEN,MPI_REAL8,IRECV,MSGTYP,MPI_COMM_WORLD,MSGIDS(IDOM),IERR) +#else +! CALL MPI_IRECV(BUFRCV(NSTART),MSGLEN,MPI_REAL ,IRECV,MSGTYP,MPI_COMM_WORLD,MSGIDS(IDOM),IERR) +#endif +! +! IF (ITASK.eq.0) PRINT *,"MSGLEN=",MSGLEN ,"FROM",IRECV +! PRINT *,"MSGLEN=",MSGLEN ,"FROM",IRECV,"TO",ITASK + NSTART = NSTART+MSGLEN + IF(IDOM.LT.NDOM)START_R(LDOM(IDOM+1))=NSTART +! PRINT *,NSTART,MSGLEN + 110 CONTINUE +! PRINT *,"MAX_RECV_LEN = ",MAX_RECV_LEN +! + CALL MPI_BARRIER(MPI_COMM_WORLD,IERR) +! +! +! +! SET UP THE SEND BUFFER +! +! +!C CALL FTRACE_REGION_BEGIN("ddcom3:200-210") +#ifdef USE_DETAIL + call start_collection('ddcomx_210') +#endif +!CDIR PARALLEL DO PRIVATE(NSTART,IP,IPS) + DO 210 IDOM = 1 , NDOM + NSTART = 0 + DO 205 ITMP = 2 , IDOM + NSTART = NSTART + NBPDOM(ITMP-1)*NSKIP + 205 CONTINUE +!CDIR NOINNER + IF(IDIM.EQ.0) THEN +!ocl norecurrence(BUFSND) + DO IBP=1,NBPDOM(IDOM) + NSTART2 = NSTART + NSKIP * (IBP-1) + IP = IPSLF(IBP,IDOM) + IPS = IPSND(IBP,IDOM) + BUFSND(NSTART2+1) = IPS + ENDDO + ELSE IF(IDIM.EQ.1) THEN +!ocl norecurrence(BUFSND) + DO IBP=1,NBPDOM(IDOM) + NSTART2 = NSTART + NSKIP * (IBP-1) + IP = IPSLF(IBP,IDOM) + BUFSND(NSTART2+1) = FX(IP) + ENDDO + ELSE IF(IDIM.EQ.2) THEN +!ocl norecurrence(BUFSND) + DO IBP=1,NBPDOM(IDOM) + NSTART2 = NSTART + NSKIP * (IBP-1) + IP = IPSLF(IBP,IDOM) + BUFSND(NSTART2+1) = FX(IP) + BUFSND(NSTART2+2) = FY(IP) + ENDDO + ELSE IF(IDIM.EQ.3) THEN +!ocl norecurrence(BUFSND) + DO IBP=1,NBPDOM(IDOM) + NSTART2 = NSTART + NSKIP * (IBP-1) + IP = IPSLF(IBP,IDOM) + BUFSND(NSTART2+1) = FX(IP) + BUFSND(NSTART2+2) = FY(IP) + BUFSND(NSTART2+3) = FZ(IP) + ENDDO + ENDIF + 210 CONTINUE +#ifdef USE_DETAIL + call stop_collection('ddcomx_210') +#endif + +! +! +! +! SEND THE RESIDUALS +! +! +! + NSTART = 1 + IF (NDOM > 1) START_S(LDOM(1))=NSTART + DO 220 IDOM = 1 , NDOM + MSGTYP = 1 + ISEND = LDOM(IDOM)-1 + MSGLEN = NSKIP*NBPDOM(IDOM) + END_S(LDOM(IDOM))=START_S(LDOM(IDOM))+MSGLEN-1 +! 
IF(ITASK.eq.7) PRINT *,"(Send)ITASK=",ITASK,"LDOM(",IDOM,")-1=",ISEND + +#ifdef PRECEXP +! CALL MPI_ISEND(BUFSND(NSTART),MSGLEN,MPI_REAL8,ISEND,MSGTYP,MPI_COMM_WORLD,MSGIDS(NDOM+IDOM),IERR) +#else +! CALL MPI_ISEND(BUFSND(NSTART),MSGLEN,MPI_REAL ,ISEND,MSGTYP,MPI_COMM_WORLD,MSGIDS(NDOM+IDOM),IERR) +#endif +! +! IF (ITASK.eq.0) PRINT *,"MSGLEN=",MSGLEN ,"TO",ISEND +! PRINT *,"FROM",ITASK,"MSGLEN=",MSGLEN ,"TO",ISEND + NSTART = NSTART+MSGLEN + IF(IDOM.LT.NDOM)START_S(LDOM(IDOM+1))=NSTART +! PRINT *,NSTART,MSGLEN + 220 CONTINUE +! +! +! Fujitsu start 202103 +! SYNC ALL + call xmp_sync_all(status) +! + call xmp_new_array_section(snd_sec,1) + call xmp_new_array_section(rcv_sec,1) + call xmp_new_array_section(st_sec,1) + call xmp_new_array_section(st_l_sec,1) +! Fujitsu start 202103 +! + DO IDOM = 1, NDOM +! PRINT *,ME,"->",LDOM(IDOM)," BUFRECV(",START_R(ME)[LDOM(IDOM)],":",END_R(ME)[LDOM(IDOM)],")[",LDOM(IDOM),"]=BUFSND(",START_S(LDOM(IDOM)),":",END_S(LDOM(IDOM)),")" +! BUFRCV(START_R(ME)[LDOM(IDOM)]:END_R(ME)[LDOM(IDOM)])[LDOM(IDOM)] = & +! BUFSND(START_S(LDOM(IDOM)):END_S(LDOM(IDOM))) +! Fujitsu start 202103 +! START_RR = START_R(ME)[LDOM(IDOM)] + start1 = ME + end1 = ME + start2 = 1 + end2 = 1 + call xmp_array_section_set_triplet(st_sec,1,start1,end1,1,status) + call xmp_array_section_set_triplet(st_l_sec,1,start2,end2,1,status) + img_dims(1) = LDOM(IDOM) + call xmp_coarray_get(img_dims,st_desc,st_sec, & + st_l_desc,st_l_sec,status) + START_RR = start_rr_p(1) +! Fujitsu end 202103 + END_RR = START_RR + (END_S(LDOM(IDOM)) - START_S(LDOM(IDOM))) +! Fujitsu start 202103 +! write(*, '("DBG2 : img_dims = ", i4, " ME = ", i4)') & +! img_dims(1), ME +! write(*, '(" : START_RR = ", i8, " END_RR = ", i8)') & +! START_RR, END_RR +! BUFRCV(START_RR:END_RR)[LDOM(IDOM)] = & +! 
BUFSND(START_S(LDOM(IDOM)):END_S(LDOM(IDOM))) + start1 = START_RR + end1 = END_RR + call xmp_array_section_set_triplet(rcv_sec,1,start1,end1,1,status) + start1 = START_S(LDOM(IDOM)) + end1 = END_S(LDOM(IDOM)) + call xmp_array_section_set_triplet(snd_sec,1,start1,end1,1,status) + img_dims = LDOM(IDOM) + call xmp_coarray_put(img_dims,rcv_desc,rcv_sec,snd_desc,snd_sec,status); +! write(*, '("DBG3 : BUFSND = ", e12.6, " ", e12.6)') & +! BUFSND(START_RR), BUFSND(END_RR) +! Fujitsu end 202103 + END DO + +! Fujitsu start 202103 +! SYNC ALL + call xmp_sync_all(status) +! Fujitsu end 202103 + +! SYNC ALL +! DO 231 IDOM = 1 , NDOM +! ! SYNC ALL +! ! PRINT *,ME,"->",LDOM(IDOM)," BUFRECV(",START_R(LDOM(IDOM)),":",END_R(LDOM(IDOM)),")=BUFSND(",START_S(ME)[LDOM(IDOM)],":",END_S(ME)[LDOM(IDOM)],")[",LDOM(IDOM),"]" +! BUFRCV(START_R(LDOM(IDOM)):END_R(LDOM(IDOM))) = & +! BUFSND(START_S(ME)[LDOM(IDOM)]:END_S(ME)[LDOM(IDOM)])[LDOM(IDOM)] +! ! SYNC ALL +! 231 CONTINUE +! SYNC ALL + + +! IF(ITASK.eq.0)THEN +! DO 240 II=1,MAXBUF +! IF(.not.BUFRCV(II).eq.BUFRCV2(II)) PRINT *,II,BUFRCV(II),BUFRCV2(II),BUFRCV(II).eq.BUFRCV2(II) +! 240 CONTINUE +! END IF +! CALL MPI_BARRIER(MPI_COMM_WORLD,IERR) +! WAIT FOR THE COMPLETION OF ALL THE REQUESTED COMMUNICATIONS +! +! +! +! CALL MPI_WAITALL(2*NDOM,MSGIDS,MSGSTS,IERR) +! +! +! IMPORTANT NOTES! +! AFTER A NON-BLOCKING SEND/RECEIVE ROUTINE, SUCH AS 'MPI_ISEND' +! OR 'MPI_IRECV', IS CALLED, THE COMMUNICATION REQUEST CREATED BY +! THESE ROUTINES MUST BE FREED EITHER BY EXPLICITLY OR IMPLICITLY. +! 'MPI_REQUEST_FREE' FREES SUCH REQUEST EXPLICITLY, WHILE A ROUTINE +! WHICH IDENTIFIES COMPLETION OF THE REQUEST, SUCH AS 'MPI_WAIT', +! 'MPI_WAITANY', OR 'MPI_WAITALL' IMPLICITLY FREES THE REQUEST. +! THIS INTERFACE PROGRAM USES 'MPI_WAITALL' ROUTINES TO FREE SUCH +! REQUESTS. PAY PARTICULAR ATTENTION IF YOU WISH TO, INSTEAD, USE +! 'MPI_REQUEST_FREE', BECAUSE 'MPI_REQUEST_FREE' FREES THE REQUESTS +! 
REGARDLESS OF THE STATE OF THE PREVIOUSLY CALLED COMMUNICATION +! ROUTINES, THUS SOMETIMES FREES REQUESTS WHICH HAVE NOT BEEN +! COMPLETED. +! +!CTTDEBG +#ifdef USE_DETAIL + call start_collection('ddcomx_A') +#endif +!ocl simd +!ocl swp +!ocl xfill +!ocl loop_nofusion + DO IP=1, NP + DFX(IP)=DBLE(FX(IP)) + ENDDO +!ocl simd +!ocl swp +!ocl xfill +!ocl loop_nofusion + DO IP=1, NP + DFY(IP)=DBLE(FY(IP)) + ENDDO +!ocl simd +!ocl swp +!ocl xfill + DO IP=1, NP + DFZ(IP)=DBLE(FZ(IP)) + ENDDO +#ifdef USE_DETAIL + call stop_collection('ddcomx_A') +#endif +!CTTDEBG END +! +! SUPERIMPOSE THE RECEIVED RESIDUALS +! +! + NSTART = 0 +#ifdef USE_DETAIL + call start_collection('ddcomx_B') +#endif + DO IDOM = 1 , NDOM + IF(IDIM .EQ. 0) THEN + DO IBP = 1, NBPDOM(IDOM) + NSTART2 = NSTART + NSKIP * (IBP-1) + IPSND(IBP,IDOM)=BUFRCV(NSTART2+1)+0.1 + ENDDO + NSTART = NSTART + NSKIP * NBPDOM(IDOM) + + ELSE IF(IDIM .EQ. 1) THEN +!ocl norecurrence(DFX) + DO IBP = 1, NBPDOM(IDOM) + IP = IPSND(IBP,IDOM) + IF(IP.LT.1 .OR. IP.GT.NP) THEN + IERR = 1 + ENDIF + NSTART2 = NSTART + NSKIP * (IBP-1) + DFX(IP) = DFX(IP) + DBLE(BUFRCV(NSTART2+1)) + ENDDO + NSTART = NSTART + NSKIP * NBPDOM(IDOM) + + ELSE IF(IDIM .EQ. 2) THEN +!ocl norecurrence(DFX,DFY) + DO IBP = 1, NBPDOM(IDOM) + IP = IPSND(IBP,IDOM) + IF(IP.LT.1 .OR. IP.GT.NP) THEN + IERR = 1 + ENDIF + NSTART2 = NSTART + NSKIP * (IBP-1) + DFX(IP) = DFX(IP) + DBLE(BUFRCV(NSTART2+1)) + DFY(IP) = DFY(IP) + DBLE(BUFRCV(NSTART2+2)) + ENDDO + NSTART = NSTART + NSKIP * NBPDOM(IDOM) + + ELSE IF(IDIM .EQ. 3) THEN +!ocl norecurrence(DFX,DFY,DFZ) + DO IBP = 1, NBPDOM(IDOM) + IP = IPSND(IBP,IDOM) + IF(IP.LT.1 .OR. IP.GT.NP) THEN + IERR = 1 + ENDIF + NSTART2 = NSTART + NSKIP * (IBP-1) + DFX(IP) = DFX(IP) + DBLE(BUFRCV(NSTART2+1)) + DFY(IP) = DFY(IP) + DBLE(BUFRCV(NSTART2+2)) + DFZ(IP) = DFZ(IP) + DBLE(BUFRCV(NSTART2+3)) + ENDDO + NSTART = NSTART + NSKIP * NBPDOM(IDOM) + ENDIF +!CTTDEBG END +! +! IF(IDIM.EQ.1) THEN +! FX(IP) = FX(IP)+BUFRCV(NSTART+1) +! 
ELSE IF(IDIM .EQ. 2) THEN +! FX(IP) = FX(IP)+BUFRCV(NSTART+1) +! FY(IP) = FY(IP)+BUFRCV(NSTART+2) +! ELSE IF(IDIM .EQ. 3) THEN +! FX(IP) = FX(IP)+BUFRCV(NSTART+1) +! FY(IP) = FY(IP)+BUFRCV(NSTART+2) +! FZ(IP) = FZ(IP)+BUFRCV(NSTART+3) +! ENDIF + ENDDO +#ifdef USE_DETAIL + call stop_collection('ddcomx_B') +#endif +! +!CTTDEBG +#ifdef USE_DETAIL + call start_collection('ddcomx_C') +#endif + IF(IDIM.EQ.1) THEN +!ocl simd +!ocl swp + DO IP=1,NP + FX(IP) = SNGL(DFX(IP)) + ENDDO + ELSE IF(IDIM.EQ.2) THEN +!ocl simd +!ocl swp +!ocl loop_nofusion + DO IP=1,NP + FX(IP) = SNGL(DFX(IP)) + ENDDO +!ocl simd +!ocl swp + DO IP=1,NP + FY(IP) = SNGL(DFY(IP)) + ENDDO + ELSE IF(IDIM.EQ.3) THEN +!ocl simd +!ocl swp +!ocl loop_nofusion + DO IP=1,NP + FX(IP) = SNGL(DFX(IP)) + ENDDO +!ocl simd +!ocl swp +!ocl loop_nofusion + DO IP=1,NP + FY(IP) = SNGL(DFY(IP)) + ENDDO +!ocl simd +!ocl swp + DO IP=1,NP + FZ(IP) = SNGL(DFZ(IP)) + ENDDO + ENDIF +#ifdef USE_DETAIL + call stop_collection('ddcomx_C') +#endif +!CTTDEBG +! +! Fujitsu start 202103 + call xmp_free_array_section(snd_sec) + call xmp_free_array_section(rcv_sec) +! +! call xmp_coarray_deallocate(snd_desc, status) +! call xmp_coarray_deallocate(rcv_desc, status) +! +! call xmp_api_finalize +! Fujitsu end 202103 +! +#ifdef USE_BARRIER + call MPI_BARRIER(MPI_COMM_WORLD, IERR) +#endif + IF(IERR .eq. 1) THEN + WRITE(IUT0,*) ERMSGB + WRITE(IUT0,*) EREXP3 + RETURN + ENDIF +! +! IPART = IPART + RETURN + END +! 
+!CTT SUBROUTINE DDCOMX END diff --git a/FFB-MINI/src/les3x.F b/FFB-MINI/src/les3x.F index 30878ab..f02f1a2 100755 --- a/FFB-MINI/src/les3x.F +++ b/FFB-MINI/src/les3x.F @@ -753,18 +753,12 @@ SUBROUTINE LES3X(FILEIN) C C C -C Fj start 202103 -C call mpi_init(ierr) -C call mpi_comm_size(mpi_comm_world,nnn,ierr) -C call mpi_comm_rank(mpi_comm_world,me,ierr) -C Fj end 202103 -C C NDOM = 0 - CALL DDINIT(NPART,IPART) -C C Fj start 202103 call xmp_api_init C Fj end 202103 +C + CALL DDINIT(NPART,IPART) C C IF(IPART.GE.1) NDOM = 1 C From c72be77c2a3b6df6cc305d9fbefb88a7455f369e Mon Sep 17 00:00:00 2001 From: Yoshikawa Hiroyuki Date: Tue, 9 Mar 2021 20:32:04 +0900 Subject: [PATCH 44/70] Modify POINTER type for xmpAPI of CCS-QCD --- CCS-QCD/src/xmpAPI_comlib.F90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CCS-QCD/src/xmpAPI_comlib.F90 b/CCS-QCD/src/xmpAPI_comlib.F90 index 2c12a9d..b49481a 100644 --- a/CCS-QCD/src/xmpAPI_comlib.F90 +++ b/CCS-QCD/src/xmpAPI_comlib.F90 @@ -547,8 +547,8 @@ subroutine comlib_sendrecv_c16(id) !allocate(sbuff(id%ssize/16)[*]) !allocate(rbuff(id%rsize/16)[*]) ! 
- integer, pointer :: sbuff(:) => null() - integer, pointer :: rbuff(:) => null() + complex(8), pointer :: sbuff(:) => null() + complex(8), pointer :: rbuff(:) => null() integer(8) :: s_desc, r_desc integer(8), dimension(1) :: s_lb,s_ub, r_lb, r_ub integer(4), dimension(1) :: img_dims From 067f39a7528ed7a213acc98ed860f340a19b1837 Mon Sep 17 00:00:00 2001 From: Yoshikawa Hiroyuki Date: Wed, 10 Mar 2021 13:11:11 +0900 Subject: [PATCH 45/70] Modify ADM_COMM_RUN_WORLD on NICAM --- NICAM-DC-MINI/src/share/mod_adm_xmpAPI.f90 | 6 ++++-- NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/NICAM-DC-MINI/src/share/mod_adm_xmpAPI.f90 b/NICAM-DC-MINI/src/share/mod_adm_xmpAPI.f90 index 45773f2..8d35279 100755 --- a/NICAM-DC-MINI/src/share/mod_adm_xmpAPI.f90 +++ b/NICAM-DC-MINI/src/share/mod_adm_xmpAPI.f90 @@ -405,8 +405,10 @@ subroutine ADM_proc_init( rtype ) !--- 2020 Fujitsu end !coarray call MPI_Comm_split(MPI_COMM_WORLD, 0, my_rank, ADM_COMM_RUN_WORLD,ierr) -!coarray ADM_COMM_RUN_WORLD = MPI_COMM_WORLD - ADM_COMM_RUN_WORLD = 1140850688 + !--- 2020 Fujitsu + ADM_COMM_RUN_WORLD = MPI_COMM_WORLD + !ADM_COMM_RUN_WORLD = 1140850688 + !--- 2020 Fujitsu end !--- 2020 Fujitsu !sync all diff --git a/NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 b/NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 index 58f9be1..7a1d0c3 100755 --- a/NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 +++ b/NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 @@ -1334,6 +1334,7 @@ subroutine COMM_setup( & do l=1,ADM_rgn_nmax !coarray !--- 2020 Fujitsu +!print *, 'l=', l, ' COUNT=', max_comm_r2r*halomax, ' ROOT=', prc_tab_rev(ptr_prcid,l)-1, ' COMM=', ADM_comm_run_world call mpi_bcast( & rsize_r2r(1,1,l), & max_comm_r2r*halomax, & From 63f596a78f9c1eb9652298460a099b8b24099910 Mon Sep 17 00:00:00 2001 From: tozaki_chisae Date: Wed, 10 Mar 2021 16:44:47 +0900 Subject: [PATCH 46/70] [WIP] add xmpAPI files. 
(rename) --- FFB-MINI/src/Makefile | 44 +- FFB-MINI/src/Makefile.coarray | 129 ++ FFB-MINI/src/bcgs3x.F | 23 +- FFB-MINI/src/bcgsxe.F | 22 +- FFB-MINI/src/calax3.F | 13 +- FFB-MINI/src/callap.F | 13 +- FFB-MINI/src/dd_mpi/xmpAPI_dd_mpi.F90 | 57 +- FFB-MINI/src/ffb_mini_main.F90 | 12 +- FFB-MINI/src/grad3x.F | 13 +- FFB-MINI/src/les3x.F | 86 +- FFB-MINI/src/lessfx.F | 18 +- FFB-MINI/src/lrfnms.F | 13 +- FFB-MINI/src/nodlex.F | 13 +- FFB-MINI/src/pres3e.F | 8 +- FFB-MINI/src/rcmelm.F | 28 +- FFB-MINI/src/vel3d1.F | 34 +- FFB-MINI/src/vel3d2.F | 13 +- FFB-MINI/src/xmpAPI_bcgs3x.F | 422 +++++ FFB-MINI/src/xmpAPI_bcgsxe.F | 430 +++++ FFB-MINI/src/xmpAPI_calax3.F | 198 +++ FFB-MINI/src/xmpAPI_callap.F | 191 +++ FFB-MINI/src/xmpAPI_elm3dx.F | 131 ++ FFB-MINI/src/xmpAPI_grad3x.F | 293 ++++ FFB-MINI/src/xmpAPI_les3x.F | 2238 +++++++++++++++++++++++++ FFB-MINI/src/xmpAPI_lessfx.F | 325 ++++ FFB-MINI/src/xmpAPI_lrfnms.F | 917 ++++++++++ FFB-MINI/src/xmpAPI_nodlex.F | 81 + FFB-MINI/src/xmpAPI_pres3e.F | 210 +++ FFB-MINI/src/xmpAPI_rcmelm.F | 518 ++++++ FFB-MINI/src/xmpAPI_vel3d1.F | 834 +++++++++ FFB-MINI/src/xmpAPI_vel3d2.F | 408 +++++ 31 files changed, 7426 insertions(+), 309 deletions(-) create mode 100755 FFB-MINI/src/Makefile.coarray create mode 100755 FFB-MINI/src/xmpAPI_bcgs3x.F create mode 100755 FFB-MINI/src/xmpAPI_bcgsxe.F create mode 100755 FFB-MINI/src/xmpAPI_calax3.F create mode 100755 FFB-MINI/src/xmpAPI_callap.F create mode 100755 FFB-MINI/src/xmpAPI_elm3dx.F create mode 100755 FFB-MINI/src/xmpAPI_grad3x.F create mode 100755 FFB-MINI/src/xmpAPI_les3x.F create mode 100755 FFB-MINI/src/xmpAPI_lessfx.F create mode 100755 FFB-MINI/src/xmpAPI_lrfnms.F create mode 100755 FFB-MINI/src/xmpAPI_nodlex.F create mode 100755 FFB-MINI/src/xmpAPI_pres3e.F create mode 100755 FFB-MINI/src/xmpAPI_rcmelm.F create mode 100755 FFB-MINI/src/xmpAPI_vel3d1.F create mode 100755 FFB-MINI/src/xmpAPI_vel3d2.F diff --git a/FFB-MINI/src/Makefile b/FFB-MINI/src/Makefile index 0d65c70..df24480 
100755 --- a/FFB-MINI/src/Makefile +++ b/FFB-MINI/src/Makefile @@ -20,17 +20,17 @@ FFLAGS += -DFFB_MINI_VERSION=\"$(VERSION)\" all: $(LES3X.MPI) $(FFB_MINI) OBJS = \ - les3x.o bcgs3x.o bcgsxe.o calax3.o \ - callap.o caluel.o clrcrs.o \ + xmpAPI_les3x.o xmpAPI_bcgs3x.o xmpAPI_bcgsxe.o xmpAPI_calax3.o \ + xmpAPI_callap.o caluel.o clrcrs.o \ csin3x.o datcnv.o dgnscl.o e2plst.o e2pmtr.o \ - elm3dx.o errchk.o \ + xmpAPI_elm3dx.o errchk.o \ fild3x.o fld3x2.o \ - grad3x.o icalel.o int3dx.o \ - lesrop.o lesrpx.o lessfx.o lumpex.o \ + xmpAPI_grad3x.o icalel.o int3dx.o \ + lesrop.o lesrpx.o xmpAPI_lessfx.o lumpex.o \ match4.o matgau.o mkcrs.o neibr2.o \ - nodlex.o pres3e.o rcmelm.o reordr.o \ + xmpAPI_nodlex.o xmpAPI_pres3e.o xmpAPI_rcmelm.o reordr.o \ rfname.o sethex.o srfexx.o subcnv.o \ - vel3d1.o vel3d2.o \ + xmpAPI_vel3d1.o xmpAPI_vel3d2.o \ mfname.o \ miniapp_util.o @@ -39,7 +39,7 @@ OBJS += metis_wrapper.o endif ifeq (, $(findstring -DNO_REFINER, $(FFLAGS))) -OBJS += lrfnms.o extrfn.o +OBJS += xmpAPI_lrfnms.o extrfn.o endif param.h: param.h.in @@ -75,8 +75,8 @@ $(LIB_DD_MPI): $(LES3X.MPI): $(LIB_GF2) $(LIB_DD_MPI) $(OBJS) les3x_main.o $(LD) $(LDFLAGS) -o $@ $(OBJS) les3x_main.o $(LIB_GF2) $(LIB_DD_MPI) $(LIBS) -$(FFB_MINI): $(LIB_GF2) $(LIB_DD_MPI) $(OBJS) ffb_mini_main.o makemesh.o - $(LD) $(LDFLAGS) -o $@ $(OBJS) ffb_mini_main.o makemesh.o $(LIB_GF2) $(LIB_DD_MPI) $(LIBS) +$(FFB_MINI): $(LIB_GF2) $(LIB_DD_MPI) $(OBJS) xmpAPI_ffb_mini_main.o makemesh.o + $(LD) $(LDFLAGS) -o $@ $(OBJS) xmpAPI_ffb_mini_main.o makemesh.o $(LIB_GF2) $(LIB_DD_MPI) $(LIBS) .SUFFIXES: .SUFFIXES: .f .f90 .F .F90 .c .o @@ -114,16 +114,16 @@ test_fx10: $(FFB_MINI) $(MAKE) -C ../test test_fx10 -ffb_mini_main.o: param.h -ffb_mini_main.o: makemesh.o +xmpAPI_ffb_mini_main.o: param.h +xmpAPI_ffb_mini_main.o: makemesh.o -les3x.o: timing.h -vel3d1.o: timing.h -bcgs3x.o: timing.h -pres3e.o: timing.h -bcgs3x.o: timing.h -rcmelm.o: timing.h -bcgsxe.o: timing.h -callap.o: timing.h -calax3.o: timing.h 
-grad3x.o: timing.h +xmpAPI_les3x.o: timing.h +xmpAPI_vel3d1.o: timing.h +xmpAPI_bcgs3x.o: timing.h +xmpAPI_pres3e.o: timing.h +#bcgs3x.o: timing.h +xmpAPI_rcmelm.o: timing.h +xmpAPI_bcgsxe.o: timing.h +xmpAPI_callap.o: timing.h +xmpAPI_calax3.o: timing.h +xmpAPI_grad3x.o: timing.h diff --git a/FFB-MINI/src/Makefile.coarray b/FFB-MINI/src/Makefile.coarray new file mode 100755 index 0000000..0d65c70 --- /dev/null +++ b/FFB-MINI/src/Makefile.coarray @@ -0,0 +1,129 @@ +LD = +LDFLAGS = + +include ./make_setting + +ifndef LD +LD = $(FC) +endif +ifndef LDFLAGS +LDFLAGS = $(FFLAGS) +endif + +LES3X.MPI = ../bin/les3x.mpi +FFB_MINI = ../bin/ffb_mini + +VERSION = 1.0.0 + +FFLAGS += -DFFB_MINI_VERSION=\"$(VERSION)\" + +all: $(LES3X.MPI) $(FFB_MINI) + +OBJS = \ + les3x.o bcgs3x.o bcgsxe.o calax3.o \ + callap.o caluel.o clrcrs.o \ + csin3x.o datcnv.o dgnscl.o e2plst.o e2pmtr.o \ + elm3dx.o errchk.o \ + fild3x.o fld3x2.o \ + grad3x.o icalel.o int3dx.o \ + lesrop.o lesrpx.o lessfx.o lumpex.o \ + match4.o matgau.o mkcrs.o neibr2.o \ + nodlex.o pres3e.o rcmelm.o reordr.o \ + rfname.o sethex.o srfexx.o subcnv.o \ + vel3d1.o vel3d2.o \ + mfname.o \ + miniapp_util.o + +ifeq (, $(findstring -DNO_METIS, $(FFLAGS))) +OBJS += metis_wrapper.o +endif + +ifeq (, $(findstring -DNO_REFINER, $(FFLAGS))) +OBJS += lrfnms.o extrfn.o +endif + +param.h: param.h.in + ./make_param_h.sh $< > $@ + +ifneq (, $(findstring -DPROF_MAPROF, $(FFLAGS))) + +MAPROF_DIR = ma_prof/src +MAPROF_LIB = $(MAPROF_DIR)/libmaprof_f.a + +FFLAGS += -I$(MAPROF_DIR) +LDFLAGS += -L$(MAPROF_DIR) +LIBS += -lmaprof_f + +$(OBJS): $(MAPROF_LIB) + +export +$(MAPROF_LIB): + $(MAKE) -C $(MAPROF_DIR) f_mpi MAPROF_F="FC FFLAGS" MAX_SECTIONS=30 + +endif + +LIB_GF2 = gf2/libgf2.a +LIB_DD_MPI = dd_mpi/libdd_mpi.a +#LIB_DD_MPI = dd_mpi/dd_mpi.o dd_mpi/ddcom4.o + +$(LIB_GF2): + $(MAKE) -C gf2 + +$(LIB_DD_MPI): + $(MAKE) -C dd_mpi + +$(LES3X.MPI): $(LIB_GF2) $(LIB_DD_MPI) $(OBJS) les3x_main.o + $(LD) $(LDFLAGS) -o $@ $(OBJS) les3x_main.o 
$(LIB_GF2) $(LIB_DD_MPI) $(LIBS) + +$(FFB_MINI): $(LIB_GF2) $(LIB_DD_MPI) $(OBJS) ffb_mini_main.o makemesh.o + $(LD) $(LDFLAGS) -o $@ $(OBJS) ffb_mini_main.o makemesh.o $(LIB_GF2) $(LIB_DD_MPI) $(LIBS) + +.SUFFIXES: +.SUFFIXES: .f .f90 .F .F90 .c .o + +.c.o: + $(CC) $(CFLAGS) -c $< +.f.o: + $(FC) $(FFLAGS) -c $< +.f90.o: + $(FC) $(FFLAGS) -c $< +.F.o: + $(FC) $(FFLAGS) -c $< +.F90.o: + $(FC) $(FFLAGS) -c $< + +clean: + rm -rf *.o *.mod *.xmod *.lst param.h + $(MAKE) -C gf2 clean + $(MAKE) -C dd_mpi clean +ifneq (, $(findstring -DPROF_MAPROF, $(FFLAGS))) + $(MAKE) -C $(MAPROF_DIR) clean +endif + +distclean: clean + rm -rf ../bin/* + $(MAKE) -C ../test clean + +test: $(FFB_MINI) + $(MAKE) -C ../test + +test_k: $(FFB_MINI) + $(MAKE) -C ../test test_k + +test_fx10: $(FFB_MINI) + $(MAKE) -C ../test test_fx10 + + +ffb_mini_main.o: param.h +ffb_mini_main.o: makemesh.o + +les3x.o: timing.h +vel3d1.o: timing.h +bcgs3x.o: timing.h +pres3e.o: timing.h +bcgs3x.o: timing.h +rcmelm.o: timing.h +bcgsxe.o: timing.h +callap.o: timing.h +calax3.o: timing.h +grad3x.o: timing.h diff --git a/FFB-MINI/src/bcgs3x.F b/FFB-MINI/src/bcgs3x.F index efbca40..18ab675 100755 --- a/FFB-MINI/src/bcgs3x.F +++ b/FFB-MINI/src/bcgs3x.F @@ -1,10 +1,7 @@ SUBROUTINE BCGS3X(IMODE,IPART,NMAX,EPS,ME,N,NE,NP, * NPP,NCRS,IPCRS,AAAPC,B,S,NITR,RESR, * NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND,NUMIP, -C Fj -C * WEIGHT,R0,RK,PK,APK,ATK,TK,S0,RX,RY, - * WEIGHT,R0,RK,PK,APK,ATK,TK,S0,rx_desc,ry_desc, -C Fj + * WEIGHT,R0,RK,PK,APK,ATK,TK,S0,RX,RY, * IUT0,IERR, * JUNROL,NPPMAX,NCRS2,TS,TACRS,ITPCRS) #include "timing.h" @@ -34,9 +31,6 @@ SUBROUTINE BCGS3X(IMODE,IPART,NMAX,EPS,ME,N,NE,NP, CCC [WORK] REAL*4 RX(0:N,ME),RY(0:N,ME),WEIGHT(NP), * R0(NP),RK(NP),PK(NP),APK(NP),ATK(NP),TK(NP),S0(NP) -C Fj - INTEGER*8 rx_desc,ry_desc -C Fj C C [FULL UNROLL] INTEGER*4 JUNROL @@ -154,10 +148,7 @@ SUBROUTINE BCGS3X(IMODE,IPART,NMAX,EPS,ME,N,NE,NP, call maprof_time_start(TM_CALAX3) CALL CALAX3(AAAPC, S, RK, NP, NE, NCRS, IPCRS, NPP, * 
N,ME,IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, -C Fj -C * RX,RY,IUT0,IERR, - * rx_desc,ry_desc,IUT0,IERR, -C Fj + * RX,RY,IUT0,IERR, * JUNROL,NPPMAX,NCRS2,TS,TACRS,ITPCRS) call maprof_time_stop(TM_CALAX3) C @@ -222,10 +213,7 @@ SUBROUTINE BCGS3X(IMODE,IPART,NMAX,EPS,ME,N,NE,NP, call maprof_time_start(TM_CALAX3) CALL CALAX3(AAAPC, PK, APK, NP, NE, NCRS, IPCRS, NPP, * N,ME,IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, -C Fj -C * RX,RY,IUT0,IERR, - * rx_desc,ry_desc,IUT0,IERR, -C Fj + * RX,RY,IUT0,IERR, * JUNROL,NPPMAX,NCRS2,TS,TACRS,ITPCRS) call maprof_time_stop(TM_CALAX3) C @@ -283,10 +271,7 @@ SUBROUTINE BCGS3X(IMODE,IPART,NMAX,EPS,ME,N,NE,NP, call maprof_time_start(TM_CALAX3) CALL CALAX3(AAAPC, TK, ATK, NP, NE, NCRS, IPCRS, NPP, * N,ME,IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, -C Fj -C * RX,RY,IUT0,IERR, - * rx_desc,ry_desc,IUT0,IERR, -C Fj + * RX,RY,IUT0,IERR, * JUNROL,NPPMAX,NCRS2,TS,TACRS,ITPCRS) call maprof_time_stop(TM_CALAX3) C diff --git a/FFB-MINI/src/bcgsxe.F b/FFB-MINI/src/bcgsxe.F index 9dd6c8f..9c98f5a 100755 --- a/FFB-MINI/src/bcgsxe.F +++ b/FFB-MINI/src/bcgsxe.F @@ -4,11 +4,7 @@ SUBROUTINE BCGSXE(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * B,NPFIX,LPFIX, * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT,NITR,RESR,S, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, -C Fj -C * R0,RK,PK,APK,ATK,TK,FXYZ,RX,RY,MWRK,WRKN, - * R0,RK,PK,APK,ATK,TK,FXYZ,rx_desc,ry_desc,MWRK, - * WRKN, -C Fj + * R0,RK,PK,APK,ATK,TK,FXYZ,RX,RY,MWRK,WRKN, * IUT0,IERR) #include "timing.h" IMPLICIT NONE @@ -44,7 +40,6 @@ SUBROUTINE BCGSXE(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, REAL*4 RX(0:N,ME),RY(0:N,ME), 1 R0(NE),RK(NE),PK(NE),APK(NE),ATK(NE),TK(NE), 2 FXYZ(3,NP) - INTEGER*8 rx_desc,ry_desc INTEGER*4 MWRK REAL*4 WRKN(MWRK,9) C @@ -165,10 +160,7 @@ SUBROUTINE BCGSXE(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * NPFIX,LPFIX, * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, -C Fj -C * FXYZ,RX,RY,MWRK,WRKN, - * FXYZ,rx_desc,ry_desc,MWRK,WRKN, -C Fj + * FXYZ,RX,RY,MWRK,WRKN, * IUT0,IERR) 
call maprof_time_stop(TM_CALLAP) C @@ -236,10 +228,7 @@ SUBROUTINE BCGSXE(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * NPFIX,LPFIX, * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, -C Fj -C * FXYZ,RX,RY,MWRK,WRKN, - * FXYZ,rx_desc,ry_desc,MWRK,WRKN, -C Fj + * FXYZ,RX,RY,MWRK,WRKN, * IUT0,IERR) call maprof_time_stop(TM_CALLAP) C @@ -302,10 +291,7 @@ SUBROUTINE BCGSXE(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * NPFIX,LPFIX, * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, -C Fj -C * FXYZ,RX,RY,MWRK,WRKN, - * FXYZ,rx_desc,ry_desc,MWRK,WRKN, -C Fj + * FXYZ,RX,RY,MWRK,WRKN, * IUT0,IERR) call maprof_time_stop(TM_CALLAP) C diff --git a/FFB-MINI/src/calax3.F b/FFB-MINI/src/calax3.F index aa0feed..ec71bda 100755 --- a/FFB-MINI/src/calax3.F +++ b/FFB-MINI/src/calax3.F @@ -1,10 +1,7 @@ C======================================================================= SUBROUTINE CALAX3(A, S, AS, NP, NE, NCRS, IPCRS, NPP, * N,ME,IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, -C Fj -C * RX,RY,IUT0,IERR, - * rx_desc,ry_desc,IUT0,IERR, -C Fj + * RX,RY,IUT0,IERR, * JUNROL,NPPMAX,NCRS2,TS,TA,ITPCRS) C======================================================================= #include "timing.h" @@ -19,9 +16,6 @@ SUBROUTINE CALAX3(A, S, AS, NP, NE, NCRS, IPCRS, NPP, INTEGER MAXBUF,IDUM INTEGER N,ME,IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,IUT0,IERR REAL*4 RX,RY -C Fj - INTEGER*8 rx_desc,ry_desc -C Fj C DIMENSION LDOM(NDOM),NBPDOM(NDOM) DIMENSION IPSLF(MBPDOM,NDOM),IPSND(MBPDOM,NDOM) @@ -183,10 +177,7 @@ SUBROUTINE CALAX3(A, S, AS, NP, NE, NCRS, IPCRS, NPP, call maprof_time_start(TM_CALAX3_COM) IDUM = 1 CALL DDCOMX(IPART,IDUM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, -C Fj -C * AS,AS,AS,NP,IUT0,IERR,RX,RY,MAXBUF) - * AS,AS,AS,NP,IUT0,IERR,rx_desc,ry_desc,MAXBUF) -C Fj + * AS,AS,AS,NP,IUT0,IERR,RX,RY,MAXBUF) call maprof_time_stop(TM_CALAX3_COM) IF(IERR.NE.0) THEN WRITE(IUT0,*) diff --git a/FFB-MINI/src/callap.F b/FFB-MINI/src/callap.F index 
8f0945c..660b903 100755 --- a/FFB-MINI/src/callap.F +++ b/FFB-MINI/src/callap.F @@ -5,10 +5,7 @@ SUBROUTINE CALLAP(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * NPFIX,LPFIX, * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, -C Fj -C * FXYZ,RX,RY,MWRK,WRKN, - * FXYZ,rx_desc,ry_desc,MWRK,WRKN, -C Fj + * FXYZ,RX,RY,MWRK,WRKN, * IUT0,IERR) #include "timing.h" IMPLICIT NONE @@ -47,9 +44,6 @@ SUBROUTINE CALLAP(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, C CCC [WORK] REAL*4 RX(0:N,ME),RY(0:N,ME) -C Fj - INTEGER*8 rx_desc,ry_desc -C Fj INTEGER*4 MWRK REAL*4 WRKN(MWRK,9) C @@ -166,10 +160,7 @@ SUBROUTINE CALLAP(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * NPFIX,LPFIX, * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, -C Fj -C * FXYZ,RX,RY,MWRK,WRKN, - * FXYZ,rx_desc,ry_desc,MWRK,WRKN, -C Fj + * FXYZ,RX,RY,MWRK,WRKN, * IUT0,IERR) call maprof_time_stop(TM_GRAD3X) C diff --git a/FFB-MINI/src/dd_mpi/xmpAPI_dd_mpi.F90 b/FFB-MINI/src/dd_mpi/xmpAPI_dd_mpi.F90 index 7f5324f..c45455a 100755 --- a/FFB-MINI/src/dd_mpi/xmpAPI_dd_mpi.F90 +++ b/FFB-MINI/src/dd_mpi/xmpAPI_dd_mpi.F90 @@ -816,7 +816,7 @@ SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT INTEGER*8 :: st_desc, st_l_desc INTEGER*8 :: st_sec, st_l_sec INTEGER*8, DIMENSION(1) :: st_lb, st_ub, st_l_lb, st_l_ub - INTEGER*8 :: start1, end1, start2, end2 + INTEGER*8 :: start_pos, end_pos INTEGER*4 :: img_dims(1) INTEGER*4 status ! Fujitsu end 202103 @@ -1132,12 +1132,14 @@ SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT ! BUFSND(START_S(LDOM(IDOM)):END_S(LDOM(IDOM))) ! Fujitsu start 202103 ! 
START_RR = START_R(ME)[LDOM(IDOM)] - start1 = ME - end1 = ME - start2 = 1 - end2 = 1 - call xmp_array_section_set_triplet(st_sec,1,start1,end1,1,status) - call xmp_array_section_set_triplet(st_l_sec,1,start2,end2,1,status) + start_pos = ME + end_pos = ME + call xmp_array_section_set_triplet(st_sec,1, & + start_pos,end_pos,1,status) + start_pos = 1 + end_pos = 1 + call xmp_array_section_set_triplet(st_l_sec,1, & + start_pos,end_pos,1,status) img_dims(1) = LDOM(IDOM) call xmp_coarray_get(img_dims,st_desc,st_sec, & st_l_desc,st_l_sec,status) @@ -1145,22 +1147,30 @@ SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT ! Fujitsu end 202103 END_RR = START_RR + (END_S(LDOM(IDOM)) - START_S(LDOM(IDOM))) ! Fujitsu start 202103 -! write(*, '("DBG2 : img_dims = ", i4, " ME = ", i4)') & -! img_dims(1), ME -! write(*, '(" : START_RR = ", i8, " END_RR = ", i8)') & -! START_RR, END_RR ! BUFRCV(START_RR:END_RR)[LDOM(IDOM)] = & ! BUFSND(START_S(LDOM(IDOM)):END_S(LDOM(IDOM))) - start1 = START_RR - end1 = END_RR - call xmp_array_section_set_triplet(rcv_sec,1,start1,end1,1,status) - start1 = START_S(LDOM(IDOM)) - end1 = END_S(LDOM(IDOM)) - call xmp_array_section_set_triplet(snd_sec,1,start1,end1,1,status) + start_pos = START_RR + end_pos = END_RR + call xmp_array_section_set_triplet(rcv_sec,1, & + start_pos,end_pos,1,status) + start_pos = START_S(LDOM(IDOM)) + end_pos = END_S(LDOM(IDOM)) + call xmp_array_section_set_triplet(snd_sec,1, & + start_pos,end_pos,1,status) img_dims = LDOM(IDOM) - call xmp_coarray_put(img_dims,rcv_desc,rcv_sec,snd_desc,snd_sec,status); -! write(*, '("DBG3 : BUFSND = ", e12.6, " ", e12.6)') & + call xmp_coarray_put(img_dims,rcv_desc,rcv_sec, & + snd_desc,snd_sec,status); +! IF(IDOM .EQ. 1) THEN +! write(*, '("DBG2 : img_dims = ",i4," ME = ",i4," this_img = ",i4)') & +! img_dims(1), ME, xmp_this_image() +! write(*, '(" : START_RR = ",i16," END_RR = ",i16)') & +! START_RR, END_RR +!! +! 
write(*, '("DBG3 : BUFSND = ", e12.6," ",e12.6)') & ! BUFSND(START_RR), BUFSND(END_RR) +! write(*, '("DBG3 : BUFSND = ", e12.6," ",e12.6)') & +! BUFSND(1), BUFSND(1) +! ENDIF ! Fujitsu end 202103 END DO @@ -1354,13 +1364,8 @@ SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT !CTTDEBG ! ! Fujitsu start 202103 - call xmp_free_array_section(snd_sec) - call xmp_free_array_section(rcv_sec) -! -! call xmp_coarray_deallocate(snd_desc, status) -! call xmp_coarray_deallocate(rcv_desc, status) -! -! call xmp_api_finalize +! call xmp_free_array_section(snd_sec) +! call xmp_free_array_section(rcv_sec) ! Fujitsu end 202103 ! #ifdef USE_BARRIER diff --git a/FFB-MINI/src/ffb_mini_main.F90 b/FFB-MINI/src/ffb_mini_main.F90 index 1a40c72..d3996b4 100755 --- a/FFB-MINI/src/ffb_mini_main.F90 +++ b/FFB-MINI/src/ffb_mini_main.F90 @@ -1,9 +1,7 @@ program ffb_mini -!Fj - use mpi + !use mpi use makemesh -!Fj -! include "mpif.h" + include "mpif.h" !implicit none integer :: ierr @@ -36,8 +34,7 @@ program ffb_mini intrinsic :: command_argument_count -!Fj - call MPI_Init(ierr) + !call MPI_Init(ierr) call MPI_Comm_rank(MPI_COMM_WORLD, myrank, ierr) call MPI_Comm_size(MPI_COMM_WORLD, nprocs, ierr) @@ -118,9 +115,6 @@ program ffb_mini call LES3X(file_parm) -!Fj -! 
call MPI_Finalize(ierr) - contains subroutine print_usage_and_exit() diff --git a/FFB-MINI/src/grad3x.F b/FFB-MINI/src/grad3x.F index 5ed006d..73c3780 100755 --- a/FFB-MINI/src/grad3x.F +++ b/FFB-MINI/src/grad3x.F @@ -4,10 +4,7 @@ SUBROUTINE GRAD3X(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * NPFIX,LPFIX, * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, -C Fj -C * FXYZ,RX,RY,MWRK,WRKN, - * FXYZ,rx_desc,ry_desc,MWRK,WRKN, -C Fj + * FXYZ,RX,RY,MWRK,WRKN, * IUT0,IERR) #include "timing.h" IMPLICIT NONE @@ -45,9 +42,6 @@ SUBROUTINE GRAD3X(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, C CCC [WORK] REAL*4 RX(0:N,ME),RY(0:N,ME) -C Fj - INTEGER*8 rx_desc,ry_desc -C Fj INTEGER*4 MWRK REAL*4 WRKN(MWRK,9) C @@ -242,10 +236,7 @@ SUBROUTINE GRAD3X(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, call maprof_time_start(TM_GRAD3X_COM) CALL DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, * WRKN(1,4),WRKN(1,5),WRKN(1,6),NP,IUT0,IERR, -C Fj -C * RX,RY,MAXBUF) - * rx_desc,ry_desc,MAXBUF) -C Fj + * RX,RY,MAXBUF) call maprof_time_stop(TM_GRAD3X_COM) IF(IERR.NE.0) THEN WRITE(IUT0,*) diff --git a/FFB-MINI/src/les3x.F b/FFB-MINI/src/les3x.F index f02f1a2..e6365f5 100755 --- a/FFB-MINI/src/les3x.F +++ b/FFB-MINI/src/les3x.F @@ -16,8 +16,6 @@ C======================================================================C C* PROGRAM LES3X SUBROUTINE LES3X(FILEIN) - use xmp_api - use mpi #include "timing.h" !#include "xmp_coarray.h" IMPLICIT NONE @@ -324,23 +322,11 @@ SUBROUTINE LES3X(FILEIN) * NODWK1(:,:),LEWRK(:,:), * LWRK01(:),LWRK02(:),LWRK04(:) REAL*4, ALLOCATABLE:: -CC Fj start 202103 -CC * RX(:,:)[:], RY(:,:)[:], WRKN(:), - * WRKN(:), -CC Fj end 202103 + * RX(:,:)[:], RY(:,:)[:], WRKN(:), * WRK01(:),WRK02(:),WRK03(:),WRK04(:), * WRK05(:),WRK06(:),WRK07(:),WRK08(:), * WRK09(:),WRK10(:),WRK11(:),WRK12(:), * WRK13(:),WRK3(:,:) -CC Fj start 202103 - REAL*4 , POINTER :: RX ( : , : ) => null ( ) - REAL*4 , POINTER :: RY ( : , : ) => null ( ) - INTEGER*8 :: rx_desc, ry_desc - INTEGER*8, 
DIMENSION(2) :: rx_lb, rx_ub, ry_lb, ry_ub - INTEGER*4 :: img_dims(1) - INTEGER*4 :: status -C INTEGER :: ierr, nnn, me -CC Fj end 202103 REAL*8,ALLOCATABLE:: * DWRK01(:,:),DWRK02(:),DWRK03(:,:,:), * DWRK04(:,:),DWRK05(:) @@ -754,12 +740,7 @@ SUBROUTINE LES3X(FILEIN) C C C NDOM = 0 -C Fj start 202103 - call xmp_api_init -C Fj end 202103 -C CALL DDINIT(NPART,IPART) -C C IF(IPART.GE.1) NDOM = 1 C IF(IPART.GE.2) THEN @@ -1109,8 +1090,8 @@ SUBROUTINE LES3X(FILEIN) ALLOCATE(LWRK01(MWRK), STAT=LERR(03)) ALLOCATE(LWRK02(MWRK), STAT=LERR(04)) ALLOCATE(LWRK04(MWRK), STAT=LERR(05)) -C ALLOCATE(RX(N1,ME)[*], STAT=LERR(06)) -C ALLOCATE(RY(N1,ME)[*], STAT=LERR(07)) + ALLOCATE(RX(N1,ME)[*], STAT=LERR(06)) + ALLOCATE(RY(N1,ME)[*], STAT=LERR(07)) ALLOCATE(WRKN(MWRK*9), STAT=LERR(08)) ALLOCATE(WRK01(MWRK), STAT=LERR(09)) ALLOCATE(WRK02(MWRK), STAT=LERR(10)) @@ -1131,20 +1112,6 @@ SUBROUTINE LES3X(FILEIN) ALLOCATE(DWRK03(3,N1,MGAUSS), STAT=LERR(25)) ALLOCATE(DWRK04(3,N1 ), STAT=LERR(26)) ALLOCATE(DWRK05( MGAUSS), STAT=LERR(27)) -C Fj start 202103 - rx_lb(1) = 1 - rx_lb(2) = 1 - rx_ub(1) = N1 - rx_ub(2) = ME - ry_lb(1) = 1 - ry_lb(2) = 1 - ry_ub(1) = N1 - ry_ub(2) = ME - call xmp_new_coarray(rx_desc, 4, 1, rx_lb, rx_ub, 1, img_dims) - call xmp_new_coarray(ry_desc, 4, 1, ry_lb, ry_ub, 1, img_dims) - call xmp_coarray_bind(rx_desc,RX) - call xmp_coarray_bind(ry_desc,RY) -C Fj start 202103 CALL ERRCHK(IUT6,IPART,27,LERR,IERR) IF(IERR.NE.0) THEN WRITE(IUT0,*) BLANK @@ -1452,10 +1419,7 @@ SUBROUTINE LES3X(FILEIN) * MDOM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, * WRK01,WRK02,WRK03,LWRK06, * DWRK3,LWRK01,LWRK02,WRK04,NODWK1,NODWK2,NODWK3, -C Fj -C * RX,RY,NPB0, - * rx_desc,ry_desc,NPB0, -C Fj + * RX,RY,NPB0, * NPB1,LPB1,XPB1,YPB1,ZPB1, * NPB2,LPB2,XPB2,YPB2,ZPB2, * LWRK03,LWRK04, @@ -1575,10 +1539,7 @@ SUBROUTINE LES3X(FILEIN) * NPCCL ,LPCCL1,LPCCL2, * IPART ,MPINT ,NPINT ,LPINT1,LPINT2,LPINT3, * MDOM ,NDOM ,LDOM ,NBPDOM,MBPDOM,IPSLF,IPSND, -C Fj -C * LPBTOA,IUT0,IUT6,IERR,RX,RY, - * 
LPBTOA,IUT0,IUT6,IERR,rx_desc,ry_desc, -C Fj + * LPBTOA,IUT0,IUT6,IERR,RX,RY, * MWRK,WRK01,WRK02,WRK03,WRK04,WRK05,WRK06, * LWRK01,LEWRK) CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) @@ -1665,10 +1626,7 @@ SUBROUTINE LES3X(FILEIN) MELM=NELM+1 CALL ELM3DX(MGAUSS,IGAUSH, * MELM,N1,NE,NP,NEX,XD,YD,ZD,NODE, -C Fj * SNI ,DNXI,DNYI,DNZI,SN,RX,RY,WRKN, -C * SNI ,DNXI,DNYI,DNZI,SN,rx_desc,ry_desc,WRKN, -C Fj * E,EX,EY,EZ,EXX,EYY,EZZ,EXY,EXZ,EYZ, * DWRK01,DWRK02,DWRK03,DWRK04,DWRK05,IUT0,IERR) CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) @@ -1706,10 +1664,7 @@ SUBROUTINE LES3X(FILEIN) ENDDO IDUM=1 CALL DDCOMX(IPART,IDUM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, -C Fj -C * CM,CM,CM,NP,IUT0,IERR,RX,RY,MAXBUF) - * CM,CM,CM,NP,IUT0,IERR,rx_desc,ry_desc,MAXBUF) -C Fj + * CM,CM,CM,NP,IUT0,IERR,RX,RY,MAXBUF) DO 2700 IP=1,NP CM(IP)=1.0E0/CM(IP) 2700 CONTINUE @@ -1720,10 +1675,7 @@ SUBROUTINE LES3X(FILEIN) WRITE(IUT6,*) ' ** INTERPOLATING PRESSURE TO NODES **' CALL NODLEX(NODE,ME,NE,NP,N1,NEX,SN, * IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, -C Fj -C * P,PN,CM,IUT0,IERR,RX,RY,MAXBUF) - * P,PN,CM,IUT0,IERR,rx_desc,ry_desc,MAXBUF) -C Fj + * P,PN,CM,IUT0,IERR,RX,RY,MAXBUF) IF(IERRA.NE.0) THEN WRITE(IUT0,*) BLANK WRITE(IUT0,*) ERMSGC @@ -1874,10 +1826,7 @@ SUBROUTINE LES3X(FILEIN) * LWRK01,LWRK02, * WRK01,WRK02,WRK03,WRK04,WRK05,WRK06, * WRK07,WRK08,WRK09,WRK10,WRK11,WRK12, -C Fj -C * RX,RY, - * rx_desc,ry_desc, -C Fj + * RX,RY, * JUNROL,NPPMAX,NCRS2,WRK13,TACRS,ITPCRS, * IUT0,IERR) call maprof_time_stop(TM_VEL3D1) @@ -1903,10 +1852,7 @@ SUBROUTINE LES3X(FILEIN) * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * LWRK01,LWRK02,WRK3,WRK01,WRK02,WRK03,WRK04, * WRK05,WRK06,WRK07,WRK08,WRK09,WRK10, -C Fj -C * PRCM,APRCM,RX,RY,MWRK,WRKN, - * PRCM,APRCM,rx_desc,ry_desc,MWRK,WRKN, -C Fj + * PRCM,APRCM,RX,RY,MWRK,WRKN, * IUT0,IERR) call maprof_time_stop(TM_PRES3E) IF(IERR.NE.0) GOTO 9999 @@ -1925,10 +1871,7 @@ SUBROUTINE LES3X(FILEIN) * NPWALL,LPWALL,UWALL,VWALL,WWALL, * NPSYMT,NPSYM2,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, * IPART 
,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, -C Fj -C * RX,RY,MWRK,WRKN,WRK3,WRK01, - * rx_desc,ry_desc,MWRK,WRKN,WRK3,WRK01, -C Fj + * RX,RY,MWRK,WRKN,WRK3,WRK01, * IUT0,IERR, * WRK05) C @@ -2202,15 +2145,6 @@ SUBROUTINE LES3X(FILEIN) IF(IPART.GE.2) CLOSE(IUT6) CALL DDEXIT C -C -C Fj start 202103 - call xmp_coarray_deallocate(rx_desc, status) - call xmp_coarray_deallocate(ry_desc, status) -C - call xmp_api_finalize -C - call mpi_finalize(ierr) -C Fj end 202103 C STOP C diff --git a/FFB-MINI/src/lessfx.F b/FFB-MINI/src/lessfx.F index f02487f..a282c44 100755 --- a/FFB-MINI/src/lessfx.F +++ b/FFB-MINI/src/lessfx.F @@ -18,10 +18,7 @@ SUBROUTINE LESSFX(LOCAL,NODE,MB,NE,NP,N,N1,NS,NSP,N2D, * NPCCL ,LPCCL1,LPCCL2, * IPART ,MPINT ,NPINT ,LPINT1,LPINT2,LPINT3, * MDOM ,NDOM ,LDOM ,NBPDOM,MBPDOM,IPSLF,IPSND, -C Fj -C * LPBTOA,IUT0,IUT6,IERR,RX,RY, - * LPBTOA,IUT0,IUT6,IERR,rx_desc,ry_desc, -C Fj + * LPBTOA,IUT0,IUT6,IERR,RX,RY, * MWRK,WRK1,WRK2,WRK3,WRK4,WRK5,WRK6, * IWRK,IWRK2) IMPLICIT NONE @@ -39,9 +36,6 @@ SUBROUTINE LESSFX(LOCAL,NODE,MB,NE,NP,N,N1,NS,NSP,N2D, * LPCCL1(NPCCL),LPCCL2(NPCCL) , * WRK1(NE),WRK2(NE),WRK3(NE),WRK4(NP),WRK5(NP),WRK6(NP), * IWRK(MWRK),IWRK2(2,MWRK),RX(0:N,NE),RY(0:N,NE) -C Fj - INTEGER*8 :: rx_desc, ry_desc -C Fj C DIMENSION LPINT1(MPINT),LPINT2(MPINT),LPINT3(MPINT), 1 LDOM (MDOM) ,NBPDOM(MDOM) , @@ -179,10 +173,7 @@ SUBROUTINE LESSFX(LOCAL,NODE,MB,NE,NP,N,N1,NS,NSP,N2D, IDIM = 0 CALL DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM, * IPSLF,IPSND,MBPDOM,WRK4,WRK5,WRK6,NP, -C Fj -C * IUT0,IERR,RX,RY,MAXBUF) - * IUT0,IERR,rx_desc,ry_desc,MAXBUF) -C Fj + * IUT0,IERR,RX,RY,MAXBUF) CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) IF(IERRA.NE.0) THEN @@ -298,10 +289,7 @@ SUBROUTINE LESSFX(LOCAL,NODE,MB,NE,NP,N,N1,NS,NSP,N2D, IDIM = 3 CALL DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM, * IPSLF,IPSND,MBPDOM,WRK4,WRK5,WRK6,NP, -C Fj -C * IUT0,IERR,RX,RY,MAXBUF) - * IUT0,IERR,rx_desc,ry_desc,MAXBUF) -C Fj + * IUT0,IERR,RX,RY,MAXBUF) CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) IF(IERRA.NE.0) THEN 
WRITE(IUT0,*) diff --git a/FFB-MINI/src/lrfnms.F b/FFB-MINI/src/lrfnms.F index 7646af0..6ccc956 100755 --- a/FFB-MINI/src/lrfnms.F +++ b/FFB-MINI/src/lrfnms.F @@ -10,10 +10,7 @@ SUBROUTINE LRFNMS(IRFNFF,IRFN,NGRID, * MDOM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, * UFIX,VFIX,WFIX,LPFRM, * CRD,LWRK01,LWRK02,WRK04,NDRFN,NDORG,NODEBK, -C Fj -C * RX,RY,NPB0, - * rx_desc,ry_desc,NPB0, -C Fj + * RX,RY,NPB0, * NPB1,LPB1,XPB1,YPB1,ZPB1, * NPB2,LPB2,XPB2,YPB2,ZPB2, * ITYPOR,ITYPRF,IUT6,IUT0,IERR) @@ -77,9 +74,6 @@ SUBROUTINE LRFNMS(IRFNFF,IRFN,NGRID, INTEGER*4 LPFRM(MP) REAL*8 CRD(MP*3) REAL*4 RX(ME*8),RY(ME*8) -C Fj - INTEGER*8 :: rx_desc, ry_desc -C Fj INTEGER*4 NODED(8),LEACNV(ME) INTEGER*4 LWRK01(ME),LWRK02(MP) INTEGER*4 WRK04(MP),NDRFN(ME*8),NDORG(NE*8),NODEBK(8,NE) @@ -201,10 +195,7 @@ SUBROUTINE LRFNMS(IRFNFF,IRFN,NGRID, C IDIM = 0 CALL DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, -C Fj -C * WRK04,WRK04,WRK04,NP,IUT0,IERR,RX,RY,MAXBUF) - * WRK04,WRK04,WRK04,NP,IUT0,IERR,rx_desc,ry_desc,MAXBUF) -C Fj + * WRK04,WRK04,WRK04,NP,IUT0,IERR,RX,RY,MAXBUF) CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) IF(IERRA.NE.0) THEN WRITE(IUT0,*) diff --git a/FFB-MINI/src/nodlex.F b/FFB-MINI/src/nodlex.F index e0a6a5b..d9d0f47 100755 --- a/FFB-MINI/src/nodlex.F +++ b/FFB-MINI/src/nodlex.F @@ -1,10 +1,7 @@ SUBROUTINE NODLEX * (NODE,ME,NE,NP,N1,NEX,SN, * IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, -C Fj -C * VALELM,VALNOD,CM,IUT0,IERR,BUFSND,BUFRCV,MAXBUF) - * VALELM,VALNOD,CM,IUT0,IERR,rx_desc,ry_desc,MAXBUF) -C Fj + * VALELM,VALNOD,CM,IUT0,IERR,BUFSND,BUFRCV,MAXBUF) C IMPLICIT NONE C @@ -17,9 +14,6 @@ SUBROUTINE NODLEX REAL*4 VALELM(NE),VALNOD(NP),CM(NP) INTEGER*4 IUT0,IERR,MAXBUF REAL*4 BUFSND(MAXBUF),BUFRCV(MAXBUF) -C Fj - INTEGER*8 rx_desc,ry_desc -C Fj C CHARACTER*60 ERMSGC & /' ## SUBROUTINE NODLEX: FATAL ERROR REPORT ; RETURNED' / @@ -59,10 +53,7 @@ SUBROUTINE NODLEX IDUM=1 CALL DDCOMX(IPART,IDUM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, & VALNOD,VALNOD,VALNOD,NP,IUT0,IERR, -C Fj -C & 
BUFSND,BUFRCV,MAXBUF) - & rx_desc,ry_desc,MAXBUF) -C Fj + & BUFSND,BUFRCV,MAXBUF) IF(IERR.NE.0) THEN WRITE(IUT0,*) WRITE(IUT0,*) ERMSGC diff --git a/FFB-MINI/src/pres3e.F b/FFB-MINI/src/pres3e.F index eaf9990..e145b93 100755 --- a/FFB-MINI/src/pres3e.F +++ b/FFB-MINI/src/pres3e.F @@ -9,10 +9,7 @@ SUBROUTINE PRES3E(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * LPFIX,LFIX3D,FXYZ,WRK01,WRK02,WRK03,WRK04, * WRK05,WRK06,WRK07,WRK08,WRK09,B, -C Fj -C * PRCM,APRCM,RX,RY,MWRK,WRKN, - * PRCM,APRCM,rx_desc,ry_desc,MWRK,WRKN, -C Fj + * PRCM,APRCM,RX,RY,MWRK,WRKN, * IUT0,IERR) #include "timing.h" IMPLICIT NONE @@ -52,9 +49,6 @@ SUBROUTINE PRES3E(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, REAL*4 PRCM(MRCM,NE),APRCM(MRCM,NE) INTEGER*4 MWRK REAL*4 WRKN(MWRK,9) -C Fj - INTEGER*8 rx_desc,ry_desc -C Fj C C [IN:MID NODE COLORING] C diff --git a/FFB-MINI/src/rcmelm.F b/FFB-MINI/src/rcmelm.F index 0856a7f..33064b8 100755 --- a/FFB-MINI/src/rcmelm.F +++ b/FFB-MINI/src/rcmelm.F @@ -6,10 +6,7 @@ SUBROUTINE RCMELM(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * FXYZ,WRK01,WRK02,WRK03,WRK04, * WRK05,WRK06,RRCM, W1RCM,W2RCM,PRCM,APRCM, -C Fj -C * RX,RY,MWRK,WRKN, - * rx_desc,ry_desc,MWRK,WRKN, -C Fj + * RX,RY,MWRK,WRKN, * IUT0,IERR) #include "timing.h" IMPLICIT NONE @@ -50,9 +47,6 @@ SUBROUTINE RCMELM(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * WRK03(NE),WRK04(NE),WRK05(NE),WRK06(NE), * RRCM(NE),PRCM(MRCM,NE),APRCM(MRCM,NE), * W1RCM(NE),W2RCM(NE) -C Fj - INTEGER*8 rx_desc,ry_desc -C Fj INTEGER*4 MWRK REAL*4 WRKN(MWRK,9) C @@ -178,10 +172,7 @@ SUBROUTINE RCMELM(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * NPFIX,LPFIX, * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, -C Fj -C * FXYZ,RX,RY,MWRK,WRKN, - * FXYZ,rx_desc,ry_desc,MWRK,WRKN, -C Fj + * FXYZ,RX,RY,MWRK,WRKN, * IUT0,IERR) call maprof_time_stop(TM_CALLAP) C @@ -267,10 +258,7 @@ SUBROUTINE RCMELM(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * NITRB,RESB,W1RCM, * 
IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * WRK01,WRK02,WRK03,WRK04,WRK05,WRK06, -C Fj -C * FXYZ,RX,RY,MWRK,WRKN, - * FXYZ,rx_desc,ry_desc,MWRK,WRKN, -C Fj + * FXYZ,RX,RY,MWRK,WRKN, * IUT0,IERR) call maprof_time_stop(TM_BCGSXE) IF(IERR.NE.0) THEN @@ -427,10 +415,7 @@ SUBROUTINE RCMELM(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * NPFIX,LPFIX, * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, -C Fj -C * FXYZ,RX,RY,MWRK,WRKN, - * FXYZ,rx_desc,ry_desc,MWRK,WRKN, -C Fj + * FXYZ,RX,RY,MWRK,WRKN, * IUT0,IERR) call maprof_time_stop(TM_CALLAP) C @@ -491,10 +476,7 @@ SUBROUTINE RCMELM(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * NPFIX,LPFIX, * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, -C Fj -C * FXYZ,RX,RY,MWRK,WRKN, - * FXYZ,rx_desc,ry_desc,MWRK,WRKN, -C Fj + * FXYZ,RX,RY,MWRK,WRKN, * IUT0,IERR) call maprof_time_stop(TM_CALLAP) C diff --git a/FFB-MINI/src/vel3d1.F b/FFB-MINI/src/vel3d1.F index 92b24d0..d4ea4c4 100755 --- a/FFB-MINI/src/vel3d1.F +++ b/FFB-MINI/src/vel3d1.F @@ -15,10 +15,7 @@ SUBROUTINE VEL3D1 * IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, * LPFIX,LFIX3D, * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,A0,AR,RHSU,RHSV,RHSW, -C Fj -C * RX,RY, - * rx_desc,ry_desc, -C Fj + * RX,RY, * JUNROL,NPPMAX,NCRS2,TS,TACRS,ITPCRS, * IUT0,IERR) C @@ -49,7 +46,6 @@ SUBROUTINE VEL3D1 * A,UG,VG,WG,UE,VE,WE, * WRK01,WRK02,WRK03,WRK04,A0,AR, * RHSU,RHSV,RHSW,APCRS - INTEGER*8 rx_desc,ry_desc C C @@ -624,10 +620,7 @@ SUBROUTINE VEL3D1 IDUM=1 C* call maprof_time_start(TM_VEL3D1_COM) CALL DDCOMX(IPART,IDUM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, -C Fj -C * AR,AR,AR,NP,IUT0,IERR,RX,RY,MAXBUF) - * AR,AR,AR,NP,IUT0,IERR,rx_desc,ry_desc,MAXBUF) -C Fj + * AR,AR,AR,NP,IUT0,IERR,RX,RY,MAXBUF) C* call maprof_time_stop(TM_VEL3D1_COM) IF(IERR.NE.0) THEN WRITE(IUT0,*) @@ -650,10 +643,7 @@ SUBROUTINE VEL3D1 IDUM = 3 C* call maprof_time_start(TM_VEL3D1_COM) CALL DDCOMX(IPART,IDUM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, -C Fj -C * 
RHSU,RHSV,RHSW,NP,IUT0,IERR,RX,RY,MAXBUF) - * RHSU,RHSV,RHSW,NP,IUT0,IERR,rx_desc,ry_desc,MAXBUF) -C Fj + * RHSU,RHSV,RHSW,NP,IUT0,IERR,RX,RY,MAXBUF) C* call maprof_time_stop(TM_VEL3D1_COM) IF(IERR.NE.0) THEN WRITE(IUT0,*) @@ -717,33 +707,21 @@ SUBROUTINE VEL3D1 CALL BCGS3X(IBCGS,IPART,NMAX,EPS,ME,N,NE,NP, * NPP,NCRS,IPCRS,APCRS,RHSU,U,NITRU,RESU, * NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND,NUMIP, -C Fj -C * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,WRK05,RX,RY, - * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,WRK05, - * rx_desc,ry_desc, -C Fj + * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,WRK05,RX,RY, * IUT0,IERR1, * JUNROL,NPPMAX,NCRS2,TS,TACRS,ITPCRS) IF(NITRU.LT.NMAX) IRESU=1 CALL BCGS3X(IBCGS,IPART,NMAX,EPS,ME,N,NE,NP, * NPP,NCRS,IPCRS,APCRS,RHSV,V,NITRV,RESV, * NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND,NUMIP, -C Fj -C * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,WRK05,RX,RY, - * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,WRK05, - * rx_desc,ry_desc, -C Fj + * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,WRK05,RX,RY, * IUT0,IERR2, * JUNROL,NPPMAX,NCRS2,TS,TACRS,ITPCRS) IF(NITRV.LT.NMAX) IRESV=1 CALL BCGS3X(IBCGS,IPART,NMAX,EPS,ME,N,NE,NP, * NPP,NCRS,IPCRS,APCRS,RHSW,W,NITRW,RESW, * NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND,NUMIP, -C Fj -C * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,WRK05,RX,RY, - * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,WRK05, - * rx_desc,ry_desc, -C Fj + * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,WRK05,RX,RY, * IUT0,IERR3, * JUNROL,NPPMAX,NCRS2,TS,TACRS,ITPCRS) IF(NITRW.LT.NMAX) IRESW=1 diff --git a/FFB-MINI/src/vel3d2.F b/FFB-MINI/src/vel3d2.F index b9babb1..6155577 100755 --- a/FFB-MINI/src/vel3d2.F +++ b/FFB-MINI/src/vel3d2.F @@ -8,10 +8,7 @@ SUBROUTINE VEL3D2(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * NPWALL,LPWALL,UWALL,VWALL,WWALL, * NPSYMT,NPSYM2,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, * IPART ,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, -C Fj -C * RX,RY,MWRK,WRKN,FXYZ,UG, - * rx_desc,ry_desc,MWRK,WRKN,FXYZ,UG, -C Fj + * RX,RY,MWRK,WRKN,FXYZ,UG, * IUT0,IERR, * WRK02) IMPLICIT NONE @@ -36,9 +33,6 @@ SUBROUTINE VEL3D2(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * 
UINLT,VINLT,WINLT,UWALL,VWALL,WWALL, * XPSYMT,YPSYMT,ZPSYMT, * RX,RY,FXYZ,UG -C Fj - INTEGER*8 rx_desc,ry_desc -C Fj C C [IN:MID NODE COLORING] C @@ -295,10 +289,7 @@ SUBROUTINE VEL3D2(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, IDIM=3 CALL DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, * WRKN(1,1),WRKN(1,2),WRKN(1,3),NP,IUT0,IERR, -C Fj -C * RX,RY,MAXBUF) - * rx_desc,ry_desc,MAXBUF) -C Fj + * RX,RY,MAXBUF) IF(IERR.NE.0) THEN WRITE(IUT0,*) WRITE(IUT0,*) ERMSGC diff --git a/FFB-MINI/src/xmpAPI_bcgs3x.F b/FFB-MINI/src/xmpAPI_bcgs3x.F new file mode 100755 index 0000000..efbca40 --- /dev/null +++ b/FFB-MINI/src/xmpAPI_bcgs3x.F @@ -0,0 +1,422 @@ + SUBROUTINE BCGS3X(IMODE,IPART,NMAX,EPS,ME,N,NE,NP, + * NPP,NCRS,IPCRS,AAAPC,B,S,NITR,RESR, + * NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND,NUMIP, +C Fj +C * WEIGHT,R0,RK,PK,APK,ATK,TK,S0,RX,RY, + * WEIGHT,R0,RK,PK,APK,ATK,TK,S0,rx_desc,ry_desc, +C Fj + * IUT0,IERR, + * JUNROL,NPPMAX,NCRS2,TS,TACRS,ITPCRS) +#include "timing.h" + IMPLICIT NONE +C +CCCC [INPUT:CONTROL PARAMETERS] + INTEGER*4 IMODE,IPART,NMAX,ME,N,NE,NP,IUT0 + REAL*4 EPS +C +CCCC [INPUT:MATRIX] + INTEGER*4 NCRS,NPP(NP),IPCRS(NCRS) + REAL*4 AAAPC(NCRS),B(NP) +C +CCCC [INPUT:INTER CONNECT DATA] + INTEGER*4 NDOM,MBPDOM,LDOM(NDOM),NBPDOM(NDOM), + * IPSLF(MBPDOM,NDOM),IPSND(MBPDOM,NDOM),NUMIP(NP) +C +CCCC [INPUT:MID NODE DATA] +C +CCC [INPUT/OUTPUT] + REAL*4 S(NP) +C +CCC [OUTPUT] + INTEGER*4 NITR,IERR + REAL*4 RESR +C +CCC [WORK] + REAL*4 RX(0:N,ME),RY(0:N,ME),WEIGHT(NP), + * R0(NP),RK(NP),PK(NP),APK(NP),ATK(NP),TK(NP),S0(NP) +C Fj + INTEGER*8 rx_desc,ry_desc +C Fj +C +C [FULL UNROLL] + INTEGER*4 JUNROL + INTEGER*4 NPPMAX,NCRS2,ITPCRS(NCRS2) + REAL*4 TS(0:NP),TACRS(NCRS2) +C +C [IN:MID NODE COLORING] +C +CCC [LOCAL VARIABLE] + INTEGER*4 MAXBUF,IP + REAL*4 RKDOT,RKDOTA,BDOT,BDOTA,APDOT,APDOTA, + * ATTDOT,ATTDTA,QK,AT2DOT,AT2DTA,RKDOTP,RSDOT,RSDOTA, + * EPS0,RES,RESMIN,ALFA,BETA + DATA EPS0 / 1.E-30 / +C + CHARACTER*60 ERMSGC + & /' ## SUBROUTINE BCGST3X: FATAL ERROR REPORT ; 
RETURNED' / +C + logical dummy +C +C SOLVE MATRIX EQUATION BY BI-CGSTAB METHOS +C ( 3-D CALCULATION : SINGLE WORD & MULTI ELEMENT VERSION ) +C CODED BASED ON 'BCGSTB' +C 2011.01.14 MODIFIED TO SUPPORT MID NODE BY Y. YAMADE +C +C OPERATION COUNTS: 77 FLOP /ELEMENT/ITERATION +C DATA LOADINGS : 101 WORDS/ELEMENT/ITERATION +C ( 69 WORDS CONTIGUOUSLY, +C 8 WORDS BY 4-WORD STRIDE, AND +C 24 WORDS BY LIST ) +C +C +C ARGUMENT LISTINGS +C +C (1)INPUT +C +C (1.1) CONTROL PARAMETERS +C INT *4 IMODE ; BEST SOLUTION STORING FUNCTION (1:ON,0:OFF) +C INT *4 IPART ; SUB-DOMAIN NUMBER THAT THIS TASK SHOULD TAKE/IS +C TAKING CARE OF. IPART BEING SET ZERO MEANS THAT +C THE PROGRAM SHOULD RUN/IS RUNNING IN SERIAL +C MODE. +C INT *4 NMAX ; MAXIMUM NUMBER OF ITERATIONS +C INT *4 ME ; MAX. NUMBER OF TOTAL ELEMENTS +C INT *4 N ; =8 +C INT *4 NE ; NUMBER OF TOTAL ELEMENTS +C INT *4 NP ; NUMBER OF TOTAL NODES +C INT *4 IUT0 ; FILE NUMBER TO REPORT ERROR OCCURRENCE +C REAL*4 EPS ; CONVERGENCE CRITERIA (L2-NORM RESIDUAL) +C +C (1.2) MATRIX +C INT *4 NCRS ; NUMBER OF NONZERO ELEMENTS IN MATRIX OF CRS FORMAT +C INT *4 NPP (IP) ; NUMBER OF ADJACENT NODES TO NODE IP +C INT *4 IPCRS (ICRS) ; NODE NO. TABLE BASED ON CRS FORMAT +C INT *4 AAAPC (ICRS) ; MATRIX COEEFICIENTS +C REAL*4 B (IP) ; GLOBAL FORCE VECTOR +C +C NOTES ; FOR PARALLEL COMPUTATIONS, CONTRIBUTIONS FROM THE +C NEIGHBORING DOMAINS MUST HAVE BEEN SUPERIMPOSED +C TO THE GLOBAL FORCE VECTOR BEFORE THIS SUBROUTINE IS +C CALLED. 
+C +C (1.3) INTER CONNECT DATA +C INT *4 NDOM ; NUMBER OF THE NERIBERING SUB-DOMAINS +C INT *4 MBPDOM ; THE MAXIMUM NUMBER OF THE INTER-CONNECT +C BOUNDARY NODES FOR ONE NEIBERING SUB-DOMAIN +C INT *4 LDOM (IDOM); NEIBERING SUB-DOMAIN NUMBER +C INT *4 NBPDOM (IDOM); NUMBER OF INTER-CONNECT BOUNDARY NODES +C SHARING WITH THE IDOM'TH NEIBERING SUB-DOMAIN, +C LDOM(IDOM) +C INT *4 IPSLF(I,IDOM); INTER-CONNECT BOUNDARY NODE NUMBER IN THE +C CALLING TASK'S SUB-DOMAIN, FOR THE IDOM'TH +C NEIBERING SUB-DOMAIN, LDOM(IDOM) +C INT *4 IPSND(I,IDOM); INTER-CONNECT BOUNDARY NODE NUMBER IN THE +C SUB-DOMAIN THAT IS RECEIVING THE CALLING +C TASK'S RESIDUALS. +C INT *4 NUMIP (IP); +C +C (1.4)MID NODE DATA] +C +C (2) INPUT-OUTPUT +C REAL*4 S (IP); GLOBAL SOLUTION VECTOR (PROVIDE INITIAL GUESS) +C +C (3) OUTPUT +C REAL*4 RES ; L2-NORM RESIDUAL OF THE FINAL SOLUTION VECTOR +C INT *4 NITR ; NUMBER OF ITERATIONS DONE +C INT* 4 IERR ; RETURN CODE TO REPORT ERROR OCCURRENCE +C 0 --- NORMAL TERMINATION +C 1 --- A FATAL ERROR HAS OCCURRED +C + dummy = .false. +C + IF(NMAX.EQ.0) RETURN +C + NITR=0 + MAXBUF=8*NE +C +C +CCC 1. SET WEIGHTING FUNCTION FOR COMPUTING AN INNER PRODUCT +C +C + DO 100 IP = 1 , NP + WEIGHT(IP) = 1.E0/(FLOAT(NUMIP(IP))+1.E0) + 100 CONTINUE +C +C +CCC 2. 
SET INITIAL RESIDUAL VECTOR AND SEARCH-DIRECTION VECTOR +C +C +C OPERATION COUNTS: 36 FLOP /ELEMENT +C DATA LOADINGS : 48 WORDS/ELEMENT +C ( 32 WORDS CONTIGUOUSLY, +C 4 WORDS BY 4-WORD STRIDE, AND +C 12 WORDS BY LIST ) + call maprof_time_start(TM_CALAX3) + CALL CALAX3(AAAPC, S, RK, NP, NE, NCRS, IPCRS, NPP, + * N,ME,IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, +C Fj +C * RX,RY,IUT0,IERR, + * rx_desc,ry_desc,IUT0,IERR, +C Fj + * JUNROL,NPPMAX,NCRS2,TS,TACRS,ITPCRS) + call maprof_time_stop(TM_CALAX3) +C + IF(IERR.EQ.1) THEN + WRITE(IUT0,*) ERMSGC + RETURN + ENDIF +C + RKDOT = 0.E0 + BDOT = 0.E0 + DO 400 IP = 1 , NP + RK (IP) = B (IP)-RK (IP) + R0 (IP) = RK(IP) + PK (IP) = RK(IP) + TK (IP) = 0.E0 + S0 (IP) = S(IP) + RKDOT = RKDOT+WEIGHT(IP)*R0(IP)*RK(IP) + BDOT = BDOT+WEIGHT(IP)*B(IP)*B(IP) + 400 CONTINUE +C + IF(IPART.GE.1) THEN + call maprof_time_start(TM_BCGS3X_COM) + CALL DDCOM2(RKDOT,RKDOTA) + CALL DDCOM2(BDOT,BDOTA) + call maprof_time_stop(TM_BCGS3X_COM) + RKDOT = RKDOTA + BDOT = BDOTA + ENDIF +C + IF(ABS(BDOT).LE.EPS0) BDOT = 1.0E0 +C + RES = SQRT(RKDOT) + RESR = RES/SQRT(BDOT) + RESMIN = RES +C +C IF(RES.LE.EPS.OR.RESR.LE.EPS) RETURN +C USE RELATIVE RESIDUAL +#if 0 + IF(RESR.LE.EPS) RETURN +#else + if (EPS > 0.0E0 .and. RESR <= EPS) RETURN +#endif +C +C +CCC 3. 
COMPUTE PRODUCT OF COEFFICIENT MATRIX AND SEARCH-DIRECTION VECTOR +CCC AND INNER PRODUCT OF COMPUTED PRODUCT AND SEARCH-DIRECTION VECTOR +C +C + 10 CONTINUE +C + NITR=NITR+1 +C +C +CCC 3.1 COMPUTE APK,ALFA +C +C +C OPERATION COUNTS: 36 FLOP /ELEMENT +C DATA LOADINGS : 48 WORDS/ELEMENT +C ( 32 WORDS CONTIGUOUSLY, +C 4 WORDS BY 4-WORD STRIDE, AND +C 12 WORDS BY LIST ) + call maprof_time_start(TM_CALAX3) + CALL CALAX3(AAAPC, PK, APK, NP, NE, NCRS, IPCRS, NPP, + * N,ME,IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, +C Fj +C * RX,RY,IUT0,IERR, + * rx_desc,ry_desc,IUT0,IERR, +C Fj + * JUNROL,NPPMAX,NCRS2,TS,TACRS,ITPCRS) + call maprof_time_stop(TM_CALAX3) +C + IF(IERR.EQ.1) THEN + WRITE(IUT0,*) ERMSGC + RETURN + ENDIF +C + APDOT = 0.E0 + DO 700 IP = 1 , NP + APDOT = APDOT+WEIGHT(IP)*R0(IP)*APK(IP) + 700 CONTINUE +C + IF(IPART.GE.1) THEN + call maprof_time_start(TM_BCGS3X_COM) + CALL DDCOM2(APDOT,APDOTA) + call maprof_time_stop(TM_BCGS3X_COM) + APDOT = APDOTA + ENDIF +C +#if 0 + IF(APDOT .EQ. 0.0E0) RETURN + ALFA = RKDOT/APDOT +#else + if (EPS > 0.0E0) then + if (APDOT == 0.0E0) RETURN + ALFA = RKDOT/APDOT + else + if (dummy .or. APDOT == 0.0E0) then + dummy = .true. 
+ ALFA = 0.0E0 + else + ALFA = RKDOT/APDOT + end if + end if +#endif +C +C +CCC 3.2 COMPUTE TK=RK-ALFA*APK +C +C + DO 800 IP = 1 , NP + TK (IP) = RK(IP)-ALFA*APK(IP) + 800 CONTINUE +C +C +CCC 3.3 COMPUTE ATK +C +C +C OPERATION COUNTS: 36 FLOP /ELEMENT +C DATA LOADINGS : 48 WORDS/ELEMENT +C ( 32 WORDS CONTIGUOUSLY, +C 4 WORDS BY 4-WORD STRIDE, AND +C 12 WORDS BY LIST ) + call maprof_time_start(TM_CALAX3) + CALL CALAX3(AAAPC, TK, ATK, NP, NE, NCRS, IPCRS, NPP, + * N,ME,IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, +C Fj +C * RX,RY,IUT0,IERR, + * rx_desc,ry_desc,IUT0,IERR, +C Fj + * JUNROL,NPPMAX,NCRS2,TS,TACRS,ITPCRS) + call maprof_time_stop(TM_CALAX3) +C + IF(IERR.EQ.1) THEN + WRITE(IUT0,*) ERMSGC + RETURN + ENDIF +C +C +CCC 3.4 COMPUTE QK +C +C + ATTDOT = 0.E0 + AT2DOT = 0.E0 + DO 1100 IP = 1 , NP + ATTDOT = ATTDOT+WEIGHT(IP)*ATK(IP)* TK(IP) + AT2DOT = AT2DOT+WEIGHT(IP)*ATK(IP)*ATK(IP) + 1100 CONTINUE +C + IF(IPART.GE.1) THEN + call maprof_time_start(TM_BCGS3X_COM) + CALL DDCOM2(ATTDOT,ATTDTA) + CALL DDCOM2(AT2DOT,AT2DTA) + call maprof_time_stop(TM_BCGS3X_COM) + ATTDOT = ATTDTA + AT2DOT = AT2DTA + ENDIF +C +#if 0 + IF(AT2DOT .EQ. 0.E0) RETURN + QK = ATTDOT/AT2DOT +#else + if (EPS > 0.0E0) then + if (AT2DOT == 0.0E0) RETURN + QK = ATTDOT/AT2DOT + else + if (dummy .or. AT2DOT == 0.0E0) then + dummy = .true. 
+ QK = 0.0E0 + ALFA = 0.0E0 + else + QK = ATTDOT/AT2DOT + end if + end if +#endif +C +C +CCC 3.5 UPDATE SOLUTION VECTOR AND RESIDUAL VECTOR +CCC 3.6 RETURN IF L2-NORM OF UPDATED SOLUTION VECTOR IS LESS THAN CRITERIA +C +C + RKDOTP = RKDOT + RKDOT = 0.E0 + RSDOT = 0.E0 + DO 1200 IP = 1 , NP + S (IP) = S (IP)+ ALFA*PK(IP) + QK*TK (IP) + RK (IP) = TK(IP) - QK*ATK(IP) + RKDOT = RKDOT+WEIGHT(IP)*R0(IP)*RK(IP) + RSDOT = RSDOT+WEIGHT(IP)*RK(IP)*RK(IP) + 1200 CONTINUE +C +C + IF(IPART.GE.1) THEN + call maprof_time_start(TM_BCGS3X_COM) + CALL DDCOM2(RKDOT,RKDOTA) + CALL DDCOM2(RSDOT,RSDOTA) + call maprof_time_stop(TM_BCGS3X_COM) + RKDOT = RKDOTA + RSDOT = RSDOTA + ENDIF +C + RES = SQRT(RSDOT) + RESR = RES/SQRT(BDOT) +C +#if 0 + IF(RESR.LE.EPS) RETURN +#else + if (EPS > 0.0E0 .and. RESR <= EPS) RETURN +#endif +C +C +CCC 3.7 UPDATE SEARCH-DIRECTION VECTOR +C +C +#if 0 + IF(QK .EQ. 0.E0) RETURN + IF(RKDOTP .EQ. 0.E0) RETURN + BETA = (ALFA/QK)*(RKDOT/RKDOTP) +#else + if (EPS > 0.0E0) then + IF(QK .EQ. 0.E0) RETURN + IF(RKDOTP .EQ. 0.E0) RETURN + BETA = (ALFA/QK)*(RKDOT/RKDOTP) + else + if (dummy .or. QK == 0.0E0 .or. RKDOTP == 0.0E0) then + dummy = .true. 
+ RKDOTP = 0.0E0 + BETA = 0.0E0 + else + BETA = (ALFA/QK)*(RKDOT/RKDOTP) + end if + endif +#endif +C + DO 1300 IP = 1 , NP + PK (IP) = RK(IP)+BETA*(PK(IP)-QK*APK(IP)) + 1300 CONTINUE +C +C +CCC 3.7 RETURN IF ITERATION NUMBER HAS REACHED THE GIVEN MAXIMUM NUMBER, +CCC OTHERWISE CONTINUE ITERATIONS UNTIL SOLUTION IS CONVERGED +C +C + IF(NITR.EQ.NMAX) THEN +C + IF(IMODE.EQ.0) RETURN +C + DO 1400 IP = 1 , NP + S(IP)=S0(IP) + 1400 CONTINUE + RES=RESMIN + RETURN + END IF +C + IF(RES.LT.RESMIN) THEN + RESMIN=RES + DO 1500 IP = 1 , NP + S0(IP)=S(IP) + 1500 CONTINUE + ENDIF + GO TO 10 +C + END diff --git a/FFB-MINI/src/xmpAPI_bcgsxe.F b/FFB-MINI/src/xmpAPI_bcgsxe.F new file mode 100755 index 0000000..9dd6c8f --- /dev/null +++ b/FFB-MINI/src/xmpAPI_bcgsxe.F @@ -0,0 +1,430 @@ + SUBROUTINE BCGSXE(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, + * ME,N,N1,NEX,NE,NP,NMAX,EPS,EPSRE, + * NODE,CM,DNXYZ,DNXI,DNYI,DNZI, + * B,NPFIX,LPFIX, + * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT,NITR,RESR,S, + * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, +C Fj +C * R0,RK,PK,APK,ATK,TK,FXYZ,RX,RY,MWRK,WRKN, + * R0,RK,PK,APK,ATK,TK,FXYZ,rx_desc,ry_desc,MWRK, + * WRKN, +C Fj + * IUT0,IERR) +#include "timing.h" + IMPLICIT NONE +C +CCC [LOOP] + INTEGER*4 MCOLOR,MCPART + INTEGER*4 NCOLOR(4),NCPART(MCOLOR,4),LLOOP(MCPART,MCOLOR,4) +C +CCC [FOR MID.NODES] +C +CCC [INPUT] + INTEGER*4 ME,N,N1,NEX(8),NE,NP,NMAX,IUT0 + REAL*4 EPS,EPSRE + INTEGER*4 NODE(N1,NE) + REAL*4 CM(NP), + * DNXYZ(3,N1,ME), + * DNXI(N1,ME),DNYI(N1,ME),DNZI(N1,ME), + * B(NE) + REAL*4 XPSYMT(NPSYMT),YPSYMT(NPSYMT),ZPSYMT(NPSYMT) + INTEGER*4 NPFIX,LPFIX(NPFIX),NPSYMT,LPSYMT(NPSYMT) + INTEGER*4 IPART,NDOM,MBPDOM, + * LDOM(NDOM),NBPDOM(NDOM), + * IPSLF(MBPDOM,NDOM),IPSND(MBPDOM,NDOM) +C +CCC [INPUT/OUTPUT] + REAL*4 S(NE) +C +CCC [OUTPUT] + INTEGER*4 NITR,IERR + REAL*4 RES +C +CCC [WORK] + REAL*4 RX(0:N,ME),RY(0:N,ME), + 1 R0(NE),RK(NE),PK(NE),APK(NE),ATK(NE),TK(NE), + 2 FXYZ(3,NP) + INTEGER*8 rx_desc,ry_desc + INTEGER*4 MWRK + REAL*4 WRKN(MWRK,9) +C +CCCC 
[LOCL] + INTEGER*4 IE + REAL*4 RKDOT,RKDOTA,BDOT,BDOTA,RESR,APDOT,APDOTA, + * ATTDOT,ATTDTA,AT2DOT,AT2DTA, + * RKDOTP,RSDOT,RSDOTA, + * ALFA,BETA,QK + REAL*4 EPS0 + DATA EPS0 / 1.E-30 / +C + CHARACTER*60 ERMSGC + & /' ## SUBROUTINE BCGSXE: FATAL ERROR REPORT ; RETURNED' / +C + logical dummy +C +C +C SOLVE MATRIX EQUATION AT ELEMENTS BY BI-CGSTAB METHOS +C ( 3-D CALCULATION : SINGLE WORD & MULTI ELEMENT VERSION ) +C +C WRITTEN BY Y. YAMADE 2011.01.20 +C +C OPERATION COUNTS: FLOP /ELEMENT/ITERATION +C DATA LOADINGS : WORDS/ELEMENT/ITERATION +C ( WORDS CONTIGUOUSLY, +C WORDS BY 4-WORD STRIDE, AND +C WORDS BY LIST ) +C +C ARGUMENT LISTINGS +C (1) INPUT +C INT *4 ME ; MAX. NUMBER OF TOTAL ELEMENTS +C INT *4 N1 ; THE DIMENSION SIZE OF THE FIRST ELEMENTS OF THE +C PASSED ARRAYS 'NODET' +C INT *4 NE ; NUMBER OF ELEMENTS +C INT *4 NP ; NUMBER OF NODES +C INT *4 NEX (I); INCLUDES NUMBER OF ELEMENTS AND NUMBER OF LOCAL NODES +C INT *4 NE ; NUMBER OF ELEMENTS +C INT *4 NP ; NUMBER OF NODES +C INT *4 NMAX ; NUMBER OF MATRIX SOLVER ITERATIONS +C INT *4 IUT0 ; FILE NUMBER TO REPORT ERROR OCCURENCE +C REAL*4 EPS ; MAXIMUM ALLOWABLE ERROR +C INT *4 NODE (I,IE); NODE TABLE +C REAL*4 CM (IP); INVERSED LUMPED MASS MATRIX +C REAL*4 DNX (I,IE); ELEMENT CENTER VALUE OF NX +C REAL*4 DNY (I,IE); ELEMENT CENTER VALUE OF NY +C REAL*4 DNZ (I,IE); ELEMENT CENTER VALUE OF NZ +C REAL*4 DNXI (I,IE); ELEMENT CENTER VALUE OF NX +C REAL*4 DNYI (I,IE); ELEMENT CENTER VALUE OF NX +C REAL*4 DNZI (I,IE); ELEMENT CENTER VALUE OF NX +C +C REAL*4 B (IE); GLOBAL FORCE VECTOR +C INT *4 NFIX ; NUMBER OF FIX BOUNDARY NODES +C INT *4 LPFIX (IB); FIX BOUNDARY NODES +C +C INT *4 IPART ; SUB-DOMAIN NUMBER THAT THIS TASK SHOULD TAKE/IS +C TAKING CARE OF. IPART BEING SET ZERO MEANS THAT +C THE PROGRAM SHOULD RUN/IS RUNNING IN SERIAL +C MODE. 
+C INT *4 NDOM ; NUMBER OF THE NERIBERING SUB-DOMAINS +C INT*4 MBPDOM ; THE MAXIMUM NUMBER OF THE INTER-CONNECT +C BOUNDARY NODES FOR ONE NEIBERING SUB-DOMAIN +C INT *4 LDOM (IDOM); NEIBERING SUB-DOMAIN NUMBER +C INT *4 NBPDOM (IDOM); NUMBER OF INTER-CONNECT BOUNDARY NODES +C SHARING WITH THE IDOM'TH NEIBERING SUB-DOMAIN, +C LDOM(IDOM) +C INT *4 IPSLF(I,IDOM); INTER-CONNECT BOUNDARY NODE NUMBER IN THE +C CALLING TASK'S SUB-DOMAIN, FOR THE IDOM'TH +C NEIBERING SUB-DOMAIN, LDOM(IDOM) +C INT *4 IPSND(I,IDOM); INTER-CONNECT BOUNDARY NODE NUMBER IN THE +C SUB-DOMAIN THAT IS RECEIVING THE CALLING +C TASK'S RESIDUALS. +C +C (2) INPUT/OUTPUT +C REAL*4 S (IE); GLOBAL SOLUTION VECTOR (PROVIDE INITIAL GUESS) +C +C (3) OUTPUT +C INT *4 NITR ; NUMBER OF ITERATIONS DONE +C REAL*4 RES ; L2-NORM RESIDUAL OF THE FINAL SOLUTION VECTOR +C INT *4 IERR ; RETURN CODE TO REPORT ERROR OCCURRENCE +C 0 --- NORMAL TERMINATION +C 1 --- A FATAL ERROR HAS OCCURRED +C +C (4) WORK +C REAL*4 RX (I,IE); WORK REGION PASSED FOR CALLAP +C REAL*4 RY (I,IE); WORK REGION PASSED FOR CALLAP +C REAL*4 R0 (IE); WORK REGION +C REAL*4 RK (IE); WORK REGION +C REAL*4 PK (IE); WORK REGION +C REAL*4 APK (IE); WORK REGION +C REAL*4 ATK (IE); WORK REGION +C REAL*4 TK (IE); WORK REGION +C REAL*4 FX (IE); WORK REGION +C REAL*4 FY (IE); WORK REGION +C REAL*4 FZ (IE); WORK REGION +C +C + dummy = .false. +C + IF(NMAX.EQ.0) RETURN +C + NITR=0 +C +C +CCC 1. 
SET INITIAL RESIDUAL VECTOR AND SEARCH-DIRECTION VECTOR +C +C +C OPERATION COUNTS: FLOP /ELEMENT +C DATA LOADINGS : WORDS/ELEMENT +C ( WORDS CONTIGUOUSLY, +C WORDS BY 4-WORD STRIDE, AND +C WORDS BY LIST ) + call maprof_time_start(TM_CALLAP) + CALL CALLAP(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, + * ME,N,N1,NE,NP,NEX,NODE, + * S,RK,DNXYZ,DNXI,DNYI,DNZI, + * CM, + * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, + * NPFIX,LPFIX, + * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, +C Fj +C * FXYZ,RX,RY,MWRK,WRKN, + * FXYZ,rx_desc,ry_desc,MWRK,WRKN, +C Fj + * IUT0,IERR) + call maprof_time_stop(TM_CALLAP) +C + IF(IERR.EQ.1) THEN + WRITE(IUT0,*) ERMSGC + RETURN + ENDIF +C + RKDOT = 0.E0 + BDOT = 0.E0 + DO 300 IE = 1 , NE + RK (IE) = B (IE)-RK (IE) + R0 (IE) = RK(IE) + PK (IE) = RK(IE) + TK (IE) = 0.E0 + RKDOT = RKDOT+R0(IE)*RK(IE) + BDOT = BDOT +B (IE)*B (IE) + 300 CONTINUE +C + IF(IPART.GE.1) THEN + call maprof_time_start(TM_PRES3E_COM) + CALL DDCOM2(RKDOT,RKDOTA) + CALL DDCOM2(BDOT,BDOTA) + call maprof_time_stop(TM_PRES3E_COM) + RKDOT = RKDOTA + BDOT = BDOTA + ENDIF +C + IF(ABS(BDOT).LE.EPS0) BDOT = 1.0E0 +C + RES = SQRT(RKDOT) + RESR = RES/SQRT(BDOT) +C +#if 0 + IF(RES.LE.EPS.OR.RESR.LE.EPSRE) RETURN +#else + if (EPS > 0.0) then + IF(RES.LE.EPS.OR.RESR.LE.EPSRE) RETURN + end if +#endif +C +C +CCC 2. 
COMPUTE PRODUCT OF COEFFICIENT MATRIX AND SEARCH-DIRECTION VECTOR +CCC AND INNER PRODUCT OF COMPUTED PRODUCT AND SEARCH-DIRECTION VECTOR +C +C + 10 CONTINUE +C + NITR=NITR+1 +C +C +CCC 2.1 COMPUTE APK,ALFA +C +C +C OPERATION COUNTS: FLOP /ELEMENT +C DATA LOADINGS : WORDS/ELEMENT +C ( WORDS CONTIGUOUSLY, +C WORDS BY 4-WORD STRIDE, AND +C WORDS BY LIST ) + call maprof_time_start(TM_CALLAP) + CALL CALLAP(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, + * ME,N,N1,NE,NP,NEX,NODE, + * PK,APK,DNXYZ,DNXI,DNYI,DNZI, + * CM, + * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, + * NPFIX,LPFIX, + * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, +C Fj +C * FXYZ,RX,RY,MWRK,WRKN, + * FXYZ,rx_desc,ry_desc,MWRK,WRKN, +C Fj + * IUT0,IERR) + call maprof_time_stop(TM_CALLAP) +C + IF(IERR.EQ.1) THEN + WRITE(IUT0,*) ERMSGC + RETURN + ENDIF +C + APDOT = 0.E0 + DO 400 IE = 1 , NE + APDOT = APDOT+R0(IE)*APK(IE) + 400 CONTINUE +C + IF(IPART.GE.1) THEN + call maprof_time_start(TM_PRES3E_COM) + CALL DDCOM2(APDOT,APDOTA) + call maprof_time_stop(TM_PRES3E_COM) + APDOT = APDOTA + ENDIF +C +#if 0 + IF(APDOT .EQ. 0.0E0) RETURN + ALFA = RKDOT/APDOT +#else + if (EPS > 0.0E0) then + IF(APDOT .EQ. 0.0E0) RETURN + ALFA = RKDOT/APDOT + else + if (dummy . or. APDOT == 0.0E0) then + dummy = .true. 
+ ALFA = 0.0 + else + ALFA = RKDOT/APDOT + end if + endif +#endif +C +C +CCC 2.2 COMPUTE TK=RK-ALFA*APK +C +C + DO 500 IE = 1 , NE + TK (IE) = RK(IE)-ALFA*APK(IE) + 500 CONTINUE +C +C +CCC 2.3 COMPUTE ATK +C +C +C OPERATION COUNTS: 36 FLOP /ELEMENT +C DATA LOADINGS : 48 WORDS/ELEMENT +C ( 32 WORDS CONTIGUOUSLY, +C 4 WORDS BY 4-WORD STRIDE, AND +C 12 WORDS BY LIST ) + call maprof_time_start(TM_CALLAP) + CALL CALLAP(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, + * ME,N,N1,NE,NP,NEX,NODE, + * TK,ATK,DNXYZ,DNXI,DNYI,DNZI, + * CM, + * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, + * NPFIX,LPFIX, + * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, +C Fj +C * FXYZ,RX,RY,MWRK,WRKN, + * FXYZ,rx_desc,ry_desc,MWRK,WRKN, +C Fj + * IUT0,IERR) + call maprof_time_stop(TM_CALLAP) +C + IF(IERR.EQ.1) THEN + WRITE(IUT0,*) ERMSGC + RETURN + ENDIF +C +C +CCC 2.4 COMPUTE QK +C +C + ATTDOT = 0.E0 + AT2DOT = 0.E0 + DO 600 IE = 1 , NE + ATTDOT = ATTDOT+ATK(IE)* TK(IE) + AT2DOT = AT2DOT+ATK(IE)*ATK(IE) + 600 CONTINUE +C + IF(IPART.GE.1) THEN + call maprof_time_start(TM_PRES3E_COM) + CALL DDCOM2(ATTDOT,ATTDTA) + CALL DDCOM2(AT2DOT,AT2DTA) + call maprof_time_stop(TM_PRES3E_COM) + ATTDOT = ATTDTA + AT2DOT = AT2DTA + ENDIF +C +#if 0 + IF(AT2DOT .EQ. 0.E0) RETURN + QK = ATTDOT/AT2DOT +#else + if (EPS > 0.0E0) then + IF(AT2DOT .EQ. 0.E0) RETURN + QK = ATTDOT/AT2DOT + else + if (dummy .or. AT2DOT == 0.0E0) then + dummy = .true. 
+ QK = 0.0E0 + ALFA = 0.0E0 + else + QK = ATTDOT/AT2DOT + end if + end if +#endif +C +C +CCC 2.5 UPDATE SOLUTION VECTOR AND RESIDUAL VECTOR +CCC 2.6 RETURN IF L2-NORM OF UPDATED SOLUTION VECTOR IS LESS THAN CRITERIA +C +C + RKDOTP = RKDOT + RKDOT = 0.E0 + RSDOT = 0.E0 + DO 700 IE = 1 , NE + S (IE) = S (IE)+ ALFA*PK(IE) + QK*TK (IE) + RK (IE) = TK(IE) - QK*ATK(IE) + RKDOT = RKDOT+R0(IE)*RK(IE) + RSDOT = RSDOT+RK(IE)*RK(IE) + 700 CONTINUE +C +C + IF(IPART.GE.1) THEN + call maprof_time_start(TM_PRES3E_COM) + CALL DDCOM2(RKDOT,RKDOTA) + CALL DDCOM2(RSDOT,RSDOTA) + call maprof_time_stop(TM_PRES3E_COM) + RKDOT = RKDOTA + RSDOT = RSDOTA + ENDIF +C + RES = SQRT(RSDOT) + RESR = RES/SQRT(BDOT) +C +#if 0 + IF(RES.LE.EPS.OR.RESR.LE.EPSRE) RETURN +#else + if (EPS > 0.0E0) then + IF(RES.LE.EPS.OR.RESR.LE.EPSRE) RETURN + end if +#endif +C +C +CCC 2.7 UPDATE SEARCH-DIRECTION VECTOR +C +C +#if 0 + IF(QK .EQ. 0.E0) RETURN + IF(RKDOTP .EQ. 0.E0) RETURN + BETA = (ALFA/QK)*(RKDOT/RKDOTP) +#else + if (EPS > 0.0E0) then + IF(QK .EQ. 0.E0) RETURN + IF(RKDOTP .EQ. 0.E0) RETURN + BETA = (ALFA/QK)*(RKDOT/RKDOTP) + else + if (dummy .or. QK == 0.0E0 .or. RKDOTP == 0.0E0) then + dummy = .true. 
+ RKDOTP = 0.0E0 + BETA = 0.0E0 + else + BETA = (ALFA/QK)*(RKDOT/RKDOTP) + end if + endif +#endif +C + DO 800 IE = 1 , NE + PK (IE) = RK(IE)+BETA*(PK(IE)-QK*APK(IE)) + 800 CONTINUE +C +C +CCC 2.7 RETURN IF ITERATION NUMBER HAS REACHED THE GIVEN MAXIMUM NUMBER, +CCC OTHERWISE CONTINUE ITERATIONS UNTIL SOLUTION IS CONVERGED +C +C + IF(NITR.EQ.NMAX) THEN + RETURN + END IF +C + GO TO 10 +C + END diff --git a/FFB-MINI/src/xmpAPI_calax3.F b/FFB-MINI/src/xmpAPI_calax3.F new file mode 100755 index 0000000..aa0feed --- /dev/null +++ b/FFB-MINI/src/xmpAPI_calax3.F @@ -0,0 +1,198 @@ +C======================================================================= + SUBROUTINE CALAX3(A, S, AS, NP, NE, NCRS, IPCRS, NPP, + * N,ME,IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, +C Fj +C * RX,RY,IUT0,IERR, + * rx_desc,ry_desc,IUT0,IERR, +C Fj + * JUNROL,NPPMAX,NCRS2,TS,TA,ITPCRS) +C======================================================================= +#include "timing.h" + IMPLICIT NONE +C + INTEGER NP, NE, NCRS + REAL*4 A(NCRS), S(NP), AS(NP) + INTEGER IPCRS(NCRS), NPP(NP) + INTEGER IP, K, ICRS, IP2 + REAL*4 BUF +C + INTEGER MAXBUF,IDUM + INTEGER N,ME,IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,IUT0,IERR + REAL*4 RX,RY +C Fj + INTEGER*8 rx_desc,ry_desc +C Fj +C + DIMENSION LDOM(NDOM),NBPDOM(NDOM) + DIMENSION IPSLF(MBPDOM,NDOM),IPSND(MBPDOM,NDOM) + DIMENSION RX(0:N,ME),RY(0:N,ME) +C +C [FULL UNROOL] + INTEGER*4 JUNROL + INTEGER*4 NPPMAX,NCRS2,ITPCRS(NCRS2) + REAL*4 TS(0:NP),TA(NCRS2) +C +C [IN:MID NODE COLORING] +C + CHARACTER*60 ERMSGC + & /' ## SUBROUTINE CALAXC: FATAL ERROR REPORT ; RETURNED'/ +C +C +C CALCULATE THE PRODUCT OF MATRIX A AND VECTOR X IN PRESSURE EQUATION +C ( 3-D CALCULATION : SINGLE WORD & MULTI ELEMENT VERSION ) +C 2009.12.01 RIST +C +C OPERATION COUNTS: FLOP /ELEMENT +C DATA LOADINGS : WORDS/ELEMENT +C ( WORDS CONTIGUOUSLY, +C WORDS BY 4-WORD STRIDE, AND +C WORDS BY LIST ) +C +C ARGUMENT LISTINGS +C +C (1) INPUT +C ME ; MAX. 
NUMBER OF TOTAL ELEMENTS
+C          N           ; NUMBER OF NODES ASSIGNED TO ONE ELEMENT
+C          NE          ; NUMBER OF TOTAL ELEMENTS
+C          NP          ; NUMBER OF TOTAL NODES
+C
+C          NCRS        ; NUMBER OF NONZERO ELEMENTS IN MATRIX
+C                        OF CRS FORMAT
+C          A     (ICRS); NODE-WISE COEFFICIENT MATRIX IN CRS FORMAT
+C          S       (IP); GLOBAL FORCE VECTOR
+C          IPCRS (ICRS); NODE NO. TABLE BASED ON CRS FORMAT
+C          NPP     (IP); NUMBER OF ADJACENT NODES TO NODE IP
+C
+C          IPART       ; SUB-DOMAIN NUMBER THAT THIS TASK SHOULD TAKE/IS
+C                        TAKING CARE OF. IPART BEING SET ZERO MEANS THAT
+C                        THE PROGRAM SHOULD RUN/IS RUNNING IN SERIAL
+C                        MODE.
+C          LDOM  (IDOM); NEIGHBORING SUB-DOMAIN NUMBER
+C          NBPDOM(IDOM); NUMBER OF INTER-CONNECT BOUNDARY NODES
+C                        SHARING WITH THE IDOM'TH NEIGHBORING SUB-DOMAIN,
+C                        LDOM(IDOM)
+C          NDOM        ; NUMBER OF THE NEIGHBORING SUB-DOMAINS
+C          IPSLF (IBP,IDOM); INTER-CONNECT BOUNDARY NODE NUMBER IN THE
+C                        CALLING TASK'S SUB-DOMAIN, FOR THE IDOM'TH
+C                        NEIGHBORING SUB-DOMAIN, LDOM(IDOM)
+C          IPSND (IBP,IDOM); INTER-CONNECT BOUNDARY NODE NUMBER IN THE
+C                        SUB-DOMAIN THAT IS RECEIVING THE CALLING
+C                        TASK'S RESIDUALS.
+C MBPDOM ; THE MAXIMUM NUMBER OF THE INTER-CONNECT +C BOUNDARY NODES FOR ONE NEIBERING SUB-DOMAIN +C +C IUT0 ; FILE NUMBER TO REPORT ERROR OCCURRENCE +C +C (2) OUTPUT +C AS (IP); GLOBAL SOLUTION VECTOR (PROVIDE INITIAL GUESS) +C IERR ; RETURN CODE TO REPORT ERROR OCCURENCE +C +C (3) WORK +C RX (I,IE); USED IN DDCOMX +C RY (I,IE); USED IN DDCOMX +C +C + call maprof_time_start(TM_CALAX3_CAL) + IERR=0 +C + MAXBUF = NE*(N+1) +C + DO 10 IP=1,NP + AS(IP)=0.0E0 + 10 CONTINUE +C + IF (JUNROL.EQ.0) GOTO 500 +C +CC +CC FULL UNROL +CC + TS(0)=0.0E0 + DO 100 IP=1,NP + TS (IP)=S(IP) + 100 CONTINUE +C + IF (NPPMAX.GT.30) GOTO 500 +C +CC +CC FULL UNROL : NPPMAX <= 30 +CC +#ifndef __AIX__ +!ocl CACHE_SECTOR_SIZE(1,11) +!ocl CACHE_SUBSECTOR_ASSIGN(TS) +!!ocl UNROLL(0) +#endif + DO 210 IP=1,NP + BUF=0.0E0 + ICRS=(IP-1)*30 + BUF=BUF+ TA(ICRS+ 1)*TS(ITPCRS(ICRS+ 1)) + & +TA(ICRS+ 2)*TS(ITPCRS(ICRS+ 2)) + & +TA(ICRS+ 3)*TS(ITPCRS(ICRS+ 3)) + & +TA(ICRS+ 4)*TS(ITPCRS(ICRS+ 4)) + & +TA(ICRS+ 5)*TS(ITPCRS(ICRS+ 5)) + & +TA(ICRS+ 6)*TS(ITPCRS(ICRS+ 6)) + & +TA(ICRS+ 7)*TS(ITPCRS(ICRS+ 7)) + & +TA(ICRS+ 8)*TS(ITPCRS(ICRS+ 8)) + & +TA(ICRS+ 9)*TS(ITPCRS(ICRS+ 9)) + & +TA(ICRS+10)*TS(ITPCRS(ICRS+10)) + & +TA(ICRS+11)*TS(ITPCRS(ICRS+11)) + & +TA(ICRS+12)*TS(ITPCRS(ICRS+12)) + & +TA(ICRS+13)*TS(ITPCRS(ICRS+13)) + & +TA(ICRS+14)*TS(ITPCRS(ICRS+14)) + & +TA(ICRS+15)*TS(ITPCRS(ICRS+15)) + & +TA(ICRS+16)*TS(ITPCRS(ICRS+16)) + & +TA(ICRS+17)*TS(ITPCRS(ICRS+17)) + & +TA(ICRS+18)*TS(ITPCRS(ICRS+18)) + & +TA(ICRS+19)*TS(ITPCRS(ICRS+19)) + & +TA(ICRS+20)*TS(ITPCRS(ICRS+20)) + & +TA(ICRS+21)*TS(ITPCRS(ICRS+21)) + & +TA(ICRS+22)*TS(ITPCRS(ICRS+22)) + & +TA(ICRS+23)*TS(ITPCRS(ICRS+23)) + & +TA(ICRS+24)*TS(ITPCRS(ICRS+24)) + & +TA(ICRS+25)*TS(ITPCRS(ICRS+25)) + & +TA(ICRS+26)*TS(ITPCRS(ICRS+26)) + & +TA(ICRS+27)*TS(ITPCRS(ICRS+27)) + & +TA(ICRS+28)*TS(ITPCRS(ICRS+28)) + & +TA(ICRS+29)*TS(ITPCRS(ICRS+29)) + & +TA(ICRS+30)*TS(ITPCRS(ICRS+30)) + AS(IP)=AS(IP)+BUF + 210 CONTINUE +#ifndef __AIX__ +!ocl 
END_CACHE_SUBSECTOR +!ocl END_CACHE_SECTOR_SIZE +#endif +C + GOTO 900 +CC +CC ORIGINAL +CC + 500 CONTINUE + ICRS=0 + DO 510 IP=1,NP + BUF=0.0E0 + DO 520 K=1,NPP(IP) + ICRS=ICRS+1 + IP2=IPCRS(ICRS) + BUF=BUF+A(ICRS)*S(IP2) + 520 CONTINUE + AS(IP)=AS(IP)+BUF + 510 CONTINUE +C + 900 CONTINUE + call maprof_time_stop(TM_CALAX3_CAL) +C + call maprof_time_start(TM_CALAX3_COM) + IDUM = 1 + CALL DDCOMX(IPART,IDUM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, +C Fj +C * AS,AS,AS,NP,IUT0,IERR,RX,RY,MAXBUF) + * AS,AS,AS,NP,IUT0,IERR,rx_desc,ry_desc,MAXBUF) +C Fj + call maprof_time_stop(TM_CALAX3_COM) + IF(IERR.NE.0) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSGC + RETURN + ENDIF +C + RETURN + END diff --git a/FFB-MINI/src/xmpAPI_callap.F b/FFB-MINI/src/xmpAPI_callap.F new file mode 100755 index 0000000..8f0945c --- /dev/null +++ b/FFB-MINI/src/xmpAPI_callap.F @@ -0,0 +1,191 @@ + SUBROUTINE CALLAP(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, + * ME,N,N1,NE,NP,NEX,NODE, + * S,AS,DNXYZ,DNXI,DNYI,DNZI, + * CM, + * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, + * NPFIX,LPFIX, + * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, +C Fj +C * FXYZ,RX,RY,MWRK,WRKN, + * FXYZ,rx_desc,ry_desc,MWRK,WRKN, +C Fj + * IUT0,IERR) +#include "timing.h" + IMPLICIT NONE +C +CCCC [INPUT:LOOP] + INTEGER*4 MCOLOR,MCPART + INTEGER*4 NCOLOR(4),NCPART(MCOLOR,4),LLOOP(MCPART,MCOLOR,4) +C +CCCC [INPUT] + INTEGER*4 ME,N,N1,NE,NP,NEX(8),NODE(N1,NE), + * IUT0 + REAL*4 S(NE),AS(NE),DNXYZ(3,N1,ME), + * DNXI(N1,ME),DNYI(N1,ME),DNZI(N1,ME),CM(NP) +C +CCCC [INPUT:OVERSET NODE DATA] +C +CCC [INPUT:INTER CONNECT NODES] + INTEGER*4 IPART,NDOM,MBPDOM, + * LDOM(NDOM),NBPDOM(NDOM), + * IPSLF(MBPDOM,NDOM),IPSND(MBPDOM,NDOM) +C +CCC [INPUT:B.C. 
NODES] + INTEGER*4 NPFIX,LPFIX(NPFIX), + * NPSYMT,LPSYMT(NPSYMT) + REAL*4 XPSYMT(NPSYMT),YPSYMT(NPSYMT),ZPSYMT(NPSYMT) +C +CCCC [INPUT:OVERSET NODES] +C +C [IN:MID NODE COLORING] +C +CCCC [WORK:MID NODES] +C +CCC [OUTPUT] + INTEGER*4 IERR + REAL*4 FXYZ(3,NP) +C +CCC [WORK] + REAL*4 RX(0:N,ME),RY(0:N,ME) +C Fj + INTEGER*8 rx_desc,ry_desc +C Fj + INTEGER*4 MWRK + REAL*4 WRKN(MWRK,9) +C +CCC [LOCAL] + INTEGER*4 IMODE + DATA IMODE / 1 / +C + CHARACTER*60 ERMSGC + & /' ## SUBROUTINE CALLAP: FATAL ERROR REPORT ; RETURNED' / +C +C +C CAL. LAPLASIAN (DIVERGENCE OF GRADIENT) +C OF VARIABLE DEFINED AT ELEMENTS +C +C WRITTEN BY Y.YAMADE 2012.07.18 +C +C +C ARGUMENT LISTINGS +C +C (1) INPUT +C INT *4 MLOOP ; MAX. NUMBER OF COLORS +C INT *4 NLOOP ; NUMBER OF COLORS +C INT *4 LLOOP(I,4) ; ADRESS OF COLOR LOOPS +C INT *4 ME ; MAX. NUMBER OF TOTAL ELEMENTS +C INT *4 N ; =8 +C INT *4 N1 ; THE DIMENSION SIZE OF THE FIRST ELEMENTS OF THE +C INT *4 NE ; NUMBER OF ELEMENTS +C INT *4 NP ; NUMBER OF NODES +C INT *4 NEX (I); INCLUDES NUMBER OF ELEMENTS AND NUMBER OF LOCAL NODES +C INT *4 NE ; NUMBER OF ELEMENTS +C INT *4 NP ; NUMBER OF NODES +C INT *4 NODE (I,IE); NODE TABLE +C INT *4 IUT0 ; FILE NUMBER TO REPORT ERROR OCCURENCE +C REAL*4 S (IP); INPUT DATA +C REAL*4 DNX (I,IE); INTEGRATED ELEMENT VECTOR OF NX +C REAL*4 DNY (I,IE); INTEGRATED ELEMENT VECTOR OF NY +C REAL*4 DNZ (I,IE); INTEGRATED ELEMENT VECTOR OF NZ +C REAL*4 DNXI (I,IE); ELEMENT CENTER VALUE OF NX +C REAL*4 DNYI (I,IE); ELEMENT CENTER VALUE OF NY +C REAL*4 DNZI (I,IE); ELEMENT CENTER VALUE OF NZ +C REAL*4 CM (IP); INVERSED LUMPED MASS MATRIX +C +C === B.C.-[1] === ; INTERCONNECT B.C. +C INT *4 IPART ; SUB-DOMAIN NUMBER THAT THIS TASK SHOULD TAKE/IS +C TAKING CARE OF. IPART BEING SET ZERO MEANS THAT +C THE PROGRAM SHOULD RUN/IS RUNNING IN SERIAL +C MODE. 
+C          INT *4 NDOM        ; NUMBER OF THE NEIGHBORING SUB-DOMAINS
+C          INT*4 MBPDOM       ; THE MAXIMUM NUMBER OF THE INTER-CONNECT
+C                               BOUNDARY NODES FOR ONE NEIGHBORING SUB-DOMAIN
+C          INT *4 LDOM (IDOM); NEIGHBORING SUB-DOMAIN NUMBER
+C          INT *4 NBPDOM (IDOM); NUMBER OF INTER-CONNECT BOUNDARY NODES
+C                               SHARING WITH THE IDOM'TH NEIGHBORING SUB-DOMAIN,
+C                               LDOM(IDOM)
+C          INT *4 IPSLF(I,IDOM); INTER-CONNECT BOUNDARY NODE NUMBER IN THE
+C                               CALLING TASK'S SUB-DOMAIN, FOR THE IDOM'TH
+C                               NEIGHBORING SUB-DOMAIN, LDOM(IDOM)
+C          INT *4 IPSND(I,IDOM); INTER-CONNECT BOUNDARY NODE NUMBER IN THE
+C                               SUB-DOMAIN THAT IS RECEIVING THE CALLING
+C                               TASK'S RESIDUALS.
+C
+C === B.C.-[2] === ; GRADIENT-ZERO B.C.
+C          INT *4 NPFIX       ; NUMBER OF FIX BOUNDARY NODES
+C          INT *4 LPFIX   (IB); FIX BOUNDARY NODES
+C
+C === B.C.-[3] === ; SYMMETRIC B.C.
+C          INT *4 NPSYMT      ; NUMBER OF SYMMETRIC BOUNDARY NODES
+C          INT *4 LPSYMT  (IB); SYMMETRIC BOUNDARY NODES
+C          REAL*4 XPSYMT  (IB); X-DIR COMPONENT OF SYMMETRIC NODE NORMAL VECTOR
+C          REAL*4 YPSYMT  (IB); Y-DIR COMPONENT OF SYMMETRIC NODE NORMAL VECTOR
+C          REAL*4 ZPSYMT  (IB); Z-DIR COMPONENT OF SYMMETRIC NODE NORMAL VECTOR
+C
+C === B.C.-[4] === ; OVERSET B.C.
+C +C === B.C.-[5] === ; MID NODES +C INT *4 NPMID; +C INT *4 NGAT ; +C INT *4 NSCT ; +C INT *4 LPMID1 (IBP); +C INT *4 LPMID2 (IBP); +C INT *4 LPMID3 (IBP); +C INT *4 LGAT (IDOM); +C INT *4 NPTGAT (IDOM); +C INT *4 LPMID(I,IDOM); +C INT *4 LPGAT(I,IDOM); +C INT *4 LSCT (IDOM); +C INT *4 NPTSCT (IDOM); +C INT *4 IEMID(I,IDOM); +C INT *4 IPSCT(I,IDOM); +C REAL*4 COMID1 (IBP); LOCAL COORDINATE IN INTERPOLATING ELEMENT +C REAL*4 COMID2 (IBP); LOCAL COORDINATE IN INTERPOLATING ELEMENT +C REAL*4 COMID3 (IBP); LOCAL COORDINATE IN INTERPOLATING ELEMENT +C +C (2) OUTPUT +C REAL*4 SA (IP); LAPLASIAN OF S +C INT *4 IERR ; RETURN CODE TO REPORT ERROR OCCURRENCE +C 0 --- NORMAL TERMINATION +C 1 --- A FATAL ERROR HAS OCCURRED +C +C (3) WORK +C REAL*4 FX (IP); X-COMPONET OF GRADIENT OF S +C REAL*4 FY (IP); Y-COMPONET OF GRADIENT OF S +C REAL*4 FZ (IP); Z-COMPONET OF GRADIENT OF S +C REAL*4 RX (I,IE); WORK REGION PASSED FOR CALLAP +C REAL*4 RY (I,IE); WORK REGION PASSED FOR CALLAP +C +C + IERR=0 +C + call maprof_time_start(TM_GRAD3X) + CALL GRAD3X(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, + * ME,N,N1,NE,NP,NODE, + * S,DNXYZ,CM, + * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, + * NPFIX,LPFIX, + * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, +C Fj +C * FXYZ,RX,RY,MWRK,WRKN, + * FXYZ,rx_desc,ry_desc,MWRK,WRKN, +C Fj + * IUT0,IERR) + call maprof_time_stop(TM_GRAD3X) +C +C +C COMPUTE DIV(U) AT ELEMENTS +C +C OPERATION COUNTS: 26 FLOP /ELEMENT +C DATA LOADINGS : 40 WORDS/ELEMENT +C ( 16 WORDS CONTIGUOUSLY, +C 12 WORDS BY STRIDE, AND +C 12 WORDS BY LIST ) +C + call maprof_time_start(TM_FLD3X2) + CALL FLD3X2(IMODE,ME,NE,NP,NEX,N1,FXYZ, + & AS,NODE,DNXI,DNYI,DNZI,IUT0,IERR) + call maprof_time_stop(TM_FLD3X2) +C + RETURN + END diff --git a/FFB-MINI/src/xmpAPI_elm3dx.F b/FFB-MINI/src/xmpAPI_elm3dx.F new file mode 100755 index 0000000..52027de --- /dev/null +++ b/FFB-MINI/src/xmpAPI_elm3dx.F @@ -0,0 +1,131 @@ + SUBROUTINE ELM3DX(MGAUSS,IGAUSH, + * MELM,N1,NE,NP,NEX,X,Y,Z,NODE, + * 
SNI,DNXI,DNYI,DNZI,SN,DNX,DNY,DNZ, + * E,EX,EY,EZ,EXX,EYY,EZZ,EXY,EXZ,EYZ, + * NN,NC,PSI,PSIC,WW,IUT0,IERR) + IMPLICIT NONE +C + INTEGER MGAUSS,IGAUSH + INTEGER N1,NE,NP,NSKIP4,NEX + INTEGER IELM + REAL*8 X, Y, Z + INTEGER NODE + REAL*4 SNI,DNXI,DNYI,DNZI + REAL*4 SN,DNX,DNY,DNZ,E,EX,EY,EZ,EXX,EYY,EZZ,EXY,EYZ,EXZ + INTEGER IUT0,IERR + REAL*8 NN,NC,PSI,PSIC,WW +C + INTEGER*4 MELM + DIMENSION NEX(12) + DIMENSION X(NP),Y(NP),Z(NP) + DIMENSION NODE(N1,NE+1), + * SNI( N1,NE+1),DNXI(N1,NE+1),DNYI(N1,NE+1),DNZI(N1,NE+1), + * SN ( N1,NE+1),DNX (N1,NE+1),DNY (N1,NE+1),DNZ (N1,NE+1), + * E (MELM), + * EX (MELM),EY (MELM),EZ (MELM), + * EXX(MELM),EYY(MELM),EZZ(MELM), + * EXY(MELM),EYZ(MELM),EXZ(MELM) + DIMENSION NN ( N1,MGAUSS),NC ( N1), + * PSI(3,N1,MGAUSS),PSIC(3,N1),WW(MGAUSS) +C +C ARGUMENT LISTINGS +C (1) INPUT +C MGAUSS ; MAX. NUMBER OF GAUSS POINTS +C IGAUSH ; NUMBER OF GAUSS POINTS FOR HEX. +C +C X (IP); X-COORDINATES OF NODES +C Y (IP); Y-COORDINATES OF NODES +C Z (IP); Z-COORDINATES OF NODES +C NODE (I,IE); NODE NO. TABLE BASED ON ELEMENT +C +C ME ; MAX. NUMBER OF TOTAL ELEMENTS +C N ; NUMBER OF NODES ASSIGNED TO ONE ELEMENT +C NE ; NUMBER OF TOTAL ELEMENTS +C NP ; NUMBER OF TOTAL NODES +C NEX(I) ; INCLUDES NUMBER OF ELEMENTS AND NUMBER OF LOCAL NODES +C AS FOLOOWS +C NEX(1) ; NUMBER OF TET. ELEMENTS +C NEX(2) ; NUMBER OF PYRAMID ELEMENTS +C NEX(3) ; NUMBER OF WEGDE ELEMENTS +C NEX(4) ; NUMBER OF HEX. ELEMENTS +C NEX(5) ; NUMBER OF LOCAL NODES IN A TET. ELEMENT (=4) +C NEX(6) ; NUMBER OF LOCAL NODES IN A PYRAMID ELEMENT (=5) +C NEX(7) ; NUMBER OF LOCAL NODES IN A WEGDE ELEMENT (=6) +C NEX(8) ; NUMBER OF LOCAL NODES IN A HEX. 
ELEMENT (=8) +C +C IUT0 ; FILE NUMBER TO REPORT ERROR OCCURENCE +C +C (2) OUTPUT +C SNI (I,IE); ELEMENT CENTER VALUE OF N +C DNXI (I,IE); ELEMENT CENTER VALUE OF NX +C DNYI (I,IE); ELEMENT CENTER VALUE OF NY +C DNZI (I,IE); ELEMENT CENTER VALUE OF NZ +C +C SN (I,IE); INTEGRATED ELEMENT VECTOR OF N +C DNX (I,IE); INTEGRATED ELEMENT VECTOR OF NX +C DNY (I,IE); INTEGRATED ELEMENT VECTOR OF NY +C DNZ (I,IE); INTEGRATED ELEMENT VECTOR OF NZ +C +C E (IE,I,J); INTEGRATED ELEMENT MATRIX OF N*NT +C EX (I,IE,J); INTEGRATED ELEMENT MATRIX OF N*NXT +C EY (I,IE,J); INTEGRATED ELEMENT MATRIX OF N*NYT +C EZ (I,IE,J); INTEGRATED ELEMENT MATRIX OF N*NZT +C EXX (I,IE,J); INTEGRATED ELEMENT MATRIX OF NX*NXT +C EYY (I,IE,J); INTEGRATED ELEMENT MATRIX OF NY*NYT +C EZZ (I,IE,J); INTEGRATED ELEMENT MATRIX OF NZ*NZT +C EXY (I,IE,J); INTEGRATED ELEMENT MATRIX OF NX*NYT +C EXZ (I,IE,J); INTEGRATED ELEMENT MATRIX OF NX*NZT +C EYZ (I,IE,J); INTEGRATED ELEMENT MATRIX OF NY*NZT +C +C IERR ; RETURN CODE TO REPORT ERROR OCCURENCE +C 0 --- NORMAL TERMINATION +C 1 --- A FATAL ERROR HAS OCCURED +C +C +****************** +***** OTHERS ***** +****************** + INTEGER NEHEX,NHEX + INTEGER IE,J +C +***** ALIAS ***** + NEHEX=NEX(4) + NHEX=NEX(8) + NSKIP4=NEX(12) +C + DO 1000 IE=1,NE + DO 1100 J=1,N1 + SN (J,IE)=0.0E0 + DNX (J,IE)=0.0E0 + DNY (J,IE)=0.0E0 + DNZ (J,IE)=0.0E0 + 1100 CONTINUE + 1000 CONTINUE +C + IERR=0 + IE =1 + IELM=1 +C +********************************** +***** MAKE INTEGRALS FOR HEX ***** +********************************** + CALL SETHEX(N1,IGAUSH,NC,PSIC,NN,PSI,WW,IUT0,IERR) + IF(IERR.NE.0) GOTO 9999 + CALL INT3DX(NEHEX,N1,NHEX,NSKIP4,IGAUSH, + * NC,PSIC,NN,PSI,WW,X,Y,Z,NODE(1,IE), + * SNI(1,IE),DNXI(1,IE),DNYI(1,IE),DNZI(1,IE), + * SN (1,IE),DNX (1,IE),DNY (1,IE),DNZ (1,IE), + * E(IELM),EX(IELM),EY(IELM),EZ(IELM), + * EXX(IELM), EYY(IELM), EZZ(IELM), + * EXY(IELM), EXZ(IELM), EYZ(IELM)) + IE=IE+NEHEX + IELM=IELM+NSKIP4*NEHEX +C + RETURN +C + 9999 CONTINUE + WRITE(IUT0,*) 'ERROR 
OCCURED IN ELM3DX' + IERR=1 + RETURN +C + END diff --git a/FFB-MINI/src/xmpAPI_grad3x.F b/FFB-MINI/src/xmpAPI_grad3x.F new file mode 100755 index 0000000..5ed006d --- /dev/null +++ b/FFB-MINI/src/xmpAPI_grad3x.F @@ -0,0 +1,293 @@ + SUBROUTINE GRAD3X(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, + * ME,N,N1,NE,NP,NODE, + * S,DNXYZ,CM, + * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, + * NPFIX,LPFIX, + * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, +C Fj +C * FXYZ,RX,RY,MWRK,WRKN, + * FXYZ,rx_desc,ry_desc,MWRK,WRKN, +C Fj + * IUT0,IERR) +#include "timing.h" + IMPLICIT NONE +C +CCCC [INPUT:LOOP] + INTEGER*4 MCOLOR,MCPART + INTEGER*4 NCOLOR(4),NCPART(MCOLOR,4),LLOOP(MCPART,MCOLOR,4) +C +CCCC [INPUT] + INTEGER*4 ME,N,N1,NE,NP,NODE(N1,NE), + * IUT0 + REAL*4 S(NE),DNXYZ(3,N1,ME),CM(NP) +C +CCCC [INPUT:OVERSET NODE DATA] +C +CCC [INPUT:INTER CONNECT NODES] + INTEGER*4 IPART,NDOM,MBPDOM, + * LDOM(NDOM),NBPDOM(NDOM), + * IPSLF(MBPDOM,NDOM),IPSND(MBPDOM,NDOM) +C +CCC [INPUT:B.C. NODES] + INTEGER*4 NPFIX,LPFIX(NPFIX), + * NPSYMT,LPSYMT(NPSYMT) + REAL*4 XPSYMT(NPSYMT),YPSYMT(NPSYMT),ZPSYMT(NPSYMT) +C +CCCC [INPUT:OVERSET NODES] +C +C [IN:MID NODE COLORING] +C +CCCC [WORK:MID NODES] +C +CCC [OUTPUT] + INTEGER*4 IERR + REAL*4 FXYZ(3,NP) +C +CCC [WORK] + REAL*4 RX(0:N,ME),RY(0:N,ME) +C Fj + INTEGER*8 rx_desc,ry_desc +C Fj + INTEGER*4 MWRK + REAL*4 WRKN(MWRK,9) +C +CCC [LOCAL] + INTEGER*4 ICOLOR,ICPART,IEE,IES, + * IP1,IP2,IP3,IP4,IP5,IP6,IP7,IP8, + * MAXBUF,IP,IE,IBP + REAL*4 SWRK,COEF +C + INTEGER*4 IDIM + DATA IDIM / 3 / +C + CHARACTER*60 ERMSGC + & /' ## SUBROUTINE GRAD3X: FATAL ERROR REPORT ; RETURNED' / +C +C CAL. GRADIENT OF SCALAR VARIABLE +C THIS ROUTINE SUPPORTS OVERSET NODES AND MID NODES. +C +C WRITTEN BY Y.YAMADE 2012.07.18 +C +C +C ARGUMENT LISTINGS +C +C (1) INPUT +C INT *4 MLOOP ; MAX. NUMBER OF COLORS +C INT *4 NLOOP ; NUMBER OF COLORS +C INT *4 LLOOP(I,4) ; ADRESS OF COLOR LOOPS +C INT *4 ME ; MAX. 
NUMBER OF TOTAL ELEMENTS
+C          INT *4 N           ; =8
+C          INT *4 N1          ; THE DIMENSION SIZE OF THE FIRST ELEMENTS OF THE
+C          INT *4 NE          ; NUMBER OF ELEMENTS
+C          INT *4 NP          ; NUMBER OF NODES
+C          INT *4 NE          ; NUMBER OF ELEMENTS
+C          INT *4 NP          ; NUMBER OF NODES
+C          INT *4 NODE (I,IE); NODE TABLE
+C          INT *4 IUT0        ; FILE NUMBER TO REPORT ERROR OCCURRENCE
+C          REAL*4 S       (IP); INPUT DATA
+C          REAL*4 DNX   (I,IE); INTEGRATED ELEMENT VECTOR OF NX
+C          REAL*4 DNY   (I,IE); INTEGRATED ELEMENT VECTOR OF NY
+C          REAL*4 DNZ   (I,IE); INTEGRATED ELEMENT VECTOR OF NZ
+C          REAL*4 CM      (IP); INVERSED LUMPED MASS MATRIX
+C
+C === B.C.-[1] === ; INTERCONNECT B.C.
+C          INT *4 IPART       ; SUB-DOMAIN NUMBER THAT THIS TASK SHOULD TAKE/IS
+C                               TAKING CARE OF. IPART BEING SET ZERO MEANS THAT
+C                               THE PROGRAM SHOULD RUN/IS RUNNING IN SERIAL
+C                               MODE.
+C          INT *4 NDOM        ; NUMBER OF THE NEIGHBORING SUB-DOMAINS
+C          INT*4 MBPDOM       ; THE MAXIMUM NUMBER OF THE INTER-CONNECT
+C                               BOUNDARY NODES FOR ONE NEIGHBORING SUB-DOMAIN
+C          INT *4 LDOM (IDOM); NEIGHBORING SUB-DOMAIN NUMBER
+C          INT *4 NBPDOM (IDOM); NUMBER OF INTER-CONNECT BOUNDARY NODES
+C                               SHARING WITH THE IDOM'TH NEIGHBORING SUB-DOMAIN,
+C                               LDOM(IDOM)
+C          INT *4 IPSLF(I,IDOM); INTER-CONNECT BOUNDARY NODE NUMBER IN THE
+C                               CALLING TASK'S SUB-DOMAIN, FOR THE IDOM'TH
+C                               NEIGHBORING SUB-DOMAIN, LDOM(IDOM)
+C          INT *4 IPSND(I,IDOM); INTER-CONNECT BOUNDARY NODE NUMBER IN THE
+C                               SUB-DOMAIN THAT IS RECEIVING THE CALLING
+C                               TASK'S RESIDUALS.
+C
+C === B.C.-[2] === ; GRADIENT-ZERO B.C.
+C          INT *4 NPFIX       ; NUMBER OF FIX BOUNDARY NODES
+C          INT *4 LPFIX   (IB); FIX BOUNDARY NODES
+C
+C === B.C.-[3] === ; SYMMETRIC B.C.
+C          INT *4 NPSYMT      ; NUMBER OF SYMMETRIC BOUNDARY NODES
+C          INT *4 LPSYMT  (IB); SYMMETRIC BOUNDARY NODES
+C          REAL*4 XPSYMT  (IB); X-DIR COMPONENT OF SYMMETRIC NODE NORMAL VECTOR
+C          REAL*4 YPSYMT  (IB); Y-DIR COMPONENT OF SYMMETRIC NODE NORMAL VECTOR
+C          REAL*4 ZPSYMT  (IB); Z-DIR COMPONENT OF SYMMETRIC NODE NORMAL VECTOR
+C
+C === B.C.-[4] === ; OVERSET B.C.
+C +C === B.C.-[5] === ; MID NODES +C INT *4 NPMID; +C INT *4 NGAT ; +C INT *4 NSCT ; +C INT *4 LPMID1 (IBP); +C INT *4 LPMID2 (IBP); +C INT *4 LPMID3 (IBP); +C INT *4 LGAT (IDOM); +C INT *4 NPTGAT (IDOM); +C INT *4 LPMID(I,IDOM); +C INT *4 LPGAT(I,IDOM); +C INT *4 LSCT (IDOM); +C INT *4 NPTSCT (IDOM); +C INT *4 IEMID(I,IDOM); +C INT *4 IPSCT(I,IDOM); +C REAL*4 COMID1 (IBP); LOCAL COORDINATE IN INTERPOLATING ELEMENT +C REAL*4 COMID2 (IBP); LOCAL COORDINATE IN INTERPOLATING ELEMENT +C REAL*4 COMID3 (IBP); LOCAL COORDINATE IN INTERPOLATING ELEMENT +C +C (2) OUTPUT +C REAL*4 FX (IP); X-COMPONET OF GRADIENT OF S +C REAL*4 FY (IP); Y-COMPONET OF GRADIENT OF S +C REAL*4 FZ (IP); Z-COMPONET OF GRADIENT OF S +C INT *4 IERR ; RETURN CODE TO REPORT ERROR OCCURRENCE +C 0 --- NORMAL TERMINATION +C 1 --- A FATAL ERROR HAS OCCURRED +C +C (3) WORK +C REAL*4 RX (I,IE); WORK REGION PASSED FOR CALLAP +C REAL*4 RY (I,IE); WORK REGION PASSED FOR CALLAP +C + IERR=0 + MAXBUF = NE*(N+1) +C + call maprof_time_start(TM_GRAD3X_OP0) + DO 1000 IP = 1 , NP + FXYZ(1,IP)=0.0E0 + FXYZ(2,IP)=0.0E0 + FXYZ(3,IP)=0.0E0 + 1000 CONTINUE + call maprof_time_stop(TM_GRAD3X_OP0) +C +C +C OPERATION COUNTS: 48 FLOP /ELEMENT +C DATA LOADINGS : 57 WORDS/ELEMENT +C ( 1 WORDS CONTIGUOUSLY, +C 32 WORDS BY STRIDE, AND +C 24 WORDS BY LIST ) +C + call maprof_time_start(TM_GRAD3X_OP1) + DO 1411 ICOLOR=1,NCOLOR(4) +!ocl norecurrence(FXYZ) + DO 1410 ICPART=1,NCPART(ICOLOR,4) + IES=LLOOP(ICPART ,ICOLOR,4) + IEE=LLOOP(ICPART+1,ICOLOR,4)-1 +!ocl nosimd +!ocl noswp + DO 1400 IE=IES,IEE + IP1=NODE(1,IE) + IP2=NODE(2,IE) + IP3=NODE(3,IE) + IP4=NODE(4,IE) + IP5=NODE(5,IE) + IP6=NODE(6,IE) + IP7=NODE(7,IE) + IP8=NODE(8,IE) +C + SWRK = S(IE) +C + FXYZ(1,IP1)=FXYZ(1,IP1)-SWRK*DNXYZ(1,1,IE) + FXYZ(2,IP1)=FXYZ(2,IP1)-SWRK*DNXYZ(2,1,IE) + FXYZ(3,IP1)=FXYZ(3,IP1)-SWRK*DNXYZ(3,1,IE) +C + FXYZ(1,IP2)=FXYZ(1,IP2)-SWRK*DNXYZ(1,2,IE) + FXYZ(2,IP2)=FXYZ(2,IP2)-SWRK*DNXYZ(2,2,IE) + FXYZ(3,IP2)=FXYZ(3,IP2)-SWRK*DNXYZ(3,2,IE) +C + 
FXYZ(1,IP3)=FXYZ(1,IP3)-SWRK*DNXYZ(1,3,IE) + FXYZ(2,IP3)=FXYZ(2,IP3)-SWRK*DNXYZ(2,3,IE) + FXYZ(3,IP3)=FXYZ(3,IP3)-SWRK*DNXYZ(3,3,IE) +C + FXYZ(1,IP4)=FXYZ(1,IP4)-SWRK*DNXYZ(1,4,IE) + FXYZ(2,IP4)=FXYZ(2,IP4)-SWRK*DNXYZ(2,4,IE) + FXYZ(3,IP4)=FXYZ(3,IP4)-SWRK*DNXYZ(3,4,IE) +C + FXYZ(1,IP5)=FXYZ(1,IP5)-SWRK*DNXYZ(1,5,IE) + FXYZ(2,IP5)=FXYZ(2,IP5)-SWRK*DNXYZ(2,5,IE) + FXYZ(3,IP5)=FXYZ(3,IP5)-SWRK*DNXYZ(3,5,IE) +C + FXYZ(1,IP6)=FXYZ(1,IP6)-SWRK*DNXYZ(1,6,IE) + FXYZ(2,IP6)=FXYZ(2,IP6)-SWRK*DNXYZ(2,6,IE) + FXYZ(3,IP6)=FXYZ(3,IP6)-SWRK*DNXYZ(3,6,IE) +C + FXYZ(1,IP7)=FXYZ(1,IP7)-SWRK*DNXYZ(1,7,IE) + FXYZ(2,IP7)=FXYZ(2,IP7)-SWRK*DNXYZ(2,7,IE) + FXYZ(3,IP7)=FXYZ(3,IP7)-SWRK*DNXYZ(3,7,IE) +C + FXYZ(1,IP8)=FXYZ(1,IP8)-SWRK*DNXYZ(1,8,IE) + FXYZ(2,IP8)=FXYZ(2,IP8)-SWRK*DNXYZ(2,8,IE) + FXYZ(3,IP8)=FXYZ(3,IP8)-SWRK*DNXYZ(3,8,IE) + 1400 CONTINUE + 1410 CONTINUE + 1411 CONTINUE + call maprof_time_stop(TM_GRAD3X_OP1) +C +C + call maprof_time_start(TM_GRAD3X_OP2) + DO 2010 IP=1,NP + WRKN(IP,4)=FXYZ(1,IP) + WRKN(IP,5)=FXYZ(2,IP) + WRKN(IP,6)=FXYZ(3,IP) + 2010 CONTINUE + call maprof_time_stop(TM_GRAD3X_OP2) +C +C +C SUPERIMPOSE NEIBERING ELEMENT CONTRIBUTIONS +C + call maprof_time_start(TM_GRAD3X_COM) + CALL DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, + * WRKN(1,4),WRKN(1,5),WRKN(1,6),NP,IUT0,IERR, +C Fj +C * RX,RY,MAXBUF) + * rx_desc,ry_desc,MAXBUF) +C Fj + call maprof_time_stop(TM_GRAD3X_COM) + IF(IERR.NE.0) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSGC + RETURN + ENDIF +C + call maprof_time_start(TM_GRAD3X_OP3) + DO 2020 IP=1,NP + FXYZ(1,IP)=WRKN(IP,4) + FXYZ(2,IP)=WRKN(IP,5) + FXYZ(3,IP)=WRKN(IP,6) + 2020 CONTINUE +C + DO 2100 IP=1,NP + FXYZ(1,IP)=FXYZ(1,IP)*CM(IP) + FXYZ(2,IP)=FXYZ(2,IP)*CM(IP) + FXYZ(3,IP)=FXYZ(3,IP)*CM(IP) + 2100 CONTINUE + call maprof_time_stop(TM_GRAD3X_OP3) +C +C +C +!ocl norecurrence(FXYZ) + DO 3000 IBP=1,NPFIX + IP=LPFIX(IBP) + FXYZ(1,IP)=0.0E0 + FXYZ(2,IP)=0.0E0 + FXYZ(3,IP)=0.0E0 + 3000 CONTINUE +C +!ocl norecurrence(FXYZ) + DO 3100 IBP = 1 , NPSYMT 
+ COEF= XPSYMT(IBP)*FXYZ(1,LPSYMT(IBP)) + * +YPSYMT(IBP)*FXYZ(2,LPSYMT(IBP)) + * +ZPSYMT(IBP)*FXYZ(3,LPSYMT(IBP)) + FXYZ(1,LPSYMT(IBP)) = FXYZ(1,LPSYMT(IBP))-COEF*XPSYMT(IBP) + FXYZ(2,LPSYMT(IBP)) = FXYZ(2,LPSYMT(IBP))-COEF*YPSYMT(IBP) + FXYZ(3,LPSYMT(IBP)) = FXYZ(3,LPSYMT(IBP))-COEF*ZPSYMT(IBP) + 3100 CONTINUE +C +C +C + RETURN + END diff --git a/FFB-MINI/src/xmpAPI_les3x.F b/FFB-MINI/src/xmpAPI_les3x.F new file mode 100755 index 0000000..f0d04d9 --- /dev/null +++ b/FFB-MINI/src/xmpAPI_les3x.F @@ -0,0 +1,2238 @@ +C======================================================================C +C C +C SOFTWARE NAME : FRONTFLOW_BLUE.8.1 C +C C +C MAIN PRORGRAM : LES3X C +C C +C WRITTEN BY Y.YAMADE C +C WRITTEN BY H.YOSHIMURA C +C WRITTEN BY T.TAKAYAMA C +C C +C C +C CONTACT ADDRESS : IIS, THE UNIVERSITY OF TOKYO, CISS C +C C +C THERMO-FLUID ANALYSIS SOLVERS FOR LARGE-SCALE-ASSEMBLY C +C C +C======================================================================C +C* PROGRAM LES3X + SUBROUTINE LES3X(FILEIN) + use xmp_api + use mpi +#include "timing.h" +!#include "xmp_coarray.h" + IMPLICIT NONE +C + CHARACTER*(*) FILEIN +C + INTEGER*4 N0,N1,N2 + PARAMETER (N0=8,N1=9,N2=8) +C + INTEGER*4 ME,MP,MEP,MPP + INTEGER*4 MB,MDOM,MBPDOM + DATA MEP /20/ + DATA MPP /31/ +C + INTEGER*4 MER,MPR,MEPR,MPPR,MBR,MBPR,MDOMR + DATA MER /-1/ + DATA MPR /-1/ + DATA MEPR /-1/ + DATA MPPR /-1/ + DATA MBR /-1/ + DATA MBPR /-1/ + DATA MDOMR /-1/ + INTEGER*4 MWRK, + * MPWALL, + * MPINT,MPBODY +C + CHARACTER*30 DATE + DATA DATE / 'LES3X:VER. 
42.06 :2013.03.25' / +C +CLES3X----------------------------------------------------------CLES3X +CCCCCC CONSTANTS DEFINITION CCCCCC +CLES3X----------------------------------------------------------CLES3X +CC +CC [A.01] CONSTANTS REGARDING CONTROL PARAMETERS +CC + INTEGER*4 IFORM, + * NMAXP,NMAXT, + * NTIME,ISTART, + * JPRESS,IVELIN + REAL*4 VISCM,DT, + * EPSP,EPST, + * TFINAL,UFINAL,VFINAL,WFINAL, + * VELIN0(3),BTDCOE(4), + * D000,U000,RHO000 +C + DATA IVELIN /0/ + DATA VELIN0 /0.0E0,0.0E0,0.0E0/ + DATA BTDCOE /0.1E0,0.0E0,0.0E0,0.0E0/ +C + DATA JPRESS /1/ +CC +CC [A.02] CONSTANTS REGARDING FILE ALLOCATIONS +CC + INTEGER*4 IACT,IRESV,IWRITE,INAME, + * ICAST,IDATA0,IALL,ISKIP,ISKIP1,ICHECK, + * JGRID + INTEGER*4 MAXPRN + CHARACTER*30 NAME + DATA IWRITE / 2 / + DATA INAME / 1 / + DATA MAXPRN / 200 / + DATA ICAST /0/ + DATA IDATA0 /0/ + DATA IALL /0/ + DATA ISKIP /0/ + DATA ISKIP1 /1/ + DATA ICHECK /999999/ + DATA JGRID /1/ +C + INTEGER*4 MCOM,NCOMFL,NCOMST + PARAMETER ( MCOM = 22 ) + CHARACTER*60 COMGEN,COMFLE(MCOM),COMSET(MCOM) + CHARACTER*60 FILEMS,FILEBC,FILEIF,FILEFF,FILEHS, + * FILEMR,FILEBR,FILEFR, + * FILELG,FILE + DATA FILELG / 'les3x.log' / +C* DATA FILEIN / 'PARMLES3X' / +C + INTEGER*4 IUT0,IUT5,IUT6,IUTLG + DATA IUT0 / 0 / + DATA IUT5 / 5 / + DATA IUT6 / 6 / + DATA IUTLG / 60 / +C + INTEGER*4 IUTMS,IUTBC,IUTIF,IUTFF, + * IUTMR,IUTBR,IUTFR + DATA IUTMS / 10 / + DATA IUTBC / 11 / + DATA IUTIF / 12 / + DATA IUTFF / 13 / + DATA IUTMR / 22 / + DATA IUTBR / 23 / + DATA IUTFR / 24 / +C + INTEGER*4 IINTRP + DATA IINTRP/0/ +CC +CC [A.03] CONSTANTS REGARDING SUBGRID-SCALE (SMAGORINSKY) MODEL +CC +CC +CC [A.04] CONSTANTS REGARDING NEAR-WALL TURBULENCE TREATMENTS +CC +CC +CC [A.05] CONSTANTS REGARDING NUMERICAL METHODS +CC +CC +CC [A.06] CONSTANTS REGARDING ELEMENT INTEGRATION +CC + INTEGER*4 MGAUSS,IGAUSH + PARAMETER (MGAUSS=100) + DATA IGAUSH / 64 / +CC +CC [A.07] CONSTANTS REGARDING REFINER I.F. 
+CC +C NRFN ;NUMBER OF REFINE (DEFAULT=0) +C IT MUST BE ZERO OR ONE IN THIS VERSION +C IRFNMW ;REFINED MESH AND BOUN FILE WILL BE OUTPUT +C WHEN THIS PARAMETER IS ONE (DEFAULT=0) +C IRFNFF ;READ IN FLOW DATA (U,V,W,P) WILL BE REFINED +C WHEN THIS PARAMETER IS ONE (DEFAULT=0) +C NGRID +C + INTEGER*4 NRFN,IRFNMW,IRFNFF,NGRID + DATA NRFN /0/ + DATA IRFNMW /0/ + DATA IRFNFF /0/ +#ifndef NO_REFINER + INTEGER*4 NLAYER,LLAYER,IRFNR + DIMENSION LLAYER(20) +#endif +CC +CC [A.08] CONSTANTS REGARDING COUPLER I.F. +CC +CC +CC [A.09] CONSTANTS REGARDING RENUMBERING +CC + INTEGER*4 JSORT,JCOLOR,JUNROL + INTEGER*4 NDIVX,NDIVY,NDIVZ + INTEGER*4 NEIP(4) + INTEGER*4 MCOLOR,MCPART + PARAMETER(MCOLOR=1000,MCPART=10000) + INTEGER*4 NCOLOR(4),NCPART(MCOLOR,4),LLOOP(MCPART,MCOLOR,4) +C + INTEGER*4,ALLOCATABLE::LPBTOA(:),LPATOB(:),LEBTOA(:),LEATOB(:) + DATA JSORT / 0/ + DATA JCOLOR / 0/ + DATA JUNROL / 0/ + DATA NDIVX /10/ + DATA NDIVY /10/ + DATA NDIVZ /10/ + DATA NEIP /1500,1500,1500,1500/ +C +CC [A.10] CONSTANTS REGARDING HISTORY DATA +CC +CC [A.11] CONSTANTS REGARDING ERROR TRAP +CC + INTEGER*4 IERR,IERRA,JESC,LERR(50) + DATA IERR /0/ + DATA JESC /0/ +C + CHARACTER*60 ERMSGB + * / ' ## PROGRAM LES3X: FATAL ERROR OCCURENCE; STOPPED ' / + CHARACTER*60 EREXP1 + * / ' ALLOCATING FAILED ' / + CHARACTER*60 EREXP2 + * / ' READ-IN DATA ARE NOT CONSISTENT WITH THE MESH DATA ' / + CHARACTER*60 EREXP5 + * / ' NUMBER OF WALL BOUNDARY NODES EXCEEDED LIMIT OF' / + CHARACTER*60 EREXP6 + * / ' FIELD MAXIMUM DIVERGENT EXCEEDED LIMIT OF' / + CHARACTER*60 EREXP7 + * / ' NUMBER OF NON-ZEROS IN CRS FORMAT EXCEEDED LIMIT OF' / + CHARACTER*60 EREXP8 + * / ' COORDINATE WHERE DIV. 
IS MAX:: ' / + CHARACTER*60 ERMSGC + * / ' ## PROGRAM LES3X : FATAL ERROR REPORT ; STOPPED ' / + CHARACTER*10 BLANK / ' ' / +CC +CC [A.12] CONSTANTS REGARDING FLOW RATE CONTROL +CC +CC +CC [A.13] CONSTANTS REGARDING MATRIX SOLVER +CC + INTEGER*4 MRCM,NMAXB,ISOLP + DATA MRCM / 4/ + DATA NMAXB /10/ + DATA ISOLP / 2/ +CC ISLOP: MATRIX SOLVER FLAG FOR PRES. EQ. (1:BCGSTAB, 2:RCM) +CC +C +CLES3X----------------------------------------------------------CLES3X +CCCCCC VARIABLES DEFINITION CCCCCC +CLES3X----------------------------------------------------------CLES3X +CC +CC [B.01] FIELD VARIABLES +CC + INTEGER*4 NE,NP,NEHEX,NEWED,NEPRD,NETET,NEX(12) + REAL*4,ALLOCATABLE:: + * X(:),Y (:),Z(:),U(:),V(:),W(:), + * PN(:),P(:), + * VISC(:), + * PRCM(:,:),APRCM(:,:) + REAL*8,ALLOCATABLE:: XD(:),YD(:),ZD(:) + INTEGER*4 NE_total, NP_total, total +C +CC +CC [B.02] ELEMENT'S SHAPE DEPENDENT CONSTANTS +CC + INTEGER*4 NTET,NPRD,NWED,NHEX + INTEGER*4 NSKIP1,NSKIP2,NSKIP3,NSKIP4 + PARAMETER (NTET=4,NSKIP1=NTET*NTET) + PARAMETER (NPRD=5,NSKIP2=NPRD*NPRD) + PARAMETER (NWED=6,NSKIP3=NWED*NWED) + PARAMETER (NHEX=8,NSKIP4=NHEX*NHEX) + REAL*4,ALLOCATABLE::CM(:), + * SNI(:,:),DNXI(:,:),DNYI(:,:),DNZI(:,:), + * SN(:,:),DNXYZ(:,:,:), + * E(:),EX(:),EY(:),EZ(:), + * EXX(:),EYY(:),EZZ(:),EXY(:),EXZ(:),EYZ(:) +CC +CC [B.03] ELEMENT'S CONNECTIVITY AND ATTRIBUTE LISTS +CC + INTEGER*4,ALLOCATABLE:: + * NODE(:,:), + * IENP(:,:),JENP(:,:),NEP(:), + * IPNP(:,:), + * NPP(:),NPP2(:), + * NUMIP(:),LTAB(:,:,:) + INTEGER*4 NPPMAX + +C + INTEGER*4 N2D,NS,NSP + PARAMETER (N2D=4, NS=6, NSP=4) + INTEGER*4 LOCAL(NSP,NS,4) + DATA LOCAL / + * 1,2,4,0, 2,3,4,0, 3,1,4,0, 1,3,2,0, 0,0,0,0, 0,0,0,0, + * 1,2,5,0, 2,3,5,0, 3,4,5,0, 4,1,5,0, 1,4,3,2, 0,0,0,0, + * 1,3,2,0, 4,5,6,0, 1,2,5,4, 2,3,6,5, 3,1,4,6, 0,0,0,0, + * 1,5,8,4, 2,3,7,6, 1,2,6,5, 3,4,8,7, 1,4,3,2, 5,6,7,8 + * / +CC +CC [B.04] ARRAYS FOR SPECIFYING BOUNDARY CONDITIONS +CC + INTEGER*4,ALLOCATABLE:: + * LPINLT(:), + * LPWALL(:), + * LPSYMT(:), + * LPFREE(:), 
+ * LPCCL1(:),LPCCL2(:),LPBODY(:), + * LPINT1(:),LPINT2(:),LPINT3(:), + * LDOM(:),NBPDOM(:),IPSLF(:,:),IPSND(:,:) + + REAL*4,ALLOCATABLE:: + * UINLT (:),VINLT (:),WINLT (:), + * UWALL (:),VWALL (:),WWALL (:), + * XPSYMT(:),YPSYMT(:),ZPSYMT(:) +C + INTEGER*4 NPINLT,NPWALL,NPSYMT,NPFREE,NPCCL, + * NPBODY,NPINT, + * NDOM, + * NPMVB,NPSYM2 +C + DATA NPINLT /0/ + DATA NPWALL /0/ + DATA NPSYMT /0/ + DATA NPFREE /0/ + DATA NPCCL /0/ + DATA NPBODY /0/ + DATA NPINT /0/ + DATA NPMVB /0/ + DATA NPSYM2 /0/ +C + INTEGER*4 NPSET,NPSETR + INTEGER*4 NESET + DATA NPSET /0/ + DATA NPSETR /0/ + DATA NESET /0/ + INTEGER*4 NGAT,NSCT + DATA NGAT /0/ + DATA NSCT /0/ +CC +CC [FLUID FORCE OBJECT] +CC +CC [B.05] ARRAYS FOR SAVING TIME HISTORY +CC +CC [B.06] WORK REGION (MATRIX SOLVER) +CC + INTEGER*4 NCRS,NCRS2 + INTEGER*4,ALLOCATABLE:: IPCRS(:),ITPCRS(:) + REAL*4,ALLOCATABLE:: + * AWRK(:,:,:),ACRS (:),TACRS(:) +CC +CC [B.07] WORK REGION (REFINER I.F.) +CC + INTEGER*4,ALLOCATABLE:: + * NPB0(:),NPB1(:),NPB2(:),LPB1(:,:),LPB2(:,:) + REAL*4,ALLOCATABLE:: XPB1(:,:),XPB2(:,:), + * YPB1(:,:),YPB2(:,:), + * ZPB1(:,:),ZPB2(:,:) +CC +CC [B.08] WORK REGION (OTHER) +CC + INTEGER*4,ALLOCATABLE:: + * NODWK1(:,:),LEWRK(:,:), + * LWRK01(:),LWRK02(:),LWRK04(:) + REAL*4, ALLOCATABLE:: +CC Fj start 202103 +CC * RX(:,:)[:], RY(:,:)[:], WRKN(:), + * WRKN(:), +CC Fj end 202103 + * WRK01(:),WRK02(:),WRK03(:),WRK04(:), + * WRK05(:),WRK06(:),WRK07(:),WRK08(:), + * WRK09(:),WRK10(:),WRK11(:),WRK12(:), + * WRK13(:),WRK3(:,:) +CC Fj start 202103 + REAL*4 , POINTER :: RX ( : , : ) => null ( ) + REAL*4 , POINTER :: RY ( : , : ) => null ( ) + INTEGER*8 :: rx_desc, ry_desc + INTEGER*8, DIMENSION(2) :: rx_lb, rx_ub, ry_lb, ry_ub + INTEGER*4 :: img_dims(1) + INTEGER*4 :: status +C INTEGER :: ierr, nnn, me +CC Fj end 202103 + REAL*8,ALLOCATABLE:: + * DWRK01(:,:),DWRK02(:),DWRK03(:,:,:), + * DWRK04(:,:),DWRK05(:) +#ifndef NO_REFINER + INTEGER*4,ALLOCATABLE:: + * NODWK2(:,:),NODWK3(:,:), + * LWRK03(:),LWRK05(:),LWRK06(:) + 
REAL*8,ALLOCATABLE:: + * DWRK3(:) +#endif +CC +CC [B.09] SIMPLE METHOD +CC + REAL*4 EPSREP + REAL*4,ALLOCATABLE:: + * DT3D(:),UE(:),VE(:),WE(:) +CC +CC [B.XX] + REAL*4 ave_p, ave_v, max_v +C +C +CLES3X----------------------------------------------------------CLES3X +CCCCCC CONSTANTS DEFINITION (WORK) CCCCCC +CLES3X----------------------------------------------------------CLES3X +C + INTEGER*4 IPART,NPART,MAXBUF, + * NECHK,NPCHK1,NPCHK2, + * IDUM,INTPRN, + * I,J,IE,IELM,NELM,MELM,IP,NPW,IPW,NP0,NE0, + * ITIME, + * IBP,NITRP,NPZERO,NTIMEP, + * ISTEP, + * NITRU,NITRV,NITRW,NDUM,IRBUF,IONE + REAL*4 SIZEAL,SIZE, + * TIME,DEVLP1,DEVLP2,ACCELX,ACCELY,ACCELZ, + * RESU,RESV,RESW,RESP, + * DIVMAX,DIVAV,XDIVMX,YDIVMX,ZDIVMX,FJESC,FJESCA, + * DIVESC,WORD,FINITE,TIMEP,TIMEW,FRBUF +#ifndef NO_REFINER + INTEGER*4 IRFN +#endif +C + DATA WORD / 4.0E-6 / + DATA FINITE / 1.0E-20 / + DATA DIVMAX / 0.0E+0 / + DATA DIVESC / 1.0E+4 / + DATA XDIVMX / 0.0E0 / + DATA YDIVMX / 0.0E0 / + DATA ZDIVMX / 0.0E0 / + DATA TIMEP / 0.E0 / + DATA NTIMEP / 0 / + DATA NITRU,NITRV,NITRW,NITRP / 0,0,0,0 / + DATA NPW /0/ +C + DATA RESU /1.0E-10/ + DATA RESV /1.0E-10/ + DATA RESW /1.0E-10/ + DATA RESP /1.0E-10/ +C + DATA NECHK /0/ + DATA NPCHK1 /0/ + DATA NPCHK2 /0/ +C + INTEGER*4 ::AAA=0 +C +C +C********************************************************************** +C* * +C* * +C* <<<<< PROGRAM LES3X >>>>> * +C* * +C* * +C* ENTRY NAME ; LES3X * +C* FUNCTION ; TO INTEGRATE 3-D UNSTEADY INCOMPRESSIBLE * +C* NAVIER-STOKES EQUATIONS BY FINITE ELEMENT * +C* METHODS * +C* ( SINGLE-PRECISION WORD VERSION ) * +C* EXECUTION MODE ; SERIAL(SCALAR, VECTOR)/PARALLEL(THREAD,MPI)* +C* EXTRENAL LIBRARIES; 'MPI' IS CALLED FOR PARALLEL COMPUTATION * +C* WRITTEN BY ; C.KATO, * +C* INSTITUTE OF INDUSTRIAL SCIENCE, * +C* THE UNIVERSITY OF TOKYO. * +C* MODIFIED BY ; Y.GUO, * +C* INSTITUTE OF INDUSTRIAL SCIENCE, * +C* THE UNIVERSITY OF TOKYO. 
* +C* MODIFIED BY ; Y.YAMADE, * +C* INSTITUTE OF INDUSTRIAL SCIENCE, * +C* THE UNIVERSITY OF TOKYO. * +C* (MIZUHO INFORMATION & RESEACH INSTITUTE INC.)* +C* COPY RIGHT ; SEPTEMBER 14TH, 1988 BY HITACHI, LTD. * +C* APRIL 22ND, 2001 BY UNIVERSITY OF TOKYO * +C* JUNE 3RD, 2003 BY UNIVERSITY OF TOKYO * +C* JUNE 3RD, 2010 BY UNIVERSITY OF TOKYO * +C* * +C********************************************************************** +C +C >>>>> PROGRAM FUNCTIONS <<<<< +C 'LES3X' IS A GENERAL-PURPOSE FINITE ELEMENT PROGRAM THAT +C CALCULATES INCOMPRESSIBLE UNSTEADY FLOWS IN ARBITRARILY-SHAPED +C GEOMETRIES. THE GOVERNING EQUATIONS ADOPTED FOR THE COMPUTATION +C ARE UNSTEADY INCOMPRESSIBLE NAVIER-STOKES EQUATIONS +C REPRESENTED IN THE CARTETIAN COORDINATES SYSTEM. IT SUPPORTS +C MULTI ELEMENT TYPE. +C +C +C >>>>> FILE ALLOCATIONS <<<<< +C +C ALL THE FILES THAT 'LES3X' HANDLES ARE OF GENERAL FILE +C VERSION 1.1 FORMAT. +C +C (1) STANDARD ERROR-OUTPUT/INPUT/OUTPUT +C IUT0 ( OUTPUT ); FILE NUMBER TO WRITE ERROR MESSAGE +C IUT5 ( INPUT ); FILE NUMBER TO READ CALCULATION PARAMETERS +C AND FILE NAMES +C IUT6 ( OUTPUT ); FILE NUMBER TO WRITE CALCULATION SEQUENCE +C +C (2) PROGRAM-DEFAULT INPUT/OUTPUT +C +C NOTES ; FOLLOWING FILES MUST ALWAYS BE SPECIFIED IN THE ORDER +C LISTED BELOW IN THE PARAMETER FILE. THEY WILL ALWAYS BE +C INPUT/OUTPUT DURING THE EXECUTION, EXCEPT THE INITIAL +C FLOW FIELD FILE, WHICH WILL BE INPUT ONLY WHEN CONTROL +C PARAMETER 'ISTART' (DESCRIBED LATER) IS SET TO 1. VALUE. 
+C +C IUTMS( INPUT ); FILE NUMBER TO READ MESH DATA +C IUTBC( INPUT ); FILE NUMBER TO READ BOUNDARY CONDITIONS +C IUTIF( INPUT ); FILE NUMBER TO READ INITIAL FLOW FIELD +C IUTFF( OUTPUT ); FILE NUMBER TO WRITE FINAL FLOW FIELD +C +C FILEMS( INPUT ); FILE NAME TO READ MESH DATA +C FILEBC( INPUT ); FILE NAME TO READ BOUNDARY CONDITIONS +C FILEIF( INPUT ); FILE NAME TO READ INITIAL FLOW FIELD +C FILEFF(OUTPUT ); FILE NAME TO WRITE FINAL FLOW FIELD +C FILEHS(OUTPUT ); FILE NAME TO WRITE TIME HISTORIES +C +C (3) OPTIONAL INPUT/OUTPUT +C +C >>>>> VARIABLES AND CONSTANTS USED <<<<< +C (1) CONTROL VARIABLES +C IPART ; DOMAIN NUMBER THAT THIS TASK SHOULD COMPUTE/IS +C COMPUTING. IPART BEING SET ZERO MEANS THAT +C THE PROGRAM SHOULD RUN/IS RUNNING IN SERIAL +C MODE. +C +C IWRITE ; INFORM GF UTILITIES OF OUTPUT MODE +C INAME ; INFORM GF UTILITIES OF VARIABLE NAME MODE +C MAXPRN ; MAXIMUM TIME STEPS TO BE PRINTED OUT +C DT ; TIME INCREMENT +C NOTES ; TIME INCREMENT WILL BE KEPT CONSTANT TO THE +C SPECIFIED VALUE THROUGHOUT THE TIME INTEGRATION. +C VISCM ; MOLECULAR VISCOSITY +C +C EPSP ; CONVERGENCE CRITERIA FOR PRESSURE EQUATION +C NMAXP ; MAXIMUM ITERATIONS FOR PRESSURE EQUATION +C EPST ; CONVERGENCE CRITERIA FOR TRANSPORT EQUATIONS +C NMAXT ; MAXIMUM ITERATIONS FOR TRANSPORT EQUATIONS +C +C NOTES ; USER INPUT PARAMETERS 'EPST' AND 'NMAXT' ARE +C CONCERNED WITH THE TRANSPORT EQUATIONS THAT ARE +C SOLVED IMPLICITLY AND ARE NEEDED ONLY FOR IFORM=3 AND +C IFORM=4. IF YOU SELECT AN EXPLICIT FORMULATION +C BY SPECIFYING EITHER IFORM=0, 1, OR 2, PROVIDE DUMMY +C VALUES FOR THESE PARAMETERS. 
+C +C NTIME ; TIME STEPS TO BE INTEGRATED +C +C ISTART ; CONTROLS DEVELOPMENT OF THE FLOW FIELD +C 0 --- SET ZERO INITIAL FLOW FIELD +C 1 --- READ INITIAL FLOW FROM GIVEN FILE +C TFINAL ; FLOW FIELD DEVELOPMENT PARAMETER (SEE BELOW) +C UFINAL ; FLOW FIELD DEVELOPMENT PARAMETER (SEE BELOW) +C VFINAL ; FLOW FIELD DEVELOPMENT PARAMETER (SEE BELOW) +C WFINAL ; FLOW FIELD DEVELOPMENT PARAMETER (SEE BELOW) +C +C IN TERMS OF THE FLOW FIELD DEVELOPMENT, ISTART=0 +C IS IDENTICAL TO ISTART=1, EXCEPT THAT THE FORMER +C SETS ZERO INITIAL FLOW FIELD WHILE THE LATTER READS +C INITIAL FLOW FILED FROM A GIVEN FILE. THE FOLLOWING +C EXPLANATION WILL BE APPLICABLE BOTH FOR ISTART=0 AND +C ISTART=1. +C +C THOSE VALUES LISTED BELOW WILL BE EXPONENTIALLY +C DEVELOPED. NAMELY, AT EACH TIME STEP THEY WILL BE SET +C TO A VALUE THAT IS THE ORIGINAL VALUE MULTIPLIED BY +C THE FOLLOWING FUNCTION. +C +C VALUES GRADUALLY DEVELOPED: +C INLET BOUNDARY VELOCITIES +C MOVING WALL BOUNDARY VELOCITIES +C ANGULAR VELOCITY +C FRAME VELOCITIES FOR TRANSLATING FRAMES +C (FRAME ACCELERATION FOR FRAME NUMBER 1) +C +C FUNCTION APPLIED: 1-EXP(-TIME/TFINAL) +C +C WHERE 'TIME' IS THE CURRENT TIME OF THE INTEGRATION +C AND 'TFINAL' IS A USER INPUT PARAMETER TO CONTROL +C FLOW FIELD DEVELOPMENT. IF NO FURTHER DEVELOPMENT +C OF THE FLOW FIELD IS DESIRED, SIMPLY SET 'TFINAL' +C TO ZERO AND THIS FUNCTION WILL BECOME A CONSTANT +C VALUE OF ONE. THE DIVIDE EXCEPTION WILL BE INTERNALLY +C SUPPRESSED AND HENCE DOES NOT NEED TO BE TAKEN CARE +C BY THE USER. +C +C FOLLOWING UNIFORM ACCELERATION TERMS WILL ALSO BE +C ADDED TO THE MOMENTUM EQUATIONS FOR TRANSLATING +C FRAMES WITH FRAME NUMBER GREATER THAN 1 +C (SEE ALSO NOTES ON 'NFRAME'). 
+C +C ACCELERATION TERMS ADDED TO X, Y, Z DIRECTIONS: +C (UFINAL-UFRME0(IFRM))/TFINAL*EXP(-TIME/TFINAL) +C (VFINAL-VFRME0(IFRM))/TFINAL*EXP(-TIME/TFINAL) +C (WFINAL-WFRME0(IFRM))/TFINAL*EXP(-TIME/TFINAL) +C +C WHERE 'UFRME0(IFRM)', 'VFRME0(IFRM)', AND +C 'WFRME0(IFRM)' ARE THE FRAME VELOCITIES SET FOR +C MULTIFRAME COMPUTATIONS (DESCRIBED ELSEWHERE). NOTE +C THAT THESE ACCELERATION TERMS ARE CONSISTENT WITH THE +C ABOVE MENTIONED DEVELOPMENT FUNCTION. +C +C DEVLP1 ; FUNCTION APPLIED FOR BOUNDARY VALUES, ANGULAR +C VELOCITY AND FRAME VELOCITY. +C DEVLP2 ; FUNCTION APPLIED FOR ACCELERATIONS +C ACCELX ; X-DIR. ACCELERATION TERMS ADDED TO ALL FRAMES +C ACCELY ; Y-DIR. ACCELERATION TERMS ADDED TO ALL FRAMES +C ACCELZ ; Z-DIR. ACCELERATION TERMS ADDED TO ALL FRAMES +C +C (2) PROGRAM-DEFAULT MODEL CONSTANTS +C +C +C (3) INTEGER CONSTANTS +C ME ; MAX. NUMBER OF TOTAL ELEMENTS +C MP ; MAX. NUMBER OF TOTAL NODES +C MB ; MAX. NUMBER OF BOUNDARY NODES +C +C MDOM ; MAX. NUMBER OF THE SENDING/RECEIVING DOMAINS +C MBPDOM ; THE MAXIMUM NUMBER OF INTER-CONNECT/OVERSET +C BOUNDARY NODES FOR ONE NEIGHBORING DOMAIN. +C +C MWRK ; DIMENSION SIZE OF WORK AREA ( = MP ) +C MAXBUF ; DIMENSION SIZE OF WORK RX, RY ( = ME*N ) +C +C NE ; NUMBER OF TOTAL ELEMENTS +C NP ; NUMBER OF TOTAL NODES +C N0 ; NUMBER OF NODES ASSIGNED TO ONE ELEMENT ( = 8 ) +C +C (4) FIELD VARIABLES +C X (IP); X-COORDINATES OF NODES +C Y (IP); Y-COORDINATES OF NODES +C Z (IP); Z-COORDINATES OF NODES +C +C SX (IE); UPWIND VECTOR IN X-DIR. +C SY (IE); UPWIND VECTOR IN Y-DIR. +C SZ (IE); UPWIND VECTOR IN Z-DIR. +C +C U (IP); X-DIR. VELOCITY COMPONENT AT NODES +C V (IP); Y-DIR. VELOCITY COMPONENT AT NODES +C W (IP); Z-DIR. VELOCITY COMPONENT AT NODES +C P (IE); ELEMENT PRESSURE +C PN (IP); NODAL PRESSURE +C +C NOTES ; SINCE CURRENT GF SYSTEM HAS ONLY ONE TYPE OF DATA +C FOR FLOW VELOCITY AND PRESSURE, ALL THE VELOCITIES +C AND PRESSURES ABOVE WILL BE SAVED WITH A SAME KEY +C WORD '*VELO_3D' AND '*PRES_3E'. 
+C +C NOTES ; IN ORDER TO KEEP CONSISTENCY WITH THE FLOW FIELD +C DATA GENERATED BY THE CONVENTIONAL 'LES3D', 'LES3X' +C INPUTS/OUTPUTS ELEMENT-WISE PRESSURE. IT WILL CONVERT +C THE READ-IN ELEMENT-WISE PRESSURE TO NODAL VALUE FOR +C ITS INTERNAL USE AND RE-CONVERT THE NODAL PRESSURE TO +C ELEMENT-WISE VALUE FOR OUTPUTING TO A GENERAL FILE. +C +C (5) ELEMENT'S POSITION AND SHAPE DEPENDENT CONSTANTS +C CM (IP); INVERSED LUMPED MASS MATRIX +C +C SNI (I,IE); ELEMENT CENTER VALUE OF N +C DNXI (I,IE); ELEMENT CENTER VALUE OF NX +C DNYI (I,IE); ELEMENT CENTER VALUE OF NY +C DNZI (I,IE); ELEMENT CENTER VALUE OF NZ +C +C SN (I,IE); INTEGRATED ELEMENT VECTOR OF N +C DNX (I,IE); INTEGRATED ELEMENT VECTOR OF NX +C DNY (I,IE); INTEGRATED ELEMENT VECTOR OF NY +C DNZ (I,IE); INTEGRATED ELEMENT VECTOR OF NZ +C +C E (IE,I,J); INTEGRATED ELEMENT MATRIX OF N*NT +C EX (I,IE,J); INTEGRATED ELEMENT MATRIX OF N*NXT +C EY (I,IE,J); INTEGRATED ELEMENT MATRIX OF N*NYT +C EZ (I,IE,J); INTEGRATED ELEMENT MATRIX OF N*NZT +C EXX (I,IE,J); INTEGRATED ELEMENT MATRIX OF NX*NXT +C EYY (I,IE,J); INTEGRATED ELEMENT MATRIX OF NY*NYT +C EZZ (I,IE,J); INTEGRATED ELEMENT MATRIX OF NZ*NZT +C EXY (I,IE,J); INTEGRATED ELEMENT MATRIX OF NX*NYT +C EXZ (I,IE,J); INTEGRATED ELEMENT MATRIX OF NX*NZT +C EYZ (I,IE,J); INTEGRATED ELEMENT MATRIX OF NY*NZT +C NOTES ; IN THE ABOVE LISTS, N DENOTES THE SHAPE FUNCTION. +C NX, NY, NZ, RESPECTIVELY DENOTE, X-DERIVERTIVE, +C Y-DERIVERTIVE, AND Z-DERIVERTIVE OF SHAPE FUNCTION. +C NOTES ; E (IE,I,J) IS ALSO USED AS ELEMENT-WISE +C COEFFICIENT MATRIX +C +C (6) ELEMENT'S CONNECTIVITY SPECIFYING LISTS +C NODE (I,IE); NODE NO. TABLE BASED ON ELEMENT +C NODE2 (I,IE); NODE NO. TABLE BASED ON ELEMENT +C (DUMMY VARIABLE FOR READING MESH DATA) +C NUMIP (IP); NUMBER OF NEIGHBORING DOMAINS THAT NODE +C 'IP' BELONG TO +C +C +C (7) BOUNDARY CONDITIONS DATA +C A. 
INLET BOUNDARY +C NPINLT ; NUMBER OF INLET BOUNDARY NODES +C LPINLT (IBP); INLET BOUNDARY NODES +C UINLT (IBP); INLET BOUNDARY U-VELOCITIES +C VINLT (IBP); INLET BOUNDARY V-VELOCITIES +C WINLT (IBP); INLET BOUNDARY W-VELOCITIES +C +C B. WALL BOUNDARY +C NPWALL ; NUMBER OF WALL BOUNDARY NODES +C LPWALL (IBP); WALL BOUNDARY NODES +C UWALL (IBP); WALL BOUNDARY U-VELOCITIES +C VWALL (IBP); WALL BOUNDARY V-VELOCITIES +C WWALL (IBP); WALL BOUNDARY W-VELOCITIES +C +C C. SYMMETRIC BOUNDARY +C NPSYMT ; NUMBER OF SYMMETRIC BOUNDARY NODES +C LPSYMT (IBP); SYMMETRIC BOUNDARY NODES +C XPSYMT (IBP); X NORMAL OF SYMMETRIC BOUNDARY NODE +C YPSYMT (IBP); Y NORMAL OF SYMMETRIC BOUNDARY NODE +C ZPSYMT (IBP); Z NORMAL OF SYMMETRIC BOUNDARY NODE +C +C D. FREE BOUNDARY +C NPFREE ; NUMBER OF FREE BOUNDARY NODES +C LPFREE (IBP); FREE BOUNDARY NODES +C +C E. CYCLIC BOUNDARY +C NPCCL ; NUMBER OF CYCLIC BOUNDARY NODES +C LPCCL1 (IBP); CYCLIC BOUNDARY NODES-1 +C LPCCL2 (IBP); CYCLIC BOUNDARY NODES-2 +C +C F. FLUID FORCE CALCULATION SURFACE +C NPBODY ; NUMBER OF BODY SURFACE NODES +C LPBODY (IBP); BODY SURFACE NODES +C N2D ;NUMBER OF SURFACE ELEMENT DEFINING NODES ( =4 ) +C NOTES; WALL SHEAR COMPONENTS OF BODY FORCE WILL BE DIRECTLY +C CALCULATED BASED ON THE LOCAL TOTAL VISCOSITY AND +C LOCAL VELOCITY GRADIENT WITHOUT TAKING INTO ACCOUNT +C THE WALL VELOCITIES NOR THE WALL FUNCTION USED FOR THE +C MOMENTUM INTEGRATION. +C +C G. 
INTER-CONNECT BOUNDARY +C NPINT ; NUMBER OF INTER-CONNECT BOUNDARY NODES +C LPINT1 (IBP); INTER-CONNECT BOUNDARY NODES +C LPINT2 (IBP); CORRESPONDING DOMAIN NUMBERS +C LPINT3 (IBP); NODE NUMBER IN THE CORRESPONDING DOMAINS +C NDOM ; NUMBER OF THE NERIBERING DOMAINS +C LDOM (IDOM); NEIGHBORING DOMAIN NUMBER +C NBPDOM(IDOM); NUMBER OF INTER-CONNECT BOUNDARY NODES +C SHARING WITH THE IDOM'TH NEIGHBORING DOMAIN, +C LDOM(IDOM) +C IPSLF (IBP,IDOM); INTER-CONNECT BOUNDARY NODE NUMBER IN THE +C CALLING TASK'S DOMAIN, FOR THE IDOM'TH +C NEIGHBORING DOMAIN, LDOM(IDOM) +C IPSND (IBP,IDOM); INTER-CONNECT BOUNDARY NODE NUMBER IN THE +C DOMAIN THAT IS RECEIVING THE CALLING +C TASK'S RESIDUALS. +C +C H. OVERSET BOUNDARY NODES +C +C I. FLUID FORCE OBJECT (FFO) +C +C +C (8) TIME HISTORY DATA +C +C (13) COMMENT DATA +C COMGEN ; GENERIC FILE COMMENT STRING TO BE READ FROM +C PARAMETER FILE AT THE START OF TIME INTEGRATION +C AND TO BE WRITTEN TO ALL THE OUTPUT FILES +C COMFLE(ICOM); WORK ARRAY USED TO PASS FILE COMMENT +C STRINGS TO GENERAL FILE UTILITIES +C COMSET(ICOM); DUMMY ARRAY USED TO CALL GENERAL FILE UTILITIES +C +C NOTES ; FOR INPUT FILES, ALL THE FILE COMMENT STRINGS AND +C ALL THE SET COMMENT STRINGS WILL BE DISCARDED IN THE +C GENERAL FILE UTILITIES AFTER THEY ARE WRITTEN TO THE +C STANDARD OUTPUT. FOR OUTPUT FILES GENERIC FILE +C COMMENT STRING READ FROM THE PARAMETER FILE, FOLLOWED +C BY SPECIFIC FILE COMMENT STRINGS INTERNALLY DEFINED, +C WILL BE WRITTEN TO EACH OUTPUT FILE. +C NO SET COMMENT STRINGS WILL BE WRITTEN TO THE OUTPUT +C FILES EXCEPT THE CURRENT FLOW FIELD FILE, SURFACE +C PRESSURE DATA FILE, AND HISTORY DATA FILE TO WHICH +C INTERNALLY DEFINED SET COMMENT STRINGS (INDICATING +C CURRENT TIME AND TIME STEP OR IDENTIFYING EACH +C HISTORY DATA), WILL BE WRITTEN. 
+C +C +C (14) WORK AREAS AND DUMMY ARRAYS +C +C +C (15) RETURN CODE +C IERR ; RETURN CODE WHOSE VALUE WILL BE EITHER +C 0 --- INDICATING SUCCESSFUL TERMINATION +C OR 1 --- INDICATING OCCURENCE OF SOME ERROR CONDITIONS +C +C +C +C <<<<< QUERY EXECUTION MODE AND DOMAIN NUMBER >>>>> +C +C +C +C NDOM = 0 +C Fj start 202103 + call xmp_api_init +C Fj end 202103 +C + CALL DDINIT(NPART,IPART) +C +C IF(IPART.GE.1) NDOM = 1 +C + IF(IPART.GE.2) THEN + IUT6 = IUTLG + CALL MFNAME(FILELG,FILE,IPART,IUT0,IERR) + IF(IERR.NE.0) THEN + WRITE(IUT0,*) BLANK + WRITE(IUT0,*) ERMSGC + GO TO 9999 + ENDIF + OPEN(IUT6,FILE=FILE,FORM='FORMATTED') + ENDIF +C + call opening(IUT6, DATE, FILEIN) +C +#ifdef PROF_MAPROF + call maprof_setup("FFB MINI", FFB_MINI_VERSION) + call maprof_add_section("Main_Loop", TM_MAIN_LOOP) + call maprof_add_section("vel3d1_op1", TM_VEL3D1_OP1) + call maprof_add_section("calax3_cal", TM_CALAX3_CAL) + call maprof_add_section("calax3_com", TM_CALAX3_COM) + call maprof_add_section("grad3x", TM_GRAD3X) + call maprof_add_section("fld3x2", TM_FLD3X2) +#endif +C +C +C <<<<< DISPLAY INTERNAL PARAMETER SETTINGS >>>>> +C +C + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) 'CONSTANTS REGARDING NUMERICAL METHOD' + WRITE(IUT6,*) ' NUMBER OF GAUSS POINTS FOR HEX. : IGAUSH=', IGAUSH +C + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) 'CONSTANTS REGARDING OUTPUT' + WRITE(IUT6,*) ' FILE WRITING MODE FLAG : IWRITE=', IWRITE + WRITE(IUT6,*) ' MAX. 
TIMESTEPS TO BE PRINTED-OUT: MAXPRN=', MAXPRN + WRITE(IUT6,*) ' EMERGENLY ESCAPE DIVERGENT LIMIT: DIVESC=', DIVESC +C + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) ' CONSTANTS REGARDING MATRIX SOLVER' + WRITE(IUT6,*) ' MATRIX SOLVER FOR PRESSURE : ISOLP =', ISOLP + WRITE(IUT6,*) ' INNER ITERATIONS NUMBER OF RCM : NMAXB =', NMAXB +C +C +C <<<<< READ CALCULATION PARAMETERS AND FILE NAMES +C TO ALLOCATE >>>>> +C +C + OPEN(IUT5,FILE=FILEIN,FORM='FORMATTED') +C + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) ' ** READING CALCULATION PARAMETERS **' + IACT=1 + CALL LESRPX(IACT,IUT0,IUT5,IUT6,IPART,COMGEN, + * IFORM , + * D000 ,U000 ,RHO000, + * VISCM , + * ISTART,NTIME ,DT , + * NMAXT ,NMAXP ,EPST ,EPSP , + * TFINAL,UFINAL,VFINAL,WFINAL, + * FILEMS,FILEBC,FILEIF,FILEFF,FILEHS, + * FILEMR,FILEBR,FILEFR,IERR) +C + IF(IERR.NE.0) THEN + WRITE(IUT0,*) BLANK + WRITE(IUT0,*) ERMSGC + GO TO 9999 + ENDIF + INTPRN = MAX(1,NTIME/MAXPRN) + WRITE(IUT6,*) ' DONE!' +C + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) ' ** READING OPTIONAL PARAMETERS **' + CALL LESROP(IUT0,IUT5,IUT6, + * ISOLP, + * NMAXB, + * BTDCOE,DIVESC, + * JSORT,JCOLOR,JUNROL,NDIVX,NDIVY,NDIVZ,NEIP, + * NRFN,IRFNMW,IRFNFF,NGRID, + * MER,MPR,MEPR,MPPR,MBR,MBPR,MDOMR, + * JPRESS,IVELIN,VELIN0, + * U000, + * EPSP,EPSREP, + * IWRITE,JGRID, + * IERR) +C + if (JPRESS .ne. 2) then + write(IUT6,*) '## MINIAPP MUST BE SET "#PRS_ELM"' + goto 9999 + end if +C + CLOSE(IUT5) + WRITE(IUT6,*) ' DONE!' 
+C + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) ' ** CHECKING SIZE OF MESH DATA **' + IACT = 1 + IONE=1 + IF(JGRID.EQ.1) THEN + CALL GFALL(IUT0,IUT6,IUTMS,FILEMS, + * MCOM,NCOMFL,COMFLE, + * MCOM,NCOMST,COMSET, + * IACT,IWRITE,INAME,IRESV, + * ICAST,IDATA0,IALL,ISKIP1,IERR, + * '*GRID_3D *NODE_3D !', + * NAME,IONE,NP,FRBUF,FRBUF,FRBUF, + * NAME,IONE,IONE,NE,NDUM,IRBUF, + * ICHECK) + ELSE + CALL GFALL(IUT0,IUT6,IUTMS,FILEMS, + * MCOM,NCOMFL,COMFLE, + * MCOM,NCOMST,COMSET, + * IACT,IWRITE,INAME,IRESV, + * ICAST,IDATA0,IALL,ISKIP1,IERR, + * '*GRID_3D%D *NODE_3D !', + * NAME,IONE,NP,FRBUF,FRBUF,FRBUF, + * NAME,IONE,IONE,NE,NDUM,IRBUF, + * ICHECK) + ENDIF + CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) + + IF(IERRA.NE.0) THEN + WRITE(IUT0,*) BLANK + WRITE(IUT0,*) ERMSGC + GO TO 9999 + ENDIF + WRITE(IUT6,*) ' DONE!' +C + NE_total = total(NE) + NP_total = total(NP) + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) ' TIME STEPS: ', NTIME + WRITE(IUT6,*) ' NUMBER OF ELEMENTS: ', NE + WRITE(IUT6,*) ' NUMBER OF NODES: ', NP + WRITE(IUT6,*) ' NUMBER OF TOTAL ELEMENTS: ', NE_total + WRITE(IUT6,*) ' NUMBER OF TOTAL NODES: ', NP_total +#ifdef PROF_MAPROF + call maprof_profile_add_problem_size("Ne", NE_total) + call maprof_profile_add_problem_size("Np", NP_total) + call maprof_profile_add_problem_size("Nstep", NTIME) +#endif +C + ME=NE+1000 + MP=NP+1000 + MAXBUF = ME*N0 +C + IF(NRFN.EQ.1) THEN + ME=ME*8 + MP=MP*8 + ENDIF + IF(NRFN.EQ.2) THEN + ME=ME*64 + MP=MP*64 + ENDIF +C + IF(MER .NE.-1) ME =MER + IF(MPR .NE.-1) MP =MPR + IF(MEPR.NE.-1) MEP=MEPR + IF(MPPR.NE.-1) MPP=MPPR +C + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) 'ALLOCATING VARIABLE FOR FIELD' +C + IF(ME.GT.MP) THEN + MWRK=ME + ELSE + MWRK=MP + ENDIF +C +C + MB=MP/2 + MBPDOM=MP/5 + MDOM=32 + IF(MBR .NE.-1) MB =MBR + IF(MBPR .NE.-1) MBPDOM=MBPR + IF(MDOMR.NE.-1) MDOM =MDOMR +C + MPWALL=MB + MPINT =MB + MPBODY=MB +C + SIZEAL = 0.E0 +C + SIZE = (10*FLOAT(MP)+(2+2*MRCM)*FLOAT(ME))*WORD + SIZEAL = SIZEAL+SIZE + WRITE(IUT6,'(A12,F8.2," MB ALLOCATED")') " FIELD 
:",SIZE + ALLOCATE( X(MP), STAT=LERR(01)) + ALLOCATE( Y(MP), STAT=LERR(02)) + ALLOCATE( Z(MP), STAT=LERR(03)) + ALLOCATE( U(MP), STAT=LERR(04)) + ALLOCATE( V(MP), STAT=LERR(05)) + ALLOCATE( W(MP), STAT=LERR(06)) + ALLOCATE(PN(MP), STAT=LERR(07)) + ALLOCATE( P(ME), STAT=LERR(10)) + ALLOCATE(VISC(ME), STAT=LERR(11)) + ALLOCATE( PRCM(MRCM,ME),STAT=LERR(12)) + ALLOCATE(APRCM(MRCM,ME),STAT=LERR(13)) + ALLOCATE(XD(MP), STAT=LERR(14)) + ALLOCATE(YD(MP), STAT=LERR(15)) + ALLOCATE(ZD(MP), STAT=LERR(16)) + LERR(08)=0 + LERR(09)=0 + CALL ERRCHK(IUT6,IPART,16,LERR,IERR) + IF(IERR.NE.0) THEN + WRITE(IUT0,*) BLANK + WRITE(IUT0,*) ERMSGC + WRITE(IUT0,*) EREXP1 + GO TO 9999 + ENDIF + WRITE(IUT6,*) 'DONE' +C + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) 'ALLOCATING VARIABLE FOR ELEMENT SHAPE ' + SIZE = (1*FLOAT(MP)+72*FLOAT(ME))*WORD + SIZEAL = SIZEAL+SIZE + WRITE(IUT6,'(A12,F8.2," MB ALLOCATED")') " ELEMENT :",SIZE + ALLOCATE( SNI(N1,ME),STAT=LERR(01)) + ALLOCATE(DNXI(N1,ME),STAT=LERR(02)) + ALLOCATE( CM(MP),STAT=LERR(03)) + ALLOCATE(DNYI(N1,ME),STAT=LERR(04)) + ALLOCATE(DNZI(N1,ME),STAT=LERR(05)) + ALLOCATE( SN(N1,ME),STAT=LERR(06)) + ALLOCATE(DNXYZ(3,N1,ME),STAT=LERR(07)) + CALL ERRCHK(IUT6,IPART,7,LERR,IERR) + IF(IERR.NE.0) THEN + WRITE(IUT0,*) BLANK + WRITE(IUT0,*) ERMSGC + WRITE(IUT0,*) EREXP1 + GO TO 9999 + ENDIF + WRITE(IUT6,*) 'DONE' +C + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) 'ALLOCATING VARIABLE FOR CONNECTIVITY' + SIZE = ((MEP*2+MPP+4)*FLOAT(MP)+81*FLOAT(ME))*WORD + SIZEAL = SIZEAL+SIZE + WRITE(IUT6,'(A12,F8.2," MB ALLOCATED")') " CONNECT :",SIZE + ALLOCATE(NODE(N1,ME), STAT=LERR(01)) + ALLOCATE(IENP(MEP,MP), STAT=LERR(02)) + ALLOCATE(JENP(MEP,MP), STAT=LERR(03)) + ALLOCATE(NEP(MP), STAT=LERR(04)) + ALLOCATE(IPNP(MPP,MP), STAT=LERR(05)) + ALLOCATE(NPP(MP), STAT=LERR(06)) + ALLOCATE(NPP2(MP), STAT=LERR(07)) + ALLOCATE(NUMIP(MP), STAT=LERR(08)) + ALLOCATE(LTAB(N1,N2,ME),STAT=LERR(09)) + CALL ERRCHK(IUT6,IPART,9,LERR,IERR) + IF(IERR.NE.0) THEN + WRITE(IUT0,*) BLANK + WRITE(IUT0,*) 
ERMSGC + WRITE(IUT0,*) EREXP1 + GO TO 9999 + ENDIF + WRITE(IUT6,*) 'DONE' +C + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) 'ALLOCATING VARIABLE FOR B.C.' + SIZE = (19*FLOAT(MB)+(2*MBPDOM+2)*FLOAT(MDOM) + & +2*FLOAT(ME)+2*FLOAT(MP))*WORD + SIZEAL = SIZEAL+SIZE + WRITE(IUT6,'(A12,F8.2," MB ALLOCATED")') " BOUNDARY :",SIZE + ALLOCATE(LPINLT(MB), STAT=LERR(01)) + ALLOCATE(LPWALL(MB), STAT=LERR(02)) + ALLOCATE(LPSYMT(MB), STAT=LERR(03)) + ALLOCATE(LPFREE(MB), STAT=LERR(04)) + ALLOCATE(LPCCL1(MB), STAT=LERR(05)) + ALLOCATE(LPCCL2(MB), STAT=LERR(06)) + ALLOCATE(LPBODY(MB), STAT=LERR(07)) + ALLOCATE(LPINT1(MB), STAT=LERR(08)) + ALLOCATE(LPINT2(MB), STAT=LERR(09)) + ALLOCATE(LPINT3(MB), STAT=LERR(10)) + ALLOCATE(LDOM(MDOM), STAT=LERR(11)) + ALLOCATE(NBPDOM(MDOM), STAT=LERR(12)) + ALLOCATE(IPSLF(MBPDOM,MDOM),STAT=LERR(13)) + ALLOCATE(IPSND(MBPDOM,MDOM),STAT=LERR(14)) + ALLOCATE(UINLT (MB), STAT=LERR(15)) + ALLOCATE(VINLT (MB), STAT=LERR(16)) + ALLOCATE(WINLT (MB), STAT=LERR(17)) + ALLOCATE(UWALL (MB), STAT=LERR(18)) + ALLOCATE(VWALL (MB), STAT=LERR(19)) + ALLOCATE(WWALL (MB), STAT=LERR(20)) + ALLOCATE(XPSYMT(MB), STAT=LERR(21)) + ALLOCATE(YPSYMT(MB) ,STAT=LERR(22)) + ALLOCATE(ZPSYMT(MB) ,STAT=LERR(23)) + ALLOCATE(LPBTOA(MP) ,STAT=LERR(24)) + ALLOCATE(LPATOB(MP) ,STAT=LERR(25)) + ALLOCATE(LEBTOA(ME) ,STAT=LERR(26)) + ALLOCATE(LEATOB(ME) ,STAT=LERR(27)) + CALL ERRCHK(IUT6,IPART,27,LERR,IERR) + IF(IERR.NE.0) THEN + WRITE(IUT0,*) BLANK + WRITE(IUT0,*) ERMSGC + WRITE(IUT0,*) EREXP1 + GO TO 9999 + ENDIF + WRITE(IUT6,*) 'DONE' +C + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) 'ALLOCATING VARIABLE FOR WORK-1' + SIZE = (0*FLOAT(MP)+72*FLOAT(ME))*WORD + SIZEAL = SIZEAL+SIZE + WRITE(IUT6,'(A12,F8.2," MB ALLOCATED")') " WORK-1 :",SIZE + ALLOCATE(AWRK(N1,N2,ME),STAT=LERR(01)) + CALL ERRCHK(IUT6,IPART,01,LERR,IERR) + IF(IERR.NE.0) THEN + WRITE(IUT0,*) BLANK + WRITE(IUT0,*) ERMSGC + WRITE(IUT0,*) EREXP1 + GO TO 9999 + ENDIF + WRITE(IUT6,*) 'DONE' +C + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) 'ALLOCATING 
VARIABLE FOR WORK-2' + SIZE = (3*FLOAT(MDOM)+8*FLOAT(MBPDOM*MDOM))*WORD + SIZEAL = SIZEAL+SIZE + WRITE(IUT6,'(A12,F8.2," MB ALLOCATED")') " WORK-2 :",SIZE + ALLOCATE(NPB0(MDOM),STAT=LERR(01)) + ALLOCATE(NPB1(MDOM),STAT=LERR(02)) + ALLOCATE(NPB2(MDOM),STAT=LERR(03)) + ALLOCATE(LPB1(MBPDOM,MDOM),STAT=LERR(04)) + ALLOCATE(LPB2(MBPDOM,MDOM),STAT=LERR(05)) + ALLOCATE(XPB1(MBPDOM,MDOM),STAT=LERR(06)) + ALLOCATE(YPB1(MBPDOM,MDOM),STAT=LERR(07)) + ALLOCATE(ZPB1(MBPDOM,MDOM),STAT=LERR(08)) + ALLOCATE(XPB2(MBPDOM,MDOM),STAT=LERR(09)) + ALLOCATE(YPB2(MBPDOM,MDOM),STAT=LERR(10)) + ALLOCATE(ZPB2(MBPDOM,MDOM),STAT=LERR(11)) + CALL ERRCHK(IUT6,IPART,11,LERR,IERR) + IF(IERR.NE.0) THEN + WRITE(IUT0,*) BLANK + WRITE(IUT0,*) ERMSGC + WRITE(IUT0,*) EREXP1 + GO TO 9999 + ENDIF + WRITE(IUT6,*) 'DONE' +C + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) 'ALLOCATING VARIABLE FOR WORK-3' + SIZE = (3*FLOAT(MP)+27*FLOAT(MWRK)+26*FLOAT(ME) + & +37*2*FLOAT(MGAUSS))*WORD + SIZEAL = SIZEAL+SIZE + WRITE(IUT6,'(A12,F8.2," MB ALLOCATED")') " WORK-3 :",SIZE + ALLOCATE(NODWK1(N0,ME),STAT=LERR(01)) + ALLOCATE(LEWRK(2,MWRK),STAT=LERR(02)) + ALLOCATE(LWRK01(MWRK), STAT=LERR(03)) + ALLOCATE(LWRK02(MWRK), STAT=LERR(04)) + ALLOCATE(LWRK04(MWRK), STAT=LERR(05)) +C ALLOCATE(RX(N1,ME)[*], STAT=LERR(06)) +C ALLOCATE(RY(N1,ME)[*], STAT=LERR(07)) + ALLOCATE(WRKN(MWRK*9), STAT=LERR(08)) + ALLOCATE(WRK01(MWRK), STAT=LERR(09)) + ALLOCATE(WRK02(MWRK), STAT=LERR(10)) + ALLOCATE(WRK03(MWRK), STAT=LERR(11)) + ALLOCATE(WRK04(MWRK), STAT=LERR(12)) + ALLOCATE(WRK05(MWRK), STAT=LERR(13)) + ALLOCATE(WRK06(MWRK), STAT=LERR(14)) + ALLOCATE(WRK07(MWRK), STAT=LERR(15)) + ALLOCATE(WRK08(MWRK), STAT=LERR(16)) + ALLOCATE(WRK09(MWRK), STAT=LERR(17)) + ALLOCATE(WRK10(MWRK), STAT=LERR(18)) + ALLOCATE(WRK11(MWRK), STAT=LERR(19)) + ALLOCATE(WRK12(MWRK), STAT=LERR(20)) + ALLOCATE(WRK13(MWRK), STAT=LERR(21)) + ALLOCATE(WRK3(3,MP), STAT=LERR(22)) + ALLOCATE(DWRK01(N1 ,MGAUSS), STAT=LERR(23)) + ALLOCATE(DWRK02(N1 ), STAT=LERR(24)) + 
ALLOCATE(DWRK03(3,N1,MGAUSS), STAT=LERR(25)) + ALLOCATE(DWRK04(3,N1 ), STAT=LERR(26)) + ALLOCATE(DWRK05( MGAUSS), STAT=LERR(27)) +C Fj start 202103 + rx_lb(1) = 1 + rx_lb(2) = 1 + rx_ub(1) = N1 + rx_ub(2) = ME + ry_lb(1) = 1 + ry_lb(2) = 1 + ry_ub(1) = N1 + ry_ub(2) = ME + call xmp_new_coarray(rx_desc, 4, 1, rx_lb, rx_ub, 1, img_dims) + call xmp_new_coarray(ry_desc, 4, 1, ry_lb, ry_ub, 1, img_dims) + call xmp_coarray_bind(rx_desc,RX) + call xmp_coarray_bind(ry_desc,RY) +C Fj start 202103 + CALL ERRCHK(IUT6,IPART,27,LERR,IERR) + IF(IERR.NE.0) THEN + WRITE(IUT0,*) BLANK + WRITE(IUT0,*) ERMSGC + WRITE(IUT0,*) EREXP1 + GO TO 9999 + ENDIF + WRITE(IUT6,*) 'DONE' +C +#ifndef NO_REFINER + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) 'ALLOCATING VARIABLE FOR FEFINE' + SIZE = (3*2*FLOAT(MP)+3*FLOAT(MWRK)+16*FLOAT(ME))*WORD + SIZEAL = SIZEAL+SIZE + WRITE(IUT6,'(A12,F8.2," MB ALLOCATED")') " WORK-RFN ",SIZE + ALLOCATE(NODWK2(N0,ME),STAT=LERR(01)) + ALLOCATE(LWRK03(MWRK), STAT=LERR(02)) + ALLOCATE(LWRK05(MWRK), STAT=LERR(03)) + ALLOCATE(LWRK06(MWRK), STAT=LERR(04)) + ALLOCATE(NODWK3(N0,ME),STAT=LERR(05)) + ALLOCATE(DWRK3(MP*3), STAT=LERR(06)) + CALL ERRCHK(IUT6,IPART,6,LERR,IERR) + IF(IERR.NE.0) THEN + WRITE(IUT0,*) BLANK + WRITE(IUT0,*) ERMSGC + WRITE(IUT0,*) EREXP1 + GO TO 9999 + ENDIF + WRITE(IUT6,*) 'DONE' +#endif +C + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) 'ALLOCATING VARIABLE FOR SIMPLE' + SIZE = (0*FLOAT(MP)+4*FLOAT(ME))*WORD + SIZEAL = SIZEAL+SIZE + WRITE(IUT6,'(A12,F8.2," MB ALLOCATED")') " SIMPLE :",SIZE + ALLOCATE(DT3D(ME), STAT=LERR(01)) + ALLOCATE(UE(ME), STAT=LERR(02)) + ALLOCATE(VE(ME), STAT=LERR(03)) + ALLOCATE(WE(ME), STAT=LERR(04)) + CALL ERRCHK(IUT6,IPART,4,LERR,IERR) + IF(IERR.NE.0) THEN + WRITE(IUT0,*) BLANK + WRITE(IUT0,*) ERMSGC + WRITE(IUT0,*) EREXP1 + GO TO 9999 + ENDIF + WRITE(IUT6,*) 'DONE' +C +C +C <<<<< READ MESH DATA >>>>> +C +C + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) ' ** READING MESH DATA **' + IACT = 1 + IF(JGRID.EQ.1) THEN + CALL 
GFALL(IUT0,IUT6,IUTMS,FILEMS, + * MCOM,NCOMFL,COMFLE, + * MCOM,NCOMST,COMSET, + * IACT,IWRITE,INAME,IRESV, + * ICAST,IDATA0,IALL,ISKIP,IERR, + * '*GRID_3D *NODE_3D !', + * NAME,MP,NP,X,Y,Z, + * NAME,ME,N0,NE,NDUM,NODWK1, + * ICHECK) +CCTT //NORMALIZATION + DO 920 IP=1, NP + X(IP)=X(IP)/D000 + Y(IP)=Y(IP)/D000 + Z(IP)=Z(IP)/D000 + XD(IP)=DBLE(X(IP)) + YD(IP)=DBLE(Y(IP)) + ZD(IP)=DBLE(Z(IP)) + 920 CONTINUE +C + ELSE + CALL GFALL(IUT0,IUT6,IUTMS,FILEMS, + * MCOM,NCOMFL,COMFLE, + * MCOM,NCOMST,COMSET, + * IACT,IWRITE,INAME,IRESV, + * ICAST,IDATA0,IALL,ISKIP,IERR, + * '*GRID_3D%D *NODE_3D !', + * NAME,MP,NP,XD,YD,ZD, + * NAME,ME,N0,NE,NDUM,NODWK1, + * ICHECK) +CCTT //NORMALIZATION + DO 925 IP=1, NP + XD(IP)=XD(IP)/DBLE(D000) + YD(IP)=YD(IP)/DBLE(D000) + ZD(IP)=ZD(IP)/DBLE(D000) + X (IP)=REAL(XD(IP)) + Y (IP)=REAL(YD(IP)) + Z (IP)=REAL(ZD(IP)) + 925 CONTINUE + ENDIF + CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) + IF(IERRA.NE.0) THEN + WRITE(IUT0,*) BLANK + WRITE(IUT0,*) ERMSGC + GO TO 9999 + ENDIF +C + DO 900 IE=1,NE + DO 910 I=1,N0 + NODE(I,IE)=NODWK1(I,IE) + 910 CONTINUE + 900 CONTINUE +C + WRITE(IUT6,*) ' DONE!' 
+C +C +C <<<<< READ BOUNDARY CONDITIONS DATA >>>>> +C +C + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) ' ** READING BOUNDARY CONDITIONS DATA **' + IACT = 1 + CALL GFALL(IUT0,IUT6,IUTBC,FILEBC, + * MCOM,NCOMFL,COMFLE, + * MCOM,NCOMST,COMSET, + * IACT,IWRITE,INAME,IRESV, + * ICAST,IDATA0,IALL,ISKIP,IERR, + *'*BC_INLT *BC_IV3D *BC_MWAL *BC_WV3D *BC_WALL '// + *'*BC_SYMT *BC_FREE *BC_CYCL *BC_BODY *BC_INTR !', + * NAME,MB,NPINLT,LPINLT, + * NAME,MB,NPINLT,UINLT,VINLT,WINLT, + * NAME,MB,NPWALL,LPWALL, + * NAME,MB,NPWALL,UWALL,VWALL,WWALL, + * NAME,MB,NPW ,LWRK01, + * NAME,MB,NPSYMT,LPSYMT, + * NAME,MB,NPFREE,LPFREE, + * NAME,MB,NPCCL ,LPCCL1,LPCCL2, + * NAME,MB,NPBODY,LPBODY, + * NAME,MB,NPINT, LPINT1,LPINT2,LPINT3, + * ICHECK) +C +CTT //NORMALIZE +C + DO IBP=1,NPINLT + UINLT(IBP)=UINLT(IBP)/U000 + VINLT(IBP)=VINLT(IBP)/U000 + WINLT(IBP)=WINLT(IBP)/U000 + ENDDO +C + DO IBP=1,NPWALL + UWALL(IBP)=UWALL(IBP)/U000 + VWALL(IBP)=VWALL(IBP)/U000 + WWALL(IBP)=WWALL(IBP)/U000 + ENDDO +C + IF(IVELIN.EQ.1) THEN + DO IBP=1,NPINLT + UINLT(IBP)=VELIN0(1) + VINLT(IBP)=VELIN0(2) + WINLT(IBP)=VELIN0(3) + ENDDO + ENDIF +C + CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) + IF(IERRA.NE.0) THEN + WRITE(IUT0,*) BLANK + WRITE(IUT0,*) ERMSGC + GO TO 9999 + ENDIF +C + DO 1400 IP = 1 , NP + LWRK02(IP)=0 + 1400 CONTINUE +C + DO 1401 IBP = 1 , NPWALL + IP=LPWALL(IBP) + LWRK02(IP)=1 + 1401 CONTINUE +C + DO 1402 IPW = 1 , NPW + IP=LWRK01(IPW) + IF(LWRK02(IP).EQ.1) GOTO 1402 + NPWALL = NPWALL+1 + IF(NPWALL.GT.MB) THEN + WRITE(IUT0,*) BLANK + WRITE(IUT0,*) ERMSGB + WRITE(IUT0,*) EREXP5, MPWALL + GO TO 9999 + ENDIF + LPWALL(NPWALL) = IP + LWRK02(IP) = 1 + UWALL (NPWALL) = 0.0E0 + VWALL (NPWALL) = 0.0E0 + WWALL (NPWALL) = 0.0E0 + 1402 CONTINUE + WRITE(IUT6,*) ' DONE!' 
+C +C +C <<<<< READ INITIAL FIELD DATA >>>>> +C +C + IINTRP=0 +C + IF(ISTART.EQ.1) THEN + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) ' ** READING INITIAL FLOW DATA **' + IACT = 1 + CALL GFALL(IUT0,IUT6,IUTIF,FILEIF, + * MCOM,NCOMFL,COMFLE, + * MCOM,NCOMST,COMSET, + * IACT,IWRITE,INAME,IRESV, + * ICAST,IDATA0,IALL,ISKIP,IERR, + * '*TIME_PS *STEP_PS *VELO_3D *PRES_3E + * *PRES_3D !', + * NAME,TIMEP, + * NAME,NTIMEP, + * NAME,MP,NPCHK1,U,V,W, + * NAME,ME,NECHK ,P, + * NAME,MP,NPCHK2,PN, + * ICHECK) +C +CCTT //NORMALIZE +C + TIMEW=TIMEP + TIMEP=TIMEP/(D000/U000) + DO IP=1,NP + U (IP)=U (IP)/U000 + V (IP)=V (IP)/U000 + W (IP)=W (IP)/U000 + PN(IP)=PN(IP)/(RHO000*U000*U000) + ENDDO +C + DO IE=1,NE + P(IE)=P(IE)/(RHO000*U000*U000) + ENDDO +C + CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) +C + IF(IERRA.NE.0) THEN + WRITE(IUT0,*) BLANK + WRITE(IUT0,*) ERMSGC + GO TO 9999 + ENDIF +C + IF(NRFN.EQ.0 .AND. NPCHK1.NE.NP) THEN + WRITE(IUT0,*) BLANK + WRITE(IUT0,*) ERMSGB + WRITE(IUT0,*) EREXP2 + GO TO 9999 + ENDIF +C + IF(NRFN.EQ.0 .AND. NPCHK2.NE.NP) THEN + IINTRP=1 + IF(NECHK.NE.NE) THEN + WRITE(IUT0,*) BLANK + WRITE(IUT0,*) ERMSGB + WRITE(IUT0,*) EREXP2 + GO TO 9999 + ENDIF +C + WRITE(IUT6,*) 'PRESSURE READ IS DEFINEDED AT ELEMENT' +CCC WRITE(IUT0,*) 'ONLY NODAL PRESSURE IS AVAILABLE : STOP' +CCC GOTO 9999 + ENDIF +C + WRITE(IUT6,*) ' DONE!' + ENDIF +C + NP0=NP + NE0=NE +C +#ifndef NO_REFINER + IF(NRFN.EQ.0) GOTO 1600 +C + IF(NGRID.NE.-1) THEN + WRITE(IUT6,*) ' **PREPARE REFINE LAYER-NUMBER TABLE **' + NLAYER = NGRID +C + LLAYER(NRFN) = (NLAYER+1)/2 + IF(NRFN.LT.2) GOTO 1550 + DO 1560 IRFNR = 2, NRFN + IRFN = NRFN - IRFNR + 1 + LLAYER(IRFN) = (LLAYER(IRFN+1) + NLAYER + 1)/2 + 1560 CONTINUE + 1550 CONTINUE + DO 1599 IRFN=1, NRFN + WRITE(IUT6,*) "IRFN / LLAYER :", IRFN, LLAYER(IRFN) + 1599 CONTINUE + ENDIF +C + DO 1500 IRFN= 1 , NRFN + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) ' ** REFINING MESH, B.C. 
FLOW **' + WRITE(IUT6,*) ' IRFN = ',IRFN +C + IF(NGRID.NE.-1) NGRID = LLAYER(IRFN) +C + IF (NGRID.EQ.0) GOTO 1500 + IDUM=NE + CALL LRFNMS(IRFNFF,IRFN,NGRID, + * ME,MP,N1,NE,NP,XD,YD,ZD,U,V,W,PN,P,NODE,LWRK05, + * MB,NPWALL,LPWALL,UWALL,VWALL,WWALL, + * MB,NPINLT,LPINLT,UINLT,VINLT,WINLT, + * MB,NPFREE,LPFREE, + * MB,NPSYMT,LPSYMT, + * MB,NPBODY,LPBODY, + * MB,NPINT ,LPINT1,LPINT2,LPINT3, + * IPART, + * MDOM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, + * WRK01,WRK02,WRK03,LWRK06, + * DWRK3,LWRK01,LWRK02,WRK04,NODWK1,NODWK2,NODWK3, +C Fj +C * RX,RY,NPB0, + * rx_desc,ry_desc,NPB0, +C Fj + * NPB1,LPB1,XPB1,YPB1,ZPB1, + * NPB2,LPB2,XPB2,YPB2,ZPB2, + * LWRK03,LWRK04, + * IUT6,IUT0,IERR) +C + DO 1510 IP=1, NP + X (IP)=REAL(XD(IP)) + Y (IP)=REAL(YD(IP)) + Z (IP)=REAL(ZD(IP)) + 1510 CONTINUE +C + CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) + IF(IERRA.NE.0) THEN + WRITE(IUT0,*) BLANK + WRITE(IUT0,*) ERMSGC + GO TO 9999 + ENDIF +C + WRITE(IUT6,*) ' ** DONE **' + WRITE(IUT6,*) BLANK +C + 1500 CONTINUE + DEALLOCATE(DWRK3) +C + 1600 CONTINUE +#endif /* REFINER */ +C +C +C <<<<< REORDERING >>>>> +C +C + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) ' ** REORDERING **' + CALL REORDR + & ( JSORT,JCOLOR,NDIVX,NDIVY,NDIVZ,NEIP, + & MP,ME,MWRK,NP,NE,N1,NODE, + & MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, + & LPBTOA,LPATOB,LEBTOA,LEATOB, + & NETET,NEPRD,NEWED,NEHEX, + & NPINLT,NPWALL,NPSYMT,NPFREE,NPCCL ,NPBODY, + & NPINT , + & LPINLT,LPWALL,LPSYMT,LPFREE,LPCCL1,LPCCL2, + & LPBODY,LPINT1, + & X,Y,Z,XD,YD,ZD,U,V,W,PN,P, + & WRK01,IERR,IUT0,IUT6 ) + WRITE(IUT6,*) ' ** REORDERING ENDS **' + NEX( 1)=NETET + NEX( 2)=NEPRD + NEX( 3)=NEWED + NEX( 4)=NEHEX + NEX( 5)=NTET + NEX( 6)=NPRD + NEX( 7)=NWED + NEX( 8)=NHEX + NEX( 9)=NSKIP1 + NEX(10)=NSKIP2 + NEX(11)=NSKIP3 + NEX(12)=NSKIP4 + if (NETET.ne.0 .or. NEPRD.ne.0 .or. NEWED.ne.0) then + write(IUT0,*) + & ' MINIAPP SUPPORTS HEXAHEDRON FINITE ELEMENTS ONLY.' 
+ GO TO 9999 + endif +C + IF(IERR.NE.0) THEN + WRITE(IUT0,*) BLANK + WRITE(IUT0,*) ERMSGC + GO TO 9999 + ENDIF + WRITE(IUT6,*) ' ** DONE **' +C +C +C <<<<< INITILIZE VARIAVLES >>>>> +C +C + DO 1900 IP = 1 , NP + IF(ISTART.EQ.1) GOTO 1900 + U (IP) = 0.E0 + V (IP) = 0.E0 + W (IP) = 0.E0 + 1900 CONTINUE +C + TFINAL = TFINAL+FINITE + DO 2000 IE = 1 , NE + VISC (IE) = VISCM + 2000 CONTINUE +C + IF(ISTART.EQ.0) THEN + DO 2020 IE=1, NE + P(IE) = 0.0E0 + 2020 CONTINUE + DO 2021 IP=1, NP + PN(IP) = 0.0E0 + 2021 CONTINUE + ELSE + IF (NECHK.EQ.0) THEN + DO 2022 IE=1,NE + P(IE)=0.0E0 + 2022 CONTINUE + ENDIF + IF (NPCHK2.EQ.0) THEN + DO 2023 IP=1,NP + PN(IP)=0.0E0 + 2023 CONTINUE + ENDIF + ENDIF +C +C +C <<<<< PREPARE FOR BOUNDARY CONDITIONS >>>>> +C +C + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) ' ** PREPARING FOR BOUNDARY CONDITIONS **' + CALL LESSFX(LOCAL,NODE,MB,NE,NP,N0,N1,NS,NSP,N2D, + * XD,YD,ZD, + * NPSYMT,NPSYM2,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, + * NPCCL ,LPCCL1,LPCCL2, + * IPART ,MPINT ,NPINT ,LPINT1,LPINT2,LPINT3, + * MDOM ,NDOM ,LDOM ,NBPDOM,MBPDOM,IPSLF,IPSND, +C Fj +C * LPBTOA,IUT0,IUT6,IERR,RX,RY, + * LPBTOA,IUT0,IUT6,IERR,rx_desc,ry_desc, +C Fj + * MWRK,WRK01,WRK02,WRK03,WRK04,WRK05,WRK06, + * LWRK01,LEWRK) + CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) + IF(IERRA.NE.0) THEN + WRITE(IUT0,*) BLANK + WRITE(IUT0,*) ERMSGC + GO TO 9999 + ENDIF + WRITE(IUT6,*) ' DONE!' +C + CALL DDCOM1(LPINT1,NPINT,NUMIP,NP,IUT0,IERR) + CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) + IF(IERRA.NE.0) THEN + WRITE(IUT0,*) BLANK + WRITE(IUT0,*) ERMSGC + GO TO 9999 + ENDIF +C + WRITE(IUT6,*) ' DONE!' 
+C +C +C <<<<< INTEGRAL ELEMENT MATRICES >>>>> +C +C +C - INITIALIZE - + DO 2500 IE=1,NE + DO 2510 J=1,N1 + SNI ( J,IE)=0.0E0 + DNXI ( J,IE)=0.0E0 + DNYI ( J,IE)=0.0E0 + DNZI ( J,IE)=0.0E0 + SN ( J,IE)=0.0E0 + DNXYZ(1,J,IE)=0.0E0 + DNXYZ(2,J,IE)=0.0E0 + DNXYZ(3,J,IE)=0.0E0 + 2510 CONTINUE + 2500 CONTINUE +C + NELM=0 + NELM=NELM+NSKIP1*NETET + NELM=NELM+NSKIP2*NEPRD + NELM=NELM+NSKIP3*NEWED + NELM=NELM+NSKIP4*NEHEX +C + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) 'ALLOCATING VARIABLE FOR WORK-4' + SIZE = (10*FLOAT(NELM))*WORD + WRITE(IUT6,'(A12,F8.2," MB ALLOCATED")') " WORK-4 :",SIZE + SIZEAL = SIZEAL+SIZE + ALLOCATE(E (NELM+1),STAT=LERR(01)) + ALLOCATE(EX (NELM+1),STAT=LERR(02)) + ALLOCATE(EY (NELM+1),STAT=LERR(03)) + ALLOCATE(EZ (NELM+1),STAT=LERR(04)) + ALLOCATE(EXX(NELM+1),STAT=LERR(05)) + ALLOCATE(EYY(NELM+1),STAT=LERR(06)) + ALLOCATE(EZZ(NELM+1),STAT=LERR(07)) + ALLOCATE(EXY(NELM+1),STAT=LERR(08)) + ALLOCATE(EXZ(NELM+1),STAT=LERR(09)) + ALLOCATE(EYZ(NELM+1),STAT=LERR(10)) + CALL ERRCHK(IUT6,IPART,10,LERR,IERR) + IF(IERR.NE.0) THEN + WRITE(IUT0,*) BLANK + WRITE(IUT0,*) ERMSGC + WRITE(IUT0,*) EREXP1 + GO TO 9999 + ENDIF + WRITE(IUT6,*) 'DONE' +C + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) ' ** INTEGRATING ELEMENT MATRICES **' + DO 2520 IELM=1,NELM + E (IELM)=0.0E0 + EX (IELM)=0.0E0 + EY (IELM)=0.0E0 + EZ (IELM)=0.0E0 + EXX(IELM)=0.0E0 + EYY(IELM)=0.0E0 + EZZ(IELM)=0.0E0 + EXY(IELM)=0.0E0 + EXZ(IELM)=0.0E0 + EYZ(IELM)=0.0E0 + 2520 CONTINUE +C + MELM=NELM+1 + CALL ELM3DX(MGAUSS,IGAUSH, + * MELM,N1,NE,NP,NEX,XD,YD,ZD,NODE, +C Fj + * SNI ,DNXI,DNYI,DNZI,SN,RX,RY,WRKN, +C * SNI ,DNXI,DNYI,DNZI,SN,rx_desc,ry_desc,WRKN, +C Fj + * E,EX,EY,EZ,EXX,EYY,EZZ,EXY,EXZ,EYZ, + * DWRK01,DWRK02,DWRK03,DWRK04,DWRK05,IUT0,IERR) + CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) + IF(IERRA.NE.0) THEN + WRITE(IUT0,*) BLANK + WRITE(IUT0,*) ERMSGC + GO TO 9999 + ENDIF + DEALLOCATE(DWRK01) + DEALLOCATE(DWRK02) + DEALLOCATE(DWRK03) + DEALLOCATE(DWRK04) + DEALLOCATE(DWRK05) +C + DO 2540 IE=1,NE + DO 2545 J=1,N1 + 
DNXYZ(1,J,IE)=RX(J,IE) + DNXYZ(2,J,IE)=RY(J,IE) + DNXYZ(3,J,IE)=WRKN(J+N1*(IE-1)) + 2545 CONTINUE + 2540 CONTINUE + WRITE(IUT6,*) 'DONE' +C +C +C <<<<< LUMP MASS MATRIX >>>>> +C +C + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) ' ** LUMPING MASS MATRIX **' + CALL LUMPEX(N1,NE,NP,NEX,NODE,NELM,E,CM) + IDUM=2 + DO IP=1,NP + WRK01(IP)=CM(IP) + WRK02(IP)=CM(IP) + ENDDO + IDUM=1 + CALL DDCOMX(IPART,IDUM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, +C Fujitsu start 202103 +C * CM,CM,CM,NP,IUT0,IERR,RX,RY,MAXBUF) + * CM,CM,CM,NP,IUT0,IERR,rx_desc,ry_desc,MAXBUF) +C Fujitsu end 202103 + DO 2700 IP=1,NP + CM(IP)=1.0E0/CM(IP) + 2700 CONTINUE + WRITE(IUT6,*) ' DONE!' +C + IF(IINTRP.EQ.1.OR.ISTART.EQ.1) THEN + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) ' ** INTERPOLATING PRESSURE TO NODES **' + CALL NODLEX(NODE,ME,NE,NP,N1,NEX,SN, + * IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, +C Fj +C * P,PN,CM,IUT0,IERR,RX,RY,MAXBUF) + * P,PN,CM,IUT0,IERR,rx_desc,ry_desc,MAXBUF) +C Fj + IF(IERRA.NE.0) THEN + WRITE(IUT0,*) BLANK + WRITE(IUT0,*) ERMSGC + GO TO 9999 + ENDIF + WRITE(IUT6,*) ' DONE!' + ENDIF +C + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) ' ** MAKING NEIBERING LISTS **' + CALL NEIBR2(NODE,NE,NP,N0,N1,ME,MP,MEP,MPP, + * IENP,JENP,NEP,IPNP,NPP,NPPMAX,IUT0,IERR) + CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) + IF (IERRA.NE.0) THEN + WRITE(IUT0,*) BLANK + WRITE(IUT0,*) ERMSGC + GOTO 9999 + ENDIF + WRITE(IUT6,*) ' DONE! 
' +C + NCRS=0 + DO IP=1,NP + NCRS=NCRS+NPP(IP) + ENDDO +C + NCRS2=0 + IF (JUNROL.EQ.1) THEN + IF ( NPPMAX.LE.30) THEN +C == HEX == + NPPMAX=30 + ELSE + NPPMAX=0 + JUNROL=0 + ENDIF + NCRS2=NP*NPPMAX + ENDIF +C + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) 'ALLOCATING VARIABLE FOR WORK-5' + SIZE = (2*FLOAT(NCRS)+2*FLOAT(NCRS2))*WORD + SIZEAL = SIZEAL+SIZE + WRITE(IUT6,'(A12,F8.2," MB ALLOCATED")') " WORK-5 :",SIZE + ALLOCATE( IPCRS(NCRS), STAT=LERR(01)) + ALLOCATE( ACRS(NCRS), STAT=LERR(02)) + ALLOCATE( TACRS(NCRS2),STAT=LERR(03)) + ALLOCATE(ITPCRS(NCRS2),STAT=LERR(04)) + CALL ERRCHK(IUT6,IPART,4,LERR,IERR) + IF(IERR.NE.0) THEN + WRITE(IUT0,*) BLANK + WRITE(IUT0,*) ERMSGC + WRITE(IUT0,*) EREXP1 + GO TO 9999 + ENDIF + WRITE(IUT6,*) 'DONE' +C + WRITE(IUT6,*) BLANK + WRITE(IUT6,'(A12,F8.2," MB ALLOCATED")') " TOTAL :",SIZEAL +C + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) ' ** MAKING CRS FORMAT **' + CALL MKCRS(NP,MPP,NCRS,NPP,IPNP,NPP2,IPCRS,IERR) +C + IF(JUNROL.EQ.1) + *CALL CRSCVI(NP,NPPMAX,NCRS,NCRS2,NPP,IPCRS,ITPCRS) + WRITE(IUT6,*) ' NUMBER OF NON-ZEROS IN CRS FORMAT; NCRS =',NCRS + IF (IERR.NE.0) THEN + WRITE(IUT0,*) BLANK + WRITE(IUT0,*) ERMSGB + WRITE(IUT0,*) EREXP7,NCRS + GOTO 9999 + ENDIF + IF (NCRS-NPP2(NP) .NE. NPP(NP)) THEN + WRITE(IUT0,*) BLANK + WRITE(IUT0,*) ERMSGC + GOTO 9999 + ENDIF +C + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) ' ** MAKING CRS LIST **' + CALL E2PLST(MEP,MPP,N2,N1,NP,NE,NEX, + * NODE,NEP,IENP,JENP,NPP,NPP2,IPNP, + * LTAB,IUT0,IERR) + IF (IERR.NE.0) THEN + WRITE(IUT0,*) BLANK + WRITE(IUT0,*) ERMSGC + GOTO 9999 + ENDIF + WRITE(IUT6,*) ' DONE! 
' +C + IF (NTIME.EQ.0) GOTO 5100 +C + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) ' ** CALCULATING INITIAL ELEMENT VALUES **' +C + CALL ICALEL(N1,NE,NP,NODE,U,V,W,UE,VE,WE) +C + WRITE(IUT6,*) BLANK + EPSREP=EPSP +C +C +C <<<<<<< TIME INTEGRATION LOOP START >>>>>>> +C +C + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) ' ** TIME CALCULATING INITIAL ELEMENT VALUES **' +C + ITIME = 0 +C + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) ' ** NOW ENTERING TIME MARCH LOOP **' +C CALL call maprof_time_start(TM_MAIN_LOOP) + CALL maprof_time_start(TM_MAIN_LOOP) + DO 5000 +C + CALL DDSYNC +C + ISTEP = ITIME +NTIMEP + TIME = ITIME*DT+TIMEP + TIMEW = TIME*(D000/U000) +C +C +C <<<<< CALCULATE MESH MOVING VELOCITY >>>>> +C +C + DEVLP1 = 1.E0-EXP(-TIME/TFINAL) + DEVLP2 = 1.E0/TFINAL*EXP(-TIME/TFINAL) +C + ACCELX = DEVLP2*UFINAL + ACCELY = DEVLP2*VFINAL + ACCELZ = DEVLP2*WFINAL +C + call maprof_time_start(TM_VEL3D1) + CALL VEL3D1 + * (MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, + * IFORM,BTDCOE,DT, + * ITIME,DEVLP1,ACCELX,ACCELY,ACCELZ, + * NMAXT,EPST,RESU,RESV,RESW,NITRU,NITRV,NITRW, + * ME,N0,N1,N2,NE,NP,NEX,NODE, + * U,V,W,VISC,UE,VE,WE, + * NELM,E,EX,EY,EZ,EXX,EYY,EZZ,EXY,EXZ,EYZ, + * SN, + * NUMIP, + * AWRK,NPP,NCRS,IPCRS,ACRS,LTAB, + * NPINLT,LPINLT,UINLT,VINLT,WINLT, + * NPWALL,LPWALL,UWALL,VWALL,WWALL, + * NPSYMT,NPSYM2,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, + * IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, + * LWRK01,LWRK02, + * WRK01,WRK02,WRK03,WRK04,WRK05,WRK06, + * WRK07,WRK08,WRK09,WRK10,WRK11,WRK12, +C Fj +C * RX,RY, + * rx_desc,ry_desc, +C Fj + * JUNROL,NPPMAX,NCRS2,WRK13,TACRS,ITPCRS, + * IUT0,IERR) + call maprof_time_stop(TM_VEL3D1) + IF(IERR.NE.0) GOTO 9999 +C +C +C <<<<< SOLVE PRESSURE EQUATION >>>>> +C +C + IF(ITIME.GE.1) THEN + DO IE=1,NE + DT3D(IE)=DT + ENDDO + call maprof_time_start(TM_PRES3E) + CALL PRES3E(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, + * ME,N0,N1,NEX,NE,NP, + * MRCM,NMAXP,NMAXB,ISOLP, + * EPSP,EPSREP,DT3D, + * NODE,CM,SN,DNXYZ,DNXI,DNYI,DNZI, + * U,V,W,NPINLT,LPINLT,NPWALL,LPWALL, + * 
NPSYM2,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, + * NITRP,RESP,P,PN, + * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, + * LWRK01,LWRK02,WRK3,WRK01,WRK02,WRK03,WRK04, + * WRK05,WRK06,WRK07,WRK08,WRK09,WRK10, +C Fj +C * PRCM,APRCM,RX,RY,MWRK,WRKN, + * PRCM,APRCM,rx_desc,ry_desc,MWRK,WRKN, +C Fj + * IUT0,IERR) + call maprof_time_stop(TM_PRES3E) + IF(IERR.NE.0) GOTO 9999 +C +C +C <<<<< COMPUTE NEXT TIME-STEP VELOCITY >>>>> +C +C + CALL VEL3D2(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, + * DIVMAX,DIVAV,XDIVMX,YDIVMX,ZDIVMX, + * DT,U,V,W,P,NODE,X,Y,Z,CM, + * DNXYZ,DNXI,DNYI,DNZI, + * ME,NE,NP,N0,N1,NEX, + * ITIME,DEVLP1, + * NPINLT,LPINLT,UINLT,VINLT,WINLT, + * NPWALL,LPWALL,UWALL,VWALL,WWALL, + * NPSYMT,NPSYM2,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, + * IPART ,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, +C Fj +C * RX,RY,MWRK,WRKN,WRK3,WRK01, + * rx_desc,ry_desc,MWRK,WRKN,WRK3,WRK01, +C Fj + * IUT0,IERR, + * WRK05) +C + ENDIF +C + IF(IERR.NE.0) GOTO 9999 +C + CALL CALUEL(N1,NE,NP,NEX,NODE,U,V,W,UE,VE,WE,IERR) +C +C +C <<<<< WRITE CALCULATION SEQUENCE >>>>> +C +C + IF(ITIME.GE.1 .AND. MOD(ITIME,INTPRN).EQ.0) THEN + call calave(NE, NP, P, U, V, W, ave_p, ave_v, max_v) + WRITE(IUT6,9000) ISTEP,NITRP,TIMEW,DIVMAX,RESP + WRITE(IUT6,9100) NITRU,NITRV,NITRW,RESU,RESV,RESW + WRITE(IUT6,9500) ave_p*(RHO000*U000*U000), + & ave_v*U000, max_v*U000 + ENDIF +C +C +C <<<<< EMERGENTLY END TIME MARCH TO SAVE FINAL FIELD >>>>> +C +C + IF(DIVMAX.GT.DIVESC) THEN + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) ERMSGB + WRITE(IUT6,*) EREXP6, DIVESC + XDIVMX=XDIVMX*D000 + YDIVMX=YDIVMX*D000 + ZDIVMX=ZDIVMX*D000 + WRITE(IUT6,'(A32,3E13.5)') EREXP8, XDIVMX,YDIVMX,ZDIVMX + JESC = 1 + IF(IPART.LT.1) GO TO 7000 + ENDIF +C +C WHEN THE MAXIMUM DIVERGENCE EXCEEDS ESCAPE DIVERGENT LIMIT IN PARALLEL +C MODE, NOTICE OTHER PROCESSES TO STOP THE PROGRAM AFTER WRITING THE +C FIELD. 
+C + IF(IPART.GE.1) THEN + FJESC = FLOAT(JESC) + CALL DDCOM2(FJESC, FJESCA) + IF(FJESCA.GT.0.5E0) THEN + JESC = 1 + GO TO 7000 + ENDIF + ENDIF +C +C +C <<<<<<< TIME-MARCHING LOOP END >>>>>>> +C +C + IF(ITIME.GE.NTIME) GOTO 5100 + ITIME = ITIME + 1 +C + 5000 CONTINUE + 5100 CONTINUE + call maprof_time_stop(TM_MAIN_LOOP) +C + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) ' ** TIME MARCH LOOP ENDED **' +C + 7000 CONTINUE +C +C +C <<<<< MAKING OUTPUT DATA >>>>> +C +C + DO 7103 IP=1,NP + X (IP) = X(IP)*D000 + Y (IP) = Y(IP)*D000 + Z (IP) = Z(IP)*D000 + U (IP) = U(IP)*U000 + V (IP) = V(IP)*U000 + W (IP) = W(IP)*U000 + PN (IP) = PN(IP)*(RHO000*U000*U000) + 7103 CONTINUE +C + DO 7104 IE=1,NE + P (IE) = P (IE)*RHO000*U000*U000 + 7104 CONTINUE +C + DO 7105 IBP=1,NPINLT + UINLT(IBP)=UINLT(IBP)*U000 + VINLT(IBP)=VINLT(IBP)*U000 + WINLT(IBP)=WINLT(IBP)*U000 + 7105 CONTINUE +C + DO 7106 IBP=1,NPWALL + UWALL(IBP)=UWALL(IBP)*U000 + VWALL(IBP)=VWALL(IBP)*U000 + WWALL(IBP)=WWALL(IBP)*U000 + 7106 CONTINUE +C +C +C <<<<< CONVERTE OUTPUT DATA >>>>> +C +C + CALL DATCNV(NP,NE,MWRK, + * NPINLT,NPWALL,NPSYMT,NPFREE,NPCCL ,NPBODY, + * NPINT , + * LPINLT,LPWALL,LPSYMT,LPFREE,LPCCL1,LPCCL2, + * LPBODY,LPINT1, + * X,Y,Z,XD,YD,ZD,U,V,W,PN,P, + * LPATOB,LEATOB,WRK01,IERR,IUT0) +C + CALL NODCNV(NP,NE,N0,N1,NODE,LPATOB,LEATOB,NODWK1,IERR,IUT0) +C +C +C <<<<< SAVE REFINED MESH AND B.C. DATA >>>>> +C +C + IF(IRFNMW.EQ.1)THEN + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) ' ** WRITING REFINED MESH DATA **' + COMFLE(1) = COMGEN + WRITE(COMFLE(2),*) ' REFINED MESH DATA' +C + DO IE=1,NE + DO I=1,N0 + NODWK1(I,IE)=NODE(I,IE) + ENDDO + ENDDO +C + IACT = 2 + NDUM=N0 + CALL GFALL(IUT0,IUT6,IUTMR,FILEMR, + * MCOM,NCOMFL,COMFLE, + * MCOM,NCOMST,COMSET, + * IACT,IWRITE,INAME,IRESV, + * ICAST,IDATA0,IALL,ISKIP,IERR, + * '*GRID_3D%D *NODE_3D !', + * NAME,MP,NP,XD,YD,ZD, + * NAME,ME,N0,NE,NDUM,NODWK1, + * ICHECK) + IF(IERR.NE.0) THEN + WRITE(IUT0,*) BLANK + WRITE(IUT0,*) ERMSGC + GO TO 9999 + ENDIF + WRITE(IUT6,*) ' DONE!' 
+C + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) ' ** WRITING REFINED B.C. DATA **' + IACT = 2 + NPZERO=0 + CALL GFALL(IUT0,IUT6,IUTBR,FILEBR, + * MCOM,NCOMFL,COMFLE, + * MCOM,NCOMST,COMSET, + * IACT,IWRITE,INAME,IRESV, + * ICAST,IDATA0,IALL,ISKIP,IERR, + *'*BC_INLT *BC_IV3D *BC_MWAL *BC_WV3D *BC_WALL '// + *'*BC_SYMT *BC_FREE *BC_CYCL *BC_BODY *BC_INTR !', + * NAME,MB,NPINLT,LPINLT, + * NAME,MB,NPINLT,UINLT,VINLT,WINLT, + * NAME,MB,NPWALL,LPWALL, + * NAME,MB,NPWALL,UWALL,VWALL,WWALL, + * NAME,MB,NPZERO,LWRK01, + * NAME,MB,NPSYMT,LPSYMT, + * NAME,MB,NPFREE,LPFREE, + * NAME,MB,NPCCL ,LPCCL1,LPCCL2, + * NAME,MB,NPBODY,LPBODY, + * NAME,MB,NPINT, LPINT1,LPINT2,LPINT3, + * ICHECK) + WRITE(IUT6,*) ' DONE!' +C + ENDIF +C +C +C <<<<< SAVE FINAL FIELD DATA >>>>> +C +C + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) ' ** WRITING FINAL FLOW DATA **' +C + COMFLE(1) = COMGEN + WRITE(COMFLE(2),*) ' FINAL FLOW DATA' + WRITE(COMFLE(3),*) ' TIME:',TIMEW + WRITE(COMFLE(4),*) ' STEP:',ISTEP + IACT = 2 + IF(NRFN.EQ.0) THEN + CALL GFALL(IUT0,IUT6,IUTFF,FILEFF, + * MCOM,4,COMFLE, + * MCOM,0,COMSET, + * IACT,IWRITE,INAME,IRESV, + * ICAST,IDATA0,IALL,ISKIP,IERR, + * '*TIME_PS *STEP_PS *VELO_3D *PRES_3E + * *PRES_3D !', + * NAME,TIMEW, + * NAME,ISTEP, + * NAME,MP,NP0,U,V,W, + * NAME,ME,NE0,P, + * NAME,MP,NP0,PN, + * ICHECK) + ELSE + CALL GFALL(IUT0,IUT6,IUTFF,FILEFF, + * MCOM,4,COMFLE, + * MCOM,0,COMSET, + * IACT,IWRITE,INAME,IRESV, + * ICAST,IDATA0,IALL,ISKIP,IERR, + * '*TIME_PS *STEP_PS *VELO_3D + * *PRES_3D !', + * NAME,TIMEW, + * NAME,ISTEP, + * NAME,MP,NP0,U,V,W, + * NAME,MP,NP0,PN, + * ICHECK) +C + COMFLE(1) = COMGEN + WRITE(COMFLE(2),*) ' FINAL FLOW DATA (REFINED)' + WRITE(COMFLE(3),*) ' TIME:',TIMEW + WRITE(COMFLE(4),*) ' STEP:',ISTEP + CALL GFALL(IUT0,IUT6,IUTFR,FILEFR, + * MCOM,4,COMFLE, + * MCOM,0,COMSET, + * IACT,IWRITE,INAME,IRESV, + * ICAST,IDATA0,IALL,ISKIP,IERR, + * '*TIME_PS *STEP_PS *VELO_3D *PRES_3E + * *PRES_3D !', + * NAME,TIMEW, + * NAME,ISTEP, + * NAME,MP,NP,U,V,W, + * NAME,ME,NE,P, + 
* NAME,MP,NP,PN, + * ICHECK) + ENDIF + WRITE(IUT6,*) ' DONE! ' +C +C +#ifdef PROF_MAPROF + if (IPART == 1) write(*,'(/,a)') '**** timings' + call maprof_print_time_mpi(TM_MAIN_LOOP, 'MAIN_LOOP*: ') + call maprof_print_time_mpi(TM_VEL3D1, ' VEL3D1: ') + call maprof_print_time_mpi(TM_VEL3D1_OP1, ' VEL3D1_OP1*: ') +C* call maprof_print_time_mpi(TM_VEL3D1_OP2, ' VEL3D1_OP2*: ') +C* call maprof_print_time_mpi(TM_VEL3D1_OP3, ' VEL3D1_OP3*: ') +C* call maprof_print_time_mpi(TM_VEL3D1_COM, ' DDCOMX: ') +C* call maprof_print_time_mpi(TM_CALUEL, ' CALUEL: ') + call maprof_print_time_mpi(TM_E2PMATR, ' E2PMATR: ') + call maprof_print_time_mpi(TM_DGNSCL, ' DGNSCL: ') + call maprof_print_time_mpi(TM_CLRCRS, ' CLRCRS: ') + call maprof_print_time_mpi(TM_CRSCVA, ' CRSCVA: ') + call maprof_print_time_mpi(TM_BCGS3X, ' BCGS3X: ') + call maprof_print_time_mpi(TM_BCGS3X_COM, ' DDCOM2: ') + call maprof_print_time_mpi(TM_CALAX3, ' CALAX3: ') + call maprof_print_time_mpi(TM_CALAX3_CAL, ' CALAX3_CAL*:') + call maprof_print_time_mpi(TM_CALAX3_COM, ' DDCOMX: ') + call maprof_print_time_mpi(TM_PRES3E, ' PRES3E: ') + call maprof_print_time_mpi(TM_PRES3E_COM, ' DDCOM2: ') + call maprof_print_time_mpi(TM_CALLAP, ' CALLAP: ') + call maprof_print_time_mpi(TM_GRAD3X, ' GRAD3X: ') + call maprof_print_time_mpi(TM_GRAD3X_OP0, ' GRAD3X_OP0*:') + call maprof_print_time_mpi(TM_GRAD3X_OP1, ' GRAD3X_OP1*:') + call maprof_print_time_mpi(TM_GRAD3X_OP2, ' GRAD3X_OP2*:') + call maprof_print_time_mpi(TM_GRAD3X_OP3, ' GRAD3X_OP3*:') + call maprof_print_time_mpi(TM_GRAD3X_COM, ' DDCOMX: ') + call maprof_print_time_mpi(TM_FLD3X2, ' FLD3X2: ') + call maprof_output() +#endif +C + IF(JESC.EQ.0) THEN + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) ' ** LES3X: SUCCESSFULLY TERMINATED **' + ELSE + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) ' ** LES3X: TERMINATED **' + ENDIF +C + IF(IPART.GE.2) CLOSE(IUT6) + CALL DDEXIT +C +C +C Fj start 202103 + call xmp_coarray_deallocate(rx_desc, status) + call xmp_coarray_deallocate(ry_desc, status) 
+C + call xmp_api_finalize +C +C call mpi_finalize(ierr) +C Fj end 202103 +C + STOP +C + 9999 CONTINUE +C +CC CALL DDSTOP(IPART,IUT0) + WRITE(IUT6,*) BLANK + WRITE(IUT6,*) ' ** LES3X: TERMINATED **' + IF(IPART.GE.2) CLOSE(IUT6) +C + CALL DDEXIT +C + STOP +C + 9000 FORMAT(/, + *' STEP' , I6 , ' N=' , I4 , + * ' TIME=', 1PE12.5 , ' MAXD=', 1PE12.5 , ' RESP=' , 1PE12.5) + 9100 FORMAT( + &18X, ' NU =', I12 , ' NV =', I12 , ' NW =', I12 , /, + &18X, ' RESU=', 1PE12.5 , ' RESV=', 1PE12.5 , ' RESW=', 1PE12.5) + 9500 FORMAT( + *18X, ' Pave=', 1PE12.5 , ' Vave=', 1PE12.5 , ' Vmax=', 1PE12.5) + 9600 FORMAT(' TIME:', 1PE12.5 ,' - ', 1PE12.5) + 9700 FORMAT(' STEP:', I12 ,' - ', I12) + END diff --git a/FFB-MINI/src/xmpAPI_lessfx.F b/FFB-MINI/src/xmpAPI_lessfx.F new file mode 100755 index 0000000..884ad1d --- /dev/null +++ b/FFB-MINI/src/xmpAPI_lessfx.F @@ -0,0 +1,325 @@ +C======================================================================C +C C +C SOFTWARE NAME : FRONTFLOW_BLUE.8.1 C +C C +C SUB ROUTINE : LESSFX C +C C +C WRITTEN BY Y.YAMADE C +C C +C C +C CONTACT ADDRESS : IIS, THE UNIVERSITY OF TOKYO, CISS C +C C +C THERMO-FLUID ANALYSIS SOLVERS FOR LARGE-SCALE-ASSEMBLY C +C C +C======================================================================C + SUBROUTINE LESSFX(LOCAL,NODE,MB,NE,NP,N,N1,NS,NSP,N2D, + * X,Y,Z, + * NPSYMT,NPSYM2,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, + * NPCCL ,LPCCL1,LPCCL2, + * IPART ,MPINT ,NPINT ,LPINT1,LPINT2,LPINT3, + * MDOM ,NDOM ,LDOM ,NBPDOM,MBPDOM,IPSLF,IPSND, +C Fj +C * LPBTOA,IUT0,IUT6,IERR,RX,RY, + * LPBTOA,IUT0,IUT6,IERR,rx_desc,ry_desc, +C Fj + * MWRK,WRK1,WRK2,WRK3,WRK4,WRK5,WRK6, + * IWRK,IWRK2) + IMPLICIT NONE + INTEGER*4 LOCAL,NODE,MB,NE,NP,N,N1,NS,NSP,N2D, + * NPSYMT,NPSYM2,LPSYMT,NPCCL ,LPCCL1,LPCCL2, + * IPART ,MPINT ,NPINT ,LPINT1,LPINT2,LPINT3, + * MDOM ,NDOM ,LDOM ,NBPDOM,MBPDOM,IPSLF,IPSND, + * LPBTOA,IUT0,IUT6,IERR,MWRK,IWRK,IWRK2 + REAL*8 X,Y,Z + REAL*4 XPSYMT,YPSYMT,ZPSYMT, + * RX,RY,WRK1,WRK2,WRK3,WRK4,WRK5,WRK6 + DIMENSION 
LOCAL(NSP,NS,4),NODE(N1,NE),X(NP),Y(NP),Z(NP), + * LPSYMT(MB), + * XPSYMT(MB),YPSYMT(MB),ZPSYMT(MB), + * LPCCL1(NPCCL),LPCCL2(NPCCL) , + * WRK1(NE),WRK2(NE),WRK3(NE),WRK4(NP),WRK5(NP),WRK6(NP), + * IWRK(MWRK),IWRK2(2,MWRK),RX(0:N,NE),RY(0:N,NE) +C Fj + INTEGER*8 :: rx_desc, ry_desc +C Fj +C + DIMENSION LPINT1(MPINT),LPINT2(MPINT),LPINT3(MPINT), + 1 LDOM (MDOM) ,NBPDOM(MDOM) , + 2 IPSLF(MBPDOM,MDOM),IPSND(MBPDOM,MDOM) +C + DIMENSION LPBTOA(NP) +C + CHARACTER*60 ERMSGC + & / ' ## SUBROUTINE LESSFX: FATAL ERROR REPORT ; RETURNED' / +C + REAL*4 EPS + DATA EPS /1.0E-10/ +C + INTEGER*4 MAXBUF,IERRA,IDIM,IDOM,IBP,IPCCL,IPINT1,IPINT2,MLST, + * IE,IS,IETYPE,I,NESYMT,IP,IESYMT,NNPS,IPSYMT + REAL*8 ABSNOR +C +C +C ( 3-D CALCULATION : SINGLE WORD & MULTI ELEMENT VERSION ) +C CODED BASED ON 'LESSRF' +C +C DO THE FOLLOWING BOUNDARY CONDITION PREPARATIONS FOR 'LES3D' +C +C +C (1) GENERATE NEIGHBORING DOMAIN LISTS FOR PARALLEL MODE EXECUTION +C +C (2) EXTRACT SYMMETRIC BOUNDARY SURFACES, CALCULATE THEIR NORMALS, +C AND ASSIGN SURFACE NORMALS TO SYMMETRIC NODES +C +C +C ARGUMENT LISTINGS +C (1) INPUT +C LOCAL (I,IS); NODE NUMBER TABLE DEFINING ELEMENT SURFACES +C NODE (I,IE); NODE NUMBER TABLE BASED ON ELEMENT +C NE ; NUMBER OF TOTAL ELEMENTS +C MP ; MAX. NUMBER OF TOTAL NODES +C NOTES ; MP DEFINES THE UPPER BOUND OF ARRAYS WRK1 AND WRK2 +C NP ; NUMBER OF TOTAL NODES +C N ; NUMBER OF NODES ASSIGNED TO ONE ELEMENT +C N2D ;NUMBER OF SURFACE ELEMENT DEFINING NODES ( =4 ) +C X (IP); X-COORDINATE OF GLOBAL NODES +C Y (IP); Y-COORDINATE OF GLOBAL NODES +C Z (IP); Z-COORDINATE OF GLOBAL NODES +C P (IE); ELEMENT PRESSURE +C GI (I); LOCAL GZAI COORDINATES OF ELEMENT'S NODES +C EI (I); LOCAL EATA COORDINATES OF ELEMENT'S NODES +C TI (I); LOCAL THETA COORDINATES OF ELEMENT'S NODES +C +C NPWALL ; NUMBER OF WALL BOUNDARY NODES +C MPWALL ; MAX. NUMBER OF WALL BOUNDARY NODES +C LPWALL (IBP); WALL BOUNDARY NODES +C MEPWL ; MAX. 
NUMBER OF ADJACENT WALL SURFACE ELEMENTS +C +C NPSYMT ; NUMBER OF SYMMETRIC BOUNDARY NODES +C LPSYMT (IBP); SYMMETRIC BOUNDARY NODES +C +C IPART ; SUB-DOMAIN NUMBER THAT THIS TASK SHOULD TAKE/IS +C TAKING CARE OF. IPART BEING SET ZERO MEANS THAT +C THE PROGRAM SHOULD RUN/IS RUNNING IN SERIAL +C MODE. +C NPINT ; NUMBER OF INTER-CONNECT BOUNDARY NODES +C LPINT1 (IBP); INTER-CONNECT BOUNDARY NODES +C LPINT2 (IBP); CORRESPONDING SUB-DOMAIN NUMBERS +C LPINT3 (IBP); NODE NUMBER IN THE CORRESPONDING SUB-DOMAINS +C MDOM ; MAX. NUMBER OF THE NEIBERING SUB-DOMAINS +C MBPDOM ; THE MAXIMUM NUMBER OF THE INTER-CONNECT +C BOUNDARY NODES FOR ONE NEIBERING SUB-DOMAIN +C +C IUT0 ; FILE NUMBER TO WRITE ERROR MESSAGE +C IUT6 ; FILE NUMBER TO WRITE CALCULATION SEQUENCE +C +C (2) OUTPUT +C NEWALL ; NUMBER OF WALL BOUNDARY ELEMENTS +C LEWALL(I,IBE); WALL BOUNDARY ELEMENT AND ITS SURFACE +C XNWALL (IBE); X NORMAL OF WALL BOUNDARY SURFACE +C YNWALL (IBE); Y NORMAL OF WALL BOUNDARY SURFACE +C ZNWALL (IBE); Z NORMAL OF WALL BOUNDARY SURFACE +C YP (IBE); DISTANCE BETWEEN WALL AND ITS OPPOSITE SURFACES +C +C XPSYMT (IBP); X-DIR COMPONENT OF SYMMETRIC NODE NORMAL VECTOR +C YPSYMT (IBP); Y-DIR COMPONENT OF SYMMETRIC NODE NORMAL VECTOR +C ZPSYMT (IBP); Z-DIR COMPONENT OF SYMMETRIC NODE NORMAL VECTOR +C +C NDOM ; NUMBER OF THE NERIBERING SUB-DOMAINS +C LDOM (IDOM); NEIBERING SUB-DOMAIN NUMBER +C NBPDOM(IDOM); NUMBER OF INTER-CONNECT BOUNDARY NODES +C SHARING WITH THE IDOM'TH NEIBERING SUB-DOMAIN, +C LDOM(IDOM) +C IPSLF (IBP,IDOM); INTER-CONNECT BOUNDARY NODE NUMBER IN THE +C CALLING TASK'S SUB-DOMAIN, FOR THE IDOM'TH +C NEIBERING SUB-DOMAIN, LDOM(IDOM) +C IPSND (IBP,IDOM); INTER-CONNECT BOUNDARY NODE NUMBER IN THE +C SUB-DOMAIN THAT IS RECEIVING THE CALLING +C TASK'S RESIDUALS. 
+C +C IERR ; RETURN CODE WHOSE VALUE WILL BE EITHER +C 0 --- INDICATING SUCCESSFUL TERMINATION +C OR 1 --- INDICATING OCCURENCE OF SOME ERROR CONDITIONS +C +C (4) WORK +C WRK1 (IWRK); USED IN SURFEX, COSIN3, DDCOMX +C WRK2 (IWRK); USED IN SURFEX, DDCOMX +C NOTES ; GUARANTEE THE UPPER BOUND OF MP FOR WRK1 AND WRK2 +C WRK3 (IWRK); USED IN SURFEX +C IWRK (IWRK); USED INTERNALLY, AND IN SURFEX +C IWRK2(2,IWRK);USED INTERNALLY +C WRK4 (IWRK); USED INTERNALLY +C -WRK6 (IWRK); USED INTERNALLY +C RX (I,IE); USED IN DDCOMX +C RY (I,IE); USED IN DDCOMX +C +C + MAXBUF = NE*(N+1) +C +C +C +C GENERATE NEIGHBORING DOMAIN LISTS FOR PARALLEL EXECUTION +C +C +C + IF(IPART.GE.1) THEN + WRITE(IUT6,*) + WRITE(IUT6,*) ' LESSRF: GENERATING NEIGHBORING DOMAIN LIST' +C + CALL DDCOM0(LPINT1,LPINT2,LPINT3,NPINT,MDOM,MBPDOM, + * LDOM,NBPDOM,NDOM,IPSLF,IPSND,IUT0,IERR) + CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) + IF(IERRA.NE.0) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSGC + RETURN + ENDIF +C + WRITE (*,*)"MAXBUF=",MAXBUF + IDIM = 0 + CALL DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM, + * IPSLF,IPSND,MBPDOM,WRK4,WRK5,WRK6,NP, +C Fujitsu start 202103 +C * IUT0,IERR,RX,RY,MAXBUF) + * IUT0,IERR,rx_desc,ry_desc,MAXBUF) +C Fujitsu end 202103 + + CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) + IF(IERRA.NE.0) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSGC + RETURN + ENDIF +C +C CONVERTE IPSND FROM BEFOR TO AFTER + DO 10 IDOM=1,NDOM + DO 15 IBP=1,NBPDOM(IDOM) + IPSND(IBP,IDOM)=LPBTOA(IPSND(IBP,IDOM)) + 15 CONTINUE + 10 CONTINUE +C + ELSE IF(NPCCL.NE.0) THEN + NPINT=NPCCL*2 + DO 50 IPCCL=1,NPCCL + IPINT1= IPCCL + IPINT2= IPCCL+NPCCL + LPINT1(IPINT1) = LPCCL1(IPCCL) + LPINT1(IPINT2) = LPCCL2(IPCCL) + LPINT2(IPINT1) = IPART + LPINT2(IPINT2) = IPART + LPINT3(IPINT1) = LPCCL2(IPCCL) + LPINT3(IPINT2) = LPCCL1(IPCCL) + 50 CONTINUE + CALL DDCOM0(LPINT1,LPINT2,LPINT3,NPINT,MDOM,MBPDOM, + * LDOM,NBPDOM,NDOM,IPSLF,IPSND,IUT0,IERR) + IF(IERR.NE.0) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSGC + RETURN + ENDIF + ENDIF +C +C +C PREPARE FOR 
SYMMETRIC BOUNDARY CONDITIONS +C +C +C +C NOTES; SYMMETRIC SURFACE EXTRACTION AND THEIR NORMAL CALCULATION ARE +C TEMPORARILY BE NEEDED FOR CALCULATING SYMMETRIC NODE NORMAL +C VECTORS. +C +C +C EXTRACT SYMMETRIC BOUNDARY SURFACES, CALCULATE THEIR NORMAL VECTORS +C AND ASSIGN SURFACE NORMAL VECTORS TO SYMMETRIC NODES +C +C + WRITE(IUT6,*) + WRITE(IUT6,*) ' LESSRF: EXTRACTING SYMMETRIC SURFACES,' + WRITE(IUT6,*) ' CALCULATING SYMMETRIC SURFACE NORMALS AND' + WRITE(IUT6,*) ' ASSIGNING SURFACE NORMAL TO SYMMETRIC NODE' +C + MLST = 2 + CALL SRFEXX(MWRK,MLST,NE,NP,N1,NS,NSP,N2D, + * LPSYMT,NPSYMT,LOCAL,NODE, + * IWRK2,NESYMT,MWRK,IWRK,IUT0,IERR) + NPSYM2=NPSYMT + CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) + IF(IERRA.NE.0) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSGC + RETURN + ENDIF +C + CALL CSIN3X(LOCAL,X,Y,Z,NODE,NE,NP,N1,NS,NSP, + * IWRK2,NESYMT,WRK1,WRK2,WRK3,WRK4) +C + DO 400 IP = 1 , NP + WRK4(IP) = 0.E0 + WRK5(IP) = 0.E0 + WRK6(IP) = 0.E0 + 400 CONTINUE +C + DO 420 IESYMT = 1 , NESYMT + IE = IWRK2(1,IESYMT) + IS = IWRK2(2,IESYMT) + IF( NODE(8,IE).GE.1) THEN ! HEX + IETYPE = 4 + ELSE IF(NODE(6,IE).GE.1) THEN ! PRS + IETYPE = 3 + ELSE IF(NODE(5,IE).GE.1) THEN ! PYR + IETYPE = 2 + ELSE ! TET + IETYPE = 1 + END IF + IF(LOCAL(4,IS,IETYPE).GE.1) THEN ! QUADRILATERAL + NNPS = 4 + ELSE ! TRIANGLE + NNPS = 3 + ENDIF + DO 410 I = 1 , NNPS + IP = NODE(LOCAL(I,IS,IETYPE),IE) + WRK4(IP) = WRK4(IP)+WRK1(IESYMT) + WRK5(IP) = WRK5(IP)+WRK2(IESYMT) + WRK6(IP) = WRK6(IP)+WRK3(IESYMT) + 410 CONTINUE + 420 CONTINUE +C +C SUPERIMPOSE NEIGHBORING DOMAIN NORMAL VECTORS +C +C NOTES; THIS PART MUST BE CALLED FOR ALL THE DOMAINS WHETHER OR NOT +C THEY HAVE A SYMMETRIC NODE. A DOMAIN WHICH HAS NO SYMMETRIC +C NODE MUST ALSO SEND ITS ZERO (DUMMY) RESIDUALS TO ITS +C NEIGHBORING DOMAINS OTHERWISE COMMUNICATION WILL BE LOCKED. 
+C IF AT LEAST ONE OF THE NEIGHBORING DOMAIN POSSESSES A SYMMETRIC +C SURFACE, A DOMAIN MAY ALSO BE ASSIGNED A NORMAL VECTOR TO ITS +C SYMMETRIC NODE (WHICH IS ISOLATED IN THE DOMAIN) AFTER THE +C COMMUNICATION. +C + IDIM = 3 + CALL DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM, + * IPSLF,IPSND,MBPDOM,WRK4,WRK5,WRK6,NP, +C Fujitsu start 202103 +C * IUT0,IERR,RX,RY,MAXBUF) + * IUT0,IERR,rx_desc,ry_desc,MAXBUF) +C Fujitsu end 202103 + CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) + IF(IERRA.NE.0) THEN + write(IUT0,*) 'DBG : error' + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSGC + RETURN + ENDIF +C + DO 430 IPSYMT = 1 , NPSYM2 + IP = LPSYMT(IPSYMT) + ABSNOR = SQRT(WRK4(IP)**2+WRK5(IP)**2+WRK6(IP)**2+EPS) + XPSYMT(IPSYMT) = WRK4(IP)/ABSNOR + YPSYMT(IPSYMT) = WRK5(IP)/ABSNOR + ZPSYMT(IPSYMT) = WRK6(IP)/ABSNOR + 430 CONTINUE +C + WRITE(IUT6,*) ' DONE!' + WRITE(IUT6,*) ' NESYMT=',NESYMT +C + RETURN + END diff --git a/FFB-MINI/src/xmpAPI_lrfnms.F b/FFB-MINI/src/xmpAPI_lrfnms.F new file mode 100755 index 0000000..7646af0 --- /dev/null +++ b/FFB-MINI/src/xmpAPI_lrfnms.F @@ -0,0 +1,917 @@ + SUBROUTINE LRFNMS(IRFNFF,IRFN,NGRID, + * ME,MP,N1,NE,NP,X,Y,Z,U,V,W,PN,P,NODE,LEACNV, + * MPWALL,NPWALL,LPWALL,UWALL,VWALL,WWALL, + * MPINLT,NPINLT,LPINLT,UINLT,VINLT,WINLT, + * MPFREE,NPFREE,LPFREE, + * MPSYMT,NPSYMT,LPSYMT, + * MPBODY,NPBODY,LPBODY, + * MPINT ,NPINT ,LPINT1,LPINT2,LPINT3, + * IPART , + * MDOM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, + * UFIX,VFIX,WFIX,LPFRM, + * CRD,LWRK01,LWRK02,WRK04,NDRFN,NDORG,NODEBK, +C Fj +C * RX,RY,NPB0, + * rx_desc,ry_desc,NPB0, +C Fj + * NPB1,LPB1,XPB1,YPB1,ZPB1, + * NPB2,LPB2,XPB2,YPB2,ZPB2, + * ITYPOR,ITYPRF,IUT6,IUT0,IERR) +C + IMPLICIT NONE +C + INCLUDE "rcapRefiner.inc" +C + INTEGER*4 IRFNFF,IRFN,NGRID + INTEGER*4 ME,MP,N1,NE,NP + REAL*8 X(MP),Y(MP),Z(MP) + REAL*4 U(MP),V(MP),W(MP),PN(MP),P(ME) + INTEGER*4 NODE(N1,ME) + INTEGER*4 IUT6,IUT0,IERR,IERRA +C + INTEGER*4 MPWALL,NPWALL,LPWALL(MPWALL), + * MPINLT,NPINLT,LPINLT(MPINLT), + * MPFREE,NPFREE,LPFREE(MPFREE), + * 
MPSYMT,NPSYMT,LPSYMT(MPSYMT), + * MPBODY,NPBODY,LPBODY(MPBODY) + REAL*4 UWALL(MPWALL),VWALL(MPWALL),WWALL(MPWALL), + * UINLT(MPINLT),VINLT(MPINLT),WINLT(MPINLT) +C + INTEGER*4 IPART,MDOM, + * MPINT,NPINT,LPINT1(MPINT),LPINT2(MPINT),LPINT3(MPINT), + * NDOM,MBPDOM,LDOM(MDOM),NBPDOM(MDOM), + * IPSLF(MBPDOM,MDOM),IPSND(MBPDOM,MDOM) +C + INTEGER*4 NPB0(MDOM),NPB1(MDOM),NPB2(MDOM), + * LPB1(MBPDOM,MDOM),LPB2(MBPDOM,MDOM) + REAL*4 XPB1(MBPDOM,MDOM),XPB2(MBPDOM,MDOM), + * YPB1(MBPDOM,MDOM),YPB2(MBPDOM,MDOM), + * ZPB1(MBPDOM,MDOM),ZPB2(MBPDOM,MDOM) +C + INTEGER(KIND=1) :: IETYPE +C + INTEGER*4 KSEG,KQUD,KHEX + DATA KSEG / 0/ + DATA KQUD / 4/ + DATA KHEX /12/ +C + INTEGER*4 NBOUN + PARAMETER(NBOUN=7) + INTEGER*4 LBPRTY(NBOUN) + DATA LBPRTY( 1) / 4 / ! INLET + DATA LBPRTY( 2) / 6 / ! MOVING-WALL + DATA LBPRTY( 3) / 5 / ! WALL + DATA LBPRTY( 4) / 2 / ! SYMMETRIC + DATA LBPRTY( 5) / 3 / ! FREE + DATA LBPRTY( 6) / 1 / ! OVERSET + DATA LBPRTY( 7) / 7 / ! BODY + CHARACTER*20 BGNAME(3) + DATA BGNAME( 1) / "GROUP1" / + DATA BGNAME( 2) / "GROUP2" / + DATA BGNAME( 3) / "GROUP3" / + CHARACTER*10 CDOM + CHARACTER*50 CBUF +C +C [WORK] + REAL*4 UFIX(MP),VFIX(MP),WFIX(MP) + INTEGER*4 LPFRM(MP) + REAL*8 CRD(MP*3) + REAL*4 RX(ME*8),RY(ME*8) +C Fj + INTEGER*8 :: rx_desc, ry_desc +C Fj + INTEGER*4 NODED(8),LEACNV(ME) + INTEGER*4 LWRK01(ME),LWRK02(MP) + INTEGER*4 WRK04(MP),NDRFN(ME*8),NDORG(NE*8),NODEBK(8,NE) + INTEGER(KIND=1) ITYPOR(ME),ITYPRF(ME) +C +C [PARAMETER] + INTEGER*4 NOFSTP,NOFSTE + DATA NOFSTP /1/ + DATA NOFSTE /1/ + REAL*4 EPS + DATA EPS /1.0E-6/ +C +C [WORK] + INTEGER*4 MAXBUF,IDIM,NPB + REAL*4 X1,Y1,Z1,DX,DY,DZ,RR +C +C [COUNTER] + INTEGER*4 IE,IP,IB,IB1,IB2,NB,IDOM,I,J,NER,NPOLD, + * IP1,IP2,IP3,IP4,IP5,IP6,IP7,IP8, + * NE1,NE2 +C + CHARACTER*60 ERMSGC + & /' ## SUBROUTINE LRFNMS: ERROR OCCURED ; RETURNED' / +CC CHARACTER*60 ERMSG1 +CC & /' ## SUBROUTINE LRFNMS: LIST ERROR AT DDCOM0 ; RETURNED' / +CC CHARACTER*60 ERMSG2 +CC & /' ## SUBROUTINE RFNMSH: CANNOT OBTAIN PARENT NODE ; 
RETURNED' / + CHARACTER*60 ERMSG3 + & /' ## SUBROUTINE LRFNMS: INSUFFICIENT MEMORY ; RETURNED' / + CHARACTER*60 ERMSG4 + & /' ## SUBROUTINE LRFNMS: INVARIT NODE TABLE ; RETURNED' / + CHARACTER*60 ERMSG5 + & /' ## SUBROUTINE LRFNMS: INVARIT ELEMENT TYPE ; RETURNED' / + CHARACTER*60 ERMSG6 + & /' ## SUBROUTINE LRFNMS: INVARIT O.S. FRANE NUMBER ; RETURNED' / +C +C +C ARGUMENT LISTINGS +C ME (IN ); MAX. NUMBER OF TOTAL ELEMENTS +C MP (IN ); MAX. NUMBER OF TOTAL NODES +C NE (IN/OUT); NUMBER OF TOTAL ELEMENTS +C NP (IN/OUT); NUMBER OF TOTAL NODES +C N1 (IN ); DIMENSION OF 'NODE' +C NODE (IN/OUT); NODE NO. TABLE BASED ON ELEMENT +C X (IN/OUT); X-COORDINATES OF NODES +C Y (IN/OUT); Y-COORDINATES OF NODES +C Z (IN/OUT); Y-COORDINATES OF NODES +C +C BOUNDARY CONDITIONS +C +C B. WALL BOUNDARY +C MPWALL (IN ); MAX. NUMBER OF WALL BOUNDARY NODES +C NPWALL (IN/OUT); NUMBER OF WALL BOUNDARY NODES +C LPWALL (IN/OUT); WALL BOUNDARY NODES +C UWALL (IN/OUT); WALL BOUNDARY U-VELOCITIES +C VWALL (IN/OUT); WALL BOUNDARY V-VELOCITIES +C WWALL (IN/OUT); WALL BOUNDARY W-VELOCITIES +C +C C. MOVING WALL BOUNDARY +C MPINLT (IN ); MAX. NUMBER OF INLET BOUNDARY NODES +C NPINLT (IN/OUT); NUMBER OF INLET BOUNDARY NODES +C LPINLT (IN/OUT); INLET BOUNDARY NODES +C UINLT (IN/OUT); INLET BOUNDARY U-VELOCITIES +C VINLT (IN/OUT); INLET BOUNDARY V-VELOCITIES +C WINLT (IN/OUT); INLET BOUNDARY W-VELOCITIES +C +C D. FREE BOUNDARY +C MPFREE (IN ); MAX. NUMBER OF FREE BOUNDARY NODES +C NPFREE (IN/OUT); NUMBER OF FREE BOUNDARY NODES +C LPFREE (IN/OUT); FREE BOUNDARY NODES +C +C E. SYMMETRIC BOUNDARY +C MPSYMT (IN ); MAX. NUMBER OF SYMMETRIC BOUNDARY NODES +C NPSYMT (IN/OUT); NUMBER OF SYMMETRIC BOUNDARY NODES +C LPSYMT (IN/OUT); SYMMETRIC BOUNDARY NODES +C +C F. BODY BOUNDARY +C MPBODY (IN ); MAX. NUMBER OF BODY BOUNDARY NODES +C NPBODY (IN/OUT); NUMBER OF BODY BOUNDARY NODES +C LPBODY (IN/OUT); BODY BOUNDARY NODES +C +C G. 
INTER-CONNECT BOUNDARY +C IPART (IN ); SUB-DOMAIN NUMBER THAT THIS TASK SHOULD TAKE/IS +C TAKING CARE OF. IPART BEING SET ZERO MEANS THAT +C THE PROGRAM SHOULD RUN/IS RUNNING IN SERIAL +C MODE. +C LDOM (IN ); NEIBERING SUB-DOMAIN NUMBER +C NBPDOM (IN/OUT); NUMBER OF INTER-CONNECT BOUNDARY NODES +C SHARING WITH THE IDOM'TH NEIBERING SUB-DOMAIN, +C LDOM(IDOM) +C NDOM (IN ); NUMBER OF THE NERIBERING SUB-DOMAINS +C IPSLF (WORK ); INTER-CONNECT BOUNDARY NODE NUMBER IN THE +C CALLING TASK'S SUB-DOMAIN, FOR THE IDOM'TH +C NEIBERING SUB-DOMAIN, LDOM(IDOM) +C IPSND (WORK ); INTER-CONNECT BOUNDARY NODE NUMBER IN THE +C SUB-DOMAIN THAT IS RECEIVING THE CALLING +C TASK'S RESIDUALS. +C MBPDOM (IN ); THE MAXIMUM NUMBER OF THE INTER-CONNECT +C BOUNDARY NODES FOR ONE NEIBERING SUB-DOMAIN +C +C + IERR=0 + MAXBUF=N1*ME +CC +CCHY [1] MAKE NEIGHBORING DOMAIN LIST +CC + IF(IPART.GE.1)THEN + WRITE(IUT6,*) + WRITE(IUT6,*) ' **LRFNMS** : MAKE NEIGHBORING DOMAIN LIST' + CALL DDCOM0(LPINT1,LPINT2,LPINT3,NPINT,MDOM,MBPDOM, + * LDOM,NBPDOM,NDOM,IPSLF,IPSND,IUT0,IERR) + CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) + IF(IERRA.NE.0) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSGC + IERR=IERRA + RETURN + ENDIF +C + IDIM = 0 + CALL DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, +C Fj +C * WRK04,WRK04,WRK04,NP,IUT0,IERR,RX,RY,MAXBUF) + * WRK04,WRK04,WRK04,NP,IUT0,IERR,rx_desc,ry_desc,MAXBUF) +C Fj + CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) + IF(IERRA.NE.0) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSGC + IERR=IERRA + RETURN + ENDIF + ENDIF +C +CC +CCHY [2] EXTRACT ELEMENTS TO BE REFINED +CC + WRITE(IUT6,*) + WRITE(IUT6,*) ' **LRFNMS** : EXTRACT ELEMENTS TO BE REFINED' + CALL EXTRFN(NGRID,ME,N1,NE,NP,NODE,NPBODY,LPBODY, + * NPINLT,LPINLT, + * LWRK01,LWRK02,LEACNV,NE1,NE2,NDORG,NODEBK, + * WRK04,IPART,NDOM,MBPDOM,LDOM,NBPDOM, + * IPSLF,IPSND,RX,RY,MAXBUF,IUT0,IERR) + CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) + IF(IERRA.NE.0) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSGC + IERR=IERRA + RETURN + ENDIF + WRITE(IUT6,*) ' 
**LRFNMS** : NUM. OF ELEM. TO BE REFINED',NE1 + WRITE(IUT6,*) ' **LRFNMS** : NUM. OF ELEM. NOT TO BE REFINED',NE2 +C +CC +CCHY [3] PRE-SET U,V,W,PN AT B.C. NODES +CC + DO 1000 IP=1,NP + UFIX(IP)=0.0 + VFIX(IP)=0.0 + WFIX(IP)=0.0 + LPFRM(IP)=0 + 1000 CONTINUE +C + DO 1100 IB=1,NPINLT + IP=LPINLT(IB) + UFIX(IP)=UINLT(IB) + VFIX(IP)=VINLT(IB) + WFIX(IP)=WINLT(IB) + 1100 CONTINUE +C + DO 1200 IB=1,NPWALL + IP=LPWALL(IB) + UFIX(IP)=UWALL(IB) + VFIX(IP)=VWALL(IB) + WFIX(IP)=WWALL(IB) + 1200 CONTINUE +C +CC +CCHY [4] INITIALIZE OF REFINER +CC + WRITE(IUT6,*) + WRITE(IUT6,*) ' **LRFNMS** : REFINER INITIALIZATION' + CALL RCAPINITREFINER(NOFSTP,NOFSTE) +CC +CCHY [5] SET COORDINATES TO REFINER +CC + WRITE(IUT6,*) + WRITE(IUT6,*) ' **LRFNMS** : SET COORDINATES TO REFINER' + DO 2000 IP=1,NP + CRD(IP*3-2)=X(IP) + CRD(IP*3-1)=Y(IP) + CRD(IP*3-0)=Z(IP) + 2000 CONTINUE + CALL RCAPSETNODE64(NP,CRD,-1,0) +CC +CCHY [6] SET B.C. NODES TO REFINER +CC +CC +CCHY [6.1] B.C. GROUP-1 +CC + WRITE(IUT6,*) + WRITE(IUT6,*) ' **LRFNMS** : SET B.C. NODES TO REFINER' +C + NB=0 + DO 3000 IB = 1 , NPINLT + NB=NB+1 + LWRK01(NB) = LPINLT(IB) + LWRK02(NB) = LBPRTY(1) + 3000 CONTINUE +C + DO 3010 IB = 1 , NPWALL + NB=NB+1 + LWRK01(NB) = LPWALL(IB) + LWRK02(NB) = LBPRTY(3) + 3010 CONTINUE +C + DO 3020 IB = 1 , NPSYMT + NB=NB+1 + LWRK01(NB) = LPSYMT(IB) + LWRK02(NB) = LBPRTY(4) + 3020 CONTINUE +C + DO 3040 IB = 1 , NPFREE + NB=NB+1 + LWRK01(NB) = LPFREE(IB) + LWRK02(NB) = LBPRTY(5) + 3040 CONTINUE +C + CBUF = BGNAME(1)//CHAR(0) + CALL RCAPAPPENDBNODEVARINT(CBUF,NB,LWRK01,LWRK02) + WRITE(IUT6,*) ' **LRFNMS** : SET B.C. NODES (1)',NB +CC +CCHY [6.2] B.C. GROUP-2 +CC + DO 3100 IB = 1 , NPBODY + LWRK01(IB) = LPBODY(IB) + LWRK02(IB) = LBPRTY(7) + 3100 CONTINUE +C + CBUF = BGNAME(2)//CHAR(0) + CALL RCAPAPPENDBNODEVARINT(CBUF,NPBODY,LWRK01,LWRK02) + WRITE(IUT6,*) ' **LRFNMS** : SET B.C. NODES (2)',NPBODY +CC +CCHY [6.3] B.C. 
GROUP-3 +CC + IF(IPART.GE.1)THEN + NB=0 + DO 3200 IDOM=1,NDOM + WRITE(CDOM,'(I8.8)') IDOM + CBUF = BGNAME(3)//CDOM//CHAR(0) + DO 3210 IB = 1 , NBPDOM(IDOM) + NB=NB+1 + LWRK01(IB) = IPSLF(IB,IDOM) + 3210 CONTINUE +C + CALL RCAPAPPENDBNODEGROUP(CBUF,NBPDOM(IDOM),LWRK01) + 3200 CONTINUE + WRITE(IUT6,*) ' **LRFNMS** : SET B.C. NODES (3)',NB + ENDIF +CC +CCHY [7] SET NODE TABLE TO REFINER +CC + DO 4000 IE=1,NE + DO 4010 I=1,8 + NDORG((IE-1)*8+I)=0 + 4010 CONTINUE + 4000 CONTINUE +C + J=0 + DO 4100 IE=1,NE1 + IF(NODE(8,IE).NE.0) THEN + IETYPE=RCAP_HEXAHEDRON + NDORG(J+1)=NODE(1,IE) + NDORG(J+2)=NODE(2,IE) + NDORG(J+3)=NODE(3,IE) + NDORG(J+4)=NODE(4,IE) + NDORG(J+5)=NODE(5,IE) + NDORG(J+6)=NODE(6,IE) + NDORG(J+7)=NODE(7,IE) + NDORG(J+8)=NODE(8,IE) + J=J+8 + ELSE IF(NODE(6,IE).NE.0) THEN + IETYPE=RCAP_WEDGE + NDORG(J+1)=NODE(1,IE) + NDORG(J+2)=NODE(2,IE) + NDORG(J+3)=NODE(3,IE) + NDORG(J+4)=NODE(4,IE) + NDORG(J+5)=NODE(5,IE) + NDORG(J+6)=NODE(6,IE) + J=J+6 + ELSE IF(NODE(5,IE).NE.0) THEN + IETYPE=RCAP_PYRAMID +CC +CC NOTE THAT DEFINITION OF PYRMID NODE TABLE IS DIFFERENT +CC IN FFB AND REVOCAP_REFINER +CC + NDORG(J+1)=NODE(5,IE) + NDORG(J+2)=NODE(1,IE) + NDORG(J+3)=NODE(2,IE) + NDORG(J+4)=NODE(3,IE) + NDORG(J+5)=NODE(4,IE) + J=J+5 + ELSE IF(NODE(4,IE).NE.0) THEN + IETYPE=RCAP_TETRAHEDRON + NDORG(J+1)=NODE(1,IE) + NDORG(J+2)=NODE(2,IE) + NDORG(J+3)=NODE(3,IE) + NDORG(J+4)=NODE(4,IE) + J=J+4 + ELSE + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSG4 + IERR=1 + RETURN + ENDIF +C + ITYPOR(IE)=IETYPE + 4100 CONTINUE +CC +CCHY [8] REFINE ELEMENTS +CC +CC COUNT NUMBER OF ARRAY + NER=0 + ITYPRF(1) = -1 + NDRFN (1) = -1 + IE=RCAPREFINEELEMENTMULTI(NE1,ITYPOR,NDORG,NER,ITYPRF,NDRFN) + IF(NE2+NER.GT.ME) THEN + WRITE(IUT0,*) ERMSG3 + IERR=1 + ENDIF + CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) + IF(IERRA.NE.0) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSGC + RETURN + ENDIF + ITYPRF(1) = 0 + NDRFN (1) = 0 + IE= RCAPREFINEELEMENTMULTI(NE1,ITYPOR,NDORG,NER,ITYPRF,NDRFN) + WRITE(IUT6,*) + WRITE(IUT6,*) ' 
**LRFNMS** : NUMBER OF ELEM. AFTER REFINE',NER+NE2 + +CC +CCHY [9] COMMIT REFINEMENT +CC + CALL RCAPCOMMIT() +CC +CCHY [10] SET NEW NODE TABLE +CC + J=0 + DO 5000 IE=1,NER + IETYPE=ITYPRF(IE) +C + IF(IETYPE.EQ.RCAP_HEXAHEDRON) THEN + NODE(1,IE)=NDRFN(J+1) + NODE(2,IE)=NDRFN(J+2) + NODE(3,IE)=NDRFN(J+3) + NODE(4,IE)=NDRFN(J+4) + NODE(5,IE)=NDRFN(J+5) + NODE(6,IE)=NDRFN(J+6) + NODE(7,IE)=NDRFN(J+7) + NODE(8,IE)=NDRFN(J+8) + J=J+8 + ELSE IF(IETYPE.EQ.RCAP_WEDGE) THEN + NODE(1,IE)=NDRFN(J+1) + NODE(2,IE)=NDRFN(J+2) + NODE(3,IE)=NDRFN(J+3) + NODE(4,IE)=NDRFN(J+4) + NODE(5,IE)=NDRFN(J+5) + NODE(6,IE)=NDRFN(J+6) + NODE(7,IE)=0 + NODE(8,IE)=0 + J=J+6 + ELSE IF(IETYPE.EQ.RCAP_PYRAMID) THEN +CC +CC NOTE THAT DEFINITION OF PYRMID NODE TABLE IS DIFFERENT +CC IN FFB AND REVOCAP_REFINER +CC + NODE(1,IE)=NDRFN(J+2) + NODE(2,IE)=NDRFN(J+3) + NODE(3,IE)=NDRFN(J+4) + NODE(4,IE)=NDRFN(J+5) + NODE(5,IE)=NDRFN(J+1) + NODE(6,IE)=0 + NODE(7,IE)=0 + NODE(8,IE)=0 + J=J+5 + ELSE IF(IETYPE.EQ.RCAP_TETRAHEDRON) THEN + NODE(1,IE)=NDRFN(J+1) + NODE(2,IE)=NDRFN(J+2) + NODE(3,IE)=NDRFN(J+3) + NODE(4,IE)=NDRFN(J+4) + NODE(5,IE)=0 + NODE(6,IE)=0 + NODE(7,IE)=0 + NODE(8,IE)=0 + J=J+4 + ELSE + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSG5 + IERR=1 + RETURN + ENDIF +C + 5000 CONTINUE +CC +CCHY [11] SET NEW COORDINATE +CC + NPOLD=NP +CC NP=0 + DO 5100 IE=1,NER + DO 5200 J=1,N1 + IF (NODE(J,IE).GT.NP) NP=NODE(J,IE) + 5200 CONTINUE + 5100 CONTINUE + WRITE(IUT6,*) ' **LRFNMS** : NUMBER OF NODES AFTER REFINE',NP +C + IF(NP.GT.MP) THEN + WRITE(IUT0,*) ERMSG3 + IERR=1 + ENDIF + CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) + IF(IERRA.NE.0) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSGC + RETURN + ENDIF +C + CALL RCAPGETNODESEQ64(NP, 1,CRD) +C + DO 5300 IP=1,NP + X(IP)=CRD(IP*3-2) + Y(IP)=CRD(IP*3-1) + Z(IP)=CRD(IP*3-0) + 5300 CONTINUE +CC +CCHY [12] SET NEW B.C. NODE +CC +C +CC +CCHY [12.1] B.C. 
GROUP-1 +CC + CBUF = BGNAME(1)//CHAR(0) + NB = RCAPGETBNODEVARINTCOUNT(CBUF) + IF(NB.GT.MP) THEN + WRITE(IUT0,*) ERMSG3 + IERR=1 + ENDIF + CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) + IF(IERRA.NE.0) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSGC + IERR=IERRA + RETURN + ENDIF +C + CBUF = BGNAME(1)//CHAR(0) + CALL RCAPGETBNODEVARINT(CBUF,NB,LWRK01,LWRK02) +C + DO 6000 IB=1,NB + IP=LWRK01(IB) + J =LWRK02(IB) + IF (IP.LE.NPOLD) GOTO 6000 +C + IF(J.EQ.LBPRTY(1)) THEN + NPINLT=NPINLT+1 + IF(NPINLT.GT.MPINLT) THEN + WRITE(IUT0,*) ERMSG3 + IERR=1 + GOTO 6050 + ELSE + LPINLT(NPINLT)=IP + ENDIF + ENDIF +C + IF(J.EQ.LBPRTY(3)) THEN + NPWALL=NPWALL+1 + IF(NPWALL.GT.MPWALL) THEN + WRITE(IUT0,*) ERMSG3 + IERR=1 + GOTO 6050 + ELSE + LPWALL(NPWALL)=IP + ENDIF + ENDIF +C + IF(J.EQ.LBPRTY(4)) THEN + NPSYMT=NPSYMT+1 + IF(NPSYMT.GT.MPSYMT) THEN + WRITE(IUT0,*) ERMSG3 + IERR=1 + GOTO 6050 + ELSE + LPSYMT(NPSYMT)=IP + ENDIF + ENDIF +C + IF(J.EQ.LBPRTY(5)) THEN + NPFREE=NPFREE+1 + IF(NPFREE.GT.MPFREE) THEN + WRITE(IUT0,*) ERMSG3 + IERR=1 + GOTO 6050 + ELSE + LPFREE(NPFREE)=IP + ENDIF + ENDIF +C + 6000 CONTINUE +C + 6050 CONTINUE + CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) + IF(IERRA.NE.0) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSGC + IERR=IERRA + RETURN + ENDIF +C + WRITE(IUT6,*) + IF(NB.GT.0) + *WRITE(IUT6,*) ' **LRFNMS** : TOTAL NUMBER OF B.C. NODES:',NB + IF(NPINLT.GT.0) + *WRITE(IUT6,*) ' **LRFNMS** : NUMBER OF INLET NODES:',NPINLT + IF(NPWALL.GT.0) + *WRITE(IUT6,*) ' **LRFNMS** : NUMBER OF WALL NODES:',NPWALL + IF(NPSYMT.GT.0) + *WRITE(IUT6,*) ' **LRFNMS** : NUMBER OF SYMMETRIC NODES:',NPSYMT + IF(NPFREE.GT.0) + *WRITE(IUT6,*) ' **LRFNMS** : NUMBER OF FREE NODES:',NPFREE +CC +CCHY [12.2] B.C. 
GROUP-2 +CC + CBUF = BGNAME(2)//CHAR(0) + NB = RCAPGETBNODEVARINTCOUNT(CBUF) + IF(NB.GT.MPBODY) THEN + WRITE(IUT0,*) ERMSG3 + IERR=1 + ENDIF + CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) + IF(IERRA.NE.0) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSGC + IERR=IERRA + RETURN + ENDIF + CBUF = BGNAME(2)//CHAR(0) + CALL RCAPGETBNODEVARINT(CBUF,NB,LWRK01,LWRK02) +C + DO 6100 IB=1,NB + IP=LWRK01(IB) + IF (IP.LE.NPOLD) GOTO 6100 + NPBODY=NPBODY+1 + IF (NPBODY.GT.MPBODY) THEN + WRITE(IUT0,*) ERMSG3 + IERR=1 + GOTO 6150 + ENDIF + LPBODY(NPBODY)=IP + 6100 CONTINUE +C + 6150 CONTINUE + CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) + IF(IERRA.NE.0) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSGC + IERR=IERRA + RETURN + ENDIF +C + IF(NB.GT.0) + *WRITE(IUT6,*) + *' **LRFNMS** : NUMBER OF BODY NODES:',NPBODY +CC +CCHY [12.3] B.C. GROUP-3 +CC + IF(IPART.GE.1) THEN +C + DO 6200 IDOM=1,NDOM +C + IF (IRFN.EQ.1) THEN + NPB1(IDOM)=0 + DO 6300 IB=1,NBPDOM(IDOM) + NPB1(IDOM)=NPB1(IDOM)+1 + IP=IPSLF(IB,IDOM) + LPB1(NPB1(IDOM),IDOM)=IP + XPB1(NPB1(IDOM),IDOM)=X(IP) + YPB1(NPB1(IDOM),IDOM)=Y(IP) + ZPB1(NPB1(IDOM),IDOM)=Z(IP) + 6300 CONTINUE + ENDIF +C + WRITE(CDOM,'(I8.8)') IDOM + CBUF = BGNAME(3)//CDOM//CHAR(0) + NPB = RCAPGETBNODEGROUPCOUNT(CBUF) + IF(NPB.GT.MBPDOM) THEN + WRITE(IUT0,*) ERMSG3 + IERR=1 + GOTO 6500 + ENDIF +C + CALL RCAPGETBNODEGROUP(CBUF,NPB,IPSLF(1,IDOM)) +C + DO 6400 IB=1,NPB + IP=IPSLF(IB,IDOM) + IF (IP.LE.NPOLD) GOTO 6400 + NPB1(IDOM)=NPB1(IDOM)+1 + LPB1(NPB1(IDOM),IDOM)=IP + XPB1(NPB1(IDOM),IDOM)=REAL(X(IP)) + YPB1(NPB1(IDOM),IDOM)=REAL(Y(IP)) + ZPB1(NPB1(IDOM),IDOM)=REAL(Z(IP)) + 6400 CONTINUE + 6200 CONTINUE +C + 6500 CONTINUE + CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) + IF(IERRA.NE.0) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSGC + IERR=IERRA + RETURN + ENDIF +C + WRITE(IUT6,*),'DDCOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOM4' + CALL DDCOM4(NDOM,LDOM,MBPDOM, + * NPB1,LPB1,XPB1,YPB1,ZPB1, + * NPB2,LPB2,XPB2,YPB2,ZPB2, + * MAXBUF,RX,RY,IUT0,IERR) +C +CC +CC MAKE INTER-CONNECT NODES +CC + NPINT=0 
+ DO 6600 IDOM=1,NDOM +C + DO 6700 IB2=1,NPB2(IDOM) + LWRK01(IB2)=0 + 6700 CONTINUE +C + NPB0(IDOM)=0 + DO 6800 IB1=1,NPB1(IDOM) + X1=XPB1(IB1,IDOM) + Y1=YPB1(IB1,IDOM) + Z1=ZPB1(IB1,IDOM) +C + DO 6850 IB2=1,NPB2(IDOM) +C + IF(LWRK01(IB2).EQ.1) GOTO 6850 +C + DX=X1-XPB2(IB2,IDOM) + DY=Y1-YPB2(IB2,IDOM) + DZ=Z1-ZPB2(IB2,IDOM) + RR=DX*DX+DY*DY+DZ*DZ +C + IF(RR.LE.EPS*EPS) THEN + NPB0(IDOM)=NPB0(IDOM)+1 + NPINT =NPINT +1 + IF(NPINT.GT.MPINT) THEN + WRITE(IUT0,*) ERMSG3 + IERR=1 + GOTO 6900 + ENDIF +C + LPINT1(NPINT)=LPB1(IB1,IDOM) + LPINT2(NPINT)=LDOM(IDOM) + LPINT3(NPINT)=LPB2(IB2,IDOM) + LWRK01(IB2)=1 + GOTO 6800 + ENDIF +C + 6850 CONTINUE + 6800 CONTINUE + 6600 CONTINUE +C + 6900 CONTINUE + CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) + IF(IERRA.NE.0) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSGC + IERR=IERRA + RETURN + ENDIF +C + WRITE(IUT6,*) + WRITE(IUT6,*) + *' **LRFNMS** : NUMBER OF INTER-CONNECT NODES:',NPINT +C + CALL DDCOM4(NDOM,LDOM,MBPDOM, + * NPB0,LPB1,XPB1,YPB1,ZPB1, + * NPB2,LPB2,XPB2,YPB2,ZPB2, + * MAXBUF,RX,RY,IUT0,IERR) +C + DO 6950 IDOM=1,NDOM + IF(NPB0(IDOM).NE.NPB2(IDOM)) IERR=1 + 6950 CONTINUE + CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) + IF(IERRA.NE.0) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSGC + IERR=IERRA + RETURN + ENDIF +C + ENDIF +CC +CCHY [13] SET U,V,W,PN AT NEW NODES +CC + DO 7000 IP=NPOLD+1,NP + J = RCAPGETORIGINAL(IP,NODED) +C + IF(J.EQ.KSEG) THEN + IP1=NODED(1) + IP2=NODED(2) + UFIX(IP)=0.5E0*(UFIX(IP1)+UFIX(IP2)) + VFIX(IP)=0.5E0*(VFIX(IP1)+VFIX(IP2)) + WFIX(IP)=0.5E0*(WFIX(IP1)+WFIX(IP2)) + LPFRM(IP)=LPFRM(IP1) + IF(LPFRM(IP).LT.LPFRM(IP2)) LPFRM(IP)=LPFRM(IP2) + IF(IRFNFF.NE.1) GOTO 7000 + U (IP)=0.5E0*(U (IP1)+U (IP2)) + V (IP)=0.5E0*(V (IP1)+V (IP2)) + W (IP)=0.5E0*(W (IP1)+W (IP2)) + PN(IP)=0.5E0*(PN(IP1)+PN(IP2)) + ELSE IF(J.EQ.KQUD) THEN + IP1=NODED(1) + IP2=NODED(2) + IP3=NODED(3) + IP4=NODED(4) + UFIX(IP)=0.25E0*(UFIX(IP1)+UFIX(IP2)+UFIX(IP3)+UFIX(IP4)) + VFIX(IP)=0.25E0*(VFIX(IP1)+VFIX(IP2)+VFIX(IP3)+VFIX(IP4)) + 
WFIX(IP)=0.25E0*(WFIX(IP1)+WFIX(IP2)+WFIX(IP3)+WFIX(IP4)) + LPFRM(IP)=LPFRM(IP1) + IF(LPFRM(IP).LT.LPFRM(IP2)) LPFRM(IP)=LPFRM(IP2) + IF(LPFRM(IP).LT.LPFRM(IP3)) LPFRM(IP)=LPFRM(IP3) + IF(LPFRM(IP).LT.LPFRM(IP4)) LPFRM(IP)=LPFRM(IP4) + IF(IRFNFF.NE.1) GOTO 7000 + U (IP)=0.25E0*(U (IP1)+U (IP2)+U (IP3)+U (IP4)) + V (IP)=0.25E0*(V (IP1)+V (IP2)+V (IP3)+V (IP4)) + W (IP)=0.25E0*(W (IP1)+W (IP2)+W (IP3)+W (IP4)) + PN(IP)=0.25E0*(PN(IP1)+PN(IP2)+PN(IP3)+PN(IP4)) + ELSE IF(J.EQ.KHEX) THEN + IP1=NODED(1) + IP2=NODED(2) + IP3=NODED(3) + IP4=NODED(4) + IP5=NODED(5) + IP6=NODED(6) + IP7=NODED(7) + IP8=NODED(8) + IF(IRFNFF.NE.1) GOTO 7000 + U (IP)=0.125E0*( U (IP1)+U (IP2)+U (IP3)+U (IP4) + * +U (IP5)+U (IP6)+U (IP7)+U (IP8)) + V (IP)=0.125E0*( V (IP1)+V (IP2)+V (IP3)+V (IP4) + * +V (IP5)+V (IP6)+V (IP7)+V (IP8)) + W (IP)=0.125E0*( W (IP1)+W (IP2)+W (IP3)+W (IP4) + * +W (IP5)+W (IP6)+W (IP7)+W (IP8)) + PN(IP)=0.125E0*( PN(IP1)+PN(IP2)+PN(IP3)+PN(IP4) + * +PN(IP5)+PN(IP6)+PN(IP7)+PN(IP8)) +CC +CC NOTE THAT +CC NEW NODES FROM ELEMENT CENTER NEVER BECOME B.C. 
NODES +CC + ELSE + WRITE(IUT0,*) 'J=',J + WRITE(IUT0,*) ERMSG5 + IERR=1 + RETURN + ENDIF + 7000 CONTINUE +C + DO 7100 IB=1,NPINLT + IP=LPINLT(IB) + UINLT(IB)=UFIX(IP) + VINLT(IB)=VFIX(IP) + WINLT(IB)=WFIX(IP) + 7100 CONTINUE +C + DO 7200 IB=1,NPWALL + IP=LPWALL(IB) + UWALL(IB)=UFIX(IP) + VWALL(IB)=VFIX(IP) + WWALL(IB)=WFIX(IP) + 7200 CONTINUE +C +CC +CCHY [14] CLEAR +CC + CALL rcapClearRefiner() +CC +CCHY [15] TERMINATE REFINER +CC + CALL rcapTermRefiner() +CC +CCHY [16] ADD NODE TABLE NOT REFINED +CCC + DO 8000 IE=1,NE2 + DO 8100 I=1,8 + NODE(I,NER+IE)=NODEBK(I,IE) + 8100 CONTINUE + 8000 CONTINUE + NE=NER+NE2 +C +CC +CCHY [17] INTERPORATE NODE PRESSURE TO ELEMENTS +CCC + IF (IRFNFF.EQ.0) RETURN +C + DO 8200 IE = 1 , NE +C + IF(NODE(8,IE).NE.0) THEN + P(IE) = ( PN(NODE(1,IE))+ PN(NODE(2,IE)) + * +PN(NODE(3,IE))+ PN(NODE(4,IE)) + * +PN(NODE(5,IE))+ PN(NODE(6,IE)) + * +PN(NODE(7,IE))+ PN(NODE(8,IE)))/8.0E0 + ELSE IF(NODE(6,IE).NE.0) THEN + P(IE) = ( PN(NODE(1,IE))+ PN(NODE(2,IE)) + * +PN(NODE(3,IE))+ PN(NODE(4,IE)) + * +PN(NODE(5,IE))+ PN(NODE(6,IE)))/6.0E0 + ELSE IF(NODE(5,IE).NE.0) THEN + P(IE) = ( PN(NODE(1,IE))+ PN(NODE(2,IE)) + * +PN(NODE(3,IE))+ PN(NODE(4,IE)) + * +PN(NODE(5,IE)) )/5.0E0 + ELSE IF(NODE(4,IE).NE.0) THEN + P(IE) = ( PN(NODE(1,IE))+ PN(NODE(2,IE)) + * +PN(NODE(3,IE))+ PN(NODE(4,IE)))/4.0E0 + ELSE + IERR=1 + RETURN + ENDIF +C + 8200 CONTINUE +C + RETURN + END diff --git a/FFB-MINI/src/xmpAPI_nodlex.F b/FFB-MINI/src/xmpAPI_nodlex.F new file mode 100755 index 0000000..e0a6a5b --- /dev/null +++ b/FFB-MINI/src/xmpAPI_nodlex.F @@ -0,0 +1,81 @@ + SUBROUTINE NODLEX + * (NODE,ME,NE,NP,N1,NEX,SN, + * IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, +C Fj +C * VALELM,VALNOD,CM,IUT0,IERR,BUFSND,BUFRCV,MAXBUF) + * VALELM,VALNOD,CM,IUT0,IERR,rx_desc,ry_desc,MAXBUF) +C Fj +C + IMPLICIT NONE +C + INTEGER*4 ME,NE,NP,N1 + INTEGER*4 NODE(N1,NE),NEX(8) + REAL*4 SN (N1,ME) + INTEGER*4 IPART,NDOM,MBPDOM + INTEGER*4 LDOM(NDOM),NBPDOM(NDOM) + INTEGER*4 
IPSLF(MBPDOM,NDOM),IPSND(MBPDOM,NDOM) + REAL*4 VALELM(NE),VALNOD(NP),CM(NP) + INTEGER*4 IUT0,IERR,MAXBUF + REAL*4 BUFSND(MAXBUF),BUFRCV(MAXBUF) +C Fj + INTEGER*8 rx_desc,ry_desc +C Fj +C + CHARACTER*60 ERMSGC + & /' ## SUBROUTINE NODLEX: FATAL ERROR REPORT ; RETURNED' / +C + INTEGER*4 NEHEX, NHEX +C + INTEGER*4 I,IE,IP,IDUM +C +C * START * +C + NEHEX=NEX(4) + NHEX =NEX(8) +C +C +C *** ZERO *** +C +C + DO 300 IP=1,NP + VALNOD(IP)=0.0E0 + 300 CONTINUE +C +C +C *** VALNOD = N * VALELM *** +C +C * HEX * + DO 107 I=1,NHEX + DO 106 IE=1,NEHEX + IP = NODE(I,IE) + VALNOD(IP) = VALNOD(IP) + SN(I,IE)*VALELM(IE) + 106 CONTINUE + 107 CONTINUE +C +C +C *** DATA COMMUNICATION *** +C +C + IDUM=1 + CALL DDCOMX(IPART,IDUM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, + & VALNOD,VALNOD,VALNOD,NP,IUT0,IERR, +C Fj +C & BUFSND,BUFRCV,MAXBUF) + & rx_desc,ry_desc,MAXBUF) +C Fj + IF(IERR.NE.0) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSGC + RETURN + END IF +C +C +C *** INVERSE MASS MATRIX *** +C +C + DO 200 IP=1,NP + VALNOD(IP)=VALNOD(IP)*CM(IP) + 200 CONTINUE +C + RETURN + END diff --git a/FFB-MINI/src/xmpAPI_pres3e.F b/FFB-MINI/src/xmpAPI_pres3e.F new file mode 100755 index 0000000..eaf9990 --- /dev/null +++ b/FFB-MINI/src/xmpAPI_pres3e.F @@ -0,0 +1,210 @@ + SUBROUTINE PRES3E(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, + * ME,N,N1,NEX,NE,NP, + * MRCM,NMAX,NMAXB,ISOLP, + * EPS,EPSRE,DT3D, + * NODE,CM,SN,DNXYZ,DNXI,DNYI,DNZI, + * U,V,W,NPINLT,LPINLT,NPMWAL,LPMWAL, + * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, + * NITR,RES,PE,PN, + * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, + * LPFIX,LFIX3D,FXYZ,WRK01,WRK02,WRK03,WRK04, + * WRK05,WRK06,WRK07,WRK08,WRK09,B, +C Fj +C * PRCM,APRCM,RX,RY,MWRK,WRKN, + * PRCM,APRCM,rx_desc,ry_desc,MWRK,WRKN, +C Fj + * IUT0,IERR) +#include "timing.h" + IMPLICIT NONE +C +CCC [INPUT] + INTEGER*4 MCOLOR,MCPART + INTEGER*4 NCOLOR(4),NCPART(MCOLOR,4),LLOOP(MCPART,MCOLOR,4) + INTEGER*4 ME,N,N1,NEX(8),NE,NP,NMAX,IUT0 + INTEGER*4 MRCM,NMAXB,ISOLP + REAL*4 EPS,EPSRE,DT3D(NE) + INTEGER*4 
NODE(N1,NE), + * NPINLT,LPINLT(NPINLT),NPSYMT,LPSYMT(NPSYMT), + * NPMWAL,LPMWAL(NPMWAL) + REAL*4 CM(NP),SN(N1,ME), + * DNXYZ(3,N1,ME), + * DNXI(N1,ME),DNYI(N1,ME),DNZI(N1,ME), + * U(NP),V(NP),W(NP) + REAL*4 XPSYMT(NPSYMT),YPSYMT(NPSYMT),ZPSYMT(NPSYMT) + INTEGER*4 IPART,NDOM,MBPDOM, + * LDOM(NDOM),NBPDOM(NDOM), + * IPSLF(MBPDOM,NDOM),IPSND(MBPDOM,NDOM) +C +CCC [INPUT/OUTPUT] + REAL*4 PE(NE) +C +CCC [OUTPUT] + INTEGER*4 NITR,IERR + REAL*4 RES,PN(NP) +C +CCC [WORK] + INTEGER*4 LPFIX(NP),LFIX3D(NP) + REAL*4 RX(0:N,ME),RY(0:N,ME), + * B(NE),FXYZ(3,NP), + * WRK01(*),WRK02(*),WRK03(*),WRK04(*), + * WRK05(*),WRK06(*),WRK07(*),WRK08(*), + * WRK09(*) + REAL*4 PRCM(MRCM,NE),APRCM(MRCM,NE) + INTEGER*4 MWRK + REAL*4 WRKN(MWRK,9) +C Fj + INTEGER*8 rx_desc,ry_desc +C Fj +C +C [IN:MID NODE COLORING] +C +CCCC [LOCAL] + INTEGER*4 IBP,NPFIX,MAXBUF,IP,IE +C + REAL*4 EPS0 + DATA EPS0 / 1.E-30 / +C + CHARACTER*60 ERMSGC + & /' ## SUBROUTINE PRES3E: FATAL ERROR REPORT ; RETURNED' / +C +C SOLVE CONTINUITY EQUATION DEFINED AT ELEMENTS +C +C WRITTEN BY Y.YAMADE 2011.01.20 +C +C NOTE THAT +C CURRENT VERSION DOES NOT SUPPORT OVERSET AND MID-NODES +C +C ARGUMENT LISTINGS +C (1) INPUT +C INT *4 N ; MAX. 
NUMBER OF NODES ASSIGNED TO ONE ELEMENT (=8) +C INT *4 N1 ; THE DIMENSION SIZE OF THE FIRST ELEMENTS OF THE +C PASSED ARRAYS 'NODET' +C INT *4 NE ; NUMBER OF ELEMENTS +C INT *4 NP ; NUMBER OF NODES +C INT *4 NEX (I); INCLUDES NUMBER OF ELEMENTS AND NUMBER OF LOCAL NODES +C INT *4 NE ; NUMBER OF ELEMENTS +C INT *4 NP ; NUMBER OF NODES +C INT *4 NMAX ; NUMBER OF MATRIX SOLVER ITERATIONS +C INT *4 IUT0 ; FILE NUMBER TO REPORT ERROR OCCURENCE +C REAL*4 EPS ; MAXIMUM ALLOWABLE ERROR +C INT *4 NODE (I,IE); NODE TABLE +C INT *4 NPINLT ; NUMBER OF INLET BOUNDARY NODES +C INT *4 LPINLT (IB); INLET BOUNDARY NODES +C INT *4 NPMWAL ; NUMBER OF MOVING-WALL BOUNDARY NODES +C INT *4 LPMWAL (IB); MOVING-WAL BOUNDARU NODES +C REAL*4 CM (IP); INVERSED LUMPED MASS MATRIX +C REAL*4 SN (I,IE); INTEGRATED ELEMENT VECTOR OF N +C REAL*4 DNX (I,IE); ELEMENT CENTER VALUE OF NX +C REAL*4 DNY (I,IE); ELEMENT CENTER VALUE OF NY +C REAL*4 DNZ (I,IE); ELEMENT CENTER VALUE OF NZ +C REAL*4 DNXI (I,IE); ELEMENT CENTER VALUE OF NX +C REAL*4 DNYI (I,IE); ELEMENT CENTER VALUE OF NX +C REAL*4 DNZI (I,IE); ELEMENT CENTER VALUE OF NX +C REAL*4 U (IP); X-DIR. VELOCITY COMPONENT +C REAL*4 V (IP); Y-DIR. VELOCITY COMPONENT +C REAL*4 W (IP); Z-DIR. VELOCITY COMPONENT +C +C (2) INPUT/OUTPUT +C REAL*4 PE (IE); PRESSURE AT ELEMENTS +C +C (3) OUTPUT +C INT *4 NITR ; ITERATION NUMBER OF MATRIX SOLVER +C REAL*4 RES ; L2-NORM RESIDUAL OF THE FINAL SOLUTION VECTOR +C REAL*4 PN (IP); PRESSURE AT NODES +C INT *4 IERR ; RETURN CODE TO REPORT ERROR OCCURENCE +C 0 --- NORMAL TERMINATION +C 1 --- A FATAL ERROR HAS OCCURED +C +C (4) WORK +C REAL*4 RX (I,IE); WORK REGION PASSED FOR MATRIX SOLVER +C REAL*4 RY (I,IE); WORK REGION PASSED FOR MATRIX SOLVER +C REAL*4 B (IE); WORK REGION PASSED FOR R.H.S. 
VECTOR +C REAL*4 WRK01 (IE); WORK REGION PASSED FOR MATRIX SOLVER +C REAL*4 WRK02 (IE); WORK REGION PASSED FOR MATRIX SOLVER +C REAL*4 WRK03 (IE); WORK REGION PASSED FOR MATRIX SOLVER +C REAL*4 WRK04 (IE); WORK REGION PASSED FOR MATRIX SOLVER +C REAL*4 WRK05 (IE); WORK REGION PASSED FOR MATRIX SOLVER +C REAL*4 WRK06 (IE); WORK REGION PASSED FOR MATRIX SOLVER +C REAL*4 WRK07 (IE); WORK REGION PASSED FOR MATRIX SOLVER +C REAL*4 WRK08 (IE); WORK REGION PASSED FOR MATRIX SOLVER +C REAL*4 WRK09 (IE); WORK REGION PASSED FOR MATRIX SOLVER +C + IERR=0 + MAXBUF=NE*(N+1) +C + DO 1000 IP=1,NP + LFIX3D(IP)=0 + 1000 CONTINUE +C +!ocl norecurrence(LFIX3D) + DO 1100 IBP=1,NPINLT + IP=LPINLT(IBP) + LFIX3D(IP)=1 + 1100 CONTINUE +C +!ocl norecurrence(LFIX3D) + DO 1200 IBP=1,NPMWAL + IP=LPMWAL(IBP) + LFIX3D(IP)=1 + 1200 CONTINUE +C + NPFIX=0 + DO 1450 IP=1,NP + IF(LFIX3D(IP).EQ.0) GOTO 1450 + NPFIX=NPFIX+1 + LPFIX(NPFIX)=IP + 1450 CONTINUE +C + CALL FILD3X(ME,NE,NP,NEX,N1, + * U,V,W,B,NODE,DNXI,DNYI,DNZI) + IF(IERR.NE.0) THEN + WRITE(IUT0,*) ERMSGC + RETURN + ENDIF +C + DO 1550 IE=1,NE + B(IE)=B(IE)/DT3D(IE) + 1550 CONTINUE +C +C + IF(ISOLP.EQ.1) THEN + call maprof_time_start(TM_BCGSXE) + CALL BCGSXE(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, + * ME,N,N1,NEX,NE,NP,NMAX,EPS,EPSRE, + * NODE,CM,DNXYZ,DNXI,DNYI,DNZI, + * B,NPFIX,LPFIX, + * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, + * NITR,RES,PE, + * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, + * WRK01,WRK02,WRK03,WRK04,WRK05,WRK06, + * FXYZ,RX,RY,MWRK,WRKN, + * IUT0,IERR) + call maprof_time_stop(TM_BCGSXE) + ELSE IF(ISOLP.EQ.2) THEN + CALL RCMELM(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, + * ME,N,N1,NEX,NE,NP,NMAX,EPS,EPSRE,MRCM,NMAXB, + * NODE,CM,DNXYZ,DNXI,DNYI,DNZI, + * B,NPFIX,LPFIX, + * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, + * NITR,RES,PE, + * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, + * FXYZ,WRK01,WRK02,WRK03,WRK04, + * WRK05,WRK06,WRK07,WRK08,WRK09,PRCM,APRCM, + * RX,RY,MWRK,WRKN, + * IUT0,IERR) + ELSE + IERR=1 + ENDIF +C + IF(IERR.NE.0) THEN + 
WRITE(IUT0,*) ERMSGC + RETURN + ENDIF +C + CALL NODLEX(NODE,ME,NE,NP,N1,NEX,SN, + * IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, + * PE,PN,CM,IUT0,IERR,RX,RY,MAXBUF) +C +C + RETURN + END diff --git a/FFB-MINI/src/xmpAPI_rcmelm.F b/FFB-MINI/src/xmpAPI_rcmelm.F new file mode 100755 index 0000000..0856a7f --- /dev/null +++ b/FFB-MINI/src/xmpAPI_rcmelm.F @@ -0,0 +1,518 @@ + SUBROUTINE RCMELM(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, + * ME,N,N1,NEX,NE,NP,NMAX,EPS,EPSRE,MRCM,NMAXB, + * NODE,CM,DNXYZ,DNXI,DNYI,DNZI, + * B,NPFIX,LPFIX,NPSYMT,LPSYMT, + * XPSYMT,YPSYMT,ZPSYMT,NITRCM,RESR,S, + * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, + * FXYZ,WRK01,WRK02,WRK03,WRK04, + * WRK05,WRK06,RRCM, W1RCM,W2RCM,PRCM,APRCM, +C Fj +C * RX,RY,MWRK,WRKN, + * rx_desc,ry_desc,MWRK,WRKN, +C Fj + * IUT0,IERR) +#include "timing.h" + IMPLICIT NONE +C +CCC [LOOP] + INTEGER*4 MCOLOR,MCPART + INTEGER*4 NCOLOR(4),NCPART(MCOLOR,4),LLOOP(MCPART,MCOLOR,4) +C +CCC [FOR MID.NODES] +C +CCC [INPUT] + INTEGER*4 ME,N,N1,NEX(8),NE,NP,NMAX,MRCM,NMAXB,IUT0 + REAL*4 EPS,EPSRE + INTEGER*4 NODE(N1,NE) + REAL*4 CM(NP), + * DNXYZ(3,N1,ME), + * DNXI(N1,ME),DNYI(N1,ME),DNZI(N1,ME), + * B(NE) + REAL*4 XPSYMT(NPSYMT),YPSYMT(NPSYMT),ZPSYMT(NPSYMT) + INTEGER*4 NPFIX,LPFIX(NPFIX), + * NPSYMT,LPSYMT(NPSYMT) + INTEGER*4 IPART,NDOM,MBPDOM, + * LDOM(NDOM),NBPDOM(NDOM), + * IPSLF(MBPDOM,NDOM),IPSND(MBPDOM,NDOM) +C +C [IN:MID NODE COLORING] +C +CCC [INPUT/OUTPUT] + REAL*4 S(NE) + REAL*4 RES +C +CCC [OUTPUT] + INTEGER*4 NITR,IERR +C +CCC [WORK] + REAL*4 RX(0:N,ME),RY(0:N,ME), + * FXYZ(3,NP),WRK01(NE),WRK02(NE), + * WRK03(NE),WRK04(NE),WRK05(NE),WRK06(NE), + * RRCM(NE),PRCM(MRCM,NE),APRCM(MRCM,NE), + * W1RCM(NE),W2RCM(NE) +C Fj + INTEGER*8 rx_desc,ry_desc +C Fj + INTEGER*4 MWRK + REAL*4 WRKN(MWRK,9) +C +CCCC [LOCL] + INTEGER*4 IE + REAL*4 BDOT,BDOTA,RESR,RSDOT,RSDOTA + INTEGER*4 LRCM(MRCM) + REAL*4 ARCM(MRCM,MRCM),BRCM(MRCM) +C + INTEGER*4 MRCMAX + DATA MRCMAX / 10 / + REAL*4 EPS0,EPSB + DATA EPS0 / 1.E-30 / + DATA EPSB / -1.0E0 / +C + 
INTEGER*4 I,J,NITRCM,NITRB,NMAXRCM,NRCM + REAL*4 ATEMP,BTEMP,BUF,RESB +C + CHARACTER*60 ERMSGC + & /' ## SUBROUTINE RESCUT: FATAL ERROR REPORT ; RETURNED' / +C + CHARACTER*60 ERMSG1 + & /' ## SUBROUTINE RESCUT: INVALID PARAMETER ; RETURNED' / +C +C SOLVE MATRIX EQUATION AT ELEMENTS BY BI-CGSTAB METHOS +C ( 3-D CALCULATION : SINGLE WORD & MULTI ELEMENT VERSION ) +C +C WRITTEN BY Y. YAMADE 2011.01.20 +C +C OPERATION COUNTS: FLOP /ELEMENT/ITERATION +C DATA LOADINGS : WORDS/ELEMENT/ITERATION +C ( WORDS CONTIGUOUSLY, +C WORDS BY 4-WORD STRIDE, AND +C WORDS BY LIST ) +C +C ARGUMENT LISTINGS +C (1) INPUT +C INT *4 ME ; MAX. NUMBER OF TOTAL ELEMENTS +C INT *4 N1 ; THE DIMENSION SIZE OF THE FIRST ELEMENTS OF THE +C PASSED ARRAYS 'NODET' +C INT *4 NE ; NUMBER OF ELEMENTS +C INT *4 NP ; NUMBER OF NODES +C INT *4 NEX (I); INCLUDES NUMBER OF ELEMENTS AND NUMBER OF LOCAL NODES +C INT *4 NE ; NUMBER OF ELEMENTS +C INT *4 NP ; NUMBER OF NODES +C INT *4 NMAX ; NUMBER OF MATRIX SOLVER ITERATIONS +C INT *4 IUT0 ; FILE NUMBER TO REPORT ERROR OCCURENCE +C REAL*4 EPS ; MAXIMUM ALLOWABLE ERROR +C INT *4 NODE (I,IE); NODE TABLE +C REAL*4 CM (IP); INVERSED LUMPED MASS MATRIX +C REAL*4 DNX (I,IE); ELEMENT CENTER VALUE OF NX +C REAL*4 DNY (I,IE); ELEMENT CENTER VALUE OF NY +C REAL*4 DNZ (I,IE); ELEMENT CENTER VALUE OF NZ +C REAL*4 DNXI (I,IE); ELEMENT CENTER VALUE OF NX +C REAL*4 DNYI (I,IE); ELEMENT CENTER VALUE OF NX +C REAL*4 DNZI (I,IE); ELEMENT CENTER VALUE OF NX +C +C REAL*4 B (IE); GLOBAL FORCE VECTOR +C INT *4 NFIX ; NUMBER OF FIX BOUNDARY NODES +C INT *4 LPFIX (IB); FIX BOUNDARY NODES +C +C INT *4 IPART ; SUB-DOMAIN NUMBER THAT THIS TASK SHOULD TAKE/IS +C TAKING CARE OF. IPART BEING SET ZERO MEANS THAT +C THE PROGRAM SHOULD RUN/IS RUNNING IN SERIAL +C MODE. 
+C INT *4 NDOM ; NUMBER OF THE NERIBERING SUB-DOMAINS +C INT*4 MBPDOM ; THE MAXIMUM NUMBER OF THE INTER-CONNECT +C BOUNDARY NODES FOR ONE NEIBERING SUB-DOMAIN +C INT *4 LDOM (IDOM); NEIBERING SUB-DOMAIN NUMBER +C INT *4 NBPDOM (IDOM); NUMBER OF INTER-CONNECT BOUNDARY NODES +C SHARING WITH THE IDOM'TH NEIBERING SUB-DOMAIN, +C LDOM(IDOM) +C INT *4 IPSLF(I,IDOM); INTER-CONNECT BOUNDARY NODE NUMBER IN THE +C CALLING TASK'S SUB-DOMAIN, FOR THE IDOM'TH +C NEIBERING SUB-DOMAIN, LDOM(IDOM) +C INT *4 IPSND(I,IDOM); INTER-CONNECT BOUNDARY NODE NUMBER IN THE +C SUB-DOMAIN THAT IS RECEIVING THE CALLING +C TASK'S RESIDUALS. +C +C (2) INPUT/OUTPUT +C REAL*4 S (IE); GLOBAL SOLUTION VECTOR (PROVIDE INITIAL GUESS) +C +C (3) OUTPUT +C INT *4 NITR ; NUMBER OF ITERATIONS DONE +C REAL*4 RES ; L2-NORM RESIDUAL OF THE FINAL SOLUTION VECTOR +C INT *4 IERR ; RETURN CODE TO REPORT ERROR OCCURRENCE +C 0 --- NORMAL TERMINATION +C 1 --- A FATAL ERROR HAS OCCURRED +C +C (4) WORK +C REAL*4 RX (I,IE); WORK REGION PASSED FOR CALLAP +C REAL*4 RY (I,IE); WORK REGION PASSED FOR CALLAP +C REAL*4 R0 (IE); WORK REGION +C REAL*4 RK (IE); WORK REGION +C REAL*4 PK (IE); WORK REGION +C REAL*4 APK (IE); WORK REGION +C REAL*4 ATK (IE); WORK REGION +C REAL*4 TK (IE); WORK REGION +C +C + IERR = 0 +C + IF(MRCM.GE.MRCMAX) THEN + WRITE(IUT0,*) ERMSG1 + IERR = 1 + RETURN + ENDIF +C + NMAXRCM = NMAX/NMAXB + IF(NMAXRCM.EQ.0) RETURN +C +C +C +C +C +C SET INITIAL RESIDUAL :RRCM +C + call maprof_time_start(TM_CALLAP) + CALL CALLAP(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, + * ME,N,N1,NE,NP,NEX,NODE, + * S,RRCM,DNXYZ,DNXI,DNYI,DNZI, + * CM, + * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, + * NPFIX,LPFIX, + * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, +C Fj +C * FXYZ,RX,RY,MWRK,WRKN, + * FXYZ,rx_desc,ry_desc,MWRK,WRKN, +C Fj + * IUT0,IERR) + call maprof_time_stop(TM_CALLAP) +C + IF(IERR.NE.0) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSGC + RETURN + ENDIF +C + DO 100 IE = 1 , NE + RRCM(IE) = B(IE)-RRCM(IE) + W1RCM(IE) = 0.E0 + 100 CONTINUE 
+ NITR = 0 + NITRCM = 0 +C +C CALCULATE MAGNITUDE OF B + BDOT = 0.E0 +C 2F/1W(1C) + DO 200 IE = 1 , NE + BDOT = BDOT+B(IE)*B(IE) + 200 CONTINUE +C + IF(IPART.GE.1) THEN + call maprof_time_start(TM_PRES3E_COM) + CALL DDCOM2(BDOT,BDOTA) + call maprof_time_stop(TM_PRES3E_COM) + BDOT = BDOTA + ENDIF +C + IF(ABS(BDOT).LE.EPS0) BDOT = 1.0E0 +C + RSDOT = 0.E0 + DO 300 IE = 1 , NE + RSDOT = RSDOT+RRCM(IE)*RRCM(IE) + 300 CONTINUE +C + IF(IPART.GE.1) THEN + call maprof_time_start(TM_PRES3E_COM) + CALL DDCOM2(RSDOT,RSDOTA) + call maprof_time_stop(TM_PRES3E_COM) + RSDOT = RSDOTA + ENDIF +C + RES = SQRT(RSDOT) + RESR = RES/SQRT(BDOT) +CCYYDEB--- +CC WRITE(90,'(I8,2E13.5)') NITR,RES,RESR +C WRITE(90,*) 'RES',NITRCM,RES,RESR +CCYYDEB--- +#if 0 + IF(RESR.LE.EPSRE) RETURN +#else + if (EPS > 0.0) then + IF(RESR.LE.EPSRE) RETURN + end if +#endif +C + 350 CONTINUE + NITR = NITR+1 +C + IF(NITR.GE.MRCM) THEN + NRCM = MRCM + ELSE + NRCM = NITR + ENDIF +C +C +C COMPUTE PRODUCT OF COEFFICIENT MATRIX AND SEARCH-DIRECTION VECTOR +C AND INNER PRODUCT OF COMPUTED PRODUCT AND SEARCH-DIRECTION VECTOR +C +C +C +C +C COMPUTE TEMPORARY SOLUTION OF RESIDUAL EQUATION : +C + call maprof_time_start(TM_BCGSXE) + CALL BCGSXE(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, + * ME,N,N1,NEX,NE,NP,NMAXB,EPSB,EPSRE, + * NODE,CM,DNXYZ,DNXI,DNYI,DNZI, + * RRCM, + * NPFIX,LPFIX,NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, + * NITRB,RESB,W1RCM, + * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, + * WRK01,WRK02,WRK03,WRK04,WRK05,WRK06, +C Fj +C * FXYZ,RX,RY,MWRK,WRKN, + * FXYZ,rx_desc,ry_desc,MWRK,WRKN, +C Fj + * IUT0,IERR) + call maprof_time_stop(TM_BCGSXE) + IF(IERR.NE.0) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSGC + RETURN + ENDIF +C + call maprof_time_start(TM_CALLAP) + CALL CALLAP(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, + * ME,N,N1,NE,NP,NEX,NODE, + * W1RCM,W2RCM,DNXYZ,DNXI,DNYI,DNZI, + * CM, + * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, + * NPFIX,LPFIX, + * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, + * FXYZ,RX,RY,MWRK,WRKN, + * IUT0,IERR) + 
call maprof_time_stop(TM_CALLAP) +C + IF(IERR.NE.0) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSGC + RETURN + ENDIF +C +C +C UPDATE SEARCH VECTORS AND PRODUCTION OF MATRIX AND SERACH VECTORS +C +C +C 0F/6W(6S) +*POPTION INDEP(PRCM,APRCM) +!CDIR LOOPCNT=6 + DO 410 J = NRCM, 2, -1 +!CDIR VECTOR + DO 400 IE = 1 , NE + PRCM (J,IE) = PRCM (J-1,IE) + APRCM(J,IE) = APRCM(J-1,IE) + 400 CONTINUE + 410 CONTINUE +C +C J = 1 +C 0F/2W(2C) + DO 500 IE = 1 , NE + PRCM (1,IE) = W1RCM(IE) + APRCM(1,IE) = W2RCM(IE) + 500 CONTINUE +C +C +C INITIALIZE FOR RESIDUAL MINIMIZE EQUATION +C +C + DO 610 I = 1 , NRCM + DO 600 J = 1 , NRCM + ARCM(I,J) = 0.E0 + 600 CONTINUE + 610 CONTINUE +C + DO 700 I = 1 , NRCM + BRCM(I) = 0.E0 + 700 CONTINUE +C +C +C COMPUTE COEFFICIENTS OF RESIDUAL MINIMIZE EQUATION +C +C +C 34F/36W(32S+4C) +!CDIR NOVECTOR + DO 820 I = 1 , NRCM +!CDIR NOVECTOR + DO 810 J = 1 , NRCM + ATEMP = 0.E0 +!CDIR VECTOR + DO 800 IE = 1 , NE + ATEMP = ATEMP+APRCM(I,IE)*APRCM(J,IE) + 800 CONTINUE + ARCM(I,J) = ATEMP + 810 CONTINUE + 820 CONTINUE +C I == 1 +!CDIR NOVECTOR + DO 910 J = 1 , NRCM + BTEMP = 0.E0 +!CDIR VECTOR + DO 900 IE = 1 , NE + BTEMP = BTEMP+APRCM(J,IE)*RRCM(IE) + 900 CONTINUE + BRCM(J) = BTEMP + 910 CONTINUE +C + IF(IPART.GE.1) THEN + DO 1020 J = 1 , NRCM + DO 1010 I = 1 , NRCM + call maprof_time_start(TM_PRES3E_COM) + CALL DDCOM2(ARCM(I,J),BUF) + call maprof_time_stop(TM_PRES3E_COM) + ARCM(I,J) = BUF + 1010 CONTINUE + call maprof_time_start(TM_PRES3E_COM) + CALL DDCOM2(BRCM(J),BUF) + call maprof_time_stop(TM_PRES3E_COM) + BRCM(J) = BUF + 1020 CONTINUE + ENDIF +C +C +C SOLVE RESIDUAL MINIMIZE EQUATION :BRCM +C +C + CALL MATGAU(MRCM,NRCM,ARCM,BRCM,W1RCM,LRCM,IUT0,IERR) +C + IF(IERR.NE.0) THEN + WRITE(IUT0,*)ERMSGC + RETURN + ENDIF +C +C +C UPDATE SEAECH VECTOR +C +C +C J == 1 +C 1F/1W(1S) + DO 1100 IE = 1 , NE + PRCM(1,IE) = BRCM(1)*PRCM(1,IE) + 1100 CONTINUE +C +C 6F/6W(6S) +*POPTION INDEP(PRCM,BRCM) + DO 1210 IE = 1 , NE + DO 1200 J = 2, NRCM + PRCM(1,IE) = 
PRCM(1,IE)+BRCM(J)*PRCM(J,IE) + 1200 CONTINUE + 1210 CONTINUE +C +C +C UPDATE SOLUTION VECTOR +C +C +C 1F/2W(1C+1S) + DO 1300 IE = 1 , NE + S(IE) = S(IE)+PRCM(1,IE) + 1300 CONTINUE +C +C +C +C COMPUTE RESIDUAL +C +C + call maprof_time_start(TM_CALLAP) + CALL CALLAP(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, + * ME,N,N1,NE,NP,NEX,NODE, + * S,RRCM,DNXYZ,DNXI,DNYI,DNZI, + * CM, + * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, + * NPFIX,LPFIX, + * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, +C Fj +C * FXYZ,RX,RY,MWRK,WRKN, + * FXYZ,rx_desc,ry_desc,MWRK,WRKN, +C Fj + * IUT0,IERR) + call maprof_time_stop(TM_CALLAP) +C + IF(IERR.NE.0) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSGC + RETURN + ENDIF +C +C 1F/2W(2C) + DO 1400 IE = 1 , NE + RRCM(IE) = B(IE)-RRCM(IE) + 1400 CONTINUE +C +C 2F/1W(1C) + RSDOT = 0.E0 + DO 1500 IE = 1 , NE + RSDOT = RSDOT+RRCM(IE)*RRCM(IE) + 1500 CONTINUE +C + IF(IPART.GE.1) THEN + call maprof_time_start(TM_PRES3E_COM) + CALL DDCOM2(RSDOT,RSDOTA) + call maprof_time_stop(TM_PRES3E_COM) + RSDOT = RSDOTA + ENDIF +C + RES = SQRT(RSDOT) + RESR = RES/SQRT(BDOT) +CCYYDEB--- +CC WRITE(90,'(I8,2E13.5)') NITR,RES,RESR +C WRITE(90,*) 'RES',NITRCM,RES,RESR +CCYYDEB--- +C + NITRCM = NITRCM+NITRB +#if 0 + IF(RESR.LE.EPSRE.OR.RES.LE.EPS) RETURN +#else + if (EPS > 0.0) then + IF(RESR.LE.EPSRE.OR.RES.LE.EPS) RETURN + end if +#endif + IF(NITR.EQ.NMAXRCM) RETURN +C +C +C COMPUTE PRODUCTION OF MATRIX AND TEPORARY SOLUTION +C +C + DO 1600 IE = 1 , NE + W1RCM(IE) = PRCM(1,IE) + 1600 CONTINUE +C + call maprof_time_start(TM_CALLAP) + CALL CALLAP(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, + * ME,N,N1,NE,NP,NEX,NODE, + * W1RCM,W2RCM,DNXYZ,DNXI,DNYI,DNZI, + * CM, + * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, + * NPFIX,LPFIX, + * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, +C Fj +C * FXYZ,RX,RY,MWRK,WRKN, + * FXYZ,rx_desc,ry_desc,MWRK,WRKN, +C Fj + * IUT0,IERR) + call maprof_time_stop(TM_CALLAP) +C + IF(IERR.NE.0) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSGC + RETURN + ENDIF +C 0F/1W(1C) + DO 1700 IE = 1 , NE + 
APRCM(1,IE) = W2RCM(IE) + W1RCM(IE) = 0.E0 + 1700 CONTINUE +C +C RETURN IF ITERATION NUMBER HAS REACHED THE GIVEN MAXIMUM NUMBER, +C OTHERWISE CONTINUE ITERATIONS UNTIL SOLUTION IS CONVERGED +C +C + GO TO 350 +C + END diff --git a/FFB-MINI/src/xmpAPI_vel3d1.F b/FFB-MINI/src/xmpAPI_vel3d1.F new file mode 100755 index 0000000..92b24d0 --- /dev/null +++ b/FFB-MINI/src/xmpAPI_vel3d1.F @@ -0,0 +1,834 @@ + SUBROUTINE VEL3D1 + * (MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, + * IFORM,BTDCOE,DT, + * ITIME,DEVLP1,ACCELX,ACCELY,ACCELZ, + * NMAX,EPS,RESU,RESV,RESW,NITRU,NITRV,NITRW, + * ME,N,N1,N2,NE,NP,NEX,NODE, + * U,V,W,VISC,UE,VE,WE, + * MELM,E,EX,EY,EZ,EXX,EYY,EZZ,EXY,EXZ,EYZ, + * SN, + * NUMIP, + * A,NPP,NCRS,IPCRS,APCRS,LTAB, + * NPINLT,LPINLT,UINLT,VINLT,WINLT, + * NPWALL,LPWALL,UWALL,VWALL,WWALL, + * NPSYMT,NPSYM2,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, + * IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, + * LPFIX,LFIX3D, + * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,A0,AR,RHSU,RHSV,RHSW, +C Fj +C * RX,RY, + * rx_desc,ry_desc, +C Fj + * JUNROL,NPPMAX,NCRS2,TS,TACRS,ITPCRS, + * IUT0,IERR) +C +#include "timing.h" + IMPLICIT NONE +C + INTEGER*4 MCOLOR,MCPART + INTEGER*4 NCOLOR(4),NCPART(MCOLOR,4),LLOOP(MCPART,MCOLOR,4) +C + INTEGER*4 IFORM,ITIME,IPART, + * NMAX,NUMIP, + * ME,N,NEX,N1,N2,NE,NP,MELM, + * NODE, + * LPFIX,NCRS, + * NPWALL,LPWALL, + * NPINLT,LPINLT, + * NPSYMT,NPSYM2,LPSYMT, + * NDOM,LDOM,MBPDOM,NBPDOM,IPSLF,IPSND, + * LFIX3D, + * NPP,IPCRS,LTAB,IUT0,IERR + REAL*4 BTDCOE(4),DT,EPS,RESU,RESV,RESW, + * DEVLP1,ACCELX,ACCELY,ACCELZ, + * U,V,W,VISC, + * SN,E,EX,EY,EZ,EXX,EYY,EZZ,EXY,EXZ,EYZ, + * UINLT,VINLT,WINLT, + * UWALL,VWALL,WWALL,XPSYMT,YPSYMT,ZPSYMT, + * RX,RY, + * A,UG,VG,WG,UE,VE,WE, + * WRK01,WRK02,WRK03,WRK04,A0,AR, + * RHSU,RHSV,RHSW,APCRS + INTEGER*8 rx_desc,ry_desc +C + +C + DIMENSION NEX(12), + * U(NP),V(NP),W(NP),UE(NE),VE(NE),WE(NE), + * NODE(N1,NE), + * VISC(NE), + * SN(N1,NE),E(MELM), + * EX (MELM),EY (MELM),EZ (MELM), + * EXX(MELM),EYY(MELM),EZZ(MELM), + * 
EXY(MELM),EXZ(MELM),EYZ(MELM) +C + DIMENSION NUMIP(NP),LPFIX(NP),LPINLT(NPINLT), + * LPWALL(NPWALL),LPSYMT(NPSYM2), + * UINLT(NPINLT),VINLT(NPINLT),WINLT (NPINLT), + * UWALL(NPWALL),VWALL(NPWALL),WWALL(NPWALL), + * XPSYMT(NPSYM2),YPSYMT(NPSYM2),ZPSYMT(NPSYM2), + * LDOM(NDOM),NBPDOM(NDOM), + * IPSLF(MBPDOM,NDOM),IPSND(MBPDOM,NDOM) +C + DIMENSION RX(0:N,ME),RY(0:N,ME),LFIX3D(NP), + * A(N1,N2,NE),UG(*),VG(*),WG(*), + * WRK01(*),WRK02(*),WRK03(*),WRK04(*),A0(NP),AR(NP), + * RHSU(NP),RHSV(NP),RHSW(NP) +C + DIMENSION NPP(NP),IPCRS(NCRS),APCRS(NCRS),LTAB(N1,N2,NE) +C + REAL*4 DIJ(8,8) + DATA DIJ / 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + & 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + & 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, + & 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, + & 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, + & 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, + & 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, + & 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0 / +C + INTEGER*4 NEHEX,NHEX,NSKIP4, + * IP,IE,I,J,IITRE, + * IES,IEE, + * IP1,IP2,IP3,IP4,IP5,IP6,IP7,IP8,IBP, + * IELM0, + * IELM1A,IELM2A,IELM3A,IELM4A, + * IELM5A,IELM6A,IELM7A,IELM8A, + * IELM1B,IELM2B,IELM3B,IELM4B, + * IELM5B,IELM6B,IELM7B,IELM8B, + * MAXBUF,NPFIX,IDUM,NITRE, + * NITRU,NITRV,NITRW,IRESU,IRESV,IRESW, + * IERR1,IERR2,IERR3,ICOLOR,ICPART + REAL*4 DTH,CEB1,CEB2,UU,VV,WW,AT, + * AT1,AT2,AT3,AT4,AT5,AT6,AT7,AT8, + * AC1,AC2,AC3,AC4,AC5,AC6,AC7,AC8, + * AD1,AD2,AD3,AD4,AD5,AD6,AD7,AD8, + * CRHS1,CRHS2,CRHS3,CRHS4,CRHS5,CRHS6,CRHS7,CRHS8, + * ABTD1,ABTD2,ABTD3,ABTD4,ABTD5,ABTD6,ABTD7,ABTD8, + * FX0,FY0,FZ0, + * COF +C +C [FULL UNROOL] + INTEGER*4 JUNROL + INTEGER*4 NPPMAX,NCRS2,ITPCRS(NCRS2) + REAL*4 TS(0:NP),TACRS(NCRS2) +C +C [IN:MID NODE COLORING] +C + CHARACTER*60 ERMSGC + & /' ## SUBROUTINE VEL3D1: FATAL ERROR REPORT ; RETURNED' / +C + INTEGER*4 IBCGS + DATA IBCGS / 0 / + REAL*4 WRK05(NP) +C +C +C CALCULATE VELOCITY PREDICTOR +C ( 3-D CALCULATION : SINGLE WORD & MULTI ELEMENT VERSION ) +C 2009.01.30 Y.YAMADE +C +C 
************ COMPUTATIONAL COST EXCEPT FOR MATRIX SOLVER ******* +C =============================HEX====================================== +C OPERATION COUNTS: 2656 FLOP /ELEMENT +C DATA LOADINGS : 1417 WORDS/ELEMENT +C ( 41 WORDS CONTIGUOUSLY, +C 978 WORDS BY STRIDE, AND +C 398 WORDS BY LIST ) +C +C ARGUMENT LISTINGS +C (1) INPUT +C NLOOP ;NUMBER OF LOOPS +C LLOOP ;POINTER FOR SPLITTED ELEMENT LIST +C IFORM ; SPECIFIES MOMENTUM EQUATIONS METHOD +C +C TIMER ; PRESENT TIME OF OVERSET CONDITIONS DATA +C NOTES ; 'TIMER' WILL BE REFERED TO FOR INTER-FLAME OVERSET. +C DT ; TIME INCTREMENT +C +C NMAX ; NUMBER OF MATRIX SOLVER ITERATIONS +C EPS ; MAXIMUM ALLOWABLE ERROR +C +C ITIME ; CUREENT TIME STEP +C +C DEVLP1 ; DEVELOPMENT FUNCTION FOR INLET VELOCITIES +C DEVLP2 ; DEVELOPMENT FUNCTION FOR ALL THE OTHER VALUES +C ACCELX ; X-DIR. ACCELERATION TERMS ADDED TO ALL FRAMES +C ACCELY ; Y-DIR. ACCELERATION TERMS ADDED TO ALL FRAMES +C ACCELZ ; Z-DIR. ACCELERATION TERMS ADDED TO ALL FRAMES +C +C ME ; MAX. NUMBER OF TOTAL ELEMENTS +C N ; NUMBER OF NODES ASSIGNED TO ONE ELEMENT +C NE ; NUMBER OF TOTAL ELEMENTS +C NP ; NUMBER OF TOTAL NODES +C NEX(I) ; INCLUDES NUMBER OF ELEMENTS AND NUMBER OF LOCAL NODES +C AS FOLOOWS +C NEX(1) ; NUMBER OF TET. ELEMENTS +C NEX(2) ; NUMBER OF PYRAMID ELEMENTS +C NEX(3) ; NUMBER OF WEGDE ELEMENTS +C NEX(4) ; NUMBER OF HEX. ELEMENTS +C NEX(5) ; NUMBER OF LOCAL NODES IN A TET. ELEMENT (=4) +C NEX(6) ; NUMBER OF LOCAL NODES IN A PYRAMID ELEMENT (=5) +C NEX(7) ; NUMBER OF LOCAL NODES IN A WEGDE ELEMENT (=6) +C NEX(8) ; NUMBER OF LOCAL NODES IN A HEX. ELEMENT (=8) +C +C NODE (I,IE); NODE NO. 
TABLE BASED ON ELEMENT +C VISC (IE); ELEMENT VISCOSITY +C +C SN (I,IE); INTEGRATED ELEMENT VECTOR OF N +C E (I,J,IE); INTEGRATED ELEMENT MATRIX OF N*NT +C EX (I,J,IE); INTEGRATED ELEMENT MATRIX OF N*NXT +C EY (I,J,IE); INTEGRATED ELEMENT MATRIX OF N*NYT +C EZ (I,J,IE); INTEGRATED ELEMENT MATRIX OF N*NZT +C EXX (I,J,IE); INTEGRATED ELEMENT MATRIX OF NX*NXT +C EYY (I,J,IE); INTEGRATED ELEMENT MATRIX OF NY*NYT +C EZZ (I,J,IE); INTEGRATED ELEMENT MATRIX OF NZ*NZT +C EXY (I,J,IE); INTEGRATED ELEMENT MATRIX OF NX*NYT +C EXZ (I,J,IE); INTEGRATED ELEMENT MATRIX OF NX*NZT +C EYZ (I,J,IE); INTEGRATED ELEMENT MATRIX OF NY*NZT +C +C NUMIP (IP); NUMBER OF NEIGHBORING DOMAINS THAT NODE +C 'IP' BELONG TO +C +C A. INLET BOUNDARY +C NPINLT ; NUMBER OF INLET BOUNDARY NODES +C LPINLT (IBP); INLET BOUNDARY NODES +C UINLT (IBP); INLET BOUNDARY U-VELOCITIES +C VINLT (IBP); INLET BOUNDARY V-VELOCITIES +C WINLT (IBP); INLET BOUNDARY W-VELOCITIES +C +C B. WALL BOUNDARY +C NPWALL ; NUMBER OF WALL BOUNDARY NODES +C LPWALL (IBP); WALL BOUNDARY NODES +C UWALL (IBP); WALL BOUNDARY U-VELOCITIES +C VWALL (IBP); WALL BOUNDARY V-VELOCITIES +C WWALL (IBP); WALL BOUNDARY W-VELOCITIES +C +C C. SYMMETRIC BOUNDARY +C NPSYMT ; NUMBER OF SYMMETRIC BOUNDARY NODES +C LPSYMT (IBP); SYMMETRIC BOUNDARY NODES +C XPSYMT (IBP); X-DIR COMPONENT OF SYMMETRIC NODE NORMAL VECTOR +C YPSYMT (IBP); Y-DIR COMPONENT OF SYMMETRIC NODE NORMAL VECTOR +C ZPSYMT (IBP); Z-DIR COMPONENT OF SYMMETRIC NODE NORMAL VECTOR +C +C D. INTER-CONNECT BOUNDARY +C IPART ; SUB-DOMAIN NUMBER THAT THIS TASK SHOULD TAKE/IS +C TAKING CARE OF. IPART BEING SET ZERO MEANS THAT +C THE PROGRAM SHOULD RUN/IS RUNNING IN SERIAL +C MODE. 
+C +C LDOM (IDOM); NEIBERING SUB-DOMAIN NUMBER +C NBPDOM(IDOM); NUMBER OF INTER-CONNECT BOUNDARY NODES +C SHARING WITH THE IDOM'TH NEIBERING SUB-DOMAIN, +C LDOM(IDOM) +C NDOM ; NUMBER OF THE NERIBERING SUB-DOMAINS +C IPSLF (IBP,IDOM); INTER-CONNECT BOUNDARY NODE NUMBER IN THE +C CALLING TASK'S SUB-DOMAIN, FOR THE IDOM'TH +C NEIBERING SUB-DOMAIN, LDOM(IDOM) +C IPSND (IBP,IDOM); INTER-CONNECT BOUNDARY NODE NUMBER IN THE +C SUB-DOMAIN THAT IS RECEIVING THE CALLING +C TASK'S RESIDUALS. +C MBPDOM ; THE MAXIMUM NUMBER OF THE INTER-CONNECT +C BOUNDARY NODES FOR ONE NEIBERING SUB-DOMAIN +C +C +C NPP (IP); NUMBER OF ADJACENT NODES TO NODE IP +C NCRS ; NUMBER OF NONZERO ELEMENTS IN MATRIX OF CRS FORMAT +C IPCRS (ICRS); NODE NO. TABLE BASED ON CRS FORMAT +C LTAB(J1,J2,IE); CRS INDEX TABLE FOR NODE-BASE MATRIX +C COEFFICIENT +C +C IUT0 ; FILE NUMBER TO REPORT ERROR OCCURENCE +C +C (2) OUTPUT +C RESU ;RESIDUAL OF U-EQUATION +C RESV ;RESIDUAL OF V-EQUATION +C RESW ;RESIDUAL OF W-EQUATION +C +C NRNU ;NUMBER OF U-EQUATION ITERATIONS +C NRNV ;NUMBER OF V-EQUATION ITERATIONS +C NRNW ;NUMBER OF W-EQUATION ITERATIONS +C +C IERR ; RETURN CODE TO REPORT ERROR OCCURENCE +C 0 --- NORMAL TERMINATION +C 1 --- A FATAL ERROR HAS OCCURED +C +C (3) INPUT-OUTPUT +C U (IP); X-DIR. VELOCITY COMPONENT +C V (IP); Y-DIR. VELOCITY COMPONENT +C W (IP); Z-DIR. 
VELOCITY COMPONENT +C +C (4) WORK +C LPFIX (IB); VELOCITY FIX BOUNDARY CONDITION NODES WHICH ARE +C COMPOSED OF INLET, WALL, SYMMETRIC AND OVERSET +C BOUNDARY CONDITIONS NODES +C LWROK (IP); WORK REGION TO MAKE 'LPFIX' +C +C A (I,J,IE); ELEMENT-WISE MATRIX COEFFICIENT WHICH INCLUDES +C ALL THE ERMS AND WILL BE PASSED TO THE MATROX SOLVER +C APCRS (ICRS); NODE-BASE MATRIX COEFFICIENT WHICH INCLUDES +C ALL THE ERMS AND WILL BE PASSED TO THE MATRIX SOLVER +C +C UG (IE); WORK REGION FOR U-VELOCITY AT ELEMENTS +C VG (IE); WORK REGION FOR V-VELOCITY AT ELEMENTS +C WG (IE); WORK REGION FOR W-VELOCITY AT ELEMENTS +C +C RHSU (IP); WORK REGION FOR RHS IN U-EQUATION +C RHSV (IP); WORK REGION FOR RHS IN V-EQUATION +C RHSW (IP); WORK REGION FOR RHS IN W-EQUATION + IERR=0 +C +C +CVEL3D1 [ 1.] SET CONTROL PARAMETER +C + MAXBUF = NE*(N+1) + DTH=DT*0.5E0 +C +CC CRANCK-NICOLSON FOR CONVECTION TERM + CEB1 = 0.0E0 + IF(IFORM.EQ.5) CEB1 = 1.0E0 +C + IF(IFORM.EQ.1.OR.IFORM.EQ.2) THEN +CC CRANCK-NICOLSON FOR DIFF. TERM + CEB2 = 0.0E0 + ELSE +CC EULAR BACKWARD DIFF. TERM + CEB2 = 1.0E0 + END IF +C + IF(IFORM.EQ.2 .OR. IFORM.EQ.4 .OR. IFORM.EQ.5) THEN +C + ELSE + BTDCOE(1) = 0.0E0 + BTDCOE(2) = 0.0E0 + BTDCOE(3) = 0.0E0 + BTDCOE(4) = 0.0E0 + END IF +C + NEHEX=NEX(4) + NHEX=NEX(8) + NSKIP4=NEX(12) +C +CVEL3D1 [ 2.] 
MAKE FIX BOUNDARY NODES LIST (LPFIX) +C + DO 1000 IP = 1 , NP + LFIX3D(IP) = 0 + LPFIX(IP) = 0 + 1000 CONTINUE +C +*POPTION INDEP(LFIX3D) +C*$*ASSERT PERMUTATION ( LPINLT ) +!ocl norecurrence(LFIX3D) + DO 1010 IBP = 1 , NPINLT + LFIX3D(LPINLT(IBP))=1 + U(LPINLT(IBP)) = DEVLP1*UINLT(IBP) + V(LPINLT(IBP)) = DEVLP1*VINLT(IBP) + W(LPINLT(IBP)) = DEVLP1*WINLT(IBP) + 1010 CONTINUE +C +*POPTION INDEP(LFIX3D) +C*$*ASSERT PERMUTATION ( LPWALL ) +!ocl norecurrence(LFIX3D) + DO 1020 IBP = 1 , NPWALL + LFIX3D(LPWALL(IBP))=1 + 1020 CONTINUE +C +CCCCCC*POPTION INDEP(LFIX3D) +CCCCCCC*$*ASSERT PERMUTATION ( LPSYMT ) +CCCCCC DO 130 IBP = 1 , NPSYMT +CCCCCC LFIX3D(LPSYMT(IBP))=1 +CCCCCC 130 CONTINUE +C +*POPTION INDEP(LPFIX) +C*$*ASSERT PERMUTATION ( LFIX3D ) + NPFIX=0 +!ocl norecurrence(LFIX3D) + DO 1070 IP = 1 , NP + IF(LFIX3D(IP).EQ.0) GO TO 1070 + NPFIX=NPFIX+1 + LPFIX(NPFIX) = IP + 1070 CONTINUE +C +C INITIALIZE + DO 1080 IP = 1 , NP + AR (IP)=0.0E0 + WRK04(IP)=0.0E0 + 1080 CONTINUE + IF(ITIME.EQ.0)THEN + GO TO 3000 + ENDIF +C +CVEL3D1 [ 3.] CAL. TIME, ADV. AND VIS. 
TEAM IN LHS AND RHS +C +C* call maprof_time_start(TM_CALUEL) + CALL CALUEL(N1,NE,NP,NEX,NODE,U,V,W,UE,VE,WE,IERR) +C* call maprof_time_stop(TM_CALUEL) +C + DO 1100 IP=1,NP + RHSU(IP)=0.0E0 + RHSV(IP)=0.0E0 + RHSW(IP)=0.0E0 + A0 (IP)=0.0E0 + 1100 CONTINUE +C + DO 1220 IE=1,NE + DO 1210 I=1,N + DO 1200 J=1,N + A(J,I,IE)=0.0E0 + 1200 CONTINUE + 1210 CONTINUE + 1220 CONTINUE +C +C OPERATION COUNTS: FLOP /ELEMENT +C DATA LOADINGS : 1792 WORDS/ELEMENT +C ( 32 WORDS CONTIGUOUSLY, +C 512 WORDS BY STRIDE, AND +C 216 WORDS BY LIST ) +C + call maprof_time_start(TM_VEL3D1_OP1) + DO 1343 ICOLOR=1,NCOLOR(4) +!ocl norecurrence(A,A0,AR,WRK04,RHSU,RHSV,RHSW) + DO 1342 ICPART=1,NCPART(ICOLOR,4) + IES=LLOOP(ICPART ,ICOLOR,4) + IEE=LLOOP(ICPART+1,ICOLOR,4)-1 +!ocl nosimd +!ocl noswp + DO 1341 IE=IES,IEE + DO 1340 I=1,NHEX + IP =NODE(I,IE) + IP1=NODE(1,IE) + IP2=NODE(2,IE) + IP3=NODE(3,IE) + IP4=NODE(4,IE) + IP5=NODE(5,IE) + IP6=NODE(6,IE) + IP7=NODE(7,IE) + IP8=NODE(8,IE) +C + UU=UE(IE) + VV=VE(IE) + WW=WE(IE) +C + IELM0 = 1+NSKIP4*(IE-1) +C + IELM1A=IELM0+(I-1)+NHEX*0 + IELM2A=IELM0+(I-1)+NHEX*1 + IELM3A=IELM0+(I-1)+NHEX*2 + IELM4A=IELM0+(I-1)+NHEX*3 + IELM5A=IELM0+(I-1)+NHEX*4 + IELM6A=IELM0+(I-1)+NHEX*5 + IELM7A=IELM0+(I-1)+NHEX*6 + IELM8A=IELM0+(I-1)+NHEX*7 + IELM1B=IELM0+0+NHEX*(I-1) + IELM2B=IELM0+1+NHEX*(I-1) + IELM3B=IELM0+2+NHEX*(I-1) + IELM4B=IELM0+3+NHEX*(I-1) + IELM5B=IELM0+4+NHEX*(I-1) + IELM6B=IELM0+5+NHEX*(I-1) + IELM7B=IELM0+6+NHEX*(I-1) + IELM8B=IELM0+7+NHEX*(I-1) +C + ABTD1 = 0.5E0*DT*BTDCOE(4) + & *(UU*(UU*EXX(IELM1A)+VV*EXY(IELM1A)+WW*EXZ(IELM1A)) + & +VV*(UU*EXY(IELM1B)+VV*EYY(IELM1A)+WW*EYZ(IELM1A)) + & +WW*(UU*EXZ(IELM1B)+VV*EYZ(IELM1B)+WW*EZZ(IELM1A)))*DTH + ABTD2 = 0.5E0*DT*BTDCOE(4) + & *(UU*(UU*EXX(IELM2A)+VV*EXY(IELM2A)+WW*EXZ(IELM2A)) + & +VV*(UU*EXY(IELM2B)+VV*EYY(IELM2A)+WW*EYZ(IELM2A)) + & +WW*(UU*EXZ(IELM2B)+VV*EYZ(IELM2B)+WW*EZZ(IELM2A)))*DTH + ABTD3 = 0.5E0*DT*BTDCOE(4) + & *(UU*(UU*EXX(IELM3A)+VV*EXY(IELM3A)+WW*EXZ(IELM3A)) + & 
+VV*(UU*EXY(IELM3B)+VV*EYY(IELM3A)+WW*EYZ(IELM3A)) + & +WW*(UU*EXZ(IELM3B)+VV*EYZ(IELM3B)+WW*EZZ(IELM3A)))*DTH + ABTD4 = 0.5E0*DT*BTDCOE(4) + & *(UU*(UU*EXX(IELM4A)+VV*EXY(IELM4A)+WW*EXZ(IELM4A)) + & +VV*(UU*EXY(IELM4B)+VV*EYY(IELM4A)+WW*EYZ(IELM4A)) + & +WW*(UU*EXZ(IELM4B)+VV*EYZ(IELM4B)+WW*EZZ(IELM4A)))*DTH + ABTD5 = 0.5E0*DT*BTDCOE(4) + & *(UU*(UU*EXX(IELM5A)+VV*EXY(IELM5A)+WW*EXZ(IELM5A)) + & +VV*(UU*EXY(IELM5B)+VV*EYY(IELM5A)+WW*EYZ(IELM5A)) + & +WW*(UU*EXZ(IELM5B)+VV*EYZ(IELM5B)+WW*EZZ(IELM5A)))*DTH + ABTD6 = 0.5E0*DT*BTDCOE(4) + & *(UU*(UU*EXX(IELM6A)+VV*EXY(IELM6A)+WW*EXZ(IELM6A)) + & +VV*(UU*EXY(IELM6B)+VV*EYY(IELM6A)+WW*EYZ(IELM6A)) + & +WW*(UU*EXZ(IELM6B)+VV*EYZ(IELM6B)+WW*EZZ(IELM6A)))*DTH + ABTD7 = 0.5E0*DT*BTDCOE(4) + & *(UU*(UU*EXX(IELM7A)+VV*EXY(IELM7A)+WW*EXZ(IELM7A)) + & +VV*(UU*EXY(IELM7B)+VV*EYY(IELM7A)+WW*EYZ(IELM7A)) + & +WW*(UU*EXZ(IELM7B)+VV*EYZ(IELM7B)+WW*EZZ(IELM7A)))*DTH + ABTD8 = 0.5E0*DT*BTDCOE(4) + & *(UU*(UU*EXX(IELM8A)+VV*EXY(IELM8A)+WW*EXZ(IELM8A)) + & +VV*(UU*EXY(IELM8B)+VV*EYY(IELM8A)+WW*EYZ(IELM8A)) + & +WW*(UU*EXZ(IELM8B)+VV*EYZ(IELM8B)+WW*EZZ(IELM8A)))*DTH +C + AT =E(IELM1A)+E(IELM2A)+E(IELM3A)+E(IELM4A) + * +E(IELM5A)+E(IELM6A)+E(IELM7A)+E(IELM8A) + AT1=DIJ(1,I)*AT + AT2=DIJ(2,I)*AT + AT3=DIJ(3,I)*AT + AT4=DIJ(4,I)*AT + AT5=DIJ(5,I)*AT + AT6=DIJ(6,I)*AT + AT7=DIJ(7,I)*AT + AT8=DIJ(8,I)*AT +C + AC1=(UU*EX(IELM1A)+VV*EY(IELM1A)+WW*EZ(IELM1A))*DTH + AC2=(UU*EX(IELM2A)+VV*EY(IELM2A)+WW*EZ(IELM2A))*DTH + AC3=(UU*EX(IELM3A)+VV*EY(IELM3A)+WW*EZ(IELM3A))*DTH + AC4=(UU*EX(IELM4A)+VV*EY(IELM4A)+WW*EZ(IELM4A))*DTH + AC5=(UU*EX(IELM5A)+VV*EY(IELM5A)+WW*EZ(IELM5A))*DTH + AC6=(UU*EX(IELM6A)+VV*EY(IELM6A)+WW*EZ(IELM6A))*DTH + AC7=(UU*EX(IELM7A)+VV*EY(IELM7A)+WW*EZ(IELM7A))*DTH + AC8=(UU*EX(IELM8A)+VV*EY(IELM8A)+WW*EZ(IELM8A))*DTH +C + AD1=(EXX(IELM1A)+EYY(IELM1A)+EZZ(IELM1A))*VISC(IE)*DTH + AD2=(EXX(IELM2A)+EYY(IELM2A)+EZZ(IELM2A))*VISC(IE)*DTH + AD3=(EXX(IELM3A)+EYY(IELM3A)+EZZ(IELM3A))*VISC(IE)*DTH + 
AD4=(EXX(IELM4A)+EYY(IELM4A)+EZZ(IELM4A))*VISC(IE)*DTH + AD5=(EXX(IELM5A)+EYY(IELM5A)+EZZ(IELM5A))*VISC(IE)*DTH + AD6=(EXX(IELM6A)+EYY(IELM6A)+EZZ(IELM6A))*VISC(IE)*DTH + AD7=(EXX(IELM7A)+EYY(IELM7A)+EZZ(IELM7A))*VISC(IE)*DTH + AD8=(EXX(IELM8A)+EYY(IELM8A)+EZZ(IELM8A))*VISC(IE)*DTH +C + A(1,I,IE)=AT1+AC1*(1.0E0+CEB1)+AD1*(1.0E0+CEB2)+ABTD1 + A(2,I,IE)=AT2+AC2*(1.0E0+CEB1)+AD2*(1.0E0+CEB2)+ABTD2 + A(3,I,IE)=AT3+AC3*(1.0E0+CEB1)+AD3*(1.0E0+CEB2)+ABTD3 + A(4,I,IE)=AT4+AC4*(1.0E0+CEB1)+AD4*(1.0E0+CEB2)+ABTD4 + A(5,I,IE)=AT5+AC5*(1.0E0+CEB1)+AD5*(1.0E0+CEB2)+ABTD5 + A(6,I,IE)=AT6+AC6*(1.0E0+CEB1)+AD6*(1.0E0+CEB2)+ABTD6 + A(7,I,IE)=AT7+AC7*(1.0E0+CEB1)+AD7*(1.0E0+CEB2)+ABTD7 + A(8,I,IE)=AT8+AC8*(1.0E0+CEB1)+AD8*(1.0E0+CEB2)+ABTD8 + A0(IP)=A0(IP)+A(I,I,IE) +C + WRK04(IP)=WRK04(IP) + * +A(1,I,IE)+A(2,I,IE)+A(3,I,IE)+A(4,I,IE) + * +A(5,I,IE)+A(6,I,IE)+A(7,I,IE)+A(8,I,IE) +C + CRHS1=AT1-AC1*(1.0E0-CEB1)-AD1*(1.0E0-CEB2)-ABTD1 + CRHS2=AT2-AC2*(1.0E0-CEB1)-AD2*(1.0E0-CEB2)-ABTD2 + CRHS3=AT3-AC3*(1.0E0-CEB1)-AD3*(1.0E0-CEB2)-ABTD3 + CRHS4=AT4-AC4*(1.0E0-CEB1)-AD4*(1.0E0-CEB2)-ABTD4 + CRHS5=AT5-AC5*(1.0E0-CEB1)-AD5*(1.0E0-CEB2)-ABTD5 + CRHS6=AT6-AC6*(1.0E0-CEB1)-AD6*(1.0E0-CEB2)-ABTD6 + CRHS7=AT7-AC7*(1.0E0-CEB1)-AD7*(1.0E0-CEB2)-ABTD7 + CRHS8=AT8-AC8*(1.0E0-CEB1)-AD8*(1.0E0-CEB2)-ABTD8 +C + RHSU(IP)=RHSU(IP) + * +CRHS1*U(IP1)+CRHS2*U(IP2)+CRHS3*U(IP3)+CRHS4*U(IP4) + * +CRHS5*U(IP5)+CRHS6*U(IP6)+CRHS7*U(IP7)+CRHS8*U(IP8) + RHSV(IP)=RHSV(IP) + * +CRHS1*V(IP1)+CRHS2*V(IP2)+CRHS3*V(IP3)+CRHS4*V(IP4) + * +CRHS5*V(IP5)+CRHS6*V(IP6)+CRHS7*V(IP7)+CRHS8*V(IP8) + RHSW(IP)=RHSW(IP) + * +CRHS1*W(IP1)+CRHS2*W(IP2)+CRHS3*W(IP3)+CRHS4*W(IP4) + * +CRHS5*W(IP5)+CRHS6*W(IP6)+CRHS7*W(IP7)+CRHS8*W(IP8) + 1340 CONTINUE + 1341 CONTINUE + 1342 CONTINUE + 1343 CONTINUE + call maprof_time_stop(TM_VEL3D1_OP1) +C + DO IP=1,NP + AR(IP)=A0(IP) + IF (AR(IP).EQ.0.0E0) AR(IP)=1.0E0 + ENDDO +C +C +C +CVEL3D1 [ 5.] CAL. 
ACCELERATION TERMS IN RHS +C +C + FX0=ACCELX + FY0=ACCELY + FZ0=ACCELZ +!ocl norecurrence(WRK01,WRK02,WRK03) + DO 1500 IE = 1,NE + WRK01(IE)=FX0*DT + WRK02(IE)=FY0*DT + WRK03(IE)=FZ0*DT + 1500 CONTINUE +C +C OPERATION COUNTS: 48 FLOP /ELEMENT +C DATA LOADINGS : 43 WORDS/ELEMENT +C ( 3 WORDS CONTIGUOUSLY, +C 16 WORDS BY STRIDE, AND +C 24 WORDS BY LIST ) +C +C* call maprof_time_start(TM_VEL3D1_OP2) +#ifdef USE_DETAIL + call start_collection('vel3d1_1642') +#endif + DO 1642 ICOLOR=1,NCOLOR(4) +!ocl norecurrence(RHSU,RHSV,RHSW) + DO 1641 ICPART=1,NCPART(ICOLOR,4) + IES=LLOOP(ICPART ,ICOLOR,4) + IEE=LLOOP(ICPART+1,ICOLOR,4)-1 + DO 1640 IE=IES,IEE + IP1=NODE(1,IE) + IP2=NODE(2,IE) + IP3=NODE(3,IE) + IP4=NODE(4,IE) + IP5=NODE(5,IE) + IP6=NODE(6,IE) + IP7=NODE(7,IE) + IP8=NODE(8,IE) +C + RHSU(IP1)=RHSU(IP1)+WRK01(IE)*SN(1,IE) + RHSU(IP2)=RHSU(IP2)+WRK01(IE)*SN(2,IE) + RHSU(IP3)=RHSU(IP3)+WRK01(IE)*SN(3,IE) + RHSU(IP4)=RHSU(IP4)+WRK01(IE)*SN(4,IE) + RHSU(IP5)=RHSU(IP5)+WRK01(IE)*SN(5,IE) + RHSU(IP6)=RHSU(IP6)+WRK01(IE)*SN(6,IE) + RHSU(IP7)=RHSU(IP7)+WRK01(IE)*SN(7,IE) + RHSU(IP8)=RHSU(IP8)+WRK01(IE)*SN(8,IE) +C + RHSV(IP1)=RHSV(IP1)+WRK02(IE)*SN(1,IE) + RHSV(IP2)=RHSV(IP2)+WRK02(IE)*SN(2,IE) + RHSV(IP3)=RHSV(IP3)+WRK02(IE)*SN(3,IE) + RHSV(IP4)=RHSV(IP4)+WRK02(IE)*SN(4,IE) + RHSV(IP5)=RHSV(IP5)+WRK02(IE)*SN(5,IE) + RHSV(IP6)=RHSV(IP6)+WRK02(IE)*SN(6,IE) + RHSV(IP7)=RHSV(IP7)+WRK02(IE)*SN(7,IE) + RHSV(IP8)=RHSV(IP8)+WRK02(IE)*SN(8,IE) +C + RHSW(IP1)=RHSW(IP1)+WRK03(IE)*SN(1,IE) + RHSW(IP2)=RHSW(IP2)+WRK03(IE)*SN(2,IE) + RHSW(IP3)=RHSW(IP3)+WRK03(IE)*SN(3,IE) + RHSW(IP4)=RHSW(IP4)+WRK03(IE)*SN(4,IE) + RHSW(IP5)=RHSW(IP5)+WRK03(IE)*SN(5,IE) + RHSW(IP6)=RHSW(IP6)+WRK03(IE)*SN(6,IE) + RHSW(IP7)=RHSW(IP7)+WRK03(IE)*SN(7,IE) + RHSW(IP8)=RHSW(IP8)+WRK03(IE)*SN(8,IE) + 1640 CONTINUE + 1641 CONTINUE + 1642 CONTINUE +#ifdef USE_DETAIL + call stop_collection('vel3d1_1642') +#endif +C* call maprof_time_stop(TM_VEL3D1_OP2) +C +C +CVEL3D1 [ 6.] 
MAKE CRS MATIRX + call maprof_time_start(TM_E2PMATR) + CALL E2PMATR(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, + * N2,N1,NE,NCRS,A,APCRS,LTAB,IERR) + call maprof_time_stop(TM_E2PMATR) + IF (IERR.NE.0) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSGC + RETURN + ENDIF +C +C +C SUPERIMPOSE DIAGONAL TERM (AR) +C + IDUM=1 +C* call maprof_time_start(TM_VEL3D1_COM) + CALL DDCOMX(IPART,IDUM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, +C Fj +C * AR,AR,AR,NP,IUT0,IERR,RX,RY,MAXBUF) + * AR,AR,AR,NP,IUT0,IERR,rx_desc,ry_desc,MAXBUF) +C Fj +C* call maprof_time_stop(TM_VEL3D1_COM) + IF(IERR.NE.0) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSGC + RETURN + ENDIF +C +CVEL3D1 [10.] DIAGONAL SCALING OF CRS MATRIX AND RHS +C + call maprof_time_start(TM_DGNSCL) + CALL DGNSCL(APCRS,AR,NP,NCRS,NPP) + call maprof_time_stop(TM_DGNSCL) +C + DO 4100 IP=1, NP + RHSU(IP) = RHSU(IP)/AR(IP) + RHSV(IP) = RHSV(IP)/AR(IP) + RHSW(IP) = RHSW(IP)/AR(IP) + 4100 CONTINUE +C + IDUM = 3 +C* call maprof_time_start(TM_VEL3D1_COM) + CALL DDCOMX(IPART,IDUM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, +C Fj +C * RHSU,RHSV,RHSW,NP,IUT0,IERR,RX,RY,MAXBUF) + * RHSU,RHSV,RHSW,NP,IUT0,IERR,rx_desc,ry_desc,MAXBUF) +C Fj +C* call maprof_time_stop(TM_VEL3D1_COM) + IF(IERR.NE.0) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSGC + RETURN + ENDIF +C +CVEL3D1 [11.] CLEAR CRS MATRIX FOR DIRICHLET B.C. +C + call maprof_time_start(TM_CLRCRS) + CALL CLRCRS(APCRS,NP,NCRS,IPCRS,NPP, + * LFIX3D,NUMIP,UG) + call maprof_time_stop(TM_CLRCRS) +C +CVEL3D1 [12.] ITERATIONS FOR SOLVING MOMENTUM EQUATIONS +C + 3000 CONTINUE +C + IF(ITIME.EQ.0) THEN + NITRE=0 + ELSE + NITRE=1 + END IF +C + NITRU=0 + NITRV=0 + NITRW=0 +C + IRESU=0 + IRESV=0 + IRESW=0 +C + DO 5000 IITRE=0,NITRE + IF(IITRE.EQ.0) GOTO 5100 +C +C OPERATION COUNTS: 4 FLOP /NODE/NPPAVE/ITERATION +C DATA LOADINGS : 6 WORDS/NODE/NPPAVE/ITERATION +C ( 4 WORDS CONTIGUOUSLY, +C 0 WORDS BY 4-WORD STRIDE, AND +C 2 WORDS BY LIST ) +C NPPAVE: AVE. NUMBER OF ADJACENT NODES AT A NODE +C --> 15 (TET), 20 (PRD,WED), 27(HEX.) 
+C +C----------------------------------------------------------------------- +C +C +C +CVEL3D1 [12.1] CALL MATRIX SOLVER +C + IERR1=1 + IERR2=1 + IERR3=1 +C + call maprof_time_start(TM_CRSCVA) + IF(JUNROL.EQ.1) THEN + CALL CRSCVA(NP,NPPMAX,NCRS,NCRS2,NPP,APCRS,TACRS) + ENDIF + call maprof_time_stop(TM_CRSCVA) +C + call maprof_time_start(TM_BCGS3X) + CALL BCGS3X(IBCGS,IPART,NMAX,EPS,ME,N,NE,NP, + * NPP,NCRS,IPCRS,APCRS,RHSU,U,NITRU,RESU, + * NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND,NUMIP, +C Fj +C * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,WRK05,RX,RY, + * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,WRK05, + * rx_desc,ry_desc, +C Fj + * IUT0,IERR1, + * JUNROL,NPPMAX,NCRS2,TS,TACRS,ITPCRS) + IF(NITRU.LT.NMAX) IRESU=1 + CALL BCGS3X(IBCGS,IPART,NMAX,EPS,ME,N,NE,NP, + * NPP,NCRS,IPCRS,APCRS,RHSV,V,NITRV,RESV, + * NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND,NUMIP, +C Fj +C * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,WRK05,RX,RY, + * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,WRK05, + * rx_desc,ry_desc, +C Fj + * IUT0,IERR2, + * JUNROL,NPPMAX,NCRS2,TS,TACRS,ITPCRS) + IF(NITRV.LT.NMAX) IRESV=1 + CALL BCGS3X(IBCGS,IPART,NMAX,EPS,ME,N,NE,NP, + * NPP,NCRS,IPCRS,APCRS,RHSW,W,NITRW,RESW, + * NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND,NUMIP, +C Fj +C * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,WRK05,RX,RY, + * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,WRK05, + * rx_desc,ry_desc, +C Fj + * IUT0,IERR3, + * JUNROL,NPPMAX,NCRS2,TS,TACRS,ITPCRS) + IF(NITRW.LT.NMAX) IRESW=1 + call maprof_time_stop(TM_BCGS3X) +C + IF(IERR1.NE.0 .OR. IERR2.NE.0 .OR. IERR3.NE.0) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSGC + RETURN + ENDIF +C + IF(IERR.NE.0) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSGC + RETURN + ENDIF +C +CVEL3D1 [12.2] SET BOUNDARY CONDITIONS +C + 5100 CONTINUE +C +C +CCYY A. INLET BOUNDARY CONDITIONS +C +*POPTION INDEP(U,V,W) +C*$*ASSERT PERMUTATION ( LPINLT ) +!ocl norecurrence(U,V,W) + DO 6010 IBP = 1 , NPINLT + U(LPINLT(IBP)) = DEVLP1*UINLT(IBP) + V(LPINLT(IBP)) = DEVLP1*VINLT(IBP) + W(LPINLT(IBP)) = DEVLP1*WINLT(IBP) + 6010 CONTINUE +C +C +CCYY B. 
WALL BOUNDARY CONDITIONS +C +C +*POPTION INDEP(U,V,W) +C*$*ASSERT PERMUTATION ( LPWALL ) +!ocl norecurrence(U,V,W) + DO 6020 IBP = 1 , NPWALL + U(LPWALL(IBP)) = DEVLP1*UWALL(IBP) + V(LPWALL(IBP)) = DEVLP1*VWALL(IBP) + W(LPWALL(IBP)) = DEVLP1*WWALL(IBP) + 6020 CONTINUE +C +C +CCYY C. SYMMETRIC BOUNDARY CONDITIONS +C +C +*POPTION INDEP(U,V,W) +C*$*ASSERT PERMUTATION ( LPSYMT ) +!ocl norecurrence(U,V,W) + DO 6040 IBP = 1 , NPSYMT + COF = XPSYMT(IBP)*U(LPSYMT(IBP)) + * +YPSYMT(IBP)*V(LPSYMT(IBP)) + * +ZPSYMT(IBP)*W(LPSYMT(IBP)) + U(LPSYMT(IBP)) = U(LPSYMT(IBP))-COF*XPSYMT(IBP) + V(LPSYMT(IBP)) = V(LPSYMT(IBP))-COF*YPSYMT(IBP) + W(LPSYMT(IBP)) = W(LPSYMT(IBP))-COF*ZPSYMT(IBP) + 6040 CONTINUE +C +C +C +C - RETURN IF ALL THREE EQUATIONS ARE CONVERGED - +C + IF(IRESU*IRESV*IRESW.EQ.1) THEN + RETURN + ENDIF +C +C +CCC SET R.H.S THAT CORRESPONDS TO THE DIRECLET BOUNDARY NODES +C +C +*POPTION INDEP(RHSU,RHSV,RHSW) +C*$*ASSERT PERMUTATION ( LPFIX ) +!ocl norecurrence(RHSU,RHSV,RHSW) + DO 6200 IBP = 1 , NPFIX + IP=LPFIX(IBP) + RHSU(IP) = U(IP) + RHSV(IP) = V(IP) + RHSW(IP) = W(IP) + 6200 CONTINUE +C + 5000 CONTINUE +C + RETURN + END diff --git a/FFB-MINI/src/xmpAPI_vel3d2.F b/FFB-MINI/src/xmpAPI_vel3d2.F new file mode 100755 index 0000000..b9babb1 --- /dev/null +++ b/FFB-MINI/src/xmpAPI_vel3d2.F @@ -0,0 +1,408 @@ + SUBROUTINE VEL3D2(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, + * DIVMAX,DIVAV,XDIVMX,YDIVMX,ZDIVMX, + * DT,U,V,W,P,NODE,X,Y,Z,CM, + * DNXYZ,DNXI,DNYI,DNZI, + * ME,NE,NP,N,N1,NEX, + * ITIME,DEVLP1, + * NPINLT,LPINLT,UINLT,VINLT,WINLT, + * NPWALL,LPWALL,UWALL,VWALL,WWALL, + * NPSYMT,NPSYM2,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, + * IPART ,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, +C Fj +C * RX,RY,MWRK,WRKN,FXYZ,UG, + * rx_desc,ry_desc,MWRK,WRKN,FXYZ,UG, +C Fj + * IUT0,IERR, + * WRK02) + IMPLICIT NONE +C + REAL*4 WRK02(NE) + INTEGER*4 MWRK + REAL*4 WRKN(MWRK,9) +C + INTEGER*4 MCOLOR,MCPART + INTEGER*4 NCOLOR(4),NCPART(MCOLOR,4),LLOOP(MCPART,MCOLOR,4) +C + INTEGER*4 NODE, + * 
ME,NE,NP,N,N1,NEX, + * ITIME, + * NPINLT,LPINLT,NPWALL,LPWALL,NPSYMT,NPSYM2,LPSYMT, + * IPART ,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, + * IUT0,IERR + REAL*4 DIVMAX,DIVAV,XDIVMX,YDIVMX,ZDIVMX, + * DT,U,V,W,P,X,Y,Z,CM, + * DNXYZ,DNXI,DNYI,DNZI, + * DEVLP1, + * UINLT,VINLT,WINLT,UWALL,VWALL,WWALL, + * XPSYMT,YPSYMT,ZPSYMT, + * RX,RY,FXYZ,UG +C Fj + INTEGER*8 rx_desc,ry_desc +C Fj +C +C [IN:MID NODE COLORING] +C + DIMENSION NEX(8) + DIMENSION + 2 U(NP),V(NP),W(NP),P(NE),X(NP),Y(NP),Z(NP), + 3 NODE(N1,NE),CM(NP), + 4 DNXYZ(3,N1,ME), + 5 DNXI(N1,ME),DNYI(N1,ME),DNZI(N1,ME) +C + DIMENSION LPINLT(NPINLT), + 1 UINLT (NPINLT),VINLT (NPINLT),WINLT (NPINLT), + 2 LPWALL(NPWALL), + 3 UWALL(NPWALL), VWALL(NPWALL), WWALL(NPWALL), + 4 LPSYMT(NPSYM2), + 5 XPSYMT(NPSYM2),YPSYMT(NPSYM2),ZPSYMT(NPSYM2) +C + DIMENSION LDOM(NDOM),NBPDOM(NDOM), + 1 IPSLF(MBPDOM,NDOM),IPSND(MBPDOM,NDOM) +C + DIMENSION RX(0:N,ME),RY(0:N,ME), + 1 FXYZ(3,NP),UG(*) +C + CHARACTER*60 ERMSGC + & /' ## SUBROUTINE FRCT2T: FATAL ERROR REPORT ; RETURNED' / +C + INTEGER*4 IDIM +C + INTEGER*4 ICOLOR,ICPART,IES,IEE, + * IP1,IP2,IP3,IP4,IP5,IP6,IP7,IP8,IBP, + * MAXBUF,IP,IE,IEMAX,NUM + REAL*4 COF,PWRK,ABSDIV +C +C COMPUTE NEXT TIME STEP VELOCITY BY FINITE ELEMENT METHODS +C ( 3-D CALCULATION : SINGLE WORD & MULTI ELEMENT VERSION ) +C 2009.01.13 Y.YAMADE +C +C ************ COMPUTATIONAL COST EXCEPT FOR MATRIX SOLVER ******* +C =============================HEX====================================== +C OPERATION COUNTS: 36 FLOP /ELEMENT +C DATA LOADINGS : 57 WORDS/ELEMENT +C ( 1 WORDS CONTIGUOUSLY, +C 32 WORDS BY STRIDE, AND +C 24 WORDS BY LIST ) +C +C ARGUMENT LISTINGS +C (1) INPUT +C NLOOP ;NUMBER OF LOOPS +C LLOOP ;POINTER FOR SPLITTED ELEMENT LIST +C +C DT ; TIME INCTREMENT +C P (IE); ELEMENT PRESSURE +C NODE (I,IE); NODE NO. TABLE BASED ON ELEMENT +C X (IP); X-COORDINATES OF NODES +C Y (IP); Y-COORDINATES OF NODES +C Z (IP); Z-COORDINATES OF NODES +C +C IBUSNQ ; FLAG FOR BOUSSINESQ ASSUMPTION +C 0 -- CAL. 
DENSITY CHANGE FOR ONLY GRAVITY TERM +C 1 -- CAL. DENSITY CHANGE FOR ALL THE TERM +C RHO (IE); DENSITY +C +C CM (IP); LUMPED MASS MATRIX +C +C VOL (IE); ELEMENT VOLUME +C +C NEX(I) ; INCLUDES NUMBER OF ELEMENTS AND NUMBER OF LOCAL NODES +C AS FOLOOWS +C NEX(1) ; NUMBER OF TET. ELEMENTS +C NEX(2) ; NUMBER OF PYRAMID ELEMENTS +C NEX(3) ; NUMBER OF WEGDE ELEMENTS +C NEX(4) ; NUMBER OF HEX. ELEMENTS +C NEX(5) ; NUMBER OF LOCAL NODES IN A TET. ELEMENT (=4) +C NEX(6) ; NUMBER OF LOCAL NODES IN A PYRAMID ELEMENT (=5) +C NEX(7) ; NUMBER OF LOCAL NODES IN A WEGDE ELEMENT (=6) +C NEX(8) ; NUMBER OF LOCAL NODES IN A HEX. ELEMENT (=8) +C +C NP ; NUMBER OF TOTAL NODES +C N ; NUMBER OF NODES ASSIGNED TO ONE ELEMENT ( = 8 ) +C +C ITIME ; CUREENT TIME STEP +C DEVLP1 ; DEVELOPMENT FUNCTION FOR INLET VELOCITIES +C DEVLP2 ; DEVELOPMENT FUNCTION FOR ALL THE OTHER VALUES +C +C NPSLID ; NUMBER OF SOLID MEDIA NODES +C LPSLID (IBP); SOLID MEDIA NODES +C +C A. INLET BOUNDARY +C NPINLT ; NUMBER OF INLET BOUNDARY NODES +C LPINLT (IBP); INLET BOUNDARY NODES +C UINLT (IBP); INLET BOUNDARY U-VELOCITIES +C VINLT (IBP); INLET BOUNDARY V-VELOCITIES +C WINLT (IBP); INLET BOUNDARY W-VELOCITIES +C +C B. WALL BOUNDARY +C NPWALL ; NUMBER OF WALL BOUNDARY NODES +C LPWALL (IBP); WALL BOUNDARY NODES +C UWALL (IBP); WALL BOUNDARY U-VELOCITIES +C VWALL (IBP); WALL BOUNDARY V-VELOCITIES +C WWALL (IBP); WALL BOUNDARY W-VELOCITIES +C XPWALL (IBP); X NORMAL OF WALL BOUNDARY NODES +C YPWALL (IBP); Y NORMAL OF WALL BOUNDARY NODES +C ZPWALL (IBP); Z NORMAL OF WALL BOUNDARY NODES +C NPCON ; NUMBER OF CORNER WALL NODES +C LPCON (IPC); CORNER WALL NODES +C +C C. SYMMETRIC BOUNDARY +C NPSYMT ; NUMBER OF SYMMETRIC BOUNDARY NODES +C LPSYMT (IBP); SYMMETRIC BOUNDARY NODES +C XPSYMT (IBP); X-DIR COMPONENT OF SYMMETRIC NODE NORMAL VECTOR +C YPSYMT (IBP); Y-DIR COMPONENT OF SYMMETRIC NODE NORMAL VECTOR +C ZPSYMT (IBP); Z-DIR COMPONENT OF SYMMETRIC NODE NORMAL VECTOR +C +C D. 
CYCLIC BOUNDARY +C NPCCL ; NUMBER OF CYCLIC BOUNDARY NODES +C LPCCL1 (IBP); CYCLIC BOUNDARY NODES-1 +C LPCCL2 (IBP); CYCLIC BOUNDARY NODES-2 +C +C E. INTER-CONNECT BOUNDARY +C IPART ; SUB-DOMAIN NUMBER THAT THIS TASK SHOULD TAKE/IS +C TAKING CARE OF. IPART BEING SET ZERO MEANS THAT +C THE PROGRAM SHOULD RUN/IS RUNNING IN SERIAL +C MODE. +C +C LDOM (IDOM); NEIBERING SUB-DOMAIN NUMBER +C NBPDOM(IDOM); NUMBER OF INTER-CONNECT BOUNDARY NODES +C SHARING WITH THE IDOM'TH NEIBERING SUB-DOMAIN, +C LDOM(IDOM) +C NDOM ; NUMBER OF THE NERIBERING SUB-DOMAINS +C IPSLF (IBP,IDOM); INTER-CONNECT BOUNDARY NODE NUMBER IN THE +C CALLING TASK'S SUB-DOMAIN, FOR THE IDOM'TH +C NEIBERING SUB-DOMAIN, LDOM(IDOM) +C IPSND (IBP,IDOM); INTER-CONNECT BOUNDARY NODE NUMBER IN THE +C SUB-DOMAIN THAT IS RECEIVING THE CALLING +C TASK'S RESIDUALS. +C MBPDOM ; THE MAXIMUM NUMBER OF THE INTER-CONNECT +C BOUNDARY NODES FOR ONE NEIBERING SUB-DOMAIN +C +C IUT0 ; FILE NUMBER TO REPORT ERROR OCCURENCE +C +C (2) OUTPUT +C IERR ; RETURN CODE TO REPORT ERROR OCCURENCE +C 0 --- NORMAL TERMINATION +C 1 --- A FATAL ERROR HAS OCCURED +C DIVMAX ; MAXIMUM ABSOLUTE DIVERGENT +C DIVAV ; SPATIALLY AVERAGED ABSOLUTE DIVERGENT +C +C (3) INPUT-OUTPUT +C U (IP); X-DIR. VELOCITY COMPONENT +C V (IP); Y-DIR. VELOCITY COMPONENT +C W (IP); Z-DIR. VELOCITY COMPONENT +C +C (4) WORK +C RX (I,IE); HOLDS X-DIR. ELEMENT MOMENTUM RESIDUAL +C RY (I,IE); HOLDS Y-DIR. ELEMENT MOMENTUM RESIDUAL +C FX (IP); HOLDS X-DIR. NODE MOMENTUM RESIDUAL +C FY (IP); HOLDS Y-DIR. NODE MOMENTUM RESIDUAL +C FZ (IP); HOLDS Z-DIR. NODE MOMENTUM RESIDUAL +C UG (IE); HOLDS ELEMENT CENTER U-VELOCITY +C VG (IE); HOLDS ELEMENT CENTER V-VELOCITY +C WG (IE); HOLDS ELEMENT CENTER W-VELOCITY +C +C + MAXBUF = NE*(N+1) +C +C + IF(ITIME.EQ.0) GO TO 400 +C +C +C CAL. 
PRESSURE (TO ALL FRAMES) +C +C + DO 100 IP = 1 , NP + FXYZ(1,IP)=0.0E0 + FXYZ(2,IP)=0.0E0 + FXYZ(3,IP)=0.0E0 + 100 CONTINUE +C + DO 110 IE=1, NE + WRK02(IE) = P(IE) + 110 CONTINUE +C +C OPERATION COUNTS: 48 FLOP /ELEMENT +C DATA LOADINGS : 57 WORDS/ELEMENT +C ( 1 WORDS CONTIGUOUSLY, +C 32 WORDS BY STRIDE, AND +C 24 WORDS BY LIST ) +C + DO 242 ICOLOR=1,NCOLOR(4) +!ocl norecurrence(FXYZ) + DO 241 ICPART=1,NCPART(ICOLOR,4) + IES=LLOOP(ICPART ,ICOLOR,4) + IEE=LLOOP(ICPART+1,ICOLOR,4)-1 +!ocl nosimd +!ocl noswp + DO 240 IE=IES,IEE + IP1=NODE(1,IE) + IP2=NODE(2,IE) + IP3=NODE(3,IE) + IP4=NODE(4,IE) + IP5=NODE(5,IE) + IP6=NODE(6,IE) + IP7=NODE(7,IE) + IP8=NODE(8,IE) +C + PWRK = WRK02(IE) +C + FXYZ(1,IP1)=FXYZ(1,IP1)+PWRK*DNXYZ(1,1,IE) + FXYZ(2,IP1)=FXYZ(2,IP1)+PWRK*DNXYZ(2,1,IE) + FXYZ(3,IP1)=FXYZ(3,IP1)+PWRK*DNXYZ(3,1,IE) +C + FXYZ(1,IP2)=FXYZ(1,IP2)+PWRK*DNXYZ(1,2,IE) + FXYZ(2,IP2)=FXYZ(2,IP2)+PWRK*DNXYZ(2,2,IE) + FXYZ(3,IP2)=FXYZ(3,IP2)+PWRK*DNXYZ(3,2,IE) +C + FXYZ(1,IP3)=FXYZ(1,IP3)+PWRK*DNXYZ(1,3,IE) + FXYZ(2,IP3)=FXYZ(2,IP3)+PWRK*DNXYZ(2,3,IE) + FXYZ(3,IP3)=FXYZ(3,IP3)+PWRK*DNXYZ(3,3,IE) +C + FXYZ(1,IP4)=FXYZ(1,IP4)+PWRK*DNXYZ(1,4,IE) + FXYZ(2,IP4)=FXYZ(2,IP4)+PWRK*DNXYZ(2,4,IE) + FXYZ(3,IP4)=FXYZ(3,IP4)+PWRK*DNXYZ(3,4,IE) +C + FXYZ(1,IP5)=FXYZ(1,IP5)+PWRK*DNXYZ(1,5,IE) + FXYZ(2,IP5)=FXYZ(2,IP5)+PWRK*DNXYZ(2,5,IE) + FXYZ(3,IP5)=FXYZ(3,IP5)+PWRK*DNXYZ(3,5,IE) +C + FXYZ(1,IP6)=FXYZ(1,IP6)+PWRK*DNXYZ(1,6,IE) + FXYZ(2,IP6)=FXYZ(2,IP6)+PWRK*DNXYZ(2,6,IE) + FXYZ(3,IP6)=FXYZ(3,IP6)+PWRK*DNXYZ(3,6,IE) +C + FXYZ(1,IP7)=FXYZ(1,IP7)+PWRK*DNXYZ(1,7,IE) + FXYZ(2,IP7)=FXYZ(2,IP7)+PWRK*DNXYZ(2,7,IE) + FXYZ(3,IP7)=FXYZ(3,IP7)+PWRK*DNXYZ(3,7,IE) +C + FXYZ(1,IP8)=FXYZ(1,IP8)+PWRK*DNXYZ(1,8,IE) + FXYZ(2,IP8)=FXYZ(2,IP8)+PWRK*DNXYZ(2,8,IE) + FXYZ(3,IP8)=FXYZ(3,IP8)+PWRK*DNXYZ(3,8,IE) + 240 CONTINUE + 241 CONTINUE + 242 CONTINUE +C +C + DO 260 IP=1,NP + WRKN(IP,1)=FXYZ(1,IP) + WRKN(IP,2)=FXYZ(2,IP) + WRKN(IP,3)=FXYZ(3,IP) + 260 CONTINUE +C +C +C SUPERIMPOSE NEIBERING ELEMENT 
CONTRIBUTIONS +C +C + IDIM=3 + CALL DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, + * WRKN(1,1),WRKN(1,2),WRKN(1,3),NP,IUT0,IERR, +C Fj +C * RX,RY,MAXBUF) + * rx_desc,ry_desc,MAXBUF) +C Fj + IF(IERR.NE.0) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSGC + RETURN + ENDIF +C + DO 270 IP=1,NP + FXYZ(1,IP)=WRKN(IP,1) + FXYZ(2,IP)=WRKN(IP,2) + FXYZ(3,IP)=WRKN(IP,3) + 270 CONTINUE +C +C UPDATE VELOCITY COMPONENTS +C + +C + DO 300 IP = 1 , NP + U(IP) = U(IP)+DT*CM(IP)*FXYZ(1,IP) + V(IP) = V(IP)+DT*CM(IP)*FXYZ(2,IP) + W(IP) = W(IP)+DT*CM(IP)*FXYZ(3,IP) + 300 CONTINUE +C + 400 CONTINUE +C +C +C SET AND PRESCRIBE BOUNDARY CONDITIONS +C +C A. INLET BOUNDARY CONDITIONS +C +*POPTION INDEP(U,V,W) +C*$*ASSERT PERMUTATION ( LPINLT ) +!ocl norecurrence(U,V,W) + DO 500 IBP = 1 , NPINLT + U(LPINLT(IBP)) = DEVLP1*UINLT(IBP) + V(LPINLT(IBP)) = DEVLP1*VINLT(IBP) + W(LPINLT(IBP)) = DEVLP1*WINLT(IBP) + 500 CONTINUE +C +C B. WALL BOUNDARY CONDITIONS +C +*POPTION INDEP(U,V,W) +C*$*ASSERT PERMUTATION ( LPWALL ) +!ocl norecurrence(U,V,W) + DO 600 IBP = 1 , NPWALL + U(LPWALL(IBP)) = DEVLP1*UWALL(IBP) + V(LPWALL(IBP)) = DEVLP1*VWALL(IBP) + W(LPWALL(IBP)) = DEVLP1*WWALL(IBP) + 600 CONTINUE +C +C C. 
SYMMETRIC BOUNDARY CONDITIONS +C +*POPTION INDEP(U,V,W) +C*$*ASSERT PERMUTATION ( LPSYMT ) +!ocl norecurrence(U,V,W) + DO 700 IBP = 1 , NPSYMT + COF = XPSYMT(IBP)*U(LPSYMT(IBP)) + & +YPSYMT(IBP)*V(LPSYMT(IBP)) + & +ZPSYMT(IBP)*W(LPSYMT(IBP)) + U(LPSYMT(IBP)) = U(LPSYMT(IBP))-COF*XPSYMT(IBP) + V(LPSYMT(IBP)) = V(LPSYMT(IBP))-COF*YPSYMT(IBP) + W(LPSYMT(IBP)) = W(LPSYMT(IBP))-COF*ZPSYMT(IBP) + 700 CONTINUE +C +C +C +C COMPUTE DIV(U) AT ELEMENTS +C +C OPERATION COUNTS: 26 FLOP /ELEMENT +C DATA LOADINGS : 40 WORDS/ELEMENT +C ( 16 WORDS CONTIGUOUSLY, +C 12 WORDS BY STRIDE, AND +C 12 WORDS BY LIST ) +C +C + CALL FILD3X(ME,NE,NP,NEX,N1, + & U,V,W,UG,NODE,DNXI,DNYI,DNZI) +C + DIVMAX = 0.E0 + DIVAV = 0.E0 + IEMAX = 1 +C + NUM=0 + DO 2000 IE = 1 , NE + NUM=NUM+1 + ABSDIV = ABS(UG(IE)) + DIVAV = DIVAV+ABSDIV + IF(DIVMAX.LE.ABSDIV) IEMAX=IE + DIVMAX = AMAX1(ABSDIV,DIVMAX) + 2000 CONTINUE + DIVAV=DIVAV/FLOAT(NUM) +C +C === HEX === + XDIVMX=( X(NODE(1,IEMAX))+X(NODE(2,IEMAX)) + * +X(NODE(3,IEMAX))+X(NODE(4,IEMAX)) + * +X(NODE(5,IEMAX))+X(NODE(6,IEMAX)) + * +X(NODE(7,IEMAX))+X(NODE(8,IEMAX)) )/8.0E0 + YDIVMX=( Y(NODE(1,IEMAX))+Y(NODE(2,IEMAX)) + * +Y(NODE(3,IEMAX))+Y(NODE(4,IEMAX)) + * +Y(NODE(5,IEMAX))+Y(NODE(6,IEMAX)) + * +Y(NODE(7,IEMAX))+Y(NODE(8,IEMAX)) )/8.0E0 + ZDIVMX=( Z(NODE(1,IEMAX))+Z(NODE(2,IEMAX)) + * +Z(NODE(3,IEMAX))+Z(NODE(4,IEMAX)) + * +Z(NODE(5,IEMAX))+Z(NODE(6,IEMAX)) + * +Z(NODE(7,IEMAX))+Z(NODE(8,IEMAX)) )/8.0E0 +C + RETURN + END From 5aea15e857b0eaa8af6ad97173f43a5d9e8c25df Mon Sep 17 00:00:00 2001 From: tozaki_chisae Date: Wed, 10 Mar 2021 17:08:00 +0900 Subject: [PATCH 47/70] remove 2 files. 
--- FFB-MINI/src/Makefile.coarray | 129 --------------------------- FFB-MINI/src/dd_mpi/Makefile.coarray | 28 ------ 2 files changed, 157 deletions(-) delete mode 100755 FFB-MINI/src/Makefile.coarray delete mode 100755 FFB-MINI/src/dd_mpi/Makefile.coarray diff --git a/FFB-MINI/src/Makefile.coarray b/FFB-MINI/src/Makefile.coarray deleted file mode 100755 index 0d65c70..0000000 --- a/FFB-MINI/src/Makefile.coarray +++ /dev/null @@ -1,129 +0,0 @@ -LD = -LDFLAGS = - -include ./make_setting - -ifndef LD -LD = $(FC) -endif -ifndef LDFLAGS -LDFLAGS = $(FFLAGS) -endif - -LES3X.MPI = ../bin/les3x.mpi -FFB_MINI = ../bin/ffb_mini - -VERSION = 1.0.0 - -FFLAGS += -DFFB_MINI_VERSION=\"$(VERSION)\" - -all: $(LES3X.MPI) $(FFB_MINI) - -OBJS = \ - les3x.o bcgs3x.o bcgsxe.o calax3.o \ - callap.o caluel.o clrcrs.o \ - csin3x.o datcnv.o dgnscl.o e2plst.o e2pmtr.o \ - elm3dx.o errchk.o \ - fild3x.o fld3x2.o \ - grad3x.o icalel.o int3dx.o \ - lesrop.o lesrpx.o lessfx.o lumpex.o \ - match4.o matgau.o mkcrs.o neibr2.o \ - nodlex.o pres3e.o rcmelm.o reordr.o \ - rfname.o sethex.o srfexx.o subcnv.o \ - vel3d1.o vel3d2.o \ - mfname.o \ - miniapp_util.o - -ifeq (, $(findstring -DNO_METIS, $(FFLAGS))) -OBJS += metis_wrapper.o -endif - -ifeq (, $(findstring -DNO_REFINER, $(FFLAGS))) -OBJS += lrfnms.o extrfn.o -endif - -param.h: param.h.in - ./make_param_h.sh $< > $@ - -ifneq (, $(findstring -DPROF_MAPROF, $(FFLAGS))) - -MAPROF_DIR = ma_prof/src -MAPROF_LIB = $(MAPROF_DIR)/libmaprof_f.a - -FFLAGS += -I$(MAPROF_DIR) -LDFLAGS += -L$(MAPROF_DIR) -LIBS += -lmaprof_f - -$(OBJS): $(MAPROF_LIB) - -export -$(MAPROF_LIB): - $(MAKE) -C $(MAPROF_DIR) f_mpi MAPROF_F="FC FFLAGS" MAX_SECTIONS=30 - -endif - -LIB_GF2 = gf2/libgf2.a -LIB_DD_MPI = dd_mpi/libdd_mpi.a -#LIB_DD_MPI = dd_mpi/dd_mpi.o dd_mpi/ddcom4.o - -$(LIB_GF2): - $(MAKE) -C gf2 - -$(LIB_DD_MPI): - $(MAKE) -C dd_mpi - -$(LES3X.MPI): $(LIB_GF2) $(LIB_DD_MPI) $(OBJS) les3x_main.o - $(LD) $(LDFLAGS) -o $@ $(OBJS) les3x_main.o $(LIB_GF2) $(LIB_DD_MPI) 
$(LIBS) - -$(FFB_MINI): $(LIB_GF2) $(LIB_DD_MPI) $(OBJS) ffb_mini_main.o makemesh.o - $(LD) $(LDFLAGS) -o $@ $(OBJS) ffb_mini_main.o makemesh.o $(LIB_GF2) $(LIB_DD_MPI) $(LIBS) - -.SUFFIXES: -.SUFFIXES: .f .f90 .F .F90 .c .o - -.c.o: - $(CC) $(CFLAGS) -c $< -.f.o: - $(FC) $(FFLAGS) -c $< -.f90.o: - $(FC) $(FFLAGS) -c $< -.F.o: - $(FC) $(FFLAGS) -c $< -.F90.o: - $(FC) $(FFLAGS) -c $< - -clean: - rm -rf *.o *.mod *.xmod *.lst param.h - $(MAKE) -C gf2 clean - $(MAKE) -C dd_mpi clean -ifneq (, $(findstring -DPROF_MAPROF, $(FFLAGS))) - $(MAKE) -C $(MAPROF_DIR) clean -endif - -distclean: clean - rm -rf ../bin/* - $(MAKE) -C ../test clean - -test: $(FFB_MINI) - $(MAKE) -C ../test - -test_k: $(FFB_MINI) - $(MAKE) -C ../test test_k - -test_fx10: $(FFB_MINI) - $(MAKE) -C ../test test_fx10 - - -ffb_mini_main.o: param.h -ffb_mini_main.o: makemesh.o - -les3x.o: timing.h -vel3d1.o: timing.h -bcgs3x.o: timing.h -pres3e.o: timing.h -bcgs3x.o: timing.h -rcmelm.o: timing.h -bcgsxe.o: timing.h -callap.o: timing.h -calax3.o: timing.h -grad3x.o: timing.h diff --git a/FFB-MINI/src/dd_mpi/Makefile.coarray b/FFB-MINI/src/dd_mpi/Makefile.coarray deleted file mode 100755 index d270a26..0000000 --- a/FFB-MINI/src/dd_mpi/Makefile.coarray +++ /dev/null @@ -1,28 +0,0 @@ -include ../make_setting - -AR ?= ar -ARFLAGS ?= rv -RANLIB ?= ranlib - -all: libdd_mpi.a - -OBJS = dd_mpi.o ddcom4.o - -libdd_mpi.a: $(OBJS) - $(AR) $(ARFLAGS) $@ $(OBJS) - $(RANLIB) $@ - -.SUFFIXES: -.SUFFIXES: .f .F .c .F90 .o - -.c.o: - $(CC) $(CFLAGS) -c $< -.f.o: - $(FC) $(FFLAGS) -c $< -.F.o: - $(FC) $(FFLAGS) -c $< -.F90.o: - $(FC) $(FFLAGS) -c $< - -clean: - rm -rf *.o *.a From 7b89972bd8b5ad6733ca053cb74bf1c06d600a32 Mon Sep 17 00:00:00 2001 From: tozaki_chisae Date: Wed, 10 Mar 2021 17:56:52 +0900 Subject: [PATCH 48/70] add 1 file. 
--- FFB-MINI/src/xmpAPI_ffb_mini_main.F90 | 223 ++++++++++++++++++++++++++ 1 file changed, 223 insertions(+) create mode 100755 FFB-MINI/src/xmpAPI_ffb_mini_main.F90 diff --git a/FFB-MINI/src/xmpAPI_ffb_mini_main.F90 b/FFB-MINI/src/xmpAPI_ffb_mini_main.F90 new file mode 100755 index 0000000..9991516 --- /dev/null +++ b/FFB-MINI/src/xmpAPI_ffb_mini_main.F90 @@ -0,0 +1,223 @@ +program ffb_mini +! Fujitsu start 202103 + use mpi +! Fujitsu end 202103 + use makemesh +! Fujitsu start 202103 +! include "mpif.h" +! Fujitsu end 202103 + !implicit none + + integer :: ierr + include 'param.h' + character(9), parameter :: PARAM_FILE = 'PARMLES3X' + integer, parameter :: U = 10 + integer :: myrank, nprocs + integer :: line + integer :: narg, iarg +! integer :: trim + integer :: npx, npy, npz, n, ipx, ipy, ipz, ip + character(20) :: file_parm, file_mesh, file_boun + + character(10) :: test ="off" + character(10) :: ntime = "100" + character(10) :: isolp = "2" + character(10) :: dt = "0.005" + character(10) :: nmaxp = "50" + character(10) :: nmaxt = "5" +!*character(10) :: epst = "1.0e-30" +!*character(10) :: epsp = "1.0e-30" + character(10) :: epst = "0.0" + character(10) :: epsp = "0.0" +!*character(60) :: reorder_ndiv = "#RO_SORT 1 10 10 10" +!*character(60) :: color_partsize = "#RO_COLR 1 2000 2000 2000 2000" +!*character(60) :: unroll = "#AXUNROL" + character(60) :: reorder_ndiv = "" + character(60) :: color_partsize = "" + character(60) :: unroll = "" + + intrinsic :: command_argument_count + +! Fujitsu start 202103 + call MPI_Init(ierr) +! Fujitsu end 202103 + call MPI_Comm_rank(MPI_COMM_WORLD, myrank, ierr) + call MPI_Comm_size(MPI_COMM_WORLD, nprocs, ierr) + + narg = command_argument_count() + if (narg < 4) call print_usage_and_exit + + npx = get_int_arg(1) + npy = get_int_arg(2) + npz = get_int_arg(3) + n = get_int_arg(4) + + if (myrank == 0) then + write(*, *) + write(*, '(a)') 'generate mesh data ...' 
+ write(*, '(" domain arrangement: ", i0, " x ", i0, " x ", i0)') & + npx, npy, npz + write(*, '(" number of elements in each domain: ", i0, "^3 = ", i0)') & + n, n**3 + write(*, '(" total number of elements: ", i0)') & + n**3 * npx * npy * npz + write(*, *) + end if + call flush(6) + call MPI_Barrier(MPI_COMM_WORLD, ierr) + + if (npx*npy*npz /= nprocs) then + if (myrank == 0) then + write(*, '(a)') '***error: npx*npy*npz /= nprocs' + end if + call print_usage_and_exit + end if + + ip = myrank + 1 + + do iarg = 5, narg + call parse_arg(iarg) + end do + + write(file_parm, '("PARMLES3X.P", i4.4)') ip + open(U, file=file_parm, form="formatted") + do line = 1, NLINE + call substitute_param(param_lines(line), '%ntime%', ntime) + call substitute_param(param_lines(line), '%isolp%', isolp) + call substitute_param(param_lines(line), '%dt%', dt) + call substitute_param(param_lines(line), '%nmaxp%', nmaxp) + call substitute_param(param_lines(line), '%nmaxt%', nmaxt) + call substitute_param(param_lines(line), '%epst%', epst) + call substitute_param(param_lines(line), '%epsp%', epsp) + call substitute_param(param_lines(line), '%reorder_ndiv%', reorder_ndiv) + call substitute_param(param_lines(line), '%color_partsize%', color_partsize) + call substitute_param(param_lines(line), '%unroll%', unroll) + write(U, '(a)') trim(param_lines(line)) + end do + close(U) + + if (test == 'on') then + call MPI_Finalize(ierr) + stop + end if + call makemesh_init(npx, npy, npz, n) + + ! nprocs = (ip-1) = (ipx-1) + (ipy-1) * npx + (ipz-1) * npx*npy + ipx = mod(myrank, npx) + 1 + ipy = mod(myrank/npx, npy) + 1 + ipz = myrank/(npx*npy) + 1 + + write(file_mesh, '("MESH.P", i4.4)') ip + write(file_boun, '("BOUN.P", i4.4)') ip + + call makemesh_mesh(ipx, ipy, ipz, file_mesh) + call makemesh_boun(ipx, ipy, ipz, file_boun) + + call flush(6) + call MPI_Barrier(MPI_COMM_WORLD, ierr) + if (myrank == 0) then + write(*, '(/,a,/)') '... 
done' + end if + + call LES3X(file_parm) + +contains + +subroutine print_usage_and_exit() + if (myrank == 0) then + write(*, '(a)') "usage: ffb_mini npx npy npz n [param=value ...]" + end if + call MPI_Abort(MPI_COMM_WORLD, 1, ierr) +end subroutine print_usage_and_exit + +subroutine print_message_and_exit(msg) + character(*), intent(in) :: msg + if (myrank == 0) then + write(*, '(a)') msg + end if + call MPI_Abort(MPI_COMM_WORLD, 1, ierr) +end subroutine print_message_and_exit + +function get_int_arg(i) result(val) + integer, intent(in) :: i + integer :: val + integer :: len, stat + character(256) :: arg + + call get_command_argument(i, arg, len, stat) + if (stat /= 0) call print_usage_and_exit() + read(arg, '(i255)', iostat=stat) val + if (stat /= 0) call print_usage_and_exit() +end function get_int_arg + +subroutine parse_arg(i) + integer, intent(in) :: i + integer :: len, stat, ip + character(256) :: arg + character(20) :: key, val + integer :: num + + call get_command_argument(i, arg, len, stat) + if (stat /= 0) call print_usage_and_exit() + ip = index(arg, '=') + if (ip == 0) call print_usage_and_exit() + key = arg(1:ip-1) + val = trim(arg(ip+1:)) + select case(key) + case ('test') + test = val + case ('ntime') + ntime = val + case ('isolp') + isolp = val + case ('dt') + dt = val + case ('nmaxp') + nmaxp = val + case ('nmaxt') + nmaxt = val + case ('epsp') + epsp = val + case ('epst') + epst = val + case ('color_partsize') + read(val, *, iostat=stat) num + if (stat /= 0) call print_message_and_exit("*** invalid argument: "//trim(arg)) + if (num > 0) then + write(color_partsize, '("#RO_COLR 1", 4(" ", i0))') num, num, num, num + else + color_partsize = '' + end if + case ('reorder_ndiv') + read(val, *, iostat=stat) num + if (stat /= 0) call print_message_and_exit("*** invalid argument: "//trim(arg)) + if (num > 0) then + write(reorder_ndiv, '("#RO_SORT 1", 3(" ", i0))') num, num, num + else + reorder_ndiv = '' + end if + case ('unroll') + if (val == 'on') then 
+ unroll = "#AXUNROL" + else if (val == 'off') then + unroll = '' + else + call print_message_and_exit("*** invalid argument: "//trim(arg)) + end if + case default + call print_message_and_exit("*** invalid argument: "//trim(arg)) + end select + +end subroutine parse_arg + +subroutine substitute_param(line, key, val) + character(*), intent(inout) :: line + character(*), intent(in) :: key, val + integer :: ip0, ip1 + ip0 = index(line, key) + if (ip0 == 0) return + ip1 = ip0 + len_trim(key) - 1 + line = line(1:ip0-1) // trim(val) // trim(line(ip1+1:)) +end subroutine substitute_param + +end program ffb_mini From 4212b918e3fffe8916b15a30906a6922346d7583 Mon Sep 17 00:00:00 2001 From: "Yoshikawa, Hiroyuki" Date: Wed, 10 Mar 2021 18:00:44 +0900 Subject: [PATCH 49/70] add make_setting.xmpAPI_gcc --- FFB-MINI/src/make_setting | 14 +++++++----- FFB-MINI/src/make_setting.xmpAPI_gcc | 33 ++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 6 deletions(-) create mode 100755 FFB-MINI/src/make_setting.xmpAPI_gcc diff --git a/FFB-MINI/src/make_setting b/FFB-MINI/src/make_setting index 81d6748..928db76 100755 --- a/FFB-MINI/src/make_setting +++ b/FFB-MINI/src/make_setting @@ -1,20 +1,22 @@ CC = mpicc -#FC = xmpf90 +FC = mpif90 OMNI_HOME = $(shell xmpcc --show-env | grep OMNI_HOME | sed -e 's/OMNI_HOME=//' -e 's/"//g') -FC = mpif90 -#FFLAGS = -I$(OMNI_HOME)/include -J$(OMNI_HOME)/include -fopenmp -FFLAGS = -I$(OMNI_HOME)/include -fopenmp LIBS = $(shell xmpcc --show-env | grep OMNI_LINKER_OPT | sed -e 's/OMNI_LINKER_OPT=//' -e 's/"//g') +MPIBIN = $(shell dirname `which mpicc`) +MPIHOME = $(shell dirname ${MPIBIN}) + DEFINE += -DNO_METIS DEFINE += -DNO_REFINER # timing option DEFINE += -DPROF_MAPROF -MPIHOME =/usr/local/openmpi-2.1.1.gnu/ -CFLAGS += $(DEFINE) -O2 + +#FFLAGS = -I$(OMNI_HOME)/include -J$(OMNI_HOME)/include -fopenmp +FFLAGS = -I$(OMNI_HOME)/include -fopenmp FFLAGS += $(DEFINE) -O2 -I$(MPIHOME)/include +CFLAGS += $(DEFINE) -O2 ifeq (, $(findstring -DNO_METIS, 
$(FFLAGS))) diff --git a/FFB-MINI/src/make_setting.xmpAPI_gcc b/FFB-MINI/src/make_setting.xmpAPI_gcc new file mode 100755 index 0000000..928db76 --- /dev/null +++ b/FFB-MINI/src/make_setting.xmpAPI_gcc @@ -0,0 +1,33 @@ +CC = mpicc +FC = mpif90 + +OMNI_HOME = $(shell xmpcc --show-env | grep OMNI_HOME | sed -e 's/OMNI_HOME=//' -e 's/"//g') +LIBS = $(shell xmpcc --show-env | grep OMNI_LINKER_OPT | sed -e 's/OMNI_LINKER_OPT=//' -e 's/"//g') + +MPIBIN = $(shell dirname `which mpicc`) +MPIHOME = $(shell dirname ${MPIBIN}) + +DEFINE += -DNO_METIS +DEFINE += -DNO_REFINER + +# timing option +DEFINE += -DPROF_MAPROF + +#FFLAGS = -I$(OMNI_HOME)/include -J$(OMNI_HOME)/include -fopenmp +FFLAGS = -I$(OMNI_HOME)/include -fopenmp +FFLAGS += $(DEFINE) -O2 -I$(MPIHOME)/include +CFLAGS += $(DEFINE) -O2 + + +ifeq (, $(findstring -DNO_METIS, $(FFLAGS))) +METIS = $(HOME)/opt/metis5 +#METIS = $(HOME)/opt/metis4 +CFLAGS += -I$(METIS)/include +LIBS += -L$(METIS)/lib -lmetis +endif + +ifeq (, $(findstring -DNO_REFINER, $(FFLAGS))) +REFINER = $(HOME)/opt/REVOCAP_Refiner +FFLAGS += -I$(REFINER)/include +LIBS += -L$(REFINER)/lib -lRcapRefiner -lstdc++ +endif From f437f67621f707160b6ddaa6fe658103624439b1 Mon Sep 17 00:00:00 2001 From: tozaki_chisae Date: Thu, 11 Mar 2021 18:48:54 +0900 Subject: [PATCH 50/70] [WIP] modify 15 files. 
--- FFB-MINI/src/dd_mpi/xmpAPI_dd_mpi.F90 | 7 ++-- FFB-MINI/src/xmpAPI_bcgs3x.F | 20 +++++------ FFB-MINI/src/xmpAPI_bcgsxe.F | 16 ++++----- FFB-MINI/src/xmpAPI_calax3.F | 12 +++---- FFB-MINI/src/xmpAPI_callap.F | 12 +++---- FFB-MINI/src/xmpAPI_elm3dx.F | 29 +++++++++++++-- FFB-MINI/src/xmpAPI_grad3x.F | 12 +++---- FFB-MINI/src/xmpAPI_les3x.F | 52 +++++++++++++-------------- FFB-MINI/src/xmpAPI_lessfx.F | 9 +++-- FFB-MINI/src/xmpAPI_lrfnms.F | 12 +++---- FFB-MINI/src/xmpAPI_nodlex.F | 12 +++---- FFB-MINI/src/xmpAPI_pres3e.F | 23 ++++++++---- FFB-MINI/src/xmpAPI_rcmelm.F | 24 ++++++------- FFB-MINI/src/xmpAPI_vel3d1.F | 26 +++++++------- FFB-MINI/src/xmpAPI_vel3d2.F | 12 +++---- 15 files changed, 155 insertions(+), 123 deletions(-) diff --git a/FFB-MINI/src/dd_mpi/xmpAPI_dd_mpi.F90 b/FFB-MINI/src/dd_mpi/xmpAPI_dd_mpi.F90 index c45455a..93fc662 100755 --- a/FFB-MINI/src/dd_mpi/xmpAPI_dd_mpi.F90 +++ b/FFB-MINI/src/dd_mpi/xmpAPI_dd_mpi.F90 @@ -928,7 +928,6 @@ SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT ! Fujitsu start 202103 ! call xmp_api_init ! -! write(*, '("DBG1 : snd_desc = ", i8)') snd_desc snd_lb(1) = 1 snd_ub(1) = MAXBUF rcv_lb(1) = 1 @@ -1165,7 +1164,7 @@ SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT ! img_dims(1), ME, xmp_this_image() ! write(*, '(" : START_RR = ",i16," END_RR = ",i16)') & ! START_RR, END_RR -!! +! ! write(*, '("DBG3 : BUFSND = ", e12.6," ",e12.6)') & ! BUFSND(START_RR), BUFSND(END_RR) ! write(*, '("DBG3 : BUFSND = ", e12.6," ",e12.6)') & @@ -1364,8 +1363,8 @@ SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT !CTTDEBG ! ! Fujitsu start 202103 -! call xmp_free_array_section(snd_sec) -! call xmp_free_array_section(rcv_sec) + call xmp_free_array_section(snd_sec) + call xmp_free_array_section(rcv_sec) ! Fujitsu end 202103 ! 
#ifdef USE_BARRIER diff --git a/FFB-MINI/src/xmpAPI_bcgs3x.F b/FFB-MINI/src/xmpAPI_bcgs3x.F index efbca40..226f301 100755 --- a/FFB-MINI/src/xmpAPI_bcgs3x.F +++ b/FFB-MINI/src/xmpAPI_bcgs3x.F @@ -1,10 +1,10 @@ SUBROUTINE BCGS3X(IMODE,IPART,NMAX,EPS,ME,N,NE,NP, * NPP,NCRS,IPCRS,AAAPC,B,S,NITR,RESR, * NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND,NUMIP, -C Fj +C Fujitsu start 202103 C * WEIGHT,R0,RK,PK,APK,ATK,TK,S0,RX,RY, * WEIGHT,R0,RK,PK,APK,ATK,TK,S0,rx_desc,ry_desc, -C Fj +C Fujitsu end 202103 * IUT0,IERR, * JUNROL,NPPMAX,NCRS2,TS,TACRS,ITPCRS) #include "timing.h" @@ -34,9 +34,9 @@ SUBROUTINE BCGS3X(IMODE,IPART,NMAX,EPS,ME,N,NE,NP, CCC [WORK] REAL*4 RX(0:N,ME),RY(0:N,ME),WEIGHT(NP), * R0(NP),RK(NP),PK(NP),APK(NP),ATK(NP),TK(NP),S0(NP) -C Fj +C Fujitsu start 202103 INTEGER*8 rx_desc,ry_desc -C Fj +C Fujitsu end 202103 C C [FULL UNROLL] INTEGER*4 JUNROL @@ -154,10 +154,10 @@ SUBROUTINE BCGS3X(IMODE,IPART,NMAX,EPS,ME,N,NE,NP, call maprof_time_start(TM_CALAX3) CALL CALAX3(AAAPC, S, RK, NP, NE, NCRS, IPCRS, NPP, * N,ME,IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, -C Fj +C Fujitsu start 202103 C * RX,RY,IUT0,IERR, * rx_desc,ry_desc,IUT0,IERR, -C Fj +C Fujitsu end 202103 * JUNROL,NPPMAX,NCRS2,TS,TACRS,ITPCRS) call maprof_time_stop(TM_CALAX3) C @@ -222,10 +222,10 @@ SUBROUTINE BCGS3X(IMODE,IPART,NMAX,EPS,ME,N,NE,NP, call maprof_time_start(TM_CALAX3) CALL CALAX3(AAAPC, PK, APK, NP, NE, NCRS, IPCRS, NPP, * N,ME,IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, -C Fj +C Fujitsu start 202103 C * RX,RY,IUT0,IERR, * rx_desc,ry_desc,IUT0,IERR, -C Fj +C Fujitsu end 202103 * JUNROL,NPPMAX,NCRS2,TS,TACRS,ITPCRS) call maprof_time_stop(TM_CALAX3) C @@ -283,10 +283,10 @@ SUBROUTINE BCGS3X(IMODE,IPART,NMAX,EPS,ME,N,NE,NP, call maprof_time_start(TM_CALAX3) CALL CALAX3(AAAPC, TK, ATK, NP, NE, NCRS, IPCRS, NPP, * N,ME,IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, -C Fj +C Fujitsu start 202103 C * RX,RY,IUT0,IERR, * rx_desc,ry_desc,IUT0,IERR, -C Fj +C Fujitsu end 202103 * JUNROL,NPPMAX,NCRS2,TS,TACRS,ITPCRS) 
call maprof_time_stop(TM_CALAX3) C diff --git a/FFB-MINI/src/xmpAPI_bcgsxe.F b/FFB-MINI/src/xmpAPI_bcgsxe.F index 9dd6c8f..7b82951 100755 --- a/FFB-MINI/src/xmpAPI_bcgsxe.F +++ b/FFB-MINI/src/xmpAPI_bcgsxe.F @@ -4,11 +4,11 @@ SUBROUTINE BCGSXE(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * B,NPFIX,LPFIX, * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT,NITR,RESR,S, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, -C Fj +C Fujitsu start 202103 C * R0,RK,PK,APK,ATK,TK,FXYZ,RX,RY,MWRK,WRKN, * R0,RK,PK,APK,ATK,TK,FXYZ,rx_desc,ry_desc,MWRK, * WRKN, -C Fj +C Fujitsu end 202103 * IUT0,IERR) #include "timing.h" IMPLICIT NONE @@ -165,10 +165,10 @@ SUBROUTINE BCGSXE(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * NPFIX,LPFIX, * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, -C Fj +C Fujitsu start 202103 C * FXYZ,RX,RY,MWRK,WRKN, * FXYZ,rx_desc,ry_desc,MWRK,WRKN, -C Fj +C Fujitsu end 202103 * IUT0,IERR) call maprof_time_stop(TM_CALLAP) C @@ -236,10 +236,10 @@ SUBROUTINE BCGSXE(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * NPFIX,LPFIX, * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, -C Fj +C Fujitsu start 202103 C * FXYZ,RX,RY,MWRK,WRKN, * FXYZ,rx_desc,ry_desc,MWRK,WRKN, -C Fj +C Fujitsu end 202103 * IUT0,IERR) call maprof_time_stop(TM_CALLAP) C @@ -302,10 +302,10 @@ SUBROUTINE BCGSXE(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * NPFIX,LPFIX, * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, -C Fj +C Fujitsu start 202103 C * FXYZ,RX,RY,MWRK,WRKN, * FXYZ,rx_desc,ry_desc,MWRK,WRKN, -C Fj +C Fujitsu end 202103 * IUT0,IERR) call maprof_time_stop(TM_CALLAP) C diff --git a/FFB-MINI/src/xmpAPI_calax3.F b/FFB-MINI/src/xmpAPI_calax3.F index aa0feed..e64b1f0 100755 --- a/FFB-MINI/src/xmpAPI_calax3.F +++ b/FFB-MINI/src/xmpAPI_calax3.F @@ -1,10 +1,10 @@ C======================================================================= SUBROUTINE CALAX3(A, S, AS, NP, NE, NCRS, IPCRS, NPP, * N,ME,IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, -C Fj +C 
Fujitsu start 202103 C * RX,RY,IUT0,IERR, * rx_desc,ry_desc,IUT0,IERR, -C Fj +C Fujitsu end 202103 * JUNROL,NPPMAX,NCRS2,TS,TA,ITPCRS) C======================================================================= #include "timing.h" @@ -19,9 +19,9 @@ SUBROUTINE CALAX3(A, S, AS, NP, NE, NCRS, IPCRS, NPP, INTEGER MAXBUF,IDUM INTEGER N,ME,IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,IUT0,IERR REAL*4 RX,RY -C Fj +C Fujitsu start 202103 INTEGER*8 rx_desc,ry_desc -C Fj +C Fujitsu end 202103 C DIMENSION LDOM(NDOM),NBPDOM(NDOM) DIMENSION IPSLF(MBPDOM,NDOM),IPSND(MBPDOM,NDOM) @@ -183,10 +183,10 @@ SUBROUTINE CALAX3(A, S, AS, NP, NE, NCRS, IPCRS, NPP, call maprof_time_start(TM_CALAX3_COM) IDUM = 1 CALL DDCOMX(IPART,IDUM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, -C Fj +C Fujitsu start 202103 C * AS,AS,AS,NP,IUT0,IERR,RX,RY,MAXBUF) * AS,AS,AS,NP,IUT0,IERR,rx_desc,ry_desc,MAXBUF) -C Fj +C Fujitsu end 202103 call maprof_time_stop(TM_CALAX3_COM) IF(IERR.NE.0) THEN WRITE(IUT0,*) diff --git a/FFB-MINI/src/xmpAPI_callap.F b/FFB-MINI/src/xmpAPI_callap.F index 8f0945c..82cff17 100755 --- a/FFB-MINI/src/xmpAPI_callap.F +++ b/FFB-MINI/src/xmpAPI_callap.F @@ -5,10 +5,10 @@ SUBROUTINE CALLAP(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * NPFIX,LPFIX, * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, -C Fj +C Fujitsu start 202103 C * FXYZ,RX,RY,MWRK,WRKN, * FXYZ,rx_desc,ry_desc,MWRK,WRKN, -C Fj +C Fujitsu end 202103 * IUT0,IERR) #include "timing.h" IMPLICIT NONE @@ -47,9 +47,9 @@ SUBROUTINE CALLAP(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, C CCC [WORK] REAL*4 RX(0:N,ME),RY(0:N,ME) -C Fj +C Fujitsu start 202103 INTEGER*8 rx_desc,ry_desc -C Fj +C Fujitsu end 202103 INTEGER*4 MWRK REAL*4 WRKN(MWRK,9) C @@ -166,10 +166,10 @@ SUBROUTINE CALLAP(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * NPFIX,LPFIX, * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, -C Fj +C Fujitsu start 202103 C * FXYZ,RX,RY,MWRK,WRKN, * FXYZ,rx_desc,ry_desc,MWRK,WRKN, -C Fj +C Fujitsu end 202103 * 
IUT0,IERR) call maprof_time_stop(TM_GRAD3X) C diff --git a/FFB-MINI/src/xmpAPI_elm3dx.F b/FFB-MINI/src/xmpAPI_elm3dx.F index 52027de..b304fbc 100755 --- a/FFB-MINI/src/xmpAPI_elm3dx.F +++ b/FFB-MINI/src/xmpAPI_elm3dx.F @@ -1,8 +1,14 @@ SUBROUTINE ELM3DX(MGAUSS,IGAUSH, * MELM,N1,NE,NP,NEX,X,Y,Z,NODE, - * SNI,DNXI,DNYI,DNZI,SN,DNX,DNY,DNZ, +C Fujitsu start 202103 +C * SNI,DNXI,DNYI,DNZI,SN,DNX,DNY,DNZ, + * SNI,DNXI,DNYI,DNZI,SN,rx_desc,ry_desc,DNZ, +C Fujitsu end 202103 * E,EX,EY,EZ,EXX,EYY,EZZ,EXY,EXZ,EYZ, * NN,NC,PSI,PSIC,WW,IUT0,IERR) +C Fujitsu end 202103 + use xmp_api +C Fujitsu end 202103 IMPLICIT NONE C INTEGER MGAUSS,IGAUSH @@ -11,7 +17,16 @@ SUBROUTINE ELM3DX(MGAUSS,IGAUSH, REAL*8 X, Y, Z INTEGER NODE REAL*4 SNI,DNXI,DNYI,DNZI - REAL*4 SN,DNX,DNY,DNZ,E,EX,EY,EZ,EXX,EYY,EZZ,EXY,EYZ,EXZ +C Fujitsu start 202103 +C REAL*4 SN,DNX,DNY,DNZ,E,EX,EY,EZ,EXX,EYY,EZZ,EXY,EYZ,EXZ + REAL*4 SN,DNZ,E,EX,EY,EZ,EXX,EYY,EZZ,EXY,EYZ,EXZ + REAL*4 , POINTER :: DNX ( : , : ) => null ( ) + REAL*4 , POINTER :: DNY ( : , : ) => null ( ) + INTEGER*8 :: rx_desc, ry_desc + INTEGER*8, DIMENSION(2) :: rx_lb, rx_ub, ry_lb, ry_ub + INTEGER*4 :: img_dims(1) + INTEGER*4 :: status +C Fujitsu end 202103 INTEGER IUT0,IERR REAL*8 NN,NC,PSI,PSIC,WW C @@ -20,7 +35,10 @@ SUBROUTINE ELM3DX(MGAUSS,IGAUSH, DIMENSION X(NP),Y(NP),Z(NP) DIMENSION NODE(N1,NE+1), * SNI( N1,NE+1),DNXI(N1,NE+1),DNYI(N1,NE+1),DNZI(N1,NE+1), - * SN ( N1,NE+1),DNX (N1,NE+1),DNY (N1,NE+1),DNZ (N1,NE+1), +C Fujitsu start 202103 +C * SN ( N1,NE+1),DNX (N1,NE+1),DNY (N1,NE+1),DNZ (N1,NE+1), + * SN ( N1,NE+1),DNZ (N1,NE+1), +C Fujitsu end 202103 * E (MELM), * EX (MELM),EY (MELM),EZ (MELM), * EXX(MELM),EYY(MELM),EZZ(MELM), @@ -88,6 +106,11 @@ SUBROUTINE ELM3DX(MGAUSS,IGAUSH, INTEGER NEHEX,NHEX INTEGER IE,J C +C Fujitsu start 202103 + call xmp_coarray_bind(rx_desc,DNX) + call xmp_coarray_bind(ry_desc,DNY) +C Fujitsu end 202103 +C ***** ALIAS ***** NEHEX=NEX(4) NHEX=NEX(8) diff --git a/FFB-MINI/src/xmpAPI_grad3x.F 
b/FFB-MINI/src/xmpAPI_grad3x.F index 5ed006d..07491d5 100755 --- a/FFB-MINI/src/xmpAPI_grad3x.F +++ b/FFB-MINI/src/xmpAPI_grad3x.F @@ -4,10 +4,10 @@ SUBROUTINE GRAD3X(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * NPFIX,LPFIX, * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, -C Fj +C Fujitsu start 202103 C * FXYZ,RX,RY,MWRK,WRKN, * FXYZ,rx_desc,ry_desc,MWRK,WRKN, -C Fj +C Fujitsu end 202103 * IUT0,IERR) #include "timing.h" IMPLICIT NONE @@ -45,9 +45,9 @@ SUBROUTINE GRAD3X(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, C CCC [WORK] REAL*4 RX(0:N,ME),RY(0:N,ME) -C Fj +C Fujitsu start 202103 INTEGER*8 rx_desc,ry_desc -C Fj +C Fujitsu end 202103 INTEGER*4 MWRK REAL*4 WRKN(MWRK,9) C @@ -242,10 +242,10 @@ SUBROUTINE GRAD3X(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, call maprof_time_start(TM_GRAD3X_COM) CALL DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, * WRKN(1,4),WRKN(1,5),WRKN(1,6),NP,IUT0,IERR, -C Fj +C Fujitsu start 202103 C * RX,RY,MAXBUF) * rx_desc,ry_desc,MAXBUF) -C Fj +C Fujitsu end 202103 call maprof_time_stop(TM_GRAD3X_COM) IF(IERR.NE.0) THEN WRITE(IUT0,*) diff --git a/FFB-MINI/src/xmpAPI_les3x.F b/FFB-MINI/src/xmpAPI_les3x.F index f0d04d9..9d238dc 100755 --- a/FFB-MINI/src/xmpAPI_les3x.F +++ b/FFB-MINI/src/xmpAPI_les3x.F @@ -324,15 +324,15 @@ SUBROUTINE LES3X(FILEIN) * NODWK1(:,:),LEWRK(:,:), * LWRK01(:),LWRK02(:),LWRK04(:) REAL*4, ALLOCATABLE:: -CC Fj start 202103 +CC Fujitsu start 202103 CC * RX(:,:)[:], RY(:,:)[:], WRKN(:), * WRKN(:), -CC Fj end 202103 +CC Fujitsu end 202103 * WRK01(:),WRK02(:),WRK03(:),WRK04(:), * WRK05(:),WRK06(:),WRK07(:),WRK08(:), * WRK09(:),WRK10(:),WRK11(:),WRK12(:), * WRK13(:),WRK3(:,:) -CC Fj start 202103 +CC Fujitsu start 202103 REAL*4 , POINTER :: RX ( : , : ) => null ( ) REAL*4 , POINTER :: RY ( : , : ) => null ( ) INTEGER*8 :: rx_desc, ry_desc @@ -340,7 +340,7 @@ SUBROUTINE LES3X(FILEIN) INTEGER*4 :: img_dims(1) INTEGER*4 :: status C INTEGER :: ierr, nnn, me -CC Fj end 202103 +CC Fujitsu end 202103 
REAL*8,ALLOCATABLE:: * DWRK01(:,:),DWRK02(:),DWRK03(:,:,:), * DWRK04(:,:),DWRK05(:) @@ -754,9 +754,9 @@ SUBROUTINE LES3X(FILEIN) C C C NDOM = 0 -C Fj start 202103 +C Fujitsu start 202103 call xmp_api_init -C Fj end 202103 +C Fujitsu end 202103 C CALL DDINIT(NPART,IPART) C @@ -1131,7 +1131,7 @@ SUBROUTINE LES3X(FILEIN) ALLOCATE(DWRK03(3,N1,MGAUSS), STAT=LERR(25)) ALLOCATE(DWRK04(3,N1 ), STAT=LERR(26)) ALLOCATE(DWRK05( MGAUSS), STAT=LERR(27)) -C Fj start 202103 +C Fujitsu start 202103 rx_lb(1) = 1 rx_lb(2) = 1 rx_ub(1) = N1 @@ -1144,7 +1144,7 @@ SUBROUTINE LES3X(FILEIN) call xmp_new_coarray(ry_desc, 4, 1, ry_lb, ry_ub, 1, img_dims) call xmp_coarray_bind(rx_desc,RX) call xmp_coarray_bind(ry_desc,RY) -C Fj start 202103 +C Fujitsu start 202103 CALL ERRCHK(IUT6,IPART,27,LERR,IERR) IF(IERR.NE.0) THEN WRITE(IUT0,*) BLANK @@ -1452,10 +1452,10 @@ SUBROUTINE LES3X(FILEIN) * MDOM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, * WRK01,WRK02,WRK03,LWRK06, * DWRK3,LWRK01,LWRK02,WRK04,NODWK1,NODWK2,NODWK3, -C Fj +C Fujitsu start 202103 C * RX,RY,NPB0, * rx_desc,ry_desc,NPB0, -C Fj +C Fujitsu end 202103 * NPB1,LPB1,XPB1,YPB1,ZPB1, * NPB2,LPB2,XPB2,YPB2,ZPB2, * LWRK03,LWRK04, @@ -1575,10 +1575,10 @@ SUBROUTINE LES3X(FILEIN) * NPCCL ,LPCCL1,LPCCL2, * IPART ,MPINT ,NPINT ,LPINT1,LPINT2,LPINT3, * MDOM ,NDOM ,LDOM ,NBPDOM,MBPDOM,IPSLF,IPSND, -C Fj +C Fujitsu start 202103 C * LPBTOA,IUT0,IUT6,IERR,RX,RY, * LPBTOA,IUT0,IUT6,IERR,rx_desc,ry_desc, -C Fj +C Fujitsu end 202103 * MWRK,WRK01,WRK02,WRK03,WRK04,WRK05,WRK06, * LWRK01,LEWRK) CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) @@ -1665,10 +1665,10 @@ SUBROUTINE LES3X(FILEIN) MELM=NELM+1 CALL ELM3DX(MGAUSS,IGAUSH, * MELM,N1,NE,NP,NEX,XD,YD,ZD,NODE, -C Fj - * SNI ,DNXI,DNYI,DNZI,SN,RX,RY,WRKN, -C * SNI ,DNXI,DNYI,DNZI,SN,rx_desc,ry_desc,WRKN, -C Fj +C Fujitsu start 202103 +C * SNI ,DNXI,DNYI,DNZI,SN,RX,RY,WRKN, + * SNI ,DNXI,DNYI,DNZI,SN,rx_desc,ry_desc,WRKN, +C Fujitsu end 202103 * E,EX,EY,EZ,EXX,EYY,EZZ,EXY,EXZ,EYZ, * 
DWRK01,DWRK02,DWRK03,DWRK04,DWRK05,IUT0,IERR) CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) @@ -1720,10 +1720,10 @@ SUBROUTINE LES3X(FILEIN) WRITE(IUT6,*) ' ** INTERPOLATING PRESSURE TO NODES **' CALL NODLEX(NODE,ME,NE,NP,N1,NEX,SN, * IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, -C Fj +C Fujitsu start 202103 C * P,PN,CM,IUT0,IERR,RX,RY,MAXBUF) * P,PN,CM,IUT0,IERR,rx_desc,ry_desc,MAXBUF) -C Fj +C Fujitsu end 202103 IF(IERRA.NE.0) THEN WRITE(IUT0,*) BLANK WRITE(IUT0,*) ERMSGC @@ -1874,10 +1874,10 @@ SUBROUTINE LES3X(FILEIN) * LWRK01,LWRK02, * WRK01,WRK02,WRK03,WRK04,WRK05,WRK06, * WRK07,WRK08,WRK09,WRK10,WRK11,WRK12, -C Fj +C Fujitsu start 202103 C * RX,RY, * rx_desc,ry_desc, -C Fj +C Fujitsu end 202103 * JUNROL,NPPMAX,NCRS2,WRK13,TACRS,ITPCRS, * IUT0,IERR) call maprof_time_stop(TM_VEL3D1) @@ -1903,10 +1903,10 @@ SUBROUTINE LES3X(FILEIN) * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * LWRK01,LWRK02,WRK3,WRK01,WRK02,WRK03,WRK04, * WRK05,WRK06,WRK07,WRK08,WRK09,WRK10, -C Fj +C Fujitsu start 202103 C * PRCM,APRCM,RX,RY,MWRK,WRKN, * PRCM,APRCM,rx_desc,ry_desc,MWRK,WRKN, -C Fj +C Fujitsu end 202103 * IUT0,IERR) call maprof_time_stop(TM_PRES3E) IF(IERR.NE.0) GOTO 9999 @@ -1925,10 +1925,10 @@ SUBROUTINE LES3X(FILEIN) * NPWALL,LPWALL,UWALL,VWALL,WWALL, * NPSYMT,NPSYM2,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, * IPART ,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, -C Fj +C Fujitsu start 202103 C * RX,RY,MWRK,WRKN,WRK3,WRK01, * rx_desc,ry_desc,MWRK,WRKN,WRK3,WRK01, -C Fj +C Fujitsu end 202103 * IUT0,IERR, * WRK05) C @@ -2203,14 +2203,14 @@ SUBROUTINE LES3X(FILEIN) CALL DDEXIT C C -C Fj start 202103 +C Fujitsu start 202103 call xmp_coarray_deallocate(rx_desc, status) call xmp_coarray_deallocate(ry_desc, status) C call xmp_api_finalize C C call mpi_finalize(ierr) -C Fj end 202103 +C Fujitsu end 202103 C STOP C diff --git a/FFB-MINI/src/xmpAPI_lessfx.F b/FFB-MINI/src/xmpAPI_lessfx.F index 884ad1d..7d15901 100755 --- a/FFB-MINI/src/xmpAPI_lessfx.F +++ b/FFB-MINI/src/xmpAPI_lessfx.F @@ -18,10 +18,10 @@ SUBROUTINE 
LESSFX(LOCAL,NODE,MB,NE,NP,N,N1,NS,NSP,N2D, * NPCCL ,LPCCL1,LPCCL2, * IPART ,MPINT ,NPINT ,LPINT1,LPINT2,LPINT3, * MDOM ,NDOM ,LDOM ,NBPDOM,MBPDOM,IPSLF,IPSND, -C Fj +C Fujitsu start 202103 C * LPBTOA,IUT0,IUT6,IERR,RX,RY, * LPBTOA,IUT0,IUT6,IERR,rx_desc,ry_desc, -C Fj +C Fujitsu end 202103 * MWRK,WRK1,WRK2,WRK3,WRK4,WRK5,WRK6, * IWRK,IWRK2) IMPLICIT NONE @@ -39,9 +39,9 @@ SUBROUTINE LESSFX(LOCAL,NODE,MB,NE,NP,N,N1,NS,NSP,N2D, * LPCCL1(NPCCL),LPCCL2(NPCCL) , * WRK1(NE),WRK2(NE),WRK3(NE),WRK4(NP),WRK5(NP),WRK6(NP), * IWRK(MWRK),IWRK2(2,MWRK),RX(0:N,NE),RY(0:N,NE) -C Fj +C Fujitsu start 202103 INTEGER*8 :: rx_desc, ry_desc -C Fj +C Fujitsu end 202103 C DIMENSION LPINT1(MPINT),LPINT2(MPINT),LPINT3(MPINT), 1 LDOM (MDOM) ,NBPDOM(MDOM) , @@ -304,7 +304,6 @@ SUBROUTINE LESSFX(LOCAL,NODE,MB,NE,NP,N,N1,NS,NSP,N2D, C Fujitsu end 202103 CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) IF(IERRA.NE.0) THEN - write(IUT0,*) 'DBG : error' WRITE(IUT0,*) WRITE(IUT0,*) ERMSGC RETURN diff --git a/FFB-MINI/src/xmpAPI_lrfnms.F b/FFB-MINI/src/xmpAPI_lrfnms.F index 7646af0..133a958 100755 --- a/FFB-MINI/src/xmpAPI_lrfnms.F +++ b/FFB-MINI/src/xmpAPI_lrfnms.F @@ -10,10 +10,10 @@ SUBROUTINE LRFNMS(IRFNFF,IRFN,NGRID, * MDOM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, * UFIX,VFIX,WFIX,LPFRM, * CRD,LWRK01,LWRK02,WRK04,NDRFN,NDORG,NODEBK, -C Fj +C Fujitsu start 202103 C * RX,RY,NPB0, * rx_desc,ry_desc,NPB0, -C Fj +C Fujitsu end 202103 * NPB1,LPB1,XPB1,YPB1,ZPB1, * NPB2,LPB2,XPB2,YPB2,ZPB2, * ITYPOR,ITYPRF,IUT6,IUT0,IERR) @@ -77,9 +77,9 @@ SUBROUTINE LRFNMS(IRFNFF,IRFN,NGRID, INTEGER*4 LPFRM(MP) REAL*8 CRD(MP*3) REAL*4 RX(ME*8),RY(ME*8) -C Fj +C Fujitsu start 202103 INTEGER*8 :: rx_desc, ry_desc -C Fj +C Fujitsu end 202103 INTEGER*4 NODED(8),LEACNV(ME) INTEGER*4 LWRK01(ME),LWRK02(MP) INTEGER*4 WRK04(MP),NDRFN(ME*8),NDORG(NE*8),NODEBK(8,NE) @@ -201,10 +201,10 @@ SUBROUTINE LRFNMS(IRFNFF,IRFN,NGRID, C IDIM = 0 CALL DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, -C Fj +C Fujitsu start 202103 C * 
WRK04,WRK04,WRK04,NP,IUT0,IERR,RX,RY,MAXBUF) * WRK04,WRK04,WRK04,NP,IUT0,IERR,rx_desc,ry_desc,MAXBUF) -C Fj +C Fujitsu end 202103 CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) IF(IERRA.NE.0) THEN WRITE(IUT0,*) diff --git a/FFB-MINI/src/xmpAPI_nodlex.F b/FFB-MINI/src/xmpAPI_nodlex.F index e0a6a5b..7c8fd60 100755 --- a/FFB-MINI/src/xmpAPI_nodlex.F +++ b/FFB-MINI/src/xmpAPI_nodlex.F @@ -1,10 +1,10 @@ SUBROUTINE NODLEX * (NODE,ME,NE,NP,N1,NEX,SN, * IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, -C Fj +C Fujitsu start 202103 C * VALELM,VALNOD,CM,IUT0,IERR,BUFSND,BUFRCV,MAXBUF) * VALELM,VALNOD,CM,IUT0,IERR,rx_desc,ry_desc,MAXBUF) -C Fj +C Fujitsu end 202103 C IMPLICIT NONE C @@ -17,9 +17,9 @@ SUBROUTINE NODLEX REAL*4 VALELM(NE),VALNOD(NP),CM(NP) INTEGER*4 IUT0,IERR,MAXBUF REAL*4 BUFSND(MAXBUF),BUFRCV(MAXBUF) -C Fj +C Fujitsu start 202103 INTEGER*8 rx_desc,ry_desc -C Fj +C Fujitsu end 202103 C CHARACTER*60 ERMSGC & /' ## SUBROUTINE NODLEX: FATAL ERROR REPORT ; RETURNED' / @@ -59,10 +59,10 @@ SUBROUTINE NODLEX IDUM=1 CALL DDCOMX(IPART,IDUM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, & VALNOD,VALNOD,VALNOD,NP,IUT0,IERR, -C Fj +C Fujitsu start 202103 C & BUFSND,BUFRCV,MAXBUF) & rx_desc,ry_desc,MAXBUF) -C Fj +C Fujitsu end 202103 IF(IERR.NE.0) THEN WRITE(IUT0,*) WRITE(IUT0,*) ERMSGC diff --git a/FFB-MINI/src/xmpAPI_pres3e.F b/FFB-MINI/src/xmpAPI_pres3e.F index eaf9990..fe35946 100755 --- a/FFB-MINI/src/xmpAPI_pres3e.F +++ b/FFB-MINI/src/xmpAPI_pres3e.F @@ -9,10 +9,10 @@ SUBROUTINE PRES3E(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * LPFIX,LFIX3D,FXYZ,WRK01,WRK02,WRK03,WRK04, * WRK05,WRK06,WRK07,WRK08,WRK09,B, -C Fj +C Fujitsu start 202103 C * PRCM,APRCM,RX,RY,MWRK,WRKN, * PRCM,APRCM,rx_desc,ry_desc,MWRK,WRKN, -C Fj +C Fujitsu end 202103 * IUT0,IERR) #include "timing.h" IMPLICIT NONE @@ -52,9 +52,9 @@ SUBROUTINE PRES3E(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, REAL*4 PRCM(MRCM,NE),APRCM(MRCM,NE) INTEGER*4 MWRK REAL*4 WRKN(MWRK,9) -C Fj +C Fujitsu start 202103 
INTEGER*8 rx_desc,ry_desc -C Fj +C Fujitsu end 202103 C C [IN:MID NODE COLORING] C @@ -177,7 +177,10 @@ SUBROUTINE PRES3E(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * NITR,RES,PE, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * WRK01,WRK02,WRK03,WRK04,WRK05,WRK06, - * FXYZ,RX,RY,MWRK,WRKN, +C Fujitsu start 202103 +C * FXYZ,RX,RY,MWRK,WRKN, + * FXYZ,rx_desc,ry_desc,MWRK,WRKN, +C Fujitsu end 202103 * IUT0,IERR) call maprof_time_stop(TM_BCGSXE) ELSE IF(ISOLP.EQ.2) THEN @@ -190,7 +193,10 @@ SUBROUTINE PRES3E(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * FXYZ,WRK01,WRK02,WRK03,WRK04, * WRK05,WRK06,WRK07,WRK08,WRK09,PRCM,APRCM, - * RX,RY,MWRK,WRKN, +C Fujitsu start 202103 +C * RX,RY,MWRK,WRKN, + * rx_desc,ry_desc,MWRK,WRKN, +C Fujitsu end 202103 * IUT0,IERR) ELSE IERR=1 @@ -203,7 +209,10 @@ SUBROUTINE PRES3E(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, C CALL NODLEX(NODE,ME,NE,NP,N1,NEX,SN, * IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, - * PE,PN,CM,IUT0,IERR,RX,RY,MAXBUF) +C Fujitsu start 202103 +C * PE,PN,CM,IUT0,IERR,RX,RY,MAXBUF) + * PE,PN,CM,IUT0,IERR,rx_desc,ry_desc,MAXBUF) +C Fujitsu end 202103 C C RETURN diff --git a/FFB-MINI/src/xmpAPI_rcmelm.F b/FFB-MINI/src/xmpAPI_rcmelm.F index 0856a7f..914c029 100755 --- a/FFB-MINI/src/xmpAPI_rcmelm.F +++ b/FFB-MINI/src/xmpAPI_rcmelm.F @@ -6,10 +6,10 @@ SUBROUTINE RCMELM(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * FXYZ,WRK01,WRK02,WRK03,WRK04, * WRK05,WRK06,RRCM, W1RCM,W2RCM,PRCM,APRCM, -C Fj +C Fujitsu start 202103 C * RX,RY,MWRK,WRKN, * rx_desc,ry_desc,MWRK,WRKN, -C Fj +C Fujitsu end 202103 * IUT0,IERR) #include "timing.h" IMPLICIT NONE @@ -50,9 +50,9 @@ SUBROUTINE RCMELM(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * WRK03(NE),WRK04(NE),WRK05(NE),WRK06(NE), * RRCM(NE),PRCM(MRCM,NE),APRCM(MRCM,NE), * W1RCM(NE),W2RCM(NE) -C Fj +C Fujitsu start 202103 INTEGER*8 rx_desc,ry_desc -C Fj +C Fujitsu end 202103 INTEGER*4 MWRK REAL*4 WRKN(MWRK,9) C @@ -178,10 +178,10 @@ SUBROUTINE 
RCMELM(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * NPFIX,LPFIX, * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, -C Fj +C Fujitsu start 202103 C * FXYZ,RX,RY,MWRK,WRKN, * FXYZ,rx_desc,ry_desc,MWRK,WRKN, -C Fj +C Fujitsu end 202103 * IUT0,IERR) call maprof_time_stop(TM_CALLAP) C @@ -267,10 +267,10 @@ SUBROUTINE RCMELM(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * NITRB,RESB,W1RCM, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * WRK01,WRK02,WRK03,WRK04,WRK05,WRK06, -C Fj +C Fujitsu start 202103 C * FXYZ,RX,RY,MWRK,WRKN, * FXYZ,rx_desc,ry_desc,MWRK,WRKN, -C Fj +C Fujitsu end 202103 * IUT0,IERR) call maprof_time_stop(TM_BCGSXE) IF(IERR.NE.0) THEN @@ -427,10 +427,10 @@ SUBROUTINE RCMELM(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * NPFIX,LPFIX, * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, -C Fj +C Fujitsu start 202103 C * FXYZ,RX,RY,MWRK,WRKN, * FXYZ,rx_desc,ry_desc,MWRK,WRKN, -C Fj +C Fujitsu end 202103 * IUT0,IERR) call maprof_time_stop(TM_CALLAP) C @@ -491,10 +491,10 @@ SUBROUTINE RCMELM(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * IPART,NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND, * NPFIX,LPFIX, * NPSYMT,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, -C Fj +C Fujitsu start 202103 C * FXYZ,RX,RY,MWRK,WRKN, * FXYZ,rx_desc,ry_desc,MWRK,WRKN, -C Fj +C Fujitsu end 202103 * IUT0,IERR) call maprof_time_stop(TM_CALLAP) C diff --git a/FFB-MINI/src/xmpAPI_vel3d1.F b/FFB-MINI/src/xmpAPI_vel3d1.F index 92b24d0..4b1e22e 100755 --- a/FFB-MINI/src/xmpAPI_vel3d1.F +++ b/FFB-MINI/src/xmpAPI_vel3d1.F @@ -15,10 +15,10 @@ SUBROUTINE VEL3D1 * IPART,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, * LPFIX,LFIX3D, * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,A0,AR,RHSU,RHSV,RHSW, -C Fj +C Fujitsu start 202103 C * RX,RY, * rx_desc,ry_desc, -C Fj +C Fujitsu end 202103 * JUNROL,NPPMAX,NCRS2,TS,TACRS,ITPCRS, * IUT0,IERR) C @@ -49,7 +49,9 @@ SUBROUTINE VEL3D1 * A,UG,VG,WG,UE,VE,WE, * WRK01,WRK02,WRK03,WRK04,A0,AR, * RHSU,RHSV,RHSW,APCRS +C Fujitsu start 202103 INTEGER*8 rx_desc,ry_desc +C Fujitsu end 
202103 C C @@ -624,10 +626,10 @@ SUBROUTINE VEL3D1 IDUM=1 C* call maprof_time_start(TM_VEL3D1_COM) CALL DDCOMX(IPART,IDUM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, -C Fj +C Fujitsu start 202103 C * AR,AR,AR,NP,IUT0,IERR,RX,RY,MAXBUF) * AR,AR,AR,NP,IUT0,IERR,rx_desc,ry_desc,MAXBUF) -C Fj +C Fujitsu end 202103 C* call maprof_time_stop(TM_VEL3D1_COM) IF(IERR.NE.0) THEN WRITE(IUT0,*) @@ -650,10 +652,10 @@ SUBROUTINE VEL3D1 IDUM = 3 C* call maprof_time_start(TM_VEL3D1_COM) CALL DDCOMX(IPART,IDUM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, -C Fj +C Fujitsu start 202103 C * RHSU,RHSV,RHSW,NP,IUT0,IERR,RX,RY,MAXBUF) * RHSU,RHSV,RHSW,NP,IUT0,IERR,rx_desc,ry_desc,MAXBUF) -C Fj +C Fujitsu end 202103 C* call maprof_time_stop(TM_VEL3D1_COM) IF(IERR.NE.0) THEN WRITE(IUT0,*) @@ -717,33 +719,33 @@ SUBROUTINE VEL3D1 CALL BCGS3X(IBCGS,IPART,NMAX,EPS,ME,N,NE,NP, * NPP,NCRS,IPCRS,APCRS,RHSU,U,NITRU,RESU, * NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND,NUMIP, -C Fj +C Fujitsu start 202103 C * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,WRK05,RX,RY, * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,WRK05, * rx_desc,ry_desc, -C Fj +C Fujitsu end 202103 * IUT0,IERR1, * JUNROL,NPPMAX,NCRS2,TS,TACRS,ITPCRS) IF(NITRU.LT.NMAX) IRESU=1 CALL BCGS3X(IBCGS,IPART,NMAX,EPS,ME,N,NE,NP, * NPP,NCRS,IPCRS,APCRS,RHSV,V,NITRV,RESV, * NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND,NUMIP, -C Fj +C Fujitsu start 202103 C * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,WRK05,RX,RY, * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,WRK05, * rx_desc,ry_desc, -C Fj +C Fujitsu end 202103 * IUT0,IERR2, * JUNROL,NPPMAX,NCRS2,TS,TACRS,ITPCRS) IF(NITRV.LT.NMAX) IRESV=1 CALL BCGS3X(IBCGS,IPART,NMAX,EPS,ME,N,NE,NP, * NPP,NCRS,IPCRS,APCRS,RHSW,W,NITRW,RESW, * NDOM,MBPDOM,LDOM,NBPDOM,IPSLF,IPSND,NUMIP, -C Fj +C Fujitsu start 202103 C * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,WRK05,RX,RY, * UG,VG,WG,WRK01,WRK02,WRK03,WRK04,WRK05, * rx_desc,ry_desc, -C Fj +C Fujitsu end 202103 * IUT0,IERR3, * JUNROL,NPPMAX,NCRS2,TS,TACRS,ITPCRS) IF(NITRW.LT.NMAX) IRESW=1 diff --git a/FFB-MINI/src/xmpAPI_vel3d2.F 
b/FFB-MINI/src/xmpAPI_vel3d2.F index b9babb1..830befa 100755 --- a/FFB-MINI/src/xmpAPI_vel3d2.F +++ b/FFB-MINI/src/xmpAPI_vel3d2.F @@ -8,10 +8,10 @@ SUBROUTINE VEL3D2(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * NPWALL,LPWALL,UWALL,VWALL,WWALL, * NPSYMT,NPSYM2,LPSYMT,XPSYMT,YPSYMT,ZPSYMT, * IPART ,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, -C Fj +C Fujitsu start 202103 C * RX,RY,MWRK,WRKN,FXYZ,UG, * rx_desc,ry_desc,MWRK,WRKN,FXYZ,UG, -C Fj +C Fujitsu end 202103 * IUT0,IERR, * WRK02) IMPLICIT NONE @@ -36,9 +36,9 @@ SUBROUTINE VEL3D2(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, * UINLT,VINLT,WINLT,UWALL,VWALL,WWALL, * XPSYMT,YPSYMT,ZPSYMT, * RX,RY,FXYZ,UG -C Fj +C Fujitsu start 202103 INTEGER*8 rx_desc,ry_desc -C Fj +C Fujitsu end 202103 C C [IN:MID NODE COLORING] C @@ -295,10 +295,10 @@ SUBROUTINE VEL3D2(MCOLOR,MCPART,NCOLOR,NCPART,LLOOP, IDIM=3 CALL DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, * WRKN(1,1),WRKN(1,2),WRKN(1,3),NP,IUT0,IERR, -C Fj +C Fujitsu start 202103 C * RX,RY,MAXBUF) * rx_desc,ry_desc,MAXBUF) -C Fj +C Fujitsu end 202103 IF(IERR.NE.0) THEN WRITE(IUT0,*) WRITE(IUT0,*) ERMSGC From ad5be6ce1ad25f7c55c8021eca116ebc675b2e8e Mon Sep 17 00:00:00 2001 From: Kazuma Tago Date: Fri, 12 Mar 2021 16:52:23 +0900 Subject: [PATCH 51/70] Fix xmp-api local_put routine argments. 
--- .../src/mp2/xmpAPI_rimp2_tran3c2_incore_v_mpiomp.F90 | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_tran3c2_incore_v_mpiomp.F90 b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_tran3c2_incore_v_mpiomp.F90 index 67d5dd8..8e441a5 100755 --- a/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_tran3c2_incore_v_mpiomp.F90 +++ b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_tran3c2_incore_v_mpiomp.F90 @@ -180,6 +180,7 @@ SUBROUTINE RIMP2_Tran3c2_InCore_V_MPIOMP T2BufSend_ub(3) = LenOccBat call xmp_new_local_array(T2BufSend_local_desc,8,3,T2BufSend_lb, & T2BufSend_ub,loc(T2BufSend)) + call xmp_new_array_section(T2BufSend_local_sec,3) IdxBF_RI_MyRank_lb(1) = 1 IdxBF_RI_MyRank_ub(1) = NBF_RI @@ -188,8 +189,6 @@ SUBROUTINE RIMP2_Tran3c2_InCore_V_MPIOMP call xmp_new_array_section(IdxBF_RI_MyRank_local_sec,1) call xmp_new_array_section(IdxBF_RI_Irank_sec,1) - call xmp_new_array_section(T2BufRecv_sec,3) - call xmp_new_array_section(T2BufSend_local_sec,3) !coarray @@ -203,6 +202,7 @@ SUBROUTINE RIMP2_Tran3c2_InCore_V_MPIOMP T2BufRecv_ub(3)=LenOccBat call xmp_new_coarray(T2BufRecv_desc,8,3,T2BufRecv_lb,T2BufRecv_ub,1,img_dims) call xmp_coarray_bind(T2BufRecv_desc,T2BufRecv) + call xmp_new_array_section(T2BufRecv_sec,3) !! ! ! test @@ -312,18 +312,18 @@ SUBROUTINE RIMP2_Tran3c2_InCore_V_MPIOMP ! 
CALL MPI_Wait(ireq(4), istat4, IErr) call xmp_array_section_set_triplet(T2BufSend_local_sec, & - 1,int(1,kind=8),int(LenOccBat,kind=8),1,status) + 1,int(1,kind=8),int(MXNActO,kind=8),1,status) call xmp_array_section_set_triplet(T2BufSend_local_sec, & 2,int(1,kind=8),int(MXNBF_RI_MyRank,kind=8),1,status) call xmp_array_section_set_triplet(T2BufSend_local_sec, & - 3,int(1,kind=8),int(MXNActO,kind=8),1,status) + 3,int(1,kind=8),int(LenOccBat,kind=8),1,status) call xmp_array_section_set_triplet(T2BufRecv_sec, & - 1,int(1,kind=8),int(LenOccBat,kind=8),1,status) + 1,int(1,kind=8),int(MXNActO,kind=8),1,status) call xmp_array_section_set_triplet(T2BufRecv_sec, & 2,int(1,kind=8),int(MXNBF_RI_MyRank,kind=8),1,status) call xmp_array_section_set_triplet(T2BufRecv_sec, & - 3,int(1,kind=8),int(MXNActO,kind=8),1,status) + 3,int(1,kind=8),int(LenOccBat,kind=8),1,status) !T2BufRecv(:,:,:)[Iranksend+1] = T2BufSend(:,:,:) !sync all From 1df13b2d5efdd2274d072ae6923d8f4706b5a4ec Mon Sep 17 00:00:00 2001 From: Kazuma Tago Date: Fri, 12 Mar 2021 17:27:43 +0900 Subject: [PATCH 52/70] Add configure files for XMP-API in NTCHEM-MINI. 
--- .../linux64_xmp_api_gfortran.config.sh.in | 14 +++++ .../linux64_xmp_api_gfortran.makeconfig.in | 63 +++++++++++++++++++ .../platforms/config_mine.xmpAPI_gfortran | 2 +- 3 files changed, 78 insertions(+), 1 deletion(-) create mode 100755 NTCHEM-MINI/config/linux64_xmp_api_gfortran.config.sh.in create mode 100755 NTCHEM-MINI/config/linux64_xmp_api_gfortran.makeconfig.in diff --git a/NTCHEM-MINI/config/linux64_xmp_api_gfortran.config.sh.in b/NTCHEM-MINI/config/linux64_xmp_api_gfortran.config.sh.in new file mode 100755 index 0000000..88dfefa --- /dev/null +++ b/NTCHEM-MINI/config/linux64_xmp_api_gfortran.config.sh.in @@ -0,0 +1,14 @@ +# +export TARGET=LINUX64 +unset USE_MPI + +# if you want to use MPICH, you can set the environmental variables as +# follos (see ./GA/README) +# +# export MPI_USE=yes +# export MPI_INCLUDE=/usr/include +# export MPI_LIB=/usr/lib +# export LIBMPI=-lmpi + +export LARGE_FILES=yes + diff --git a/NTCHEM-MINI/config/linux64_xmp_api_gfortran.makeconfig.in b/NTCHEM-MINI/config/linux64_xmp_api_gfortran.makeconfig.in new file mode 100755 index 0000000..7a092f2 --- /dev/null +++ b/NTCHEM-MINI/config/linux64_xmp_api_gfortran.makeconfig.in @@ -0,0 +1,63 @@ +# XMP-API + USE_XMP_API = yes +# TODO + OMNI_HOME=/data/nfsWORK4/omni_gnu + OMNI_INC=$(OMNI_HOME)/include + OMNI_LIB=-L/data/nfsWORK4/omni_gnu/lib -lxmp -std=gnu99 -lm -fopenmp + + MPIHOME= + + + + TARGET = LINUX64 + + DMACRO = -UDEBUG +# DMACRO+=-DSUPPORT_R16 +# DMACRO+=-DHAVE_ERF + + INC = -I$(INCLUDE) -I$(LOCALINC) -I$(MPIHOME)/include -I$(OMNI_INC) + MOD = -J$(LOCALINC) + INCMOD = $(INC) $(MOD) + +# FCONVERT = + + F77C = mpif90 + F77FLAGS = -cpp -fopenmp $(DMACRO) $(INCMOD) -m64 -O2 -std=legacy -DNOQUAD + F77FLAGSNOOPT = $(DMACRO) $(INCMOD) -m64 -std=legacy -DNOQUAD -Wuninitialized -Wall -Wunderflow -fbounds-check + + F90C = mpif90 + F90FLAGS = -cpp -fopenmp $(DMACRO) $(INCMOD) -m64 -O2 -std=legacy -DNOQUAD $(OMNI_LIB) -I$(OMNI_INC) + F90FLAGSNOOPT = $(DMACRO) $(INCMOD) -m64 -std=legacy 
-Wuninitialized -Wall -Wunderflow -fbounds-check -DNOQUAD + + MODSUFFIX = mod + + CC = gcc + CFLAGS = $(INC) -O3 + + CXX = g++ + CXXFLAGS = $(INC) -O3 + CXXLIB = -lstdc++ + + MPIFLAGS = -UMPIINT8 + MPILDFLAGS = + + OMPFLAGS = -fopenmp + OMPLDFLAGS = -fopenmp + +# LD = xmpf90 -fc=gfortran + LD = mpif90 + LDFLAGS = -L$(LIB) $(MPILIB) -I$(OMNI_INC) + + AR = ar + ARFLAGS = cr + RANLIB = ranlib + + MAKE = make + + SHELL = /bin/sh + MV = /bin/mv -f + RM = /bin/rm -f + CP = /bin/cp -f + MKDIR = /bin/mkdir + LN = /bin/ln + diff --git a/NTCHEM-MINI/platforms/config_mine.xmpAPI_gfortran b/NTCHEM-MINI/platforms/config_mine.xmpAPI_gfortran index 3a41172..a8a88ca 100755 --- a/NTCHEM-MINI/platforms/config_mine.xmpAPI_gfortran +++ b/NTCHEM-MINI/platforms/config_mine.xmpAPI_gfortran @@ -9,7 +9,7 @@ LDFLAGS= --lapack='-llapack' \ --blas='-lblas' \ --atlas= \ -linux64_xmp_omp_gfortran +linux64_xmp_api_gfortran cd ./config ln -sf makeconfig makeconfig.xmp From 7e946926fe35dbd6a0d043aa6e370b02d0bb251d Mon Sep 17 00:00:00 2001 From: Yoshikawa Hiroyuki Date: Fri, 12 Mar 2021 18:28:30 +0900 Subject: [PATCH 53/70] modify around put_local --- NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 b/NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 index 7a1d0c3..15d8d89 100755 --- a/NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 +++ b/NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 @@ -3676,6 +3676,11 @@ subroutine COMM_var( & call xmp_new_coarray(v_spl_recvc_desc, 8, 2, v_spl_lb, v_spl_ub, 1, img_dims) call xmp_coarray_bind(v_spl_recvc_desc, v_spl_recvc) + call xmp_new_array_section(v_npl_sec, 2) + call xmp_new_array_section(v_spl_sec, 2) + + call xmp_new_local_array(v_npl_send_l_desc, 8, 2, v_npl_lb, v_npl_ub, loc(v_npl_send)) + call xmp_new_local_array(v_spl_send_l_desc, 8, 2, v_spl_lb, v_spl_ub, loc(v_spl_send)) !--- 2020 Fujitsu end v_npl_recvc = 0.d0 v_spl_recvc = 0.d0 @@ -3818,6 +3823,12 @@ subroutine 
COMM_var( & endif !--- 2020 Fujitsu + call xmp_free_array_section(v_npl_sec) + call xmp_free_array_section(v_spl_sec) + + call xmp_free_local_array(v_npl_send_l_desc) + call xmp_free_local_array(v_spl_send_l_desc) + call xmp_coarray_deallocate(v_npl_recvc_desc, ierr) call xmp_coarray_deallocate(v_spl_recvc_desc, ierr) !--- 2020 Fujitsu end From fb6fb4da61b217a836c568c3aa10f01fd0b50f34 Mon Sep 17 00:00:00 2001 From: tozaki_chisae Date: Fri, 12 Mar 2021 18:56:27 +0900 Subject: [PATCH 54/70] [WIP] modify 1 file. --- FFB-MINI/src/dd_mpi/xmpAPI_dd_mpi.F90 | 37 +++++++-------------------- 1 file changed, 9 insertions(+), 28 deletions(-) diff --git a/FFB-MINI/src/dd_mpi/xmpAPI_dd_mpi.F90 b/FFB-MINI/src/dd_mpi/xmpAPI_dd_mpi.F90 index 93fc662..6716c6f 100755 --- a/FFB-MINI/src/dd_mpi/xmpAPI_dd_mpi.F90 +++ b/FFB-MINI/src/dd_mpi/xmpAPI_dd_mpi.F90 @@ -813,9 +813,9 @@ SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT INTEGER*8 :: snd_desc, rcv_desc INTEGER*8 :: snd_sec, rcv_sec INTEGER*8, DIMENSION(1) :: snd_lb, snd_ub, rcv_lb, rcv_ub - INTEGER*8 :: st_desc, st_l_desc - INTEGER*8 :: st_sec, st_l_sec - INTEGER*8, DIMENSION(1) :: st_lb, st_ub, st_l_lb, st_l_ub + INTEGER*8 :: st_desc + INTEGER*8 :: st_sec + INTEGER*8, DIMENSION(1) :: st_lb, st_ub INTEGER*8 :: start_pos, end_pos INTEGER*4 :: img_dims(1) INTEGER*4 status @@ -828,7 +828,6 @@ SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT ! Fujitsu start 202103 ! INTEGER ,ALLOCATABLE :: START_R(:)[:] INTEGER*4 , POINTER :: START_R ( : ) => null ( ) - INTEGER*4 , POINTER :: start_rr_p ( : ) => null ( ) ! Fujitsu end 202103 ! INTEGER ,ALLOCATABLE :: END_R(:)[:] INTEGER ,ALLOCATABLE :: START_S(:) @@ -941,13 +940,8 @@ SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT ! allocate(START_R(1:NP)[*]) st_lb(1) = 1 st_ub(1) = NP - st_l_lb(1) = 1 - st_l_ub(1) = 1 call xmp_new_coarray(st_desc, 4, 1, st_lb, st_ub, 1, img_dims) -! 
call xmp_new_local_array(st_l_desc, 4, 1, st_l_lb, st_l_ub) - call xmp_new_coarray(st_l_desc, 4, 1, st_l_lb, st_l_ub, 1, img_dims) call xmp_coarray_bind(st_desc,START_R) - call xmp_coarray_bind(st_l_desc,start_rr_p) ! Fujitsu end 202103 ! allocate(END_R(1:NP)[*]) allocate(START_S(1:NP)) @@ -1122,7 +1116,6 @@ SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT call xmp_new_array_section(snd_sec,1) call xmp_new_array_section(rcv_sec,1) call xmp_new_array_section(st_sec,1) - call xmp_new_array_section(st_l_sec,1) ! Fujitsu start 202103 ! DO IDOM = 1, NDOM @@ -1135,16 +1128,12 @@ SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT end_pos = ME call xmp_array_section_set_triplet(st_sec,1, & start_pos,end_pos,1,status) - start_pos = 1 - end_pos = 1 - call xmp_array_section_set_triplet(st_l_sec,1, & - start_pos,end_pos,1,status) img_dims(1) = LDOM(IDOM) - call xmp_coarray_get(img_dims,st_desc,st_sec, & - st_l_desc,st_l_sec,status) - START_RR = start_rr_p(1) + call xmp_coarray_get_scalar(img_dims,st_desc,st_sec, & + START_RR,status); ! Fujitsu end 202103 END_RR = START_RR + (END_S(LDOM(IDOM)) - START_S(LDOM(IDOM))) +! ! Fujitsu start 202103 ! BUFRCV(START_RR:END_RR)[LDOM(IDOM)] = & ! BUFSND(START_S(LDOM(IDOM)):END_S(LDOM(IDOM))) @@ -1159,17 +1148,6 @@ SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT img_dims = LDOM(IDOM) call xmp_coarray_put(img_dims,rcv_desc,rcv_sec, & snd_desc,snd_sec,status); -! IF(IDOM .EQ. 1) THEN -! write(*, '("DBG2 : img_dims = ",i4," ME = ",i4," this_img = ",i4)') & -! img_dims(1), ME, xmp_this_image() -! write(*, '(" : START_RR = ",i16," END_RR = ",i16)') & -! START_RR, END_RR -! -! write(*, '("DBG3 : BUFSND = ", e12.6," ",e12.6)') & -! BUFSND(START_RR), BUFSND(END_RR) -! write(*, '("DBG3 : BUFSND = ", e12.6," ",e12.6)') & -! BUFSND(1), BUFSND(1) -! ENDIF ! 
Fujitsu end 202103 END DO @@ -1365,6 +1343,9 @@ SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT ! Fujitsu start 202103 call xmp_free_array_section(snd_sec) call xmp_free_array_section(rcv_sec) + call xmp_free_array_section(st_sec) +! + call xmp_coarray_deallocate(st_desc, status) ! Fujitsu end 202103 ! #ifdef USE_BARRIER From 8096f55778bca3bd7b2a9ec0ee3eacc0bf4bf1c8 Mon Sep 17 00:00:00 2001 From: Yoshikawa Hiroyuki Date: Fri, 12 Mar 2021 19:35:51 +0900 Subject: [PATCH 55/70] modify around coarray in NICAM --- NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 b/NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 index 15d8d89..767efeb 100755 --- a/NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 +++ b/NICAM-DC-MINI/src/share/mod_comm_xmpAPI.f90 @@ -2823,7 +2823,7 @@ subroutine COMM_data_transfer(& !allocate(caf_recvbuf(bufsize1,bufsize2)[*]) caf_recvbuf_lb(1) = 1; caf_recvbuf_ub(1) = bufsize1 caf_recvbuf_lb(2) = 1; caf_recvbuf_ub(2) = bufsize2 - call xmp_new_coarray(caf_recvbuf_desc, 4, 2, caf_recvbuf_lb, caf_recvbuf_ub, 1, img_dims) + call xmp_new_coarray(caf_recvbuf_desc, 8, 2, caf_recvbuf_lb, caf_recvbuf_ub, 1, img_dims) call xmp_coarray_bind(caf_recvbuf_desc, caf_recvbuf) !--- 2020 Fujitsu end @@ -3984,7 +3984,7 @@ subroutine COMM_data_transfer_nopl(& !allocate(caf_recvbuf(bufsize1,bufsize2)[*]) caf_recvbuf_lb(1) = 1; caf_recvbuf_ub(1) = bufsize1 caf_recvbuf_lb(2) = 1; caf_recvbuf_ub(2) = bufsize2 - call xmp_new_coarray(caf_recvbuf_desc, 4, 2, caf_recvbuf_lb, caf_recvbuf_ub, 1, img_dims) + call xmp_new_coarray(caf_recvbuf_desc, 8, 2, caf_recvbuf_lb, caf_recvbuf_ub, 1, img_dims) call xmp_coarray_bind(caf_recvbuf_desc, caf_recvbuf) !--- 2020 Fujitsu end From 5ca770ac6ebc2db88f260ed9c8d6a8babf52ebe5 Mon Sep 17 00:00:00 2001 From: Kazuma Tago Date: Mon, 15 Mar 2021 09:20:09 +0900 Subject: [PATCH 56/70] Remove tmp file. 
--- NTCHEM-MINI/config/makeconfig_tmp_xmpAPI | 80 ------------------------ 1 file changed, 80 deletions(-) delete mode 100644 NTCHEM-MINI/config/makeconfig_tmp_xmpAPI diff --git a/NTCHEM-MINI/config/makeconfig_tmp_xmpAPI b/NTCHEM-MINI/config/makeconfig_tmp_xmpAPI deleted file mode 100644 index f0177c6..0000000 --- a/NTCHEM-MINI/config/makeconfig_tmp_xmpAPI +++ /dev/null @@ -1,80 +0,0 @@ -# - NTQC_TOP = /home/tago.kazuma/data/20210301/XMP-Applications/NTCHEM-MINI - LAPACKLIB = -llapack - BLASLIB = -lblas - ATLASLIB = - HOSTTYPE = linux64_xmp_api_omp_gfortran - - BIN = /home/tago.kazuma/data/20210301/XMP-Applications/NTCHEM-MINI/bin - LIB = /home/tago.kazuma/data/20210301/XMP-Applications/NTCHEM-MINI/lib - INCLUDE = /home/tago.kazuma/data/20210301/XMP-Applications/NTCHEM-MINI/include - TESTS = /home/tago.kazuma/data/20210301/XMP-Applications/NTCHEM-MINI/tests - SCRATCH = /home/tago.kazuma/scr/ntchem - LOCALBIN = . - LOCALLIB = . - LOCALINC = . - PARALLEL = mpiomp - MPIHOME=/usr/local/openmpi-2.1.1.gnu - #MPIHOME=/usr/local/openmpi-1.8.1.gnu -# XMP-API - USE_XMP_API = yes -# TODO - OMNI_HOME=/data/nfsWORK4/omni_gnu - OMNI_INC=$(OMNI_HOME)/include - OMNI_LIB=-L/data/nfsWORK4/omni_gnu/lib -lxmp -std=gnu99 -lm -fopenmp - - - LDFLAGS_NOMAIN = - - TARGET = LINUX64 - - DMACRO = -UDEBUG -# DMACRO+=-DSUPPORT_R16 -# DMACRO+=-DHAVE_ERF - - INC = -I$(INCLUDE) -I$(LOCALINC) -I$(MPIHOME)/include -I$(OMNI_INC) - MOD = -J$(LOCALINC) - INCMOD = $(INC) $(MOD) - -# FCONVERT = - - F77C = mpif90 - F77FLAGS = -cpp -fopenmp $(DMACRO) $(INCMOD) -m64 -O2 -std=legacy -DNOQUAD - F77FLAGSNOOPT = $(DMACRO) $(INCMOD) -m64 -std=legacy -DNOQUAD -Wuninitialized -Wall -Wunderflow -fbounds-check - - F90C = mpif90 - F90FLAGS = -cpp -fopenmp $(DMACRO) $(INCMOD) -m64 -O2 -std=legacy -DNOQUAD $(OMNI_LIB) -I$(OMNI_INC) - F90FLAGSNOOPT = $(DMACRO) $(INCMOD) -m64 -std=legacy -Wuninitialized -Wall -Wunderflow -fbounds-check -DNOQUAD - - MODSUFFIX = mod - - CC = gcc - CFLAGS = $(INC) -O3 - - CXX = g++ - 
CXXFLAGS = $(INC) -O3 - CXXLIB = -lstdc++ - - MPIFLAGS = -UMPIINT8 - MPILDFLAGS = - - OMPFLAGS = -fopenmp - OMPLDFLAGS = -fopenmp - -# LD = xmpf90 -fc=gfortran - LD = mpif90 - LDFLAGS = -L$(LIB) $(MPILIB) -I$(OMNI_INC) - - AR = ar - ARFLAGS = cr - RANLIB = ranlib - - MAKE = make - - SHELL = /bin/sh - MV = /bin/mv -f - RM = /bin/rm -f - CP = /bin/cp -f - MKDIR = /bin/mkdir - LN = /bin/ln - From 29d18a935ea00fc7e77ec90b21e57641cb31428d Mon Sep 17 00:00:00 2001 From: Yoshikawa Hiroyuki Date: Mon, 15 Mar 2021 14:00:42 +0900 Subject: [PATCH 57/70] modify to use xmp_new_coarray_mem for coarray of characher(:) --- NICAM-DC-MINI/src/share/mod_adm_xmpAPI.f90 | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/NICAM-DC-MINI/src/share/mod_adm_xmpAPI.f90 b/NICAM-DC-MINI/src/share/mod_adm_xmpAPI.f90 index 8d35279..c692260 100755 --- a/NICAM-DC-MINI/src/share/mod_adm_xmpAPI.f90 +++ b/NICAM-DC-MINI/src/share/mod_adm_xmpAPI.f90 @@ -435,10 +435,8 @@ subroutine ADM_proc_stop !coarray character(len=ADM_NSYS) :: request !--- 2020 Fujitsu !character(len=ADM_NSYS) :: request[*] - integer, POINTER :: request(:) => null() + character(len=ADM_NSYS), POINTER :: request => null() integer(8) :: request_desc - integer(8), dimension(1) :: request_lb, request_ub - integer(8) :: request_sec integer(4), dimension(1) :: img_dims !--- 2020 Fujitsu end integer :: ierr @@ -446,17 +444,14 @@ subroutine ADM_proc_stop !--------------------------------------------------------------------------- !--- 2020 Fujitsu - request_lb(1) = 1; request_ub(1) = ADM_NSYS - call xmp_new_coarray(request_desc, 1, 1, request_lb, request_ub, 1, img_dims) + call xmp_new_coarray_mem(request_desc, ADM_NSYS, 1, img_dims) call xmp_coarray_bind(request_desc, request) - call xmp_new_array_section(request_sec, 1) !--- 2020 Fujitsu end if ( ADM_run_type == ADM_MULTI_PRC ) then write(ADM_LOG_FID,*) write(ADM_LOG_FID,*) 'MPI process going to STOP...' 
- !!!request='STOP' - request=1 + request='STOP' !coarray ! call MPI_BCAST( request, & !--- starting address ! ADM_NSYS, & !--- number of array @@ -469,9 +464,8 @@ subroutine ADM_proc_stop if(ll /= ADM_prc_me) then !--- 2020 Fujitsu !request[ll] = request - call xmp_array_section_set_triplet(request_sec, 1, int(1,kind=8),int(ADM_NSYS,kind=8),1, ierr) img_dims(1) = ll - call xmp_coarray_put(img_dims, request_desc,request_sec, request_desc,request_sec, ierr) + call xmp_coarray_mem_put(img_dims, request_desc, request, ADM_NSYS, ierr) !--- 2020 Fujitsu end endif end do From c041a1fe437eb27858403c0b3ca94eb243300d1e Mon Sep 17 00:00:00 2001 From: Yoshikawa Hiroyuki Date: Tue, 16 Mar 2021 02:18:29 +0900 Subject: [PATCH 58/70] modify Mkjobshell of NICAM --- NICAM-DC-MINI/sysdep/Mkjobshell.Fugaku-xmpAPI.sh | 5 ++--- NICAM-DC-MINI/sysdep/Mkjobshell.Linux64-gnu-ompi-xmpAPI.sh | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/NICAM-DC-MINI/sysdep/Mkjobshell.Fugaku-xmpAPI.sh b/NICAM-DC-MINI/sysdep/Mkjobshell.Fugaku-xmpAPI.sh index 0c614f4..a2e87cb 100644 --- a/NICAM-DC-MINI/sysdep/Mkjobshell.Fugaku-xmpAPI.sh +++ b/NICAM-DC-MINI/sysdep/Mkjobshell.Fugaku-xmpAPI.sh @@ -61,8 +61,7 @@ cat << EOF1 > run.sh # ################################################################################ #PJM --rsc-list "node=${NMPI}" -#PJM --rsc-list "elapse=02:00:00" -#PJM --mpi "use-rankdir" +#PJM --rsc-list "elapse=01:00:00" #PJM -j #PJM -s # @@ -70,7 +69,7 @@ export PARALLEL=8 export OMP_NUM_THREADS=8 # run -${MPIEXEC} ./${BINNAME} || exit +${MPIEXEC} ./${BINNAME} -Wl,-T || exit ################################################################################ EOF1 diff --git a/NICAM-DC-MINI/sysdep/Mkjobshell.Linux64-gnu-ompi-xmpAPI.sh b/NICAM-DC-MINI/sysdep/Mkjobshell.Linux64-gnu-ompi-xmpAPI.sh index a67bd94..8156788 100644 --- a/NICAM-DC-MINI/sysdep/Mkjobshell.Linux64-gnu-ompi-xmpAPI.sh +++ b/NICAM-DC-MINI/sysdep/Mkjobshell.Linux64-gnu-ompi-xmpAPI.sh @@ -57,7 +57,7 @@ done cat 
<< EOF2 >> run.sh # run -${MPIEXEC} ./${BINNAME} || exit +${MPIEXEC} -x OMNI_ONESIDED_HEAP_SIZE=28192M ./${BINNAME} || exit ################################################################################ EOF2 From bd8bbaaf308ee4c59d7e674371a81ba0211928a5 Mon Sep 17 00:00:00 2001 From: Kazuma Tago Date: Tue, 16 Mar 2021 11:50:46 +0900 Subject: [PATCH 59/70] [WIP] Update make_setting file of NTCHEM for XMP-API. --- MODYLAS-MINI/src/make_setting.xmp_api | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/MODYLAS-MINI/src/make_setting.xmp_api b/MODYLAS-MINI/src/make_setting.xmp_api index 71e1a5e..f014422 100755 --- a/MODYLAS-MINI/src/make_setting.xmp_api +++ b/MODYLAS-MINI/src/make_setting.xmp_api @@ -14,7 +14,5 @@ OMNI_INC = -I$(OMNI_HOME)/include OMNI_LIB = $(shell xmpcc --show-env | grep OMNI_LINKER_OPT | sed -e 's/OMNI_LINKER_OPT=//' -e 's/"//g') -FFLAGS += -I$(MPIHOME)/include $(OMNI_LIB) $(OMNI_INC) - -# TODO: temporal option -#FFLAGS += -fcoarray=lib +LIBS = $(OMNI_LIB) +FFLAGS += -I$(MPIHOME)/include $(OMNI_INC) From 7735d93adffdddede93ec607fc706cb1d78b46c5 Mon Sep 17 00:00:00 2001 From: Kazuma Tago Date: Tue, 16 Mar 2021 12:06:04 +0900 Subject: [PATCH 60/70] Update coarray to xmp-api in NTCHAM. 
--- MODYLAS-MINI/src/xmpAPI_comm.f | 412 ++++++++- MODYLAS-MINI/src/xmpAPI_comm_3.f | 765 +++++++++++++++-- MODYLAS-MINI/src/xmpAPI_comm_fmm.f | 1009 ++++++++++++++++++++--- MODYLAS-MINI/src/xmpAPI_fmodules.f | 71 +- MODYLAS-MINI/src/xmpAPI_k_energy.f | 8 +- MODYLAS-MINI/src/xmpAPI_mpitool.f | 5 +- MODYLAS-MINI/src/xmpAPI_nve_integrate.f | 7 +- 7 files changed, 2059 insertions(+), 218 deletions(-) diff --git a/MODYLAS-MINI/src/xmpAPI_comm.f b/MODYLAS-MINI/src/xmpAPI_comm.f index 969c883..0fce60f 100755 --- a/MODYLAS-MINI/src/xmpAPI_comm.f +++ b/MODYLAS-MINI/src/xmpAPI_comm.f @@ -73,13 +73,9 @@ subroutine init_comm_bound() use trj_mpi use md_fmm_domdiv_flg use mpivar + use xmp_api implicit none integer(4) :: itmp - integer(8), dimension(1) :: irbuff_p_lb, irbuff_p_ub, - & irbuff_m_lb, irbuff_m_ub, irsbuf_p_lb, irsbuf_p_ub, - & irsbuf_m_lb, irsbuf_m_ub - integer(8), dimension(2) :: rbuff_p_lb,rbuff_p_ub, - & rbuff_m_lb, rbuff_m_ub npz = nzdiv npy = nydiv @@ -105,22 +101,59 @@ subroutine init_comm_bound() allocate( isbucket(max_mvseg, nczdiv+2, ncydiv+2, ncxdiv+2)) allocate( ncseg(nczdiv+2, ncydiv+2, ncxdiv+2)) allocate( ncatom(nczdiv+2, ncydiv+2, ncxdiv+2)) + allocate(buffp (6,max_cellcbd*max_mvatom)) + buffp_lb(1) = 1 + buffp_lb(2) = 1 + buffp_ub(1) = 6 + buffp_ub(2) = max_cellcbd*max_mvatom + call xmp_new_local_array(buffp_local_desc,8,2, + & buffp_lb,buffp_ub,loc(buffp)) + call xmp_new_array_section(buffp_local_sec,2) + allocate(buffm (6,max_cellcbd*max_mvatom)) + buffm_lb(1) = 1 + buffm_lb(2) = 1 + buffm_ub(1) = 6 + buffm_ub(2) = max_cellcbd*max_mvatom + call xmp_new_local_array(buffm_local_desc,8,2, + & buffm_lb,buffm_ub,loc(buffm)) + call xmp_new_array_section(buffm_local_sec,2) + allocate(ibuffp ( max_cellcbd*max_mvatom)) + ibuffp_lb(1) = 1 + ibuffp_ub(1) = max_cellcbd*max_mvatom + call xmp_new_local_array(ibuffp_local_desc,8,1, + & ibuffp_lb,ibuffp_ub,loc(ibuffp)) + call xmp_new_array_section(ibuffp_local_sec,1) + allocate(ibuffm ( max_cellcbd*max_mvatom)) + 
allocate(isbufp (2*max_cellcbd + 1 + max_cellcbd*max_mvseg)) allocate(isbufm (2*max_cellcbd + 1 + max_cellcbd*max_mvseg)) + isbufp_lb(1) = 1 + isbufp_ub(1) = max_cellcbd*max_mvatom + isbufm_lb(1) = 1 + isbufm_ub(1) = max_cellcbd*max_mvatom + call xmp_new_local_array(isbufp_local_desc,4,1, + & isbufp_lb,isbufp_ub,loc(isbufp)) + call xmp_new_local_array(isbufm_local_desc,4,1, + & isbufm_lb,isbufm_ub,loc(isbufm)) + call xmp_new_array_section(isbufp_local_sec,1) + call xmp_new_array_section(isbufm_local_sec,1) + + allocate( ncatmw(32, nczdiv+2, ncydiv+2, ncxdiv+2) ) ! !allocate(rbuff_p (6,max_cellcbd*max_mvatom)[*]) rbuff_p_lb(1) = 1 rbuff_p_lb(2) = 1 - rbuff_p_ub(1) = max_cellcbd*max_mvatom - rbuff_p_ub(2) = 6 + rbuff_p_ub(1) = 6 + rbuff_p_ub(2) = max_cellcbd*max_mvatom call xmp_new_coarray(rbuff_p_desc,8,2, - & rbuff_p_lb,rbuff_p_ub,1,comm_bd_img_dims) + & rbuff_p_lb,rbuff_p_ub,1,img_dims) call xmp_coarray_bind(rbuff_p_desc,rbuff_p) + call xmp_new_array_section(rbuff_p_sec,2) ! !allocate(rbuff_m (6,max_cellcbd*max_mvatom)[*]) rbuff_m_lb(1) = 1 @@ -128,36 +161,41 @@ subroutine init_comm_bound() rbuff_m_ub(1) = max_cellcbd*max_mvatom rbuff_m_ub(2) = 6 call xmp_new_coarray(rbuff_m_desc,8,2, - & rbuff_m_lb,rbuff_m_ub,1,comm_bd_img_dims) + & rbuff_m_lb,rbuff_m_ub,1,img_dims) call xmp_coarray_bind(rbuff_m_desc,rbuff_m) + call xmp_new_array_section(rbuff_m_sec,2) ! !allocate(irbuff_p( max_cellcbd*max_mvatom)[*]) irbuff_p_lb(1) = 1 irbuff_p_ub(1) = max_cellcbd*max_mvatom call xmp_new_coarray(irbuff_p_desc,4,1, - & irbuff_p_lb,irbuff_p_ub,1,comm_bd_img_dims) + & irbuff_p_lb,irbuff_p_ub,1,img_dims) call xmp_coarray_bind(irbuff_p_desc,irbuff_p) + call xmp_new_array_section(irbuff_p_sec,1) ! 
!allocate(irbuff_m( max_cellcbd*max_mvatom)[*]) irbuff_m_lb(1) = 1 irbuff_m_ub(1) = max_cellcbd*max_mvatom call xmp_new_coarray(irbuff_m_desc,4,1, - & irbuff_m_lb,irbuff_m_ub,1,comm_bd_img_dims) + & irbuff_m_lb,irbuff_m_ub,1,img_dims) call xmp_coarray_bind(irbuff_m_desc,irbuff_m) + call xmp_new_array_section(irbuff_m_sec,1) ! !allocate(irsbuf_p(2*max_cellcbd + 1 + max_cellcbd*max_mvseg)[*]) irsbuf_p_lb(1) = 1 irsbuf_p_ub(1) = 2*max_cellcbd + 1 + max_cellcbd*max_mvseg call xmp_new_coarray(irsbuf_p_desc,4,1, - & irsbuf_p_lb,irsbuf_p_ub,1,comm_bd_img_dims) + & irsbuf_p_lb,irsbuf_p_ub,1,img_dims) call xmp_coarray_bind(irsbuf_p_desc,irsbuf_p) + call xmp_new_array_section(irsbuf_p_sec,1) ! !allocate(irsbuf_m(2*max_cellcbd + 1 + max_cellcbd*max_mvseg)[*]) irsbuf_m_lb(1) = 1 irsbuf_m_ub(1) = 2*max_cellcbd + 1 + max_cellcbd*max_mvseg call xmp_new_coarray(irsbuf_m_desc,4,1, - & irsbuf_m_lb,irsbuf_m_ub,1,comm_bd_img_dims) + & irsbuf_m_lb,irsbuf_m_ub,1,img_dims) call xmp_coarray_bind(irsbuf_m_desc,irsbuf_m) + call xmp_new_array_section(irsbuf_m_sec,1) return end @@ -535,12 +573,16 @@ subroutine comm_bound() !coarray & irsbuf_p, ncsr, MPI_INTEGER, !coarray & ipz_src, ipz_src, !coarray & mpi_comm_world, istatus, ierr ) - call xmp_new_array_section(irsbuf_p_sec,1) - call xmp_array_section_set_triplet(irsbuf_p_sec,1,1, - & ncs+1,1,status) - comm_bd_img_dims(1) = ipz_dest+1 - irsbuf_p(1:ncs+1)[ipz_dest+1] = isbufp(1:ncs+1) ! Put + img_dims(1) = ipz_dest+1 + ! irsbuf_p(1:ncs+1)[ipz_dest+1] = isbufp(1:ncs+1) ! 
Put + call xmp_array_section_set_triplet(isbufp_local_sec, + & 1,int(1,kind=8),int(ncs+1,kind=8),1,status) + call xmp_array_section_set_triplet(irsbuf_p_sec, + & 1,int(1,kind=8),int(ncs+1,kind=8),1,status) + img_dims(1) = ipz_dest+1 + call xmp_coarray_put_local(img_dims,irsbuf_p_desc,irsbuf_p_sec, + & isbufp_local_desc,isbufp_local_sec,status) !sync all call xmp_sync_all(status) @@ -552,7 +594,20 @@ subroutine comm_bound() !coarray & rbuff_p, 6*ncar, MPI_DOUBLE_PRECISION, !coarray & ipz_src, ipz_src, !coarray & mpi_comm_world, istatus, ierr ) - rbuff_p(:,1:nca)[ipz_dest+1] = buffp(:,1:nca) ! Put + !rbuff_p(:,1:nca)[ipz_dest+1] = buffp(:,1:nca) ! Put + call xmp_array_section_set_triplet(rbuff_p_sec,1,int(1,kind=8), + & int(6,kind=8),1,status) + call xmp_array_section_set_triplet(rbuff_p_sec,2,int(1,kind=8), + & int(nca,kind=8),1,status) + + call xmp_array_section_set_triplet(buffp_local_sec, + & 1,int(1,kind=8),int(6,kind=8),1,status) + call xmp_array_section_set_triplet(buffp_local_desc, + & 2,int(1,kind=8),int(nca,kind=8),1,status) + + img_dims(1) = ipz_dest+1 + call xmp_coarray_put_local(img_dims,rbuff_p_desc,rbuff_p_sec, + & buffp_local_desc,buffp_local_sec,status) ! sync all call xmp_sync_all(status) !! @@ -562,7 +617,18 @@ subroutine comm_bound() !coarray & irbuff_p, ncar, MPI_INTEGER, !coarray & ipz_src, ipz_src, !coarray & mpi_comm_world, istatus, ierr ) - irbuff_p(1:nca)[ipz_dest+1] = ibuffp(1:nca) ! Put + !irbuff_p(1:nca)[ipz_dest+1] = ibuffp(1:nca) ! Put + call xmp_array_section_set_triplet(irbuff_p_sec,1,int(1,kind=8), + & int(nca,kind=8),1,status) + + call xmp_array_section_set_triplet(ibuffp_local_sec, + & 1,int(1,kind=8),int(nca,kind=8),1,status) + + img_dims(1) = ipz_dest+1 + call xmp_coarray_put_local(img_dims,irbuff_p_desc,irbuff_p_sec, + & ibuffp_local_desc,ibuffp_local_sec,status) + + ! 
sync all call xmp_sync_all(status) @@ -703,7 +769,20 @@ subroutine comm_bound() !coarray & irsbuf_m, ncsr, MPI_INTEGER, !coarray & ipz_src, ipz_src, !coarray & mpi_comm_world, istatus, ierr ) - irsbuf_m(1:ncs+1)[ipz_dest+1] = isbufm(1:ncs+1) ! Put + !irsbuf_m(1:ncs+1)[ipz_dest+1] = isbufm(1:ncs+1) ! Put + + call xmp_array_section_set_triplet(irsbuf_m_sec,1,int(1,kind=8), + & int(ncs+1,kind=8),1,status) + + call xmp_array_section_set_triplet(isbufm_local_sec, + & 1,int(1,kind=8),int(ncs+1,kind=8),1,status) + + img_dims(1) = ipz_dest+1 + call xmp_coarray_put_local(img_dims,irsbuf_m_desc,irsbuf_m_sec, + & isbufm_local_desc,isbufm_local_sec,status) + + + ! sync all call xmp_sync_all(status) !! @@ -714,7 +793,23 @@ subroutine comm_bound() !coarray & rbuff_m, 6*ncar, MPI_DOUBLE_PRECISION, !coarray & ipz_src, ipz_src, !coarray & mpi_comm_world, istatus, ierr ) - rbuff_m(1:6,1:nca)[ipz_dest+1] = buffm(1:6,1:nca) ! Put + !rbuff_m(1:6,1:nca)[ipz_dest+1] = buffm(1:6,1:nca) ! Put + + call xmp_array_section_set_triplet(rbuff_m_sec, + & 1,int(1,kind=8),int(6,kind=8),1,status) + call xmp_array_section_set_triplet(rbuff_m_sec, + & 2,int(1,kind=8),int(nca,kind=8),1,status) + + call xmp_array_section_set_triplet(buffm_local_sec, + & 1,int(1,kind=8),int(6,kind=8),1,status) + call xmp_array_section_set_triplet(buffm_local_sec, + & 2,int(1,kind=8),int(nca,kind=8),1,status) + + img_dims(1) = ipz_dest+1 + call xmp_coarray_put_local(img_dims,rbuff_m_desc,rbuff_m_sec, + & buffm_local_desc,buffm_local_sec,status) + + ! sync all call xmp_sync_all(status) !! @@ -724,7 +819,19 @@ subroutine comm_bound() !coarray & irbuff_m, ncar, MPI_INTEGER, !coarray & ipz_src, ipz_src, !coarray & mpi_comm_world, istatus, ierr ) - irbuff_m(1:nca)[ipz_dest+1] = ibuffm(1:nca) ! Put + !irbuff_m(1:nca)[ipz_dest+1] = ibuffm(1:nca) ! 
Put + + call xmp_array_section_set_triplet(irbuff_m_sec, + & 1,int(1,kind=8),int(nca,kind=8),1,status) + + call xmp_array_section_set_triplet(ibuffm_local_sec, + & 1,int(1,kind=8),int(nca,kind=8),1,status) + + img_dims(1) = ipz_dest+1 + call xmp_coarray_put_local(img_dims,irbuff_m_desc,irbuff_m_sec, + & ibuffm_local_desc,ibuffm_local_sec,status) + + ! sync all call xmp_sync_all(status) !! @@ -1063,7 +1170,20 @@ subroutine comm_bound() !coarray & mpi_comm_world, istatus, ierr ) ! sync all ! do Not change call xmp_sync_all(status) - irsbuf_p(1:ncsp+1)[ipy_dest+1] = isbufp(1:ncsp+1) ! Put + !irsbuf_p(1:ncsp+1)[ipy_dest+1] = isbufp(1:ncsp+1) ! Put + + call xmp_array_section_set_triplet(irsbuf_p_sec, + & 1,int(1,kind=8),int(ncsp+1,kind=8),1,status) + + call xmp_array_section_set_triplet(isbufp_local_sec, + & 1,int(1,kind=8),int(ncsp+1,kind=8),1,status) + + img_dims(1) = ipy_dest+1 + call xmp_coarray_put_local(img_dims,irsbuf_p_desc,irsbuf_p_sec, + & isbufp_local_desc,isbufp_local_sec,status) + + + ! sync all call xmp_sync_all(status) !! @@ -1074,7 +1194,22 @@ subroutine comm_bound() !coarray & rbuff_p, 6*ncarp, MPI_DOUBLE_PRECISION, !coarray & ipy_src, ipy_src, !coarray & mpi_comm_world, istatus, ierr ) - rbuff_p(1:6,1:ncap)[ipy_dest+1] = buffp(1:6,1:ncap) ! Put + + !rbuff_p(1:6,1:ncap)[ipy_dest+1] = buffp(1:6,1:ncap) ! Put + + call xmp_array_section_set_triplet(rbuff_p_sec, + & 1,int(1,kind=8),int(6,kind=8),1,status) + call xmp_array_section_set_triplet(rbuff_p_sec, + & 2,int(1,kind=8),int(ncap,kind=8),1,status) + + call xmp_array_section_set_triplet(buffp_local_sec, + & 1,int(1,kind=8),int(6,kind=8),1,status) + call xmp_array_section_set_triplet(buffp_local_sec, + & 2,int(1,kind=8),int(ncap,kind=8),1,status) + + img_dims(1) = ipy_dest+1 + call xmp_coarray_put_local(img_dims,rbuff_p_desc,rbuff_p_sec, + & buffp_local_desc,buffp_local_sec,status) !! 
!coarray call mpi_sendrecv(ibuffp, ncap, MPI_INTEGER, @@ -1082,7 +1217,19 @@ subroutine comm_bound() !coarray & irbuff_p, ncarp, MPI_INTEGER, !coarray & ipy_src, ipy_src, !coarray & mpi_comm_world, istatus, ierr ) - irbuff_p(1:ncap)[ipy_dest+1] = ibuffp(1:ncap) ! Put + !irbuff_p(1:ncap)[ipy_dest+1] = ibuffp(1:ncap) ! Put + + call xmp_array_section_set_triplet(irbuff_p_sec, + & 1,int(1,kind=8),int(ncap,kind=8),1,status) + + call xmp_array_section_set_triplet(ibuffp_local_sec, + & 1,int(1,kind=8),int(ncap,kind=8),1,status) + + img_dims(1) = ipy_dest+1 + call xmp_coarray_put_local(img_dims,irbuff_p_desc,irbuff_p_sec, + & ibuffp_local_desc,ibuffp_local_sec,status) + + ! sync all call xmp_sync_all(status) !! @@ -1105,7 +1252,18 @@ subroutine comm_bound() !coarray & irsbuf_m, ncsr, MPI_INTEGER, !coarray & ipy_src, ipy_src, !coarray & mpi_comm_world, istatus, ierr ) - irsbuf_m(1:ncsm+1)[ipy_dest+1] = isbufm(1:ncsm+1) ! Put + !irsbuf_m(1:ncsm+1)[ipy_dest+1] = isbufm(1:ncsm+1) ! Put + + call xmp_array_section_set_triplet(irsbuf_m_sec, + & 1,int(1,kind=8),int(ncsm+1,kind=8),1,status) + + call xmp_array_section_set_triplet(isbufm_local_sec, + & 1,int(1,kind=8),int(ncsm+1,kind=8),1,status) + + img_dims(1) = ipy_dest+1 + call xmp_coarray_put_local(img_dims,irsbuf_m_desc,irsbuf_m_sec, + & isbufm_local_desc,isbufm_local_sec,status) + ! sync all call xmp_sync_all(status) !! @@ -1116,7 +1274,22 @@ subroutine comm_bound() !coarray & rbuff_m, 6*ncarm, MPI_DOUBLE_PRECISION, !coarray & ipy_src, ipy_src, !coarray & mpi_comm_world, istatus, ierr ) - rbuff_m(1:6,1:ncam)[ipy_dest+1] = buffm(1:6,1:ncam) ! Put + !rbuff_m(1:6,1:ncam)[ipy_dest+1] = buffm(1:6,1:ncam) ! 
Put + + call xmp_array_section_set_triplet(rbuff_m_sec, + & 1,int(1,kind=8),int(6,kind=8),1,status) + call xmp_array_section_set_triplet(rbuff_m_sec, + & 2,int(1,kind=8),int(ncam,kind=8),1,status) + + call xmp_array_section_set_triplet(buffm_local_sec, + & 1,int(1,kind=8),int(6,kind=8),1,status) + call xmp_array_section_set_triplet(buffm_local_sec, + & 2,int(1,kind=8),int(ncam,kind=8),1,status) + + img_dims(1) = ipy_dest+1 + call xmp_coarray_put_local(img_dims,rbuff_m_desc,rbuff_m_sec, + & buffm_local_desc,buffm_local_sec,status) + !! !coarray call mpi_sendrecv(ibuffm, ncam, MPI_INTEGER, @@ -1124,7 +1297,17 @@ subroutine comm_bound() !coarray & irbuff_m, ncarm, MPI_INTEGER, !coarray & ipy_src, ipy_src, !coarray & mpi_comm_world, istatus, ierr ) - irbuff_m(1:ncam)[ipy_dest+1] = ibuffm(1:ncam) ! Put + !irbuff_m(1:ncam)[ipy_dest+1] = ibuffm(1:ncam) ! Put + + call xmp_array_section_set_triplet(irbuff_m_sec, + & 1,int(1,kind=8),int(6,kind=8),1,status) + + call xmp_array_section_set_triplet(ibuffm_local_sec, + & 1,int(1,kind=8),int(6,kind=8),1,status) + + img_dims(1) = ipy_dest+1 + call xmp_coarray_put_local(img_dims,irbuff_m_desc,irbuff_m_sec, + & ibuffm_local_desc,ibuffm_local_sec,status) ! sync all call xmp_sync_all(status) !! @@ -1364,7 +1547,17 @@ subroutine comm_bound() !coarray & mpi_comm_world, istatus, ierr ) ! sync all ! do Not change call xmp_sync_all(status) - irsbuf_p(1:ncs+1)[ipx_dest+1] = isbufp(1:ncs+1) ! Put + !irsbuf_p(1:ncs+1)[ipx_dest+1] = isbufp(1:ncs+1) ! Put + + call xmp_array_section_set_triplet(irsbuf_p_sec, + & 1,int(1,kind=8),int(ncs+1,kind=8),1,status) + + call xmp_array_section_set_triplet(isbufp_local_sec, + & 1,int(1,kind=8),int(ncs+1,kind=8),1,status) + + img_dims(1) = ipy_dest+1 + call xmp_coarray_put_local(img_dims,irsbuf_p_desc,irsbuf_p_sec, + & isbufp_local_desc,isbufp_local_sec,status) ! sync all call xmp_sync_all(status) !! 
@@ -1375,7 +1568,22 @@ subroutine comm_bound() !coarray & rbuff_p, 6*ncar, MPI_DOUBLE_PRECISION, !coarray & ipx_src, ipx_src, !coarray & mpi_comm_world, istatus, ierr ) - rbuff_p(1:6,1:nca)[ipx_dest+1] = buffp(1:6,1:nca) ! Put + !rbuff_p(1:6,1:nca)[ipx_dest+1] = buffp(1:6,1:nca) ! Put + + call xmp_array_section_set_triplet(rbuff_p_sec, + & 1,int(1,kind=8),int(6,kind=8),1,status) + call xmp_array_section_set_triplet(rbuff_p_sec, + & 2,int(1,kind=8),int(nca,kind=8),1,status) + + call xmp_array_section_set_triplet(buffp_local_sec, + & 1,int(1,kind=8),int(6,kind=8),1,status) + call xmp_array_section_set_triplet(buffp_local_sec, + & 2,int(1,kind=8),int(nca,kind=8),1,status) + + img_dims(1) = ipy_dest+1 + call xmp_coarray_put_local(img_dims,rbuff_p_desc,rbuff_p_sec, + & buffp_local_desc,buffp_local_sec,status) + !! !coarray call mpi_sendrecv(ibuffp, nca, MPI_INTEGER, @@ -1383,7 +1591,18 @@ subroutine comm_bound() !coarray & irbuff_p, ncar, MPI_INTEGER, !coarray & ipx_src, ipx_src, !coarray & mpi_comm_world, istatus, ierr ) - irbuff_p(1:nca)[ipx_dest+1] = ibuffp(1:nca) ! Put + !irbuff_p(1:nca)[ipx_dest+1] = ibuffp(1:nca) ! Put + + call xmp_array_section_set_triplet(irbuff_p_sec, + & 1,int(1,kind=8),int(nca,kind=8),1,status) + + call xmp_array_section_set_triplet(ibuffp_local_sec, + & 1,int(1,kind=8),int(nca,kind=8),1,status) + + img_dims(1) = ipx_dest+1 + call xmp_coarray_put_local(img_dims,irbuff_p_desc,irbuff_p_sec, + & ibuffp_local_desc,ibuffp_local_sec,status) + ! sync all call xmp_sync_all(status) !! @@ -1460,7 +1679,18 @@ subroutine comm_bound() !coarray & mpi_comm_world, istatus, ierr ) ! sync all call xmp_sync_all(status) - irsbuf_m(1:ncs+1)[ipx_dest+1] = isbufm(1:ncs+1) ! Put + !irsbuf_m(1:ncs+1)[ipx_dest+1] = isbufm(1:ncs+1) ! 
Put + + call xmp_array_section_set_triplet(irsbuf_m_sec, + & 1,int(1,kind=8),int(ncs+1,kind=8),1,status) + + call xmp_array_section_set_triplet(isbufm_local_sec, + & 1,int(1,kind=8),int(nca,kind=8),1,status) + + img_dims(1) = ipx_dest+1 + call xmp_coarray_put_local(img_dims,irsbuf_m_desc,irsbuf_m_sec, + & isbufm_local_desc,isbufm_local_sec,status) + ! sync all call xmp_sync_all(status) !! @@ -1471,7 +1701,22 @@ subroutine comm_bound() !coarray & rbuff_m, 6*ncar, MPI_DOUBLE_PRECISION, !coarray & ipx_src, ipx_src, !coarray & mpi_comm_world, istatus, ierr ) - rbuff_m(1:6,1:nca)[ipx_dest+1] = buffm(1:6,1:nca) ! Put + !rbuff_m(1:6,1:nca)[ipx_dest+1] = buffm(1:6,1:nca) ! Put + + call xmp_array_section_set_triplet(rbuff_m_sec, + & 1,int(1,kind=8),int(6,kind=8),1,status) + call xmp_array_section_set_triplet(rbuff_m_sec, + & 2,int(1,kind=8),int(nca,kind=8),1,status) + + call xmp_array_section_set_triplet(buffm_local_sec, + & 1,int(1,kind=8),int(6,kind=8),1,status) + call xmp_array_section_set_triplet(buffm_local_sec, + & 2,int(1,kind=8),int(nca,kind=8),1,status) + + img_dims(1) = ipx_dest+1 + call xmp_coarray_put_local(img_dims,rbuff_m_desc,rbuff_m_sec, + & buffm_local_desc,buffm_local_sec,status) + !! !coarray call mpi_sendrecv(ibuffm, nca, MPI_INTEGER, @@ -1479,7 +1724,18 @@ subroutine comm_bound() !coarray & irbuff_m, ncar, MPI_INTEGER, !coarray & ipx_src, ipx_src, !coarray & mpi_comm_world, istatus, ierr ) - irbuff_m(1:nca)[ipx_dest+1] = ibuffm(1:nca) ! Put + !irbuff_m(1:nca)[ipx_dest+1] = ibuffm(1:nca) ! Put + + call xmp_array_section_set_triplet(irbuff_m_sec, + & 1,int(1,kind=8),int(nca,kind=8),1,status) + + call xmp_array_section_set_triplet(ibuffm_local_sec, + & 1,int(1,kind=8),int(nca,kind=8),1,status) + + img_dims(1) = ipx_dest+1 + call xmp_coarray_put_local(img_dims,irbuff_m_desc,irbuff_m_sec, + & ibuffm_local_desc,ibuffm_local_sec,status) + ! sync all call xmp_sync_all(status) !! 
@@ -1809,42 +2065,60 @@ subroutine pre_record_data use md_fmm_domdiv_flg use md_segment use mpivar + use xmp_api implicit none integer(4) :: i,nsum integer(4) :: i0,k0,i00 include 'mpif.h' integer(4) :: ierr real(8),allocatable :: snd(:,:),rcv(:,:) + integer(8) :: snd_local_sec + integer(8) :: snd_local_desc + integer(8), dimension(2) :: snd_lb, snd_ub + !coarray integer(4),allocatable :: natmlist(:),natmdisp(:) integer(4),allocatable :: natmdisp(:) ! integer(4),allocatable :: natmlist(:)[:] integer(4), POINTER :: natmlist(:) => null () integer(8) :: natmlist_desc + integer(8) :: natmlist_sec + integer(8), dimension(1) :: natmlist_lb, natmlist_ub integer(4),allocatable :: natmlist_tmp(:) ! integer,allocatable :: ndis(:)[:], mdis(:)[:] integer, POINTER :: ndis(:) => null () integer, POINTER :: mdis(:) => null () - integer(8) :: ndis_desc + integer(8) :: mdis_sec integer(8) :: mdis_desc + integer(8), dimension(1) :: mdis_lb, mdis_ub + + integer(8) :: ndis_desc + integer(8), dimension(1) :: ndis_lb, ndis_ub + ! real(8),allocatable :: rcvx(:,:)[:] real(8), POINTER :: rcvx(:,:) => null () integer(8) :: rcvx_desc + integer(8) :: rcvx_sec + integer(8), dimension(2) :: rcvx_lb, rcvx_ub integer :: me, np, ms, mm !! integer(4),allocatable :: nrearrange(:) integer(4) :: m2i_tmp(na1cell*lxdiv*lydiv*lzdiv) + integer(8) :: m2i_tmp_local_sec + integer(8) :: m2i_tmp_local_desc + integer(8),dimension(1) :: m2i_tmp_lb,m2i_tmp_ub + integer(4) :: img_dims(1) - integer(8), dimension(1) :: natmlist_lb, natmlist_ub, - & ndis_lb, ndis_ub, mdis_lb, mdis_ub - integer(8), dimension(2) :: rcvx_lb, rcvx_ub + integer(4) :: status !coarray - me = this_image() - np = num_images() + !me = this_image() + me = xmp_this_image() + !np = num_images() + np = xmp_num_images() ! allocate(ndis(np)[*]) ! allocate(mdis(n)[*]) ! 
allocate(rcvx(6,n)[*]) @@ -1863,6 +2137,12 @@ subroutine pre_record_data call xmp_coarray_bind(mdis_desc,mdis) call xmp_coarray_bind(rcvx_desc,rcvx) + m2i_tmp_lb(1) = 1 + m2i_tmp_ub(1) = na1cell*lxdiv*lydiv*lzdiv + call xmp_new_local_array(m2i_tmp_local_desc,8,1, + & m2i_tmp_lb,m2i_tmp_ub,loc(m2i_tmp)) + call xmp_new_array_section(m2i_tmp_local_sec,1) + !! if(nprocs.eq.1) then @@ -1878,6 +2158,15 @@ subroutine pre_record_data else allocate(nrearrange(n)) allocate(snd(6,n)) + snd_lb(1) = 1 + snd_lb(2) = 1 + snd_ub(1) = 6 + snd_ub(2) = n + call xmp_new_local_array(snd_local_desc,8,2, + & snd_lb,snd_ub,loc(snd)) + call xmp_new_array_section(snd_local_sec,2) + + allocate(rcv(6,n)) !coarray allocate(natmlist(nprocs),natmdisp(nprocs)) !allocate(natmlist(nprocs)[*]) @@ -1886,6 +2175,7 @@ subroutine pre_record_data call xmp_new_coarray(natmlist_desc,8,2, & natmlist_lb,natmlist_ub,1,img_dims) call xmp_coarray_bind(natmlist_desc,natmlist) + call xmp_new_array_section(natmlist_sec,2) allocate(natmlist_tmp(nprocs)) allocate(natmdisp(nprocs)) @@ -1912,7 +2202,13 @@ subroutine pre_record_data !coarray! !coarray natmdisp(1) = 0 do mm = 1,np - natmlist(me)[mm] = nselfatm ! Put + !natmlist(me)[mm] = nselfatm ! Put + call xmp_array_section_set_triplet(natmlist_sec, + & 1,int(me,kind=8),int(1,kind=8),1,status) + img_dims(1) = mm + call xmp_coarray_put_scalar(img_dims,natmlist_desc, + & natmlist_sec,nselfatm,status) + ! 
sync all call xmp_sync_all(status) enddo @@ -1928,7 +2224,19 @@ subroutine pre_record_data !coarray & nrearrange,natmlist,natmdisp,mpi_integer, !coarray & mpiout,mpi_comm_world,ierr) ms = natmdisp(me) - mdis(ms:ms+nselfatm-1)[mpiout+1] = m2i_tmp(1:nselfatm) + !mdis(ms:ms+nselfatm-1)[mpiout+1] = m2i_tmp(1:nselfatm) + + call xmp_array_section_set_triplet(mdis_sec, + & 1,int(ms,kind=8),int(ms+nselfatm-1,kind=8),1,status) + + call xmp_array_section_set_triplet(m2i_tmp_local_sec, + & 1,int(1,kind=8),int(nselfatm,kind=8),1,status) + + img_dims(1) = mpiout+1 + call xmp_coarray_put_local(img_dims,mdis_desc,mdis_sec, + & m2i_tmp_local_desc,m2i_tmp_local_sec,status) + + ! sync all call xmp_sync_all(status) nrearrange = mdis @@ -1958,7 +2266,23 @@ subroutine pre_record_data !coarray & rcv,natmlist,natmdisp,mpi_double_precision, !coarray & mpiout,mpi_comm_world,ierr) ms = natmdisp(me)/6 - rcvx(1:6,ms:ms+nselfatm-1)[mpiout+1] = snd(1:6,1:nselfatm) + !rcvx(1:6,ms:ms+nselfatm-1)[mpiout+1] = snd(1:6,1:nselfatm) + + call xmp_array_section_set_triplet(rcvx_sec, + & 1,int(1,kind=8),int(6,kind=8),1,status) + call xmp_array_section_set_triplet(rcvx_sec, + & 2,int(ms,kind=8),int(ms+nselfatm-1,kind=8),1,status) + + + call xmp_array_section_set_triplet(snd_local_sec, + & 1,int(1,kind=8),int(6,kind=8),1,status) + call xmp_array_section_set_triplet(snd_local_sec, + & 2,int(1,kind=8),int(nselfatm,kind=8),1,status) + + img_dims(1) = mpiout+1 + call xmp_coarray_put_local(img_dims,rcvx_desc,rcvx_sec, + & snd_local_desc,snd_local_sec,status) + ! 
sync all call xmp_sync_all(status) rcv = rcvx diff --git a/MODYLAS-MINI/src/xmpAPI_comm_3.f b/MODYLAS-MINI/src/xmpAPI_comm_3.f index 8960179..88a5de9 100755 --- a/MODYLAS-MINI/src/xmpAPI_comm_3.f +++ b/MODYLAS-MINI/src/xmpAPI_comm_3.f @@ -33,6 +33,7 @@ subroutine init_comm_direct_3() use md_fmm use md_fmm_domdiv_flg use mpivar + use xmp_api implicit none INCLUDE 'mpif.h' @@ -46,18 +47,110 @@ subroutine init_comm_direct_3() ncydiv = lydiv nczdiv = lzdiv - allocate(icbufp ((ncell/npy)*(ncell/npx)*2)[*]) - allocate(ircbufp((ncell/npy)*(ncell/npx)*2)[*]) - allocate(icbufm ((ncell/npy)*(ncell/npx)*2)[*]) - allocate(ircbufm((ncell/npy)*(ncell/npx)*2)[*]) - allocate(ibuffp (na1cell*(ncell/npy)*(ncell/npx)*2)[*]) - allocate(irbuffp(na1cell*(ncell/npy)*(ncell/npx)*2)[*]) - allocate(ibuffm (na1cell*(ncell/npy)*(ncell/npx)*2)[*]) - allocate(irbuffm(na1cell*(ncell/npy)*(ncell/npx)*2)[*]) - allocate(buffp (3,na1cell*(ncell/npy)*(ncell/npx)*2)[*]) - allocate(rbuffp(3,na1cell*(ncell/npy)*(ncell/npx)*2)[*]) - allocate(buffm (3,na1cell*(ncell/npy)*(ncell/npx)*2)[*]) - allocate(rbuffm(3,na1cell*(ncell/npy)*(ncell/npx)*2)[*]) +! allocate(icbufp ((ncell/npy)*(ncell/npx)*2)[*]) + icbufp_lb(1) = 1 + icbufp_ub(1) = (ncell/npy)*(ncell/npx)*2 + call xmp_new_coarray(icbufp_desc,4,1, + & icbufp_lb,icbufp_ub,1,img_dims) + call xmp_coarray_bind(icbufp_desc,icbufp) + call xmp_new_array_section(icbufp_sec,1) + +! 
allocate(ircbufp((ncell/npy)*(ncell/npx)*2)[*]) + ircbufp_lb(1) = 1 + ircbufp_ub(1) = (ncell/npy)*(ncell/npx)*2 + call xmp_new_coarray(ircbufp_desc,4,1, + & ircbufp_lb,ircbufp_ub,1,img_dims) + call xmp_coarray_bind(ircbufp_desc,ircbufp) + call xmp_new_array_section(ircbufp_sec,1) + + !allocate(icbufm ((ncell/npy)*(ncell/npx)*2)[*]) + icbufm_lb(1) = 1 + icbufm_ub(1) = (ncell/npy)*(ncell/npx)*2 + call xmp_new_coarray(icbufm_desc,4,1, + & icbufm_lb,icbufm_ub,1,img_dims) + call xmp_coarray_bind(icbufm_desc,icbufm) + call xmp_new_array_section(icbufm_sec,1) + + !allocate(ircbufm((ncell/npy)*(ncell/npx)*2)[*]) + ircbufm_lb(1) = 1 + ircbufm_ub(1) = (ncell/npy)*(ncell/npx)*2 + call xmp_new_coarray(ircbufm_desc,4,1, + & ircbufm_lb,ircbufm_ub,1,img_dims) + call xmp_coarray_bind(ircbufm_desc,ircbufm) + call xmp_new_array_section(ircbufm_sec,1) + + !allocate(ibuffp (na1cell*(ncell/npy)*(ncell/npx)*2)[*]) + ibuffp_lb(1) = 1 + ibuffp_ub(1) = na1cell*(ncell/npy)*(ncell/npx)*2 + call xmp_new_coarray(ibuffp_desc,4,1, + & ibuffp_lb,ibuffp_ub,1,img_dims) + call xmp_coarray_bind(ibuffp_desc,ibuffp) + call xmp_new_array_section(ibuffp_sec,1) + + !allocate(irbuffp(na1cell*(ncell/npy)*(ncell/npx)*2)[*]) + irbuffp_lb(1) = 1 + irbuffp_ub(1) = na1cell*(ncell/npy)*(ncell/npx)*2 + call xmp_new_coarray(irbuffp_desc,4,1, + & irbuffp_lb,irbuffp_ub,1,img_dims) + call xmp_coarray_bind(irbuffp_desc,irbuffp) + call xmp_new_array_section(irbuffp_sec,1) + + !allocate(ibuffm (na1cell*(ncell/npy)*(ncell/npx)*2)[*]) + ibuffm_lb(1) = 1 + ibuffm_ub(1) = na1cell*(ncell/npy)*(ncell/npx)*2 + call xmp_new_coarray(ibuffm_desc,4,1, + & ibuffm_lb,ibuffm_ub,1,img_dims) + call xmp_coarray_bind(ibuffm_desc,ibuffm) + call xmp_new_array_section(ibuffm_sec,1) + + !allocate(irbuffm(na1cell*(ncell/npy)*(ncell/npx)*2)[*]) + irbuffm_lb(1) = 1 + irbuffm_ub(1) = na1cell*(ncell/npy)*(ncell/npx)*2 + call xmp_new_coarray(irbuffm_desc,4,1, + & irbuffm_lb,irbuffm_ub,1,img_dims) + call xmp_coarray_bind(irbuffm_desc,irbuffm) + call 
xmp_new_array_section(irbuffm_sec,1) + + !allocate(buffp (3,na1cell*(ncell/npy)*(ncell/npx)*2)[*]) + buffp_lb(1) = 1 + buffp_ub(1) = 3 + buffp_lb(2) = 1 + buffp_ub(2) = na1cell*(ncell/npy)*(ncell/npx)*2 + call xmp_new_coarray(buffp_desc,4,2, + & buffp_lb,buffp_ub,1,img_dims) + call xmp_coarray_bind(buffp_desc,buffp) + call xmp_new_array_section(buffp_sec,2) + + !allocate(rbuffp(3,na1cell*(ncell/npy)*(ncell/npx)*2)[*]) + rbuffp_lb(1) = 1 + rbuffp_ub(1) = 3 + rbuffp_lb(2) = 1 + rbuffp_ub(2) = na1cell*(ncell/npy)*(ncell/npx)*2 + call xmp_new_coarray(rbuffp_desc,4,2, + & rbuffp_lb,rbuffp_ub,1,img_dims) + call xmp_coarray_bind(rbuffp_desc,rbuffp) + call xmp_new_array_section(rbuffp_sec,2) + + !allocate(buffm (3,na1cell*(ncell/npy)*(ncell/npx)*2)[*]) + buffm_lb(1) = 1 + buffm_ub(1) = 3 + buffm_lb(2) = 1 + buffm_ub(2) = na1cell*(ncell/npy)*(ncell/npx)*2 + call xmp_new_coarray(buffm_desc,4,2, + & buffm_lb,buffm_ub,1,img_dims) + call xmp_coarray_bind(buffm_desc,buffm) + call xmp_new_array_section(buffm_sec,2) + + !allocate(rbuffm(3,na1cell*(ncell/npy)*(ncell/npx)*2)[*]) + rbuffm_lb(1) = 1 + rbuffm_ub(1) = 3 + rbuffm_lb(2) = 1 + rbuffm_ub(2) = na1cell*(ncell/npy)*(ncell/npx)*2 + call xmp_new_coarray(rbuffm_desc,4,2, + & rbuffm_lb,rbuffm_ub,1,img_dims) + call xmp_coarray_bind(rbuffm_desc,rbuffm) + call xmp_new_array_section(rbuffm_sec,2) + return end @@ -123,6 +216,12 @@ subroutine comm_direct_3() ! ver.20120314 integer nd integer(4) status !! + call xmp_new_array_section(na_per_cell_local_sec,3) + call xmp_new_array_section(na_per_cell_remote_sec,3) + call xmp_new_array_section(wkxyz_local_sec,2) + call xmp_new_array_section(wkxyz_remote_sec,2) + call xmp_new_array_section(m2i_local_sec,1) + call xmp_new_array_section(m2i_remote_sec,1) c----- common parameters for coordinate communication. ----- ipx=mod(myrank,npx) @@ -186,8 +285,25 @@ subroutine comm_direct_3() ! 
ver.20120314 !coarray & ipz_mdest, myrank, !coarray & ircbufm, nccm, MPI_INTEGER, ipz_msrc, ipz_msrc, !coarray & mpi_comm_world, istatus, ierr ) - ircbufp(1:nccp)[ipz_pdest+1] = icbufp(1:nccp) ! Put - ircbufm(1:nccm)[ipz_mdest+1] = icbufm(1:nccm) ! Put + + !ircbufp(1:nccp)[ipz_pdest+1] = icbufp(1:nccp) ! Put + call xmp_array_section_set_triplet(ircbufp_sec, + & 1,int(1,kind=8),int(nccp,kind=8),1,status) + call xmp_array_section_set_triplet(icbufp_sec, + & 1,int(1,kind=8),int(nccp,kind=8),1,status) + img_dims(1) = ipz_pdest+1 + call xmp_coarray_put(img_dims,ircbufp_desc,ircbufp_sec, + & icbufp_desc,icbufp_sec,status) + + !ircbufm(1:nccm)[ipz_mdest+1] = icbufm(1:nccm) ! Put + call xmp_array_section_set_triplet(ircbufm_sec, + & 1,int(1,kind=8),int(nccm,kind=8),1,status) + call xmp_array_section_set_triplet(icbufm_sec, + & 1,int(1,kind=8),int(nccm,kind=8),1,status) + img_dims(1) = ipz_mdest+1 + call xmp_coarray_put(img_dims,ircbufm_desc,ircbufm_sec, + & icbufm_desc,icbufm_sec,status) + ! sync all call xmp_sync_all(status) !! @@ -296,10 +412,52 @@ subroutine comm_direct_3() ! ver.20120314 !coarray & myrank, irbuffm, ncarm, MPI_INTEGER, !coarray & ipz_msrc, ipz_msrc, !coarray & mpi_comm_world, istatus, ierr ) - rbuffp(1:3,1:ncap)[ipz_pdest+1] = buffp(1:3,1:ncap) ! Put - irbuffp(1:ncap)[ipz_pdest+1] = ibuffp(1:ncap) ! Put - rbuffm(1:3,1:ncam)[ipz_mdest+1] = buffm(1:3,1:ncam) ! Put - irbuffm(1:ncam)[ipz_mdest+1] = ibuffm(1:ncam) ! Put + + !rbuffp(1:3,1:ncap)[ipz_pdest+1] = buffp(1:3,1:ncap) ! 
Put + call xmp_array_section_set_triplet(rbuffp_sec, + & 1,int(1,kind=8),int(3,kind=8),1,status) + call xmp_array_section_set_triplet(rbuffp_sec, + & 2,int(1,kind=8),int(ncap,kind=8),1,status) + call xmp_array_section_set_triplet(buffp_sec, + & 1,int(1,kind=8),int(3,kind=8),1,status) + call xmp_array_section_set_triplet(buffp_sec, + & 2,int(1,kind=8),int(ncap,kind=8),1,status) + img_dims(1) = ipz_pdest+1 + call xmp_coarray_put(img_dims,rbuffp_desc,rbuffp_sec, + & buffp_desc,buffp_sec,status) + + !irbuffp(1:ncap)[ipz_pdest+1] = ibuffp(1:ncap) ! Put + call xmp_array_section_set_triplet(irbuffp_sec, + & 1,int(1,kind=8),int(ncap,kind=8),1,status) + call xmp_array_section_set_triplet(ibuffp_sec, + & 1,int(1,kind=8),int(ncap,kind=8),1,status) + img_dims(1) = ipz_pdest+1 + call xmp_coarray_put(img_dims,irbuffp_desc,irbuffp_sec, + & ibuffp_desc,ibuffp_sec,status) + + + !rbuffm(1:3,1:ncam)[ipz_mdest+1] = buffm(1:3,1:ncam) ! Put + call xmp_array_section_set_triplet(rbuffm_sec, + & 1,int(1,kind=8),int(3,kind=8),1,status) + call xmp_array_section_set_triplet(rbuffm_sec, + & 2,int(1,kind=8),int(ncap,kind=8),1,status) + call xmp_array_section_set_triplet(buffm_sec, + & 1,int(1,kind=8),int(3,kind=8),1,status) + call xmp_array_section_set_triplet(buffm_sec, + & 2,int(1,kind=8),int(ncap,kind=8),1,status) + img_dims(1) = ipz_mdest+1 + call xmp_coarray_put(img_dims,rbuffm_desc,rbuffm_sec, + & buffm_desc,buffm_sec,status) + + !irbuffm(1:ncam)[ipz_mdest+1] = ibuffm(1:ncam) ! Put + call xmp_array_section_set_triplet(irbuffm_sec, + & 1,int(1,kind=8),int(ncap,kind=8),1,status) + call xmp_array_section_set_triplet(ibuffm_sec, + & 1,int(1,kind=8),int(ncap,kind=8),1,status) + img_dims(1) = ipz_mdest+1 + call xmp_coarray_put(img_dims,irbuffm_desc,irbuffm_sec, + & ibuffm_desc,ibuffm_sec,status) + ! sync all call xmp_sync_all(status) !! @@ -374,8 +532,25 @@ subroutine comm_direct_3() ! 
ver.20120314 !coarray & ipz_mdest, myrank, !coarray & icbufm, nccm, MPI_INTEGER, ipz_msrc, ipz_msrc, !coarray & mpi_comm_world, istatus, ierr ) - icbufp(1:nccp)[ipz_pdest+1] = ircbufp(1:nccp) ! Put - icbufm(1:nccm)[ipz_mdest+1] = ircbufm(1:nccm) ! Put + + !icbufp(1:nccp)[ipz_pdest+1] = ircbufp(1:nccp) ! Put + call xmp_array_section_set_triplet(icbufp_sec, + & 1,int(1,kind=8),int(nccp,kind=8),1,status) + call xmp_array_section_set_triplet(ircbufp_sec, + & 1,int(1,kind=8),int(nccp,kind=8),1,status) + img_dims(1) = ipz_pdest+1 + call xmp_coarray_put(img_dims,icbufp_desc,icbufp_sec, + & ircbufp_desc,ircbufp_sec,status) + + !icbufm(1:nccm)[ipz_mdest+1] = ircbufm(1:nccm) ! Put + call xmp_array_section_set_triplet(icbufm_sec, + & 1,int(1,kind=8),int(nccp,kind=8),1,status) + call xmp_array_section_set_triplet(ircbufm_sec, + & 1,int(1,kind=8),int(nccp,kind=8),1,status) + img_dims(1) = ipz_mdest+1 + call xmp_coarray_put(img_dims,icbufm_desc,icbufm_sec, + & ircbufm_desc,ircbufm_sec,status) + ! sync all call xmp_sync_all(status) !! @@ -449,10 +624,50 @@ subroutine comm_direct_3() ! ver.20120314 !coarray & ipz_mdest, myrank, !coarray & ibuffm, ncar2m, MPI_INTEGER, ipz_msrc, ipz_msrc, !coarray & mpi_comm_world, istatus, ierr ) - buffp(1:3,1:ncarp)[ipz_pdest+1] = rbuffp(1:3,1:ncarp) ! Put - ibuffp(1:ncarp)[ipz_pdest+1] = irbuffp(1:ncarp) ! Put - buffm(1:3,1:ncarm)[ipz_mdest+1] = rbuffm(1:3,1:ncarm) ! Put - ibuffm(1:ncarm)[ipz_mdest+1] = irbuffm(1:ncarm) ! Put + !buffp(1:3,1:ncarp)[ipz_pdest+1] = rbuffp(1:3,1:ncarp) ! 
Put + call xmp_array_section_set_triplet(buffp_sec, + & 1,int(1,kind=8),int(3,kind=8),1,status) + call xmp_array_section_set_triplet(buffp_sec, + & 2,int(1,kind=8),int(ncarp,kind=8),1,status) + call xmp_array_section_set_triplet(rbuffp_sec, + & 1,int(1,kind=8),int(3,kind=8),1,status) + call xmp_array_section_set_triplet(rbuffp_sec, + & 2,int(1,kind=8),int(ncarp,kind=8),1,status) + img_dims(1) = ipz_pdest+1 + call xmp_coarray_put(img_dims,buffp_desc,buffp_sec, + & rbuffp_desc,rbuffp_sec,status) + + !ibuffp(1:ncarp)[ipz_pdest+1] = irbuffp(1:ncarp) ! Put + call xmp_array_section_set_triplet(ibuffp_sec, + & 1,int(1,kind=8),int(ncarp,kind=8),1,status) + call xmp_array_section_set_triplet(irbuffp_sec, + & 1,int(1,kind=8),int(ncarp,kind=8),1,status) + img_dims(1) = ipz_pdest+1 + call xmp_coarray_put(img_dims,ibuffp_desc,ibuffp_sec, + & irbuffp_desc,irbuffp_sec,status) + + !buffm(1:3,1:ncarm)[ipz_mdest+1] = rbuffm(1:3,1:ncarm) ! Put + call xmp_array_section_set_triplet(buffm_sec, + & 1,int(1,kind=8),int(3,kind=8),1,status) + call xmp_array_section_set_triplet(buffm_sec, + & 2,int(1,kind=8),int(ncarm,kind=8),1,status) + call xmp_array_section_set_triplet(rbuffm_sec, + & 1,int(1,kind=8),int(3,kind=8),1,status) + call xmp_array_section_set_triplet(rbuffm_sec, + & 2,int(1,kind=8),int(ncarm,kind=8),1,status) + img_dims(1) = ipz_mdest+1 + call xmp_coarray_put(img_dims,buffm_desc,buffm_sec, + & rbuffm_desc,rbuffm_sec,status) + + !ibuffm(1:ncarm)[ipz_mdest+1] = irbuffm(1:ncarm) ! Put + call xmp_array_section_set_triplet(ibuffm_sec, + & 1,int(1,kind=8),int(ncarm,kind=8),1,status) + call xmp_array_section_set_triplet(irbuffm_sec, + & 1,int(1,kind=8),int(ncarm,kind=8),1,status) + img_dims(1) = ipz_mdest+1 + call xmp_coarray_put(img_dims,ibuffm_desc,ibuffm_sec, + & irbuffm_desc,irbuffm_sec,status) + ! sync all call xmp_sync_all(status) !! @@ -557,11 +772,54 @@ subroutine comm_direct_3() ! 
ver.20120314 !coarray & ipy_msrc, ipy_msrc, !coarray & mpi_comm_world, istatus, ierr ) nd = abs(icyp1 - icyp0) - na_per_cell(:, icybp0:icybp0+nd, icx)[ipy_pdest+1] - . = na_per_cell(:, icyp0:icyp0 +nd, icx) ! Put +! ! na_per_cell(:, icybp0:icybp0+nd, icx)[ipy_pdest+1] +! !. = na_per_cell(:, icyp0:icyp0 +nd, icx) ! Put + + + call xmp_array_section_set_triplet(na_per_cell_remote_sec, + & 1,int(1,kind=8),int(lzdiv+4,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_remote_sec, + & 2,int(icybp0,kind=8),int(icybp0+nd,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_remote_sec, + & 3,int(icx,kind=8),int(1,kind=8),1,status) + + call xmp_array_section_set_triplet(na_per_cell_local_sec, + & 1,int(1,kind=8),int(lzdiv+4,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_local_sec, + & 2,int(icyp0,kind=8),int(icyp0+nd,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_local_sec, + & 3,int(icx,kind=8),int(1,kind=8),1,status) + + img_dims(1) = ipy_pdest+1 + call xmp_coarray_put(img_dims,na_per_cell_desc, + & na_per_cell_remote_sec, + & na_per_cell_desc,na_per_cell_local_sec,status) + + nd = abs(icym1 - icym0) - na_per_cell(:, icybm0:icybm0+nd, icx)[ipy_mdest+1] - . = na_per_cell(:, icym0:icym0 +nd, icx) ! Put +! ! na_per_cell(:, icybm0:icybm0+nd, icx)[ipy_mdest+1] +! ! . = na_per_cell(:, icym0:icym0 +nd, icx) ! 
Put + + call xmp_array_section_set_triplet(na_per_cell_remote_sec, + & 1,int(1,kind=8),int(lzdiv+4,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_remote_sec, + & 2,int(icybm0,kind=8),int(icybm0+nd,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_remote_sec, + & 3,int(icx,kind=8),int(1,kind=8),1,status) + + call xmp_array_section_set_triplet(na_per_cell_local_sec, + & 1,int(1,kind=8),int(lzdiv+4,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_local_sec, + & 2,int(icym0,kind=8),int(icym0+nd,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_local_sec, + & 3,int(icx,kind=8),int(1,kind=8),1,status) + + img_dims(1) = ipy_mdest+1 + call xmp_coarray_put(img_dims, + & na_per_cell_desc,na_per_cell_remote_sec, + & na_per_cell_desc,na_per_cell_local_sec,status) + + ! sync all call xmp_sync_all(status) !! @@ -634,18 +892,73 @@ subroutine comm_direct_3() ! ver.20120314 !coarray & ipy_mdest, myrank, !coarray & m2i(icarm), ncam, MPI_INTEGER, ipy_msrc, ipy_msrc, !coarray & mpi_comm_world, istatus, ierr ) - wkxyz(:,icarp:icarp+ncap-1)[ipy_pdest+1] - . = wkxyz(:,icasp:icasp+ncap-1) ! Put - m2i(icarp:icarp+ncap-1)[ipy_pdest+1] - . = m2i(icasp:icasp+ncap-1) ! Put + +! ! wkxyz(:,icarp:icarp+ncap-1)[ipy_pdest+1] +! !. = wkxyz(:,icasp:icasp+ncap-1) ! Put + call xmp_array_section_set_triplet(wkxyz_remote_sec, + & 1,int(1,kind=8),int(3,kind=8),1,status) + call xmp_array_section_set_triplet(wkxyz_remote_sec, + & 2,int(icarp,kind=8),int(icarp+ncap-1,kind=8),1,status) + + call xmp_array_section_set_triplet(wkxyz_local_sec, + & 1,int(1,kind=8),int(3,kind=8),1,status) + call xmp_array_section_set_triplet(wkxyz_local_sec, + & 2,int(icasp,kind=8),int(icasp+ncap-1,kind=8),1,status) + + img_dims(1) = ipy_pdest+1 + call xmp_coarray_put(img_dims,wkxyz_desc,wkxyz_remote_sec, + & wkxyz_desc,wkxyz_local_sec,status) + + +! ! m2i(icarp:icarp+ncap-1)[ipy_pdest+1] +! !. = m2i(icasp:icasp+ncap-1) ! 
Put + call xmp_array_section_set_triplet(m2i_remote_sec, + & 1,int(icarp,kind=8),int(icarp+ncap-1,kind=8),1,status) + + call xmp_array_section_set_triplet(m2i_local_sec, + & 1,int(icasp,kind=8),int(icasp+ncap-1,kind=8),1,status) + + img_dims(1) = ipy_pdest+1 + call xmp_coarray_put(img_dims,m2i_desc,m2i_remote_sec, + & m2i_desc,m2i_local_sec,status) + + + ! sync all call xmp_sync_all(status) - wkxyz(:,icarm:icarm+ncam-1)[ipy_mdest+1] - . = wkxyz(:,icasm:icasm+ncam-1) ! Put - m2i(icarm:icarm+ncam-1)[ipy_mdest+1] - . = m2i(icasm:icasm+ncam-1) ! Put +! ! wkxyz(:,icarm:icarm+ncam-1)[ipy_mdest+1] +! !. = wkxyz(:,icasm:icasm+ncam-1) ! Put + call xmp_array_section_set_triplet(wkxyz_remote_sec, + & 1,int(1,kind=8),int(3,kind=8),1,status) + call xmp_array_section_set_triplet(wkxyz_remote_sec, + & 2,int(icarp,kind=8),int(icarp+ncam-1,kind=8),1,status) + + call xmp_array_section_set_triplet(wkxyz_local_sec, + & 1,int(1,kind=8),int(3,kind=8),1,status) + call xmp_array_section_set_triplet(wkxyz_local_sec, + & 2,int(icasm,kind=8),int(icasm+ncam-1,kind=8),1,status) + + img_dims(1) = ipy_mdest+1 + call xmp_coarray_put(img_dims,wkxyz_desc,wkxyz_remote_sec, + & wkxyz_desc,wkxyz_local_sec,status) + + + +! ! m2i(icarm:icarm+ncam-1)[ipy_mdest+1] +! !. = m2i(icasm:icasm+ncam-1) ! Put + call xmp_array_section_set_triplet(m2i_remote_sec, + & 1,int(icarm,kind=8),int(icarm+ncam-1,kind=8),1,status) + + call xmp_array_section_set_triplet(m2i_local_sec, + & 1,int(icasm,kind=8),int(icasm+ncam-1,kind=8),1,status) + + img_dims(1) = ipy_mdest+1 + call xmp_coarray_put(img_dims,m2i_desc,m2i_remote_sec, + & m2i_desc,m2i_local_sec,status) + ! sync all call xmp_sync_all(status) + !! #else call mpi_irecv(wkxyz(1,icarp), 3*ncap, @@ -697,13 +1010,55 @@ subroutine comm_direct_3() ! ver.20120314 !coarray & ipy_msrc, ipy_msrc, !coarray & mpi_comm_world, istatus, ierr ) nd = abs(icyp1 - icyp0) - na_per_cell(:, icybp0:icybp0+nd, icx)[ipy_pdest+1] - . = na_per_cell(:, icybp1st:icybp1st+nd, icx) ! Put + +! ! 
na_per_cell(:, icybp0:icybp0+nd, icx)[ipy_pdest+1] +! !. = na_per_cell(:, icybp1st:icybp1st+nd, icx) ! Put + + call xmp_array_section_set_triplet(na_per_cell_remote_sec, + & 1,int(1,kind=8),int(lzdiv+4,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_remote_sec, + & 2,int(icybp0,kind=8),int(icybp0+nd,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_remote_sec, + & 3,int(icx,kind=8),int(1,kind=8),1,status) + + call xmp_array_section_set_triplet(na_per_cell_local_sec, + & 1,int(1,kind=8),int(lzdiv+4,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_local_sec, + & 2,int(icybp1st,kind=8),int(icybp1st+nd,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_local_sec, + & 3,int(icx,kind=8),int(1,kind=8),1,status) + + img_dims(1) = ipy_pdest+1 + call xmp_coarray_put(img_dims, + & na_per_cell_desc,na_per_cell_remote_sec, + & na_per_cell_desc,na_per_cell_local_sec,status) + ! sync all call xmp_sync_all(status) nd = abs(icym1 - icym0) - na_per_cell(:, icybm0:icybm0+nd, icx)[ipy_pdest+1] - . = na_per_cell(:, icybm1st:icybm1st+nd, icx) ! Put +! ! na_per_cell(:, icybm0:icybm0+nd, icx)[ipy_pdest+1] +! !. = na_per_cell(:, icybm1st:icybm1st+nd, icx) ! 
Put + + call xmp_array_section_set_triplet(na_per_cell_remote_sec, + & 1,int(1,kind=8),int(lzdiv+4,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_remote_sec, + & 2,int(icybm0,kind=8),int(icybm0+nd,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_remote_sec, + & 3,int(icx,kind=8),int(1,kind=8),1,status) + + call xmp_array_section_set_triplet(na_per_cell_local_sec, + & 1,int(1,kind=8),int(lzdiv+4,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_local_sec, + & 2,int(icybm1st,kind=8),int(icybm1st+nd,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_local_sec, + & 3,int(icx,kind=8),int(1,kind=8),1,status) + + img_dims(1) = ipy_pdest+1 + call xmp_coarray_put(img_dims, + & na_per_cell_desc,na_per_cell_remote_sec, + & na_per_cell_desc,na_per_cell_local_sec,status) + + ! sync all call xmp_sync_all(status) !! @@ -766,18 +1121,74 @@ subroutine comm_direct_3() ! ver.20120314 !coarray & ipy_mdest, myrank, !coarray & m2i(icarm), ncam, MPI_INTEGER,ipy_msrc,ipy_msrc, !coarray & mpi_comm_world, istatus, ierr ) - wkxyz(:,icarp:icarp+ncap-1)[ipy_pdest+1] - . = wkxyz(:,icasp:icasp+ncap-1) ! Put - m2i(icarp:icarp+ncap-1)[ipy_pdest+1] - . = m2i(icasp:icasp+ncap-1) ! Put + + +! ! wkxyz(:,icarp:icarp+ncap-1)[ipy_pdest+1] +! !. = wkxyz(:,icasp:icasp+ncap-1) ! Put + call xmp_array_section_set_triplet(wkxyz_remote_sec, + & 1,int(1,kind=8),int(3,kind=8),1,status) + call xmp_array_section_set_triplet(wkxyz_remote_sec, + & 2,int(icarp,kind=8),int(icarp+ncap-1,kind=8),1,status) + + call xmp_array_section_set_triplet(wkxyz_local_sec, + & 1,int(1,kind=8),int(3,kind=8),1,status) + call xmp_array_section_set_triplet(wkxyz_local_sec, + & 2,int(icasp,kind=8),int(icasp+ncap-1,kind=8),1,status) + + img_dims(1) = ipy_pdest+1 + call xmp_coarray_put(img_dims,wkxyz_desc,wkxyz_remote_sec, + & wkxyz_desc,wkxyz_local_sec,status) + +! ! m2i(icarp:icarp+ncap-1)[ipy_pdest+1] +! !. = m2i(icasp:icasp+ncap-1) ! 
Put + call xmp_array_section_set_triplet(m2i_remote_sec, + & 1,int(icarp,kind=8),int(icarp+ncap-1,kind=8),1,status) + + call xmp_array_section_set_triplet(m2i_local_sec, + & 1,int(icasp,kind=8),int(icasp+ncap-1,kind=8),1,status) + + img_dims(1) = ipy_pdest+1 + call xmp_coarray_put(img_dims,m2i_desc,m2i_remote_sec, + & m2i_desc,m2i_local_sec,status) + + ! sync all call xmp_sync_all(status) - wkxyz(:,icarm:icarm+ncam-1)[ipy_mdest+1] - . = wkxyz(:,icasm:icasm+ncam-1) ! Put - m2i(icarm:icarm+ncam-1)[ipy_mdest+1] - . = m2i(icasm:icasm+ncam-1) ! Put + +! ! wkxyz(:,icarm:icarm+ncam-1)[ipy_mdest+1] +! !. = wkxyz(:,icasm:icasm+ncam-1) ! Put + call xmp_array_section_set_triplet(wkxyz_remote_sec, + & 1,int(1,kind=8),int(3,kind=8),1,status) + call xmp_array_section_set_triplet(wkxyz_remote_sec, + & 2,int(icarm,kind=8),int(icarm+ncam-1,kind=8),1,status) + + call xmp_array_section_set_triplet(wkxyz_local_sec, + & 1,int(1,kind=8),int(3,kind=8),1,status) + call xmp_array_section_set_triplet(wkxyz_local_sec, + & 2,int(icasm,kind=8),int(icasm+ncam-1,kind=8),1,status) + + img_dims(1) = ipy_mdest+1 + call xmp_coarray_put(img_dims,wkxyz_desc,wkxyz_remote_sec, + & wkxyz_desc,wkxyz_local_sec,status) + + + +! ! m2i(icarm:icarm+ncam-1)[ipy_mdest+1] +! !. = m2i(icasm:icasm+ncam-1) ! Put + call xmp_array_section_set_triplet(m2i_remote_sec, + & 1,int(icarm,kind=8),int(icarm+ncam-1,kind=8),1,status) + + call xmp_array_section_set_triplet(m2i_local_sec, + & 1,int(icasm,kind=8),int(icasm+ncam-1,kind=8),1,status) + + img_dims(1) = ipy_mdest+1 + call xmp_coarray_put(img_dims,m2i_desc,m2i_remote_sec, + & m2i_desc,m2i_local_sec,status) + ! sync all call xmp_sync_all(status) + + !! #else call mpi_irecv(wkxyz(1,icarp), 3*ncap, @@ -855,11 +1266,32 @@ subroutine comm_direct_3() ! ver.20120314 !coarray & na_per_cell(icz0,icy0,icxbp0), !coarray & nccp, MPI_INTEGER, ipx_psrc, ipx_psrc, !coarray & mpi_comm_world, istatus, ierr ) - na_per_cell(icz0:icz1,icy0:icy1,icxbp0:icxbp0+(icxp1-icxp0)) - . 
[ipx_pdest+1] - . = na_per_cell(icz0:icz1,icy0:icy1,icxp0:icxp1) ! Put +! ! na_per_cell(icz0:icz1,icy0:icy1,icxbp0:icxbp0+(icxp1-icxp0)) +! !. [ipx_pdest+1] +! !. = na_per_cell(icz0:icz1,icy0:icy1,icxp0:icxp1) ! Put + call xmp_array_section_set_triplet(na_per_cell_remote_sec, + & 1,int(icz0,kind=8),int(icz1,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_remote_sec, + & 2,int(icy0,kind=8),int(icy1,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_remote_sec, + & 3,int(icxbp0,kind=8),int(icxbp0+(icxp1-icxp0),kind=8),1,status) + + call xmp_array_section_set_triplet(na_per_cell_local_sec, + & 1,int(icz0,kind=8),int(icz1,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_local_sec, + & 2,int(icy0,kind=8),int(icy1,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_local_sec, + & 3,int(icxp0,kind=8),int(icxp1,kind=8),1,status) + + img_dims(1) = ipx_pdest+1 + call xmp_coarray_put(img_dims, + & na_per_cell_desc,na_per_cell_remote_sec, + & na_per_cell_desc,na_per_cell_local_sec,status) + + ! sync all call xmp_sync_all(status) + !! #endif !coarray call mpi_sendrecv(na_per_cell(icz0,icy0,icxm0), nccm, @@ -867,9 +1299,29 @@ subroutine comm_direct_3() ! ver.20120314 !coarray & na_per_cell(icz0,icy0,icxbm0), nccm, MPI_INTEGER, !coarray & ipx_msrc, ipx_msrc, !coarray & mpi_comm_world, istatus, ierr ) - na_per_cell(icz0:icz1,icy0:icy1,icxbm0:icxbm0+(icxm1-icxm0)) - . [ipx_mdest+1] - . = na_per_cell(icz0:icz1,icy0:icy1,icxm0:icxm1) ! Put +! ! na_per_cell(icz0:icz1,icy0:icy1,icxbm0:icxbm0+(icxm1-icxm0)) +! !. [ipx_mdest+1] +! !. = na_per_cell(icz0:icz1,icy0:icy1,icxm0:icxm1) ! 
Put + + call xmp_array_section_set_triplet(na_per_cell_remote_sec, + & 1,int(icz0,kind=8),int(icz1,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_remote_sec, + & 2,int(icy0,kind=8),int(icy1,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_remote_sec, + & 3,int(icxbm0,kind=8),int(icxbm0+(icxm1-icxm0),kind=8),1,status) + + call xmp_array_section_set_triplet(na_per_cell_local_sec, + & 1,int(icz0,kind=8),int(icz1,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_local_sec, + & 2,int(icy0,kind=8),int(icy1,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_local_sec, + & 3,int(icxm0,kind=8),int(icxm1,kind=8),1,status) + + img_dims(1) = ipx_mdest+1 + call xmp_coarray_put(img_dims, + & na_per_cell_desc,na_per_cell_remote_sec, + & na_per_cell_desc,na_per_cell_local_sec,status) + ! sync all call xmp_sync_all(status) !! @@ -956,10 +1408,34 @@ subroutine comm_direct_3() ! ver.20120314 !coarray & ipx_pdest, myrank, !coarray & m2i(icarp), ncap, MPI_INTEGER, ipx_psrc, ipx_psrc, !coarray & mpi_comm_world, istatus, ierr ) - wkxyz(:,icarp:icarp+ncap-1)[ipx_pdest+1] - . = wkxyz(:,icasp:icasp+ncap-1) ! Put - m2i(icarp:icarp+ncap-1)[ipx_pdest+1] - . = m2i(icasp:icasp+ncap-1) ! Put +! ! wkxyz(:,icarp:icarp+ncap-1)[ipx_pdest+1] +! !. = wkxyz(:,icasp:icasp+ncap-1) ! Put + call xmp_array_section_set_triplet(wkxyz_remote_sec, + & 1,int(1,kind=8),int(3,kind=8),1,status) + call xmp_array_section_set_triplet(wkxyz_remote_sec, + & 2,int(icarp,kind=8),int(icarp+ncap-1,kind=8),1,status) + + call xmp_array_section_set_triplet(wkxyz_local_sec, + & 1,int(1,kind=8),int(3,kind=8),1,status) + call xmp_array_section_set_triplet(wkxyz_local_sec, + & 2,int(icasp,kind=8),int(icasp+ncap-1,kind=8),1,status) + + img_dims(1) = ipx_pdest+1 + call xmp_coarray_put(img_dims,wkxyz_desc,wkxyz_remote_sec, + & wkxyz_desc,wkxyz_local_sec,status) + +! ! m2i(icarp:icarp+ncap-1)[ipx_pdest+1] +! !. = m2i(icasp:icasp+ncap-1) ! 
Put + call xmp_array_section_set_triplet(m2i_remote_sec, + & 1,int(icarp,kind=8),int(icarp+ncap-1,kind=8),1,status) + + call xmp_array_section_set_triplet(m2i_local_sec, + & 1,int(icasp,kind=8),int(icasp+ncap-1,kind=8),1,status) + + img_dims(1) = ipx_pdest+1 + call xmp_coarray_put(img_dims,m2i_desc,m2i_remote_sec, + & m2i_desc,m2i_local_sec,status) + ! sync all call xmp_sync_all(status) !! @@ -973,12 +1449,36 @@ subroutine comm_direct_3() ! ver.20120314 !coarray & ipx_mdest, myrank, !coarray & m2i(icarm), ncam, MPI_INTEGER, ipx_msrc, ipx_msrc, !coarray & mpi_comm_world, istatus, ierr ) - wkxyz(:,icarm:icarm+ncam-1)[ipx_mdest+1] - . = wkxyz(:,icasm:icasm+ncam-1) ! Put - m2i(icarm:icarm+ncam-1)[ipx_mdest+1] - . = m2i(icasm:icasm+ncam-1) ! Put +! ! wkxyz(:,icarm:icarm+ncam-1)[ipx_mdest+1] +! !. = wkxyz(:,icasm:icasm+ncam-1) ! Put + call xmp_array_section_set_triplet(wkxyz_remote_sec, + & 1,int(1,kind=8),int(3,kind=8),1,status) + call xmp_array_section_set_triplet(wkxyz_remote_sec, + & 2,int(icarm,kind=8),int(icarm+ncam-1,kind=8),1,status) + + call xmp_array_section_set_triplet(wkxyz_local_sec, + & 1,int(1,kind=8),int(3,kind=8),1,status) + call xmp_array_section_set_triplet(wkxyz_local_sec, + & 2,int(icasm,kind=8),int(icasm+ncap-1,kind=8),1,status) + + img_dims(1) = ipx_mdest+1 + call xmp_coarray_put(img_dims,wkxyz_desc,wkxyz_remote_sec, + & wkxyz_desc,wkxyz_local_sec,status) + +! ! m2i(icarm:icarm+ncam-1)[ipx_mdest+1] +! !. = m2i(icasm:icasm+ncam-1) ! Put + call xmp_array_section_set_triplet(m2i_remote_sec, + & 1,int(icarm,kind=8),int(icarm+ncam-1,kind=8),1,status) + + call xmp_array_section_set_triplet(m2i_local_sec, + & 1,int(icasm,kind=8),int(icasm+ncam-1,kind=8),1,status) + + img_dims(1) = ipx_mdest+1 + call xmp_coarray_put(img_dims,m2i_desc,m2i_remote_sec, + & m2i_desc,m2i_local_sec,status) ! sync all call xmp_sync_all(status) + !! #else #ifndef HALFDIREE @@ -1046,8 +1546,31 @@ subroutine comm_direct_3() ! 
ver.20120314 !coarray & ipx_psrc, ipx_psrc, !coarray & mpi_comm_world, istatus, ierr ) nd = abs(icxp1 - icxp0) - na_per_cell(icz0:icz1,icy0:icy1,icxbp0:icxbp0+nd)[ipx_pdest+1] - .= na_per_cell(icz0:icz1,icy0:icy1,icxbp1st:icxbp1st+nd) +! ! na_per_cell(icz0:icz1,icy0:icy1,icxbp0:icxbp0+nd)[ipx_pdest+1] +! !.= na_per_cell(icz0:icz1,icy0:icy1,icxbp1st:icxbp1st+nd) + + call xmp_array_section_set_triplet(na_per_cell_remote_sec, + & 1,int(icz0,kind=8),int(icz1,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_remote_sec, + & 2,int(icy0,kind=8),int(icy1,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_remote_sec, + & 3,int(icxbp0,kind=8),int(icxbp0+nd,kind=8),1,status) + + call xmp_array_section_set_triplet(na_per_cell_local_sec, + & 1,int(icz0,kind=8),int(icz1,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_local_sec, + & 2,int(icy0,kind=8),int(icy1,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_local_sec, + & 3,int(icxbp1st,kind=8),int(icxbp1st+nd,kind=8),1,status) + + img_dims(1) = ipx_pdest+1 + call xmp_coarray_put(img_dims, + & na_per_cell_desc,na_per_cell_remote_sec, + & na_per_cell_desc,na_per_cell_local_sec,status) + + + + ! sync all call xmp_sync_all(status) !! @@ -1058,8 +1581,30 @@ subroutine comm_direct_3() ! ver.20120314 !coarray & ipx_msrc, ipx_msrc, !coarray & mpi_comm_world, istatus, ierr ) nd = abs(icxm1 - icxm0) - na_per_cell(icz0:icz1,icy0:icy1,icxbm0:icxbm0+nd)[ipx_pdest+1] - .= na_per_cell(icz0:icz1,icy0:icy1,icxbm1st:icxbm1st+nd) + +! ! na_per_cell(icz0:icz1,icy0:icy1,icxbm0:icxbm0+nd)[ipx_pdest+1] +! 
!.= na_per_cell(icz0:icz1,icy0:icy1,icxbm1st:icxbm1st+nd) + + call xmp_array_section_set_triplet(na_per_cell_remote_sec, + & 1,int(icz0,kind=8),int(icz1,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_remote_sec, + & 2,int(icy0,kind=8),int(icy1,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_remote_sec, + & 3,int(icxbm0,kind=8),int(icxbm0+nd,kind=8),1,status) + + call xmp_array_section_set_triplet(na_per_cell_local_sec, + & 1,int(icz0,kind=8),int(icz1,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_local_sec, + & 2,int(icy0,kind=8),int(icy1,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_local_sec, + & 3,int(icxbm1st,kind=8),int(icxbm1st+nd,kind=8),1,status) + + img_dims(1) = ipx_pdest+1 + call xmp_coarray_put(img_dims, + & na_per_cell_desc,na_per_cell_remote_sec, + & na_per_cell_desc,na_per_cell_local_sec,status) + + ! sync all call xmp_sync_all(status) !! @@ -1133,10 +1678,39 @@ subroutine comm_direct_3() ! ver.20120314 !coarray & ipx_pdest, myrank, !coarray & m2i(icarp), ncap, MPI_INTEGER, ipx_psrc, ipx_psrc, !coarray & mpi_comm_world, istatus, ierr ) - wkxyz(:,icarp:icarp+ncap-1)[ipx_pdest+1] - . = wkxyz(:,icasp:icasp+ncap-1) ! Put - m2i(icarp:icarp+ncap-1)[ipx_pdest+1] - . = m2i(icasp:icasp+ncap-1) ! Put + +! ! wkxyz(:,icarp:icarp+ncap-1)[ipx_pdest+1] +! !. = wkxyz(:,icasp:icasp+ncap-1) ! Put + + call xmp_array_section_set_triplet(wkxyz_remote_sec, + & 1,int(1,kind=8),int(3,kind=8),1,status) + call xmp_array_section_set_triplet(wkxyz_remote_sec, + & 2,int(icarp,kind=8),int(icarp+ncap-1,kind=8),1,status) + + call xmp_array_section_set_triplet(wkxyz_local_sec, + & 1,int(1,kind=8),int(3,kind=8),1,status) + call xmp_array_section_set_triplet(wkxyz_local_sec, + & 2,int(icasp,kind=8),int(icasp+ncap-1,kind=8),1,status) + + img_dims(1) = ipx_pdest+1 + call xmp_coarray_put(img_dims,wkxyz_desc,wkxyz_remote_sec, + & wkxyz_desc,wkxyz_local_sec,status) + + + +! ! 
m2i(icarp:icarp+ncap-1)[ipx_pdest+1] +! !. = m2i(icasp:icasp+ncap-1) ! Put + call xmp_array_section_set_triplet(m2i_remote_sec, + & 1,int(icarp,kind=8),int(icarp+ncap-1,kind=8),1,status) + + call xmp_array_section_set_triplet(m2i_local_sec, + & 1,int(icasp,kind=8),int(icasp+ncap-1,kind=8),1,status) + + img_dims(1) = ipx_pdest+1 + call xmp_coarray_put(img_dims,m2i_desc,m2i_remote_sec, + & m2i_desc,m2i_local_sec,status) + + ! sync all call xmp_sync_all(status) !! @@ -1150,10 +1724,35 @@ subroutine comm_direct_3() ! ver.20120314 !coarray & ipx_mdest, myrank, !coarray & m2i(icarm), ncam, MPI_INTEGER, ipx_msrc, ipx_msrc, !coarray & mpi_comm_world, istatus, ierr ) - wkxyz(:,icarm:icarm+ncam-1)[ipx_mdest+1] - . = wkxyz(:,icasm:icasm+ncam-1) ! Put - m2i(icarm:icarm+ncam-1)[ipx_mdest+1] - . = m2i(icasm:icasm+ncam-1) ! Put +! ! wkxyz(:,icarm:icarm+ncam-1)[ipx_mdest+1] +! !. = wkxyz(:,icasm:icasm+ncam-1) ! Put + call xmp_array_section_set_triplet(wkxyz_remote_sec, + & 1,int(1,kind=8),int(3,kind=8),1,status) + call xmp_array_section_set_triplet(wkxyz_remote_sec, + & 2,int(icarm,kind=8),int(icarm+ncam-1,kind=8),1,status) + + call xmp_array_section_set_triplet(wkxyz_local_sec, + & 1,int(1,kind=8),int(3,kind=8),1,status) + call xmp_array_section_set_triplet(wkxyz_local_sec, + & 2,int(icasm,kind=8),int(icasm+ncam-1,kind=8),1,status) + + img_dims(1) = ipx_mdest+1 + call xmp_coarray_put(img_dims,wkxyz_desc,wkxyz_remote_sec, + & wkxyz_desc,wkxyz_local_sec,status) + + +! ! m2i(icarm:icarm+ncam-1)[ipx_mdest+1] +! !. = m2i(icasm:icasm+ncam-1) ! Put + call xmp_array_section_set_triplet(m2i_remote_sec, + & 1,int(icarm,kind=8),int(icarm+ncam-1,kind=8),1,status) + + call xmp_array_section_set_triplet(m2i_local_sec, + & 1,int(icasm,kind=8),int(icasm+ncam-1,kind=8),1,status) + + img_dims(1) = ipx_pdest+1 + call xmp_coarray_put(img_dims,m2i_desc,m2i_remote_sec, + & m2i_desc,m2i_local_sec,status) + ! sync all call xmp_sync_all(status) !! @@ -1242,5 +1841,23 @@ subroutine comm_direct_3() ! 
ver.20120314 !$omp end parallel ndatm=nselfatm+ntmp + call xmp_free_array_section(icbufp_sec) + call xmp_free_array_section(ircbufp_sec) + call xmp_free_array_section(icbufm_sec) + call xmp_free_array_section(ircbufm_sec) + call xmp_free_array_section(ibuffp_sec) + call xmp_free_array_section(irbuffp_sec) + call xmp_free_array_section(ibuffm_sec) + call xmp_free_array_section(irbuffm_sec) + call xmp_free_array_section(buffp_sec) + call xmp_free_array_section(rbuffp_sec) + call xmp_free_array_section(buffm_sec) + call xmp_free_array_section(rbuffm_sec) + call xmp_free_array_section(wkxyz_local_sec) + call xmp_free_array_section(wkxyz_remote_sec) + call xmp_free_array_section(m2i_local_sec) + call xmp_free_array_section(m2i_remote_sec) + call xmp_free_array_section(na_per_cell_local_sec) + call xmp_free_array_section(na_per_cell_remote_sec) return end diff --git a/MODYLAS-MINI/src/xmpAPI_comm_fmm.f b/MODYLAS-MINI/src/xmpAPI_comm_fmm.f index f6c9f4c..dfa46d1 100755 --- a/MODYLAS-MINI/src/xmpAPI_comm_fmm.f +++ b/MODYLAS-MINI/src/xmpAPI_comm_fmm.f @@ -38,12 +38,27 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, integer m1 integer(4) nscell, nsczdiv, nscydiv, nscxdiv, mylm, il0 complex(8) :: ccbuf(mylm*5*nscydiv*nscxdiv) + integer(8) :: ccbuf_local_desc, ccbuf_local_sec + integer(8), dimension(1) :: ccbuf_lb, ccbuf_ub !coarray complex(8) :: rccbuf(mylm*5*nscydiv*nscxdiv,2) complex(8) :: wm(mylm,nscell,nscell,nscell) !coarray - complex(8),allocatable :: rccbuf(:,:)[:] - complex(8),allocatable :: wm_tmp(:,:,:,:)[:] - integer,allocatable :: ndis(:)[:] + !complex(8),allocatable :: rccbuf(:,:)[:] + complex(8), POINTER :: rccbuf(:,:) => null() + integer(8) :: rccbuf_desc + integer(8) :: rccbuf_l_sec, rccbuf_r_sec + integer(8), dimension(2) :: rccbuf_lb,rccbuf_ub + !complex(8),allocatable :: wm_tmp(:,:,:,:)[:] + complex(8), POINTER :: wm_tmp(:,:,:,:) => null() + integer(8) :: wm_tmp_desc + integer(8) :: wm_tmp_l_sec, wm_tmp_r_sec + integer(8), dimension(2) :: 
wm_tmp_lb,wm_tmp_ub + !integer,allocatable :: ndis(:)[:] + integer, POINTER :: ndis(:) => null() + integer(8) :: ndis_desc + integer(8) :: ndis_sec + integer(8), dimension(1) :: ndis_lb,ndis_ub + integer me, np, nb, nd integer ierrcode !! @@ -63,15 +78,52 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, integer icyb0prior, icxb0prior integer ierr,istatus(mpi_status_size) integer(4) status + integer :: img_dims(1) !coarray - allocate( rccbuf(mylm*5*nscydiv*nscxdiv,2)[*] ) - allocate( wm_tmp(mylm,nscell,nscell,nscell)[*] ) + !allocate( rccbuf(mylm*5*nscydiv*nscxdiv,2)[*] ) + rccbuf_lb(1)=1 + rccbuf_lb(2)=1 + rccbuf_ub(1)=mylm*5*nscydiv*nscxdiv + rccbuf_ub(2)=2 + call xmp_new_coarray(rccbuf_desc, + & 8,2,rccbuf_lb,rccbuf_ub,1,img_dims) + call xmp_coarray_bind(rccbuf_desc,rccbuf) + call xmp_new_array_section(rccbuf_l_sec,2) + call xmp_new_array_section(rccbuf_r_sec,2) + + + !allocate( wm_tmp(mylm,nscell,nscell,nscell)[*] ) + wm_tmp_lb(1)=1 + wm_tmp_lb(2)=1 + wm_tmp_lb(3)=1 + wm_tmp_lb(4)=1 + wm_tmp_ub(1)=mylm + wm_tmp_ub(2)=nscell + wm_tmp_ub(3)=nscell + wm_tmp_ub(4)=nscell + call xmp_new_coarray(wm_tmp_desc, + & 8,4,wm_tmp_lb,wm_tmp_ub,1,img_dims) + call xmp_coarray_bind(wm_tmp_desc,wm_tmp) + call xmp_new_array_section(wm_tmp_l_sec,2) + call xmp_new_array_section(wm_tmp_r_sec,2) + + wm_tmp = wm - me = this_image() - np = num_images() - allocate( ndis(np)[*] ) + me = xmp_this_image() + np = xmp_num_images() + !allocate( ndis(np)[*] ) + ndis_lb(1)=1 + ndis_ub(1)=np + call xmp_new_coarray(ndis_desc,8,2,ndis_lb,ndis_ub,1,img_dims) + call xmp_coarray_bind(ndis_desc,ndis) + call xmp_new_array_section(ndis_sec,1) !! + ccbuf_lb(1) = 1 + ccbuf_ub(1) = mylm*5*nscydiv*nscxdiv + call xmp_new_local_array(ccbuf_local_desc,8,1, + & ccbuf_lb,ccbuf_ub,loc(ccbuf)) + call xmp_new_array_section(ccbuf_local_sec,1) !=== local constant ===! 
m1 = (nmax+1)*(nmax+1) @@ -117,7 +169,19 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, !coarray call mpi_sendrecv(ccbuf, ncc, MPI_DOUBLE_COMPLEX,ipz_dest, !coarray & myrank, rccbuf(1,1), ncc, MPI_DOUBLE_COMPLEX, !coarray & ipz_src, ipz_src,mpi_comm_world,istatus,ierr) - rccbuf(1:ncc,1)[ipz_dest+1] = ccbuf(1:ncc) ! Put + !rccbuf(1:ncc,1)[ipz_dest+1] = ccbuf(1:ncc) ! Put + call xmp_array_section_set_triplet(rccbuf_r_sec, + & 1,int(1,kind=8),int(ncc,kind=8),1,status) + call xmp_array_section_set_triplet(rccbuf_r_sec, + & 2,int(1,kind=8),int(1,kind=8),1,status) + call xmp_array_section_set_triplet(ccbuf_local_sec, + & 1,int(1,kind=8),int(ncc,kind=8),1,status) + img_dims(1) = ipz_dest+1 + call xmp_coarray_put_local(img_dims,rccbuf_desc,rccbuf_r_sec, + & ccbuf_local_desc,ccbuf_local_sec,status) + + + ! sync all call xmp_sync_all(status) !! @@ -148,7 +212,18 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, !coarray & ipz_dest, !coarray & myrank, rccbuf(1,ibr), ncc, MPI_DOUBLE_COMPLEX, !coarray & ipz_src, ipz_src,mpi_comm_world,istatus,ierr) - rccbuf(1:ncc,ibr)[ipz_dest+1] = rccbuf(1:ncc,ibs) ! Put + !rccbuf(1:ncc,ibr)[ipz_dest+1] = rccbuf(1:ncc,ibs) ! Put + call xmp_array_section_set_triplet(rccbuf_r_sec, + & 1,int(1,kind=8),int(ncc,kind=8),1,status) + call xmp_array_section_set_triplet(rccbuf_r_sec, + & 2,int(ibr,kind=8),int(1,kind=8),1,status) + call xmp_array_section_set_triplet(rccbuf_l_sec, + & 1,int(1,kind=8),int(ncc,kind=8),1,status) + call xmp_array_section_set_triplet(rccbuf_l_sec, + & 2,int(ibs,kind=8),int(1,kind=8),1,status) + img_dims(1) = ipz_dest+1 + call xmp_coarray_put_local(img_dims,rccbuf_desc,rccbuf_r_sec, + & rccbuf_desc,rccbuf_l_sec,status) ! sync all call xmp_sync_all(status) !! @@ -184,7 +259,16 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, !coarray & ipz_dest, myrank, !coarray & rccbuf(1,1), ncc, MPI_DOUBLE_COMPLEX, !coarray & ipz_src, ipz_src,mpi_comm_world,istatus,ierr) - rccbuf(1:ncc,1)[ipz_dest+1] = ccbuf(1:ncc) ! 
Put + !rccbuf(1:ncc,1)[ipz_dest+1] = ccbuf(1:ncc) ! Put + call xmp_array_section_set_triplet(rccbuf_r_sec, + & 1,int(1,kind=8),int(ncc,kind=8),1,status) + call xmp_array_section_set_triplet(rccbuf_r_sec, + & 2,int(1,kind=8),int(1,kind=8),1,status) + call xmp_array_section_set_triplet(ccbuf_local_sec, + & 1,int(1,kind=8),int(ncc,kind=8),1,status) + img_dims(1) = ipz_dest+1 + call xmp_coarray_put_local(img_dims,rccbuf_desc,rccbuf_r_sec, + & ccbuf_local_desc,ccbuf_local_sec,status) ! sync all call xmp_sync_all(status) !! @@ -209,7 +293,18 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, !coarray & ipz_dest, !coarray & myrank, rccbuf(1,ibr), ncc, MPI_DOUBLE_COMPLEX, !coarray & ipz_src, ipz_src,mpi_comm_world,istatus,ierr) - rccbuf(1:ncc,ibr)[ipz_dest+1] = rccbuf(1:ncc,ibs) ! Put + !rccbuf(1:ncc,ibr)[ipz_dest+1] = rccbuf(1:ncc,ibs) ! Put + call xmp_array_section_set_triplet(rccbuf_r_sec, + & 1,int(1,kind=8),int(ncc,kind=8),1,status) + call xmp_array_section_set_triplet(rccbuf_r_sec, + & 2,int(ibr,kind=8),int(ibr,kind=8),1,status) + call xmp_array_section_set_triplet(rccbuf_l_sec, + & 1,int(1,kind=8),int(ncc,kind=8),1,status) + call xmp_array_section_set_triplet(rccbuf_l_sec, + & 2,int(ibs,kind=8),int(ibs,kind=8),1,status) + img_dims(1) = ipz_dest+1 + call xmp_coarray_put_local(img_dims,rccbuf_desc,rccbuf_r_sec, + & rccbuf_desc,rccbuf_l_sec,status) ! sync all call xmp_sync_all(status) !! @@ -247,12 +342,41 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, !coarray & MPI_DOUBLE_COMPLEX, !coarray & ipy_src, ipy_src,mpi_comm_world,istatus,ierr ) nd = abs(icyb1 - icyb0) - ndis(me)[ipy_src+1] = icyb0 ! Put +! ndis(me)[ipy_src+1] = icyb0 ! Put + call xmp_array_section_set_triplet(ndis_sec, + & 1,int(me,kind=8),int(me,kind=8),1,status) + img_dims(1) = ipy_src+1 + call xmp_coarray_put_scalar(img_dims,ndis_desc, + & ndis_sec,icyb0,status) + ! sync all call xmp_sync_all(status) nb = ndis(ipy_dest+1) - wm_tmp( :, :, nb:nb +nd, icx )[ipy_dest+1] - . 
= wm_tmp( :, :, icy0:icy0+nd, icx ) ! Put +! wm_tmp( :, :, nb:nb +nd, icx )[ipy_dest+1] +! . = wm_tmp( :, :, icy0:icy0+nd, icx ) ! Put + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 2,int(1,kind=8),int(nscell,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 3,int(nb,kind=8),int(nb+nd,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 4,int(icx,kind=8),int(icx,kind=8),1,status) + + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 2,int(1,kind=8),int(nscell,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 3,int(icy0,kind=8),int(icy0+nd,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 4,int(icx,kind=8),int(icx,kind=8),1,status) + + img_dims(1) = ipy_dest+1 + call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + & wm_tmp_desc,wm_tmp_l_sec,status) + + ! sync all call xmp_sync_all(status) !! @@ -265,12 +389,43 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, !coarray & ipy_dest, myrank, wm_tmp(1,1,icyb0,icx), ncc, !coarray & MPI_DOUBLE_COMPLEX, !coarray & ipy_src, ipy_src,mpi_comm_world,istatus,ierr ) - ndis(me)[ipy_src+1] = icyb0 ! Put +! !ndis(me)[ipy_src+1] = icyb0 ! Put + call xmp_array_section_set_triplet(ndis_sec, + & 1,int(me,kind=8),int(me,kind=8),1,status) + img_dims(1) = ipy_src+1 + call xmp_coarray_put_scalar(img_dims,ndis_desc, + & ndis_sec,icyb0,status) + + ! sync all call xmp_sync_all(status) nb = ndis(ipy_dest+1) - wm_tmp( :, :, nb:nb +nd, icx )[ipy_dest+1] - . = wm_tmp( :, :, icyb0prior:icyb0prior+nd, icx ) ! Put +! wm_tmp( :, :, nb:nb +nd, icx )[ipy_dest+1] +! . = wm_tmp( :, :, icyb0prior:icyb0prior+nd, icx ) ! 
Put + + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 2,int(1,kind=8),int(nscell,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 3,int(nb,kind=8),int(nb+nd,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 4,int(icx,kind=8),int(icx,kind=8),1,status) + + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 2,int(1,kind=8),int(nscell,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 3,int(icyb0prior,kind=8),int(icyb0prior+nd,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 4,int(icx,kind=8),int(icx,kind=8),1,status) + + img_dims(1) = ipy_dest+1 + call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + & wm_tmp_desc,wm_tmp_l_sec,status) + + ! sync all call xmp_sync_all(status) !! @@ -298,12 +453,42 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, !coarray & MPI_DOUBLE_COMPLEX, !coarray & ipy_src, ipy_src,mpi_comm_world,istatus,ierr) nd = abs(icyb1 - icyb0) - ndis(me)[ipy_src+1] = icyb0 ! Put +! ndis(me)[ipy_src+1] = icyb0 ! Put + call xmp_array_section_set_triplet(ndis_sec, + & 1,int(me,kind=8),int(me,kind=8),1,status) + img_dims(1) = ipy_src+1 + call xmp_coarray_put_scalar(img_dims,ndis_desc, + & ndis_sec,icyb0,status) + ! sync all call xmp_sync_all(status) nb = ndis(ipy_dest+1) - wm_tmp( :, :, nb:nb +nd, icx )[ipy_dest+1] - . = wm_tmp( :, :, icy0:icy0+nd, icx ) ! Put +! wm_tmp( :, :, nb:nb +nd, icx )[ipy_dest+1] +! . = wm_tmp( :, :, icy0:icy0+nd, icx ) ! 
Put + + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 2,int(1,kind=8),int(nscell,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 3,int(nb,kind=8),int(nb+nd,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 4,int(icx,kind=8),int(icx,kind=8),1,status) + + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 2,int(1,kind=8),int(nscell,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 3,int(icy0,kind=8),int(icy0+nd,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 4,int(icx,kind=8),int(icx,kind=8),1,status) + + img_dims(1) = ipy_dest+1 + call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + & wm_tmp_desc,wm_tmp_l_sec,status) + + ! sync all call xmp_sync_all(status) !! @@ -315,12 +500,42 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, !coarray & ipy_dest, myrank, wm_tmp(1,1,icyb0,icx), ncc, !coarray & MPI_DOUBLE_COMPLEX, !coarray & ipy_src, ipy_src,mpi_comm_world,istatus,ierr) - ndis(me)[ipy_src+1] = icyb0 ! Put +! ndis(me)[ipy_src+1] = icyb0 ! Put + call xmp_array_section_set_triplet(ndis_sec, + & 1,int(me,kind=8),int(me,kind=8),1,status) + img_dims(1) = ipy_src+1 + call xmp_coarray_put_scalar(img_dims,ndis_desc, + & ndis_sec,icyb0,status) + ! sync all call xmp_sync_all(status) nb = ndis(ipy_dest+1) - wm_tmp( :, :, nb:nb +nd, icx )[ipy_dest+1] - . = wm_tmp( :, :, icyb0prior:icyb0prior+nd, icx ) ! Put +! wm_tmp( :, :, nb:nb +nd, icx )[ipy_dest+1] +! . = wm_tmp( :, :, icyb0prior:icyb0prior+nd, icx ) ! 
Put + + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 2,int(1,kind=8),int(nscell,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 3,int(nb,kind=8),int(nb+nd,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 4,int(icx,kind=8),int(icx,kind=8),1,status) + + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 2,int(1,kind=8),int(nscell,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 3,int(icyb0prior,kind=8),int(icyb0prior+nd,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 4,int(icx,kind=8),int(icx,kind=8),1,status) + + img_dims(1) = ipy_dest+1 + call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + & wm_tmp_desc,wm_tmp_l_sec,status) + + ! sync all call xmp_sync_all(status) !! @@ -347,12 +562,41 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, !coarray & MPI_DOUBLE_COMPLEX, !coarray & ipx_src, ipx_src,mpi_comm_world,istatus,ierr ) nd = abs(icxb1 - icxb0) - ndis(me)[ipx_src+1] = icxb0 ! Put +! ndis(me)[ipx_src+1] = icxb0 ! Put + call xmp_array_section_set_triplet(ndis_sec, + & 1,int(me,kind=8),int(me,kind=8),1,status) + img_dims(1) = ipx_src+1 + call xmp_coarray_put_scalar(img_dims,ndis_desc, + & ndis_sec,icxb0,status) + ! sync all call xmp_sync_all(status) nb = ndis(ipx_dest+1) - wm_tmp( :, :, :, nb:nb +nd )[ipx_dest+1] - . = wm_tmp( :, :, :, icx0:icx0+nd ) ! Put +! wm_tmp( :, :, :, nb:nb +nd )[ipx_dest+1] +! . = wm_tmp( :, :, :, icx0:icx0+nd ) ! 
Put + + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 2,int(1,kind=8),int(nscell,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 3,int(1,kind=8),int(nscell,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 4,int(nb,kind=8),int(nb+nd,kind=8),1,status) + + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 2,int(1,kind=8),int(nscell,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 3,int(1,kind=8),int(nscell,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 4,int(icx0,kind=8),int(icx0+nd,kind=8),1,status) + + img_dims(1) = ipx_dest+1 + call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + & wm_tmp_desc,wm_tmp_l_sec,status) + ! sync all call xmp_sync_all(status) !! @@ -364,12 +608,42 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, !coarray & ipx_dest, myrank, wm_tmp(1,1,1,icxb0), ncc, !coarray & MPI_DOUBLE_COMPLEX, !coarray & ipx_src, ipx_src,mpi_comm_world,istatus,ierr ) - ndis(me)[ipx_src+1] = icxb0 ! Put +! ndis(me)[ipx_src+1] = icxb0 ! Put + call xmp_array_section_set_triplet(ndis_sec, + & 1,int(me,kind=8),int(me,kind=8),1,status) + img_dims(1) = ipx_src+1 + call xmp_coarray_put_scalar(img_dims,ndis_desc, + & ndis_sec,icxb0,status) + + ! sync all call xmp_sync_all(status) nb = ndis(ipx_dest+1) - wm_tmp( :, :, :, nb:nb +nd )[ipx_dest+1] - . = wm_tmp( :, :, :, icxb0prior:icxb0prior+nd ) ! Put +! wm_tmp( :, :, :, nb:nb +nd )[ipx_dest+1] +! . = wm_tmp( :, :, :, icxb0prior:icxb0prior+nd ) ! 
Put + + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 2,int(1,kind=8),int(nscell,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 3,int(1,kind=8),int(nscell,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 4,int(nb,kind=8),int(nb+nd,kind=8),1,status) + + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 2,int(1,kind=8),int(nscell,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 3,int(1,kind=8),int(nscell,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 4,int(icxb0prior,kind=8),int(icxb0prior+nd,kind=8),1,status) + + img_dims(1) = ipx_dest+1 + call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + & wm_tmp_desc,wm_tmp_l_sec,status) + ! sync all call xmp_sync_all(status) !! @@ -395,12 +669,40 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, !coarray & MPI_DOUBLE_COMPLEX, !coarray & ipx_src, ipx_src,mpi_comm_world,istatus,ierr ) nd = abs(icxb1 - icxb0) - ndis(me)[ipx_src+1] = icxb0 ! Put +! ndis(me)[ipx_src+1] = icxb0 ! Put + call xmp_array_section_set_triplet(ndis_sec, + & 1,int(me,kind=8),int(me,kind=8),1,status) + img_dims(1) = ipx_src+1 + call xmp_coarray_put_scalar(img_dims,ndis_desc, + & ndis_sec,icxb0,status) ! sync all call xmp_sync_all(status) - nb = ndis(ipx_dest+1) - wm_tmp( :, :, :, nb:nb +nd )[ipx_dest+1] - . = wm_tmp( :, :, :, icx0:icx0+nd ) ! Put + +! nb = ndis(ipx_dest+1) +! wm_tmp( :, :, :, nb:nb +nd )[ipx_dest+1] +! . = wm_tmp( :, :, :, icx0:icx0+nd ) ! 
Put + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 2,int(1,kind=8),int(nscell,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 3,int(1,kind=8),int(nscell,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 4,int(nb,kind=8),int(nb+nd,kind=8),1,status) + + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 2,int(1,kind=8),int(nscell,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 3,int(1,kind=8),int(nscell,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 4,int(icx0,kind=8),int(icx0+nd,kind=8),1,status) + + img_dims(1) = ipx_dest+1 + call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + & wm_tmp_desc,wm_tmp_l_sec,status) + ! sync all call xmp_sync_all(status) !! @@ -412,12 +714,41 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, !coarray & ipx_dest, myrank, wm_tmp(1,1,1,icxb0), ncc, !coarray & MPI_DOUBLE_COMPLEX, !coarray & ipx_src, ipx_src,mpi_comm_world,istatus,ierr ) - ndis(me)[ipx_src+1] = icxb0 ! Put +! ndis(me)[ipx_src+1] = icxb0 ! Put + call xmp_array_section_set_triplet(ndis_sec, + & 1,int(me,kind=8),int(me,kind=8),1,status) + img_dims(1) = ipx_src+1 + call xmp_coarray_put_scalar(img_dims,ndis_desc, + & ndis_sec,icxb0,status) + ! sync all call xmp_sync_all(status) nb = ndis(ipx_dest) - wm_tmp( :, :, :, nb:nb +nd )[ipx_dest+1] - . = wm_tmp( :, :, :, icxb0prior:icxb0prior+nd ) ! Put +! wm_tmp( :, :, :, nb:nb +nd )[ipx_dest+1] +! . = wm_tmp( :, :, :, icxb0prior:icxb0prior+nd ) ! 
Put + + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 2,int(1,kind=8),int(nscell,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 3,int(1,kind=8),int(nscell,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 4,int(nb,kind=8),int(nb+nd,kind=8),1,status) + + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 2,int(1,kind=8),int(nscell,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 3,int(1,kind=8),int(nscell,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 4,int(icxb0prior,kind=8),int(icxb0prior+nd,kind=8),1,status) + + img_dims(1) = ipx_dest+1 + call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + & wm_tmp_desc,wm_tmp_l_sec,status) + ! sync all call xmp_sync_all(status) !! @@ -448,15 +779,44 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, complex(8) :: wm(mylm, lclz, lcly, lclx) integer(4) :: nbound_zm, nbound_ym, nbound_xm integer(4) :: nbound_zp, nbound_yp, nbound_xp + complex(8) ccbufp(mylm*nbsize*nscydiv*nscxdiv) + integer(8) :: ccbufp_local_sec,ccbufp_local_desc + integer(8), dimension(1) :: ccbufp_lb,ccbufp_ub + !coarray complex(8) rccbufp(mylm*nbsize*nscydiv*nscxdiv, 2) complex(8) ccbufm(mylm*nbsize*nscydiv*nscxdiv) + integer(8) :: ccbufm_local_sec,ccbufm_local_desc + integer(8), dimension(1) :: ccbufm_lb,ccbufm_ub !coarray complex(8) rccbufm(mylm*nbsize*nscydiv*nscxdiv, 2) - complex(8),allocatable :: rccbufp(:,:)[:] - complex(8),allocatable :: rccbufm(:,:)[:] - complex(8),allocatable :: wm_tmp(:,:,:,:)[:] - integer,allocatable :: ndis(:)[:] - integer,allocatable :: mdis(:)[:] +! 
complex(8),allocatable :: rccbufp(:,:)[:] + complex(8), POINTER :: rccbufp(:,:) => null() + integer(8) :: rccbufp_desc + integer(8) :: rccbufp_l_sec, rccbufp_r_sec + integer(8), dimension(2) :: rccbufp_lb, rccbufp_ub +! complex(8),allocatable :: rccbufm(:,:)[:] + complex(8), POINTER :: rccbufm(:,:) => null() + integer(8) :: rccbufm_desc + integer(8) :: rccbufm_l_sec, rccbufm_r_sec + integer(8), dimension(2) :: rccbufm_lb, rccbufm_ub +! complex(8),allocatable :: wm_tmp(:,:,:,:)[:] + complex(8), POINTER :: wm_tmp(:,:,:,:) => null() + integer(8) :: wm_tmp_desc + integer(8) :: wm_tmp_l_sec, wm_tmp_r_sec + integer(8), dimension(2) :: wm_tmp_lb, wm_tmp_ub + +! integer,allocatable :: ndis(:)[:] + integer, POINTER :: ndis(:) => null() + integer(8) :: ndis_desc + integer(8) :: ndis_sec + integer(8), dimension(1) :: ndis_lb,ndis_ub + +! integer,allocatable :: mdis(:)[:] + integer, POINTER :: mdis(:) => null() + integer(8) :: mdis_desc + integer(8) :: mdis_sec + integer(8), dimension(1) :: mdis_lb,mdis_ub + integer me, np, nb, nd, mb, md !! integer m1 @@ -494,19 +854,79 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, integer ibs, ibr integer istatus(mpi_status_size, 4), ierr integer(4) status + integer(4) img_dims(1) #ifndef SYNC_COM integer,dimension(4) :: irq integer nrq #endif !coarray - allocate( rccbufp(mylm*nbsize*nscydiv*nscxdiv, 2)[*] ) - allocate( rccbufm(mylm*nbsize*nscydiv*nscxdiv, 2)[*] ) - allocate( wm_tmp(mylm, lclz, lcly, lclx)[*] ) +! allocate( rccbufp(mylm*nbsize*nscydiv*nscxdiv, 2)[*] ) + rccbufp_lb(1)=1 + rccbufp_lb(2)=1 + rccbufp_ub(1)=mylm*nbsize*nscydiv*nscxdiv + rccbufp_ub(2)=2 + call xmp_new_coarray(rccbufp_desc, + & 8,2,rccbufp_lb,rccbufp_ub,1,img_dims) + call xmp_coarray_bind(rccbufp_desc,rccbufp) + call xmp_new_array_section(rccbufp_l_sec,2) + call xmp_new_array_section(rccbufp_r_sec,2) + +! 
allocate( rccbufm(mylm*nbsize*nscydiv*nscxdiv, 2)[*] ) + rccbufm_lb(1)=1 + rccbufm_lb(2)=1 + rccbufm_ub(1)=mylm*nbsize*nscydiv*nscxdiv + rccbufm_ub(2)=2 + call xmp_new_coarray(rccbufm_desc, + & 8,2,rccbufm_lb,rccbufm_ub,1,img_dims) + call xmp_coarray_bind(rccbufm_desc,rccbufm) + call xmp_new_array_section(rccbufm_l_sec,2) + call xmp_new_array_section(rccbufm_r_sec,2) + +! allocate( wm_tmp(mylm, lclz, lcly, lclx)[*] ) + rccbufm_lb(1)=1 + rccbufm_lb(2)=1 + rccbufm_lb(3)=1 + rccbufm_lb(4)=1 + rccbufm_ub(1)=mylm + rccbufm_ub(2)=lclz + rccbufm_ub(3)=lcly + rccbufm_ub(4)=lclx + call xmp_new_coarray(rccbufm_desc, + & 8,4,rccbufm_lb,rccbufm_ub,1,img_dims) + call xmp_coarray_bind(rccbufm_desc,rccbufm) + call xmp_new_array_section(rccbufm_l_sec,4) + call xmp_new_array_section(rccbufm_r_sec,4) + wm_tmp = wm - me = this_image() - np = num_images() - allocate( ndis(np)[*] ) - allocate( mdis(np)[*] ) +! me = this_image() + me = xmp_this_image() +! np = num_images() + np = xmp_num_images() +! allocate( ndis(np)[*] ) + ndis_lb(1)=1 + ndis_ub(1)=np + call xmp_new_coarray(ndis_desc,4,1,ndis_lb,ndis_ub,1,img_dims) + call xmp_coarray_bind(ndis_desc,ndis) + call xmp_new_array_section(ndis_sec,1) +! allocate( mdis(np)[*] ) + mdis_lb(1)=1 + mdis_ub(1)=np + call xmp_new_coarray(mdis_desc,4,1,mdis_lb,mdis_ub,1,img_dims) + call xmp_coarray_bind(mdis_desc,mdis) + call xmp_new_array_section(mdis_sec,1) + + ccbufp_lb(1) = 1 + ccbufp_ub(1) = mylm*nbsize*nscydiv*nscxdiv + call xmp_new_local_array(ccbufp_local_desc,8,1, + & ccbufp_lb,ccbufp_ub,loc(ccbufp)) + call xmp_new_array_section(ccbufp_local_sec,1) + + ccbufm_lb(1) = 1 + ccbufm_ub(1) = mylm*nbsize*nscydiv*nscxdiv + call xmp_new_local_array(ccbufm_local_desc,8,1, + & ccbufm_lb,ccbufm_ub,loc(ccbufm)) + call xmp_new_array_section(ccbufm_local_sec,1) + !! ! ---- 3D rank order rule. 
---- @@ -603,8 +1023,32 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, !coarray & rccbufm(1,1), nccm, MPI_DOUBLE_COMPLEX, !coarray & ipz_msrc, ipz_msrc, !coarray & mpi_comm_world, istatus, ierr ) - rccbufp(1:nccp,1)[ipz_pdest+1] = ccbufp(1:nccp) ! Put - rccbufm(1:nccm,1)[ipz_mdest+1] = ccbufm(1:nccm) ! Put +! rccbufp(1:nccp,1)[ipz_pdest+1] = ccbufp(1:nccp) ! Put + call xmp_array_section_set_triplet(rccbufp_r_sec, + & 1,int(1,kind=8),int(nccp,kind=8),1,status) + call xmp_array_section_set_triplet(rccbufp_r_sec, + & 2,int(1,kind=8),int(1,kind=8),1,status) + + call xmp_array_section_set_triplet(ccbufp_local_sec, + & 1,int(1,kind=8),int(nccp,kind=8),1,status) + img_dims(1) = ipz_pdest+1 + call xmp_coarray_put_local(img_dims,rccbufp_desc,rccbufp_r_sec, + & ccbufp_local_desc,ccbufp_local_sec,status) + + +! rccbufm(1:nccm,1)[ipz_mdest+1] = ccbufm(1:nccm) ! Put + call xmp_array_section_set_triplet(rccbufm_r_sec, + & 1,int(1,kind=8),int(nccm,kind=8),1,status) + call xmp_array_section_set_triplet(rccbufm_r_sec, + & 2,int(1,kind=8),int(1,kind=8),1,status) + + call xmp_array_section_set_triplet(ccbufm_local_sec, + & 1,int(1,kind=8),int(nccp,kind=8),1,status) + img_dims(1) = ipz_mdest+1 + call xmp_coarray_put_local(img_dims,rccbufm_desc,rccbufm_r_sec, + & ccbufm_local_desc,ccbufm_local_sec,status) + + ! sync all call xmp_sync_all(status) !! @@ -673,8 +1117,35 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, !coarray & rccbufm(1,ibr), nccm, MPI_DOUBLE_COMPLEX, !coarray & ipz_msrc, ipz_msrc, !coarray & mpi_comm_world, istatus, ierr ) - rccbufp(1:nccp,ibr)[ipz_pdest+1] = rccbufp(1:nccp,ibs) ! Put - rccbufm(1:nccm,ibr)[ipz_mdest+1] = rccbufm(1:nccm,ibs) ! Put +! rccbufp(1:nccp,ibr)[ipz_pdest+1] = rccbufp(1:nccp,ibs) ! 
Put + call xmp_array_section_set_triplet(rccbufp_r_sec, + & 1,int(1,kind=8),int(nccp,kind=8),1,status) + call xmp_array_section_set_triplet(rccbufp_r_sec, + & 2,int(ibr,kind=8),int(ibr,kind=8),1,status) + + call xmp_array_section_set_triplet(rccbufp_l_sec, + & 1,int(1,kind=8),int(nccp,kind=8),1,status) + call xmp_array_section_set_triplet(rccbufp_l_sec, + & 2,int(ibs,kind=8),int(ibs,kind=8),1,status) + img_dims(1) = ipz_pdest+1 + call xmp_coarray_put_local(img_dims,rccbufp_desc,rccbufp_r_sec, + & rccbufp_desc,rccbufp_l_sec,status) + +! rccbufm(1:nccm,ibr)[ipz_mdest+1] = rccbufm(1:nccm,ibs) ! Put + call xmp_array_section_set_triplet(rccbufm_r_sec, + & 1,int(1,kind=8),int(nccm,kind=8),1,status) + call xmp_array_section_set_triplet(rccbufm_r_sec, + & 2,int(ibr,kind=8),int(ibr,kind=8),1,status) + + call xmp_array_section_set_triplet(rccbufm_l_sec, + & 1,int(1,kind=8),int(nccm,kind=8),1,status) + call xmp_array_section_set_triplet(rccbufm_l_sec, + & 2,int(ibs,kind=8),int(ibs,kind=8),1,status) + img_dims(1) = ipz_mdest+1 + call xmp_coarray_put_local(img_dims,rccbufm_desc,rccbufm_r_sec, + & rccbufm_desc,rccbufm_l_sec,status) + + ! sync all call xmp_sync_all(status) !! @@ -726,7 +1197,20 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, !coarray & MPI_DOUBLE_COMPLEX, !coarray & ipz_pdest, myrank, !coarray & mpi_comm_world, istatus, ierr ) - rccbufp(1:nccp,ibr)[ipz_pdest+1] = rccbufp(1:nccp,ibs) ! Put +! rccbufp(1:nccp,ibr)[ipz_pdest+1] = rccbufp(1:nccp,ibs) ! 
Put + call xmp_array_section_set_triplet(rccbufp_r_sec, + & 1,int(1,kind=8),int(nccp,kind=8),1,status) + call xmp_array_section_set_triplet(rccbufp_r_sec, + & 2,int(ibr,kind=8),int(ibr,kind=8),1,status) + + call xmp_array_section_set_triplet(rccbufp_l_sec, + & 1,int(1,kind=8),int(nccp,kind=8),1,status) + call xmp_array_section_set_triplet(rccbufp_l_sec, + & 2,int(ibs,kind=8),int(ibs,kind=8),1,status) + img_dims(1) = ipz_pdest+1 + call xmp_coarray_put_local(img_dims,rccbufp_desc,rccbufp_r_sec, + & rccbufp_desc,rccbufp_l_sec,status) + ! sync all call xmp_sync_all(status) !! @@ -757,7 +1241,19 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, !coarray & MPI_DOUBLE_COMPLEX, !coarray & ipz_mdest, myrank, !coarray & mpi_comm_world, istatus, ierr ) - rccbufm(1:nccm,ibr)[ipz_mdest+1] = rccbufm(1:nccm,ibs) ! Put +! rccbufm(1:nccm,ibr)[ipz_mdest+1] = rccbufm(1:nccm,ibs) ! Put + call xmp_array_section_set_triplet(rccbufm_r_sec, + & 1,int(1,kind=8),int(nccm,kind=8),1,status) + call xmp_array_section_set_triplet(rccbufm_r_sec, + & 2,int(ibr,kind=8),int(ibr,kind=8),1,status) + + call xmp_array_section_set_triplet(rccbufm_l_sec, + & 1,int(1,kind=8),int(nccm,kind=8),1,status) + call xmp_array_section_set_triplet(rccbufm_l_sec, + & 2,int(ibs,kind=8),int(ibs,kind=8),1,status) + img_dims(1) = ipz_mdest+1 + call xmp_coarray_put_local(img_dims,rccbufm_desc,rccbufm_r_sec, + & rccbufm_desc,rccbufm_l_sec,status) ! sync all call xmp_sync_all(status) !! @@ -862,12 +1358,43 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, !coarray & ipy_psrc, ipy_psrc, !coarray & mpi_comm_world, istatus, ierr ) nd = abs(icyp1 - icyp0) - ndis(me)[ipy_psrc+1] = icybp0 ! Put +! ndis(me)[ipy_psrc+1] = icybp0 ! Put + call xmp_array_section_set_triplet(ndis_sec, + & 1,int(me,kind=8),int(me,kind=8),1,status) + img_dims(1) = ipy_psrc+1 + call xmp_coarray_put_scalar(img_dims,ndis_desc, + & ndis_sec,icybp0,status) + + ! 
sync all call xmp_sync_all(status) nb = ndis(ipy_pdest+1) - wm_tmp( :, :, nb:nb +nd, icx )[ipy_pdest+1] - . = wm_tmp( :, :, icyp0:icyp0+nd, icx ) ! Put +! wm_tmp( :, :, nb:nb +nd, icx )[ipy_pdest+1] +! . = wm_tmp( :, :, icyp0:icyp0+nd, icx ) ! Put + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 2,int(1,kind=8),int(lclz,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 3,int(nb,kind=8),int(nb+nd,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 4,int(icx,kind=8),int(icx,kind=8),1,status) + + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 2,int(1,kind=8),int(lclz,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 3,int(icyp0,kind=8),int(icyp0+nd,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 4,int(icx,kind=8),int(icx,kind=8),1,status) + + img_dims(1) = ipy_pdest+1 + call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + & wm_tmp_desc,wm_tmp_l_sec,status) + + + ! sync all call xmp_sync_all(status) !! @@ -878,12 +1405,42 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, !coarray & ipy_msrc, ipy_msrc, !coarray & mpi_comm_world, istatus, ierr ) md = abs(icym1 - icym0) - mdis(me)[ipy_msrc+1] = icybm0 ! Put +! mdis(me)[ipy_msrc+1] = icybm0 ! Put + call xmp_array_section_set_triplet(mdis_sec, + & 1,int(me,kind=8),int(me,kind=8),1,status) + img_dims(1) = ipy_msrc+1 + call xmp_coarray_put_scalar(img_dims,mdis_desc, + & mdis_sec,icybm0,status) + + ! sync all call xmp_sync_all(status) mb = mdis(ipy_mdest+1) - wm_tmp( :, :, mb:mb +md, icx )[ipy_mdest+1] - . = wm_tmp( :, :, icym0:icym0+md, icx ) ! Put +! wm_tmp( :, :, mb:mb +md, icx )[ipy_mdest+1] +! . = wm_tmp( :, :, icym0:icym0+md, icx ) ! 
Put + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 2,int(1,kind=8),int(lclz,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 3,int(mb,kind=8),int(mb+md,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 4,int(icx,kind=8),int(icx,kind=8),1,status) + + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 2,int(1,kind=8),int(lclz,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 3,int(icym0,kind=8),int(icym0+md,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 4,int(icx,kind=8),int(icx,kind=8),1,status) + + img_dims(1) = ipy_mdest+1 + call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + & wm_tmp_desc,wm_tmp_l_sec,status) + + ! sync all call xmp_sync_all(status) !! @@ -921,12 +1478,41 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, !coarray & wm_tmp(1,1,icybp0,icx), nccp, MPI_DOUBLE_COMPLEX, !coarray * ipy_psrc, ipy_psrc, !coarray & mpi_comm_world, istatus, ierr ) - ndis(me)[ipy_psrc+1] = icybp0 ! Put +! ndis(me)[ipy_psrc+1] = icybp0 ! Put + call xmp_array_section_set_triplet(ndis_sec, + & 1,int(me,kind=8),int(me,kind=8),1,status) + img_dims(1) = ipy_psrc+1 + call xmp_coarray_put_scalar(img_dims,ndis_desc, + & ndis_sec,icybp0,status) ! sync all call xmp_sync_all(status) nb = ndis(ipy_pdest+1) - wm_tmp( :, :, nb:nb +nd, icx )[ipy_pdest+1] ! Put - . = wm_tmp( :, :, icybp0prior:icybp0prior+nd, icx ) +! wm_tmp( :, :, nb:nb +nd, icx )[ipy_pdest+1] ! Put +! . 
= wm_tmp( :, :, icybp0prior:icybp0prior+nd, icx ) + + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 2,int(1,kind=8),int(lclz,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 3,int(mb,kind=8),int(nb+nd,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 4,int(icx,kind=8),int(icx,kind=8),1,status) + + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 2,int(1,kind=8),int(lclz,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 3,int(icybp0prior,kind=8),int(icybp0prior+nd,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 4,int(icx,kind=8),int(icx,kind=8),1,status) + + img_dims(1) = ipy_pdest+1 + call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + & wm_tmp_desc,wm_tmp_l_sec,status) + + ! sync all call xmp_sync_all(status) !! @@ -936,12 +1522,41 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, !coarray & wm_tmp(1,1,icybm0,icx), nccm, MPI_DOUBLE_COMPLEX, !coarray & ipy_msrc, ipy_msrc, !coarray & mpi_comm_world, istatus, ierr ) - mdis(me)[ipy_msrc+1] = icybm0 ! Put +! mdis(me)[ipy_msrc+1] = icybm0 ! Put + call xmp_array_section_set_triplet(mdis_sec, + & 1,int(me,kind=8),int(me,kind=8),1,status) + img_dims(1) = ipy_msrc+1 + call xmp_coarray_put_scalar(img_dims,mdis_desc, + & mdis_sec,icybm0,status) + ! sync all call xmp_sync_all(status) mb = mdis(ipy_mdest+1) - wm_tmp( :, :, mb:mb +md, icx )[ipy_mdest+1] ! Put - . = wm_tmp( :, :, icybm0prior:icybm0prior+md, icx ) +! wm_tmp( :, :, mb:mb +md, icx )[ipy_mdest+1] ! Put +! . 
= wm_tmp( :, :, icybm0prior:icybm0prior+md, icx ) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 2,int(1,kind=8),int(lclz,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 3,int(mb,kind=8),int(mb+md,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 4,int(icx,kind=8),int(icx,kind=8),1,status) + + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 2,int(1,kind=8),int(lclz,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 3,int(icybm0prior,kind=8),int(icybm0prior+md,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 4,int(icx,kind=8),int(icx,kind=8),1,status) + + img_dims(1) = ipy_mdest+1 + call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + & wm_tmp_desc,wm_tmp_l_sec,status) + + ! sync all call xmp_sync_all(status) !! @@ -970,12 +1585,38 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, !coarray & MPI_DOUBLE_COMPLEX, !coarray & ipy_pdest, myrank, !coarray & mpi_comm_world, istatus, ierr ) - ndis(me)[ipy_psrc+1] = icybp0 ! Put +! ndis(me)[ipy_psrc+1] = icybp0 ! Put + call xmp_array_section_set_triplet(ndis_sec, + & 1,int(me,kind=8),int(me,kind=8),1,status) + img_dims(1) = ipy_psrc+1 + call xmp_coarray_put_scalar(img_dims,ndis_desc, + & ndis_sec,icybp0,status) ! sync all call xmp_sync_all(status) - nb = ndis(ipy_pdest+1) - wm_tmp( :, :, nb:nb +nd, icx )[ipy_pdest+1] - . = wm_tmp( :, :, icybp0prior:icybp0prior+nd, icx ) ! Put +! nb = ndis(ipy_pdest+1) +! wm_tmp( :, :, nb:nb +nd, icx )[ipy_pdest+1] +! . = wm_tmp( :, :, icybp0prior:icybp0prior+nd, icx ) ! 
Put + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 2,int(1,kind=8),int(lclz,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 3,int(nb,kind=8),int(nb+nd,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 4,int(icx,kind=8),int(icx,kind=8),1,status) + + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 2,int(1,kind=8),int(lclz,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 3,int(icybp0prior,kind=8),int(icybp0prior+nd,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 4,int(icx,kind=8),int(icx,kind=8),1,status) + + img_dims(1) = ipy_pdest+1 + call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + & wm_tmp_desc,wm_tmp_l_sec,status) ! sync all call xmp_sync_all(status) !! @@ -1005,12 +1646,34 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, !coarray & MPI_DOUBLE_COMPLEX, !coarray & ipy_mdest, myrank, !coarray & mpi_comm_world, istatus, ierr ) - mdis(me)[ipy_msrc+1] = icybm0 ! Put +! mdis(me)[ipy_msrc+1] = icybm0 ! Put + call xmp_array_section_set_triplet(mdis_sec, + & 1,int(me,kind=8),int(me,kind=8),1,status) + img_dims(1) = ipy_msrc+1 + call xmp_coarray_put_scalar(img_dims,mdis_desc, + & mdis_sec,icybm0,status) ! sync all call xmp_sync_all(status) md = mdis(ipy_mdest+1) - wm_tmp( :, :, mb:mb +md, icx )[ipy_mdest+1] - . = wm_tmp( :, :, icybm0prior:icybm0prior+md, icx ) ! Put +! wm_tmp( :, :, mb:mb +md, icx )[ipy_mdest+1] +! . = wm_tmp( :, :, icybm0prior:icybm0prior+md, icx ) ! 
Put + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 2,int(1,kind=8),int(lclz,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 3,int(mb,kind=8),int(mb+md,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 4,int(icx,kind=8),int(icx,kind=8),1,status) + + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 2,int(1,kind=8),int(lclz,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 3,int(icybm0prior,kind=8),int(icybm0prior+md,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 4,int(icx,kind=8),int(icx,kind=8),1,status) ! sync all call xmp_sync_all(status) #else @@ -1088,12 +1751,40 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, !coarray & wm_tmp(1,1,1,icxbp0), nccp, MPI_DOUBLE_COMPLEX, !coarray & ipx_psrc, ipx_psrc, !coarray & mpi_comm_world, istatus, ierr ) - ndis(me)[ipx_psrc+1] = icxbp0 ! Put +! ndis(me)[ipx_psrc+1] = icxbp0 ! Put + call xmp_array_section_set_triplet(ndis_sec, + & 1,int(me,kind=8),int(me,kind=8),1,status) + img_dims(1) = ipx_psrc+1 + call xmp_coarray_put_scalar(img_dims,ndis_desc, + & ndis_sec,icxbp0,status) ! sync all call xmp_sync_all(status) nb = ndis(ipx_pdest+1) - wm_tmp( :, :, :, nb:nb +nd )[ipx_pdest+1] - . = wm_tmp( :, :, :, icxp0:icxp0+nd ) ! Put +! wm_tmp( :, :, :, nb:nb +nd )[ipx_pdest+1] +! . = wm_tmp( :, :, :, icxp0:icxp0+nd ) ! 
Put + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 2,int(1,kind=8),int(lclz,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 3,int(1,kind=8),int(lcly,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 4,int(nb,kind=8),int(nb+nd,kind=8),1,status) + + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 2,int(1,kind=8),int(lclz,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 3,int(1,kind=8),int(lcly,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 4,int(icxp0,kind=8),int(icxp0+nd,kind=8),1,status) + + img_dims(1) = ipx_pdest+1 + call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + & wm_tmp_desc,wm_tmp_l_sec,status) + + ! sync all call xmp_sync_all(status) !! @@ -1103,12 +1794,38 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, !coarray & wm_tmp(1,1,1,icxbm0), nccm, MPI_DOUBLE_COMPLEX, !coarray & ipx_msrc, ipx_msrc, !coarray & mpi_comm_world, istatus, ierr ) - mdis(me)[ipx_msrc+1] = icxbm0 ! Put +! mdis(me)[ipx_msrc+1] = icxbm0 ! Put + call xmp_array_section_set_triplet(mdis_sec, + & 1,int(me,kind=8),int(me,kind=8),1,status) + img_dims(1) = ipx_msrc+1 + call xmp_coarray_put_scalar(img_dims,mdis_desc, + & mdis_sec,icxbm0,status) ! sync all call xmp_sync_all(status) mb = mdis(ipx_mdest+1) - wm_tmp( :, :, :, mb:mb +md )[ipx_mdest+1] - . = wm_tmp( :, :, :, icxm0:icxm0+md ) +! wm_tmp( :, :, :, mb:mb +md )[ipx_mdest+1] +! . 
= wm_tmp( :, :, :, icxm0:icxm0+md ) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 2,int(1,kind=8),int(lclz,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 3,int(1,kind=8),int(lcly,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 4,int(mb,kind=8),int(mb+md,kind=8),1,status) + + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 2,int(1,kind=8),int(lclz,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 3,int(1,kind=8),int(lcly,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 4,int(icxm0,kind=8),int(icxm0+md,kind=8),1,status) + + img_dims(1) = ipx_mdest+1 + call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + & wm_tmp_desc,wm_tmp_l_sec,status) ! sync all call xmp_sync_all(status) !! @@ -1149,12 +1866,41 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, !coarray & wm_tmp(1,1,1,icxbp0), nccp, MPI_DOUBLE_COMPLEX, !coarray & ipx_psrc, ipx_psrc, !coarray & mpi_comm_world, istatus, ierr ) - ndis(me)[ipx_psrc+1] = icxbp0 ! Put +! ndis(me)[ipx_psrc+1] = icxbp0 ! Put + call xmp_array_section_set_triplet(ndis_sec, + & 1,int(me,kind=8),int(me,kind=8),1,status) + img_dims(1) = ipx_psrc+1 + call xmp_coarray_put_scalar(img_dims,ndis_desc, + & ndis_sec,icxbp0,status) ! sync all call xmp_sync_all(status) nb = ndis(ipx_pdest+1) - wm_tmp( :, :, :, nb:nb +nd-1 )[ipx_pdest+1] - . = wm_tmp( :, :, :, icxbp0prior:icxbp0prior+nd-1 ) ! Put +! wm_tmp( :, :, :, nb:nb +nd-1 )[ipx_pdest+1] +! . = wm_tmp( :, :, :, icxbp0prior:icxbp0prior+nd-1 ) ! 
Put + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 2,int(1,kind=8),int(lclz,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 3,int(1,kind=8),int(lcly,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 4,int(nb,kind=8),int(nb+nd-1,kind=8),1,status) + + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 2,int(1,kind=8),int(lclz,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 3,int(1,kind=8),int(lcly,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 4,int(icxbp0prior,kind=8),int(icxbp0prior+nd-1,kind=8),1,status) + + img_dims(1) = ipx_pdest+1 + call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + & wm_tmp_desc,wm_tmp_l_sec,status) + + + ! sync all call xmp_sync_all(status) !! @@ -1164,12 +1910,38 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, !coarray & wm_tmp(1,1,1,icxbm0), nccm, MPI_DOUBLE_COMPLEX, !coarray & ipx_msrc, ipx_msrc, !coarray & mpi_comm_world, istatus, ierr ) - mdis(me)[ipx_msrc+1] = icxbm0 ! Put +! mdis(me)[ipx_msrc+1] = icxbm0 ! Put + call xmp_array_section_set_triplet(mdis_sec, + & 1,int(me,kind=8),int(me,kind=8),1,status) + img_dims(1) = ipx_msrc+1 + call xmp_coarray_put_scalar(img_dims,mdis_desc, + & mdis_sec,icxbm0,status) ! sync all call xmp_sync_all(status) mb = mdis(ipx_mdest+1) - wm_tmp( :, :, :, mb:mb +md-1 )[ipx_mdest+1] - . = wm_tmp( :, :, :, icxbm0prior:icxbm0prior+md-1 ) ! Put +! wm_tmp( :, :, :, mb:mb +md-1 )[ipx_mdest+1] +! . = wm_tmp( :, :, :, icxbm0prior:icxbm0prior+md-1 ) ! 
Put + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 2,int(1,kind=8),int(lclz,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 3,int(1,kind=8),int(lcly,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 4,int(mb,kind=8),int(mb+md-1,kind=8),1,status) + + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 2,int(1,kind=8),int(lclz,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 3,int(1,kind=8),int(lcly,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 4,int(icxbm0prior,kind=8),int(icxbm0prior+md-1,kind=8),1,status) + + img_dims(1) = ipx_mdest+1 + call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + & wm_tmp_desc,wm_tmp_l_sec,status) ! sync all call xmp_sync_all(status) !! @@ -1198,12 +1970,39 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, !coarray & MPI_DOUBLE_COMPLEX, !coarray & ipx_pdest, myrank, !coarray & mpi_comm_world, istatus, ierr ) - ndis(me)[ipx_psrc+1] = icxbp0 ! Put +! ndis(me)[ipx_psrc+1] = icxbp0 ! Put + call xmp_array_section_set_triplet(ndis_sec, + & 1,int(me,kind=8),int(me,kind=8),1,status) + img_dims(1) = ipx_psrc+1 + call xmp_coarray_put_scalar(img_dims,ndis_desc, + & ndis_sec,icxbp0,status) ! sync all call xmp_sync_all(status) nb = ndis(ipx_pdest+1) - wm_tmp( :, :, :, nb:nb +nd-1 )[ipx_pdest+1] - . = wm_tmp( :, :, :, icxbp0prior:icxbp0prior+nd-1 ) ! put +! wm_tmp( :, :, :, nb:nb +nd-1 )[ipx_pdest+1] +! . = wm_tmp( :, :, :, icxbp0prior:icxbp0prior+nd-1 ) ! 
put + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 2,int(1,kind=8),int(lclz,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 3,int(1,kind=8),int(lcly,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 4,int(nb,kind=8),int(nb+nd-1,kind=8),1,status) + + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 2,int(1,kind=8),int(lclz,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 3,int(1,kind=8),int(lcly,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 4,int(icxbp0prior,kind=8),int(icxbp0prior+nd-1,kind=8),1,status) + + img_dims(1) = ipx_pdest+1 + call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + & wm_tmp_desc,wm_tmp_l_sec,status) + ! sync all call xmp_sync_all(status) !! @@ -1233,12 +2032,38 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, !coarray & MPI_DOUBLE_COMPLEX, !coarray & ipx_mdest, myrank, !coarray & mpi_comm_world, istatus, ierr ) - mdis(me)[ipx_msrc+1] = icxbm0 ! Put +! mdis(me)[ipx_msrc+1] = icxbm0 ! Put + call xmp_array_section_set_triplet(mdis_sec, + & 1,int(me,kind=8),int(me,kind=8),1,status) + img_dims(1) = ipx_msrc+1 + call xmp_coarray_put_scalar(img_dims,mdis_desc, + & mdis_sec,icxbm0,status) ! sync all call xmp_sync_all(status) mb = mdis(ipx_mdest+1) - wm_tmp( :, :, :, mb:mb +md )[ipx_mdest+1] - . = wm_tmp( :, :, :, icxbm0prior:icxbm0prior+md ) ! Put +! wm_tmp( :, :, :, mb:mb +md )[ipx_mdest+1] +! . = wm_tmp( :, :, :, icxbm0prior:icxbm0prior+md ) ! 
Put + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 2,int(1,kind=8),int(lclz,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 3,int(1,kind=8),int(lcly,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_r_sec, + & 4,int(mb,kind=8),int(mb+md,kind=8),1,status) + + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 1,int(1,kind=8),int(mylm,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 2,int(1,kind=8),int(lclz,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 3,int(1,kind=8),int(lcly,kind=8),1,status) + call xmp_array_section_set_triplet(wm_tmp_l_sec, + & 4,int(icxbm0prior,kind=8),int(icxbm0prior+md,kind=8),1,status) + + img_dims(1) = ipx_mdest+1 + call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + & wm_tmp_desc,wm_tmp_l_sec,status) ! sync all call xmp_sync_all(status) !! diff --git a/MODYLAS-MINI/src/xmpAPI_fmodules.f b/MODYLAS-MINI/src/xmpAPI_fmodules.f index 8d9c697..8e5d658 100755 --- a/MODYLAS-MINI/src/xmpAPI_fmodules.f +++ b/MODYLAS-MINI/src/xmpAPI_fmodules.f @@ -47,17 +47,20 @@ module trj_mpi ! real(8),allocatable :: wkxyz(:,:)[:] real(8), POINTER :: wkxyz(:,:) => null () integer(8) :: wkxyz_desc + integer(8) :: wkxyz_local_sec, wkxyz_remote_sec real(8),allocatable :: wkv(:,:) ! ! integer(4),allocatable :: i2m(:), m2i(:)[:] integer(4),allocatable :: i2m(:) integer(4), POINTER :: m2i(:) => null () integer(8) :: m2i_desc + integer(8) :: m2i_local_sec, m2i_remote_sec ! !integer(4),allocatable :: tag(:,:,:),na_per_cell(:,:,:)[:] integer(4),allocatable :: tag(:,:,:) integer(4), POINTER :: na_per_cell(:,:,:) => null () integer(8) :: na_per_cell_desc + integer(8) :: na_per_cell_local_sec, na_per_cell_remote_sec integer(4) :: na1cell,na5cell,nadirect integer(4) :: naline,narea @@ -285,28 +288,65 @@ module comm_d3 integer nczdiv, ncydiv, ncxdiv ! ! 
integer,allocatable :: icbufp(:)[:] integer, POINTER :: icbufp(:) => null () + integer(8) :: icbufp_sec, icbufp_desc + integer(8), dimension(1) :: icbufp_lb, icbufp_ub + ! ! integer,allocatable :: ircbufp(:)[:] integer, POINTER :: ircbufp(:) => null () + integer(8) :: ircbufp_sec, ircbufp_desc + integer(8), dimension(1) :: ircbufp_lb, ircbufp_ub + ! ! integer,allocatable :: icbufm(:)[:] integer, POINTER :: icbufm(:) => null () + integer(8) :: icbufm_sec, icbufm_desc + integer(8), dimension(1) :: icbufm_lb, icbufm_ub + ! ! integer,allocatable :: ircbufm(:)[:] integer, POINTER :: ircbufm(:) => null () + integer(8) :: ircbufm_sec, ircbufm_desc + integer(8), dimension(1) :: ircbufm_lb, ircbufm_ub + ! ! integer,allocatable :: ibuffp(:)[:] integer, POINTER :: ibuffp(:) => null () + integer(8) :: ibuffp_sec, ibuffp_desc + integer(8), dimension(1) :: ibuffp_lb, ibuffp_ub + ! ! integer,allocatable :: irbuffp(:)[:] integer, POINTER :: irbuffp(:) => null () + integer(8) :: irbuffp_sec, irbuffp_desc + integer(8), dimension(1) :: irbuffp_lb, irbuffp_ub + ! ! integer,allocatable :: ibuffm(:)[:] integer, POINTER :: ibuffm(:) => null () + integer(8) :: ibuffm_sec, ibuffm_desc + integer(8), dimension(1) :: ibuffm_lb, ibuffm_ub + ! ! integer,allocatable :: irbuffm(:)[:] integer, POINTER :: irbuffm(:) => null () + integer(8) :: irbuffm_sec, irbuffm_desc + integer(8), dimension(1) :: irbuffm_lb, irbuffm_ub + ! ! real(8),allocatable :: buffp(:,:)[:] real(8), POINTER :: buffp(:,:) => null () + integer(8) :: buffp_sec, buffp_desc + integer(8), dimension(2) :: buffp_lb, buffp_ub + ! ! real(8),allocatable :: rbuffp(:,:)[:] real(8), POINTER :: rbuffp(:,:) => null () + integer(8) :: rbuffp_sec, rbuffp_desc + integer(8), dimension(2) :: rbuffp_lb, rbuffp_ub + ! ! real(8),allocatable :: buffm(:,:)[:] real(8), POINTER :: buffm(:,:) => null () + integer(8) :: buffm_sec, buffm_desc + integer(8), dimension(2) :: buffm_lb, buffm_ub + ! ! 
real(8),allocatable :: rbuffm(:,:)[:] real(8), POINTER :: rbuffm(:,:) => null () + integer(8) :: rbuffm_sec, rbuffm_desc + integer(8), dimension(2) :: rbuffm_lb, rbuffm_ub + + integer :: img_dims(1) end module c---------------------------------------------------------------------- module comm_bd @@ -319,32 +359,61 @@ module comm_bd integer,allocatable :: isbucket(:,:,:,:) integer,allocatable :: ncseg(:,:,:) integer,allocatable :: ncatom(:,:,:) + real(8),allocatable :: buffp(:,:) real(8),allocatable :: buffm(:,:) + integer(8) :: buffp_local_sec, buffm_local_sec + integer(8) :: buffp_local_desc, buffm_local_desc + integer(8), dimension(2) :: buffp_lb, buffm_lb + integer(8), dimension(2) :: buffp_ub, buffm_ub + integer,allocatable :: ibuffp(:) + integer(8) :: ibuffp_local_sec, ibuffp_local_desc + integer(8), dimension(1) :: ibuffp_lb, ibuffp_ub + integer,allocatable :: ibuffm(:) + integer(8) :: ibuffm_local_sec, ibuffm_local_desc + integer(8), dimension(1) :: ibuffm_lb, ibuffm_ub + integer,allocatable :: isbufp(:) integer,allocatable :: isbufm(:) + integer(8) :: isbufp_local_sec, isbufm_local_sec + integer(8) :: isbufp_local_desc, isbufm_local_desc + integer(8), dimension(1) :: isbufp_lb,isbufm_lb + integer(8), dimension(1) :: isbufp_ub,isbufm_ub + ! ! real(8),allocatable :: rbuff_p(:,:)[:] real(8), POINTER :: rbuff_p(:,:) => null () integer(8) :: rbuff_p_desc + integer(8) :: rbuff_p_sec + integer(8), dimension(2) :: rbuff_p_lb,rbuff_p_ub ! ! real(8),allocatable :: rbuff_m(:,:)[:] real(8), POINTER :: rbuff_m(:,:) => null () integer(8) :: rbuff_m_desc + integer(8) :: rbuff_m_sec + integer(8), dimension(2) :: rbuff_m_lb, rbuff_m_ub ! ! integer,allocatable :: irbuff_p(:)[:] integer, POINTER :: irbuff_p(:) => null () integer(8) :: irbuff_p_desc + integer(8) :: irbuff_p_sec + integer(8), dimension(1) :: irbuff_p_lb, irbuff_p_ub ! ! 
integer,allocatable :: irbuff_m(:)[:] integer, POINTER :: irbuff_m(:) => null () integer(8) :: irbuff_m_desc + integer(8) :: irbuff_m_sec + integer(8), dimension(1) :: irbuff_m_lb, irbuff_m_ub ! ! integer,allocatable :: irsbuf_p(:)[:] integer, POINTER :: irsbuf_p(:) => null () integer(8) :: irsbuf_p_desc + integer(8) :: irsbuf_p_sec + integer(8), dimension(1) :: irsbuf_p_lb, irsbuf_p_ub ! ! integer,allocatable :: irsbuf_m(:)[:] integer, POINTER :: irsbuf_m(:) => null () integer(8) :: irsbuf_m_desc + integer(8) :: irsbuf_m_sec + integer(8), dimension(1) :: irsbuf_m_lb, irsbuf_m_ub - integer(4), dimension(1) :: comm_bd_img_dims + integer(4), dimension(1) :: img_dims integer,allocatable :: ncatmw(:,:,:,:) diff --git a/MODYLAS-MINI/src/xmpAPI_k_energy.f b/MODYLAS-MINI/src/xmpAPI_k_energy.f index 4ca5e10..42d5bce 100755 --- a/MODYLAS-MINI/src/xmpAPI_k_energy.f +++ b/MODYLAS-MINI/src/xmpAPI_k_energy.f @@ -87,8 +87,12 @@ subroutine k_energy_scaler(k_ene_sum) !coarray call mpi_allreduce(wk_ksum,k_ene_sum,1, !coarray & mpi_double_precision,mpi_sum,mpi_comm_world,ierr) - k_ene_sum = wk_ksum - call co_sum(k_ene_sum) +! k_ene_sum = wk_ksum +! call co_sum(k_ene_sum) + call mpi_allreduce(wk_ksum,k_ene_sum,1, + & mpi_double_precision,mpi_sum,mpi_comm_world,ierr) + wk_ksum = k_ene_sum + !! return diff --git a/MODYLAS-MINI/src/xmpAPI_mpitool.f b/MODYLAS-MINI/src/xmpAPI_mpitool.f index d2ae740..42f4651 100755 --- a/MODYLAS-MINI/src/xmpAPI_mpitool.f +++ b/MODYLAS-MINI/src/xmpAPI_mpitool.f @@ -39,10 +39,9 @@ subroutine mpistart !coarray call mpi_comm_rank(mpi_comm_world,myrank,ierr) ! nprocs = num_images() ! nprocs = xmp_num_images() - ! TODO: use xmp_num_images - call mpi_comm_size(mpi_comm_world,nprocs,ierr) - + !call mpi_comm_size(mpi_comm_world,nprocs,ierr) + nprocs = xmp_num_images() ! myrank = this_image()-1 myrank = xmp_this_image() - 1 !! 
diff --git a/MODYLAS-MINI/src/xmpAPI_nve_integrate.f b/MODYLAS-MINI/src/xmpAPI_nve_integrate.f index 56a27d4..3abdfbf 100755 --- a/MODYLAS-MINI/src/xmpAPI_nve_integrate.f +++ b/MODYLAS-MINI/src/xmpAPI_nve_integrate.f @@ -188,8 +188,11 @@ subroutine calc_hamiltonian_nve() ! ^^^ reduce potential energy ^^^ !coarray call mpi_allreduce(wk_p_energy,p_energy,1, !coarray & mpi_double_precision,mpi_sum,mpi_comm_world,ierr) - p_energy = wk_p_energy - call co_sum(p_energy) +! p_energy = wk_p_energy +! call co_sum(p_energy) + call mpi_allreduce(wk_p_energy,p_energy,1, + & mpi_double_precision,mpi_sum,mpi_comm_world,ierr) + wk_p_energy = p_energy !! ! ^^^ reduce kinetic energy ^^^ From 3a1fa635141317e375926d3cb72ce71b4dafd63c Mon Sep 17 00:00:00 2001 From: tozaki_chisae Date: Tue, 16 Mar 2021 15:20:43 +0900 Subject: [PATCH 61/70] [WIP] add 2 files and modify 4 files. --- FFB-MINI/src/Makefile | 2 +- FFB-MINI/src/dd_mpi/Makefile | 2 +- FFB-MINI/src/dd_mpi/xmpAPI_ddcom4.F | 110 ++++++++++++++++ FFB-MINI/src/xmpAPI_elm3dx.F | 3 - FFB-MINI/src/xmpAPI_extrfn.F | 196 ++++++++++++++++++++++++++++ FFB-MINI/src/xmpAPI_lrfnms.F | 18 ++- 6 files changed, 322 insertions(+), 9 deletions(-) create mode 100755 FFB-MINI/src/dd_mpi/xmpAPI_ddcom4.F create mode 100755 FFB-MINI/src/xmpAPI_extrfn.F diff --git a/FFB-MINI/src/Makefile b/FFB-MINI/src/Makefile index df24480..ec8c083 100755 --- a/FFB-MINI/src/Makefile +++ b/FFB-MINI/src/Makefile @@ -39,7 +39,7 @@ OBJS += metis_wrapper.o endif ifeq (, $(findstring -DNO_REFINER, $(FFLAGS))) -OBJS += xmpAPI_lrfnms.o extrfn.o +OBJS += xmpAPI_lrfnms.o xmpAPI_extrfn.o endif param.h: param.h.in diff --git a/FFB-MINI/src/dd_mpi/Makefile b/FFB-MINI/src/dd_mpi/Makefile index 971ccc4..e878ebb 100755 --- a/FFB-MINI/src/dd_mpi/Makefile +++ b/FFB-MINI/src/dd_mpi/Makefile @@ -6,7 +6,7 @@ RANLIB ?= ranlib all: libdd_mpi.a -OBJS = xmpAPI_dd_mpi.o ddcom4.o +OBJS = xmpAPI_dd_mpi.o xmpAPI_ddcom4.o libdd_mpi.a: $(OBJS) $(AR) $(ARFLAGS) $@ $(OBJS) diff --git 
a/FFB-MINI/src/dd_mpi/xmpAPI_ddcom4.F b/FFB-MINI/src/dd_mpi/xmpAPI_ddcom4.F new file mode 100755 index 0000000..7914b0b --- /dev/null +++ b/FFB-MINI/src/dd_mpi/xmpAPI_ddcom4.F @@ -0,0 +1,110 @@ + SUBROUTINE DDCOM4(NDOM,LDOM,MAX, + * NPB1,LPB1,XPB1,YPB1,ZPB1, + * NPB2,LPB2,XPB2,YPB2,ZPB2, +C Fujitsu start 202103 +C * MAXBUF,BUFSND,BUFRCV, + * MAXBUF,rx_desc,ry_desc, +C Fujitsu end 202103 + * IUT0,IERR) +C Fujitsu start 202103 + use xmp_api + use mpi +C Fujitsu end 202103 + IMPLICIT REAL*4 (A-H,O-Z) + DIMENSION LDOM(NDOM), + * NPB1( NDOM),NPB2( NDOM), + * LPB1(MAX,NDOM),LPB2(MAX,NDOM), + * XPB1(MAX,NDOM),XPB2(MAX,NDOM), + * YPB1(MAX,NDOM),YPB2(MAX,NDOM), +C Fujitsu start 202103 +C * ZPB1(MAX,NDOM),ZPB2(MAX,NDOM), + * ZPB1(MAX,NDOM),ZPB2(MAX,NDOM) +C * BUFSND(MAXBUF),BUFRCV(MAXBUF) + REAL*4 , POINTER :: BUFSND ( : ) => null ( ) + REAL*4 , POINTER :: BUFRCV ( : ) => null ( ) + INTEGER*8 :: rx_desc, ry_desc +C +C INCLUDE 'mpif.h' +C Fujitsu end 202103 +C + PARAMETER (MAXDOM=10000) + DIMENSION MSGIDS(MAXDOM),MSGSTS(MPI_STATUS_SIZE,MAXDOM) +C +C +C REAL NPB2_TMP(NDOM)[*] +C REAL NPB1_TMP(NDOM) +C +C Fujitsu start 202103 + call xmp_coarray_bind(rx_desc,BUFSND) + call xmp_coarray_bind(ry_desc,BUFRCV) +C Fujitsu end 202103 +C +C DO 100 IDOM=1,NDOM +C NPB2_TMP(IDOM)[LDOM(IDOM)]=NPB1_TMP(IDOM) +C 100 CONTINUE +CC! 
XMP SYNC_MEMORY + WRITE(IUT6,*) 'NDOM======================================',NDOM + DO 1000 IDOM=1,NDOM + MSGLEN=1 + MSGTYP=1 + IRECV =LDOM(IDOM)-1 + CALL MPI_IRECV(NPB2(IDOM),MSGLEN,MPI_REAL,IRECV,MSGTYP, + * MPI_COMM_WORLD,MSGIDS(IDOM),IERR) + 1000 CONTINUE +C + DO 1100 IDOM=1,NDOM + MSGLEN=1 + MSGTYP=1 + ISEND =LDOM(IDOM)-1 + CALL MPI_ISEND(NPB1(IDOM),MSGLEN,MPI_REAL,ISEND,MSGTYP, + * MPI_COMM_WORLD,MSGIDS(NDOM+IDOM),IERR) + 1100 CONTINUE +C + CALL MPI_WAITALL(2*NDOM,MSGIDS,MSGSTS,IERR) +C + NSTART=1 + DO 2000 IDOM=1,NDOM + MSGTYP=1 + IRECV =LDOM(IDOM)-1 + MSGLEN=4*NPB2(IDOM) + CALL MPI_IRECV(BUFRCV(NSTART),MSGLEN,MPI_REAL, + * IRECV,MSGTYP,MPI_COMM_WORLD,MSGIDS(IDOM),IERR) + NSTART=NSTART+MSGLEN + 2000 CONTINUE +C + NSTART=1 + DO 2100 IDOM=1,NDOM + DO 2200 IBP=1,NPB1(IDOM) + BUFSND(NSTART+0)=LPB1(IBP,IDOM) + BUFSND(NSTART+1)=XPB1(IBP,IDOM) + BUFSND(NSTART+2)=YPB1(IBP,IDOM) + BUFSND(NSTART+3)=ZPB1(IBP,IDOM) + NSTART=NSTART+4 + 2200 CONTINUE + 2100 CONTINUE +C + NSTART=1 + DO 2300 IDOM=1,NDOM + MSGTYP=1 + ISEND =LDOM(IDOM)-1 + MSGLEN=4*NPB1(IDOM) + CALL MPI_ISEND(BUFSND(NSTART),MSGLEN,MPI_REAL,ISEND,MSGTYP, + * MPI_COMM_WORLD,MSGIDS(NDOM+IDOM),IERR) + NSTART=NSTART+MSGLEN + 2300 CONTINUE +C + CALL MPI_WAITALL(2*NDOM,MSGIDS,MSGSTS,IERR) +C + NSTART=1 + DO 3000 IDOM=1,NDOM + DO 3100 IBP=1,NPB2(IDOM) + LPB2(IBP,IDOM)=BUFRCV(NSTART+0) + XPB2(IBP,IDOM)=BUFRCV(NSTART+1) + YPB2(IBP,IDOM)=BUFRCV(NSTART+2) + ZPB2(IBP,IDOM)=BUFRCV(NSTART+3) + NSTART=NSTART+4 + 3100 CONTINUE + 3000 CONTINUE +C + RETURN + END diff --git a/FFB-MINI/src/xmpAPI_elm3dx.F b/FFB-MINI/src/xmpAPI_elm3dx.F index b304fbc..7e193b4 100755 --- a/FFB-MINI/src/xmpAPI_elm3dx.F +++ b/FFB-MINI/src/xmpAPI_elm3dx.F @@ -23,9 +23,6 @@ SUBROUTINE ELM3DX(MGAUSS,IGAUSH, REAL*4 , POINTER :: DNX ( : , : ) => null ( ) REAL*4 , POINTER :: DNY ( : , : ) => null ( ) INTEGER*8 :: rx_desc, ry_desc - INTEGER*8, DIMENSION(2) :: rx_lb, rx_ub, ry_lb, ry_ub - INTEGER*4 :: img_dims(1) - INTEGER*4 :: status C Fujitsu end 202103 
INTEGER IUT0,IERR REAL*8 NN,NC,PSI,PSIC,WW diff --git a/FFB-MINI/src/xmpAPI_extrfn.F b/FFB-MINI/src/xmpAPI_extrfn.F new file mode 100755 index 0000000..fa0b6bd --- /dev/null +++ b/FFB-MINI/src/xmpAPI_extrfn.F @@ -0,0 +1,196 @@ + SUBROUTINE EXTRFN(NGRID,ME,N,NE,NP,NODE,NPWALL,LPWALL, + * NPINLT,LPINLT, + * LERFN,LPRFN,LEACNV,NE1,NE2,NDORG,NODEBK, + * AWRK,IPART,NDOM,MBPDOM,LDOM,NBPDOM, +C Fujitsu start 202103 +C * IPSLF,IPSND,RX,RY,MAXBUF,IUT0,IERR) + * IPSLF,IPSND,rx_desc,ry_desc,MAXBUF,IUT0,IERR) +C Fujitsu end 202103 +C + IMPLICIT NONE +C + INTEGER*4 NPINLT,LPINLT(NPINLT) + INTEGER*4 NGRID + INTEGER*4 ME,N,NE,NP,NPWALL,MAXBUF + INTEGER*4 NODE(N,NE),LPWALL(NPWALL) + INTEGER*4 LERFN(NE),LPRFN(NP),LEACNV(ME) + INTEGER*4 IPART,NDOM,MBPDOM + INTEGER*4 LDOM(NDOM),NBPDOM(NDOM), + * IPSLF(MBPDOM,NDOM),IPSND(MBPDOM,NDOM), + * RX(ME*8),RY(ME*8) +C Fujitsu start 202103 + INTEGER*8 :: rx_desc, ry_desc +C Fujitsu end 202103 + INTEGER*4 IUT0,IERR + REAL*4 AWRK(NP) +C OUT + INTEGER*4 NE1,NE2 + INTEGER*4 NDORG(NE*8),NODEBK(8,NE) + +C WORK + INTEGER*4 I,J,IE,IP,IBP,IDUM,IEBUF,IER,NDUM + CHARACTER*60 ERMSGC + & /' ## SUBROUTINE FDRELM: ERROR OCCURED ; RETURNED' / + CHARACTER*60 ERMSG1 + & /' ## SUBROUTINE FDRELM: FAILED TO REORDER NODE TBL; RETURNED' / + +C +CC +CCHY[1] MAKE ELEMENT LIST TO BE REFINED +CC + DO 1000 IE=1,NE + LERFN(IE)=0 + 1000 CONTINUE +C + DO 1100 IP=1,NP + LPRFN(IP)=0 + 1100 CONTINUE +C + DO 1200 IBP=1,NPWALL + IP=LPWALL(IBP) + LPRFN(IP)=1 + 1200 CONTINUE +C + DO 1300 IE=1,ME + LEACNV(IE)=0 + 1300 CONTINUE +C + IF(NGRID.EQ.-1) THEN + DO 1500 IE=1,NE + LERFN(IE)=1 + 1500 CONTINUE + GOTO 2700 + ENDIF +C + DO 2000 J=1,NGRID +CC +CC FIND ELEMENTS ATTACHED TO REFINE BOUNDARY NODE +CC + DO 2100 IE=1,NE + IF(LERFN(IE).EQ.1) GOTO 2100 + DO 2200 I=1,N + IP=NODE(I,IE) + IF(IP.EQ.0) GOTO 2200 + IF(LPRFN(IP).EQ.0) GOTO 2200 + LERFN(IE)=1 + GOTO 2100 + 2200 CONTINUE + 2100 CONTINUE +CC +CC UPDATE REFINE BOUNDARY NODE +CC + DO 2300 IE=1,NE + IF(LERFN(IE).EQ.0) GOTO 2300 + DO 
2400 I=1,N + IP=NODE(I,IE) + IF(IP.EQ.0) GOTO 2400 + LPRFN(IP)=1 + 2400 CONTINUE + 2300 CONTINUE +C + DO 2500 IP=1,NP + AWRK(IP)=FLOAT(LPRFN(IP)) + 2500 CONTINUE +C + IDUM=1 + CALL DDCOMX(IPART,IDUM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, +C Fujitsu start 202103 +C * AWRK,AWRK,AWRK,NP,IUT0,IERR,RX,RY,MAXBUF) + * AWRK,AWRK,AWRK,NP,IUT0,IERR,rx_desc,ry_desc,MAXBUF) +C Fujitsu end 202103 + IF(IERR.NE.0) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSGC + RETURN + ENDIF +C + DO 2600 IP=1,NP + IF (AWRK(IP).GT.0.0) THEN + LPRFN(IP)=1 + ELSE + LPRFN(IP)=0 + ENDIF + 2600 CONTINUE +C + 2000 CONTINUE +C + 2700 CONTINUE +C +CC +CCHY [2] COUNT NUM. OF ELEMENTS TO BE REFINERD +CC + NE1=0 + IER=0 + DO 3000 IE=1,NE + IF(LERFN(IE).EQ.0) GOTO 3000 + NE1=NE1+1 +C + IF(NODE(6,IE).EQ.0.AND.NODE(5,IE).NE.0) THEN + NDUM=10 + ELSE + NDUM=8 + ENDIF +C + DO 3100 I=1,NDUM + IER=IER+1 + LEACNV(IER)=IE + 3100 CONTINUE +C + 3000 CONTINUE +C +CC +CCHY [3] REORDER NODE TABLE +CC + DO 4000 IE=1,NE + DO 4100 I=1,8 + NDORG((IE-1)*8+I)=0 + 4100 CONTINUE + 4000 CONTINUE +CC +CC SET NODE TABLE AT ELEMENTS TO BE REFINED +CC + IEBUF=0 + DO 4200 IE=1,NE + IF(LERFN(IE).EQ.0) GOTO 4200 + IEBUF=IEBUF+1 + DO 4300 I=1,8 + NDORG((IEBUF-1)*8+I)=NODE(I,IE) + 4300 CONTINUE + 4200 CONTINUE +C + IF(IEBUF.NE.NE1) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSG1 + IERR=1 + RETURN + ENDIF +CC +CC SET NODE TABLE AT ELEMENTS NOT TO BE REFINED +CC + NE2=0 + DO 4400 IE=1,NE + IF(LERFN(IE).EQ.1) GOTO 4400 + NE2=NE2+1 + IER=IER+1 + LEACNV(IER)=IE + DO 4500 I=1,8 + NDORG((NE1+NE2-1)*8+I)=NODE(I,IE) + NODEBK(I,NE2)=NODE(I,IE) + 4500 CONTINUE + 4400 CONTINUE +C + IF(NE1+NE2.NE.NE) THEN + WRITE(IUT0,*) + WRITE(IUT0,*) ERMSG1 + IERR=1 + RETURN + ENDIF +C + DO 4600 IE=1,NE + DO 4700 I=1,8 + NODE(I,IE)=NDORG((IE-1)*8+I) + 4700 CONTINUE + 4600 CONTINUE +C + RETURN + END diff --git a/FFB-MINI/src/xmpAPI_lrfnms.F b/FFB-MINI/src/xmpAPI_lrfnms.F index 133a958..df44bb3 100755 --- a/FFB-MINI/src/xmpAPI_lrfnms.F +++ b/FFB-MINI/src/xmpAPI_lrfnms.F @@ 
-203,7 +203,8 @@ SUBROUTINE LRFNMS(IRFNFF,IRFN,NGRID, CALL DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM, C Fujitsu start 202103 C * WRK04,WRK04,WRK04,NP,IUT0,IERR,RX,RY,MAXBUF) - * WRK04,WRK04,WRK04,NP,IUT0,IERR,rx_desc,ry_desc,MAXBUF) + * WRK04,WRK04,WRK04,NP,IUT0,IERR,rx_desc,ry_desc, + * MAXBUF) C Fujitsu end 202103 CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) IF(IERRA.NE.0) THEN @@ -223,7 +224,10 @@ SUBROUTINE LRFNMS(IRFNFF,IRFN,NGRID, * NPINLT,LPINLT, * LWRK01,LWRK02,LEACNV,NE1,NE2,NDORG,NODEBK, * WRK04,IPART,NDOM,MBPDOM,LDOM,NBPDOM, - * IPSLF,IPSND,RX,RY,MAXBUF,IUT0,IERR) +C Fujitsu start 202103 +C * IPSLF,IPSND,RX,RY,MAXBUF,IUT0,IERR) + * IPSLF,IPSND,rx_desc,ry_desc,MAXBUF,IUT0,IERR) +C Fujitsu end 202103 CALL ERRCHK(IUT6,IPART,1,IERR,IERRA) IF(IERRA.NE.0) THEN WRITE(IUT0,*) @@ -708,7 +712,10 @@ SUBROUTINE LRFNMS(IRFNFF,IRFN,NGRID, CALL DDCOM4(NDOM,LDOM,MBPDOM, * NPB1,LPB1,XPB1,YPB1,ZPB1, * NPB2,LPB2,XPB2,YPB2,ZPB2, - * MAXBUF,RX,RY,IUT0,IERR) +C Fujitsu start 202103 +C * MAXBUF,RX,RY,IUT0,IERR) + * MAXBUF,rx_desc,ry_desc,IUT0,IERR) +C Fujitsu end 202103 C CC CC MAKE INTER-CONNECT NODES @@ -771,7 +778,10 @@ SUBROUTINE LRFNMS(IRFNFF,IRFN,NGRID, CALL DDCOM4(NDOM,LDOM,MBPDOM, * NPB0,LPB1,XPB1,YPB1,ZPB1, * NPB2,LPB2,XPB2,YPB2,ZPB2, - * MAXBUF,RX,RY,IUT0,IERR) +C Fujitsu start 202103 +C * MAXBUF,RX,RY,IUT0,IERR) + * MAXBUF,rx_desc,ry_desc,IUT0,IERR) +C Fujitsu end 202103 C DO 6950 IDOM=1,NDOM IF(NPB0(IDOM).NE.NPB2(IDOM)) IERR=1 From aa102611e6ff9f4daa1f6c2de4a33ab90758b9eb Mon Sep 17 00:00:00 2001 From: Kazuma Tago Date: Tue, 16 Mar 2021 16:19:16 +0900 Subject: [PATCH 62/70] Fix xmp-api routines of MODYLAS. 
--- MODYLAS-MINI/src/xmpAPI_comm.f | 19 +- MODYLAS-MINI/src/xmpAPI_comm_3.f | 333 ++++++++++++++------------- MODYLAS-MINI/src/xmpAPI_comm_fmm.f | 89 +++---- MODYLAS-MINI/src/xmpAPI_domain_div.f | 6 +- MODYLAS-MINI/src/xmpAPI_fmodules.f | 12 +- 5 files changed, 231 insertions(+), 228 deletions(-) diff --git a/MODYLAS-MINI/src/xmpAPI_comm.f b/MODYLAS-MINI/src/xmpAPI_comm.f index 0fce60f..7cc76b5 100755 --- a/MODYLAS-MINI/src/xmpAPI_comm.f +++ b/MODYLAS-MINI/src/xmpAPI_comm.f @@ -2110,7 +2110,6 @@ subroutine pre_record_data integer(8) :: m2i_tmp_local_desc integer(8),dimension(1) :: m2i_tmp_lb,m2i_tmp_ub - integer(4) :: img_dims(1) integer(4) :: status @@ -2124,18 +2123,21 @@ subroutine pre_record_data ! allocate(rcvx(6,n)[*]) ndis_lb(1) = 1 ndis_ub(1) = np + call xmp_new_coarray(ndis_desc,4,1,ndis_lb,ndis_ub,1, img_dims) + call xmp_coarray_bind(ndis_desc,ndis) + mdis_lb(1) = 1 mdis_ub(1) = n + call xmp_new_coarray(mdis_desc,4,1,mdis_lb,mdis_ub,1, img_dims) + call xmp_coarray_bind(mdis_desc,mdis) + rcvx_lb(1) = 1 rcvx_lb(2) = 1 rcvx_ub(1) = 6 rcvx_ub(2) = n - call xmp_new_coarray(ndis_desc,4,1,ndis_lb,ndis_ub,1, img_dims) - call xmp_new_coarray(mdis_desc,4,1,mdis_lb,mdis_ub,1, img_dims) call xmp_new_coarray(rcvx_desc,8,2,rcvx_lb,rcvx_ub,1, img_dims) - call xmp_coarray_bind(ndis_desc,ndis) - call xmp_coarray_bind(mdis_desc,mdis) call xmp_coarray_bind(rcvx_desc,rcvx) + call xmp_new_array_section(rcvx_sec,2) m2i_tmp_lb(1) = 1 m2i_tmp_ub(1) = na1cell*lxdiv*lydiv*lzdiv @@ -2143,6 +2145,7 @@ subroutine pre_record_data & m2i_tmp_lb,m2i_tmp_ub,loc(m2i_tmp)) call xmp_new_array_section(m2i_tmp_local_sec,1) + !! 
if(nprocs.eq.1) then @@ -2172,10 +2175,10 @@ subroutine pre_record_data !allocate(natmlist(nprocs)[*]) natmlist_lb(1) = 1 natmlist_ub(1) = nprocs - call xmp_new_coarray(natmlist_desc,8,2, + call xmp_new_coarray(natmlist_desc,8,1, & natmlist_lb,natmlist_ub,1,img_dims) call xmp_coarray_bind(natmlist_desc,natmlist) - call xmp_new_array_section(natmlist_sec,2) + call xmp_new_array_section(natmlist_sec,1) allocate(natmlist_tmp(nprocs)) allocate(natmdisp(nprocs)) @@ -2204,7 +2207,7 @@ subroutine pre_record_data do mm = 1,np !natmlist(me)[mm] = nselfatm ! Put call xmp_array_section_set_triplet(natmlist_sec, - & 1,int(me,kind=8),int(1,kind=8),1,status) + & 1,int(me,kind=8),int(me,kind=8),1,status) img_dims(1) = mm call xmp_coarray_put_scalar(img_dims,natmlist_desc, & natmlist_sec,nselfatm,status) diff --git a/MODYLAS-MINI/src/xmpAPI_comm_3.f b/MODYLAS-MINI/src/xmpAPI_comm_3.f index 88a5de9..c8e065a 100755 --- a/MODYLAS-MINI/src/xmpAPI_comm_3.f +++ b/MODYLAS-MINI/src/xmpAPI_comm_3.f @@ -216,12 +216,12 @@ subroutine comm_direct_3() ! ver.20120314 integer nd integer(4) status !! - call xmp_new_array_section(na_per_cell_local_sec,3) - call xmp_new_array_section(na_per_cell_remote_sec,3) - call xmp_new_array_section(wkxyz_local_sec,2) - call xmp_new_array_section(wkxyz_remote_sec,2) - call xmp_new_array_section(m2i_local_sec,1) - call xmp_new_array_section(m2i_remote_sec,1) + call xmp_new_array_section(na_per_cell_l_sec,3) + call xmp_new_array_section(na_per_cell_r_sec,3) + call xmp_new_array_section(wkxyz_l_sec,2) + call xmp_new_array_section(wkxyz_r_sec,2) + call xmp_new_array_section(m2i_l_sec,1) + call xmp_new_array_section(m2i_r_sec,1) c----- common parameters for coordinate communication. ----- ipx=mod(myrank,npx) @@ -245,6 +245,7 @@ subroutine comm_direct_3() ! 
ver.20120314 ipz_mdest = mod(ipz-1+1/npz+npz,npz)*npx*npy + ipy*npx + ipx ipz_msrc = mod(ipz+1-1/npz+npz,npz)*npx*npy + ipy*npx + ipx + nitr = (2 - 1)/nczdiv + 1 DO itr = 1, nitr @@ -776,48 +777,50 @@ subroutine comm_direct_3() ! ver.20120314 ! !. = na_per_cell(:, icyp0:icyp0 +nd, icx) ! Put - call xmp_array_section_set_triplet(na_per_cell_remote_sec, + call xmp_array_section_set_triplet(na_per_cell_r_sec, & 1,int(1,kind=8),int(lzdiv+4,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_remote_sec, + call xmp_array_section_set_triplet(na_per_cell_r_sec, & 2,int(icybp0,kind=8),int(icybp0+nd,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_remote_sec, - & 3,int(icx,kind=8),int(1,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_r_sec, + & 3,int(icx,kind=8),int(icx,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_local_sec, + call xmp_array_section_set_triplet(na_per_cell_l_sec, & 1,int(1,kind=8),int(lzdiv+4,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_local_sec, + call xmp_array_section_set_triplet(na_per_cell_l_sec, & 2,int(icyp0,kind=8),int(icyp0+nd,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_local_sec, - & 3,int(icx,kind=8),int(1,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_l_sec, + & 3,int(icx,kind=8),int(icx,kind=8),1,status) + img_dims(1) = ipy_pdest+1 - call xmp_coarray_put(img_dims,na_per_cell_desc, - & na_per_cell_remote_sec, - & na_per_cell_desc,na_per_cell_local_sec,status) + call xmp_coarray_put(img_dims, + & na_per_cell_desc,na_per_cell_r_sec, + & na_per_cell_desc,na_per_cell_l_sec,status) + nd = abs(icym1 - icym0) ! ! na_per_cell(:, icybm0:icybm0+nd, icx)[ipy_mdest+1] ! ! . = na_per_cell(:, icym0:icym0 +nd, icx) ! 
Put - call xmp_array_section_set_triplet(na_per_cell_remote_sec, + call xmp_array_section_set_triplet(na_per_cell_r_sec, & 1,int(1,kind=8),int(lzdiv+4,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_remote_sec, + call xmp_array_section_set_triplet(na_per_cell_r_sec, & 2,int(icybm0,kind=8),int(icybm0+nd,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_remote_sec, - & 3,int(icx,kind=8),int(1,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_r_sec, + & 3,int(icx,kind=8),int(icx,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_local_sec, + call xmp_array_section_set_triplet(na_per_cell_l_sec, & 1,int(1,kind=8),int(lzdiv+4,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_local_sec, + call xmp_array_section_set_triplet(na_per_cell_l_sec, & 2,int(icym0,kind=8),int(icym0+nd,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_local_sec, - & 3,int(icx,kind=8),int(1,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_l_sec, + & 3,int(icx,kind=8),int(icx,kind=8),1,status) img_dims(1) = ipy_mdest+1 call xmp_coarray_put(img_dims, - & na_per_cell_desc,na_per_cell_remote_sec, - & na_per_cell_desc,na_per_cell_local_sec,status) + & na_per_cell_desc,na_per_cell_r_sec, + & na_per_cell_desc,na_per_cell_l_sec,status) ! sync all @@ -895,32 +898,32 @@ subroutine comm_direct_3() ! ver.20120314 ! ! wkxyz(:,icarp:icarp+ncap-1)[ipy_pdest+1] ! !. = wkxyz(:,icasp:icasp+ncap-1) ! 
Put - call xmp_array_section_set_triplet(wkxyz_remote_sec, + call xmp_array_section_set_triplet(wkxyz_r_sec, & 1,int(1,kind=8),int(3,kind=8),1,status) - call xmp_array_section_set_triplet(wkxyz_remote_sec, + call xmp_array_section_set_triplet(wkxyz_r_sec, & 2,int(icarp,kind=8),int(icarp+ncap-1,kind=8),1,status) - call xmp_array_section_set_triplet(wkxyz_local_sec, + call xmp_array_section_set_triplet(wkxyz_l_sec, & 1,int(1,kind=8),int(3,kind=8),1,status) - call xmp_array_section_set_triplet(wkxyz_local_sec, + call xmp_array_section_set_triplet(wkxyz_l_sec, & 2,int(icasp,kind=8),int(icasp+ncap-1,kind=8),1,status) img_dims(1) = ipy_pdest+1 - call xmp_coarray_put(img_dims,wkxyz_desc,wkxyz_remote_sec, - & wkxyz_desc,wkxyz_local_sec,status) + call xmp_coarray_put(img_dims,wkxyz_desc,wkxyz_r_sec, + & wkxyz_desc,wkxyz_l_sec,status) ! ! m2i(icarp:icarp+ncap-1)[ipy_pdest+1] ! !. = m2i(icasp:icasp+ncap-1) ! Put - call xmp_array_section_set_triplet(m2i_remote_sec, + call xmp_array_section_set_triplet(m2i_r_sec, & 1,int(icarp,kind=8),int(icarp+ncap-1,kind=8),1,status) - call xmp_array_section_set_triplet(m2i_local_sec, + call xmp_array_section_set_triplet(m2i_l_sec, & 1,int(icasp,kind=8),int(icasp+ncap-1,kind=8),1,status) img_dims(1) = ipy_pdest+1 - call xmp_coarray_put(img_dims,m2i_desc,m2i_remote_sec, - & m2i_desc,m2i_local_sec,status) + call xmp_coarray_put(img_dims,m2i_desc,m2i_r_sec, + & m2i_desc,m2i_l_sec,status) @@ -928,33 +931,33 @@ subroutine comm_direct_3() ! ver.20120314 call xmp_sync_all(status) ! ! wkxyz(:,icarm:icarm+ncam-1)[ipy_mdest+1] ! !. = wkxyz(:,icasm:icasm+ncam-1) ! 
Put - call xmp_array_section_set_triplet(wkxyz_remote_sec, + call xmp_array_section_set_triplet(wkxyz_r_sec, & 1,int(1,kind=8),int(3,kind=8),1,status) - call xmp_array_section_set_triplet(wkxyz_remote_sec, + call xmp_array_section_set_triplet(wkxyz_r_sec, & 2,int(icarp,kind=8),int(icarp+ncam-1,kind=8),1,status) - call xmp_array_section_set_triplet(wkxyz_local_sec, + call xmp_array_section_set_triplet(wkxyz_l_sec, & 1,int(1,kind=8),int(3,kind=8),1,status) - call xmp_array_section_set_triplet(wkxyz_local_sec, + call xmp_array_section_set_triplet(wkxyz_l_sec, & 2,int(icasm,kind=8),int(icasm+ncam-1,kind=8),1,status) img_dims(1) = ipy_mdest+1 - call xmp_coarray_put(img_dims,wkxyz_desc,wkxyz_remote_sec, - & wkxyz_desc,wkxyz_local_sec,status) + call xmp_coarray_put(img_dims,wkxyz_desc,wkxyz_r_sec, + & wkxyz_desc,wkxyz_l_sec,status) ! ! m2i(icarm:icarm+ncam-1)[ipy_mdest+1] ! !. = m2i(icasm:icasm+ncam-1) ! Put - call xmp_array_section_set_triplet(m2i_remote_sec, + call xmp_array_section_set_triplet(m2i_r_sec, & 1,int(icarm,kind=8),int(icarm+ncam-1,kind=8),1,status) - call xmp_array_section_set_triplet(m2i_local_sec, + call xmp_array_section_set_triplet(m2i_l_sec, & 1,int(icasm,kind=8),int(icasm+ncam-1,kind=8),1,status) img_dims(1) = ipy_mdest+1 - call xmp_coarray_put(img_dims,m2i_desc,m2i_remote_sec, - & m2i_desc,m2i_local_sec,status) + call xmp_coarray_put(img_dims,m2i_desc,m2i_r_sec, + & m2i_desc,m2i_l_sec,status) ! sync all call xmp_sync_all(status) @@ -1014,24 +1017,24 @@ subroutine comm_direct_3() ! ver.20120314 ! ! na_per_cell(:, icybp0:icybp0+nd, icx)[ipy_pdest+1] ! !. = na_per_cell(:, icybp1st:icybp1st+nd, icx) ! 
Put - call xmp_array_section_set_triplet(na_per_cell_remote_sec, + call xmp_array_section_set_triplet(na_per_cell_r_sec, & 1,int(1,kind=8),int(lzdiv+4,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_remote_sec, + call xmp_array_section_set_triplet(na_per_cell_r_sec, & 2,int(icybp0,kind=8),int(icybp0+nd,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_remote_sec, - & 3,int(icx,kind=8),int(1,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_r_sec, + & 3,int(icx,kind=8),int(icx,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_local_sec, + call xmp_array_section_set_triplet(na_per_cell_l_sec, & 1,int(1,kind=8),int(lzdiv+4,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_local_sec, + call xmp_array_section_set_triplet(na_per_cell_l_sec, & 2,int(icybp1st,kind=8),int(icybp1st+nd,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_local_sec, - & 3,int(icx,kind=8),int(1,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_l_sec, + & 3,int(icx,kind=8),int(icx,kind=8),1,status) img_dims(1) = ipy_pdest+1 call xmp_coarray_put(img_dims, - & na_per_cell_desc,na_per_cell_remote_sec, - & na_per_cell_desc,na_per_cell_local_sec,status) + & na_per_cell_desc,na_per_cell_r_sec, + & na_per_cell_desc,na_per_cell_l_sec,status) ! sync all call xmp_sync_all(status) @@ -1039,24 +1042,24 @@ subroutine comm_direct_3() ! ver.20120314 ! ! na_per_cell(:, icybm0:icybm0+nd, icx)[ipy_pdest+1] ! !. = na_per_cell(:, icybm1st:icybm1st+nd, icx) ! 
Put - call xmp_array_section_set_triplet(na_per_cell_remote_sec, + call xmp_array_section_set_triplet(na_per_cell_r_sec, & 1,int(1,kind=8),int(lzdiv+4,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_remote_sec, + call xmp_array_section_set_triplet(na_per_cell_r_sec, & 2,int(icybm0,kind=8),int(icybm0+nd,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_remote_sec, - & 3,int(icx,kind=8),int(1,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_r_sec, + & 3,int(icx,kind=8),int(icx,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_local_sec, + call xmp_array_section_set_triplet(na_per_cell_l_sec, & 1,int(1,kind=8),int(lzdiv+4,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_local_sec, + call xmp_array_section_set_triplet(na_per_cell_l_sec, & 2,int(icybm1st,kind=8),int(icybm1st+nd,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_local_sec, - & 3,int(icx,kind=8),int(1,kind=8),1,status) + call xmp_array_section_set_triplet(na_per_cell_l_sec, + & 3,int(icx,kind=8),int(icx,kind=8),1,status) img_dims(1) = ipy_pdest+1 call xmp_coarray_put(img_dims, - & na_per_cell_desc,na_per_cell_remote_sec, - & na_per_cell_desc,na_per_cell_local_sec,status) + & na_per_cell_desc,na_per_cell_r_sec, + & na_per_cell_desc,na_per_cell_l_sec,status) ! sync all @@ -1125,31 +1128,31 @@ subroutine comm_direct_3() ! ver.20120314 ! ! wkxyz(:,icarp:icarp+ncap-1)[ipy_pdest+1] ! !. = wkxyz(:,icasp:icasp+ncap-1) ! 
Put - call xmp_array_section_set_triplet(wkxyz_remote_sec, + call xmp_array_section_set_triplet(wkxyz_r_sec, & 1,int(1,kind=8),int(3,kind=8),1,status) - call xmp_array_section_set_triplet(wkxyz_remote_sec, + call xmp_array_section_set_triplet(wkxyz_r_sec, & 2,int(icarp,kind=8),int(icarp+ncap-1,kind=8),1,status) - call xmp_array_section_set_triplet(wkxyz_local_sec, + call xmp_array_section_set_triplet(wkxyz_l_sec, & 1,int(1,kind=8),int(3,kind=8),1,status) - call xmp_array_section_set_triplet(wkxyz_local_sec, + call xmp_array_section_set_triplet(wkxyz_l_sec, & 2,int(icasp,kind=8),int(icasp+ncap-1,kind=8),1,status) img_dims(1) = ipy_pdest+1 - call xmp_coarray_put(img_dims,wkxyz_desc,wkxyz_remote_sec, - & wkxyz_desc,wkxyz_local_sec,status) + call xmp_coarray_put(img_dims,wkxyz_desc,wkxyz_r_sec, + & wkxyz_desc,wkxyz_l_sec,status) ! ! m2i(icarp:icarp+ncap-1)[ipy_pdest+1] ! !. = m2i(icasp:icasp+ncap-1) ! Put - call xmp_array_section_set_triplet(m2i_remote_sec, + call xmp_array_section_set_triplet(m2i_r_sec, & 1,int(icarp,kind=8),int(icarp+ncap-1,kind=8),1,status) - call xmp_array_section_set_triplet(m2i_local_sec, + call xmp_array_section_set_triplet(m2i_l_sec, & 1,int(icasp,kind=8),int(icasp+ncap-1,kind=8),1,status) img_dims(1) = ipy_pdest+1 - call xmp_coarray_put(img_dims,m2i_desc,m2i_remote_sec, - & m2i_desc,m2i_local_sec,status) + call xmp_coarray_put(img_dims,m2i_desc,m2i_r_sec, + & m2i_desc,m2i_l_sec,status) ! sync all @@ -1157,33 +1160,33 @@ subroutine comm_direct_3() ! ver.20120314 ! ! wkxyz(:,icarm:icarm+ncam-1)[ipy_mdest+1] ! !. = wkxyz(:,icasm:icasm+ncam-1) ! 
Put - call xmp_array_section_set_triplet(wkxyz_remote_sec, + call xmp_array_section_set_triplet(wkxyz_r_sec, & 1,int(1,kind=8),int(3,kind=8),1,status) - call xmp_array_section_set_triplet(wkxyz_remote_sec, + call xmp_array_section_set_triplet(wkxyz_r_sec, & 2,int(icarm,kind=8),int(icarm+ncam-1,kind=8),1,status) - call xmp_array_section_set_triplet(wkxyz_local_sec, + call xmp_array_section_set_triplet(wkxyz_l_sec, & 1,int(1,kind=8),int(3,kind=8),1,status) - call xmp_array_section_set_triplet(wkxyz_local_sec, + call xmp_array_section_set_triplet(wkxyz_l_sec, & 2,int(icasm,kind=8),int(icasm+ncam-1,kind=8),1,status) img_dims(1) = ipy_mdest+1 - call xmp_coarray_put(img_dims,wkxyz_desc,wkxyz_remote_sec, - & wkxyz_desc,wkxyz_local_sec,status) + call xmp_coarray_put(img_dims,wkxyz_desc,wkxyz_r_sec, + & wkxyz_desc,wkxyz_l_sec,status) ! ! m2i(icarm:icarm+ncam-1)[ipy_mdest+1] ! !. = m2i(icasm:icasm+ncam-1) ! Put - call xmp_array_section_set_triplet(m2i_remote_sec, + call xmp_array_section_set_triplet(m2i_r_sec, & 1,int(icarm,kind=8),int(icarm+ncam-1,kind=8),1,status) - call xmp_array_section_set_triplet(m2i_local_sec, + call xmp_array_section_set_triplet(m2i_l_sec, & 1,int(icasm,kind=8),int(icasm+ncam-1,kind=8),1,status) img_dims(1) = ipy_mdest+1 - call xmp_coarray_put(img_dims,m2i_desc,m2i_remote_sec, - & m2i_desc,m2i_local_sec,status) + call xmp_coarray_put(img_dims,m2i_desc,m2i_r_sec, + & m2i_desc,m2i_l_sec,status) ! sync all call xmp_sync_all(status) @@ -1269,24 +1272,24 @@ subroutine comm_direct_3() ! ver.20120314 ! ! na_per_cell(icz0:icz1,icy0:icy1,icxbp0:icxbp0+(icxp1-icxp0)) ! !. [ipx_pdest+1] ! !. = na_per_cell(icz0:icz1,icy0:icy1,icxp0:icxp1) ! 
Put - call xmp_array_section_set_triplet(na_per_cell_remote_sec, + call xmp_array_section_set_triplet(na_per_cell_r_sec, & 1,int(icz0,kind=8),int(icz1,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_remote_sec, + call xmp_array_section_set_triplet(na_per_cell_r_sec, & 2,int(icy0,kind=8),int(icy1,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_remote_sec, + call xmp_array_section_set_triplet(na_per_cell_r_sec, & 3,int(icxbp0,kind=8),int(icxbp0+(icxp1-icxp0),kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_local_sec, + call xmp_array_section_set_triplet(na_per_cell_l_sec, & 1,int(icz0,kind=8),int(icz1,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_local_sec, + call xmp_array_section_set_triplet(na_per_cell_l_sec, & 2,int(icy0,kind=8),int(icy1,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_local_sec, + call xmp_array_section_set_triplet(na_per_cell_l_sec, & 3,int(icxp0,kind=8),int(icxp1,kind=8),1,status) img_dims(1) = ipx_pdest+1 call xmp_coarray_put(img_dims, - & na_per_cell_desc,na_per_cell_remote_sec, - & na_per_cell_desc,na_per_cell_local_sec,status) + & na_per_cell_desc,na_per_cell_r_sec, + & na_per_cell_desc,na_per_cell_l_sec,status) ! sync all @@ -1303,24 +1306,24 @@ subroutine comm_direct_3() ! ver.20120314 ! !. [ipx_mdest+1] ! !. = na_per_cell(icz0:icz1,icy0:icy1,icxm0:icxm1) ! 
Put - call xmp_array_section_set_triplet(na_per_cell_remote_sec, + call xmp_array_section_set_triplet(na_per_cell_r_sec, & 1,int(icz0,kind=8),int(icz1,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_remote_sec, + call xmp_array_section_set_triplet(na_per_cell_r_sec, & 2,int(icy0,kind=8),int(icy1,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_remote_sec, + call xmp_array_section_set_triplet(na_per_cell_r_sec, & 3,int(icxbm0,kind=8),int(icxbm0+(icxm1-icxm0),kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_local_sec, + call xmp_array_section_set_triplet(na_per_cell_l_sec, & 1,int(icz0,kind=8),int(icz1,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_local_sec, + call xmp_array_section_set_triplet(na_per_cell_l_sec, & 2,int(icy0,kind=8),int(icy1,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_local_sec, + call xmp_array_section_set_triplet(na_per_cell_l_sec, & 3,int(icxm0,kind=8),int(icxm1,kind=8),1,status) img_dims(1) = ipx_mdest+1 call xmp_coarray_put(img_dims, - & na_per_cell_desc,na_per_cell_remote_sec, - & na_per_cell_desc,na_per_cell_local_sec,status) + & na_per_cell_desc,na_per_cell_r_sec, + & na_per_cell_desc,na_per_cell_l_sec,status) ! sync all call xmp_sync_all(status) @@ -1410,31 +1413,31 @@ subroutine comm_direct_3() ! ver.20120314 !coarray & mpi_comm_world, istatus, ierr ) ! ! wkxyz(:,icarp:icarp+ncap-1)[ipx_pdest+1] ! !. = wkxyz(:,icasp:icasp+ncap-1) ! 
Put - call xmp_array_section_set_triplet(wkxyz_remote_sec, + call xmp_array_section_set_triplet(wkxyz_r_sec, & 1,int(1,kind=8),int(3,kind=8),1,status) - call xmp_array_section_set_triplet(wkxyz_remote_sec, + call xmp_array_section_set_triplet(wkxyz_r_sec, & 2,int(icarp,kind=8),int(icarp+ncap-1,kind=8),1,status) - call xmp_array_section_set_triplet(wkxyz_local_sec, + call xmp_array_section_set_triplet(wkxyz_l_sec, & 1,int(1,kind=8),int(3,kind=8),1,status) - call xmp_array_section_set_triplet(wkxyz_local_sec, + call xmp_array_section_set_triplet(wkxyz_l_sec, & 2,int(icasp,kind=8),int(icasp+ncap-1,kind=8),1,status) img_dims(1) = ipx_pdest+1 - call xmp_coarray_put(img_dims,wkxyz_desc,wkxyz_remote_sec, - & wkxyz_desc,wkxyz_local_sec,status) + call xmp_coarray_put(img_dims,wkxyz_desc,wkxyz_r_sec, + & wkxyz_desc,wkxyz_l_sec,status) ! ! m2i(icarp:icarp+ncap-1)[ipx_pdest+1] ! !. = m2i(icasp:icasp+ncap-1) ! Put - call xmp_array_section_set_triplet(m2i_remote_sec, + call xmp_array_section_set_triplet(m2i_r_sec, & 1,int(icarp,kind=8),int(icarp+ncap-1,kind=8),1,status) - call xmp_array_section_set_triplet(m2i_local_sec, + call xmp_array_section_set_triplet(m2i_l_sec, & 1,int(icasp,kind=8),int(icasp+ncap-1,kind=8),1,status) img_dims(1) = ipx_pdest+1 - call xmp_coarray_put(img_dims,m2i_desc,m2i_remote_sec, - & m2i_desc,m2i_local_sec,status) + call xmp_coarray_put(img_dims,m2i_desc,m2i_r_sec, + & m2i_desc,m2i_l_sec,status) ! sync all call xmp_sync_all(status) @@ -1451,31 +1454,31 @@ subroutine comm_direct_3() ! ver.20120314 !coarray & mpi_comm_world, istatus, ierr ) ! ! wkxyz(:,icarm:icarm+ncam-1)[ipx_mdest+1] ! !. = wkxyz(:,icasm:icasm+ncam-1) ! 
Put - call xmp_array_section_set_triplet(wkxyz_remote_sec, + call xmp_array_section_set_triplet(wkxyz_r_sec, & 1,int(1,kind=8),int(3,kind=8),1,status) - call xmp_array_section_set_triplet(wkxyz_remote_sec, + call xmp_array_section_set_triplet(wkxyz_r_sec, & 2,int(icarm,kind=8),int(icarm+ncam-1,kind=8),1,status) - call xmp_array_section_set_triplet(wkxyz_local_sec, + call xmp_array_section_set_triplet(wkxyz_l_sec, & 1,int(1,kind=8),int(3,kind=8),1,status) - call xmp_array_section_set_triplet(wkxyz_local_sec, + call xmp_array_section_set_triplet(wkxyz_l_sec, & 2,int(icasm,kind=8),int(icasm+ncap-1,kind=8),1,status) img_dims(1) = ipx_mdest+1 - call xmp_coarray_put(img_dims,wkxyz_desc,wkxyz_remote_sec, - & wkxyz_desc,wkxyz_local_sec,status) + call xmp_coarray_put(img_dims,wkxyz_desc,wkxyz_r_sec, + & wkxyz_desc,wkxyz_l_sec,status) ! ! m2i(icarm:icarm+ncam-1)[ipx_mdest+1] ! !. = m2i(icasm:icasm+ncam-1) ! Put - call xmp_array_section_set_triplet(m2i_remote_sec, + call xmp_array_section_set_triplet(m2i_r_sec, & 1,int(icarm,kind=8),int(icarm+ncam-1,kind=8),1,status) - call xmp_array_section_set_triplet(m2i_local_sec, + call xmp_array_section_set_triplet(m2i_l_sec, & 1,int(icasm,kind=8),int(icasm+ncam-1,kind=8),1,status) img_dims(1) = ipx_mdest+1 - call xmp_coarray_put(img_dims,m2i_desc,m2i_remote_sec, - & m2i_desc,m2i_local_sec,status) + call xmp_coarray_put(img_dims,m2i_desc,m2i_r_sec, + & m2i_desc,m2i_l_sec,status) ! sync all call xmp_sync_all(status) @@ -1549,24 +1552,24 @@ subroutine comm_direct_3() ! ver.20120314 ! ! na_per_cell(icz0:icz1,icy0:icy1,icxbp0:icxbp0+nd)[ipx_pdest+1] ! 
!.= na_per_cell(icz0:icz1,icy0:icy1,icxbp1st:icxbp1st+nd) - call xmp_array_section_set_triplet(na_per_cell_remote_sec, + call xmp_array_section_set_triplet(na_per_cell_r_sec, & 1,int(icz0,kind=8),int(icz1,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_remote_sec, + call xmp_array_section_set_triplet(na_per_cell_r_sec, & 2,int(icy0,kind=8),int(icy1,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_remote_sec, + call xmp_array_section_set_triplet(na_per_cell_r_sec, & 3,int(icxbp0,kind=8),int(icxbp0+nd,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_local_sec, + call xmp_array_section_set_triplet(na_per_cell_l_sec, & 1,int(icz0,kind=8),int(icz1,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_local_sec, + call xmp_array_section_set_triplet(na_per_cell_l_sec, & 2,int(icy0,kind=8),int(icy1,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_local_sec, + call xmp_array_section_set_triplet(na_per_cell_l_sec, & 3,int(icxbp1st,kind=8),int(icxbp1st+nd,kind=8),1,status) img_dims(1) = ipx_pdest+1 call xmp_coarray_put(img_dims, - & na_per_cell_desc,na_per_cell_remote_sec, - & na_per_cell_desc,na_per_cell_local_sec,status) + & na_per_cell_desc,na_per_cell_r_sec, + & na_per_cell_desc,na_per_cell_l_sec,status) @@ -1585,24 +1588,24 @@ subroutine comm_direct_3() ! ver.20120314 ! ! na_per_cell(icz0:icz1,icy0:icy1,icxbm0:icxbm0+nd)[ipx_pdest+1] ! 
!.= na_per_cell(icz0:icz1,icy0:icy1,icxbm1st:icxbm1st+nd) - call xmp_array_section_set_triplet(na_per_cell_remote_sec, + call xmp_array_section_set_triplet(na_per_cell_r_sec, & 1,int(icz0,kind=8),int(icz1,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_remote_sec, + call xmp_array_section_set_triplet(na_per_cell_r_sec, & 2,int(icy0,kind=8),int(icy1,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_remote_sec, + call xmp_array_section_set_triplet(na_per_cell_r_sec, & 3,int(icxbm0,kind=8),int(icxbm0+nd,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_local_sec, + call xmp_array_section_set_triplet(na_per_cell_l_sec, & 1,int(icz0,kind=8),int(icz1,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_local_sec, + call xmp_array_section_set_triplet(na_per_cell_l_sec, & 2,int(icy0,kind=8),int(icy1,kind=8),1,status) - call xmp_array_section_set_triplet(na_per_cell_local_sec, + call xmp_array_section_set_triplet(na_per_cell_l_sec, & 3,int(icxbm1st,kind=8),int(icxbm1st+nd,kind=8),1,status) img_dims(1) = ipx_pdest+1 call xmp_coarray_put(img_dims, - & na_per_cell_desc,na_per_cell_remote_sec, - & na_per_cell_desc,na_per_cell_local_sec,status) + & na_per_cell_desc,na_per_cell_r_sec, + & na_per_cell_desc,na_per_cell_l_sec,status) ! sync all @@ -1682,33 +1685,33 @@ subroutine comm_direct_3() ! ver.20120314 ! ! wkxyz(:,icarp:icarp+ncap-1)[ipx_pdest+1] ! !. = wkxyz(:,icasp:icasp+ncap-1) ! 
Put - call xmp_array_section_set_triplet(wkxyz_remote_sec, + call xmp_array_section_set_triplet(wkxyz_r_sec, & 1,int(1,kind=8),int(3,kind=8),1,status) - call xmp_array_section_set_triplet(wkxyz_remote_sec, + call xmp_array_section_set_triplet(wkxyz_r_sec, & 2,int(icarp,kind=8),int(icarp+ncap-1,kind=8),1,status) - call xmp_array_section_set_triplet(wkxyz_local_sec, + call xmp_array_section_set_triplet(wkxyz_l_sec, & 1,int(1,kind=8),int(3,kind=8),1,status) - call xmp_array_section_set_triplet(wkxyz_local_sec, + call xmp_array_section_set_triplet(wkxyz_l_sec, & 2,int(icasp,kind=8),int(icasp+ncap-1,kind=8),1,status) img_dims(1) = ipx_pdest+1 - call xmp_coarray_put(img_dims,wkxyz_desc,wkxyz_remote_sec, - & wkxyz_desc,wkxyz_local_sec,status) + call xmp_coarray_put(img_dims,wkxyz_desc,wkxyz_r_sec, + & wkxyz_desc,wkxyz_l_sec,status) ! ! m2i(icarp:icarp+ncap-1)[ipx_pdest+1] ! !. = m2i(icasp:icasp+ncap-1) ! Put - call xmp_array_section_set_triplet(m2i_remote_sec, + call xmp_array_section_set_triplet(m2i_r_sec, & 1,int(icarp,kind=8),int(icarp+ncap-1,kind=8),1,status) - call xmp_array_section_set_triplet(m2i_local_sec, + call xmp_array_section_set_triplet(m2i_l_sec, & 1,int(icasp,kind=8),int(icasp+ncap-1,kind=8),1,status) img_dims(1) = ipx_pdest+1 - call xmp_coarray_put(img_dims,m2i_desc,m2i_remote_sec, - & m2i_desc,m2i_local_sec,status) + call xmp_coarray_put(img_dims,m2i_desc,m2i_r_sec, + & m2i_desc,m2i_l_sec,status) ! sync all @@ -1726,32 +1729,32 @@ subroutine comm_direct_3() ! ver.20120314 !coarray & mpi_comm_world, istatus, ierr ) ! ! wkxyz(:,icarm:icarm+ncam-1)[ipx_mdest+1] ! !. = wkxyz(:,icasm:icasm+ncam-1) ! 
Put - call xmp_array_section_set_triplet(wkxyz_remote_sec, + call xmp_array_section_set_triplet(wkxyz_r_sec, & 1,int(1,kind=8),int(3,kind=8),1,status) - call xmp_array_section_set_triplet(wkxyz_remote_sec, + call xmp_array_section_set_triplet(wkxyz_r_sec, & 2,int(icarm,kind=8),int(icarm+ncam-1,kind=8),1,status) - call xmp_array_section_set_triplet(wkxyz_local_sec, + call xmp_array_section_set_triplet(wkxyz_l_sec, & 1,int(1,kind=8),int(3,kind=8),1,status) - call xmp_array_section_set_triplet(wkxyz_local_sec, + call xmp_array_section_set_triplet(wkxyz_l_sec, & 2,int(icasm,kind=8),int(icasm+ncam-1,kind=8),1,status) img_dims(1) = ipx_mdest+1 - call xmp_coarray_put(img_dims,wkxyz_desc,wkxyz_remote_sec, - & wkxyz_desc,wkxyz_local_sec,status) + call xmp_coarray_put(img_dims,wkxyz_desc,wkxyz_r_sec, + & wkxyz_desc,wkxyz_l_sec,status) ! ! m2i(icarm:icarm+ncam-1)[ipx_mdest+1] ! !. = m2i(icasm:icasm+ncam-1) ! Put - call xmp_array_section_set_triplet(m2i_remote_sec, + call xmp_array_section_set_triplet(m2i_r_sec, & 1,int(icarm,kind=8),int(icarm+ncam-1,kind=8),1,status) - call xmp_array_section_set_triplet(m2i_local_sec, + call xmp_array_section_set_triplet(m2i_l_sec, & 1,int(icasm,kind=8),int(icasm+ncam-1,kind=8),1,status) img_dims(1) = ipx_pdest+1 - call xmp_coarray_put(img_dims,m2i_desc,m2i_remote_sec, - & m2i_desc,m2i_local_sec,status) + call xmp_coarray_put(img_dims,m2i_desc,m2i_r_sec, + & m2i_desc,m2i_l_sec,status) ! sync all call xmp_sync_all(status) @@ -1853,11 +1856,11 @@ subroutine comm_direct_3() ! 
ver.20120314 call xmp_free_array_section(rbuffp_sec) call xmp_free_array_section(buffm_sec) call xmp_free_array_section(rbuffm_sec) - call xmp_free_array_section(wkxyz_local_sec) - call xmp_free_array_section(wkxyz_remote_sec) - call xmp_free_array_section(m2i_local_sec) - call xmp_free_array_section(m2i_remote_sec) - call xmp_free_array_section(na_per_cell_local_sec) - call xmp_free_array_section(na_per_cell_remote_sec) + call xmp_free_array_section(wkxyz_l_sec) + call xmp_free_array_section(wkxyz_r_sec) + call xmp_free_array_section(m2i_l_sec) + call xmp_free_array_section(m2i_r_sec) + call xmp_free_array_section(na_per_cell_l_sec) + call xmp_free_array_section(na_per_cell_r_sec) return end diff --git a/MODYLAS-MINI/src/xmpAPI_comm_fmm.f b/MODYLAS-MINI/src/xmpAPI_comm_fmm.f index dfa46d1..a9fb69d 100755 --- a/MODYLAS-MINI/src/xmpAPI_comm_fmm.f +++ b/MODYLAS-MINI/src/xmpAPI_comm_fmm.f @@ -78,7 +78,6 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, integer icyb0prior, icxb0prior integer ierr,istatus(mpi_status_size) integer(4) status - integer :: img_dims(1) !coarray !allocate( rccbuf(mylm*5*nscydiv*nscxdiv,2)[*] ) @@ -105,8 +104,8 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, call xmp_new_coarray(wm_tmp_desc, & 8,4,wm_tmp_lb,wm_tmp_ub,1,img_dims) call xmp_coarray_bind(wm_tmp_desc,wm_tmp) - call xmp_new_array_section(wm_tmp_l_sec,2) - call xmp_new_array_section(wm_tmp_r_sec,2) + call xmp_new_array_section(wm_tmp_l_sec,4) + call xmp_new_array_section(wm_tmp_r_sec,4) wm_tmp = wm @@ -115,7 +114,7 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, !allocate( ndis(np)[*] ) ndis_lb(1)=1 ndis_ub(1)=np - call xmp_new_coarray(ndis_desc,8,2,ndis_lb,ndis_ub,1,img_dims) + call xmp_new_coarray(ndis_desc,8,1,ndis_lb,ndis_ub,1,img_dims) call xmp_coarray_bind(ndis_desc,ndis) call xmp_new_array_section(ndis_sec,1) !! 
@@ -222,7 +221,7 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, call xmp_array_section_set_triplet(rccbuf_l_sec, & 2,int(ibs,kind=8),int(1,kind=8),1,status) img_dims(1) = ipz_dest+1 - call xmp_coarray_put_local(img_dims,rccbuf_desc,rccbuf_r_sec, + call xmp_coarray_put(img_dims,rccbuf_desc,rccbuf_r_sec, & rccbuf_desc,rccbuf_l_sec,status) ! sync all call xmp_sync_all(status) @@ -303,7 +302,7 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, call xmp_array_section_set_triplet(rccbuf_l_sec, & 2,int(ibs,kind=8),int(ibs,kind=8),1,status) img_dims(1) = ipz_dest+1 - call xmp_coarray_put_local(img_dims,rccbuf_desc,rccbuf_r_sec, + call xmp_coarray_put(img_dims,rccbuf_desc,rccbuf_r_sec, & rccbuf_desc,rccbuf_l_sec,status) ! sync all call xmp_sync_all(status) @@ -373,7 +372,7 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, & 4,int(icx,kind=8),int(icx,kind=8),1,status) img_dims(1) = ipy_dest+1 - call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + call xmp_coarray_put(img_dims,wm_tmp_desc,wm_tmp_r_sec, & wm_tmp_desc,wm_tmp_l_sec,status) @@ -422,7 +421,7 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, & 4,int(icx,kind=8),int(icx,kind=8),1,status) img_dims(1) = ipy_dest+1 - call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + call xmp_coarray_put(img_dims,wm_tmp_desc,wm_tmp_r_sec, & wm_tmp_desc,wm_tmp_l_sec,status) @@ -485,7 +484,7 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, & 4,int(icx,kind=8),int(icx,kind=8),1,status) img_dims(1) = ipy_dest+1 - call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + call xmp_coarray_put(img_dims,wm_tmp_desc,wm_tmp_r_sec, & wm_tmp_desc,wm_tmp_l_sec,status) @@ -532,7 +531,7 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, & 4,int(icx,kind=8),int(icx,kind=8),1,status) img_dims(1) = ipy_dest+1 - call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + call xmp_coarray_put(img_dims,wm_tmp_desc,wm_tmp_r_sec, & wm_tmp_desc,wm_tmp_l_sec,status) @@ -594,7 +593,7 @@ subroutine 
comm_fmm_local_top(il0,mylm,wm,nscell, & 4,int(icx0,kind=8),int(icx0+nd,kind=8),1,status) img_dims(1) = ipx_dest+1 - call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + call xmp_coarray_put(img_dims,wm_tmp_desc,wm_tmp_r_sec, & wm_tmp_desc,wm_tmp_l_sec,status) ! sync all @@ -641,7 +640,7 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, & 4,int(icxb0prior,kind=8),int(icxb0prior+nd,kind=8),1,status) img_dims(1) = ipx_dest+1 - call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + call xmp_coarray_put(img_dims,wm_tmp_desc,wm_tmp_r_sec, & wm_tmp_desc,wm_tmp_l_sec,status) ! sync all @@ -700,7 +699,7 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, & 4,int(icx0,kind=8),int(icx0+nd,kind=8),1,status) img_dims(1) = ipx_dest+1 - call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + call xmp_coarray_put(img_dims,wm_tmp_desc,wm_tmp_r_sec, & wm_tmp_desc,wm_tmp_l_sec,status) ! sync all @@ -746,7 +745,7 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, & 4,int(icxb0prior,kind=8),int(icxb0prior+nd,kind=8),1,status) img_dims(1) = ipx_dest+1 - call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + call xmp_coarray_put(img_dims,wm_tmp_desc,wm_tmp_r_sec, & wm_tmp_desc,wm_tmp_l_sec,status) ! sync all @@ -854,7 +853,6 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, integer ibs, ibr integer istatus(mpi_status_size, 4), ierr integer(4) status - integer(4) img_dims(1) #ifndef SYNC_COM integer,dimension(4) :: irq integer nrq @@ -883,19 +881,19 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, call xmp_new_array_section(rccbufm_r_sec,2) ! 
allocate( wm_tmp(mylm, lclz, lcly, lclx)[*] ) - rccbufm_lb(1)=1 - rccbufm_lb(2)=1 - rccbufm_lb(3)=1 - rccbufm_lb(4)=1 - rccbufm_ub(1)=mylm - rccbufm_ub(2)=lclz - rccbufm_ub(3)=lcly - rccbufm_ub(4)=lclx - call xmp_new_coarray(rccbufm_desc, - & 8,4,rccbufm_lb,rccbufm_ub,1,img_dims) - call xmp_coarray_bind(rccbufm_desc,rccbufm) - call xmp_new_array_section(rccbufm_l_sec,4) - call xmp_new_array_section(rccbufm_r_sec,4) + wm_tmp_lb(1)=1 + wm_tmp_lb(2)=1 + wm_tmp_lb(3)=1 + wm_tmp_lb(4)=1 + wm_tmp_ub(1)=mylm + wm_tmp_ub(2)=lclz + wm_tmp_ub(3)=lcly + wm_tmp_ub(4)=lclx + call xmp_new_coarray(wm_tmp_desc, + & 8,4,wm_tmp_lb,wm_tmp_ub,1,img_dims) + call xmp_coarray_bind(wm_tmp_desc,wm_tmp) + call xmp_new_array_section(wm_tmp_l_sec,4) + call xmp_new_array_section(wm_tmp_r_sec,4) wm_tmp = wm ! me = this_image() @@ -971,6 +969,7 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, nitr = max( (nbound_zm - 1) / nsczdiv + 1, & (nbound_zp - 1) / nsczdiv + 1 ) + !coarray ! allocate( rccbufp(mylm*nbsize*nscydiv*nscxdiv, 2)[*] ) ! allocate( rccbufm(mylm*nbsize*nscydiv*nscxdiv, 2)[*] ) @@ -1128,7 +1127,7 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, call xmp_array_section_set_triplet(rccbufp_l_sec, & 2,int(ibs,kind=8),int(ibs,kind=8),1,status) img_dims(1) = ipz_pdest+1 - call xmp_coarray_put_local(img_dims,rccbufp_desc,rccbufp_r_sec, + call xmp_coarray_put(img_dims,rccbufp_desc,rccbufp_r_sec, & rccbufp_desc,rccbufp_l_sec,status) ! rccbufm(1:nccm,ibr)[ipz_mdest+1] = rccbufm(1:nccm,ibs) ! 
Put @@ -1142,7 +1141,7 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, call xmp_array_section_set_triplet(rccbufm_l_sec, & 2,int(ibs,kind=8),int(ibs,kind=8),1,status) img_dims(1) = ipz_mdest+1 - call xmp_coarray_put_local(img_dims,rccbufm_desc,rccbufm_r_sec, + call xmp_coarray_put(img_dims,rccbufm_desc,rccbufm_r_sec, & rccbufm_desc,rccbufm_l_sec,status) @@ -1208,7 +1207,7 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, call xmp_array_section_set_triplet(rccbufp_l_sec, & 2,int(ibs,kind=8),int(ibs,kind=8),1,status) img_dims(1) = ipz_pdest+1 - call xmp_coarray_put_local(img_dims,rccbufp_desc,rccbufp_r_sec, + call xmp_coarray_put(img_dims,rccbufp_desc,rccbufp_r_sec, & rccbufp_desc,rccbufp_l_sec,status) ! sync all @@ -1252,7 +1251,7 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, call xmp_array_section_set_triplet(rccbufm_l_sec, & 2,int(ibs,kind=8),int(ibs,kind=8),1,status) img_dims(1) = ipz_mdest+1 - call xmp_coarray_put_local(img_dims,rccbufm_desc,rccbufm_r_sec, + call xmp_coarray_put(img_dims,rccbufm_desc,rccbufm_r_sec, & rccbufm_desc,rccbufm_l_sec,status) ! sync all call xmp_sync_all(status) @@ -1389,8 +1388,9 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, call xmp_array_section_set_triplet(wm_tmp_l_sec, & 4,int(icx,kind=8),int(icx,kind=8),1,status) + img_dims(1) = ipy_pdest+1 - call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + call xmp_coarray_put(img_dims,wm_tmp_desc,wm_tmp_r_sec, & wm_tmp_desc,wm_tmp_l_sec,status) @@ -1413,6 +1413,7 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, & mdis_sec,icybm0,status) + ! 
sync all call xmp_sync_all(status) mb = mdis(ipy_mdest+1) @@ -1437,7 +1438,7 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, & 4,int(icx,kind=8),int(icx,kind=8),1,status) img_dims(1) = ipy_mdest+1 - call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + call xmp_coarray_put(img_dims,wm_tmp_desc,wm_tmp_r_sec, & wm_tmp_desc,wm_tmp_l_sec,status) @@ -1495,7 +1496,7 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, call xmp_array_section_set_triplet(wm_tmp_r_sec, & 2,int(1,kind=8),int(lclz,kind=8),1,status) call xmp_array_section_set_triplet(wm_tmp_r_sec, - & 3,int(mb,kind=8),int(nb+nd,kind=8),1,status) + & 3,int(nb,kind=8),int(nb+nd,kind=8),1,status) call xmp_array_section_set_triplet(wm_tmp_r_sec, & 4,int(icx,kind=8),int(icx,kind=8),1,status) @@ -1509,7 +1510,7 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, & 4,int(icx,kind=8),int(icx,kind=8),1,status) img_dims(1) = ipy_pdest+1 - call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + call xmp_coarray_put(img_dims,wm_tmp_desc,wm_tmp_r_sec, & wm_tmp_desc,wm_tmp_l_sec,status) @@ -1553,7 +1554,7 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, & 4,int(icx,kind=8),int(icx,kind=8),1,status) img_dims(1) = ipy_mdest+1 - call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + call xmp_coarray_put(img_dims,wm_tmp_desc,wm_tmp_r_sec, & wm_tmp_desc,wm_tmp_l_sec,status) @@ -1615,7 +1616,7 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, & 4,int(icx,kind=8),int(icx,kind=8),1,status) img_dims(1) = ipy_pdest+1 - call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + call xmp_coarray_put(img_dims,wm_tmp_desc,wm_tmp_r_sec, & wm_tmp_desc,wm_tmp_l_sec,status) ! 
sync all call xmp_sync_all(status) @@ -1781,7 +1782,7 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, & 4,int(icxp0,kind=8),int(icxp0+nd,kind=8),1,status) img_dims(1) = ipx_pdest+1 - call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + call xmp_coarray_put(img_dims,wm_tmp_desc,wm_tmp_r_sec, & wm_tmp_desc,wm_tmp_l_sec,status) @@ -1824,7 +1825,7 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, & 4,int(icxm0,kind=8),int(icxm0+md,kind=8),1,status) img_dims(1) = ipx_mdest+1 - call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + call xmp_coarray_put(img_dims,wm_tmp_desc,wm_tmp_r_sec, & wm_tmp_desc,wm_tmp_l_sec,status) ! sync all call xmp_sync_all(status) @@ -1896,7 +1897,7 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, & 4,int(icxbp0prior,kind=8),int(icxbp0prior+nd-1,kind=8),1,status) img_dims(1) = ipx_pdest+1 - call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + call xmp_coarray_put(img_dims,wm_tmp_desc,wm_tmp_r_sec, & wm_tmp_desc,wm_tmp_l_sec,status) @@ -1940,7 +1941,7 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, & 4,int(icxbm0prior,kind=8),int(icxbm0prior+md-1,kind=8),1,status) img_dims(1) = ipx_mdest+1 - call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + call xmp_coarray_put(img_dims,wm_tmp_desc,wm_tmp_r_sec, & wm_tmp_desc,wm_tmp_l_sec,status) ! sync all call xmp_sync_all(status) @@ -2000,7 +2001,7 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, & 4,int(icxbp0prior,kind=8),int(icxbp0prior+nd-1,kind=8),1,status) img_dims(1) = ipx_pdest+1 - call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + call xmp_coarray_put(img_dims,wm_tmp_desc,wm_tmp_r_sec, & wm_tmp_desc,wm_tmp_l_sec,status) ! 
sync all @@ -2062,7 +2063,7 @@ subroutine comm_fmm_local_multi(ilevel, mylm, wm, & 4,int(icxbm0prior,kind=8),int(icxbm0prior+md,kind=8),1,status) img_dims(1) = ipx_mdest+1 - call xmp_coarray_put_local(img_dims,wm_tmp_desc,wm_tmp_r_sec, + call xmp_coarray_put(img_dims,wm_tmp_desc,wm_tmp_r_sec, & wm_tmp_desc,wm_tmp_l_sec,status) ! sync all call xmp_sync_all(status) diff --git a/MODYLAS-MINI/src/xmpAPI_domain_div.f b/MODYLAS-MINI/src/xmpAPI_domain_div.f index efcc27a..48a8545 100755 --- a/MODYLAS-MINI/src/xmpAPI_domain_div.f +++ b/MODYLAS-MINI/src/xmpAPI_domain_div.f @@ -447,7 +447,7 @@ subroutine fmod_alloc_metadata !init_fmm_direct_com() na_per_cell_ub(2) = lydiv+4 na_per_cell_ub(3) = lxdiv+4 call xmp_new_coarray(na_per_cell_desc,4,3, - & na_per_cell_lb,na_per_cell_ub,1, trj_mpi_img_dims) + & na_per_cell_lb,na_per_cell_ub,1, img_dims) call xmp_coarray_bind(na_per_cell_desc,na_per_cell) !############ @@ -474,7 +474,7 @@ subroutine fmod_alloc_metadata !init_fmm_direct_com() wkxyz_ub(1) = 3 wkxyz_ub(2) = nadirect call xmp_new_coarray(wkxyz_desc,8,2, - & wkxyz_lb,wkxyz_ub,1, trj_mpi_img_dims) + & wkxyz_lb,wkxyz_ub,1, img_dims) call xmp_coarray_bind(wkxyz_desc,wkxyz) allocate(wkv(3,nadirect)) @@ -483,7 +483,7 @@ subroutine fmod_alloc_metadata !init_fmm_direct_com() m2i_lb(1) = 1 m2i_ub(1) = nadirect call xmp_new_coarray(m2i_desc,4,1, - & m2i_lb,m2i_ub,1,trj_mpi_img_dims) + & m2i_lb,m2i_ub,1,img_dims) call xmp_coarray_bind(m2i_desc,m2i) !Force diff --git a/MODYLAS-MINI/src/xmpAPI_fmodules.f b/MODYLAS-MINI/src/xmpAPI_fmodules.f index 8e5d658..09fbe97 100755 --- a/MODYLAS-MINI/src/xmpAPI_fmodules.f +++ b/MODYLAS-MINI/src/xmpAPI_fmodules.f @@ -43,24 +43,23 @@ module trj_org end module c---------------------------------------------------------------------- module trj_mpi - integer(4) :: trj_mpi_img_dims(1) ! 
real(8),allocatable :: wkxyz(:,:)[:] real(8), POINTER :: wkxyz(:,:) => null () integer(8) :: wkxyz_desc - integer(8) :: wkxyz_local_sec, wkxyz_remote_sec + integer(8) :: wkxyz_l_sec, wkxyz_r_sec real(8),allocatable :: wkv(:,:) ! ! integer(4),allocatable :: i2m(:), m2i(:)[:] integer(4),allocatable :: i2m(:) integer(4), POINTER :: m2i(:) => null () integer(8) :: m2i_desc - integer(8) :: m2i_local_sec, m2i_remote_sec + integer(8) :: m2i_l_sec, m2i_r_sec ! !integer(4),allocatable :: tag(:,:,:),na_per_cell(:,:,:)[:] integer(4),allocatable :: tag(:,:,:) integer(4), POINTER :: na_per_cell(:,:,:) => null () integer(8) :: na_per_cell_desc - integer(8) :: na_per_cell_local_sec, na_per_cell_remote_sec + integer(8) :: na_per_cell_l_sec, na_per_cell_r_sec integer(4) :: na1cell,na5cell,nadirect integer(4) :: naline,narea @@ -346,7 +345,6 @@ module comm_d3 integer(8) :: rbuffm_sec, rbuffm_desc integer(8), dimension(2) :: rbuffm_lb, rbuffm_ub - integer :: img_dims(1) end module c---------------------------------------------------------------------- module comm_bd @@ -413,9 +411,6 @@ module comm_bd integer(8) :: irsbuf_m_sec integer(8), dimension(1) :: irsbuf_m_lb, irsbuf_m_ub - integer(4), dimension(1) :: img_dims - - integer,allocatable :: ncatmw(:,:,:,:) end module c---------------------------------------------------------------------- @@ -429,6 +424,7 @@ module md_multiplestep module mpivar implicit none integer(4) :: myrank=0, nprocs=1, mpiout=0 + integer(4), dimension(1) :: img_dims end module !---------------------------------------------------------------------- module ompvar From 97a11cdce7ecfea2999ef6c3a71cfe58c023c52c Mon Sep 17 00:00:00 2001 From: Kazuma Tago Date: Thu, 18 Mar 2021 12:24:19 +0900 Subject: [PATCH 63/70] Fix xmp_api routines of MODYLAS. 
--- MODYLAS-MINI/src/xmpAPI_comm.f | 29 +++++++++++++++-------------- MODYLAS-MINI/src/xmpAPI_comm_3.f | 20 ++++++++++---------- MODYLAS-MINI/src/xmpAPI_comm_fmm.f | 4 ++-- 3 files changed, 27 insertions(+), 26 deletions(-) diff --git a/MODYLAS-MINI/src/xmpAPI_comm.f b/MODYLAS-MINI/src/xmpAPI_comm.f index 7cc76b5..a728a7e 100755 --- a/MODYLAS-MINI/src/xmpAPI_comm.f +++ b/MODYLAS-MINI/src/xmpAPI_comm.f @@ -130,16 +130,17 @@ subroutine init_comm_bound() allocate(ibuffm ( max_cellcbd*max_mvatom)) allocate(isbufp (2*max_cellcbd + 1 + max_cellcbd*max_mvseg)) - allocate(isbufm (2*max_cellcbd + 1 + max_cellcbd*max_mvseg)) isbufp_lb(1) = 1 - isbufp_ub(1) = max_cellcbd*max_mvatom - isbufm_lb(1) = 1 - isbufm_ub(1) = max_cellcbd*max_mvatom + isbufp_ub(1) = 2*max_cellcbd + 1 + max_cellcbd*max_mvseg call xmp_new_local_array(isbufp_local_desc,4,1, & isbufp_lb,isbufp_ub,loc(isbufp)) + call xmp_new_array_section(isbufp_local_sec,1) + + allocate(isbufm (2*max_cellcbd + 1 + max_cellcbd*max_mvseg)) + isbufm_lb(1) = 1 + isbufm_ub(1) = 2*max_cellcbd + 1 + max_cellcbd*max_mvseg call xmp_new_local_array(isbufm_local_desc,4,1, & isbufm_lb,isbufm_ub,loc(isbufm)) - call xmp_new_array_section(isbufp_local_sec,1) call xmp_new_array_section(isbufm_local_sec,1) @@ -158,8 +159,8 @@ subroutine init_comm_bound() ! !allocate(rbuff_m (6,max_cellcbd*max_mvatom)[*]) rbuff_m_lb(1) = 1 rbuff_m_lb(2) = 1 - rbuff_m_ub(1) = max_cellcbd*max_mvatom - rbuff_m_ub(2) = 6 + rbuff_m_ub(1) = 6 + rbuff_m_ub(2) = max_cellcbd*max_mvatom call xmp_new_coarray(rbuff_m_desc,8,2, & rbuff_m_lb,rbuff_m_ub,1,img_dims) call xmp_coarray_bind(rbuff_m_desc,rbuff_m) @@ -1300,10 +1301,10 @@ subroutine comm_bound() !irbuff_m(1:ncam)[ipy_dest+1] = ibuffm(1:ncam) ! 
Put call xmp_array_section_set_triplet(irbuff_m_sec, - & 1,int(1,kind=8),int(6,kind=8),1,status) + & 1,int(1,kind=8),int(ncam,kind=8),1,status) call xmp_array_section_set_triplet(ibuffm_local_sec, - & 1,int(1,kind=8),int(6,kind=8),1,status) + & 1,int(1,kind=8),int(ncam,kind=8),1,status) img_dims(1) = ipy_dest+1 call xmp_coarray_put_local(img_dims,irbuff_m_desc,irbuff_m_sec, @@ -1555,7 +1556,7 @@ subroutine comm_bound() call xmp_array_section_set_triplet(isbufp_local_sec, & 1,int(1,kind=8),int(ncs+1,kind=8),1,status) - img_dims(1) = ipy_dest+1 + img_dims(1) = ipx_dest+1 call xmp_coarray_put_local(img_dims,irsbuf_p_desc,irsbuf_p_sec, & isbufp_local_desc,isbufp_local_sec,status) ! sync all @@ -1580,7 +1581,7 @@ subroutine comm_bound() call xmp_array_section_set_triplet(buffp_local_sec, & 2,int(1,kind=8),int(nca,kind=8),1,status) - img_dims(1) = ipy_dest+1 + img_dims(1) = ipx_dest+1 call xmp_coarray_put_local(img_dims,rbuff_p_desc,rbuff_p_sec, & buffp_local_desc,buffp_local_sec,status) @@ -1685,7 +1686,7 @@ subroutine comm_bound() & 1,int(1,kind=8),int(ncs+1,kind=8),1,status) call xmp_array_section_set_triplet(isbufm_local_sec, - & 1,int(1,kind=8),int(nca,kind=8),1,status) + & 1,int(1,kind=8),int(ncs+1,kind=8),1,status) img_dims(1) = ipx_dest+1 call xmp_coarray_put_local(img_dims,irsbuf_m_desc,irsbuf_m_sec, @@ -2141,7 +2142,7 @@ subroutine pre_record_data m2i_tmp_lb(1) = 1 m2i_tmp_ub(1) = na1cell*lxdiv*lydiv*lzdiv - call xmp_new_local_array(m2i_tmp_local_desc,8,1, + call xmp_new_local_array(m2i_tmp_local_desc,4,1, & m2i_tmp_lb,m2i_tmp_ub,loc(m2i_tmp)) call xmp_new_array_section(m2i_tmp_local_sec,1) @@ -2175,7 +2176,7 @@ subroutine pre_record_data !allocate(natmlist(nprocs)[*]) natmlist_lb(1) = 1 natmlist_ub(1) = nprocs - call xmp_new_coarray(natmlist_desc,8,1, + call xmp_new_coarray(natmlist_desc,4,1, & natmlist_lb,natmlist_ub,1,img_dims) call xmp_coarray_bind(natmlist_desc,natmlist) call xmp_new_array_section(natmlist_sec,1) diff --git 
a/MODYLAS-MINI/src/xmpAPI_comm_3.f b/MODYLAS-MINI/src/xmpAPI_comm_3.f index c8e065a..8cf6794 100755 --- a/MODYLAS-MINI/src/xmpAPI_comm_3.f +++ b/MODYLAS-MINI/src/xmpAPI_comm_3.f @@ -116,7 +116,7 @@ subroutine init_comm_direct_3() buffp_ub(1) = 3 buffp_lb(2) = 1 buffp_ub(2) = na1cell*(ncell/npy)*(ncell/npx)*2 - call xmp_new_coarray(buffp_desc,4,2, + call xmp_new_coarray(buffp_desc,8,2, & buffp_lb,buffp_ub,1,img_dims) call xmp_coarray_bind(buffp_desc,buffp) call xmp_new_array_section(buffp_sec,2) @@ -126,7 +126,7 @@ subroutine init_comm_direct_3() rbuffp_ub(1) = 3 rbuffp_lb(2) = 1 rbuffp_ub(2) = na1cell*(ncell/npy)*(ncell/npx)*2 - call xmp_new_coarray(rbuffp_desc,4,2, + call xmp_new_coarray(rbuffp_desc,8,2, & rbuffp_lb,rbuffp_ub,1,img_dims) call xmp_coarray_bind(rbuffp_desc,rbuffp) call xmp_new_array_section(rbuffp_sec,2) @@ -136,7 +136,7 @@ subroutine init_comm_direct_3() buffm_ub(1) = 3 buffm_lb(2) = 1 buffm_ub(2) = na1cell*(ncell/npy)*(ncell/npx)*2 - call xmp_new_coarray(buffm_desc,4,2, + call xmp_new_coarray(buffm_desc,8,2, & buffm_lb,buffm_ub,1,img_dims) call xmp_coarray_bind(buffm_desc,buffm) call xmp_new_array_section(buffm_sec,2) @@ -146,7 +146,7 @@ subroutine init_comm_direct_3() rbuffm_ub(1) = 3 rbuffm_lb(2) = 1 rbuffm_ub(2) = na1cell*(ncell/npy)*(ncell/npx)*2 - call xmp_new_coarray(rbuffm_desc,4,2, + call xmp_new_coarray(rbuffm_desc,8,2, & rbuffm_lb,rbuffm_ub,1,img_dims) call xmp_coarray_bind(rbuffm_desc,rbuffm) call xmp_new_array_section(rbuffm_sec,2) @@ -441,20 +441,20 @@ subroutine comm_direct_3() ! 
ver.20120314 call xmp_array_section_set_triplet(rbuffm_sec, & 1,int(1,kind=8),int(3,kind=8),1,status) call xmp_array_section_set_triplet(rbuffm_sec, - & 2,int(1,kind=8),int(ncap,kind=8),1,status) + & 2,int(1,kind=8),int(ncam,kind=8),1,status) call xmp_array_section_set_triplet(buffm_sec, & 1,int(1,kind=8),int(3,kind=8),1,status) call xmp_array_section_set_triplet(buffm_sec, - & 2,int(1,kind=8),int(ncap,kind=8),1,status) + & 2,int(1,kind=8),int(ncam,kind=8),1,status) img_dims(1) = ipz_mdest+1 call xmp_coarray_put(img_dims,rbuffm_desc,rbuffm_sec, & buffm_desc,buffm_sec,status) !irbuffm(1:ncam)[ipz_mdest+1] = ibuffm(1:ncam) ! Put call xmp_array_section_set_triplet(irbuffm_sec, - & 1,int(1,kind=8),int(ncap,kind=8),1,status) + & 1,int(1,kind=8),int(ncam,kind=8),1,status) call xmp_array_section_set_triplet(ibuffm_sec, - & 1,int(1,kind=8),int(ncap,kind=8),1,status) + & 1,int(1,kind=8),int(ncam,kind=8),1,status) img_dims(1) = ipz_mdest+1 call xmp_coarray_put(img_dims,irbuffm_desc,irbuffm_sec, & ibuffm_desc,ibuffm_sec,status) @@ -545,9 +545,9 @@ subroutine comm_direct_3() ! ver.20120314 !icbufm(1:nccm)[ipz_mdest+1] = ircbufm(1:nccm) ! 
Put call xmp_array_section_set_triplet(icbufm_sec, - & 1,int(1,kind=8),int(nccp,kind=8),1,status) + & 1,int(1,kind=8),int(nccm,kind=8),1,status) call xmp_array_section_set_triplet(ircbufm_sec, - & 1,int(1,kind=8),int(nccp,kind=8),1,status) + & 1,int(1,kind=8),int(nccm,kind=8),1,status) img_dims(1) = ipz_mdest+1 call xmp_coarray_put(img_dims,icbufm_desc,icbufm_sec, & ircbufm_desc,ircbufm_sec,status) diff --git a/MODYLAS-MINI/src/xmpAPI_comm_fmm.f b/MODYLAS-MINI/src/xmpAPI_comm_fmm.f index a9fb69d..56d672a 100755 --- a/MODYLAS-MINI/src/xmpAPI_comm_fmm.f +++ b/MODYLAS-MINI/src/xmpAPI_comm_fmm.f @@ -215,11 +215,11 @@ subroutine comm_fmm_local_top(il0,mylm,wm,nscell, call xmp_array_section_set_triplet(rccbuf_r_sec, & 1,int(1,kind=8),int(ncc,kind=8),1,status) call xmp_array_section_set_triplet(rccbuf_r_sec, - & 2,int(ibr,kind=8),int(1,kind=8),1,status) + & 2,int(ibr,kind=8),int(ibr,kind=8),1,status) call xmp_array_section_set_triplet(rccbuf_l_sec, & 1,int(1,kind=8),int(ncc,kind=8),1,status) call xmp_array_section_set_triplet(rccbuf_l_sec, - & 2,int(ibs,kind=8),int(1,kind=8),1,status) + & 2,int(ibs,kind=8),int(ibs,kind=8),1,status) img_dims(1) = ipz_dest+1 call xmp_coarray_put(img_dims,rccbuf_desc,rccbuf_r_sec, & rccbuf_desc,rccbuf_l_sec,status) From 8b2bdda5fafa2fdd4d28811fb2fac76e1f1aedbd Mon Sep 17 00:00:00 2001 From: tozaki_chisae Date: Thu, 18 Mar 2021 15:38:37 +0900 Subject: [PATCH 64/70] [WIP] modify 1 file. --- FFB-MINI/src/dd_mpi/xmpAPI_dd_mpi.F90 | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/FFB-MINI/src/dd_mpi/xmpAPI_dd_mpi.F90 b/FFB-MINI/src/dd_mpi/xmpAPI_dd_mpi.F90 index 6716c6f..76eb9e6 100755 --- a/FFB-MINI/src/dd_mpi/xmpAPI_dd_mpi.F90 +++ b/FFB-MINI/src/dd_mpi/xmpAPI_dd_mpi.F90 @@ -793,7 +793,8 @@ SUBROUTINE DDCOM3(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT END ! SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT0,IERR,& - snd_desc, rcv_desc, MAXBUF) +! 
snd_desc, rcv_desc, MAXBUF) + snd_desc_org, rcv_desc_org, MAXBUF) !fj BUFSND, BUFRCV, MAXBUF) ! Fujitsu start 202103 use xmp_api @@ -810,6 +811,7 @@ SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT ! DIMENSION BUFSND(MAXBUF)[*], BUFRCV(MAXBUF)[*] REAL*4 , POINTER :: BUFSND ( : ) => null ( ) REAL*4 , POINTER :: BUFRCV ( : ) => null ( ) + INTEGER*8 :: snd_desc_org, rcv_desc_org INTEGER*8 :: snd_desc, rcv_desc INTEGER*8 :: snd_sec, rcv_sec INTEGER*8, DIMENSION(1) :: snd_lb, snd_ub, rcv_lb, rcv_ub @@ -934,6 +936,11 @@ SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT ! call xmp_new_coarray(snd_desc, 4, 1, snd_lb, snd_ub, 1, img_dims) ! call xmp_new_coarray(rcv_desc, 4, 1, rcv_lb, rcv_ub, 1, img_dims) ! + call xmp_reshape_coarray(snd_desc, snd_desc_org, 4, 1, & + snd_lb, snd_ub, 1, img_dims) + call xmp_reshape_coarray(rcv_desc, rcv_desc_org, 4, 1, & + rcv_lb, rcv_ub, 1, img_dims) + call xmp_coarray_bind(snd_desc,BUFSND) call xmp_coarray_bind(rcv_desc,BUFRCV) ! 
From 93aa6a81dca5be342cb384ec15c566c5773a7abd Mon Sep 17 00:00:00 2001 From: Yoshikawa Hiroyuki Date: Thu, 18 Mar 2021 18:17:10 +0900 Subject: [PATCH 65/70] Modify Makefiles for xmpAPI FFB --- FFB-MINI/src/Makefile | 38 +++++++++++++++++++--------- FFB-MINI/src/dd_mpi/Makefile | 4 +++ FFB-MINI/src/make_setting | 1 + FFB-MINI/src/make_setting.xmpAPI_gcc | 1 + 4 files changed, 32 insertions(+), 12 deletions(-) diff --git a/FFB-MINI/src/Makefile b/FFB-MINI/src/Makefile index ec8c083..7d3ac27 100755 --- a/FFB-MINI/src/Makefile +++ b/FFB-MINI/src/Makefile @@ -20,20 +20,34 @@ FFLAGS += -DFFB_MINI_VERSION=\"$(VERSION)\" all: $(LES3X.MPI) $(FFB_MINI) OBJS = \ - xmpAPI_les3x.o xmpAPI_bcgs3x.o xmpAPI_bcgsxe.o xmpAPI_calax3.o \ - xmpAPI_callap.o caluel.o clrcrs.o \ + caluel.o clrcrs.o \ csin3x.o datcnv.o dgnscl.o e2plst.o e2pmtr.o \ - xmpAPI_elm3dx.o errchk.o \ - fild3x.o fld3x2.o \ - xmpAPI_grad3x.o icalel.o int3dx.o \ - lesrop.o lesrpx.o xmpAPI_lessfx.o lumpex.o \ - match4.o matgau.o mkcrs.o neibr2.o \ - xmpAPI_nodlex.o xmpAPI_pres3e.o xmpAPI_rcmelm.o reordr.o \ - rfname.o sethex.o srfexx.o subcnv.o \ - xmpAPI_vel3d1.o xmpAPI_vel3d2.o \ + errchk.o fild3x.o fld3x2.o \ + icalel.o int3dx.o \ + lesrop.o lesrpx.o lumpex.o \ + match4.o matgau.o mkcrs.o neibr2.o \ + reordr.o \ + rfname.o sethex.o srfexx.o subcnv.o \ mfname.o \ miniapp_util.o +OBJS0 = \ + les3x.o bcgs3x.o bcgsxe.o calax3.o \ + callap.o elm3dx.o grad3x.o lessfx.o \ + nodlex.o pres3e.o rcmelm.o vel3d1.o vel3d2.o +OBJS1 = \ + xmpAPI_les3x.o xmpAPI_bcgs3x.o xmpAPI_bcgsxe.o xmpAPI_calax3.o \ + xmpAPI_callap.o xmpAPI_elm3dx.o xmpAPI_grad3x.o xmpAPI_lessfx.o \ + xmpAPI_nodlex.o xmpAPI_pres3e.o xmpAPI_rcmelm.o xmpAPI_vel3d1.o xmpAPI_vel3d2.o + +ifeq ($(USE_XMP_API), yes) + OBJS += $(OBJS1) + FBMOBJ = xmpAPI_ffb_mini_main.o +else + OBJS += $(OBJS0) + FBMOBJ = ffb_mini_main.o +endif + ifeq (, $(findstring -DNO_METIS, $(FFLAGS))) OBJS += metis_wrapper.o endif @@ -75,8 +89,8 @@ $(LIB_DD_MPI): $(LES3X.MPI): $(LIB_GF2) 
$(LIB_DD_MPI) $(OBJS) les3x_main.o $(LD) $(LDFLAGS) -o $@ $(OBJS) les3x_main.o $(LIB_GF2) $(LIB_DD_MPI) $(LIBS) -$(FFB_MINI): $(LIB_GF2) $(LIB_DD_MPI) $(OBJS) xmpAPI_ffb_mini_main.o makemesh.o - $(LD) $(LDFLAGS) -o $@ $(OBJS) xmpAPI_ffb_mini_main.o makemesh.o $(LIB_GF2) $(LIB_DD_MPI) $(LIBS) +$(FFB_MINI): $(LIB_GF2) $(LIB_DD_MPI) $(OBJS) $(FBMOBJ) makemesh.o + $(LD) $(LDFLAGS) -o $@ $(OBJS) $(FBMOBJ) makemesh.o $(LIB_GF2) $(LIB_DD_MPI) $(LIBS) .SUFFIXES: .SUFFIXES: .f .f90 .F .F90 .c .o diff --git a/FFB-MINI/src/dd_mpi/Makefile b/FFB-MINI/src/dd_mpi/Makefile index e878ebb..1053f89 100755 --- a/FFB-MINI/src/dd_mpi/Makefile +++ b/FFB-MINI/src/dd_mpi/Makefile @@ -6,7 +6,11 @@ RANLIB ?= ranlib all: libdd_mpi.a +ifeq ($(USE_XMP_API), yes) OBJS = xmpAPI_dd_mpi.o xmpAPI_ddcom4.o +else +OBJS = dd_mpi.o ddcom4.o +endif libdd_mpi.a: $(OBJS) $(AR) $(ARFLAGS) $@ $(OBJS) diff --git a/FFB-MINI/src/make_setting b/FFB-MINI/src/make_setting index 928db76..a83c40d 100755 --- a/FFB-MINI/src/make_setting +++ b/FFB-MINI/src/make_setting @@ -1,6 +1,7 @@ CC = mpicc FC = mpif90 +USE_XMP_API = yes OMNI_HOME = $(shell xmpcc --show-env | grep OMNI_HOME | sed -e 's/OMNI_HOME=//' -e 's/"//g') LIBS = $(shell xmpcc --show-env | grep OMNI_LINKER_OPT | sed -e 's/OMNI_LINKER_OPT=//' -e 's/"//g') diff --git a/FFB-MINI/src/make_setting.xmpAPI_gcc b/FFB-MINI/src/make_setting.xmpAPI_gcc index 928db76..a83c40d 100755 --- a/FFB-MINI/src/make_setting.xmpAPI_gcc +++ b/FFB-MINI/src/make_setting.xmpAPI_gcc @@ -1,6 +1,7 @@ CC = mpicc FC = mpif90 +USE_XMP_API = yes OMNI_HOME = $(shell xmpcc --show-env | grep OMNI_HOME | sed -e 's/OMNI_HOME=//' -e 's/"//g') LIBS = $(shell xmpcc --show-env | grep OMNI_LINKER_OPT | sed -e 's/OMNI_LINKER_OPT=//' -e 's/"//g') From 9fb27a070bf0df151f6462aefb6d8472b642f2e9 Mon Sep 17 00:00:00 2001 From: Kazuma Tago Date: Fri, 19 Mar 2021 09:14:42 +0900 Subject: [PATCH 66/70] Modify comment of NTCHEM. 
--- NTCHEM-MINI/src/mp2/xmpAPI_rimp2_rmp2energy_incore_v_mpiomp.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_rmp2energy_incore_v_mpiomp.F90 b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_rmp2energy_incore_v_mpiomp.F90 index dd38865..9f38247 100755 --- a/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_rmp2energy_incore_v_mpiomp.F90 +++ b/NTCHEM-MINI/src/mp2/xmpAPI_rimp2_rmp2energy_incore_v_mpiomp.F90 @@ -614,7 +614,7 @@ SUBROUTINE RIMP2_RMP2Energy_InCore_V_MPIOMP RecvBuf(1:bufsize,jsta) = rbuf(1:bufsize) ! if (allocated(sbuf)) deallocate(sbuf) ! if (allocated(rbuf)) deallocate(rbuf) -! TODO: check + if (associated(sbuf)) then !deallocate(sbuf) call xmp_coarray_deallocate(sbuf_desc,status) From 2e299b1b7716696ceace987efce1c62aac731a2f Mon Sep 17 00:00:00 2001 From: Yoshikawa Hiroyuki Date: Fri, 19 Mar 2021 11:31:59 +0900 Subject: [PATCH 67/70] modify Makefile of FFB for building coarray version --- FFB-MINI/src/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/FFB-MINI/src/Makefile b/FFB-MINI/src/Makefile index 7d3ac27..06e4100 100755 --- a/FFB-MINI/src/Makefile +++ b/FFB-MINI/src/Makefile @@ -128,8 +128,8 @@ test_fx10: $(FFB_MINI) $(MAKE) -C ../test test_fx10 -xmpAPI_ffb_mini_main.o: param.h -xmpAPI_ffb_mini_main.o: makemesh.o +xmpAPI_ffb_mini_main.o: param.h makemesh.o +ffb_mini_main.o: param.h makemesh.o xmpAPI_les3x.o: timing.h xmpAPI_vel3d1.o: timing.h From 2c34a4179611602ab46ae71a2983b4515d983a60 Mon Sep 17 00:00:00 2001 From: Kazuma Tago Date: Fri, 19 Mar 2021 13:23:44 +0900 Subject: [PATCH 68/70] Fix alloc & free processes of MODYLAS. 
--- MODYLAS-MINI/src/xmpAPI_comm.f | 43 +++++++++++++++++++++-------- MODYLAS-MINI/src/xmpAPI_comm_3.f | 47 ++++++++++++++++---------------- 2 files changed, 56 insertions(+), 34 deletions(-) diff --git a/MODYLAS-MINI/src/xmpAPI_comm.f b/MODYLAS-MINI/src/xmpAPI_comm.f index a728a7e..80122a9 100755 --- a/MODYLAS-MINI/src/xmpAPI_comm.f +++ b/MODYLAS-MINI/src/xmpAPI_comm.f @@ -109,7 +109,6 @@ subroutine init_comm_bound() buffp_ub(2) = max_cellcbd*max_mvatom call xmp_new_local_array(buffp_local_desc,8,2, & buffp_lb,buffp_ub,loc(buffp)) - call xmp_new_array_section(buffp_local_sec,2) allocate(buffm (6,max_cellcbd*max_mvatom)) buffm_lb(1) = 1 @@ -118,14 +117,12 @@ subroutine init_comm_bound() buffm_ub(2) = max_cellcbd*max_mvatom call xmp_new_local_array(buffm_local_desc,8,2, & buffm_lb,buffm_ub,loc(buffm)) - call xmp_new_array_section(buffm_local_sec,2) allocate(ibuffp ( max_cellcbd*max_mvatom)) ibuffp_lb(1) = 1 ibuffp_ub(1) = max_cellcbd*max_mvatom call xmp_new_local_array(ibuffp_local_desc,8,1, & ibuffp_lb,ibuffp_ub,loc(ibuffp)) - call xmp_new_array_section(ibuffp_local_sec,1) allocate(ibuffm ( max_cellcbd*max_mvatom)) @@ -134,14 +131,12 @@ subroutine init_comm_bound() isbufp_ub(1) = 2*max_cellcbd + 1 + max_cellcbd*max_mvseg call xmp_new_local_array(isbufp_local_desc,4,1, & isbufp_lb,isbufp_ub,loc(isbufp)) - call xmp_new_array_section(isbufp_local_sec,1) allocate(isbufm (2*max_cellcbd + 1 + max_cellcbd*max_mvseg)) isbufm_lb(1) = 1 isbufm_ub(1) = 2*max_cellcbd + 1 + max_cellcbd*max_mvseg call xmp_new_local_array(isbufm_local_desc,4,1, & isbufm_lb,isbufm_ub,loc(isbufm)) - call xmp_new_array_section(isbufm_local_sec,1) allocate( ncatmw(32, nczdiv+2, ncydiv+2, ncxdiv+2) ) @@ -154,7 +149,6 @@ subroutine init_comm_bound() call xmp_new_coarray(rbuff_p_desc,8,2, & rbuff_p_lb,rbuff_p_ub,1,img_dims) call xmp_coarray_bind(rbuff_p_desc,rbuff_p) - call xmp_new_array_section(rbuff_p_sec,2) ! 
!allocate(rbuff_m (6,max_cellcbd*max_mvatom)[*]) rbuff_m_lb(1) = 1 @@ -164,7 +158,6 @@ subroutine init_comm_bound() call xmp_new_coarray(rbuff_m_desc,8,2, & rbuff_m_lb,rbuff_m_ub,1,img_dims) call xmp_coarray_bind(rbuff_m_desc,rbuff_m) - call xmp_new_array_section(rbuff_m_sec,2) ! !allocate(irbuff_p( max_cellcbd*max_mvatom)[*]) irbuff_p_lb(1) = 1 @@ -172,7 +165,6 @@ subroutine init_comm_bound() call xmp_new_coarray(irbuff_p_desc,4,1, & irbuff_p_lb,irbuff_p_ub,1,img_dims) call xmp_coarray_bind(irbuff_p_desc,irbuff_p) - call xmp_new_array_section(irbuff_p_sec,1) ! !allocate(irbuff_m( max_cellcbd*max_mvatom)[*]) irbuff_m_lb(1) = 1 @@ -180,7 +172,6 @@ subroutine init_comm_bound() call xmp_new_coarray(irbuff_m_desc,4,1, & irbuff_m_lb,irbuff_m_ub,1,img_dims) call xmp_coarray_bind(irbuff_m_desc,irbuff_m) - call xmp_new_array_section(irbuff_m_sec,1) ! !allocate(irsbuf_p(2*max_cellcbd + 1 + max_cellcbd*max_mvseg)[*]) irsbuf_p_lb(1) = 1 @@ -188,7 +179,6 @@ subroutine init_comm_bound() call xmp_new_coarray(irsbuf_p_desc,4,1, & irsbuf_p_lb,irsbuf_p_ub,1,img_dims) call xmp_coarray_bind(irsbuf_p_desc,irsbuf_p) - call xmp_new_array_section(irsbuf_p_sec,1) ! !allocate(irsbuf_m(2*max_cellcbd + 1 + max_cellcbd*max_mvseg)[*]) irsbuf_m_lb(1) = 1 @@ -196,7 +186,6 @@ subroutine init_comm_bound() call xmp_new_coarray(irsbuf_m_desc,4,1, & irsbuf_m_lb,irsbuf_m_ub,1,img_dims) call xmp_coarray_bind(irsbuf_m_desc,irsbuf_m) - call xmp_new_array_section(irsbuf_m_sec,1) return end @@ -250,6 +239,20 @@ subroutine comm_bound() integer istatus(mpi_status_size), ierr ! 
integer(4) status + + call xmp_new_array_section(buffp_local_sec,2) + call xmp_new_array_section(buffm_local_sec,2) + call xmp_new_array_section(ibuffp_local_sec,1) + call xmp_new_array_section(isbufp_local_sec,1) + call xmp_new_array_section(isbufm_local_sec,1) + call xmp_new_array_section(rbuff_p_sec,2) + call xmp_new_array_section(rbuff_m_sec,2) + call xmp_new_array_section(irbuff_p_sec,1) + call xmp_new_array_section(irbuff_m_sec,1) + call xmp_new_array_section(irsbuf_p_sec,1) + call xmp_new_array_section(irsbuf_m_sec,1) + + rdcellx=dble(ncell)/cellx rdcelly=dble(ncell)/celly rdcellz=dble(ncell)/cellz @@ -1860,6 +1863,20 @@ subroutine comm_bound() !$omp end do !$omp end parallel ! + + + call xmp_free_array_section(buffp_local_sec) + call xmp_free_array_section(buffm_local_sec) + call xmp_free_array_section(ibuffp_local_sec) + call xmp_free_array_section(isbufp_local_sec) + call xmp_free_array_section(isbufm_local_sec) + call xmp_free_array_section(rbuff_p_sec) + call xmp_free_array_section(rbuff_m_sec) + call xmp_free_array_section(irbuff_p_sec) + call xmp_free_array_section(irbuff_m_sec) + call xmp_free_array_section(irsbuf_p_sec) + call xmp_free_array_section(irsbuf_m_sec) + return end c---------------------------------------------------------------------- @@ -2314,5 +2331,9 @@ subroutine pre_record_data call cell_edge() + call xmp_free_array_section(rcvx_sec) + call xmp_free_array_section(m2i_tmp_local_sec) + call xmp_free_array_section(snd_local_sec) + call xmp_free_array_section(natmlist_sec) return end diff --git a/MODYLAS-MINI/src/xmpAPI_comm_3.f b/MODYLAS-MINI/src/xmpAPI_comm_3.f index 8cf6794..420edf1 100755 --- a/MODYLAS-MINI/src/xmpAPI_comm_3.f +++ b/MODYLAS-MINI/src/xmpAPI_comm_3.f @@ -53,7 +53,6 @@ subroutine init_comm_direct_3() call xmp_new_coarray(icbufp_desc,4,1, & icbufp_lb,icbufp_ub,1,img_dims) call xmp_coarray_bind(icbufp_desc,icbufp) - call xmp_new_array_section(icbufp_sec,1) ! 
allocate(ircbufp((ncell/npy)*(ncell/npx)*2)[*]) ircbufp_lb(1) = 1 @@ -61,57 +60,50 @@ subroutine init_comm_direct_3() call xmp_new_coarray(ircbufp_desc,4,1, & ircbufp_lb,ircbufp_ub,1,img_dims) call xmp_coarray_bind(ircbufp_desc,ircbufp) - call xmp_new_array_section(ircbufp_sec,1) - !allocate(icbufm ((ncell/npy)*(ncell/npx)*2)[*]) +! allocate(icbufm ((ncell/npy)*(ncell/npx)*2)[*]) icbufm_lb(1) = 1 icbufm_ub(1) = (ncell/npy)*(ncell/npx)*2 call xmp_new_coarray(icbufm_desc,4,1, & icbufm_lb,icbufm_ub,1,img_dims) call xmp_coarray_bind(icbufm_desc,icbufm) - call xmp_new_array_section(icbufm_sec,1) - !allocate(ircbufm((ncell/npy)*(ncell/npx)*2)[*]) +! allocate(ircbufm((ncell/npy)*(ncell/npx)*2)[*]) ircbufm_lb(1) = 1 ircbufm_ub(1) = (ncell/npy)*(ncell/npx)*2 call xmp_new_coarray(ircbufm_desc,4,1, & ircbufm_lb,ircbufm_ub,1,img_dims) call xmp_coarray_bind(ircbufm_desc,ircbufm) - call xmp_new_array_section(ircbufm_sec,1) - !allocate(ibuffp (na1cell*(ncell/npy)*(ncell/npx)*2)[*]) +! allocate(ibuffp (na1cell*(ncell/npy)*(ncell/npx)*2)[*]) ibuffp_lb(1) = 1 ibuffp_ub(1) = na1cell*(ncell/npy)*(ncell/npx)*2 call xmp_new_coarray(ibuffp_desc,4,1, & ibuffp_lb,ibuffp_ub,1,img_dims) call xmp_coarray_bind(ibuffp_desc,ibuffp) - call xmp_new_array_section(ibuffp_sec,1) - !allocate(irbuffp(na1cell*(ncell/npy)*(ncell/npx)*2)[*]) +! allocate(irbuffp(na1cell*(ncell/npy)*(ncell/npx)*2)[*]) irbuffp_lb(1) = 1 irbuffp_ub(1) = na1cell*(ncell/npy)*(ncell/npx)*2 call xmp_new_coarray(irbuffp_desc,4,1, & irbuffp_lb,irbuffp_ub,1,img_dims) call xmp_coarray_bind(irbuffp_desc,irbuffp) - call xmp_new_array_section(irbuffp_sec,1) - !allocate(ibuffm (na1cell*(ncell/npy)*(ncell/npx)*2)[*]) +! 
allocate(ibuffm (na1cell*(ncell/npy)*(ncell/npx)*2)[*]) ibuffm_lb(1) = 1 ibuffm_ub(1) = na1cell*(ncell/npy)*(ncell/npx)*2 call xmp_new_coarray(ibuffm_desc,4,1, & ibuffm_lb,ibuffm_ub,1,img_dims) call xmp_coarray_bind(ibuffm_desc,ibuffm) - call xmp_new_array_section(ibuffm_sec,1) - !allocate(irbuffm(na1cell*(ncell/npy)*(ncell/npx)*2)[*]) +! allocate(irbuffm(na1cell*(ncell/npy)*(ncell/npx)*2)[*]) irbuffm_lb(1) = 1 irbuffm_ub(1) = na1cell*(ncell/npy)*(ncell/npx)*2 call xmp_new_coarray(irbuffm_desc,4,1, & irbuffm_lb,irbuffm_ub,1,img_dims) call xmp_coarray_bind(irbuffm_desc,irbuffm) - call xmp_new_array_section(irbuffm_sec,1) - !allocate(buffp (3,na1cell*(ncell/npy)*(ncell/npx)*2)[*]) +! allocate(buffp (3,na1cell*(ncell/npy)*(ncell/npx)*2)[*]) buffp_lb(1) = 1 buffp_ub(1) = 3 buffp_lb(2) = 1 @@ -119,9 +111,8 @@ subroutine init_comm_direct_3() call xmp_new_coarray(buffp_desc,8,2, & buffp_lb,buffp_ub,1,img_dims) call xmp_coarray_bind(buffp_desc,buffp) - call xmp_new_array_section(buffp_sec,2) - !allocate(rbuffp(3,na1cell*(ncell/npy)*(ncell/npx)*2)[*]) +! allocate(rbuffp(3,na1cell*(ncell/npy)*(ncell/npx)*2)[*]) rbuffp_lb(1) = 1 rbuffp_ub(1) = 3 rbuffp_lb(2) = 1 @@ -129,9 +120,8 @@ subroutine init_comm_direct_3() call xmp_new_coarray(rbuffp_desc,8,2, & rbuffp_lb,rbuffp_ub,1,img_dims) call xmp_coarray_bind(rbuffp_desc,rbuffp) - call xmp_new_array_section(rbuffp_sec,2) - !allocate(buffm (3,na1cell*(ncell/npy)*(ncell/npx)*2)[*]) +! allocate(buffm (3,na1cell*(ncell/npy)*(ncell/npx)*2)[*]) buffm_lb(1) = 1 buffm_ub(1) = 3 buffm_lb(2) = 1 @@ -139,9 +129,8 @@ subroutine init_comm_direct_3() call xmp_new_coarray(buffm_desc,8,2, & buffm_lb,buffm_ub,1,img_dims) call xmp_coarray_bind(buffm_desc,buffm) - call xmp_new_array_section(buffm_sec,2) - !allocate(rbuffm(3,na1cell*(ncell/npy)*(ncell/npx)*2)[*]) +! 
allocate(rbuffm(3,na1cell*(ncell/npy)*(ncell/npx)*2)[*]) rbuffm_lb(1) = 1 rbuffm_ub(1) = 3 rbuffm_lb(2) = 1 @@ -149,7 +138,6 @@ subroutine init_comm_direct_3() call xmp_new_coarray(rbuffm_desc,8,2, & rbuffm_lb,rbuffm_ub,1,img_dims) call xmp_coarray_bind(rbuffm_desc,rbuffm) - call xmp_new_array_section(rbuffm_sec,2) return @@ -216,6 +204,18 @@ subroutine comm_direct_3() ! ver.20120314 integer nd integer(4) status !! + call xmp_new_array_section(rbuffm_sec,2) + call xmp_new_array_section(buffm_sec,2) + call xmp_new_array_section(buffp_sec,2) + call xmp_new_array_section(rbuffp_sec,2) + call xmp_new_array_section(irbuffm_sec,1) + call xmp_new_array_section(ibuffm_sec,1) + call xmp_new_array_section(irbuffp_sec,1) + call xmp_new_array_section(ibuffp_sec,1) + call xmp_new_array_section(ircbufm_sec,1) + call xmp_new_array_section(icbufm_sec,1) + call xmp_new_array_section(icbufp_sec,1) + call xmp_new_array_section(ircbufp_sec,1) call xmp_new_array_section(na_per_cell_l_sec,3) call xmp_new_array_section(na_per_cell_r_sec,3) call xmp_new_array_section(wkxyz_l_sec,2) @@ -934,7 +934,7 @@ subroutine comm_direct_3() ! ver.20120314 call xmp_array_section_set_triplet(wkxyz_r_sec, & 1,int(1,kind=8),int(3,kind=8),1,status) call xmp_array_section_set_triplet(wkxyz_r_sec, - & 2,int(icarp,kind=8),int(icarp+ncam-1,kind=8),1,status) + & 2,int(icarm,kind=8),int(icarm+ncam-1,kind=8),1,status) call xmp_array_section_set_triplet(wkxyz_l_sec, & 1,int(1,kind=8),int(3,kind=8),1,status) @@ -1862,5 +1862,6 @@ subroutine comm_direct_3() ! ver.20120314 call xmp_free_array_section(m2i_r_sec) call xmp_free_array_section(na_per_cell_l_sec) call xmp_free_array_section(na_per_cell_r_sec) + return end From 6f726d34269d5dbe1108b77dfb4f823692f694e5 Mon Sep 17 00:00:00 2001 From: tozaki_chisae Date: Fri, 19 Mar 2021 16:34:04 +0900 Subject: [PATCH 69/70] modify 2 files. 
--- FFB-MINI/src/dd_mpi/xmpAPI_dd_mpi.F90 | 9 +++------ FFB-MINI/src/xmpAPI_elm3dx.F | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/FFB-MINI/src/dd_mpi/xmpAPI_dd_mpi.F90 b/FFB-MINI/src/dd_mpi/xmpAPI_dd_mpi.F90 index 76eb9e6..fbf7ebe 100755 --- a/FFB-MINI/src/dd_mpi/xmpAPI_dd_mpi.F90 +++ b/FFB-MINI/src/dd_mpi/xmpAPI_dd_mpi.F90 @@ -793,8 +793,7 @@ SUBROUTINE DDCOM3(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT END ! SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT0,IERR,& -! snd_desc, rcv_desc, MAXBUF) - snd_desc_org, rcv_desc_org, MAXBUF) + snd_desc, rcv_desc, MAXBUF) !fj BUFSND, BUFRCV, MAXBUF) ! Fujitsu start 202103 use xmp_api @@ -811,7 +810,6 @@ SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT ! DIMENSION BUFSND(MAXBUF)[*], BUFRCV(MAXBUF)[*] REAL*4 , POINTER :: BUFSND ( : ) => null ( ) REAL*4 , POINTER :: BUFRCV ( : ) => null ( ) - INTEGER*8 :: snd_desc_org, rcv_desc_org INTEGER*8 :: snd_desc, rcv_desc INTEGER*8 :: snd_sec, rcv_sec INTEGER*8, DIMENSION(1) :: snd_lb, snd_ub, rcv_lb, rcv_ub @@ -936,9 +934,9 @@ SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT ! call xmp_new_coarray(snd_desc, 4, 1, snd_lb, snd_ub, 1, img_dims) ! call xmp_new_coarray(rcv_desc, 4, 1, rcv_lb, rcv_ub, 1, img_dims) ! - call xmp_reshape_coarray(snd_desc, snd_desc_org, 4, 1, & + call xmp_reshape_coarray(snd_desc, snd_desc, 4, 1, & snd_lb, snd_ub, 1, img_dims) - call xmp_reshape_coarray(rcv_desc, rcv_desc_org, 4, 1, & + call xmp_reshape_coarray(rcv_desc, rcv_desc, 4, 1, & rcv_lb, rcv_ub, 1, img_dims) call xmp_coarray_bind(snd_desc,BUFSND) @@ -1140,7 +1138,6 @@ SUBROUTINE DDCOMX(IPART,IDIM,LDOM,NBPDOM,NDOM,IPSLF,IPSND,MBPDOM,FX,FY,FZ,NP,IUT START_RR,status); ! Fujitsu end 202103 END_RR = START_RR + (END_S(LDOM(IDOM)) - START_S(LDOM(IDOM))) -! ! Fujitsu start 202103 ! BUFRCV(START_RR:END_RR)[LDOM(IDOM)] = & ! 
BUFSND(START_S(LDOM(IDOM)):END_S(LDOM(IDOM))) diff --git a/FFB-MINI/src/xmpAPI_elm3dx.F b/FFB-MINI/src/xmpAPI_elm3dx.F index 7e193b4..339d7f0 100755 --- a/FFB-MINI/src/xmpAPI_elm3dx.F +++ b/FFB-MINI/src/xmpAPI_elm3dx.F @@ -23,6 +23,8 @@ SUBROUTINE ELM3DX(MGAUSS,IGAUSH, REAL*4 , POINTER :: DNX ( : , : ) => null ( ) REAL*4 , POINTER :: DNY ( : , : ) => null ( ) INTEGER*8 :: rx_desc, ry_desc + INTEGER*8, DIMENSION(2) :: rx_lb, rx_ub, ry_lb, ry_ub + INTEGER*4 :: img_dims(1) C Fujitsu end 202103 INTEGER IUT0,IERR REAL*8 NN,NC,PSI,PSIC,WW @@ -104,6 +106,19 @@ SUBROUTINE ELM3DX(MGAUSS,IGAUSH, INTEGER IE,J C C Fujitsu start 202103 + rx_lb(1) = 1 + rx_lb(2) = N1 + rx_ub(1) = 1 + rx_ub(2) = NE+1 + ry_lb(1) = 1 + ry_lb(2) = N1 + ry_ub(1) = 1 + ry_ub(2) = NE+1 + call xmp_reshape_coarray(rx_desc, rx_desc, 4, 2, + * rx_lb, rx_ub, 1, img_dims) + call xmp_reshape_coarray(ry_desc, ry_desc, 4, 2, + * ry_lb, ry_ub, 1, img_dims) +C call xmp_coarray_bind(rx_desc,DNX) call xmp_coarray_bind(ry_desc,DNY) C Fujitsu end 202103 From e2479477bbc805e2c977a8e3b5002cd664c98115 Mon Sep 17 00:00:00 2001 From: "Yoshikawa, Hiroyuki" Date: Fri, 19 Mar 2021 17:21:08 +0900 Subject: [PATCH 70/70] change the default make_setting of FFT to xmp_gcc --- FFB-MINI/src/make_setting | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/FFB-MINI/src/make_setting b/FFB-MINI/src/make_setting index a83c40d..0ca11a5 100755 --- a/FFB-MINI/src/make_setting +++ b/FFB-MINI/src/make_setting @@ -1,23 +1,14 @@ CC = mpicc -FC = mpif90 - -USE_XMP_API = yes -OMNI_HOME = $(shell xmpcc --show-env | grep OMNI_HOME | sed -e 's/OMNI_HOME=//' -e 's/"//g') -LIBS = $(shell xmpcc --show-env | grep OMNI_LINKER_OPT | sed -e 's/OMNI_LINKER_OPT=//' -e 's/"//g') - -MPIBIN = $(shell dirname `which mpicc`) -MPIHOME = $(shell dirname ${MPIBIN}) +FC = xmpf90 DEFINE += -DNO_METIS DEFINE += -DNO_REFINER # timing option DEFINE += -DPROF_MAPROF - -#FFLAGS = -I$(OMNI_HOME)/include -J$(OMNI_HOME)/include -fopenmp -FFLAGS 
= -I$(OMNI_HOME)/include -fopenmp -FFLAGS += $(DEFINE) -O2 -I$(MPIHOME)/include +MPIHOME = CFLAGS += $(DEFINE) -O2 +FFLAGS += $(DEFINE) -O2 -I$(MPIHOME)/include ifeq (, $(findstring -DNO_METIS, $(FFLAGS)))