9 changes: 8 additions & 1 deletion .github/workflows/main.yml
@@ -105,6 +105,13 @@ jobs:
- name: running test 14, Test on AOI implementation aa wall bnb
run: |
sed -i 's/test = 13/test = 14/' BG_param.txt
./BG_Flood
./BG_Flood
- name: running test 15, Test on flexible time input
run: |
sed -i 's/test = 14/test = 15/' BG_param.txt
./BG_Flood
./BG_Flood BG_param_test15.txt
## var=$(ncdump -v h_P0 Test15_zoom2.nc)
## if: echo ${{ ${#var} == 7 }}


13 changes: 8 additions & 5 deletions Makefile
@@ -198,7 +198,7 @@ ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))

SAMPLE_ENABLED := 1

ALL_LDFLAGS := -lnetcdf -I
ALL_LDFLAGS := -I
ALL_LDFLAGS += $(ALL_CCFLAGS)
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
@@ -207,10 +207,14 @@ ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
INCLUDES := -I/usr/includes
LIBRARIES :=

# Add NetCDF include and library flags
INCLUDES += $(shell nc-config --cflags)
ALL_LDFLAGS += $(shell nc-config --libs)

################################################################################

# Gencode arguments
SMS ?= 35 50 52 60
SMS ?= 50 52 60 75
#SMS ?= 20 30 35
ifeq ($(SMS),)
$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<)
@@ -318,7 +322,7 @@ ReadInput.o:./src/ReadInput.cu

Read_netcdf.o:./src/Read_netcdf.cu
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<

Reimann.o:./src/Reimann.cu
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<

@@ -338,8 +342,7 @@ Write_netcdf.o:./src/Write_netcdf.cu
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<

Write_txtlog.o:./src/Write_txtlog.cpp
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<

$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<

Spherical.o:./src/Spherical.cu
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
44 changes: 44 additions & 0 deletions doc/paper.md
@@ -0,0 +1,44 @@
title: 'BG_Flood: Adaptive GPU-capable hydrodynamics model for flood and inundation'
tags:
- C++
- CUDA
- flood
- inundation
- tsunami
- storm-surge
- adaptive mesh refinement
- GPU

authors:
- name: Cyprien Bosserelle
corresponding: true # (This is how to denote the corresponding author)
orcid: 0000-0000-0000-0000
equal-contrib: true
affiliation: 1
- name: Alice Harang
equal-contrib: true # (This is how you can denote equal contributions between multiple authors)
orcid: 0000-0000-0000-0000
affiliation: 1
- name: Emily Lane
affiliation: 1
affiliations:
- name: Earth Sciences New Zealand
index: 1


date: 1 July 2025
bibliography: paper.bib

# Summary
Flood hazard assessment and forecasting often require physics-based simulations. These simulations are typically completed with hydrodynamics models that use high resolution to capture small landscape and flow features (e.g. small drains, hydraulic jumps) while also covering the large-scale domain where the hazard forms and/or amplifies (e.g. river catchment, continental shelf). This is only achievable with unstructured and adaptive meshes. Unfortunately, most available open-source codes only provide unstructured meshes that rely heavily on user input to generate a suitable mesh, and these models generally offer little flexibility for revising the mesh generation. Most available open-source models also do not offer GPU-enabled code that is well optimised, and for the few that can offer

Enabling GPU execution and semi-automatic mesh refinement is critical for rapidly developing flood assessments without compromising the simulated physics.

# Statement of need
BG_Flood is a numerical model for simulating shallow water hydrodynamics on the GPU using an adaptive-mesh-refinement-type grid. The model was designed with the goal of simulating inundation (river, storm surge, or tsunami). The model uses a Block Uniform Quadtree approach that runs on the GPU, with the adaptive mesh being generated at the start of the simulation.

The core SWE engine and adaptivity are inspired by and adapted from the Saint-Venant solver in Basilisk (Popinet XXXX), and the CUDA GPU memory model is inspired by the work of Vacondio et al. (2017). The rest of the implementation



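As a hedged illustration of the Block Uniform Quadtree approach mentioned above: the domain is tiled with fixed-size blocks that are internally uniform but each carry their own refinement level, so GPU threads can map one-to-one to cells. The sketch below is an assumption about the general layout, not BG_Flood's actual `BlockP` structure; the names and block width are illustrative.

```cpp
#include <vector>

constexpr int blkwidth = 16; // cells per block side (assumed value)

// One quadtree leaf: a uniform tile of blkwidth x blkwidth cells.
struct Block
{
    int level;         // refinement level; cell size = dx0 / (1 << level)
    double xo, yo;     // block origin in world coordinates
    int neighbours[4]; // left, right, bottom, top block ids
};

// Block-uniform quadtree mesh: per-block metadata plus flat state arrays,
// blkwidth * blkwidth entries per block, laid out contiguously for the GPU.
struct BUQMesh
{
    std::vector<Block> blocks;
    std::vector<double> h, zs, u, v;
};
```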

14 changes: 7 additions & 7 deletions src/Advection.cu
@@ -251,11 +251,11 @@ template <class T> __global__ void AdvkernelGPU(Param XParam, BlockP<T> XBlock,
T ho, uo, vo;
T dhi = XAdv.dh[i];

T edt = dt;// dhi > T(0.0) ? dt : min(dt, hold / (T(-1.0) * dhi));

T edt = XParam.ForceMassConserve ? dt : dhi >= T(0.0) ? dt : min(dt, max(hold, XParam.eps) / abs(dhi));

//ho = max(hold + edt * dhi,T(0.0));
ho = hold + edt * dhi;


if (ho > eps) {
//
uo = (hold * XEv.u[i] + edt * XAdv.dhu[i]) / ho;
@@ -307,7 +307,7 @@ template <class T> __host__ void AdvkernelCPU(Param XParam, BlockP<T> XBlock, T

dhi = XAdv.dh[i];

T edt = dt;// dhi > T(0.0) ? dt : min(dt, hold / (T(-1.0) * dhi));
T edt = XParam.ForceMassConserve ? dt : dhi >= T(0.0) ? dt : min(dt, max(hold, XParam.eps) / abs(dhi));

ho = hold + edt * dhi;

@@ -475,7 +475,7 @@ template <class T> __host__ T CalctimestepGPU(Param XParam,Loop<T> XLoop, BlockP

//GPU Harris reduction #3. 8.3x reduction #0 Note #7 if a lot faster
// This was successfully tested with a range of grid size
//reducemax3 << <gridDimLine, blockDimLine, 64*sizeof(float) >> >(dtmax_g, arrmax_g, nx*ny)
//reducemax3 <<<gridDimLine, blockDimLine, 64*sizeof(float) >>>(dtmax_g, arrmax_g, nx*ny)

int maxThreads = 256;
int threads = (s < maxThreads * 2) ? nextPow2((s + 1) / 2) : maxThreads;
@@ -485,7 +485,7 @@
dim3 gridDimLine(blocks, 1, 1);


reducemin3 << <gridDimLine, blockDimLine, smemSize >> > (XTime.dtmax, XTime.arrmin, s);
reducemin3 <<<gridDimLine, blockDimLine, smemSize >>> (XTime.dtmax, XTime.arrmin, s);
CUDA_CHECK(cudaDeviceSynchronize());


@@ -503,7 +503,7 @@ template <class T> __host__ T CalctimestepGPU(Param XParam,Loop<T> XLoop, BlockP

CUDA_CHECK(cudaMemcpy(XTime.dtmax, XTime.arrmin, s * sizeof(T), cudaMemcpyDeviceToDevice));

reducemin3 << <gridDimLineS, blockDimLineS, smemSize >> > (XTime.dtmax, XTime.arrmin, s);
reducemin3 <<<gridDimLineS, blockDimLineS, smemSize >>> (XTime.dtmax, XTime.arrmin, s);
CUDA_CHECK(cudaDeviceSynchronize());

s = (s + (threads * 2 - 1)) / (threads * 2);
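The new `edt` expression in AdvkernelGPU/AdvkernelCPU above limits the effective timestep in drying cells so that the updated depth cannot go negative, unless mass conservation is forced. A minimal standalone sketch of that limiter logic (the numeric values below are illustrative assumptions, not values from the model):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdio>

// Limit the effective timestep so h_new = h_old + edt * dh stays >= 0 in
// cells losing water (dh < 0), mirroring the diff's nested ternary.
double effective_dt(bool forceMassConserve, double dt, double hold,
                    double dh, double eps)
{
    if (forceMassConserve || dh >= 0.0) return dt;
    return std::min(dt, std::max(hold, eps) / std::fabs(dh));
}

int main()
{
    // Drying cell: hold = 0.1, dh = -0.5, dt = 1.0 -> edt = 0.2,
    // so h_new = 0.1 + 0.2 * (-0.5) = 0 exactly (no negative depth).
    double edt = effective_dt(false, 1.0, 0.1, -0.5, 1e-6);
    std::printf("edt = %g, h_new = %g\n", edt, 0.1 + edt * (-0.5));
    return 0;
}
```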
34 changes: 29 additions & 5 deletions src/Arrays.h
@@ -82,6 +82,23 @@ struct maskinfo

};

template <class T>
struct RiverInfo
{
int nbir;
int nburmax; // size of (max number of) unique blocks with rivers
int nribmax; // size of (max number of) rivers in one block
int* Xbidir; // array of block id for each river, size (nburmax, nribmax)
int* Xridib; // array of river id in each block, size (nburmax, nribmax)
T* xstart;
T* xend;
T* ystart;
T* yend;
T* qnow; // qnow is pinned/mapped memory, so both host and device pointers are needed here
T* qnow_g; // keeping both pointers here simplifies the code later

};
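The comment on `qnow` / `qnow_g` refers to pinned (page-locked), mapped host memory that exposes both a host and a device pointer to the same allocation. A sketch of how such a pair could be set up (illustrative only; the function name is hypothetical and BG_Flood's actual allocation code is not shown in this diff):

```cpp
#include <cuda_runtime.h>

// Allocate a pinned, mapped host buffer and fetch its device alias, so the
// host can update river discharge (qnow) and kernels can read it (qnow_g)
// without an explicit cudaMemcpy each step.
template <class T>
void AllocatePinnedPair(int n, T*& qnow, T*& qnow_g)
{
    cudaHostAlloc((void**)&qnow, n * sizeof(T), cudaHostAllocMapped);
    cudaHostGetDevicePointer((void**)&qnow_g, (void*)qnow, 0);
}
```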


// outzone info used to actually write the nc files (one nc file per zone; the default zone is the full domain)
struct outzoneB
@@ -92,6 +109,8 @@
std::string outname; // name for the output file (one for each zone)
int maxlevel; // maximum level in the zone
int minlevel; //minimum level in the zone
std::vector<double> OutputT; // Output times for this zone
int index_next_OutputT = 0; // Index of the next output time
};
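`OutputT` and `index_next_OutputT` give each output zone its own list of output times. Below is an assumed usage pattern for how the index might be consumed in the time loop, not the model's exact code; `OutputT` is assumed sorted ascending:

```cpp
#include <vector>

// Returns true when the zone's next scheduled output time has been reached,
// advancing the per-zone index so each scheduled time fires exactly once.
bool OutputDue(const std::vector<double>& OutputT,
               int& index_next_OutputT, double totaltime)
{
    if (index_next_OutputT >= (int)OutputT.size())
        return false; // no scheduled outputs remain for this zone
    if (totaltime >= OutputT[index_next_OutputT])
    {
        ++index_next_OutputT;
        return true; // caller writes this zone's nc file now
    }
    return false;
}
```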


@@ -125,7 +144,7 @@ struct AdaptP




template <class T>
struct BndblockP
{
int nblkriver, nblkTs, nbndblkleft, nbndblkright, nbndblktop, nbndblkbot;
@@ -140,12 +159,15 @@
int* top;
int* bot;


RiverInfo<T> Riverinfo;


};


struct RiverBlk
{
std::vector<int> block;
};



@@ -192,7 +214,7 @@ struct Model
std::map<std::string, std::string> Outvarlongname;
std::map<std::string, std::string> Outvarstdname;
std::map<std::string, std::string> Outvarunits;

std::vector<double> OutputT;

//other output
//std::vector< std::vector< Pointout > > TSallout;
@@ -208,7 +230,7 @@

AdaptP adapt;

BndblockP bndblk;
BndblockP<T> bndblk;



@@ -229,6 +251,8 @@ struct Loop
int nstep = 0;
//useful for calculating avg timestep
int nstepout = 0;
// Needed to identify next output time
int indNextoutputtime = 0;

// usefull for Time series output
int nTSsteps = 0;
69 changes: 58 additions & 11 deletions src/Boundary.cu
@@ -95,6 +95,11 @@ template <class T> void FlowbndFlux(Param XParam, double totaltime, BlockP<T> XB
T taper=T(1.0);
if (bndseg.on)
{
if (XParam.bndtaper > 0.0)
{
taper = min((totaltime - XParam.inittime) / XParam.bndtaper, 1.0);
}

if (bndseg.uniform)
{
int SLstepinbnd = 1;
@@ -110,10 +115,7 @@
zsbnd = interptime(bndseg.data[SLstepinbnd].wspeed, bndseg.data[SLstepinbnd - 1].wspeed, bndseg.data[SLstepinbnd].time - bndseg.data[SLstepinbnd - 1].time, totaltime - bndseg.data[SLstepinbnd - 1].time);


if (XParam.bndtaper > 0.0)
{
taper = min(totaltime / XParam.bndtaper, 1.0);
}

}
else
{
@@ -131,25 +133,25 @@
//Left
//template <class T> __global__ void bndFluxGPUSide(Param XParam, bndsegmentside side, BlockP<T> XBlock, DynForcingP<float> Atmp, DynForcingP<float> Zsmap, bool uniform, float zsbnd, T * zs, T * h, T * un, T * ut, T * Fh, T * Fq, T * Ss)
//bndFluxGPUSide <<< gridDimBBND, blockDim, 0 >>> (XParam, bndseg.left, XBlock, Atmp, bndseg.WLmap, bndseg.uniform, bndseg.type, float(zsbnd), XEv.zs, XEv.h, un, ut, Fh, Fq, S);
bndFluxGPUSide << < gridDimBBNDLeft, blockDim, 0 >> > (XParam, bndseg.left, XBlock, Atmp, bndseg.WLmap, bndseg.uniform, bndseg.type, float(zsbnd), taper, XEv.zs, XEv.h, XEv.u, XEv.v, XFlux.Fhu, XFlux.Fqux, XFlux.Su);
bndFluxGPUSide <<< gridDimBBNDLeft, blockDim, 0 >>> (XParam, bndseg.left, XBlock, Atmp, bndseg.WLmap, bndseg.uniform, bndseg.type, float(zsbnd), taper, XEv.zs, XEv.h, XEv.u, XEv.v, XFlux.Fhu, XFlux.Fqux, XFlux.Su);
CUDA_CHECK(cudaDeviceSynchronize());
}
//if (bndseg.right.nblk > 0)
{
//Right
bndFluxGPUSide << < gridDimBBNDRight, blockDim, 0 >> > (XParam, bndseg.right, XBlock, Atmp, bndseg.WLmap, bndseg.uniform, bndseg.type, float(zsbnd), taper, XEv.zs, XEv.h, XEv.u, XEv.v, XFlux.Fhu, XFlux.Fqux, XFlux.Su);
bndFluxGPUSide <<< gridDimBBNDRight, blockDim, 0 >>> (XParam, bndseg.right, XBlock, Atmp, bndseg.WLmap, bndseg.uniform, bndseg.type, float(zsbnd), taper, XEv.zs, XEv.h, XEv.u, XEv.v, XFlux.Fhu, XFlux.Fqux, XFlux.Su);
CUDA_CHECK(cudaDeviceSynchronize());
}
//if (bndseg.top.nblk > 0)
{
//top
bndFluxGPUSide << < gridDimBBNDTop, blockDim, 0 >> > (XParam, bndseg.top, XBlock, Atmp, bndseg.WLmap, bndseg.uniform, bndseg.type, float(zsbnd), taper, XEv.zs, XEv.h, XEv.v, XEv.u, XFlux.Fhv, XFlux.Fqvy, XFlux.Sv);
bndFluxGPUSide <<< gridDimBBNDTop, blockDim, 0 >>> (XParam, bndseg.top, XBlock, Atmp, bndseg.WLmap, bndseg.uniform, bndseg.type, float(zsbnd), taper, XEv.zs, XEv.h, XEv.v, XEv.u, XFlux.Fhv, XFlux.Fqvy, XFlux.Sv);
CUDA_CHECK(cudaDeviceSynchronize());
}
//if (bndseg.bot.nblk > 0)
{
//bot
bndFluxGPUSide << < gridDimBBNDBot, blockDim, 0 >> > (XParam, bndseg.bot, XBlock, Atmp, bndseg.WLmap, bndseg.uniform, bndseg.type, float(zsbnd), taper, XEv.zs, XEv.h, XEv.v, XEv.u, XFlux.Fhv, XFlux.Fqvy, XFlux.Sv);
bndFluxGPUSide <<< gridDimBBNDBot, blockDim, 0 >>> (XParam, bndseg.bot, XBlock, Atmp, bndseg.WLmap, bndseg.uniform, bndseg.type, float(zsbnd), taper, XEv.zs, XEv.h, XEv.v, XEv.u, XFlux.Fhv, XFlux.Fqvy, XFlux.Sv);
CUDA_CHECK(cudaDeviceSynchronize());
}
}
@@ -219,7 +221,7 @@ template <class T> void FlowbndFluxold(Param XParam, double totaltime, BlockP<T>

if (XParam.GPUDEVICE >= 0)
{
//bndFluxGPU << < gridDimBBND, blockDim, 0 >> > (XParam, side, XBlock, Atmp, float(itime), XEv.zs, XEv.h, un, ut, Fh, Fq, S);
//bndFluxGPU <<< gridDimBBND, blockDim, 0 >>> (XParam, side, XBlock, Atmp, float(itime), XEv.zs, XEv.h, un, ut, Fh, Fq, S);
//CUDA_CHECK(cudaDeviceSynchronize());
}
else
@@ -290,9 +292,9 @@ template <class T> __global__ void bndFluxGPUSide(Param XParam, bndsegmentside s
}





zsbnd = zsbnd + XParam.zsoffset;


int inside = Inside(halowidth, blkmemwidth, side.isright, side.istop, ix, iy, ib);

@@ -343,6 +345,19 @@
noslipbndQ(F, G, S);//noslipbndQ(T & F, T & G, T & S) F = T(0.0); S = G;

}
else if (type == 2)
{
if (h[i] > XParam.eps || zsX > zsi)
{
//
Dirichlet1Q(T(XParam.g), sign, zsX, zsinside, hinside, uninside, F);
}
else
{
noslipbndQ(F, G, S);
qmean = T(0.0);
}
}
else if (type == 3)
{
if (h[i] > XParam.eps || zsX > zsi )
@@ -443,6 +458,9 @@ template <class T> void bndFluxGPUSideCPU(Param XParam, bndsegmentside side, Blo
zsbnd = interp2BUQ(XParam.xo + xx, XParam.yo + yy, Zsmap);
}


zsbnd = zsbnd + XParam.zsoffset;


int i = memloc(halowidth, blkmemwidth, ix, iy, ib);
int inside = Inside(halowidth, blkmemwidth, side.isright, side.istop, ix, iy, ib);
Expand Down Expand Up @@ -504,6 +522,21 @@ template <class T> void bndFluxGPUSideCPU(Param XParam, bndsegmentside side, Blo
}
side.qmean[iq] = qmean;
}
else if (type == 2)
{
if (h[i] > XParam.eps || zsX > zsi)
{
//ABS1DQ(T(XParam.g), sign, factime, facrel, zsi, zsX, zsinside, h[i], qmean, F, G, S);
//qmean = T(0.0);
Dirichlet1Q(T(XParam.g), sign, zsX, zsinside, hinside, uninside, F);
}
else
{
noslipbndQ(F, G, S);
qmean = T(0.0);
}
side.qmean[iq] = qmean;
}


// write the results
@@ -1568,6 +1601,20 @@ template <class T> __device__ __host__ void Dirichlet1D(T g, T sign, T zsbnd, T
h = hinside;
}

template <class T> __device__ __host__ void Dirichlet1Q(T g, T sign, T zsbnd, T zsinside, T hinside, T uninside, T& q)
{
// Is this even the right formulation?
// I don't really like this formulation: while a bit less dissipative than ABS1D with unbnd = 0 (but worse if forcing uninside with 0), it is very reflective and not stable
T zbinside = zsinside - hinside;
T un = sign * T(2.0) * (sqrt(g * max(hinside, T(0.0))) - sqrt(g * max(zsbnd - zbinside, T(0.0)))) + uninside;
T ut = T(0.0);
//zs = zsinside;
//ut[i] = ut[inside];
//h = hinside;

q = un * hinside;
}
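The expression for `un` above appears to match the outgoing Riemann invariant of the one-dimensional shallow water equations; the following reading is an assumption based on the code, not stated in the source:

```latex
% Riemann invariants of the 1-D SWE: w_\pm = u \pm 2\sqrt{gh}.
% Matching the invariant leaving the domain to the prescribed boundary level
% (with h_bnd = zsbnd - zb_inside) gives the boundary velocity and flux:
\begin{align}
  u_\mathrm{bnd} &= u_\mathrm{inside}
    \pm 2\left(\sqrt{g\,h_\mathrm{inside}} - \sqrt{g\,(z_{s,\mathrm{bnd}} - z_{b,\mathrm{inside}})}\right), \\
  q &= u_\mathrm{bnd}\, h_\mathrm{inside}.
\end{align}
```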



/*
2 changes: 2 additions & 0 deletions src/Boundary.h
@@ -42,6 +42,8 @@ template <class T> __device__ __host__ void noslipbndQ(T& F, T& G, T& S);
template <class T> __device__ __host__ void ABS1D(T g, T sign, T zsbnd, T zsinside, T hinside, T utbnd,T unbnd, T& un, T& ut, T& zs, T& h);
template <class T> __device__ __host__ void ABS1DQ(T g, T sign, T factime, T facrel, T zs, T zsbnd, T zsinside, T h, T& qmean, T& q, T& G, T& S);
template <class T> __device__ __host__ void Dirichlet1D(T g, T sign, T zsbnd, T zsinside, T hinside, T uninside, T& un, T& ut, T& zs, T& h);
template <class T> __device__ __host__ void Dirichlet1Q(T g, T sign, T zsbnd, T zsinside, T hinside, T uninside, T& q);


// End of global definition
#endif