Draft

36 commits
3e17570
add structure for parallel process of rivers
CyprienBosserelle Sep 24, 2024
5918ee6
try to build new array to parallellise rivers
CyprienBosserelle Sep 25, 2024
3011e4d
Fix some compile issue
CyprienBosserelle Sep 25, 2024
65c82b6
Fix compile issue
CyprienBosserelle Sep 25, 2024
6c7489d
Fix code and fill Xriver array
CyprienBosserelle Sep 25, 2024
c5c3cbe
fix allocation blunder
CyprienBosserelle Oct 3, 2024
e058769
Add functions for Pinned memory
CyprienBosserelle Oct 10, 2024
9cb83cb
Fix Compile paged mem
CyprienBosserelle Oct 10, 2024
1b6c499
Make test for paged mem
CyprienBosserelle Oct 11, 2024
9fa3614
tweak test and remove out to screen
CyprienBosserelle Oct 13, 2024
00f02f3
modify test for Pin Meme for non-GPU
CyprienBosserelle Oct 15, 2024
9c59e3e
Fix CPU only MappedMem
CyprienBosserelle Oct 16, 2024
236b679
add allocations of river info in GPU XModel
CyprienBosserelle Oct 16, 2024
b8e8ed9
Add missing variable but also added template to various classes
CyprienBosserelle Oct 17, 2024
ef90c69
Fix compile issues
CyprienBosserelle Oct 17, 2024
31ab89f
fix map mem alloc
CyprienBosserelle Oct 17, 2024
b0576b1
Add momentum adjustment when using rain
CyprienBosserelle Oct 18, 2024
a957e97
Add limiter for flux adjustment for dry cells
CyprienBosserelle Oct 22, 2024
1c7018f
playing up with velocity sanity
CyprienBosserelle Oct 29, 2024
56bfaac
Add explanation to new algo
CyprienBosserelle Oct 29, 2024
306cc11
Revert experiment changes on roughness
CyprienBosserelle Jan 8, 2025
bda4751
Fix Dynamic forcing
CyprienBosserelle Jan 16, 2025
a835af5
force sane velocity
CyprienBosserelle Jan 30, 2025
9ba730a
Fix zsoffset
CyprienBosserelle Feb 4, 2025
ad71ffa
Clean <<< ...>>>
CyprienBosserelle Feb 11, 2025
db30cd0
Adding record of timestart
CyprienBosserelle Mar 6, 2025
8aa4c77
Update Makefile
CyprienBosserelle Mar 10, 2025
50c7238
ad bnd filter and relax time to input param
CyprienBosserelle Mar 11, 2025
bf99d36
coding fix for Hipcc
CyprienBosserelle Apr 1, 2025
852ccd5
Update General.h
CyprienBosserelle Apr 1, 2025
b037684
Update General.h
CyprienBosserelle Apr 1, 2025
854b4bb
Update Boundary.cu
CyprienBosserelle Apr 1, 2025
2646cdf
Update Boundary.cu
CyprienBosserelle Apr 2, 2025
c2ff93c
Update Boundary.h
CyprienBosserelle Apr 2, 2025
999a1a2
Update ConserveElevation.cu
CyprienBosserelle Apr 2, 2025
7231215
Fix encoding to remove BOM
CyprienBosserelle Apr 2, 2025
13 changes: 8 additions & 5 deletions Makefile
@@ -198,7 +198,7 @@ ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))

SAMPLE_ENABLED := 1

ALL_LDFLAGS := -lnetcdf -I
ALL_LDFLAGS := -I
ALL_LDFLAGS += $(ALL_CCFLAGS)
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
@@ -207,10 +207,14 @@ ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
INCLUDES := -I/usr/includes
LIBRARIES :=

# Add NetCDF include library
INCLUDES += $(shell nc-config --cflags)
ALL_LDFLAGS += $(shell nc-config --libs)

################################################################################

# Gencode arguments
SMS ?= 35 50 52 60
SMS ?= 52 60 75
#SMS ?= 20 30 35
ifeq ($(SMS),)
$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<)
@@ -318,7 +322,7 @@ ReadInput.o:./src/ReadInput.cu

Read_netcdf.o:./src/Read_netcdf.cu
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<

Reimann.o:./src/Reimann.cu
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<

@@ -338,8 +342,7 @@ Write_netcdf.o:./src/Write_netcdf.cu
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<

Write_txtlog.o:./src/Write_txtlog.cpp
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<

$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<

Spherical.o:./src/Spherical.cu
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
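
Note: with this change the Makefile stops hard-coding -lnetcdf in ALL_LDFLAGS and instead queries nc-config at build time, so INCLUDES picks up the local NetCDF compile flags (nc-config --cflags) and the link line picks up the matching libraries (nc-config --libs). The default gencode targets also move from the deprecated SM 3.5/5.0 set to SM 5.2, 6.0 and 7.5.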
1 change: 0 additions & 1 deletion src/AdaptCriteria.cu
@@ -1,4 +1,3 @@



#include "AdaptCriteria.h"
2 changes: 1 addition & 1 deletion src/AdaptCriteria.h
@@ -14,7 +14,7 @@
template <class T> int inrangecriteria(Param XParam, T zmin, T zmax, T* z, BlockP<T> XBlock, bool* refine, bool* coarsen);
template <class T> int Thresholdcriteria(Param XParam, T threshold, T* z, BlockP<T> XBlock, bool* refine, bool* coarsen);
template <class T> int AdaptCriteria(Param XParam, Forcing<float> XForcing, Model<T> XModel);

template<class T> int targetlevelcriteria(Param XParam, StaticForcingP<int> targetlevelmap, BlockP<T> XBlock, bool* refine, bool* coarsen);



2 changes: 0 additions & 2 deletions src/Adaptation.cu
@@ -1,5 +1,3 @@



#include "Adaptation.h"

12 changes: 6 additions & 6 deletions src/Advection.cu
@@ -251,11 +251,11 @@ template <class T> __global__ void AdvkernelGPU(Param XParam, BlockP<T> XBlock,
T ho, uo, vo;
T dhi = XAdv.dh[i];

T edt = dt;// dhi > T(0.0) ? dt : min(dt, hold / (T(-1.0) * dhi));
T edt = dhi >= T(0.0) ? dt : min(dt, max(hold, XParam.eps) / abs(dhi));

//ho = max(hold + edt * dhi,T(0.0));
ho = hold + edt * dhi;


if (ho > eps) {
//
uo = (hold * XEv.u[i] + edt * XAdv.dhu[i]) / ho;
@@ -307,7 +307,7 @@ template <class T> __host__ void AdvkernelCPU(Param XParam, BlockP<T> XBlock, T

dhi = XAdv.dh[i];

T edt = dt;// dhi > T(0.0) ? dt : min(dt, hold / (T(-1.0) * dhi));
T edt = dhi > T(0.0) ? dt : min(dt, hold / (T(-1.0) * dhi));

ho = hold + edt * dhi;
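
Note on the two hunks above: rather than clamping the depth after the update, the kernel now shortens the effective update interval wherever the net source dh would drain a cell below zero. A plain C++ restatement of that one line (a sketch only; hold, dhi and eps carry the kernel's meanings of current depth, net depth tendency and the model's dry threshold):

    #include <algorithm>
    #include <cmath>

    // Sketch of the dry-cell limiter in AdvkernelGPU: when the net tendency
    // dhi is negative, cap the step so at most the available water (floored
    // at eps to avoid dividing by a vanishing depth) is removed per update.
    template <class T>
    T limitedStep(T dt, T hold, T dhi, T eps)
    {
        if (dhi >= T(0.0)) return dt; // gaining water: the full step is safe
        return std::min(dt, std::max(hold, eps) / std::abs(dhi)); // draining: keep h non-negative
    }

Note that the CPU path enables the same limiter but divides by hold directly, without the eps floor, so the two branches are not yet strictly identical.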

@@ -475,7 +475,7 @@ template <class T> __host__ T CalctimestepGPU(Param XParam,Loop<T> XLoop, BlockP

//GPU Harris reduction #3. 8.3x faster than reduction #0. Note: #7 is a lot faster
// This was successfully tested with a range of grid sizes
//reducemax3 << <gridDimLine, blockDimLine, 64*sizeof(float) >> >(dtmax_g, arrmax_g, nx*ny)
//reducemax3 <<<gridDimLine, blockDimLine, 64*sizeof(float) >>>(dtmax_g, arrmax_g, nx*ny)

int maxThreads = 256;
int threads = (s < maxThreads * 2) ? nextPow2((s + 1) / 2) : maxThreads;
@@ -485,7 +485,7 @@
dim3 gridDimLine(blocks, 1, 1);


reducemin3 << <gridDimLine, blockDimLine, smemSize >> > (XTime.dtmax, XTime.arrmin, s);
reducemin3 <<<gridDimLine, blockDimLine, smemSize >>> (XTime.dtmax, XTime.arrmin, s);
CUDA_CHECK(cudaDeviceSynchronize());


@@ -503,7 +503,7 @@

CUDA_CHECK(cudaMemcpy(XTime.dtmax, XTime.arrmin, s * sizeof(T), cudaMemcpyDeviceToDevice));

reducemin3 << <gridDimLineS, blockDimLineS, smemSize >> > (XTime.dtmax, XTime.arrmin, s);
reducemin3 <<<gridDimLineS, blockDimLineS, smemSize >>> (XTime.dtmax, XTime.arrmin, s);
CUDA_CHECK(cudaDeviceSynchronize());

s = (s + (threads * 2 - 1)) / (threads * 2);
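
The launch-syntax cleanups above sit inside the iterative minimum reduction that sets the adaptive time step. A hedged sketch of the surrounding loop, reconstructed from the visible lines (the smemSize formula is an assumption following the classic Harris reduction; reducemin3 and nextPow2 are the repository's own helpers):

    // Each pass reduces s candidate time steps to ceil(s / (2 * threads))
    // partial minima; the loop repeats until one global minimum remains.
    int maxThreads = 256;
    while (s > 1)
    {
        int threads = (s < maxThreads * 2) ? nextPow2((s + 1) / 2) : maxThreads;
        int blocks = (s + (threads * 2 - 1)) / (threads * 2);
        // Assumed shared-memory sizing (Harris reduction style):
        int smemSize = (threads <= 32) ? 2 * threads * sizeof(T) : threads * sizeof(T);

        reducemin3 <<<dim3(blocks, 1, 1), dim3(threads, 1, 1), smemSize>>> (XTime.dtmax, XTime.arrmin, s);
        CUDA_CHECK(cudaDeviceSynchronize());

        s = blocks; // partial minima produced by this pass
        // Feed the partials back in as the next pass's input.
        CUDA_CHECK(cudaMemcpy(XTime.dtmax, XTime.arrmin, s * sizeof(T), cudaMemcpyDeviceToDevice));
    }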
28 changes: 24 additions & 4 deletions src/Arrays.h
@@ -82,6 +82,23 @@ struct maskinfo

};

template <class T>
struct RiverInfo
{
int nbir;
int nburmax; // max number of unique blocks with rivers
int nribmax; // max number of rivers in one block
int* Xbidir; // array of block ids for each river; size (nburmax, nribmax)
int* Xridib; // array of river ids in each block; size (nburmax, nribmax)
T* xstart;
T* xend;
T* ystart;
T* yend;
T* qnow;   // qnow is pinned, mapped memory, so both host and device pointers are needed here;
T* qnow_g; // keeping both simplifies the code later

};


// outzone info used to actually write the nc files (one nc file by zone, the default zone is the full domain)
struct outzoneB
@@ -125,7 +142,7 @@ struct AdaptP




template <class T>
struct BndblockP
{
int nblkriver, nblkTs, nbndblkleft, nbndblkright, nbndblktop, nbndblkbot;
Expand All @@ -140,12 +157,15 @@ struct BndblockP
int* top;
int* bot;


RiverInfo<T> Riverinfo;


};


struct RiverBlk
{
std::vector<int> block;
};



@@ -208,7 +228,7 @@ struct Model

AdaptP adapt;

BndblockP bndblk;
BndblockP<T> bndblk;



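
The new RiverInfo struct flattens the river-to-block mapping into fixed-size (nburmax x nribmax) arrays so that all rivers can be processed by a single kernel, and it keeps two discharge pointers because qnow lives in pinned, mapped (zero-copy) memory. A minimal sketch of how such a host/device pair is typically obtained in CUDA (illustrative only; the PR adds its own pinned-memory helpers in commit e058769, and CUDA_CHECK is the repository's error macro):

    #include <cuda_runtime.h>

    template <class T>
    void allocPinnedMapped(int n, T*& qnow, T*& qnow_g)
    {
        // Pin the host allocation and map it into the device address space;
        // some devices need cudaSetDeviceFlags(cudaDeviceMapHost) set before
        // the context is created for this to succeed.
        CUDA_CHECK(cudaHostAlloc((void**)&qnow, n * sizeof(T), cudaHostAllocMapped));
        // Device-side alias of the same physical memory: kernels reading
        // qnow_g see host-side updates to qnow without an explicit memcpy.
        CUDA_CHECK(cudaHostGetDevicePointer((void**)&qnow_g, (void*)qnow, 0));
    }

On a CPU-only build (GPUDEVICE < 0) the same slots can simply point at ordinary heap memory with qnow_g aliasing qnow, which appears to be what the "Fix CPU only MappedMem" commit (9c59e3e) arranges.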
4 changes: 2 additions & 2 deletions src/BG_Flood.cu
@@ -1,4 +1,4 @@
//////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////
// BG_Flood Main function //
// Copyright (C) 2018 Bosserelle //
// This code contains an adaptation of the St Venant equation from Basilisk //
@@ -20,7 +20,7 @@
// along with this program. If not, see <http://www.gnu.org/licenses/>. //
//////////////////////////////////////////////////////////////////////////////////

// includes, system
// includes

#include "BG_Flood.h"

21 changes: 12 additions & 9 deletions src/Boundary.cu
@@ -1,4 +1,4 @@
#include "Boundary.h"
#include "Boundary.h"


template <class T> void Flowbnd(Param XParam, Loop<T> &XLoop, BlockP<T> XBlock, bndparam side, DynForcingP<float> Atmp, EvolvingP<T> XEv)
@@ -112,7 +112,7 @@ template <class T> void FlowbndFlux(Param XParam, double totaltime, BlockP<T> XB

if (XParam.bndtaper > 0.0)
{
taper = min(totaltime / XParam.bndtaper, 1.0);
taper = min((totaltime - XParam.inittime) / XParam.bndtaper, 1.0);
}
}
else
@@ -131,25 +131,25 @@
//Left
//template <class T> __global__ void bndFluxGPUSide(Param XParam, bndsegmentside side, BlockP<T> XBlock, DynForcingP<float> Atmp, DynForcingP<float> Zsmap, bool uniform, float zsbnd, T * zs, T * h, T * un, T * ut, T * Fh, T * Fq, T * Ss)
//bndFluxGPUSide <<< gridDimBBND, blockDim, 0 >>> (XParam, bndseg.left, XBlock, Atmp, bndseg.WLmap, bndseg.uniform, bndseg.type, float(zsbnd), XEv.zs, XEv.h, un, ut, Fh, Fq, S);
bndFluxGPUSide << < gridDimBBNDLeft, blockDim, 0 >> > (XParam, bndseg.left, XBlock, Atmp, bndseg.WLmap, bndseg.uniform, bndseg.type, float(zsbnd), taper, XEv.zs, XEv.h, XEv.u, XEv.v, XFlux.Fhu, XFlux.Fqux, XFlux.Su);
bndFluxGPUSide <<< gridDimBBNDLeft, blockDim, 0 >>> (XParam, bndseg.left, XBlock, Atmp, bndseg.WLmap, bndseg.uniform, bndseg.type, float(zsbnd), taper, XEv.zs, XEv.h, XEv.u, XEv.v, XFlux.Fhu, XFlux.Fqux, XFlux.Su);
CUDA_CHECK(cudaDeviceSynchronize());
}
//if (bndseg.right.nblk > 0)
{
//Right
bndFluxGPUSide << < gridDimBBNDRight, blockDim, 0 >> > (XParam, bndseg.right, XBlock, Atmp, bndseg.WLmap, bndseg.uniform, bndseg.type, float(zsbnd), taper, XEv.zs, XEv.h, XEv.u, XEv.v, XFlux.Fhu, XFlux.Fqux, XFlux.Su);
bndFluxGPUSide <<< gridDimBBNDRight, blockDim, 0 >>> (XParam, bndseg.right, XBlock, Atmp, bndseg.WLmap, bndseg.uniform, bndseg.type, float(zsbnd), taper, XEv.zs, XEv.h, XEv.u, XEv.v, XFlux.Fhu, XFlux.Fqux, XFlux.Su);
CUDA_CHECK(cudaDeviceSynchronize());
}
//if (bndseg.top.nblk > 0)
{
//top
bndFluxGPUSide << < gridDimBBNDTop, blockDim, 0 >> > (XParam, bndseg.top, XBlock, Atmp, bndseg.WLmap, bndseg.uniform, bndseg.type, float(zsbnd), taper, XEv.zs, XEv.h, XEv.v, XEv.u, XFlux.Fhv, XFlux.Fqvy, XFlux.Sv);
bndFluxGPUSide <<< gridDimBBNDTop, blockDim, 0 >>> (XParam, bndseg.top, XBlock, Atmp, bndseg.WLmap, bndseg.uniform, bndseg.type, float(zsbnd), taper, XEv.zs, XEv.h, XEv.v, XEv.u, XFlux.Fhv, XFlux.Fqvy, XFlux.Sv);
CUDA_CHECK(cudaDeviceSynchronize());
}
//if (bndseg.bot.nblk > 0)
{
//bot
bndFluxGPUSide << < gridDimBBNDBot, blockDim, 0 >> > (XParam, bndseg.bot, XBlock, Atmp, bndseg.WLmap, bndseg.uniform, bndseg.type, float(zsbnd), taper, XEv.zs, XEv.h, XEv.v, XEv.u, XFlux.Fhv, XFlux.Fqvy, XFlux.Sv);
bndFluxGPUSide <<< gridDimBBNDBot, blockDim, 0 >>> (XParam, bndseg.bot, XBlock, Atmp, bndseg.WLmap, bndseg.uniform, bndseg.type, float(zsbnd), taper, XEv.zs, XEv.h, XEv.v, XEv.u, XFlux.Fhv, XFlux.Fqvy, XFlux.Sv);
CUDA_CHECK(cudaDeviceSynchronize());
}
}
@@ -219,7 +219,7 @@ template <class T> void FlowbndFluxold(Param XParam, double totaltime, BlockP<T>

if (XParam.GPUDEVICE >= 0)
{
//bndFluxGPU << < gridDimBBND, blockDim, 0 >> > (XParam, side, XBlock, Atmp, float(itime), XEv.zs, XEv.h, un, ut, Fh, Fq, S);
//bndFluxGPU <<< gridDimBBND, blockDim, 0 >>> (XParam, side, XBlock, Atmp, float(itime), XEv.zs, XEv.h, un, ut, Fh, Fq, S);
//CUDA_CHECK(cudaDeviceSynchronize());
}
else
@@ -290,9 +290,9 @@ template <class T> __global__ void bndFluxGPUSide(Param XParam, bndsegmentside s
}





zsbnd = zsbnd + XParam.zsoffset;


int inside = Inside(halowidth, blkmemwidth, side.isright, side.istop, ix, iy, ib);

@@ -443,6 +443,9 @@ template <class T> void bndFluxGPUSideCPU(Param XParam, bndsegmentside side, Blo
zsbnd = interp2BUQ(XParam.xo + xx, XParam.yo + yy, Zsmap);
}


zsbnd = zsbnd + XParam.zsoffset;


int i = memloc(halowidth, blkmemwidth, ix, iy, ib);
int inside = Inside(halowidth, blkmemwidth, side.isright, side.istop, ix, iy, ib);
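
Two behavioural changes in this file are easy to miss among the chevron cleanups: the boundary relaxation now ramps from XParam.inittime rather than from time zero, so hot-started runs still get a smooth spin-up, and both the GPU and CPU flux paths now add XParam.zsoffset to the interpolated boundary water level. A plain restatement of the ramp (a sketch; assumes totaltime >= inittime and the same parameter names):

    #include <algorithm>

    // Linear taper applied to boundary fluxes: 0 at the (re)start time,
    // rising to 1 once bndtaper seconds have elapsed.
    double bndTaper(double totaltime, double inittime, double bndtaper)
    {
        if (bndtaper <= 0.0) return 1.0; // taper disabled
        return std::min((totaltime - inittime) / bndtaper, 1.0);
    }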
2 changes: 1 addition & 1 deletion src/Boundary.h
Original file line number Diff line number Diff line change
@@ -30,7 +30,7 @@ template <class T> void FlowbndFlux(Param XParam,double totaltime, BlockP<T> XBl


template <class T> __global__ void bndFluxGPUSide(Param XParam, bndsegmentside side, BlockP<T> XBlock, DynForcingP<float> Atmp, DynForcingP<float> Zsmap, bool uniform, int type, float zsbnd, T taper, T* zs, T* h, T* un, T* ut, T* Fh, T* Fq, T* Ss);

template <class T> void bndFluxGPUSideCPU(Param XParam, bndsegmentside side, BlockP<T> XBlock, DynForcingP<float> Atmp, DynForcingP<float> Zsmap, bool uniform, int type, float zsbnd, T taper, T* zs, T* h, T* un, T* ut, T* Fh, T* Fq, T* Ss);
template <class T> __global__ void bndGPU(Param XParam, bndparam side, BlockP<T> XBlock, DynForcingP<float> Atmp, float itime, T* zs, T* h, T* un, T* ut);
template <class T> __host__ void bndCPU(Param XParam, bndparam side, BlockP<T> XBlock, std::vector<double> zsbndvec, std::vector<double> uubndvec, std::vector<double> vvbndvec, DynForcingP<float> Atmp, T* zs, T* h, T* un, T* ut);

27 changes: 14 additions & 13 deletions src/ConserveElevation.cu
@@ -1,4 +1,5 @@
#include "ConserveElevation.h"

#include "ConserveElevation.h"


template <class T> void conserveElevation(Param XParam, BlockP<T> XBlock, EvolvingP<T> XEv, T* zb)
@@ -31,13 +32,13 @@ template <class T> void conserveElevationGPU(Param XParam, BlockP<T> XBlock, Evo
dim3 gridDim(XParam.nblk, 1, 1);


conserveElevationLeft << <gridDim, blockDimHaloLR, 0 >> > (XParam, XBlock, XEv, zb);
conserveElevationLeft <<<gridDim, blockDimHaloLR, 0 >>> (XParam, XBlock, XEv, zb);
CUDA_CHECK(cudaDeviceSynchronize());
conserveElevationRight << <gridDim, blockDimHaloLR, 0 >> > (XParam, XBlock, XEv, zb);
conserveElevationRight <<<gridDim, blockDimHaloLR, 0 >>> (XParam, XBlock, XEv, zb);
CUDA_CHECK(cudaDeviceSynchronize());
conserveElevationTop << <gridDim, blockDimHaloBT, 0 >> > (XParam, XBlock, XEv, zb);
conserveElevationTop <<<gridDim, blockDimHaloBT, 0 >>> (XParam, XBlock, XEv, zb);
CUDA_CHECK(cudaDeviceSynchronize());
conserveElevationBot << <gridDim, blockDimHaloBT, 0 >> > (XParam, XBlock, XEv, zb);
conserveElevationBot <<<gridDim, blockDimHaloBT, 0 >>> (XParam, XBlock, XEv, zb);
CUDA_CHECK(cudaDeviceSynchronize());

}
@@ -329,13 +330,13 @@ template <class T> void WetDryProlongationGPU(Param XParam, BlockP<T> XBlock, Ev

//WetDryProlongationGPUBot

WetDryProlongationGPULeft << <gridDim, blockDimHaloLR, 0 >> > (XParam, XBlock, XEv, zb);
WetDryProlongationGPULeft <<<gridDim, blockDimHaloLR, 0 >>> (XParam, XBlock, XEv, zb);
CUDA_CHECK(cudaDeviceSynchronize());
WetDryProlongationGPURight << <gridDim, blockDimHaloLR, 0 >> > (XParam, XBlock, XEv, zb);
WetDryProlongationGPURight <<<gridDim, blockDimHaloLR, 0 >>> (XParam, XBlock, XEv, zb);
CUDA_CHECK(cudaDeviceSynchronize());
WetDryProlongationGPUTop << <gridDim, blockDimHaloBT, 0 >> > (XParam, XBlock, XEv, zb);
WetDryProlongationGPUTop <<<gridDim, blockDimHaloBT, 0 >>> (XParam, XBlock, XEv, zb);
CUDA_CHECK(cudaDeviceSynchronize());
WetDryProlongationGPUBot << <gridDim, blockDimHaloBT, 0 >> > (XParam, XBlock, XEv, zb);
WetDryProlongationGPUBot <<<gridDim, blockDimHaloBT, 0 >>> (XParam, XBlock, XEv, zb);
CUDA_CHECK(cudaDeviceSynchronize());

}
@@ -350,13 +351,13 @@ template <class T> void WetDryRestrictionGPU(Param XParam, BlockP<T> XBlock, Evo

//WetDryProlongationGPUBot

WetDryRestrictionGPULeft << <gridDim, blockDimHaloLR, 0 >> > (XParam, XBlock, XEv, zb);
WetDryRestrictionGPULeft <<<gridDim, blockDimHaloLR, 0 >>> (XParam, XBlock, XEv, zb);
CUDA_CHECK(cudaDeviceSynchronize());
WetDryRestrictionGPURight << <gridDim, blockDimHaloLR, 0 >> > (XParam, XBlock, XEv, zb);
WetDryRestrictionGPURight <<<gridDim, blockDimHaloLR, 0 >>> (XParam, XBlock, XEv, zb);
CUDA_CHECK(cudaDeviceSynchronize());
WetDryRestrictionGPUTop << <gridDim, blockDimHaloBT, 0 >> > (XParam, XBlock, XEv, zb);
WetDryRestrictionGPUTop <<<gridDim, blockDimHaloBT, 0 >>> (XParam, XBlock, XEv, zb);
CUDA_CHECK(cudaDeviceSynchronize());
WetDryRestrictionGPUBot << <gridDim, blockDimHaloBT, 0 >> > (XParam, XBlock, XEv, zb);
WetDryRestrictionGPUBot <<<gridDim, blockDimHaloBT, 0 >>> (XParam, XBlock, XEv, zb);
CUDA_CHECK(cudaDeviceSynchronize());

}
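
A recurring edit across this PR replaces the spaced kernel-launch chevrons "<< < ... >> >" with the standard "<<< ... >>>" (commit ad71ffa). The change is purely syntactic, with grid and block dimensions untouched, and it plausibly also helps stricter compiler front ends such as hipcc (cf. commit bf99d36, "coding fix for Hipcc").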