Skip to content

Commit 6a9a419

Browse files
committed
Significantly reduced simulation startup time
1 parent 636724b commit 6a9a419

File tree

4 files changed

+62
-39
lines changed

4 files changed

+62
-39
lines changed

DOCUMENTATION.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,20 +30,20 @@ git clone https://github.com/ProjectPhysX/FluidX3D.git
3030
chmod +x make.sh
3131
./make.sh
3232
```
33-
- Compiling requires `C++17`, which is supported since `g++` version `8` (check with `g++ --version`). If you have `make` installed (check with `make --version`), compiling will be faster using multiple CPU cores; otherwise compiling falls back to using a single CPU core.
34-
- If you use [`INTERACTIVE_GRAPHICS`](src/defines.hpp), change to the "[compile on Linux with X11 graphics](make.sh#L3)" command in [`make.sh`](make.sh#L3).
33+
- Compiling requires [`g++`](https://gcc.gnu.org/) with `C++17`, which is supported since version `8` (check with `g++ --version`). If you have [`make`](https://www.gnu.org/software/make/) installed (check with `make --version`), compiling will be faster using multiple CPU cores; otherwise compiling falls back to using a single CPU core.
34+
- If you use [`INTERACTIVE_GRAPHICS`](src/defines.hpp), select [`TARGET=Linux-X11`](make.sh#L3) in [`make.sh`](make.sh#L3).
3535
- To select a specific GPU, enter `./make.sh 0` to compile+run, or `bin/FluidX3D 0` to run on device `0`. You can also select multiple GPUs with `bin/FluidX3D 0 1 3 6` if the setup is [configured as multi-GPU](#the-lbm-class).
3636

3737
### macOS
38-
- Select the "[compile on macOS](make.sh#L5)" command in [`make.sh`](make.sh#L5).
38+
- Select [`TARGET=macOS`](make.sh#L5) in [`make.sh`](make.sh#L5).
3939
- Compile and run with:
4040
```bash
4141
chmod +x make.sh
4242
./make.sh
4343
```
4444

4545
### Android
46-
- Select the "[compile on Android](make.sh#L6)" command in [`make.sh`](make.sh#L6).
46+
- Select [`TARGET=Android`](make.sh#L6) in [`make.sh`](make.sh#L6).
4747
- Compile and run with:
4848
```bash
4949
chmod +x make.sh

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,10 @@ The fastest and most memory efficient lattice Boltzmann CFD software, running on
112112
- added operating system info to OpenCL device driver version printout
113113
- fixed flickering with frustum culling at very small field of view
114114
- fixed bug where rendered/exported frame was not updated when `visualization_modes` changed
115+
- v2.12 (17.01.2024)
116+
- significantly (~3x) faster source code compiling on Linux using multiple CPU cores if [`make`](https://www.gnu.org/software/make/) is installed
117+
- significantly faster simulation initialization (~40% single-GPU, ~15% multi-GPU)
118+
- minor bug fix in `Memory_Container::reset()` function
115119

116120
</details>
117121

src/info.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ void Info::print_logo() const {
5555
print("| "); print("\\ \\ / /", c); print(" |\n");
5656
print("| "); print("\\ ' /", c); print(" |\n");
5757
print("| "); print("\\ /", c); print(" |\n");
58-
print("| "); print("\\ /", c); print(" FluidX3D Version 2.11 |\n");
58+
print("| "); print("\\ /", c); print(" FluidX3D Version 2.12 |\n");
5959
print("| "); print("'", c); print(" Copyright (c) Dr. Moritz Lehmann |\n");
6060
print("|-----------------------------------------------------------------------------|\n");
6161
}

src/lbm.hpp

Lines changed: 53 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -222,29 +222,53 @@ class LBM {
222222
public:
223223
template<typename T> class Memory_Container { // does not hold any data itself, just links to LBM_Domain data
224224
private:
225-
LBM* lbm = nullptr;
226225
ulong N = 0ull; // buffer length
227226
uint d = 1u; // buffer dimensions
228-
uint Nx=1u, Ny=1u, Nz=1u; // (local) lattice dimensions
229-
uint Dx=1u, Dy=1u, Dz=1u; // lattice domains
227+
LBM* lbm = nullptr;
230228
Memory<T>** buffers = nullptr; // host buffers
231229
string name = "";
230+
231+
uint Nx=1u, Ny=1u, Nz=1u, Dx=1u, Dy=1u, Dz=1u, D=1u; // auxiliary variables: (local) lattice dimensions, lattice domains, number of domains
232+
uint NxDx=1u, NyDy=1u, NzDz=1u, Hx=0u, Hy=0u, Hz=0u; // auxiliary variables: shortcuts for N_/D_, halo offsets
233+
ulong NxNy=1ull, local_Nx=1ull, local_Ny=1ull, local_Nz=1ull, local_N=1ull; // auxiliary variables: shortcut for Nx*Ny, size of each domain, number of cells in each domain
234+
inline void initialize_auxiliary_variables() { // these variables are frequently used in reference() functions, so pre-compute them only once here
235+
Nx = lbm->get_Nx(); Ny = lbm->get_Ny(); Nz = lbm->get_Nz();
236+
Dx = lbm->get_Dx(); Dy = lbm->get_Dy(); Dz = lbm->get_Dz();
237+
D = Dx*Dy*Dz; // number of domains
238+
NxNy = (ulong)Nx*(ulong)Ny; // shortcut for Nx*Ny
239+
NxDx=Nx/Dx; NyDy=Ny/Dy; NzDz=Nz/Dz; // shortcuts for N_/D_
240+
Hx=Dx>1u; Hy=Dy>1u; Hz=Dz>1u; // halo offsets
241+
local_Nx=(ulong)(NxDx+2u*Hx); local_Ny=(ulong)(NyDy+2u*Hy); local_Nz=(ulong)(NzDz+2u*Hz); // size of each domain
242+
local_N = local_Nx*local_Ny*local_Nz; // number of cells in each domain
243+
}
232244
inline void initialize_auxiliary_pointers() {
233245
/********/ x = Pointer(this, 0x0u);
234246
if(d>0x1u) y = Pointer(this, 0x1u);
235247
if(d>0x2u) z = Pointer(this, 0x2u);
236248
}
249+
inline T& reference(const ulong i) { // stitch together domain buffers and make them appear as one single large buffer
250+
if(D==1u) { // take shortcut for single domain
251+
return buffers[0]->data()[i]; // array of structures
252+
} else { // decompose index for multiple domains
253+
const ulong global_i=i%N, t=global_i%NxNy;
254+
const uint x=(uint)(t%(ulong)Nx), y=(uint)(t/(ulong)Nx), z=(uint)(global_i/NxNy); // n = x+(y+z*Ny)*Nx
255+
const uint px=x%NxDx, py=y%NyDy, pz=z%NzDz, dx=x/NxDx, dy=y/NyDy, dz=z/NzDz, domain=dx+(dy+dz*Dy)*Dx; // 3D position within domain and which domain
256+
const ulong local_i = (ulong)(px+Hx)+((ulong)(py+Hy)+(ulong)(pz+Hz)*local_Ny)*local_Nx; // add halo offsets
257+
const ulong local_dimension = i/N;
258+
return buffers[domain]->data()[local_i+local_dimension*local_N]; // array of structures
259+
}
260+
}
237261
inline T& reference(const ulong i, const uint dimension) { // stitch together domain buffers and make them appear as one single large buffer
238-
const ulong global_i = i%N;
239-
const ulong NxNy=(ulong)Nx*(ulong)Ny, t=global_i%NxNy;
240-
const uint x=(uint)(t%(ulong)Nx), y=(uint)(t/(ulong)Nx), z=(uint)(global_i/NxNy); // n = x+(y+z*Ny)*Nx
241-
const uint NxDx=Nx/Dx, NyDy=Ny/Dy, NzDz=Nz/Dz;
242-
const uint px=x%NxDx, py=y%NyDy, pz=z%NzDz, dx=x/NxDx, dy=y/NyDy, dz=z/NzDz, domain=dx+(dy+dz*Dy)*Dx;
243-
const uint Hx=Dx>1u, Hy=Dy>1u, Hz=Dz>1u; // halo offsets
244-
const ulong local_N = (ulong)(NxDx+2u*Hx)*(ulong)(NyDy+2u*Hy)*(ulong)(NzDz+2u*Hz); // add halo offsets
245-
const ulong local_i = (ulong)(px+Hx)+((ulong)(py+Hy)+(ulong)(pz+Hz)*(ulong)(NyDy+2u*Hy))*(ulong)(NxDx+2u*Hx); // add halo offsets
246-
const ulong local_dimension = max(i/N, (ulong)dimension);
247-
return buffers[domain]->data()[local_i+local_dimension*local_N]; // array of structures
262+
if(D==1u) { // take shortcut for single domain
263+
return buffers[0]->data()[i+(ulong)dimension*N]; // array of structures
264+
} else { // decompose index for multiple domains
265+
const ulong global_i=i%N, t=global_i%NxNy;
266+
const uint x=(uint)(t%(ulong)Nx), y=(uint)(t/(ulong)Nx), z=(uint)(global_i/NxNy); // n = x+(y+z*Ny)*Nx
267+
const uint px=x%NxDx, py=y%NyDy, pz=z%NzDz, dx=x/NxDx, dy=y/NyDy, dz=z/NzDz, domain=dx+(dy+dz*Dy)*Dx; // 3D position within domain and which domain
268+
const ulong local_i = (ulong)(px+Hx)+((ulong)(py+Hy)+(ulong)(pz+Hz)*local_Ny)*local_Nx; // add halo offsets
269+
const ulong local_dimension = max(i/N, (ulong)dimension);
270+
return buffers[domain]->data()[local_i+local_dimension*local_N]; // array of structures
271+
}
248272
}
249273
inline static string vtk_type() {
250274
/**/ if constexpr(std::is_same<T, char >::value) return "char" ; else if constexpr(std::is_same<T, uchar >::value) return "unsigned_char" ;
@@ -300,52 +324,47 @@ class LBM {
300324
Pointer x, y, z; // host buffer auxiliary pointers for multi-dimensional array access (array of structures)
301325

302326
inline Memory_Container(LBM* lbm, Memory<T>** buffers, const string& name) {
327+
this->N = lbm->get_N();
328+
this->d = buffers[0]->dimensions();
329+
if(this->N*(ulong)this->d==0ull) print_error("Memory size must be larger than 0.");
303330
this->lbm = lbm;
304-
this->Nx = lbm->get_Nx(); this->Ny = lbm->get_Ny(); this->Nz = lbm->get_Nz();
305-
this->Dx = lbm->get_Dx(); this->Dy = lbm->get_Dy(); this->Dz = lbm->get_Dz();
306331
this->buffers = buffers;
307332
this->name = name;
308-
this->N = (ulong)this->Nx*(ulong)this->Ny*(ulong)this->Nz;
309-
this->d = buffers[0]->dimensions();
310-
if(this->N*(ulong)this->d==0ull) print_error("Memory size must be larger than 0.");
333+
initialize_auxiliary_variables();
311334
initialize_auxiliary_pointers();
312335
}
313336
inline Memory_Container() {} // default constructor
314337
inline Memory_Container& operator=(Memory_Container&& memory) noexcept { // move assignment
338+
this->N = memory.N;
339+
this->d = memory.d;
315340
this->lbm = memory.lbm;
316-
this->Nx = memory.Nx; this->Ny = memory.Ny; this->Nz = memory.Nz;
317-
this->Dx = memory.Dx; this->Dy = memory.Dy; this->Dz = memory.Dz;
318341
this->buffers = memory.buffers;
319342
this->name = memory.name;
320-
this->N = memory.N;
321-
this->d = memory.d;
343+
initialize_auxiliary_variables();
322344
initialize_auxiliary_pointers();
323345
return *this;
324346
}
325347
inline void reset(const T value=(T)0) {
326-
for(uint domain=0u; domain<Dx*Dy*Dz; domain++) {
327-
for(ulong i=0ull; i<range()/(ulong)(Dx*Dy*Dz); i++) buffers[domain][i] = value;
328-
}
329-
write_to_device();
348+
for(uint domain=0u; domain<D; domain++) buffers[domain]->reset(value);
330349
}
331350
inline const ulong length() const { return N; }
332351
inline const uint dimensions() const { return d; }
333352
inline const ulong range() const { return N*(ulong)d; }
334353
inline const ulong capacity() const { return N*(ulong)d*sizeof(T); } // returns capacity of the buffer in Byte
335-
inline T& operator[](const ulong i) { return reference(i, 0u); }
336-
inline const T& operator[](const ulong i) const { return reference(i, 0u); }
337-
inline const T operator()(const ulong i) const { return reference(i, 0u); }
354+
inline T& operator[](const ulong i) { return reference(i); }
355+
inline const T& operator[](const ulong i) const { return reference(i); }
356+
inline const T operator()(const ulong i) const { return reference(i); }
338357
inline const T operator()(const ulong i, const uint dimension) const { return reference(i, dimension); } // array of structures
339358
inline void read_from_device() {
340359
#ifndef UPDATE_FIELDS
341-
for(uint domain=0u; domain<Dx*Dy*Dz; domain++) lbm->lbm[domain]->enqueue_update_fields(); // make sure data in device memory is up-to-date
360+
for(uint domain=0u; domain<D; domain++) lbm->lbm[domain]->enqueue_update_fields(); // make sure data in device memory is up-to-date
342361
#endif // UPDATE_FIELDS
343-
for(uint domain=0u; domain<Dx*Dy*Dz; domain++) buffers[domain]->enqueue_read_from_device();
344-
for(uint domain=0u; domain<Dx*Dy*Dz; domain++) buffers[domain]->finish_queue();
362+
for(uint domain=0u; domain<D; domain++) buffers[domain]->enqueue_read_from_device();
363+
for(uint domain=0u; domain<D; domain++) buffers[domain]->finish_queue();
345364
}
346365
inline void write_to_device() {
347-
for(uint domain=0u; domain<Dx*Dy*Dz; domain++) buffers[domain]->enqueue_write_to_device();
348-
for(uint domain=0u; domain<Dx*Dy*Dz; domain++) buffers[domain]->finish_queue();
366+
for(uint domain=0u; domain<D; domain++) buffers[domain]->enqueue_write_to_device();
367+
for(uint domain=0u; domain<D; domain++) buffers[domain]->finish_queue();
349368
}
350369
inline void write_host_to_vtk(const string& path="") { // write binary .vtk file
351370
write_vtk(default_filename(path, name, ".vtk", lbm->get_t()));

0 commit comments

Comments
 (0)