Skip to content

Commit 9a3f593

Browse files
committed
OpenCL driver bug workaround for old AMD GPUs (binary number literals don't work), FORCE_FIELD and VOLUME_FORCE can now be used independently, updated OpenCL-Wrapper
1 parent 3299c7a commit 9a3f593

File tree

4 files changed: +21 -29 lines changed

src/defines.hpp

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616
#define BENCHMARK // disable all extensions and setups and run benchmark setup instead
1717

1818
//#define VOLUME_FORCE // enables global force per volume in one direction, specified in the LBM class constructor; the force can be changed on-the-fly between time steps at no performance cost
19-
//#define FORCE_FIELD // enables a force per volume for each lattice point independently; allocates an extra 12 Bytes/node; enables computing the forces from the fluid on solid boundaries with lbm.calculate_force_on_boundaries();
19+
//#define FORCE_FIELD // enables computing the forces on solid boundaries with lbm.calculate_force_on_boundaries(); and enables setting the force for each lattice point independently (enable VOLUME_FORCE too); allocates an extra 12 Bytes/node
2020
//#define MOVING_BOUNDARIES // enables moving solids: set solid nodes to TYPE_S and set their velocity u unequal to zero
2121
//#define EQUILIBRIUM_BOUNDARIES // enables fixing the velocity/density by marking nodes with TYPE_E; can be used for inflow/outflow; does not reflect shock waves
2222
//#define SURFACE // enables free surface LBM: mark fluid nodes with TYPE_F; at initialization the TYPE_I interface and TYPE_G gas domains will automatically be completed; allocates an extra 12 Bytes/node
@@ -69,10 +69,6 @@
6969
#undef GRAPHICS
7070
#endif // BENCHMARK
7171

72-
#ifdef FORCE_FIELD
73-
#define VOLUME_FORCE
74-
#endif // FORCE_FIELD
75-
7672
#ifdef SURFACE // (rho, u) need to be updated exactly every LBM step
7773
#define UPDATE_FIELDS // update (rho, u, T) in every LBM step
7874
#endif // SURFACE

src/lbm.cpp

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -490,21 +490,21 @@ string LBM::device_defines() const { return
490490
"\n #define TRT"
491491
#endif // TRT
492492

493-
"\n #define TYPE_S 0b00000001" // (stationary or moving) solid boundary
494-
"\n #define TYPE_E 0b00000010" // equilibrium boundary (inflow/outflow)
495-
"\n #define TYPE_T 0b00000100" // temperature boundary
496-
"\n #define TYPE_F 0b00001000" // fluid
497-
"\n #define TYPE_I 0b00010000" // interface
498-
"\n #define TYPE_G 0b00100000" // gas
499-
"\n #define TYPE_X 0b01000000" // reserved type X
500-
"\n #define TYPE_Y 0b10000000" // reserved type Y
501-
502-
"\n #define TYPE_MS 0b00000011" // node next to moving solid boundary
503-
"\n #define TYPE_BO 0b00000011" // any flag bit used for boundaries (temperature excluded)
504-
"\n #define TYPE_IF 0b00011000" // change from interface to fluid
505-
"\n #define TYPE_IG 0b00110000" // change from interface to gas
506-
"\n #define TYPE_GI 0b00111000" // change from gas to interface
507-
"\n #define TYPE_SU 0b00111000" // any flag bit used for SURFACE
493+
"\n #define TYPE_S 0x01" // 0b00000001 // (stationary or moving) solid boundary
494+
"\n #define TYPE_E 0x02" // 0b00000010 // equilibrium boundary (inflow/outflow)
495+
"\n #define TYPE_T 0x04" // 0b00000100 // temperature boundary
496+
"\n #define TYPE_F 0x08" // 0b00001000 // fluid
497+
"\n #define TYPE_I 0x10" // 0b00010000 // interface
498+
"\n #define TYPE_G 0x20" // 0b00100000 // gas
499+
"\n #define TYPE_X 0x40" // 0b01000000 // reserved type X
500+
"\n #define TYPE_Y 0x80" // 0b10000000 // reserved type Y
501+
502+
"\n #define TYPE_MS 0x03" // 0b00000011 // node next to moving solid boundary
503+
"\n #define TYPE_BO 0x03" // 0b00000011 // any flag bit used for boundaries (temperature excluded)
504+
"\n #define TYPE_IF 0x18" // 0b00011000 // change from interface to fluid
505+
"\n #define TYPE_IG 0x30" // 0b00110000 // change from interface to gas
506+
"\n #define TYPE_GI 0x38" // 0b00111000 // change from gas to interface
507+
"\n #define TYPE_SU 0x38" // 0b00111000 // any flag bit used for SURFACE
508508

509509
#if defined(FP16S)
510510
"\n #define fpxx half" // switchable data type (scaled IEEE-754 16-bit floating-point format: 1-5-10, exp-30, +-1.99902344, +-1.86446416E-9, +-1.81898936E-12, 3.311 digits)

src/opencl.hpp

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -3,11 +3,7 @@
33
#define WORKGROUP_SIZE 64 // needs to be 64 to fully use AMD GPUs
44
//#define PTX
55
//#define LOG
6-
//#define USE_OPENCL_1_1
76

8-
#ifdef USE_OPENCL_1_1
9-
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
10-
#endif // USE_OPENCL_1_1
117
#ifndef _WIN32
128
#pragma GCC diagnostic ignored "-Wignored-attributes" // ignore compiler warnings for CL/cl.hpp with g++
139
#endif // _WIN32
@@ -54,7 +50,7 @@ struct Device_Info {
5450
const bool nvidia_192_cores_per_cu = contains_any(to_lower(name), {" 6", " 7", "ro k", "la k"}) || (clock_frequency<1000u&&contains(to_lower(name), "titan")); // identify Kepler GPUs
5551
const bool nvidia_64_cores_per_cu = contains_any(to_lower(name), {"p100", "v100", "a100", "a30", " 16", " 20", "titan v", "titan rtx", "ro t", "la t", "ro rtx"}) && !contains(to_lower(name), "rtx a"); // identify P100, Volta, Turing, A100, A30
5652
const bool amd_128_cores_per_dualcu = contains(to_lower(name), "gfx10"); // identify RDNA/RDNA2 GPUs where dual CUs are reported
57-
const float nvidia = (float)(contains(to_lower(vendor), "nvidia"))*(nvidia_192_cores_per_cu?192.0f:(nvidia_64_cores_per_cu?64.0f:128.0f)); // Nvidia GPUs have 192 cores/CU (Kepler), 128 cores/CU (Maxwell, Pascal, Ampere) or 64 cores/CU (P100, Volta, Turing, A100)
53+
const float nvidia = (float)(contains(to_lower(vendor), "nvidia"))*(nvidia_192_cores_per_cu?192.0f:(nvidia_64_cores_per_cu?64.0f:128.0f)); // Nvidia GPUs have 192 cores/CU (Kepler), 128 cores/CU (Maxwell, Pascal, Ampere, Hopper, Ada) or 64 cores/CU (P100, Volta, Turing, A100, A30)
5854
const float amd = (float)(contains_any(to_lower(vendor), {"amd", "advanced"}))*(is_gpu?(amd_128_cores_per_dualcu?128.0f:64.0f):0.5f); // AMD GPUs have 64 cores/CU (GCN, CDNA) or 128 cores/dualCU (RDNA, RDNA2), AMD CPUs (with SMT) have 1/2 core/CU
5955
const float intel = (float)(contains(to_lower(vendor), "intel"))*(is_gpu?8.0f:0.5f); // Intel integrated GPUs usually have 8 cores/CU, Intel CPUs (with HT) have 1/2 core/CU
6056
const float apple = (float)(contains(to_lower(vendor), "apple"))*(128.0f); // Apple ARM GPUs usually have 128 cores/CU
@@ -170,7 +166,7 @@ class Device {
170166
write_file("bin/kernel.log", log); // save build log
171167
if((uint)log.length()>2u) print_warning(log); // print build log
172168
#endif // LOG
173-
if(error) print_error("OpenCL C code compilation failed with error code "+to_string(error)+". Make sure there are no errors in kernel.cpp (\"#define LOG\" might help). If your GPU is old, try uncommenting \"#define USE_OPENCL_1_1\".");
169+
if(error) print_error("OpenCL C code compilation failed with error code "+to_string(error)+". Make sure there are no errors in kernel.cpp.");
174170
else print_info("OpenCL C code successfully compiled.");
175171
#ifdef PTX // generate assembly (ptx) file for OpenCL code
176172
write_file("bin/kernel.ptx", cl_program.getInfo<CL_PROGRAM_BINARIES>()[0]); // save binary (ptx file)

src/setup.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -204,10 +204,10 @@
204204

205205
/*void main_setup() { // lid-driven cavity
206206
// ######################################################### define simulation box size, viscosity and volume force ############################################################################
207-
const uint L = 96u;
207+
const uint L = 128u;
208208
const float Re = 1000.0f;
209-
const float u = 0.4f;
210-
LBM lbm(L, L, L, units.nu_from_Re(Re, (float)L, u));
209+
const float u = 0.1f;
210+
LBM lbm(L, L, L, units.nu_from_Re(Re, (float)(L-2u), u));
211211
// #############################################################################################################################################################################################
212212
const uint N=lbm.get_N(), Nx=lbm.get_Nx(), Ny=lbm.get_Ny(), Nz=lbm.get_Nz(); for(uint n=0u, x=0u, y=0u, z=0u; n<N; n++, lbm.coordinates(n, x, y, z)) {
213213
// ########################################################################### define geometry #############################################################################################

0 commit comments

Comments
 (0)