Skip to content

Commit 8ccdf11

Browse files
committed
Fixed that voxelization did not always produce binary identical results in multi-GPU compared to single-GPU, fixed z-fighting, fixed flickering in multi-GPU interactive rendering
1 parent 649fd40 commit 8ccdf11

File tree

9 files changed

+48
-28
lines changed

9 files changed

+48
-28
lines changed

README.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,19 @@ The fastest and most memory efficient lattice Boltzmann CFD software, running on
146146
- fixed bug that raytracing kernel could lock up simulation
147147
- fixed minor visual artifacts with raytracing
148148
- fixed that console sometimes was not cleared before `INTERACTIVE_GRAPHICS_ASCII` rendering starts
149+
- [v2.16](https://github.com/ProjectPhysX/FluidX3D/releases/tag/v2.16) (02.05.2024) [changes](https://github.com/ProjectPhysX/FluidX3D/compare/v2.15...v2.16) (bug fixes)
150+
- simplified, 10% faster marching-cubes implementation with 1D interpolation on edges instead of 3D interpolation, which eliminates the need for the edge table
151+
- added faster, simplified marching-cubes variant for solid surface rendering where edges are always halfway between grid cells
152+
- refactoring in OpenCL rendering kernels
153+
- fixed that voxelization failed in Intel OpenCL CPU Runtime due to array out-of-bounds access
154+
- fixed that voxelization did not always produce binary identical results in multi-GPU compared to single-GPU
155+
- fixed that velocity voxelization failed for free surface simulations
156+
- fixed that <kbd>Y</kbd>/<kbd>Z</kbd> keys were incorrect for `QWERTY` keyboard layout in Linux
157+
- fixed that free camera movement speed in help overlay was not updated in stationary image when scrolling
158+
- fixed that cursor would sometimes flicker when scrolling on trackpads with Linux-X11 interactive graphics
159+
- fixed flickering of interactive rendering with multi-GPU when camera is not moved
160+
- fixed missing `XInitThreads()` call that could crash Linux interactive graphics on some systems
161+
- fixed z-fighting between `graphics_rasterize_phi()` and `graphics_flags_mc()` kernels
149162

150163
</details>
151164

makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ temp/setup.o: src/setup.cpp src/defines.hpp src/graphics.hpp src/info.hpp src/lb
5252
@mkdir -p temp
5353
$(CC) -c src/setup.cpp -o temp/setup.o $(CFLAGS) $(LDFLAGS_OPENCL)
5454

55-
temp/shapes.o: src/shapes.cpp src/defines.hpp src/graphics.hpp src/info.hpp src/lbm.hpp src/lodepng.hpp src/opencl.hpp src/shapes.hpp src/units.hpp src/utilities.hpp make.sh
55+
temp/shapes.o: src/shapes.cpp src/shapes.hpp src/utilities.hpp make.sh
5656
@mkdir -p temp
5757
$(CC) -c src/shapes.cpp -o temp/shapes.o $(CFLAGS) $(LDFLAGS_OPENCL)
5858

src/info.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ void Info::print_logo() const {
5555
print("| "); print("\\ \\ / /", c); print(" |\n");
5656
print("| "); print("\\ ' /", c); print(" |\n");
5757
print("| "); print("\\ /", c); print(" |\n");
58-
print("| "); print("\\ /", c); print(" FluidX3D Version 2.15 |\n");
58+
print("| "); print("\\ /", c); print(" FluidX3D Version 2.16 |\n");
5959
print("| "); print( "'", c); print(" Copyright (c) Dr. Moritz Lehmann |\n");
6060
print("|-----------------------------------------------------------------------------|\n");
6161
}

src/kernel.cpp

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2129,7 +2129,6 @@ string opencl_c_container() { return R( // ########################## begin of O
21292129
insert_rho_u_flags(a, A, index_insert_m(a, direction), transfer_buffer_m, rho, u, flags);
21302130
}
21312131

2132-
)+"#ifdef SURFACE"+R(
21332132
)+R(kernel void transfer_extract_flags(const uint direction, const ulong t, global uchar* transfer_buffer_p, global uchar* transfer_buffer_m, const global uchar* flags) {
21342133
const uint a=get_global_id(0), A=get_area(direction); // a = domain area index for each side, A = area of the domain boundary
21352134
if(a>=A) return; // area might not be a multiple of def_workgroup_size, so return here to avoid writing in unallocated memory space
@@ -2143,6 +2142,7 @@ string opencl_c_container() { return R( // ########################## begin of O
21432142
flags[index_insert_m(a, direction)] = transfer_buffer_m[a];
21442143
}
21452144

2145+
)+"#ifdef SURFACE"+R(
21462146
)+R(void extract_phi_massex_flags(const uint a, const uint A, const uint n, global char* transfer_buffer, const global float* phi, const global float* massex, const global uchar* flags) {
21472147
((global float*)transfer_buffer)[ a] = phi [n];
21482148
((global float*)transfer_buffer)[ A+a] = massex[n];
@@ -2222,9 +2222,8 @@ string opencl_c_container() { return R( // ########################## begin of O
22222222
const float x0=bbu[ 1], y0=bbu[ 2], z0=bbu[ 3], x1=bbu[ 4], y1=bbu[ 5], z1=bbu[ 6];
22232223
const float cx=bbu[ 7], cy=bbu[ 8], cz=bbu[ 9], ux=bbu[10], uy=bbu[11], uz=bbu[12], rx=bbu[13], ry=bbu[14], rz=bbu[15];
22242224
const uint3 xyz = direction==0u ? (uint3)((uint)clamp((int)x0-def_Ox, 0, (int)def_Nx-1), a%def_Ny, a/def_Ny) : direction==1u ? (uint3)(a/def_Nz, (uint)clamp((int)y0-def_Oy, 0, (int)def_Ny-1), a%def_Nz) : (uint3)(a%def_Nx, a/def_Nx, (uint)clamp((int)z0-def_Oz, 0, (int)def_Nz-1));
2225-
const float3 p = position(xyz);
2226-
const float3 offset = (float3)(0.5f*(float)((def_Nx-2u*(def_Dx>1u))*def_Dx)-0.5f, 0.5f*(float)((def_Ny-2u*(def_Dy>1u))*def_Dy)-0.5f, 0.5f*(float)((def_Nz-2u*(def_Dz>1u))*def_Dz)-0.5f)+(float3)(def_domain_offset_x, def_domain_offset_y, def_domain_offset_z);
2227-
const float3 r_origin = p+offset;
2225+
const float3 offset = (float3)(0.5f*(float)((int)def_Nx+2*def_Ox)-0.5f, 0.5f*(float)((int)def_Ny+2*def_Oy)-0.5f, 0.5f*(float)((int)def_Nz+2*def_Oz)-0.5f);
2226+
const float3 r_origin = position(xyz)+offset;
22282227
const float3 r_direction = (float3)((float)(direction==0u), (float)(direction==1u), (float)(direction==2u));
22292228
uint intersections=0u, intersections_check=0u;
22302229
ushort distances[64]; // allow up to 64 mesh intersections
@@ -2307,7 +2306,7 @@ string opencl_c_container() { return R( // ########################## begin of O
23072306

23082307
)+R(kernel void unvoxelize_mesh(global uchar* flags, const uchar flag, float x0, float y0, float z0, float x1, float y1, float z1) { // remove voxelized triangle mesh
23092308
const uint n = get_global_id(0);
2310-
const float3 p = position(coordinates(n))+(float3)(0.5f*(float)((def_Nx-2u*(def_Dx>1u))*def_Dx)-0.5f, 0.5f*(float)((def_Ny-2u*(def_Dy>1u))*def_Dy)-0.5f, 0.5f*(float)((def_Nz-2u*(def_Dz>1u))*def_Dz)-0.5f)+(float3)(def_domain_offset_x, def_domain_offset_y, def_domain_offset_z);
2309+
const float3 p = position(coordinates(n))+(float3)(0.5f*(float)((int)def_Nx+2*def_Ox)-0.5f, 0.5f*(float)((int)def_Ny+2*def_Oy)-0.5f, 0.5f*(float)((int)def_Nz+2*def_Oz)-0.5f);
23112310
if(p.x>=x0-1.0f&&p.y>=y0-1.0f&&p.z>=z0-1.0f&&p.x<=x1+1.0f&&p.y<=y1+1.0f&&p.z<=z1+1.0f) flags[n] &= ~flag;
23122311
} // unvoxelize_mesh()
23132312

@@ -2669,11 +2668,14 @@ string opencl_c_container() { return R( // ########################## begin of O
26692668
draw_line(p-(0.5f/ul)*un, p+(0.5f/ul)*un, c, camera_cache, bitmap, zbuffer);
26702669
}
26712670

2672-
)+"#ifndef TEMPERATURE"+R(
2673-
)+R(kernel void graphics_q(const global float* camera, global int* bitmap, global int* zbuffer, const int field_mode, const global float* rho, const global float* u, const global uchar* flags) {
2674-
)+"#else"+R( // TEMPERATURE
2675-
)+R(kernel void graphics_q(const global float* camera, global int* bitmap, global int* zbuffer, const int field_mode, const global float* rho, const global float* u, const global uchar* flags, const global float* T) {
2671+
)+R(kernel void graphics_q)+"("+R(const global float* camera, global int* bitmap, global int* zbuffer, const int field_mode, const global float* rho, const global float* u // ) {
2672+
)+"#ifdef SURFACE"+R(
2673+
, const global uchar* flags // argument order is important
2674+
)+"#endif"+R( // SURFACE
2675+
)+"#ifdef TEMPERATURE"+R(
2676+
, const global float* T // argument order is important
26762677
)+"#endif"+R( // TEMPERATURE
2678+
)+") {"+R( // graphics_q()
26772679
const uint n = get_global_id(0);
26782680
const uint3 xyz = coordinates(n);
26792681
if(xyz.x>=def_Nx-1u||xyz.y>=def_Ny-1u||xyz.z>=def_Nz-1u||is_halo_q(xyz)) return; // don't execute graphics_q_field() on marching-cubes halo
@@ -2683,9 +2685,11 @@ string opencl_c_container() { return R( // ########################## begin of O
26832685
if(!is_in_camera_frustrum(p, camera_cache)) return; // skip loading LBM data if grid cell is not visible
26842686
uint j[32];
26852687
calculate_j32(xyz, j);
2688+
)+"#ifdef SURFACE"+R(
26862689
uchar flags_cell = 0u;
2687-
for(uint i=0u; i<32u; i++) flags_cell |= flags[j[i]];
2688-
if(flags_cell&(TYPE_E|TYPE_I|TYPE_G)) return;
2690+
for(uint i=0u; i<8u; i++) flags_cell |= flags[j[i]];
2691+
if(flags_cell&(TYPE_I|TYPE_G)) return;
2692+
)+"#endif"+R( // SURFACE
26892693
float3 uj[32];
26902694
for(uint i=0u; i<32u; i++) uj[i] = load3(j[i], u);
26912695
float v[8]; // don't load any velocity twice from global memory
@@ -2745,7 +2749,7 @@ string opencl_c_container() { return R( // ########################## begin of O
27452749
float v[8];
27462750
for(uint i=0u; i<8u; i++) v[i] = phi[j[i]];
27472751
float3 triangles[15]; // maximum of 5 triangles with 3 vertices each
2748-
const uint tn = marching_cubes(v, 0.5f, triangles); // run marching cubes algorithm
2752+
const uint tn = marching_cubes(v, 0.502f, triangles); // run marching cubes algorithm, isovalue slightly larger than 0.5f to fix z-fighting with graphics_flags_mc()
27492753
if(tn==0u) return;
27502754
for(uint i=0u; i<tn; i++) {
27512755
const float3 p0 = triangles[3u*i ];

src/lbm.cpp

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -306,9 +306,9 @@ string LBM_Domain::device_defines() const { return
306306
"\n #define def_Ay "+to_string(Nz*Nx)+"u"
307307
"\n #define def_Az "+to_string(Nx*Ny)+"u"
308308

309-
"\n #define def_domain_offset_x "+to_string((float)Ox+(float)(Dx>1u)-0.5f*((float)Dx-1.0f)*(float)(Nx-2u*(Dx>1u)))+"f"
310-
"\n #define def_domain_offset_y "+to_string((float)Oy+(float)(Dy>1u)-0.5f*((float)Dy-1.0f)*(float)(Ny-2u*(Dy>1u)))+"f"
311-
"\n #define def_domain_offset_z "+to_string((float)Oz+(float)(Dz>1u)-0.5f*((float)Dz-1.0f)*(float)(Nz-2u*(Dz>1u)))+"f"
309+
"\n #define def_domain_offset_x "+to_string(0.5f*(float)((int)Nx+2*Ox+(int)Dx*(2*(int)(Dx>1u)-(int)Nx)))+"f"
310+
"\n #define def_domain_offset_y "+to_string(0.5f*(float)((int)Ny+2*Oy+(int)Dy*(2*(int)(Dy>1u)-(int)Ny)))+"f"
311+
"\n #define def_domain_offset_z "+to_string(0.5f*(float)((int)Nz+2*Oz+(int)Dz*(2*(int)(Dz>1u)-(int)Nz)))+"f"
312312

313313
"\n #define D"+to_string(dimensions)+"Q"+to_string(velocity_set)+"" // D2Q9/D3Q15/D3Q19/D3Q27
314314
"\n #define def_velocity_set "+to_string(velocity_set)+"u" // LBM velocity set (D2Q9/D3Q15/D3Q19/D3Q27)
@@ -434,7 +434,7 @@ void LBM_Domain::Graphics::allocate(Device& device) {
434434
#else // D2Q9
435435
kernel_graphics_streamline = Kernel(device, (lbm->get_Nx()/GRAPHICS_STREAMLINE_SPARSE)*(lbm->get_Ny()/GRAPHICS_STREAMLINE_SPARSE), "graphics_streamline", camera_parameters, bitmap, zbuffer, 0, 0, 0, 0, 0, lbm->rho, lbm->u, lbm->flags); // 2D
436436
#endif // D2Q9
437-
kernel_graphics_q = Kernel(device, lbm->get_N(), "graphics_q", camera_parameters, bitmap, zbuffer, 0, lbm->rho, lbm->u, lbm->flags);
437+
kernel_graphics_q = Kernel(device, lbm->get_N(), "graphics_q", camera_parameters, bitmap, zbuffer, 0, lbm->rho, lbm->u);
438438

439439
#ifdef FORCE_FIELD
440440
kernel_graphics_flags.add_parameters(lbm->F);
@@ -445,6 +445,7 @@ void LBM_Domain::Graphics::allocate(Device& device) {
445445
skybox = Memory<int>(device, skybox_image->width()*skybox_image->height(), 1u, skybox_image->data());
446446
kernel_graphics_rasterize_phi = Kernel(device, lbm->get_N(), "graphics_rasterize_phi", camera_parameters, bitmap, zbuffer, lbm->phi);
447447
kernel_graphics_raytrace_phi = Kernel(device, bitmap.length(), "graphics_raytrace_phi", camera_parameters, bitmap, skybox, lbm->phi, lbm->flags);
448+
kernel_graphics_q.add_parameters(lbm->flags);
448449
#endif // SURFACE
449450

450451
#ifdef TEMPERATURE
@@ -472,7 +473,7 @@ bool LBM_Domain::Graphics::update_camera() {
472473
bool LBM_Domain::Graphics::enqueue_draw_frame(const int visualization_modes, const int field_mode, const int slice_mode, const int slice_x, const int slice_y, const int slice_z, const bool visualization_change) {
473474
const bool camera_update = update_camera();
474475
#if defined(INTERACTIVE_GRAPHICS)||defined(INTERACTIVE_GRAPHICS_ASCII)
475-
if(!visualization_change&&!camera_update&&!camera.key_update&&lbm->get_t()==t_last_rendered_frame) return false; // don't render a new frame if the scene hasn't changed since last frame
476+
if(!visualization_change&&!camera_update&&lbm->get_t()==t_last_rendered_frame) return false; // don't render a new frame if the scene hasn't changed since last frame
476477
#endif // INTERACTIVE_GRAPHICS||INTERACTIVE_GRAPHICS_ASCII
477478
t_last_rendered_frame = lbm->get_t();
478479
if(camera_update) camera_parameters.enqueue_write_to_device(); // camera_parameters PCIe transfer and kernel_clear execution can happen simultaneously
@@ -956,8 +957,11 @@ float3 LBM::calculate_torque_on_object(const float3& rotation_center, const ucha
956957
#ifdef MOVING_BOUNDARIES
957958
void LBM::update_moving_boundaries() { // mark/unmark cells next to TYPE_S cells with velocity!=0 with TYPE_MS
958959
for(uint d=0u; d<get_D(); d++) lbm_domain[d]->enqueue_update_moving_boundaries();
959-
communicate_rho_u_flags();
960+
communicate_flags();
960961
for(uint d=0u; d<get_D(); d++) lbm_domain[d]->finish_queue();
962+
#ifdef GRAPHICS
963+
camera.key_update = true; // to prevent flickering of flags in interactive graphics when camera is not moved
964+
#endif // GRAPHICS
961965
}
962966
#endif // MOVING_BOUNDARIES
963967

@@ -1113,7 +1117,8 @@ int* LBM::Graphics::draw_frame() {
11131117
if(key_Q) { slice_z = clamp(slice_z-1, 0, (int)lbm->get_Nz()-1); key_Q = false; }
11141118
if(key_E) { slice_z = clamp(slice_z+1, 0, (int)lbm->get_Nz()-1); key_E = false; }
11151119
}
1116-
const bool visualization_change = last_visualization_modes!=visualization_modes||last_field_mode!=field_mode||last_slice_mode!=slice_mode||last_slice_x!=slice_x||last_slice_y!=slice_y||last_slice_z!=slice_z;
1120+
const bool visualization_change = camera.key_update||last_visualization_modes!=visualization_modes||last_field_mode!=field_mode||last_slice_mode!=slice_mode||last_slice_x!=slice_x||last_slice_y!=slice_y||last_slice_z!=slice_z;
1121+
camera.key_update = false;
11171122
last_visualization_modes = visualization_modes;
11181123
last_field_mode = field_mode;
11191124
last_slice_mode = slice_mode;
@@ -1123,7 +1128,6 @@ int* LBM::Graphics::draw_frame() {
11231128
bool new_frame = true;
11241129
for(uint d=0u; d<lbm->get_D(); d++) new_frame = new_frame && lbm->lbm_domain[d]->graphics.enqueue_draw_frame(visualization_modes, field_mode, slice_mode, slice_x, slice_y, slice_z, visualization_change);
11251130
for(uint d=0u; d<lbm->get_D(); d++) lbm->lbm_domain[d]->finish_queue();
1126-
camera.key_update = false;
11271131
int* bitmap = lbm->lbm_domain[0]->graphics.get_bitmap();
11281132
int* zbuffer = lbm->lbm_domain[0]->graphics.get_zbuffer();
11291133
for(uint d=1u; d<lbm->get_D()&&new_frame; d++) {
@@ -1245,9 +1249,9 @@ void LBM_Domain::allocate_transfer(Device& device) { // allocate all memory for
12451249
kernel_transfer[enum_transfer_field::fi ][1] = Kernel(device, 0u, "transfer__insert_fi" , 0u, t, transfer_buffer_p, transfer_buffer_m, fi);
12461250
kernel_transfer[enum_transfer_field::rho_u_flags ][0] = Kernel(device, 0u, "transfer_extract_rho_u_flags" , 0u, t, transfer_buffer_p, transfer_buffer_m, rho, u, flags);
12471251
kernel_transfer[enum_transfer_field::rho_u_flags ][1] = Kernel(device, 0u, "transfer__insert_rho_u_flags" , 0u, t, transfer_buffer_p, transfer_buffer_m, rho, u, flags);
1248-
#ifdef SURFACE
12491252
kernel_transfer[enum_transfer_field::flags ][0] = Kernel(device, 0u, "transfer_extract_flags" , 0u, t, transfer_buffer_p, transfer_buffer_m, flags);
12501253
kernel_transfer[enum_transfer_field::flags ][1] = Kernel(device, 0u, "transfer__insert_flags" , 0u, t, transfer_buffer_p, transfer_buffer_m, flags);
1254+
#ifdef SURFACE
12511255
kernel_transfer[enum_transfer_field::phi_massex_flags][0] = Kernel(device, 0u, "transfer_extract_phi_massex_flags", 0u, t, transfer_buffer_p, transfer_buffer_m, phi, massex, flags);
12521256
kernel_transfer[enum_transfer_field::phi_massex_flags][1] = Kernel(device, 0u, "transfer__insert_phi_massex_flags", 0u, t, transfer_buffer_p, transfer_buffer_m, phi, massex, flags);
12531257
#endif // SURFACE
@@ -1311,10 +1315,10 @@ void LBM::communicate_fi() {
13111315
void LBM::communicate_rho_u_flags() {
13121316
communicate_field(enum_transfer_field::rho_u_flags, 17u);
13131317
}
1314-
#ifdef SURFACE
13151318
void LBM::communicate_flags() {
13161319
communicate_field(enum_transfer_field::flags, 1u);
13171320
}
1321+
#ifdef SURFACE
13181322
void LBM::communicate_phi_massex_flags() {
13191323
communicate_field(enum_transfer_field::phi_massex_flags, 9u);
13201324
}

src/lbm.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,8 +212,8 @@ class LBM {
212212

213213
void communicate_fi();
214214
void communicate_rho_u_flags();
215-
#ifdef SURFACE
216215
void communicate_flags();
216+
#ifdef SURFACE
217217
void communicate_phi_massex_flags();
218218
#endif // SURFACE
219219
#ifdef TEMPERATURE

src/main.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ void main_label(const double frametime) {
7777
draw_label(ox, oy+i, "Steps " +alignr(31u, /************************************/ alignr(10u, info.lbm->get_t())+" ("+alignr(5, to_uint(1.0/info.runtime_lbm_timestep_smooth))+" Steps/s)"), c); i+=FONT_HEIGHT;
7878
draw_label(ox, oy+i, "FPS " +alignr(33u, /************************************************************/ alignr(4u, to_uint(1.0/frametime))+" ("+alignr(5u, camera.fps_limit)+" fps max)"), c);
7979
}
80-
draw_label(2, camera.height-1*(FONT_HEIGHT)-1, "FluidX3D v2.15 Copyright (c) Dr. Moritz Lehmann", c);
80+
draw_label(2, camera.height-1*(FONT_HEIGHT)-1, "FluidX3D v2.16 Copyright (c) Dr. Moritz Lehmann", c);
8181
if(!key_H) {
8282
draw_label(camera.width-16*(FONT_WIDTH)-1, 2, "Press H for Help", c);
8383
} else {

src/resource.rc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ BEGIN
2424
VALUE "LegalCopyright", "(c) Dr. Moritz Lehmann"
2525
VALUE "OriginalFilename", "FluidX3D.exe"
2626
VALUE "ProductName", "FluidX3D"
27-
VALUE "ProductVersion", "v2.15"
27+
VALUE "ProductVersion", "v2.16"
2828
END
2929
END
3030
BLOCK "VarFileInfo"

src/shapes.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
#include "shapes.hpp"
2-
#include "lbm.hpp"
32

43
const float d = 0.8660254f; // sqrt(3)/2
54

0 commit comments

Comments
 (0)