-
-
Notifications
You must be signed in to change notification settings - Fork 4k
Use bit ops instead of integer modulo and divide in shaders #19994
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -42,8 +42,8 @@ fn downsample_depth_first( | |
@builtin(workgroup_id) workgroup_id: vec3u, | ||
@builtin(local_invocation_index) local_invocation_index: u32, | ||
) { | ||
let sub_xy = remap_for_wave_reduction(local_invocation_index % 64u); | ||
let x = sub_xy.x + 8u * ((local_invocation_index >> 6u) % 2u); | ||
let sub_xy = remap_for_wave_reduction(local_invocation_index & 63u); | ||
let x = sub_xy.x + 8u * ((local_invocation_index >> 6u) & 1u); | ||
let y = sub_xy.y + 8u * (local_invocation_index >> 7u); | ||
|
||
downsample_mips_0_and_1(x, y, workgroup_id.xy, local_invocation_index); | ||
|
@@ -54,8 +54,8 @@ fn downsample_depth_first( | |
@compute | ||
@workgroup_size(256, 1, 1) | ||
fn downsample_depth_second(@builtin(local_invocation_index) local_invocation_index: u32) { | ||
let sub_xy = remap_for_wave_reduction(local_invocation_index % 64u); | ||
let x = sub_xy.x + 8u * ((local_invocation_index >> 6u) % 2u); | ||
let sub_xy = remap_for_wave_reduction(local_invocation_index & 63u); | ||
let x = sub_xy.x + 8u * ((local_invocation_index >> 6u) & 1u); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
let y = sub_xy.y + 8u * (local_invocation_index >> 7u); | ||
|
||
downsample_mips_6_and_7(x, y); | ||
|
@@ -99,8 +99,8 @@ fn downsample_mips_0_and_1(x: u32, y: u32, workgroup_id: vec2u, local_invocation | |
intermediate_memory[x * 2u + 1u][y * 2u + 1u], | ||
)); | ||
pix = (workgroup_id * 16u) + vec2( | ||
x + (i % 2u) * 8u, | ||
y + (i / 2u) * 8u, | ||
x + (i & 1u) * 8u, | ||
y + (i >> 1u) * 8u, | ||
); | ||
textureStore(mip_2, pix, vec4(v[i])); | ||
} | ||
|
@@ -142,7 +142,7 @@ fn downsample_mip_2(x: u32, y: u32, workgroup_id: vec2u, local_invocation_index: | |
intermediate_memory[x * 2u + 1u][y * 2u + 1u], | ||
)); | ||
textureStore(mip_3, (workgroup_id * 8u) + vec2(x, y), vec4(v)); | ||
intermediate_memory[x * 2u + y % 2u][y * 2u] = v; | ||
intermediate_memory[x * 2u + (y & 1u)][y * 2u] = v; | ||
} | ||
} | ||
|
||
|
@@ -241,7 +241,7 @@ fn downsample_mip_8(x: u32, y: u32, local_invocation_index: u32) { | |
intermediate_memory[x * 2u + 1u][y * 2u + 1u], | ||
)); | ||
textureStore(mip_9, vec2(x, y), vec4(v)); | ||
intermediate_memory[x * 2u + y % 2u][y * 2u] = v; | ||
intermediate_memory[x * 2u + (y & 1u)][y * 2u] = v; | ||
} | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,8 +8,8 @@ | |
|
||
@vertex | ||
fn vertex(@builtin(vertex_index) vertex_input: u32) -> @builtin(position) vec4<f32> { | ||
let vertex_index = vertex_input % 3u; | ||
let material_id = vertex_input / 3u; | ||
let vertex_index = vertex_input - material_id * 3u; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This might be worse. I thought compilers could see consecutive % and / operations and combine them into a single instruction(?) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not when it's split into a function call that looks like this: int naga_mod(int lhs, int rhs) {
int divisor = ((lhs == int(-2147483647 - 1) & rhs == -1) | (rhs == 0)) ? 1 : rhs;
return lhs - (lhs / divisor) * divisor;
}
// ...
let vertex_index = naga_mod(vertex_input, 3u);
let material_id = vertex_input / 3u; |
||
let material_depth = f32(material_id) / 65535.0; | ||
let uv = vec2<f32>(vec2(vertex_index >> 1u, vertex_index & 1u)) * 2.0; | ||
return vec4(uv_to_ndc(uv), material_depth, 1.0); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -103,6 +103,13 @@ fn henyey_greenstein(neg_LdotV: f32) -> f32 { | |
return FRAC_4_PI * (1.0 - g * g) / (denom * sqrt(denom)); | ||
} | ||
|
||
fn simple_wrap_3(index: i32) -> i32 { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can index be >= 6? If so, then this is wrong; if not, then I think this function should be named/commented to indicate it is special-purpose. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It's only used in this file, and it's only called with numbers in the range 0-5. I called it simple_wrap_3, not implying it's modulo, because it is not an implementation of modulo, just something that works for this specific case. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could you add a comment saying that it only works for values 0 to 5, which is fine for its use in this file? |
||
if (index >= 3) { | ||
return index - 3; | ||
} | ||
return index; | ||
} | ||
|
||
@fragment | ||
fn fragment(@builtin(position) position: vec4<f32>) -> @location(0) vec4<f32> { | ||
// Unpack the `volumetric_fog` settings. | ||
|
@@ -140,8 +147,8 @@ fn fragment(@builtin(position) position: vec4<f32>) -> @location(0) vec4<f32> { | |
var end_depth_view = 0.0; | ||
for (var plane_index = 0; plane_index < 3; plane_index += 1) { | ||
let plane = volumetric_fog.far_planes[plane_index]; | ||
let other_plane_a = volumetric_fog.far_planes[(plane_index + 1) % 3]; | ||
let other_plane_b = volumetric_fog.far_planes[(plane_index + 2) % 3]; | ||
let other_plane_a = volumetric_fog.far_planes[simple_wrap_3(plane_index + 1)]; | ||
let other_plane_b = volumetric_fog.far_planes[simple_wrap_3(plane_index + 2)]; | ||
|
||
// Calculate the intersection of the ray and the plane. The ray must | ||
// intersect in front of us (t > 0). | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.