Skip to content

Commit 8602338

Browse files
committed
Slightly simpler version of atomic test
Takes the loop out, each thread does either load or store, not both. This still fails, but doesn't test as many opportunities.
1 parent fc7dc5d commit 8602338

File tree

2 files changed

+10
-19
lines changed

2 files changed

+10
-19
lines changed

compute-shader-hello/src/main.rs

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,6 @@ use wgpu::util::DeviceExt;
2222

2323
use bytemuck;
2424

25-
// A strategy of 0 is just atomic loads.
26-
// A strategy of 1 replaces the flag load with an atomicOr.
27-
const STRATEGY: u32 = 0;
28-
2925
const USE_SPIRV: bool = false;
3026

3127
async fn run() {
@@ -83,7 +79,7 @@ async fn run() {
8379
});
8480
let config_buf = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
8581
label: None,
86-
contents: bytemuck::bytes_of(&[STRATEGY, 0]),
82+
contents: bytemuck::bytes_of(&[0u32]),
8783
usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::MAP_READ,
8884
});
8985
// This works if the buffer is initialized, otherwise reads all 0, for some reason.
@@ -152,12 +148,11 @@ async fn run() {
152148
let mut cpass = encoder.begin_compute_pass(&Default::default());
153149
cpass.set_pipeline(&pipeline);
154150
cpass.set_bind_group(0, &bind_group, &[]);
155-
cpass.dispatch(256, 1, 1);
151+
cpass.dispatch(512, 1, 1);
156152
}
157153
if let Some(query_set) = &query_set {
158154
encoder.write_timestamp(query_set, 1);
159155
}
160-
//encoder.copy_buffer_to_buffer(&input_buf, 0, &output_buf, 0, input.len() as u64);
161156
if let Some(query_set) = &query_set {
162157
encoder.resolve_query_set(query_set, 0..2, &query_buf, 0);
163158
}
@@ -171,7 +166,7 @@ async fn run() {
171166
if buf_future.await.is_ok() {
172167
let data_raw = &*buf_slice.get_mapped_range();
173168
let data: &[u32] = bytemuck::cast_slice(data_raw);
174-
println!("failures with strategy {}: {}", data[0], data[1]);
169+
println!("failures: {}", data[0]);
175170
}
176171
if features.contains(wgpu::Features::TIMESTAMP_QUERY) {
177172
let ts_period = queue.get_timestamp_period();

compute-shader-hello/src/shader.wgsl

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ struct DataBuf {
2626

2727
[[block]]
2828
struct ControlBuf {
29-
strategy: u32;
3029
failures: atomic<u32>;
3130
};
3231

@@ -46,19 +45,16 @@ fn permute_flag_ix(data_ix: u32) -> u32 {
4645
[[stage(compute), workgroup_size(256)]]
4746
fn main([[builtin(global_invocation_id)]] global_id: vec3<u32>) {
4847
let ix = global_id.x;
49-
// Originally this was passed in, but is now hardcoded, as D3DCompiler
50-
// thinks control flow becomes nonuniform if it's read from input.
51-
let n_iter = 1024u;
52-
let strategy = control_buf.strategy;
5348
var failures = 0u;
54-
for (var i: u32 = 0u; i < n_iter; i = i + 1u) {
55-
let wr_flag_ix = permute_flag_ix(ix);
56-
data_buf.data[ix].data = i + 1u;
49+
let a = ix >> 1u;
50+
if ((ix & 1u) == 0u) {
51+
let wr_flag_ix = permute_flag_ix(a);
52+
data_buf.data[a].data = 1u;
5753
storageBarrier(); // release semantics for writing flag
58-
atomicStore(&data_buf.data[wr_flag_ix].flag, i + 1u);
59-
54+
atomicStore(&data_buf.data[wr_flag_ix].flag, 1u);
55+
} else {
6056
// Read from a different workgroup
61-
let read_ix = ((ix & 0xffu) << 8u) | (ix >> 8u);
57+
let read_ix = ((a & 0xffu) << 8u) | (a >> 8u);
6258
let read_flag_ix = permute_flag_ix(read_ix);
6359

6460
let flag = atomicLoad(&data_buf.data[read_flag_ix].flag);

0 commit comments

Comments
 (0)