@@ -70,14 +70,17 @@ class VulkanExample : public VulkanExampleBase
70
70
} graphics;
71
71
72
72
// Resources for the compute part of the example
73
+ // SRS - Number of compute command buffers: set to 1 for serialized processing, or to 2 to run compute in parallel with the graphics queue
74
+ #define COMPUTE_CMD_BUFFERS 2
73
75
struct Compute {
74
- struct Semaphores {
76
+ typedef struct Semaphores_t {
75
77
VkSemaphore ready{ VK_NULL_HANDLE };
76
78
VkSemaphore complete{ VK_NULL_HANDLE };
77
- } semaphores;
79
+ } semaphores_t ;
80
+ std::array<semaphores_t , COMPUTE_CMD_BUFFERS> semaphores{};
78
81
VkQueue queue{ VK_NULL_HANDLE };
79
82
VkCommandPool commandPool{ VK_NULL_HANDLE };
80
- std::array<VkCommandBuffer, 2 > commandBuffers{};
83
+ std::array<VkCommandBuffer, COMPUTE_CMD_BUFFERS > commandBuffers{};
81
84
VkDescriptorSetLayout descriptorSetLayout{ VK_NULL_HANDLE };
82
85
std::array<VkDescriptorSet, 2 > descriptorSets{ VK_NULL_HANDLE };
83
86
VkPipelineLayout pipelineLayout{ VK_NULL_HANDLE };
@@ -126,8 +129,10 @@ class VulkanExample : public VulkanExampleBase
126
129
vkDestroyPipelineLayout (device, compute.pipelineLayout , nullptr );
127
130
vkDestroyDescriptorSetLayout (device, compute.descriptorSetLayout , nullptr );
128
131
vkDestroyPipeline (device, compute.pipeline , nullptr );
129
- vkDestroySemaphore (device, compute.semaphores .ready , nullptr );
130
- vkDestroySemaphore (device, compute.semaphores .complete , nullptr );
132
+ for (uint32_t i = 0 ; i < compute.semaphores .size (); i++) {
133
+ vkDestroySemaphore (device, compute.semaphores [i].ready , nullptr );
134
+ vkDestroySemaphore (device, compute.semaphores [i].complete , nullptr );
135
+ }
131
136
vkDestroyCommandPool (device, compute.commandPool , nullptr );
132
137
133
138
// SSBOs
@@ -176,7 +181,7 @@ class VulkanExample : public VulkanExampleBase
176
181
}
177
182
}
178
183
179
- void addComputeToComputeBarriers (VkCommandBuffer commandBuffer)
184
+ void addComputeToComputeBarriers (VkCommandBuffer commandBuffer, uint32_t readSet )
180
185
{
181
186
VkBufferMemoryBarrier bufferBarrier = vks::initializers::bufferMemoryBarrier ();
182
187
bufferBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
@@ -185,10 +190,20 @@ class VulkanExample : public VulkanExampleBase
185
190
bufferBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
186
191
bufferBarrier.size = VK_WHOLE_SIZE;
187
192
std::vector<VkBufferMemoryBarrier> bufferBarriers;
188
- bufferBarrier.buffer = storageBuffers.input .buffer ;
189
- bufferBarriers.push_back (bufferBarrier);
190
- bufferBarrier.buffer = storageBuffers.output .buffer ;
191
- bufferBarriers.push_back (bufferBarrier);
193
+ if (readSet == 0 )
194
+ {
195
+ // SRS - we have written to output.buffer and need a memory barrier before reading it
196
+ // - don't need a memory barrier for input.buffer, the execution barrier is enough
197
+ bufferBarrier.buffer = storageBuffers.output .buffer ;
198
+ bufferBarriers.push_back (bufferBarrier);
199
+ }
200
+ else // if (readSet == 1)
201
+ {
202
+ // SRS - we have written to input.buffer and need a memory barrier before reading it
203
+ // - don't need a memory barrier for output.buffer, the execution barrier is enough
204
+ bufferBarrier.buffer = storageBuffers.input .buffer ;
205
+ bufferBarriers.push_back (bufferBarrier);
206
+ }
192
207
vkCmdPipelineBarrier (
193
208
commandBuffer,
194
209
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
@@ -280,7 +295,7 @@ class VulkanExample : public VulkanExampleBase
280
295
vkCmdEndRenderPass (drawCmdBuffers[i]);
281
296
282
297
// release the storage buffers to the compute queue
283
- addGraphicsToComputeBarriers (drawCmdBuffers[i], VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT , 0 , VK_PIPELINE_STAGE_VERTEX_INPUT_BIT , VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
298
+ addGraphicsToComputeBarriers (drawCmdBuffers[i], VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT , 0 , VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT , VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
284
299
285
300
VK_CHECK_RESULT (vkEndCommandBuffer (drawCmdBuffers[i]));
286
301
}
@@ -292,19 +307,20 @@ class VulkanExample : public VulkanExampleBase
292
307
VkCommandBufferBeginInfo cmdBufInfo = vks::initializers::commandBufferBeginInfo ();
293
308
cmdBufInfo.flags = VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT;
294
309
295
- for (uint32_t i = 0 ; i < 2 ; i++) {
310
+ for (uint32_t i = 0 ; i < compute. commandBuffers . size () ; i++) {
296
311
297
312
VK_CHECK_RESULT (vkBeginCommandBuffer (compute.commandBuffers [i], &cmdBufInfo));
298
313
299
314
// Acquire the storage buffers from the graphics queue
300
- addGraphicsToComputeBarriers (compute.commandBuffers [i], 0 , VK_ACCESS_SHADER_WRITE_BIT , VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
315
+ addGraphicsToComputeBarriers (compute.commandBuffers [i], 0 , VK_ACCESS_SHADER_READ_BIT , VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
301
316
302
317
vkCmdBindPipeline (compute.commandBuffers [i], VK_PIPELINE_BIND_POINT_COMPUTE, compute.pipeline );
303
318
304
319
uint32_t calculateNormals = 0 ;
305
320
vkCmdPushConstants (compute.commandBuffers [i], compute.pipelineLayout , VK_SHADER_STAGE_COMPUTE_BIT, 0 , sizeof (uint32_t ), &calculateNormals);
306
321
307
322
// Dispatch the compute job
323
+ // SRS - Iterations **must** be an even number, so that readSet starts at 1 and the final result ends up in output.buffer with readSet equal to 0
308
324
const uint32_t iterations = 64 ;
309
325
for (uint32_t j = 0 ; j < iterations; j++) {
310
326
readSet = 1 - readSet;
@@ -319,7 +335,7 @@ class VulkanExample : public VulkanExampleBase
319
335
320
336
// Don't add a barrier on the last iteration of the loop, since we'll have an explicit release to the graphics queue
321
337
if (j != iterations - 1 ) {
322
- addComputeToComputeBarriers (compute.commandBuffers [i]);
338
+ addComputeToComputeBarriers (compute.commandBuffers [i], readSet );
323
339
}
324
340
325
341
}
@@ -386,7 +402,7 @@ class VulkanExample : public VulkanExampleBase
386
402
// Add an initial release barrier to the graphics queue,
387
403
// so that when the compute command buffer executes for the first time
388
404
// it doesn't complain about a lack of a corresponding "release" to its "acquire"
389
- addGraphicsToComputeBarriers (copyCmd, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT , 0 , VK_PIPELINE_STAGE_VERTEX_INPUT_BIT , VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
405
+ addGraphicsToComputeBarriers (copyCmd, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT , 0 , VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT , VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
390
406
vulkanDevice->flushCommandBuffer (copyCmd, queue, true );
391
407
392
408
stagingBuffer.destroy ();
@@ -595,13 +611,15 @@ class VulkanExample : public VulkanExampleBase
595
611
VK_CHECK_RESULT (vkCreateCommandPool (device, &cmdPoolInfo, nullptr , &compute.commandPool ));
596
612
597
613
// Create the command buffers for compute operations (one per entry in compute.commandBuffers)
598
- VkCommandBufferAllocateInfo cmdBufAllocateInfo = vks::initializers::commandBufferAllocateInfo (compute.commandPool , VK_COMMAND_BUFFER_LEVEL_PRIMARY, 2 );
614
+ VkCommandBufferAllocateInfo cmdBufAllocateInfo = vks::initializers::commandBufferAllocateInfo (compute.commandPool , VK_COMMAND_BUFFER_LEVEL_PRIMARY, static_cast < uint32_t >(compute. commandBuffers . size ()) );
599
615
VK_CHECK_RESULT (vkAllocateCommandBuffers (device, &cmdBufAllocateInfo, &compute.commandBuffers [0 ]));
600
616
601
617
// Semaphores for graphics / compute synchronization
602
618
VkSemaphoreCreateInfo semaphoreCreateInfo = vks::initializers::semaphoreCreateInfo ();
603
- VK_CHECK_RESULT (vkCreateSemaphore (device, &semaphoreCreateInfo, nullptr , &compute.semaphores .ready ));
604
- VK_CHECK_RESULT (vkCreateSemaphore (device, &semaphoreCreateInfo, nullptr , &compute.semaphores .complete ));
619
+ for (uint32_t i = 0 ; i < compute.semaphores .size (); i++) {
620
+ VK_CHECK_RESULT (vkCreateSemaphore (device, &semaphoreCreateInfo, nullptr , &compute.semaphores [i].ready ));
621
+ VK_CHECK_RESULT (vkCreateSemaphore (device, &semaphoreCreateInfo, nullptr , &compute.semaphores [i].complete ));
622
+ }
605
623
606
624
// Build a single command buffer containing the compute dispatch commands
607
625
buildComputeCommandBuffer ();
@@ -643,20 +661,46 @@ class VulkanExample : public VulkanExampleBase
643
661
// We'll be using semaphores to synchronize between the compute shader updating the cloth and the graphics pipeline drawing it
644
662
645
663
static bool firstDraw = true ;
664
+ static uint32_t computeSubmitIndex{ 0 }, graphicsSubmitIndex{ 0 };
665
+ if (COMPUTE_CMD_BUFFERS > 1 ) // would ideally be "if constexpr", but that requires C++17
666
+ {
667
+ // SRS - if we are double buffering the compute queue, swap the compute command buffer indices
668
+ graphicsSubmitIndex = computeSubmitIndex;
669
+ computeSubmitIndex = 1 - graphicsSubmitIndex;
670
+ }
671
+
646
672
VkSubmitInfo computeSubmitInfo = vks::initializers::submitInfo ();
647
673
VkPipelineStageFlags computeWaitDstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
648
674
if (!firstDraw) {
649
675
computeSubmitInfo.waitSemaphoreCount = 1 ;
650
- computeSubmitInfo.pWaitSemaphores = &compute.semaphores .ready ;
676
+ computeSubmitInfo.pWaitSemaphores = &compute.semaphores [computeSubmitIndex] .ready ;
651
677
computeSubmitInfo.pWaitDstStageMask = &computeWaitDstStageMask;
652
678
}
653
679
else {
654
680
firstDraw = false ;
681
+ if (COMPUTE_CMD_BUFFERS > 1 ) // would ideally be "if constexpr", but that requires C++17
682
+ {
683
+ // SRS - if we are double buffering the compute queue, submit extra command buffer at start
684
+ computeSubmitInfo.signalSemaphoreCount = 1 ;
685
+ computeSubmitInfo.pSignalSemaphores = &compute.semaphores [graphicsSubmitIndex].complete ;
686
+ computeSubmitInfo.commandBufferCount = 1 ;
687
+ computeSubmitInfo.pCommandBuffers = &compute.commandBuffers [graphicsSubmitIndex];
688
+
689
+ VK_CHECK_RESULT (vkQueueSubmit (compute.queue , 1 , &computeSubmitInfo, VK_NULL_HANDLE));
690
+
691
+ // Add an extra set of acquire and release barriers to the graphics queue,
692
+ // so that when the second compute command buffer executes for the first time
693
+ // it doesn't complain about a lack of a corresponding "acquire" to its "release" and vice versa
694
+ VkCommandBuffer barrierCmd = vulkanDevice->createCommandBuffer (VK_COMMAND_BUFFER_LEVEL_PRIMARY, true );
695
+ addComputeToGraphicsBarriers (barrierCmd, 0 , VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT);
696
+ addGraphicsToComputeBarriers (barrierCmd, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, 0 , VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
697
+ vulkanDevice->flushCommandBuffer (barrierCmd, queue, true );
698
+ }
655
699
}
656
700
computeSubmitInfo.signalSemaphoreCount = 1 ;
657
- computeSubmitInfo.pSignalSemaphores = &compute.semaphores .complete ;
701
+ computeSubmitInfo.pSignalSemaphores = &compute.semaphores [computeSubmitIndex] .complete ;
658
702
computeSubmitInfo.commandBufferCount = 1 ;
659
- computeSubmitInfo.pCommandBuffers = &compute.commandBuffers [readSet ];
703
+ computeSubmitInfo.pCommandBuffers = &compute.commandBuffers [computeSubmitIndex ];
660
704
661
705
VK_CHECK_RESULT (vkQueueSubmit (compute.queue , 1 , &computeSubmitInfo, VK_NULL_HANDLE));
662
706
@@ -667,10 +711,10 @@ class VulkanExample : public VulkanExampleBase
667
711
submitPipelineStages, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT
668
712
};
669
713
VkSemaphore waitSemaphores[2 ] = {
670
- semaphores.presentComplete , compute.semaphores .complete
714
+ semaphores.presentComplete , compute.semaphores [graphicsSubmitIndex] .complete
671
715
};
672
716
VkSemaphore signalSemaphores[2 ] = {
673
- semaphores.renderComplete , compute.semaphores .ready
717
+ semaphores.renderComplete , compute.semaphores [graphicsSubmitIndex] .ready
674
718
};
675
719
676
720
submitInfo.waitSemaphoreCount = 2 ;
0 commit comments