Documentation: Added "Recommended usage patterns" chapter.

adam-sawicki-a · adam-sawicki-a · commit 57fa5580c0d1 · 2018-03-12T17:46:49.000+01:00
diff --git a/docs/html/index.html b/docs/html/index.html
@@ -108,6 +108,11 @@ <h1><a class="anchor" id="main_table_of_contents"></a>
 </li>
 </ul>
 </li>
+<li><a class="el" href="usage_patterns.html">Recommended usage patterns</a><ul>
+<li><a class="el" href="usage_patterns.html#usage_patterns_simple">Simple patterns</a></li>
+<li><a class="el" href="usage_patterns.html#usage_patterns_advanced">Advanced patterns</a></li>
+</ul>
+</li>
 <li><a class="el" href="configuration.html">Configuration</a><ul>
 <li><a class="el" href="configuration.html#config_Vulkan_functions">Pointers to Vulkan functions</a></li>
 <li><a class="el" href="configuration.html#custom_memory_allocator">Custom host memory allocator</a></li>
diff --git a/docs/html/search/all_b.js b/docs/html/search/all_b.js
@@ -1,4 +1,5 @@
 var searchData=
 [
-  ['requiredflags',['requiredFlags',['../struct_vma_allocation_create_info.html#a9166390303ff42d783305bc31c2b6b90',1,'VmaAllocationCreateInfo']]]
+  ['requiredflags',['requiredFlags',['../struct_vma_allocation_create_info.html#a9166390303ff42d783305bc31c2b6b90',1,'VmaAllocationCreateInfo']]],
+  ['recommended_20usage_20patterns',['Recommended usage patterns',['../usage_patterns.html',1,'index']]]
 ];
diff --git a/docs/html/search/pages_7.js b/docs/html/search/pages_7.js
@@ -1,4 +1,4 @@
 var searchData=
 [
-  ['statistics',['Statistics',['../statistics.html',1,'index']]]
+  ['recommended_20usage_20patterns',['Recommended usage patterns',['../usage_patterns.html',1,'index']]]
 ];
diff --git a/docs/html/search/pages_8.js b/docs/html/search/pages_8.js
@@ -1,5 +1,4 @@
 var searchData=
 [
-  ['vulkan_20memory_20allocator',['Vulkan Memory Allocator',['../index.html',1,'']]],
-  ['vk_5fkhr_5fdedicated_5fallocation',['VK_KHR_dedicated_allocation',['../vk_khr_dedicated_allocation.html',1,'index']]]
+  ['statistics',['Statistics',['../statistics.html',1,'index']]]
 ];
diff --git a/docs/html/search/searchdata.js b/docs/html/search/searchdata.js
@@ -9,7 +9,7 @@ var indexSectionsWithContent =
   6: "v",
   7: "v",
   8: "v",
-  9: "acdglmqsv"
+  9: "acdglmqrsv"
 };
 
 var indexSectionNames =
diff --git a/docs/html/usage_patterns.html b/docs/html/usage_patterns.html
@@ -0,0 +1,126 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.13"/>
+<meta name="viewport" content="width=device-width, initial-scale=1"/>
+<title>Vulkan Memory Allocator: Recommended usage patterns</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/searchdata.js"></script>
+<script type="text/javascript" src="search/search.js"></script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td id="projectalign" style="padding-left: 0.5em;">
+   <div id="projectname">Vulkan Memory Allocator
+   </div>
+  </td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.13 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+<script type="text/javascript" src="menudata.js"></script>
+<script type="text/javascript" src="menu.js"></script>
+<script type="text/javascript">
+$(function() {
+  initMenu('',true,false,'search.php','Search');
+  $(document).ready(function() { init_search(); });
+});
+</script>
+<div id="main-nav"></div>
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+</div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div id="nav-path" class="navpath">
+  <ul>
+<li class="navelem"><a class="el" href="index.html">Vulkan Memory Allocator</a></li>  </ul>
+</div>
+</div><!-- top -->
+<div class="header">
+  <div class="headertitle">
+<div class="title">Recommended usage patterns </div>  </div>
+</div><!--header-->
+<div class="contents">
+<div class="textblock"><h1><a class="anchor" id="usage_patterns_simple"></a>
+Simple patterns</h1>
+<h2><a class="anchor" id="usage_patterns_simple_render_targets"></a>
+Render targets</h2>
+<p><b>When:</b> Any resources that you frequently write and read on GPU, e.g. images used as color attachments (aka "render targets"), depth-stencil attachments, images/buffers used as storage image/buffer (aka "Unordered Access View (UAV)").</p>
+<p><b>What to do:</b> Create them in video memory that is fastest to access from GPU using <a class="el" href="vk__mem__alloc_8h.html#aa5846affa1e9da3800e3e78fae2305ccac6b5dc1432d88647aa4cd456246eadf7">VMA_MEMORY_USAGE_GPU_ONLY</a>.</p>
+<p>Consider using <a class="el" href="vk_khr_dedicated_allocation.html">VK_KHR_dedicated_allocation</a> extension and/or manually creating them as dedicated allocations using <a class="el" href="vk__mem__alloc_8h.html#ad9889c10c798b040d59c92f257cae597a3fc311d855c2ff53f1090ef5c722b38f" title="Set this flag if the allocation should have its own memory block. ">VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT</a>, especially if they are large or if you plan to destroy and recreate them e.g. when display resolution changes. Prefer to create such resources first and all other GPU resources (like textures and vertex buffers) later.</p>
+<h2><a class="anchor" id="usage_patterns_simple_immutable_resources"></a>
+Immutable resources</h2>
+<p><b>When:</b> Any resources that you fill on CPU only once (aka "immutable") or infrequently and then read frequently on GPU, e.g. textures, vertex and index buffers, constant buffers that don't change often.</p>
+<p><b>What to do:</b> Create them in video memory that is fastest to access from GPU using <a class="el" href="vk__mem__alloc_8h.html#aa5846affa1e9da3800e3e78fae2305ccac6b5dc1432d88647aa4cd456246eadf7">VMA_MEMORY_USAGE_GPU_ONLY</a>.</p>
+<p>To initialize content of such resource, create a CPU-side (aka "staging") copy of it in system memory - <a class="el" href="vk__mem__alloc_8h.html#aa5846affa1e9da3800e3e78fae2305cca40bdf4cddeffeb12f43d45ca1286e0a5">VMA_MEMORY_USAGE_CPU_ONLY</a>, map it, fill it, and submit a transfer from it to the GPU resource. You can keep the staging copy if you need it for another upload transfer in the future. If you don't, you can destroy it or reuse this buffer for uploading different resource after the transfer finishes.</p>
+<p>Prefer to create just buffers in system memory rather than images, even for uploading textures. Use <code>vkCmdCopyBufferToImage()</code>. Dont use images with <code>VK_IMAGE_TILING_LINEAR</code>.</p>
+<h2><a class="anchor" id="usage_patterns_dynamic_resources"></a>
+Dynamic resources</h2>
+<p><b>When:</b> Any resources that change frequently (aka "dynamic"), e.g. every frame or every draw call, written on CPU, read on GPU.</p>
+<p><b>What to do:</b> Create them using <a class="el" href="vk__mem__alloc_8h.html#aa5846affa1e9da3800e3e78fae2305cca9066b52c5a7079bb74a69aaf8b92ff67">VMA_MEMORY_USAGE_CPU_TO_GPU</a>. You can map it and write to it directly on CPU, as well as read from it on GPU.</p>
+<p>This is a more complex situation. Different solutions are possible, and the best one depends on specific GPU type, but you can use this simple approach for the start. Prefer to write to such resource sequentially (e.g. using <code>memcpy</code>). Don't perform random access or any reads from it, as it may be very slow.</p>
+<h2><a class="anchor" id="usage_patterns_readback"></a>
+Readback</h2>
+<p><b>When:</b> Resources that contain data written by GPU that you want to read back on CPU, e.g. results of some computations.</p>
+<p><b>What to do:</b> Create them using <a class="el" href="vk__mem__alloc_8h.html#aa5846affa1e9da3800e3e78fae2305cca7b586d2fdaf82a463b58f581ed72be27">VMA_MEMORY_USAGE_GPU_TO_CPU</a>. You can write to them directly on GPU, as well as map and read them on CPU.</p>
+<h1><a class="anchor" id="usage_patterns_advanced"></a>
+Advanced patterns</h1>
+<h2><a class="anchor" id="usage_patterns_integrated_graphics"></a>
+Detecting integrated graphics</h2>
+<p>You can support integrated graphics (like Intel HD Graphics, AMD APU) better by detecting it in Vulkan. To do it, call <code>vkGetPhysicalDeviceProperties()</code>, inspect <code>VkPhysicalDeviceProperties::deviceType</code> and look for <code>VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU</code>. When you find it, you can assume that memory is unified and all memory types are equally fast to access from GPU, regardless of <code>VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT</code>.</p>
+<p>You can then sum up sizes of all available memory heaps and treat them as useful for your GPU resources, instead of only <code>DEVICE_LOCAL</code> ones. You can also prefer to create your resources in memory types that are <code>HOST_VISIBLE</code> to map them directly instead of submitting explicit transfer (see below).</p>
+<h2><a class="anchor" id="usage_patterns_direct_vs_transfer"></a>
+Direct access versus transfer</h2>
+<p>For resources that you frequently write on CPU and read on GPU, many solutions are possible:</p>
+<ol type="1">
+<li>Create one copy in video memory using <a class="el" href="vk__mem__alloc_8h.html#aa5846affa1e9da3800e3e78fae2305ccac6b5dc1432d88647aa4cd456246eadf7">VMA_MEMORY_USAGE_GPU_ONLY</a>, second copy in system memory using <a class="el" href="vk__mem__alloc_8h.html#aa5846affa1e9da3800e3e78fae2305cca40bdf4cddeffeb12f43d45ca1286e0a5">VMA_MEMORY_USAGE_CPU_ONLY</a> and submit explicit tranfer each time.</li>
+<li>Create just single copy using <a class="el" href="vk__mem__alloc_8h.html#aa5846affa1e9da3800e3e78fae2305cca9066b52c5a7079bb74a69aaf8b92ff67">VMA_MEMORY_USAGE_CPU_TO_GPU</a>, map it and fill it on CPU, read it directly on GPU.</li>
+<li>Create just single copy using <a class="el" href="vk__mem__alloc_8h.html#aa5846affa1e9da3800e3e78fae2305cca40bdf4cddeffeb12f43d45ca1286e0a5">VMA_MEMORY_USAGE_CPU_ONLY</a>, map it and fill it on CPU, read it directly on GPU.</li>
+</ol>
+<p>Which solution is the most efficient depends on your resource and especially on the GPU. It is best to measure it and then make the decision. Some general recommendations:</p>
+<ul>
+<li>On integrated graphics use (2) or (3) to avoid unnecesary time and memory overhead related to using a second copy.</li>
+<li>For small resources (e.g. constant buffers) use (2). Discrete AMD cards have special 256 MiB pool of video memory that is directly mappable. Even if the resource ends up in system memory, its data may be cached on GPU after first fetch over PCIe bus.</li>
+<li>For larger resources (e.g. textures), decide between (1) and (2). You may want to differentiate NVIDIA and AMD, e.g. by looking for memory type that is both <code>DEVICE_LOCAL</code> and <code>HOST_VISIBLE</code>. When you find it, use (2), otherwise use (1).</li>
+</ul>
+<p>Similarly, for resources that you frequently write on GPU and read on CPU, multiple solutions are possible:</p>
+<ol type="1">
+<li>Create one copy in video memory using <a class="el" href="vk__mem__alloc_8h.html#aa5846affa1e9da3800e3e78fae2305ccac6b5dc1432d88647aa4cd456246eadf7">VMA_MEMORY_USAGE_GPU_ONLY</a>, second copy in system memory using <a class="el" href="vk__mem__alloc_8h.html#aa5846affa1e9da3800e3e78fae2305cca7b586d2fdaf82a463b58f581ed72be27">VMA_MEMORY_USAGE_GPU_TO_CPU</a> and submit explicit tranfer each time.</li>
+<li>Create just single copy using <a class="el" href="vk__mem__alloc_8h.html#aa5846affa1e9da3800e3e78fae2305cca7b586d2fdaf82a463b58f581ed72be27">VMA_MEMORY_USAGE_GPU_TO_CPU</a>, write to it directly on GPU, map it and read it on CPU.</li>
+</ol>
+<p>You should take some measurements to decide which option is faster in case of your specific resource.</p>
+<p>If you don't want to specialize your code for specific types of GPUs, yon can still make an simple optimization for cases when your resource ends up in mappable memory to use it directly in this case instead of creating CPU-side staging copy. For details see <a class="el" href="memory_mapping.html#memory_mapping_finding_if_memory_mappable">Finding out if memory is mappable</a>. </p>
+</div></div><!-- contents -->
+<!-- start footer part -->
+<hr class="footer"/><address class="footer"><small>
+Generated by &#160;<a href="http://www.doxygen.org/index.html">
+<img class="footer" src="doxygen.png" alt="doxygen"/>
+</a> 1.8.13
+</small></address>
+</body>
+</html>
diff --git a/docs/html/vk__mem__alloc_8h_source.html b/docs/html/vk__mem__alloc_8h_source.html
diff --git a/src/vk_mem_alloc.h b/src/vk_mem_alloc.h
@@ -63,6 +63,9 @@ Documentation of all members: vk_mem_alloc.h
   - \subpage allocation_annotation
     - [Allocation user data](@ref allocation_user_data)
     - [Allocation names](@ref allocation_names)
+- \subpage usage_patterns
+  - [Simple patterns](@ref usage_patterns_simple)
+  - [Advanced patterns](@ref usage_patterns_advanced)
 - \subpage configuration
   - [Pointers to Vulkan functions](@ref config_Vulkan_functions)
   - [Custom host memory allocator](@ref custom_memory_allocator)
@@ -803,6 +806,133 @@ printf("Image name: %s\n", imageName);
 
 That string is also printed in JSON report created by vmaBuildStatsString().
 
+
+\page usage_patterns Recommended usage patterns
+
+\section usage_patterns_simple Simple patterns
+
+\subsection usage_patterns_simple_render_targets Render targets
+
+<b>When:</b>
+Any resources that you frequently write and read on GPU,
+e.g. images used as color attachments (aka "render targets"), depth-stencil attachments,
+images/buffers used as storage image/buffer (aka "Unordered Access View (UAV)").
+
+<b>What to do:</b>
+Create them in video memory that is fastest to access from GPU using
+#VMA_MEMORY_USAGE_GPU_ONLY.
+
+Consider using [VK_KHR_dedicated_allocation](@ref vk_khr_dedicated_allocation) extension
+and/or manually creating them as dedicated allocations using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT,
+especially if they are large or if you plan to destroy and recreate them e.g. when
+display resolution changes.
+Prefer to create such resources first and all other GPU resources (like textures and vertex buffers) later.
+
+\subsection usage_patterns_simple_immutable_resources Immutable resources
+
+<b>When:</b>
+Any resources that you fill on CPU only once (aka "immutable") or infrequently
+and then read frequently on GPU,
+e.g. textures, vertex and index buffers, constant buffers that don't change often.
+
+<b>What to do:</b>
+Create them in video memory that is fastest to access from GPU using
+#VMA_MEMORY_USAGE_GPU_ONLY.
+
+To initialize content of such resource, create a CPU-side (aka "staging") copy of it
+in system memory - #VMA_MEMORY_USAGE_CPU_ONLY, map it, fill it,
+and submit a transfer from it to the GPU resource.
+You can keep the staging copy if you need it for another upload transfer in the future.
+If you don't, you can destroy it or reuse this buffer for uploading different resource
+after the transfer finishes.
+
+Prefer to create just buffers in system memory rather than images, even for uploading textures.
+Use `vkCmdCopyBufferToImage()`.
+Dont use images with `VK_IMAGE_TILING_LINEAR`.
+
+\subsection usage_patterns_dynamic_resources Dynamic resources
+
+<b>When:</b>
+Any resources that change frequently (aka "dynamic"), e.g. every frame or every draw call,
+written on CPU, read on GPU.
+
+<b>What to do:</b>
+Create them using #VMA_MEMORY_USAGE_CPU_TO_GPU.
+You can map it and write to it directly on CPU, as well as read from it on GPU.
+
+This is a more complex situation. Different solutions are possible,
+and the best one depends on specific GPU type, but you can use this simple approach for the start.
+Prefer to write to such resource sequentially (e.g. using `memcpy`).
+Don't perform random access or any reads from it, as it may be very slow.
+
+\subsection usage_patterns_readback Readback
+
+<b>When:</b>
+Resources that contain data written by GPU that you want to read back on CPU,
+e.g. results of some computations.
+
+<b>What to do:</b>
+Create them using #VMA_MEMORY_USAGE_GPU_TO_CPU.
+You can write to them directly on GPU, as well as map and read them on CPU.
+
+\section usage_patterns_advanced Advanced patterns
+
+\subsection usage_patterns_integrated_graphics Detecting integrated graphics
+
+You can support integrated graphics (like Intel HD Graphics, AMD APU) better
+by detecting it in Vulkan.
+To do it, call `vkGetPhysicalDeviceProperties()`, inspect
+`VkPhysicalDeviceProperties::deviceType` and look for `VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU`.
+When you find it, you can assume that memory is unified and all memory types are equally fast
+to access from GPU, regardless of `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`.
+
+You can then sum up sizes of all available memory heaps and treat them as useful for
+your GPU resources, instead of only `DEVICE_LOCAL` ones.
+You can also prefer to create your resources in memory types that are `HOST_VISIBLE` to map them
+directly instead of submitting explicit transfer (see below).
+
+\subsection usage_patterns_direct_vs_transfer Direct access versus transfer
+
+For resources that you frequently write on CPU and read on GPU, many solutions are possible:
+
+-# Create one copy in video memory using #VMA_MEMORY_USAGE_GPU_ONLY,
+   second copy in system memory using #VMA_MEMORY_USAGE_CPU_ONLY and submit explicit tranfer each time.
+-# Create just single copy using #VMA_MEMORY_USAGE_CPU_TO_GPU, map it and fill it on CPU,
+   read it directly on GPU.
+-# Create just single copy using #VMA_MEMORY_USAGE_CPU_ONLY, map it and fill it on CPU,
+   read it directly on GPU.
+
+Which solution is the most efficient depends on your resource and especially on the GPU.
+It is best to measure it and then make the decision.
+Some general recommendations:
+
+- On integrated graphics use (2) or (3) to avoid unnecesary time and memory overhead
+  related to using a second copy.
+- For small resources (e.g. constant buffers) use (2).
+  Discrete AMD cards have special 256 MiB pool of video memory that is directly mappable.
+  Even if the resource ends up in system memory, its data may be cached on GPU after first
+  fetch over PCIe bus.
+- For larger resources (e.g. textures), decide between (1) and (2).
+  You may want to differentiate NVIDIA and AMD, e.g. by looking for memory type that is
+  both `DEVICE_LOCAL` and `HOST_VISIBLE`. When you find it, use (2), otherwise use (1).
+
+Similarly, for resources that you frequently write on GPU and read on CPU, multiple
+solutions are possible:
+
+-# Create one copy in video memory using #VMA_MEMORY_USAGE_GPU_ONLY,
+   second copy in system memory using #VMA_MEMORY_USAGE_GPU_TO_CPU and submit explicit tranfer each time.
+-# Create just single copy using #VMA_MEMORY_USAGE_GPU_TO_CPU, write to it directly on GPU,
+   map it and read it on CPU.
+
+You should take some measurements to decide which option is faster in case of your specific
+resource.
+
+If you don't want to specialize your code for specific types of GPUs, yon can still make
+an simple optimization for cases when your resource ends up in mappable memory to use it
+directly in this case instead of creating CPU-side staging copy.
+For details see [Finding out if memory is mappable](@ref memory_mapping_finding_if_memory_mappable).
+
+
 \page configuration Configuration
 
 Please check "CONFIGURATION SECTION" in the code to find macros that you can define

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`	`var searchData=`
`2`	`2`	`[`
`3`		`- ['requiredflags',['requiredFlags',['../struct_vma_allocation_create_info.html#a9166390303ff42d783305bc31c2b6b90',1,'VmaAllocationCreateInfo']]]`
	`3`	`+ ['requiredflags',['requiredFlags',['../struct_vma_allocation_create_info.html#a9166390303ff42d783305bc31c2b6b90',1,'VmaAllocationCreateInfo']]],`
	`4`	`+ ['recommended_20usage_20patterns',['Recommended usage patterns',['../usage_patterns.html',1,'index']]]`
`4`	`5`	`];`
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`	`1`	`var searchData=`
`2`	`2`	`[`
`3`		`- ['statistics',['Statistics',['../statistics.html',1,'index']]]`
	`3`	`+ ['recommended_20usage_20patterns',['Recommended usage patterns',['../usage_patterns.html',1,'index']]]`
`4`	`4`	`];`
Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,4 @@`
`1`	`1`	`var searchData=`
`2`	`2`	`[`
`3`		`- ['vulkan_20memory_20allocator',['Vulkan Memory Allocator',['../index.html',1,'']]],`
`4`		`- ['vk_5fkhr_5fdedicated_5fallocation',['VK_KHR_dedicated_allocation',['../vk_khr_dedicated_allocation.html',1,'index']]]`
	`3`	`+ ['statistics',['Statistics',['../statistics.html',1,'index']]]`
`5`	`4`	`];`