Skip to content

Commit 58f8e0f

Browse files
committed
Merge branch 'master' into 'development'
v1.0 See merge request GPUOpen-Effects/FidelityFX-SPD!3
2 parents 57fcf4b + 2773348 commit 58f8e0f

34 files changed

+703
-562
lines changed

.gitlab-ci.yml

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
variables:
22
SampleName: SPDSample
33
GIT_SUBMODULE_STRATEGY: normal
4-
4+
55
stages:
66
- build
77
- deploy
@@ -11,22 +11,24 @@ build_dx12:
1111
- windows
1212
- amd64
1313
stage: build
14-
artifacts:
15-
untracked: true
1614
script:
1715
- 'cmake -S sample -B sample/build/DX12 -G "Visual Studio 15 2017" -A x64 -DGFX_API=DX12'
18-
- 'cmake --build sample/build/DX12'
16+
- 'cmake --build sample/build/DX12 --config Release'
17+
artifacts:
18+
paths:
19+
- sample/bin/
1920

2021
build_vk:
2122
tags:
2223
- windows
2324
- amd64
2425
stage: build
25-
artifacts:
26-
untracked: true
2726
script:
28-
- 'cmake -S sample -B sample/build/Vk -G "Visual Studio 15 2017" -A x64 -DGFX_API=VK'
29-
- 'cmake --build sample/build/Vk'
27+
- 'cmake -S sample -B sample/build/VK -G "Visual Studio 15 2017" -A x64 -DGFX_API=VK'
28+
- 'cmake --build sample/build/VK --config Release'
29+
artifacts:
30+
paths:
31+
- sample/bin/
3032

3133
package_sample:
3234
tags:
@@ -37,17 +39,19 @@ package_sample:
3739
- build_dx12
3840
- build_vk
3941
script:
40-
- echo "Packaging build"
42+
- echo "Packaging build"
43+
- copy %VULKAN_SDK%\Bin\glslc.exe .\sample\bin
44+
- echo cd .\sample\bin\ > %SampleName%_VK.bat
45+
- echo start %SampleName%_VK.exe >> %SampleName%_VK.bat
46+
- echo cd .\sample\bin\ > %SampleName%_DX12.bat
47+
- echo start %SampleName%_DX12.exe >> %SampleName%_DX12.bat
4148
artifacts:
4249
name: "%SampleName%-%CI_COMMIT_TAG%-%CI_COMMIT_REF_NAME%-%CI_COMMIT_SHORT_SHA%"
4350
paths:
44-
- LICENSE.txt
45-
- sample/bin/dxcompiler.dll
46-
- sample/bin/dxil.dll
47-
- sample/bin/amd_ags_x64.dll
48-
- sample/bin/brdfLut.dds
49-
- sample/bin/%SampleName%_VK.exe
50-
- sample/bin/%SampleName%_DX12.exe
51-
- sample/bin/ShaderLibDX
52-
- sample/bin/ShaderLibVK
53-
- sample/media/
51+
- "NOTICES.txt"
52+
- "sample/bin/"
53+
- "sample/media/"
54+
- "docs/"
55+
- "readme.md"
56+
- "%SampleName%_VK.bat"
57+
- "%SampleName%_DX12.bat"

NOTICES.txt

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
2+
3+
Permission is hereby granted, free of charge, to any person obtaining a copy
4+
of this software and associated documentation files (the "Software"), to deal
5+
in the Software without restriction, including without limitation the rights
6+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7+
copies of the Software, and to permit persons to whom the Software is
8+
furnished to do so, subject to the following conditions:
9+
10+
The above copyright notice and this permission notice shall be included in
11+
all copies or substantial portions of the Software.
12+
13+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19+
THE SOFTWARE.

README.md

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,12 @@ Downsampler
2424
- SPD CS linear sampler: uses the SPD library and a linear sampler for sampling the source texture
2525

2626
SPD Versions
27-
- WaveOps: uses Intrinsics and LDS to share the data between threads - this is the recommended version
2827
- NO-WaveOps: uses only LDS to share the data between threads
28+
- WaveOps: uses Intrinsics and LDS to share the data between threads
2929

3030
SPD Non-Packed / Packed Version
3131
- Non-Packed: uses fp32
3232
- Packed: uses fp16, reduced register pressure
3333

34-
# Known issues
35-
DX12, No-WaveOps, Non-Packed: Corruptions on Nvidia when using DXC compiler.
36-
3734
# Recommendations
3835
We recommend using the WaveOps path when supported. If higher precision is not needed, you can enable the packed mode - it has less register pressure and can run a bit faster as well.

docs/FidelityFX_SPD.pdf

651 KB
Binary file not shown.

ffx-spd/ffx_spd.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060

6161
// // global atomic counter - MUST be initialized to 0
6262
// // GLSL:
63-
// layout(std430, set=0, binding=2) buffer globalAtomicBuffer
63+
// layout(std430, set=0, binding=2) coherent buffer globalAtomicBuffer
6464
// {
6565
// uint counter;
6666
// } globalAtomic;
@@ -69,7 +69,7 @@
6969
// {
7070
// uint counter;
7171
// };
72-
// [[vk::binding(2)]] RWStructuredBuffer<globalAtomicBuffer> globalAtomic;
72+
// [[vk::binding(2)]] globallycoherent RWStructuredBuffer<globalAtomicBuffer> globalAtomic;
7373

7474
// // [SAMPLER] add sampler
7575
// GLSL: layout(set=0, binding=3) uniform sampler srcSampler;
@@ -81,7 +81,7 @@
8181
// // GLSL:
8282
// layout(push_constant) uniform pushConstants {
8383
// uint mips; // needed to opt out earlier if mips are < 12
84-
// uint numWorkGroups; // number of total thread groups, so numWorkGroupsX * numWorkGroupsY * numWorkGroupsZ
84+
// uint numWorkGroups; // number of total thread groups, so numWorkGroupsX * numWorkGroupsY * 1
8585
// } spdConstants;
8686
// // HLSL:
8787
// [[vk::push_constant]]

sample/README.md

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,12 @@ Downsampler
2424
- SPD CS linear sampler: uses the SPD library and a linear sampler for sampling the source texture
2525

2626
SPD Versions
27-
- WaveOps: uses Intrinsics and LDS to share the data between threads - this is the recommended version
2827
- NO-WaveOps: uses only LDS to share the data between threads
28+
- WaveOps: uses Intrinsics and LDS to share the data between threads
2929

3030
SPD Non-Packed / Packed Version
3131
- Non-Packed: uses fp32
3232
- Packed: uses fp16, reduced register pressure
3333

34-
# Known issues
35-
DX12, No-WaveOps, Non-Packed: Corruptions on Nvidia when using DXC compiler.
36-
3734
# Recommendations
3835
We recommend using the WaveOps path when supported. If higher precision is not needed, you can enable the packed mode - it has less register pressure and can run a bit faster as well.

sample/src/DX12/CSDownsampler.cpp

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ namespace CAULDRON_DX12
142142
1,
143143
0,
144144
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS),
145-
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
145+
D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
146146

147147
// Create views for the mip chain
148148
//
@@ -172,7 +172,17 @@ namespace CAULDRON_DX12
172172

173173
void CSDownsampler::OnDestroy()
174174
{
175-
m_pRootSignature->Release();
175+
if (m_pPipeline != NULL)
176+
{
177+
m_pPipeline->Release();
178+
m_pPipeline = NULL;
179+
}
180+
181+
if (m_pRootSignature != NULL)
182+
{
183+
m_pRootSignature->Release();
184+
m_pRootSignature = NULL;
185+
}
176186
}
177187

178188
void CSDownsampler::Draw(ID3D12GraphicsCommandList* pCommandList)
@@ -183,7 +193,7 @@ namespace CAULDRON_DX12
183193
//
184194
for (int i = 0; i < m_mipCount; i++)
185195
{
186-
pCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_result.GetResource(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, i));
196+
pCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_result.GetResource(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, i));
187197

188198
D3D12_GPU_VIRTUAL_ADDRESS cbHandle;
189199
uint32_t* pConstMem;
@@ -219,7 +229,7 @@ namespace CAULDRON_DX12
219229
uint32_t dispatchZ = 1;
220230
pCommandList->Dispatch(dispatchX, dispatchY, dispatchZ);
221231

222-
pCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_result.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, i));
232+
pCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_result.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, i));
223233
}
224234
}
225235

sample/src/DX12/SPD_CS.cpp

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ namespace CAULDRON_DX12
153153
1,
154154
0,
155155
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS),
156-
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
156+
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
157157

158158
// Create views for the mip chain
159159
//
@@ -178,12 +178,23 @@ namespace CAULDRON_DX12
178178

179179
void SPD_CS::OnDestroyWindowSizeDependentResources()
180180
{
181+
m_globalCounterBuffer.OnDestroy();
181182
m_result.OnDestroy();
182183
}
183184

184185
void SPD_CS::OnDestroy()
185186
{
186-
m_pRootSignature->Release();
187+
if (m_pPipeline != NULL)
188+
{
189+
m_pPipeline->Release();
190+
m_pPipeline = NULL;
191+
}
192+
193+
if (m_pRootSignature != NULL)
194+
{
195+
m_pRootSignature->Release();
196+
m_pRootSignature = NULL;
197+
}
187198
}
188199

189200
void SPD_CS::Draw(ID3D12GraphicsCommandList2* pCommandList)
@@ -229,13 +240,16 @@ namespace CAULDRON_DX12
229240
D3D12_WRITEBUFFERIMMEDIATE_PARAMETER pParams = { m_globalCounterBuffer.GetResource()->GetGPUVirtualAddress(), 0 };
230241
pCommandList->WriteBufferImmediate(1, &pParams, NULL);
231242

232-
pCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_globalCounterBuffer.GetResource(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 0));
243+
D3D12_RESOURCE_BARRIER resourceBarriers[2] = {
244+
CD3DX12_RESOURCE_BARRIER::Transition(m_globalCounterBuffer.GetResource(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 0),
245+
CD3DX12_RESOURCE_BARRIER::Transition(m_result.GetResource(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS)
246+
};
247+
pCommandList->ResourceBarrier(2, resourceBarriers);
233248

234249
// Dispatch
235250
//
236-
pCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_result.GetResource(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS));
237251
pCommandList->Dispatch(dispatchX, dispatchY, dispatchZ);
238-
pCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_result.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE));
252+
pCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_result.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE));
239253
}
240254

241255
void SPD_CS::Gui()

sample/src/DX12/SPD_CS_Linear_Sampler.cpp

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ namespace CAULDRON_DX12
168168
1,
169169
0,
170170
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS),
171-
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
171+
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
172172

173173
// Create views for the mip chain
174174
//
@@ -193,12 +193,23 @@ namespace CAULDRON_DX12
193193

194194
void SPD_CS_Linear_Sampler::OnDestroyWindowSizeDependentResources()
195195
{
196+
m_globalCounterBuffer.OnDestroy();
196197
m_result.OnDestroy();
197198
}
198199

199200
void SPD_CS_Linear_Sampler::OnDestroy()
200201
{
201-
m_pRootSignature->Release();
202+
if (m_pPipeline != NULL)
203+
{
204+
m_pPipeline->Release();
205+
m_pPipeline = NULL;
206+
}
207+
208+
if (m_pRootSignature != NULL)
209+
{
210+
m_pRootSignature->Release();
211+
m_pRootSignature = NULL;
212+
}
202213
}
203214

204215
void SPD_CS_Linear_Sampler::Draw(ID3D12GraphicsCommandList2* pCommandList)
@@ -246,13 +257,16 @@ namespace CAULDRON_DX12
246257
D3D12_WRITEBUFFERIMMEDIATE_PARAMETER pParams = { m_globalCounterBuffer.GetResource()->GetGPUVirtualAddress(), 0 };
247258
pCommandList->WriteBufferImmediate(1, &pParams, NULL);
248259

249-
pCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_globalCounterBuffer.GetResource(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 0));
260+
D3D12_RESOURCE_BARRIER resourceBarriers[2] = {
261+
CD3DX12_RESOURCE_BARRIER::Transition(m_globalCounterBuffer.GetResource(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 0),
262+
CD3DX12_RESOURCE_BARRIER::Transition(m_result.GetResource(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS)
263+
};
264+
pCommandList->ResourceBarrier(2, resourceBarriers);
250265

251266
// Dispatch
252267
//
253-
pCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_result.GetResource(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS));
254268
pCommandList->Dispatch(dispatchX, dispatchY, dispatchZ);
255-
pCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_result.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE));
269+
pCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_result.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE));
256270
}
257271

258272
void SPD_CS_Linear_Sampler::Gui()

sample/src/DX12/SPD_Integration.hlsl

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ struct globalAtomicBuffer
4242
{
4343
uint counter;
4444
};
45-
RWStructuredBuffer<globalAtomicBuffer> globalAtomic :register(u1);
45+
globallycoherent RWStructuredBuffer<globalAtomicBuffer> globalAtomic :register(u1);
4646

4747
#define A_GPU
4848
#define A_HLSL
@@ -52,27 +52,47 @@ RWStructuredBuffer<globalAtomicBuffer> globalAtomic :register(u1);
5252
groupshared AU1 spd_counter;
5353

5454
#ifndef SPD_PACKED_ONLY
55-
groupshared AF4 spd_intermediate[16][16];
55+
groupshared AF1 spd_intermediateR[16][16];
56+
groupshared AF1 spd_intermediateG[16][16];
57+
groupshared AF1 spd_intermediateB[16][16];
58+
groupshared AF1 spd_intermediateA[16][16];
5659
AF4 SpdLoadSourceImage(AF2 tex){return imgSrc[tex];}
5760
AF4 SpdLoad(ASU2 tex){return imgDst[5][tex];}
5861
void SpdStore(ASU2 pix, AF4 outValue, AU1 index){imgDst[index][pix] = outValue;}
5962
void SpdIncreaseAtomicCounter(){InterlockedAdd(globalAtomic[0].counter, 1, spd_counter);}
6063
AU1 SpdGetAtomicCounter(){return spd_counter;}
61-
AF4 SpdLoadIntermediate(AU1 x, AU1 y){return spd_intermediate[x][y];}
62-
void SpdStoreIntermediate(AU1 x, AU1 y, AF4 value){spd_intermediate[x][y] = value;}
64+
AF4 SpdLoadIntermediate(AU1 x, AU1 y){
65+
return AF4(
66+
spd_intermediateR[x][y],
67+
spd_intermediateG[x][y],
68+
spd_intermediateB[x][y],
69+
spd_intermediateA[x][y]);}
70+
void SpdStoreIntermediate(AU1 x, AU1 y, AF4 value){
71+
spd_intermediateR[x][y] = value.x;
72+
spd_intermediateG[x][y] = value.y;
73+
spd_intermediateB[x][y] = value.z;
74+
spd_intermediateA[x][y] = value.w;}
6375
AF4 SpdReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3){return (v0+v1+v2+v3)*0.25;}
6476
#endif
6577

6678
// define fetch and store functions Packed
6779
#ifdef A_HALF
68-
groupshared AH4 spd_intermediate[16][16];
80+
groupshared AH2 spd_intermediateRG[16][16];
81+
groupshared AH2 spd_intermediateBA[16][16];
6982
AH4 SpdLoadSourceImageH(AF2 tex){return AH4(imgSrc[tex]);}
7083
AH4 SpdLoadH(ASU2 p){return AH4(imgDst[5][p]);}
7184
void SpdStoreH(ASU2 p, AH4 value, AU1 mip){imgDst[mip][p] = AF4(value);}
7285
void SpdIncreaseAtomicCounter(){InterlockedAdd(globalAtomic[0].counter, 1, spd_counter);}
7386
AU1 SpdGetAtomicCounter(){return spd_counter;}
74-
AH4 SpdLoadIntermediateH(AU1 x, AU1 y){return spd_intermediate[x][y];}
75-
void SpdStoreIntermediateH(AU1 x, AU1 y, AH4 value){spd_intermediate[x][y] = value;}
87+
AH4 SpdLoadIntermediateH(AU1 x, AU1 y){
88+
return AH4(
89+
spd_intermediateRG[x][y].x,
90+
spd_intermediateRG[x][y].y,
91+
spd_intermediateBA[x][y].x,
92+
spd_intermediateBA[x][y].y);}
93+
void SpdStoreIntermediateH(AU1 x, AU1 y, AH4 value){
94+
spd_intermediateRG[x][y] = value.xy;
95+
spd_intermediateBA[x][y] = value.zw;}
7696
AH4 SpdReduce4H(AH4 v0, AH4 v1, AH4 v2, AH4 v3){return (v0+v1+v2+v3)*AH1(0.25);}
7797
#endif
7898

0 commit comments

Comments
 (0)