Skip to content

Commit 827ff79

Browse files
committed
HLSL dependencies for example 48 arithmetic unit tests now properly compile
1 parent e48ff51 commit 827ff79

File tree

8 files changed

+51
-74
lines changed

8 files changed

+51
-74
lines changed

include/nbl/builtin/hlsl/shared_memory_accessor.hlsl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ struct SharedMemoryAdaptor
7878
{
7979
NumberSharedMemoryAccessor accessor;
8080

81+
uint get(const uint ix) { return accessor.get(ix); }
8182
void get(const uint ix, out uint value) { value = accessor.get(ix);}
8283
void get(const uint ix, out uint2 value) { value = uint2(accessor.get(ix), accessor.get(ix + _NBL_HLSL_WORKGROUP_SIZE_));}
8384
void get(const uint ix, out uint3 value) { value = uint3(accessor.get(ix), accessor.get(ix + _NBL_HLSL_WORKGROUP_SIZE_), accessor.get(ix + 2 * _NBL_HLSL_WORKGROUP_SIZE_));}

include/nbl/builtin/hlsl/subgroup/arithmetic_portability.hlsl

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -33,16 +33,16 @@ namespace portability
3333
{
3434

3535
// PORTABILITY BINOP DECLARATIONS
36-
template<typename T, class Binop, class ScratchAccessor>
36+
template<typename T, class Binop, class ScratchAccessor, bool initializeScratch>
3737
struct reduction;
38-
template<typename T, class Binop, class ScratchAccessor>
38+
template<typename T, class Binop, class ScratchAccessor, bool initializeScratch>
3939
struct inclusive_scan;
40-
template<typename T, class Binop, class ScratchAccessor>
40+
template<typename T, class Binop, class ScratchAccessor, bool initializeScratch>
4141
struct exclusive_scan;
4242

4343
}
4444

45-
template<typename T, class Binop, class ScratchAccessor>
45+
template<typename T, class Binop, class ScratchAccessor, bool initializeScratch = true>
4646
struct reduction
4747
{
4848
T operator()(const T x)
@@ -51,12 +51,12 @@ struct reduction
5151
native::reduction<T, Binop> reduce;
5252
return reduce(x);
5353
#else
54-
return portability::reduction<T, Binop, ScratchAccessor>::create()(x);
54+
return portability::reduction<T, Binop, ScratchAccessor, initializeScratch>::create()(x);
5555
#endif
5656
}
5757
};
5858

59-
template<typename T, class Binop, class ScratchAccessor>
59+
template<typename T, class Binop, class ScratchAccessor, bool initializeScratch = true>
6060
struct exclusive_scan
6161
{
6262
T operator()(const T x)
@@ -65,12 +65,12 @@ struct exclusive_scan
6565
native::exclusive_scan<T, Binop> scan;
6666
return scan(x);
6767
#else
68-
portability::exclusive_scan<T, Binop, ScratchAccessor>::create()(x);
68+
return portability::exclusive_scan<T, Binop, ScratchAccessor, initializeScratch>::create()(x);
6969
#endif
7070
}
7171
};
7272

73-
template<typename T, class Binop, class ScratchAccessor>
73+
template<typename T, class Binop, class ScratchAccessor, bool initializeScratch = true>
7474
struct inclusive_scan
7575
{
7676
T operator()(const T x)
@@ -79,7 +79,7 @@ struct inclusive_scan
7979
native::inclusive_scan<T, Binop> scan;
8080
return scan(x);
8181
#else
82-
portability::inclusive_scan<T, Binop, ScratchAccessor>::create()(x);
82+
return portability::inclusive_scan<T, Binop, ScratchAccessor, initializeScratch>::create()(x);
8383
#endif
8484
}
8585
};

include/nbl/builtin/hlsl/subgroup/arithmetic_portability_impl.hlsl

Lines changed: 19 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#ifndef _NBL_BUILTIN_HLSL_SUBGROUP_ARITHMETIC_PORTABILITY_IMPL_INCLUDED_
55
#define _NBL_BUILTIN_HLSL_SUBGROUP_ARITHMETIC_PORTABILITY_IMPL_INCLUDED_
66

7+
#include "nbl/builtin/hlsl/binops.hlsl"
78
#include "nbl/builtin/hlsl/subgroup/scratch.hlsl"
89

910
// REVIEW: Location and need of these. They need to be over a function but
@@ -24,7 +25,8 @@ void fake_for_capability_and_extension(){}
2425
const uint gl_LocalInvocationIndex : SV_GroupIndex; // REVIEW: Discuss proper placement of SV_* values. They are not allowed to be defined inside a function scope, only as arguments of main() or global variables in the shader.
2526
#endif
2627

27-
const uint LastWorkgroupInvocation = _NBL_HLSL_WORKGROUP_SIZE_-1; // REVIEW: Where should this be defined?
28+
//const uint LastWorkgroupInvocation = _NBL_HLSL_WORKGROUP_SIZE_-1; // REVIEW: Where should this be defined?
29+
#define LastWorkgroupInvocation (_NBL_HLSL_WORKGROUP_SIZE_-1U)
2830

2931
namespace nbl
3032
{
@@ -361,15 +363,12 @@ struct exclusive_scan<uint, binops::max<float> >
361363
namespace portability
362364
{
363365

364-
template<class Binop, class ScratchAccessor>
365-
struct inclusive_scan;
366-
367366
struct scan_base
368367
{
369-
template<class Binop, class ScratchAccessor>
370-
static inclusive_scan<Binop, ScratchAccessor> create()
368+
template<typename T, class Binop, class ScratchAccessor, bool initializeScratch>
369+
static inclusive_scan<T, Binop, ScratchAccessor, initializeScratch> create()
371370
{
372-
inclusive_scan<Binop, ScratchAccessor> retval;
371+
inclusive_scan<T, Binop, ScratchAccessor, initializeScratch> retval;
373372
retval.offsetsAndMasks = ScratchOffsetsAndMasks::WithDefaults();
374373
return retval;
375374
}
@@ -378,15 +377,14 @@ struct scan_base
378377
ScratchOffsetsAndMasks offsetsAndMasks;
379378
};
380379

381-
template<class Binop, class ScratchAccessor>
380+
template<typename T, class Binop, class ScratchAccessor, bool initializeScratch>
382381
struct inclusive_scan : scan_base
383382
{
384-
static inclusive_scan<Binop,ScratchAccessor> create()
383+
static inclusive_scan<T, Binop, ScratchAccessor, initializeScratch> create()
385384
{
386-
return scan_base::create<Binop,ScratchAccessor>(); // REVIEW: Is this correct?
385+
return scan_base::create<T, Binop, ScratchAccessor, initializeScratch>();
387386
}
388387

389-
template<typename T, bool initializeScratch>
390388
T operator()(T value)
391389
{
392390
Binop op;
@@ -434,27 +432,20 @@ struct inclusive_scan : scan_base
434432
}
435433
return value;
436434
}
437-
438-
template<typename T>
439-
T operator()(const T value)
440-
{
441-
return operator()<T,true>(value);
442-
}
443435
// protected:
444436
ScratchAccessor scratchAccessor;
445437
};
446438

447-
template<class Binop, class ScratchAccessor>
439+
template<typename T, class Binop, class ScratchAccessor, bool initializeScratch>
448440
struct exclusive_scan
449441
{
450-
static exclusive_scan<Binop,ScratchAccessor> create()
442+
static exclusive_scan<T, Binop, ScratchAccessor, initializeScratch> create()
451443
{
452-
exclusive_scan<Binop, ScratchAccessor> retval;
453-
retval.impl = inclusive_scan<Binop, ScratchAccessor>::create();
444+
exclusive_scan<T, Binop, ScratchAccessor, initializeScratch> retval;
445+
retval.impl = inclusive_scan<T, Binop, ScratchAccessor, initializeScratch>::create();
454446
return retval;
455447
}
456448

457-
template<typename T, bool initializeScratch>
458449
T operator()(T value)
459450
{
460451
value = impl(value);
@@ -475,27 +466,20 @@ struct exclusive_scan
475466
return value;
476467
}
477468

478-
template<typename T>
479-
T operator()(const T value)
480-
{
481-
return operator()<T,true>(value);
482-
}
483-
484469
// protected:
485-
inclusive_scan<Binop,ScratchAccessor> impl;
470+
inclusive_scan<T, Binop, ScratchAccessor, initializeScratch> impl;
486471
};
487472

488-
template<class Binop, class ScratchAccessor>
473+
template<typename T, class Binop, class ScratchAccessor, bool initializeScratch>
489474
struct reduction
490475
{
491-
static reduction<Binop,ScratchAccessor> create()
476+
static reduction<T, Binop, ScratchAccessor, initializeScratch> create()
492477
{
493-
reduction<Binop,ScratchAccessor> retval;
494-
retval.impl = inclusive_scan<Binop,ScratchAccessor>::create();
478+
reduction<T, Binop, ScratchAccessor, initializeScratch> retval;
479+
retval.impl = inclusive_scan<T, Binop, ScratchAccessor, initializeScratch>::create();
495480
return retval;
496481
}
497482

498-
template<typename T, bool initializeScratch>
499483
T operator()(T value)
500484
{
501485
value = impl(value);
@@ -513,23 +497,15 @@ struct reduction
513497
impl.scratchAccessor.set(impl.offsetsAndMasks.scanStoreOffset, value);
514498
Barrier();
515499
MemoryBarrierShared();
516-
517500
value = impl.scratchAccessor.get(reductionResultOffset);
518501
Barrier();
519502
MemoryBarrierShared();
520503
#endif
521504
// return it
522505
return value;
523506
}
524-
525-
template<typename T>
526-
T operator()(const T value)
527-
{
528-
return operator()<T,true>(value);
529-
}
530-
531507
// protected:
532-
inclusive_scan<Binop,ScratchAccessor> impl;
508+
inclusive_scan<T, Binop, ScratchAccessor, initializeScratch> impl;
533509
};
534510
}
535511

include/nbl/builtin/hlsl/subgroup/basic_portability.hlsl

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,21 +88,23 @@ namespace subgroup
8888
void spirv_subgroupBarrier(uint executionScope, uint memoryScope, uint memorySemantics);
8989

9090
// REVIEW Should we name the Barriers with the Subgroup prefix just to make it clearer when calling?
91-
91+
// REVIEW Proper Memory Semantics!! Link here: https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#Memory_Semantics_-id-
92+
// REVIEW: Need advice on memory semantics. SubgroupMemory (0x80) | AcquireRelease (0x8) looks like the correct bitmask, but SubgroupMemory does not appear to be supported under Vulkan, so the barriers below pass AcquireRelease (0x8) | WorkgroupMemory (0x100).
93+
9294
void Barrier() {
9395
// https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_scope_id
9496
// Subgroup scope is number 3
9597

9698
// https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_memory_semantics_id
9799
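// Memory semantics are AcquireRelease (0x8) | WorkgroupMemory (0x100) rather than None (0x0), so this acts as both a control barrier and a memory barrier on shared memory; the intent is to match the GLSL barrier, which does both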
98-
spirv_subgroupBarrier(3, 3, 0x0);
100+
spirv_subgroupBarrier(3, 3, 0x8 | 0x100);
99101
}
100102

101103
[[vk::ext_instruction(/* OpMemoryBarrier */ 225)]] // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpMemoryBarrier
102104
void spirv_subgroupMemoryBarrierShared(uint memoryScope, uint memorySemantics);
103105

104106
void MemoryBarrierShared() {
105-
spirv_subgroupMemoryBarrierShared(3, 0x0); // REVIEW: Need advice on memory semantics. Would think SubgroupMemory(0x80) but have no idea
107+
spirv_subgroupMemoryBarrierShared(3, 0x8 | 0x100);
106108
}
107109
}
108110
}

include/nbl/builtin/hlsl/workgroup/arithmetic.hlsl

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,7 @@
44
#ifndef _NBL_BUILTIN_HLSL_WORKGROUP_ARITHMETIC_INCLUDED_
55
#define _NBL_BUILTIN_HLSL_WORKGROUP_ARITHMETIC_INCLUDED_
66

7-
#include "nbl/builtin/hlsl/workgroup/shared_arithmetic.hlsl"
8-
#include "nbl/builtin/hlsl/workgroup/shared_scan.hlsl"
9-
#include "nbl/builtin/hlsl/workgroup/clustered.hlsl"
7+
#include "nbl/builtin/hlsl/workgroup/ballot.hlsl"
108

119
#ifndef _NBL_GL_LOCAL_INVOCATION_IDX_DECLARED_
1210
#define _NBL_GL_LOCAL_INVOCATION_IDX_DECLARED_
@@ -24,24 +22,25 @@ template<typename T, class Binop, class ScratchAccessor>
2422
struct reduction
2523
{
2624
struct reduction_t : subgroup::reduction<T, Binop, ScratchAccessor> {};
27-
T operator()()
25+
T operator()(T value)
2826
{
2927
WorkgroupScanHead<T, reduction_t, ScratchAccessor> wsh = WorkgroupScanHead<T, reduction_t, ScratchAccessor>::create(false, 0xffFFffFFu, _NBL_HLSL_WORKGROUP_SIZE_);
30-
T result = wsh();
28+
T result = wsh(value);
3129
Barrier();
32-
return broadcast(result, wsh.lastInvocationInLevel);
30+
// REVIEW: I think broadcast is fine to use the ScratchAccessor at this point since reduction has finished
31+
return broadcast<uint, ScratchAccessor>(result, wsh.lastInvocationInLevel);
3332
}
3433
};
3534

3635
template<typename T, class Binop, class ScratchAccessor>
3736
struct exclusive_scan
3837
{
3938
struct exclusive_scan_t : subgroup::exclusive_scan<T, Binop, ScratchAccessor> {};
40-
T operator()()
39+
T operator()(T value)
4140
{
4241
WorkgroupScanHead<T, exclusive_scan_t, ScratchAccessor> wsh = WorkgroupScanHead<T, exclusive_scan_t, ScratchAccessor>::create(true, 0xffFFffFFu, _NBL_HLSL_WORKGROUP_SIZE_);
43-
wsh();
44-
WorkgroupScanTail<T, Binop, ScratchAccessor> wst = WorkgroupScanTail<T, Binop, ScratchAccessor>::create(true, 0xffFFffFFu, wsh.lastInvocation, wsh.scanStoreIndex);
42+
wsh(value);
43+
WorkgroupScanTail<T, Binop, ScratchAccessor> wst = WorkgroupScanTail<T, Binop, ScratchAccessor>::create(true, 0xffFFffFFu, wsh.firstLevelScan, wsh.lastInvocation, wsh.scanStoreIndex);
4544
return wst();
4645
}
4746
};
@@ -50,11 +49,11 @@ template<typename T, class Binop, class ScratchAccessor>
5049
struct inclusive_scan
5150
{
5251
struct inclusive_scan_t : subgroup::inclusive_scan<T, Binop, ScratchAccessor> {};
53-
T operator()()
52+
T operator()(T value)
5453
{
5554
WorkgroupScanHead<T, inclusive_scan_t, ScratchAccessor> wsh = WorkgroupScanHead<T, inclusive_scan_t, ScratchAccessor>::create(true, 0xffFFffFFu, _NBL_HLSL_WORKGROUP_SIZE_);
56-
wsh();
57-
WorkgroupScanTail<T, Binop, ScratchAccessor> wst = WorkgroupScanTail<T, Binop, ScratchAccessor>::create(false, 0xffFFffFFu, wsh.lastInvocation, wsh.scanStoreIndex);
55+
wsh(value);
56+
WorkgroupScanTail<T, Binop, ScratchAccessor> wst = WorkgroupScanTail<T, Binop, ScratchAccessor>::create(false, 0xffFFffFFu, wsh.firstLevelScan, wsh.lastInvocation, wsh.scanStoreIndex);
5857
return wst();
5958
}
6059
};

include/nbl/builtin/hlsl/workgroup/ballot.hlsl

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,8 @@
66

77
#include "nbl/builtin/hlsl/atomics.hlsl"
88
#include "nbl/builtin/hlsl/workgroup/basic.hlsl"
9-
#include "nbl/builtin/hlsl/workgroup/shared_ballot.hlsl"
9+
#include "nbl/builtin/hlsl/subgroup/scratch.hlsl"
1010
#include "nbl/builtin/hlsl/workgroup/shared_scan.hlsl"
11-
#include "nbl/builtin/hlsl/subgroup/basic_portability.hlsl"
1211
#include "nbl/builtin/hlsl/subgroup/arithmetic_portability.hlsl"
1312

1413
#ifndef _NBL_GL_LOCAL_INVOCATION_IDX_DECLARED_
@@ -102,7 +101,7 @@ bool inverseBallot()
102101
* into uint then back again. We use the converter template for this,
103102
* which also contains the source type.
104103
*/
105-
template<class ScratchAccessor, typename T>
104+
template<typename T, class ScratchAccessor>
106105
T broadcast(in T val, in uint id)
107106
{
108107
// REVIEW: Check if we need edge barriers
@@ -115,7 +114,7 @@ T broadcast(in T val, in uint id)
115114
}
116115

117116
// REVIEW: Should we have broadcastFirst and broadcastElected?
118-
template<class ScratchAccessor, typename T>
117+
template<typename T, class ScratchAccessor>
119118
T broadcastFirst(in T val)
120119
{
121120
ScratchAccessor scratch;
@@ -171,7 +170,7 @@ uint ballotScanBitCount(in bool exclusive)
171170
// scan hierarchically, invocations with `gl_LocalInvocationIndex >= bitfieldDWORDs` will have garbage here
172171
Barrier();
173172

174-
using WSHT = WorkgroupScanHead<uint, subgroup::inclusive_scan<binops::add<uint> >, ScratchAccessor>;
173+
using WSHT = WorkgroupScanHead<uint, subgroup::inclusive_scan<uint, binops::add<uint>, ScratchAccessor >, ScratchAccessor>;
175174
WSHT wsh = WSHT::create(true, 0u, bitfieldDWORDs);
176175
wsh();
177176

include/nbl/builtin/hlsl/workgroup/shared_ballot.hlsl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,5 @@
55
#define _NBL_BUILTIN_HLSL_WORKGROUP_SHARED_BALLOT_INCLUDED_
66

77
#include "nbl/builtin/hlsl/workgroup/basic.hlsl"
8-
#include "nbl/builtin/hlsl/subgroup/shared_arithmetic_portability.hlsl"
98

109
#endif

include/nbl/builtin/hlsl/workgroup/shared_scan.hlsl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ struct WorkgroupScanHead
2121
{
2222
bool isScan;
2323
T identity;
24+
T firstLevelScan;
2425
uint itemCount;
2526
uint lastInvocation;
2627
uint lastInvocationInLevel;
@@ -43,7 +44,7 @@ struct WorkgroupScanHead
4344
subgroup::scratchInitialize<ScratchAccessor, T>(value, identity, itemCount);
4445
lastInvocationInLevel = lastInvocation;
4546
SubgroupOp subgroupOp;
46-
T firstLevelScan = subgroupOp(value);
47+
firstLevelScan = subgroupOp(value);
4748
T scan = firstLevelScan;
4849
const bool possibleProp = offsetsAndMasks.subgroupInvocation == offsetsAndMasks.subgroupMask; // last invocation in subgroup
4950
const uint subgroupId = gl_LocalInvocationIndex >> subgroup::SizeLog2();

0 commit comments

Comments
 (0)