Skip to content

Commit b2aff3f

Browse files
author
jgtong
authored
Merge pull request #54 from oneapi-src/jgt/ethminer/code_updates
Update shuffle API to latest SYCL 2020 spec
2 parents 0866eac + 958121f commit b2aff3f

File tree

2 files changed

+79
-71
lines changed

2 files changed

+79
-71
lines changed

ethminer/libethash-sycl/dagger_shuffled.dp.hpp

Lines changed: 57 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
#ifdef USE_LOOP_UNROLLING
3232
#define mix_and_shuffle(t, a, p, b, thread_id) \
3333
offset[p] = fnv(init0[p] ^ (a + b), ((uint32_t *)&mix[p])[b]) % d_dag_size; \
34-
offset[p] = item_ct1.get_sub_group().shuffle(offset[p], t + iShuffleOffset); \
34+
offset[p] = sycl::select_from_group(item_ct1.get_sub_group(), offset[p], t + iShuffleOffset); \
3535
mix[p] = fnv4(mix[p], d_dag[offset[p]].uint4s[thread_id]);
3636
#endif
3737

@@ -48,9 +48,13 @@ DEV_INLINE bool compute_hash(uint64_t nonce, sycl::uint2 *mix_hash, sycl::nd_ite
4848
const int thread_id = item_ct1.get_local_id(0) & (THREADS_PER_HASH - 1);
4949
const int mix_idx = thread_id & 3;
5050

51-
int const iSubGroupThreadId(item_ct1.get_sub_group().get_local_id());
51+
auto g = item_ct1.get_sub_group();
52+
53+
///int const iSubGroupThreadId(item_ct1.get_sub_group().get_local_id());
54+
int const iSubGroupThreadId(g.get_local_id());
5255
int const iShuffleOffset(pdShuffleOffsets[iSubGroupThreadId]);
5356

57+
5458
#ifndef USE_LOOP_UNROLLING
5559

5660
for (int i = 0; i < THREADS_PER_HASH; i += _PARALLEL_HASH) {
@@ -61,29 +65,29 @@ DEV_INLINE bool compute_hash(uint64_t nonce, sycl::uint2 *mix_hash, sycl::nd_ite
6165
// share init among threads
6266
for (int p = 0; p < _PARALLEL_HASH; p++) {
6367
sycl::uint2 shuffle[8];
64-
shuffle[0].x() = item_ct1.get_sub_group().shuffle(state[0].x(), i + p + iShuffleOffset);
65-
shuffle[0].y() = item_ct1.get_sub_group().shuffle(state[0].y(), i + p + iShuffleOffset);
68+
shuffle[0].x() = sycl::select_from_group(g, state[0].x(), i + p + iShuffleOffset);
69+
shuffle[0].y() = sycl::select_from_group(g, state[0].y(), i + p + iShuffleOffset);
6670

67-
shuffle[1].x() = item_ct1.get_sub_group().shuffle(state[1].x(), i + p + iShuffleOffset);
68-
shuffle[1].y() = item_ct1.get_sub_group().shuffle(state[1].y(), i + p + iShuffleOffset);
71+
shuffle[1].x() = sycl::select_from_group(g, state[1].x(), i + p + iShuffleOffset);
72+
shuffle[1].y() = sycl::select_from_group(g, state[1].y(), i + p + iShuffleOffset);
6973

70-
shuffle[2].x() = item_ct1.get_sub_group().shuffle(state[2].x(), i + p + iShuffleOffset);
71-
shuffle[2].y() = item_ct1.get_sub_group().shuffle(state[2].y(), i + p + iShuffleOffset);
74+
shuffle[2].x() = sycl::select_from_group(g, state[2].x(), i + p + iShuffleOffset);
75+
shuffle[2].y() = sycl::select_from_group(g, state[2].y(), i + p + iShuffleOffset);
7276

73-
shuffle[3].x() = item_ct1.get_sub_group().shuffle(state[3].x(), i + p + iShuffleOffset);
74-
shuffle[3].y() = item_ct1.get_sub_group().shuffle(state[3].y(), i + p + iShuffleOffset);
77+
shuffle[3].x() = sycl::select_from_group(g, state[3].x(), i + p + iShuffleOffset);
78+
shuffle[3].y() = sycl::select_from_group(g, state[3].y(), i + p + iShuffleOffset);
7579

76-
shuffle[4].x() = item_ct1.get_sub_group().shuffle(state[4].x(), i + p + iShuffleOffset);
77-
shuffle[4].y() = item_ct1.get_sub_group().shuffle(state[4].y(), i + p + iShuffleOffset);
80+
shuffle[4].x() = sycl::select_from_group(g, state[4].x(), i + p + iShuffleOffset);
81+
shuffle[4].y() = sycl::select_from_group(g, state[4].y(), i + p + iShuffleOffset);
7882

79-
shuffle[5].x() = item_ct1.get_sub_group().shuffle(state[5].x(), i + p + iShuffleOffset);
80-
shuffle[5].y() = item_ct1.get_sub_group().shuffle(state[5].y(), i + p + iShuffleOffset);
83+
shuffle[5].x() = sycl::select_from_group(g, state[5].x(), i + p + iShuffleOffset);
84+
shuffle[5].y() = sycl::select_from_group(g, state[5].y(), i + p + iShuffleOffset);
8185

82-
shuffle[6].x() = item_ct1.get_sub_group().shuffle(state[6].x(), i + p + iShuffleOffset);
83-
shuffle[6].y() = item_ct1.get_sub_group().shuffle(state[6].y(), i + p + iShuffleOffset);
86+
shuffle[6].x() = sycl::select_from_group(g, state[6].x(), i + p + iShuffleOffset);
87+
shuffle[6].y() = sycl::select_from_group(g, state[6].y(), i + p + iShuffleOffset);
8488

85-
shuffle[7].x() = item_ct1.get_sub_group().shuffle(state[7].x(), i + p + iShuffleOffset);
86-
shuffle[7].y() = item_ct1.get_sub_group().shuffle(state[7].y(), i + p + iShuffleOffset);
89+
shuffle[7].x() = sycl::select_from_group(g, state[7].x(), i + p + iShuffleOffset);
90+
shuffle[7].y() = sycl::select_from_group(g, state[7].y(), i + p + iShuffleOffset);
8791

8892
assert(mix_idx <= 3);
8993

@@ -102,7 +106,7 @@ DEV_INLINE bool compute_hash(uint64_t nonce, sycl::uint2 *mix_hash, sycl::nd_ite
102106
break;
103107
}
104108

105-
init0[p] = item_ct1.get_sub_group().shuffle(shuffle[0].x(), iShuffleOffset);
109+
init0[p] = sycl::select_from_group(item_ct1.get_sub_group(), shuffle[0].x(), iShuffleOffset);
106110
}
107111

108112
for (uint32_t a = 0; a < ACCESSES; a += 4)
@@ -116,7 +120,7 @@ DEV_INLINE bool compute_hash(uint64_t nonce, sycl::uint2 *mix_hash, sycl::nd_ite
116120
{
117121
offset[p] = fnv(init0[p] ^ (a + b), ((uint32_t*)&mix[p])[b]) % d_dag_size;
118122

119-
offset[p] = item_ct1.get_sub_group().shuffle(offset[p], t + iShuffleOffset);
123+
offset[p] = sycl::select_from_group(item_ct1.get_sub_group(), offset[p], t + iShuffleOffset);
120124
mix[p] = fnv4(mix[p], d_dag[offset[p]].uint4s[thread_id]);
121125
}
122126
}
@@ -128,14 +132,14 @@ DEV_INLINE bool compute_hash(uint64_t nonce, sycl::uint2 *mix_hash, sycl::nd_ite
128132

129133
// update mix across threads
130134

131-
shuffle[0].x() = item_ct1.get_sub_group().shuffle(thread_mix, 0 + iShuffleOffset);
132-
shuffle[0].y() = item_ct1.get_sub_group().shuffle(thread_mix, 1 + iShuffleOffset);
133-
shuffle[1].x() = item_ct1.get_sub_group().shuffle(thread_mix, 2 + iShuffleOffset);
134-
shuffle[1].y() = item_ct1.get_sub_group().shuffle(thread_mix, 3 + iShuffleOffset);
135-
shuffle[2].x() = item_ct1.get_sub_group().shuffle(thread_mix, 4 + iShuffleOffset);
136-
shuffle[2].y() = item_ct1.get_sub_group().shuffle(thread_mix, 5 + iShuffleOffset);
137-
shuffle[3].x() = item_ct1.get_sub_group().shuffle(thread_mix, 6 + iShuffleOffset);
138-
shuffle[3].y() = item_ct1.get_sub_group().shuffle(thread_mix, 7 + iShuffleOffset);
135+
shuffle[0].x() = sycl::select_from_group(item_ct1.get_sub_group(), thread_mix, 0 + iShuffleOffset);
136+
shuffle[0].y() = sycl::select_from_group(item_ct1.get_sub_group(), thread_mix, 1 + iShuffleOffset);
137+
shuffle[1].x() = sycl::select_from_group(item_ct1.get_sub_group(), thread_mix, 2 + iShuffleOffset);
138+
shuffle[1].y() = sycl::select_from_group(item_ct1.get_sub_group(), thread_mix, 3 + iShuffleOffset);
139+
shuffle[2].x() = sycl::select_from_group(item_ct1.get_sub_group(), thread_mix, 4 + iShuffleOffset);
140+
shuffle[2].y() = sycl::select_from_group(item_ct1.get_sub_group(), thread_mix, 5 + iShuffleOffset);
141+
shuffle[3].x() = sycl::select_from_group(item_ct1.get_sub_group(), thread_mix, 6 + iShuffleOffset);
142+
shuffle[3].y() = sycl::select_from_group(item_ct1.get_sub_group(), thread_mix, 7 + iShuffleOffset);
139143

140144
if ((i + p) == thread_id) {
141145
// move mix into state:
@@ -158,29 +162,29 @@ DEV_INLINE bool compute_hash(uint64_t nonce, sycl::uint2 *mix_hash, sycl::nd_ite
158162
//////sycl::uint2 shuffle[8];
159163
//////for (int j = 0; j < 8; j++)
160164
//////{
161-
shuffle_0.x() = item_ct1.get_sub_group().shuffle(state[0].x(), i + p + iShuffleOffset);
162-
shuffle_0.y() = item_ct1.get_sub_group().shuffle(state[0].y(), i + p + iShuffleOffset);
165+
shuffle_0.x() = sycl::select_from_group(item_ct1.get_sub_group(), state[0].x(), i + p + iShuffleOffset);
166+
shuffle_0.y() = sycl::select_from_group(item_ct1.get_sub_group(), state[0].y(), i + p + iShuffleOffset);
163167

164-
shuffle_1.x() = item_ct1.get_sub_group().shuffle(state[1].x(), i + p + iShuffleOffset);
165-
shuffle_1.y() = item_ct1.get_sub_group().shuffle(state[1].y(), i + p + iShuffleOffset);
168+
shuffle_1.x() = sycl::select_from_group(item_ct1.get_sub_group(), state[1].x(), i + p + iShuffleOffset);
169+
shuffle_1.y() = sycl::select_from_group(item_ct1.get_sub_group(), state[1].y(), i + p + iShuffleOffset);
166170

167-
shuffle_2.x() = item_ct1.get_sub_group().shuffle(state[2].x(), i + p + iShuffleOffset);
168-
shuffle_2.y() = item_ct1.get_sub_group().shuffle(state[2].y(), i + p + iShuffleOffset);
171+
shuffle_2.x() = sycl::select_from_group(item_ct1.get_sub_group(), state[2].x(), i + p + iShuffleOffset);
172+
shuffle_2.y() = sycl::select_from_group(item_ct1.get_sub_group(), state[2].y(), i + p + iShuffleOffset);
169173

170-
shuffle_3.x() = item_ct1.get_sub_group().shuffle(state[3].x(), i + p + iShuffleOffset);
171-
shuffle_3.y() = item_ct1.get_sub_group().shuffle(state[3].y(), i + p + iShuffleOffset);
174+
shuffle_3.x() = sycl::select_from_group(item_ct1.get_sub_group(), state[3].x(), i + p + iShuffleOffset);
175+
shuffle_3.y() = sycl::select_from_group(item_ct1.get_sub_group(), state[3].y(), i + p + iShuffleOffset);
172176

173-
shuffle_4.x() = item_ct1.get_sub_group().shuffle(state[4].x(), i + p + iShuffleOffset);
174-
shuffle_4.y() = item_ct1.get_sub_group().shuffle(state[4].y(), i + p + iShuffleOffset);
177+
shuffle_4.x() = sycl::select_from_group(item_ct1.get_sub_group(), state[4].x(), i + p + iShuffleOffset);
178+
shuffle_4.y() = sycl::select_from_group(item_ct1.get_sub_group(), state[4].y(), i + p + iShuffleOffset);
175179

176-
shuffle_5.x() = item_ct1.get_sub_group().shuffle(state[5].x(), i + p + iShuffleOffset);
177-
shuffle_5.y() = item_ct1.get_sub_group().shuffle(state[5].y(), i + p + iShuffleOffset);
180+
shuffle_5.x() = sycl::select_from_group(item_ct1.get_sub_group(), state[5].x(), i + p + iShuffleOffset);
181+
shuffle_5.y() = sycl::select_from_group(item_ct1.get_sub_group(), state[5].y(), i + p + iShuffleOffset);
178182

179-
shuffle_6.x() = item_ct1.get_sub_group().shuffle(state[6].x(), i + p + iShuffleOffset);
180-
shuffle_6.y() = item_ct1.get_sub_group().shuffle(state[6].y(), i + p + iShuffleOffset);
183+
shuffle_6.x() = sycl::select_from_group(item_ct1.get_sub_group(), state[6].x(), i + p + iShuffleOffset);
184+
shuffle_6.y() = sycl::select_from_group(item_ct1.get_sub_group(), state[6].y(), i + p + iShuffleOffset);
181185

182-
shuffle_7.x() = item_ct1.get_sub_group().shuffle(state[7].x(), i + p + iShuffleOffset);
183-
shuffle_7.y() = item_ct1.get_sub_group().shuffle(state[7].y(), i + p + iShuffleOffset);
186+
shuffle_7.x() = sycl::select_from_group(item_ct1.get_sub_group(), state[7].x(), i + p + iShuffleOffset);
187+
shuffle_7.y() = sycl::select_from_group(item_ct1.get_sub_group(), state[7].y(), i + p + iShuffleOffset);
184188

185189
/////}
186190
assert(mix_idx <= 3);
@@ -200,7 +204,7 @@ DEV_INLINE bool compute_hash(uint64_t nonce, sycl::uint2 *mix_hash, sycl::nd_ite
200204
break;
201205
}
202206

203-
init0[p] = item_ct1.get_sub_group().shuffle(shuffle_0.x(), iShuffleOffset);
207+
init0[p] = sycl::select_from_group(item_ct1.get_sub_group(), shuffle_0.x(), iShuffleOffset);
204208
}
205209

206210
//////for (uint32_t a = 0; a < ACCESSES; a += 4)
@@ -554,14 +558,14 @@ DEV_INLINE bool compute_hash(uint64_t nonce, sycl::uint2 *mix_hash, sycl::nd_ite
554558

555559
// update mix across threads
556560

557-
shuffle_0.x() = item_ct1.get_sub_group().shuffle(thread_mix, 0 + iShuffleOffset);
558-
shuffle_0.y() = item_ct1.get_sub_group().shuffle(thread_mix, 1 + iShuffleOffset);
559-
shuffle_1.x() = item_ct1.get_sub_group().shuffle(thread_mix, 2 + iShuffleOffset);
560-
shuffle_1.y() = item_ct1.get_sub_group().shuffle(thread_mix, 3 + iShuffleOffset);
561-
shuffle_2.x() = item_ct1.get_sub_group().shuffle(thread_mix, 4 + iShuffleOffset);
562-
shuffle_2.y() = item_ct1.get_sub_group().shuffle(thread_mix, 5 + iShuffleOffset);
563-
shuffle_3.x() = item_ct1.get_sub_group().shuffle(thread_mix, 6 + iShuffleOffset);
564-
shuffle_3.y() = item_ct1.get_sub_group().shuffle(thread_mix, 7 + iShuffleOffset);
561+
shuffle_0.x() = sycl::select_from_group(item_ct1.get_sub_group(), thread_mix, 0 + iShuffleOffset);
562+
shuffle_0.y() = sycl::select_from_group(item_ct1.get_sub_group(), thread_mix, 1 + iShuffleOffset);
563+
shuffle_1.x() = sycl::select_from_group(item_ct1.get_sub_group(), thread_mix, 2 + iShuffleOffset);
564+
shuffle_1.y() = sycl::select_from_group(item_ct1.get_sub_group(), thread_mix, 3 + iShuffleOffset);
565+
shuffle_2.x() = sycl::select_from_group(item_ct1.get_sub_group(), thread_mix, 4 + iShuffleOffset);
566+
shuffle_2.y() = sycl::select_from_group(item_ct1.get_sub_group(), thread_mix, 5 + iShuffleOffset);
567+
shuffle_3.x() = sycl::select_from_group(item_ct1.get_sub_group(), thread_mix, 6 + iShuffleOffset);
568+
shuffle_3.y() = sycl::select_from_group(item_ct1.get_sub_group(), thread_mix, 7 + iShuffleOffset);
565569

566570
if ((i + p) == thread_id) {
567571
// move mix into state:

ethminer/libethash-sycl/ethash_sycl_miner_kernel.cpp

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -93,46 +93,47 @@ void ethash_calculate_dag_item(uint32_t start, sycl::nd_item<1> item_ct1, uint32
9393
SHA3_512(dag_node_mem, keccak_round_constants);
9494

9595
const int thread_id = item_ct1.get_local_id(0) & 3;
96-
int const iSubGroupThreadId(item_ct1.get_sub_group().get_local_id());
96+
auto g = item_ct1.get_sub_group();
97+
int const iSubGroupThreadId(g.get_local_id());
9798

9899
for (uint32_t i = 0; i != ETHASH_DATASET_PARENTS; ++i) {
99100
uint32_t parent_index = fnv(node_index ^ i, dag_node.words[i % NODE_WORDS]) % d_light_size;
100101
for (uint32_t t = 0; t < 4; t++) {
101102
uint32_t shuffle_index = 0;
102103
if (item_ct1.get_sub_group().get_local_id() < 4)
103-
shuffle_index = item_ct1.get_sub_group().shuffle(parent_index, t);
104+
shuffle_index = sycl::select_from_group(g, parent_index, t);
104105
else if (item_ct1.get_sub_group().get_local_id() < 8)
105-
shuffle_index = item_ct1.get_sub_group().shuffle(parent_index, t + 4);
106+
shuffle_index = sycl::select_from_group(g, parent_index, t + 4);
106107
else if (item_ct1.get_sub_group().get_local_id() < 12)
107-
shuffle_index = item_ct1.get_sub_group().shuffle(parent_index, t + 8);
108+
shuffle_index = sycl::select_from_group(g, parent_index, t + 8);
108109
else if (item_ct1.get_sub_group().get_local_id() < 16)
109-
shuffle_index = item_ct1.get_sub_group().shuffle(parent_index, t + 12);
110+
shuffle_index = sycl::select_from_group(g, parent_index, t + 12);
110111
else if (item_ct1.get_sub_group().get_local_id() < 20)
111-
shuffle_index = item_ct1.get_sub_group().shuffle(parent_index, t + 16);
112+
shuffle_index = sycl::select_from_group(g, parent_index, t + 16);
112113
else if (item_ct1.get_sub_group().get_local_id() < 24)
113-
shuffle_index = item_ct1.get_sub_group().shuffle(parent_index, t + 20);
114+
shuffle_index = sycl::select_from_group(g, parent_index, t + 20);
114115
else if (item_ct1.get_sub_group().get_local_id() < 28)
115-
shuffle_index = item_ct1.get_sub_group().shuffle(parent_index, t + 24);
116+
shuffle_index = sycl::select_from_group(g, parent_index, t + 24);
116117
else if (item_ct1.get_sub_group().get_local_id() < 32)
117-
shuffle_index = item_ct1.get_sub_group().shuffle(parent_index, t + 28);
118+
shuffle_index = sycl::select_from_group(g, parent_index, t + 28);
118119

119120
#ifdef USE_AMD_BACKEND // Shuffle on AMD GPUs is done over 64 threads
120121
else if (item_ct1.get_sub_group().get_local_id() < 36)
121-
shuffle_index = item_ct1.get_sub_group().shuffle(parent_index, t + 32);
122+
shuffle_index = sycl::select_from_group(g, parent_index, t + 32);
122123
else if (item_ct1.get_sub_group().get_local_id() < 40)
123-
shuffle_index = item_ct1.get_sub_group().shuffle(parent_index, t + 36);
124+
shuffle_index = sycl::select_from_group(g, parent_index, t + 36);
124125
else if (item_ct1.get_sub_group().get_local_id() < 44)
125-
shuffle_index = item_ct1.get_sub_group().shuffle(parent_index, t + 40);
126+
shuffle_index = sycl::select_from_group(g, parent_index, t + 40);
126127
else if (item_ct1.get_sub_group().get_local_id() < 48)
127-
shuffle_index = item_ct1.get_sub_group().shuffle(parent_index, t + 44);
128+
shuffle_index = sycl::select_from_group(g, parent_index, t + 44);
128129
else if (item_ct1.get_sub_group().get_local_id() < 52)
129-
shuffle_index = item_ct1.get_sub_group().shuffle(parent_index, t + 48);
130+
shuffle_index = sycl::select_from_group(g, parent_index, t + 48);
130131
else if (item_ct1.get_sub_group().get_local_id() < 56)
131-
shuffle_index = item_ct1.get_sub_group().shuffle(parent_index, t + 52);
132+
shuffle_index = sycl::select_from_group(g, parent_index, t + 52);
132133
else if (item_ct1.get_sub_group().get_local_id() < 60)
133-
shuffle_index = item_ct1.get_sub_group().shuffle(parent_index, t + 56);
134+
shuffle_index = sycl::select_from_group(g, parent_index, t + 56);
134135
else if (item_ct1.get_sub_group().get_local_id() < 64)
135-
shuffle_index = item_ct1.get_sub_group().shuffle(parent_index, t + 60);
136+
shuffle_index = sycl::select_from_group(g, parent_index, t + 60);
136137
#endif
137138

138139
sycl::uint4 p4 = d_light[shuffle_index].uint4s[thread_id & 3];
@@ -175,7 +176,10 @@ void ethash_calculate_dag_item(uint32_t start, sycl::nd_item<1> item_ct1, uint32
175176
w1 = w + 60;
176177
#endif
177178

178-
sycl::uint4 s4 = sycl::uint4(item_ct1.get_sub_group().shuffle(p4.x(), w1), item_ct1.get_sub_group().shuffle(p4.y(), w1), item_ct1.get_sub_group().shuffle(p4.z(), w1), item_ct1.get_sub_group().shuffle(p4.w(), w1));
179+
sycl::uint4 s4 = sycl::uint4(sycl::select_from_group(item_ct1.get_sub_group(), p4.x(), w1),
180+
sycl::select_from_group(item_ct1.get_sub_group(), p4.y(), w1),
181+
sycl::select_from_group(item_ct1.get_sub_group(), p4.z(), w1),
182+
sycl::select_from_group(item_ct1.get_sub_group(), p4.w(), w1));
179183

180184
if (t == (thread_id & 3)) {
181185
dag_node.uint4s[w] = fnv4(dag_node.uint4s[w], s4);

0 commit comments

Comments
 (0)