31
31
#ifdef USE_LOOP_UNROLLING
32
32
#define mix_and_shuffle (t, a, p, b, thread_id ) \
33
33
offset[p] = fnv(init0[p] ^ (a + b), ((uint32_t *)&mix[p])[b]) % d_dag_size; \
34
- offset[p] = item_ct1.get_sub_group().shuffle( offset[p], t + iShuffleOffset); \
34
+ offset[p] = sycl::select_from_group( item_ct1.get_sub_group(), offset[p], t + iShuffleOffset); \
35
35
mix[p] = fnv4(mix[p], d_dag[offset[p]].uint4s[thread_id]);
36
36
#endif
37
37
@@ -48,9 +48,13 @@ DEV_INLINE bool compute_hash(uint64_t nonce, sycl::uint2 *mix_hash, sycl::nd_ite
48
48
const int thread_id = item_ct1.get_local_id (0 ) & (THREADS_PER_HASH - 1 );
49
49
const int mix_idx = thread_id & 3 ;
50
50
51
- int const iSubGroupThreadId (item_ct1.get_sub_group ().get_local_id ());
51
+ auto g = item_ct1.get_sub_group ();
52
+
53
+ // /int const iSubGroupThreadId(item_ct1.get_sub_group().get_local_id());
54
+ int const iSubGroupThreadId (g.get_local_id ());
52
55
int const iShuffleOffset (pdShuffleOffsets[iSubGroupThreadId]);
53
56
57
+
54
58
#ifndef USE_LOOP_UNROLLING
55
59
56
60
for (int i = 0 ; i < THREADS_PER_HASH; i += _PARALLEL_HASH) {
@@ -61,29 +65,29 @@ DEV_INLINE bool compute_hash(uint64_t nonce, sycl::uint2 *mix_hash, sycl::nd_ite
61
65
// share init among threads
62
66
for (int p = 0 ; p < _PARALLEL_HASH; p++) {
63
67
sycl::uint2 shuffle[8 ];
64
- shuffle[0 ].x () = item_ct1. get_sub_group (). shuffle ( state[0 ].x (), i + p + iShuffleOffset);
65
- shuffle[0 ].y () = item_ct1. get_sub_group (). shuffle ( state[0 ].y (), i + p + iShuffleOffset);
68
+ shuffle[0 ].x () = sycl::select_from_group (g, state[0 ].x (), i + p + iShuffleOffset);
69
+ shuffle[0 ].y () = sycl::select_from_group (g, state[0 ].y (), i + p + iShuffleOffset);
66
70
67
- shuffle[1 ].x () = item_ct1. get_sub_group (). shuffle ( state[1 ].x (), i + p + iShuffleOffset);
68
- shuffle[1 ].y () = item_ct1. get_sub_group (). shuffle ( state[1 ].y (), i + p + iShuffleOffset);
71
+ shuffle[1 ].x () = sycl::select_from_group (g, state[1 ].x (), i + p + iShuffleOffset);
72
+ shuffle[1 ].y () = sycl::select_from_group (g, state[1 ].y (), i + p + iShuffleOffset);
69
73
70
- shuffle[2 ].x () = item_ct1. get_sub_group (). shuffle ( state[2 ].x (), i + p + iShuffleOffset);
71
- shuffle[2 ].y () = item_ct1. get_sub_group (). shuffle ( state[2 ].y (), i + p + iShuffleOffset);
74
+ shuffle[2 ].x () = sycl::select_from_group (g, state[2 ].x (), i + p + iShuffleOffset);
75
+ shuffle[2 ].y () = sycl::select_from_group (g, state[2 ].y (), i + p + iShuffleOffset);
72
76
73
- shuffle[3 ].x () = item_ct1. get_sub_group (). shuffle ( state[3 ].x (), i + p + iShuffleOffset);
74
- shuffle[3 ].y () = item_ct1. get_sub_group (). shuffle ( state[3 ].y (), i + p + iShuffleOffset);
77
+ shuffle[3 ].x () = sycl::select_from_group (g, state[3 ].x (), i + p + iShuffleOffset);
78
+ shuffle[3 ].y () = sycl::select_from_group (g, state[3 ].y (), i + p + iShuffleOffset);
75
79
76
- shuffle[4 ].x () = item_ct1. get_sub_group (). shuffle ( state[4 ].x (), i + p + iShuffleOffset);
77
- shuffle[4 ].y () = item_ct1. get_sub_group (). shuffle ( state[4 ].y (), i + p + iShuffleOffset);
80
+ shuffle[4 ].x () = sycl::select_from_group (g, state[4 ].x (), i + p + iShuffleOffset);
81
+ shuffle[4 ].y () = sycl::select_from_group (g, state[4 ].y (), i + p + iShuffleOffset);
78
82
79
- shuffle[5 ].x () = item_ct1. get_sub_group (). shuffle ( state[5 ].x (), i + p + iShuffleOffset);
80
- shuffle[5 ].y () = item_ct1. get_sub_group (). shuffle ( state[5 ].y (), i + p + iShuffleOffset);
83
+ shuffle[5 ].x () = sycl::select_from_group (g, state[5 ].x (), i + p + iShuffleOffset);
84
+ shuffle[5 ].y () = sycl::select_from_group (g, state[5 ].y (), i + p + iShuffleOffset);
81
85
82
- shuffle[6 ].x () = item_ct1. get_sub_group (). shuffle ( state[6 ].x (), i + p + iShuffleOffset);
83
- shuffle[6 ].y () = item_ct1. get_sub_group (). shuffle ( state[6 ].y (), i + p + iShuffleOffset);
86
+ shuffle[6 ].x () = sycl::select_from_group (g, state[6 ].x (), i + p + iShuffleOffset);
87
+ shuffle[6 ].y () = sycl::select_from_group (g, state[6 ].y (), i + p + iShuffleOffset);
84
88
85
- shuffle[7 ].x () = item_ct1. get_sub_group (). shuffle ( state[7 ].x (), i + p + iShuffleOffset);
86
- shuffle[7 ].y () = item_ct1. get_sub_group (). shuffle ( state[7 ].y (), i + p + iShuffleOffset);
89
+ shuffle[7 ].x () = sycl::select_from_group (g, state[7 ].x (), i + p + iShuffleOffset);
90
+ shuffle[7 ].y () = sycl::select_from_group (g, state[7 ].y (), i + p + iShuffleOffset);
87
91
88
92
89
93
switch (mix_idx) {
@@ -101,7 +105,7 @@ DEV_INLINE bool compute_hash(uint64_t nonce, sycl::uint2 *mix_hash, sycl::nd_ite
101
105
break ;
102
106
}
103
107
104
- init0[p] = item_ct1.get_sub_group (). shuffle ( shuffle[0 ].x (), iShuffleOffset);
108
+ init0[p] = sycl::select_from_group ( item_ct1.get_sub_group (), shuffle[0 ].x (), iShuffleOffset);
105
109
}
106
110
107
111
for (uint32_t a = 0 ; a < ACCESSES; a += 4 )
@@ -115,7 +119,7 @@ DEV_INLINE bool compute_hash(uint64_t nonce, sycl::uint2 *mix_hash, sycl::nd_ite
115
119
{
116
120
offset[p] = fnv (init0[p] ^ (a + b), ((uint32_t *)&mix[p])[b]) % d_dag_size;
117
121
118
- offset[p] = item_ct1.get_sub_group (). shuffle ( offset[p], t + iShuffleOffset);
122
+ offset[p] = sycl::select_from_group ( item_ct1.get_sub_group (), offset[p], t + iShuffleOffset);
119
123
mix[p] = fnv4 (mix[p], d_dag[offset[p]].uint4s [thread_id]);
120
124
}
121
125
}
@@ -127,14 +131,14 @@ DEV_INLINE bool compute_hash(uint64_t nonce, sycl::uint2 *mix_hash, sycl::nd_ite
127
131
128
132
// update mix across threads
129
133
130
- shuffle[0 ].x () = item_ct1.get_sub_group (). shuffle ( thread_mix, 0 + iShuffleOffset);
131
- shuffle[0 ].y () = item_ct1.get_sub_group (). shuffle ( thread_mix, 1 + iShuffleOffset);
132
- shuffle[1 ].x () = item_ct1.get_sub_group (). shuffle ( thread_mix, 2 + iShuffleOffset);
133
- shuffle[1 ].y () = item_ct1.get_sub_group (). shuffle ( thread_mix, 3 + iShuffleOffset);
134
- shuffle[2 ].x () = item_ct1.get_sub_group (). shuffle ( thread_mix, 4 + iShuffleOffset);
135
- shuffle[2 ].y () = item_ct1.get_sub_group (). shuffle ( thread_mix, 5 + iShuffleOffset);
136
- shuffle[3 ].x () = item_ct1.get_sub_group (). shuffle ( thread_mix, 6 + iShuffleOffset);
137
- shuffle[3 ].y () = item_ct1.get_sub_group (). shuffle ( thread_mix, 7 + iShuffleOffset);
134
+ shuffle[0 ].x () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 0 + iShuffleOffset);
135
+ shuffle[0 ].y () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 1 + iShuffleOffset);
136
+ shuffle[1 ].x () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 2 + iShuffleOffset);
137
+ shuffle[1 ].y () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 3 + iShuffleOffset);
138
+ shuffle[2 ].x () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 4 + iShuffleOffset);
139
+ shuffle[2 ].y () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 5 + iShuffleOffset);
140
+ shuffle[3 ].x () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 6 + iShuffleOffset);
141
+ shuffle[3 ].y () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 7 + iShuffleOffset);
138
142
139
143
if ((i + p) == thread_id) {
140
144
// move mix into state:
@@ -157,29 +161,29 @@ DEV_INLINE bool compute_hash(uint64_t nonce, sycl::uint2 *mix_hash, sycl::nd_ite
157
161
// ////sycl::uint2 shuffle[8];
158
162
// ////for (int j = 0; j < 8; j++)
159
163
// ////{
160
- shuffle_0.x () = item_ct1.get_sub_group (). shuffle ( state[0 ].x (), i + p + iShuffleOffset);
161
- shuffle_0.y () = item_ct1.get_sub_group (). shuffle ( state[0 ].y (), i + p + iShuffleOffset);
164
+ shuffle_0.x () = sycl::select_from_group ( item_ct1.get_sub_group (), state[0 ].x (), i + p + iShuffleOffset);
165
+ shuffle_0.y () = sycl::select_from_group ( item_ct1.get_sub_group (), state[0 ].y (), i + p + iShuffleOffset);
162
166
163
- shuffle_1.x () = item_ct1.get_sub_group (). shuffle ( state[1 ].x (), i + p + iShuffleOffset);
164
- shuffle_1.y () = item_ct1.get_sub_group (). shuffle ( state[1 ].y (), i + p + iShuffleOffset);
167
+ shuffle_1.x () = sycl::select_from_group ( item_ct1.get_sub_group (), state[1 ].x (), i + p + iShuffleOffset);
168
+ shuffle_1.y () = sycl::select_from_group ( item_ct1.get_sub_group (), state[1 ].y (), i + p + iShuffleOffset);
165
169
166
- shuffle_2.x () = item_ct1.get_sub_group (). shuffle ( state[2 ].x (), i + p + iShuffleOffset);
167
- shuffle_2.y () = item_ct1.get_sub_group (). shuffle ( state[2 ].y (), i + p + iShuffleOffset);
170
+ shuffle_2.x () = sycl::select_from_group ( item_ct1.get_sub_group (), state[2 ].x (), i + p + iShuffleOffset);
171
+ shuffle_2.y () = sycl::select_from_group ( item_ct1.get_sub_group (), state[2 ].y (), i + p + iShuffleOffset);
168
172
169
- shuffle_3.x () = item_ct1.get_sub_group (). shuffle ( state[3 ].x (), i + p + iShuffleOffset);
170
- shuffle_3.y () = item_ct1.get_sub_group (). shuffle ( state[3 ].y (), i + p + iShuffleOffset);
173
+ shuffle_3.x () = sycl::select_from_group ( item_ct1.get_sub_group (), state[3 ].x (), i + p + iShuffleOffset);
174
+ shuffle_3.y () = sycl::select_from_group ( item_ct1.get_sub_group (), state[3 ].y (), i + p + iShuffleOffset);
171
175
172
- shuffle_4.x () = item_ct1.get_sub_group (). shuffle ( state[4 ].x (), i + p + iShuffleOffset);
173
- shuffle_4.y () = item_ct1.get_sub_group (). shuffle ( state[4 ].y (), i + p + iShuffleOffset);
176
+ shuffle_4.x () = sycl::select_from_group ( item_ct1.get_sub_group (), state[4 ].x (), i + p + iShuffleOffset);
177
+ shuffle_4.y () = sycl::select_from_group ( item_ct1.get_sub_group (), state[4 ].y (), i + p + iShuffleOffset);
174
178
175
- shuffle_5.x () = item_ct1.get_sub_group (). shuffle ( state[5 ].x (), i + p + iShuffleOffset);
176
- shuffle_5.y () = item_ct1.get_sub_group (). shuffle ( state[5 ].y (), i + p + iShuffleOffset);
179
+ shuffle_5.x () = sycl::select_from_group ( item_ct1.get_sub_group (), state[5 ].x (), i + p + iShuffleOffset);
180
+ shuffle_5.y () = sycl::select_from_group ( item_ct1.get_sub_group (), state[5 ].y (), i + p + iShuffleOffset);
177
181
178
- shuffle_6.x () = item_ct1.get_sub_group (). shuffle ( state[6 ].x (), i + p + iShuffleOffset);
179
- shuffle_6.y () = item_ct1.get_sub_group (). shuffle ( state[6 ].y (), i + p + iShuffleOffset);
182
+ shuffle_6.x () = sycl::select_from_group ( item_ct1.get_sub_group (), state[6 ].x (), i + p + iShuffleOffset);
183
+ shuffle_6.y () = sycl::select_from_group ( item_ct1.get_sub_group (), state[6 ].y (), i + p + iShuffleOffset);
180
184
181
- shuffle_7.x () = item_ct1.get_sub_group (). shuffle ( state[7 ].x (), i + p + iShuffleOffset);
182
- shuffle_7.y () = item_ct1.get_sub_group (). shuffle ( state[7 ].y (), i + p + iShuffleOffset);
185
+ shuffle_7.x () = sycl::select_from_group ( item_ct1.get_sub_group (), state[7 ].x (), i + p + iShuffleOffset);
186
+ shuffle_7.y () = sycl::select_from_group ( item_ct1.get_sub_group (), state[7 ].y (), i + p + iShuffleOffset);
183
187
184
188
// ///}
185
189
@@ -198,7 +202,7 @@ DEV_INLINE bool compute_hash(uint64_t nonce, sycl::uint2 *mix_hash, sycl::nd_ite
198
202
break ;
199
203
}
200
204
201
- init0[p] = item_ct1.get_sub_group (). shuffle ( shuffle_0.x (), iShuffleOffset);
205
+ init0[p] = sycl::select_from_group ( item_ct1.get_sub_group (), shuffle_0.x (), iShuffleOffset);
202
206
}
203
207
204
208
// ////for (uint32_t a = 0; a < ACCESSES; a += 4)
@@ -552,14 +556,14 @@ DEV_INLINE bool compute_hash(uint64_t nonce, sycl::uint2 *mix_hash, sycl::nd_ite
552
556
553
557
// update mix across threads
554
558
555
- shuffle_0.x () = item_ct1.get_sub_group (). shuffle ( thread_mix, 0 + iShuffleOffset);
556
- shuffle_0.y () = item_ct1.get_sub_group (). shuffle ( thread_mix, 1 + iShuffleOffset);
557
- shuffle_1.x () = item_ct1.get_sub_group (). shuffle ( thread_mix, 2 + iShuffleOffset);
558
- shuffle_1.y () = item_ct1.get_sub_group (). shuffle ( thread_mix, 3 + iShuffleOffset);
559
- shuffle_2.x () = item_ct1.get_sub_group (). shuffle ( thread_mix, 4 + iShuffleOffset);
560
- shuffle_2.y () = item_ct1.get_sub_group (). shuffle ( thread_mix, 5 + iShuffleOffset);
561
- shuffle_3.x () = item_ct1.get_sub_group (). shuffle ( thread_mix, 6 + iShuffleOffset);
562
- shuffle_3.y () = item_ct1.get_sub_group (). shuffle ( thread_mix, 7 + iShuffleOffset);
559
+ shuffle_0.x () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 0 + iShuffleOffset);
560
+ shuffle_0.y () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 1 + iShuffleOffset);
561
+ shuffle_1.x () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 2 + iShuffleOffset);
562
+ shuffle_1.y () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 3 + iShuffleOffset);
563
+ shuffle_2.x () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 4 + iShuffleOffset);
564
+ shuffle_2.y () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 5 + iShuffleOffset);
565
+ shuffle_3.x () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 6 + iShuffleOffset);
566
+ shuffle_3.y () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 7 + iShuffleOffset);
563
567
564
568
if ((i + p) == thread_id) {
565
569
// move mix into state:
0 commit comments