31
31
#ifdef USE_LOOP_UNROLLING
32
32
#define mix_and_shuffle (t, a, p, b, thread_id ) \
33
33
offset[p] = fnv(init0[p] ^ (a + b), ((uint32_t *)&mix[p])[b]) % d_dag_size; \
34
- offset[p] = item_ct1.get_sub_group().shuffle( offset[p], t + iShuffleOffset); \
34
+ offset[p] = sycl::select_from_group( item_ct1.get_sub_group(), offset[p], t + iShuffleOffset); \
35
35
mix[p] = fnv4(mix[p], d_dag[offset[p]].uint4s[thread_id]);
36
36
#endif
37
37
@@ -48,9 +48,13 @@ DEV_INLINE bool compute_hash(uint64_t nonce, sycl::uint2 *mix_hash, sycl::nd_ite
48
48
const int thread_id = item_ct1.get_local_id (0 ) & (THREADS_PER_HASH - 1 );
49
49
const int mix_idx = thread_id & 3 ;
50
50
51
- int const iSubGroupThreadId (item_ct1.get_sub_group ().get_local_id ());
51
+ auto g = item_ct1.get_sub_group ();
52
+
53
+ // /int const iSubGroupThreadId(item_ct1.get_sub_group().get_local_id());
54
+ int const iSubGroupThreadId (g.get_local_id ());
52
55
int const iShuffleOffset (pdShuffleOffsets[iSubGroupThreadId]);
53
56
57
+
54
58
#ifndef USE_LOOP_UNROLLING
55
59
56
60
for (int i = 0 ; i < THREADS_PER_HASH; i += _PARALLEL_HASH) {
@@ -61,29 +65,29 @@ DEV_INLINE bool compute_hash(uint64_t nonce, sycl::uint2 *mix_hash, sycl::nd_ite
61
65
// share init among threads
62
66
for (int p = 0 ; p < _PARALLEL_HASH; p++) {
63
67
sycl::uint2 shuffle[8 ];
64
- shuffle[0 ].x () = item_ct1. get_sub_group (). shuffle ( state[0 ].x (), i + p + iShuffleOffset);
65
- shuffle[0 ].y () = item_ct1. get_sub_group (). shuffle ( state[0 ].y (), i + p + iShuffleOffset);
68
+ shuffle[0 ].x () = sycl::select_from_group (g, state[0 ].x (), i + p + iShuffleOffset);
69
+ shuffle[0 ].y () = sycl::select_from_group (g, state[0 ].y (), i + p + iShuffleOffset);
66
70
67
- shuffle[1 ].x () = item_ct1. get_sub_group (). shuffle ( state[1 ].x (), i + p + iShuffleOffset);
68
- shuffle[1 ].y () = item_ct1. get_sub_group (). shuffle ( state[1 ].y (), i + p + iShuffleOffset);
71
+ shuffle[1 ].x () = sycl::select_from_group (g, state[1 ].x (), i + p + iShuffleOffset);
72
+ shuffle[1 ].y () = sycl::select_from_group (g, state[1 ].y (), i + p + iShuffleOffset);
69
73
70
- shuffle[2 ].x () = item_ct1. get_sub_group (). shuffle ( state[2 ].x (), i + p + iShuffleOffset);
71
- shuffle[2 ].y () = item_ct1. get_sub_group (). shuffle ( state[2 ].y (), i + p + iShuffleOffset);
74
+ shuffle[2 ].x () = sycl::select_from_group (g, state[2 ].x (), i + p + iShuffleOffset);
75
+ shuffle[2 ].y () = sycl::select_from_group (g, state[2 ].y (), i + p + iShuffleOffset);
72
76
73
- shuffle[3 ].x () = item_ct1. get_sub_group (). shuffle ( state[3 ].x (), i + p + iShuffleOffset);
74
- shuffle[3 ].y () = item_ct1. get_sub_group (). shuffle ( state[3 ].y (), i + p + iShuffleOffset);
77
+ shuffle[3 ].x () = sycl::select_from_group (g, state[3 ].x (), i + p + iShuffleOffset);
78
+ shuffle[3 ].y () = sycl::select_from_group (g, state[3 ].y (), i + p + iShuffleOffset);
75
79
76
- shuffle[4 ].x () = item_ct1. get_sub_group (). shuffle ( state[4 ].x (), i + p + iShuffleOffset);
77
- shuffle[4 ].y () = item_ct1. get_sub_group (). shuffle ( state[4 ].y (), i + p + iShuffleOffset);
80
+ shuffle[4 ].x () = sycl::select_from_group (g, state[4 ].x (), i + p + iShuffleOffset);
81
+ shuffle[4 ].y () = sycl::select_from_group (g, state[4 ].y (), i + p + iShuffleOffset);
78
82
79
- shuffle[5 ].x () = item_ct1. get_sub_group (). shuffle ( state[5 ].x (), i + p + iShuffleOffset);
80
- shuffle[5 ].y () = item_ct1. get_sub_group (). shuffle ( state[5 ].y (), i + p + iShuffleOffset);
83
+ shuffle[5 ].x () = sycl::select_from_group (g, state[5 ].x (), i + p + iShuffleOffset);
84
+ shuffle[5 ].y () = sycl::select_from_group (g, state[5 ].y (), i + p + iShuffleOffset);
81
85
82
- shuffle[6 ].x () = item_ct1. get_sub_group (). shuffle ( state[6 ].x (), i + p + iShuffleOffset);
83
- shuffle[6 ].y () = item_ct1. get_sub_group (). shuffle ( state[6 ].y (), i + p + iShuffleOffset);
86
+ shuffle[6 ].x () = sycl::select_from_group (g, state[6 ].x (), i + p + iShuffleOffset);
87
+ shuffle[6 ].y () = sycl::select_from_group (g, state[6 ].y (), i + p + iShuffleOffset);
84
88
85
- shuffle[7 ].x () = item_ct1. get_sub_group (). shuffle ( state[7 ].x (), i + p + iShuffleOffset);
86
- shuffle[7 ].y () = item_ct1. get_sub_group (). shuffle ( state[7 ].y (), i + p + iShuffleOffset);
89
+ shuffle[7 ].x () = sycl::select_from_group (g, state[7 ].x (), i + p + iShuffleOffset);
90
+ shuffle[7 ].y () = sycl::select_from_group (g, state[7 ].y (), i + p + iShuffleOffset);
87
91
88
92
assert (mix_idx <= 3 );
89
93
@@ -102,7 +106,7 @@ DEV_INLINE bool compute_hash(uint64_t nonce, sycl::uint2 *mix_hash, sycl::nd_ite
102
106
break ;
103
107
}
104
108
105
- init0[p] = item_ct1.get_sub_group (). shuffle ( shuffle[0 ].x (), iShuffleOffset);
109
+ init0[p] = sycl::select_from_group ( item_ct1.get_sub_group (), shuffle[0 ].x (), iShuffleOffset);
106
110
}
107
111
108
112
for (uint32_t a = 0 ; a < ACCESSES; a += 4 )
@@ -116,7 +120,7 @@ DEV_INLINE bool compute_hash(uint64_t nonce, sycl::uint2 *mix_hash, sycl::nd_ite
116
120
{
117
121
offset[p] = fnv (init0[p] ^ (a + b), ((uint32_t *)&mix[p])[b]) % d_dag_size;
118
122
119
- offset[p] = item_ct1.get_sub_group (). shuffle ( offset[p], t + iShuffleOffset);
123
+ offset[p] = sycl::select_from_group ( item_ct1.get_sub_group (), offset[p], t + iShuffleOffset);
120
124
mix[p] = fnv4 (mix[p], d_dag[offset[p]].uint4s [thread_id]);
121
125
}
122
126
}
@@ -128,14 +132,14 @@ DEV_INLINE bool compute_hash(uint64_t nonce, sycl::uint2 *mix_hash, sycl::nd_ite
128
132
129
133
// update mix across threads
130
134
131
- shuffle[0 ].x () = item_ct1.get_sub_group (). shuffle ( thread_mix, 0 + iShuffleOffset);
132
- shuffle[0 ].y () = item_ct1.get_sub_group (). shuffle ( thread_mix, 1 + iShuffleOffset);
133
- shuffle[1 ].x () = item_ct1.get_sub_group (). shuffle ( thread_mix, 2 + iShuffleOffset);
134
- shuffle[1 ].y () = item_ct1.get_sub_group (). shuffle ( thread_mix, 3 + iShuffleOffset);
135
- shuffle[2 ].x () = item_ct1.get_sub_group (). shuffle ( thread_mix, 4 + iShuffleOffset);
136
- shuffle[2 ].y () = item_ct1.get_sub_group (). shuffle ( thread_mix, 5 + iShuffleOffset);
137
- shuffle[3 ].x () = item_ct1.get_sub_group (). shuffle ( thread_mix, 6 + iShuffleOffset);
138
- shuffle[3 ].y () = item_ct1.get_sub_group (). shuffle ( thread_mix, 7 + iShuffleOffset);
135
+ shuffle[0 ].x () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 0 + iShuffleOffset);
136
+ shuffle[0 ].y () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 1 + iShuffleOffset);
137
+ shuffle[1 ].x () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 2 + iShuffleOffset);
138
+ shuffle[1 ].y () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 3 + iShuffleOffset);
139
+ shuffle[2 ].x () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 4 + iShuffleOffset);
140
+ shuffle[2 ].y () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 5 + iShuffleOffset);
141
+ shuffle[3 ].x () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 6 + iShuffleOffset);
142
+ shuffle[3 ].y () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 7 + iShuffleOffset);
139
143
140
144
if ((i + p) == thread_id) {
141
145
// move mix into state:
@@ -158,29 +162,29 @@ DEV_INLINE bool compute_hash(uint64_t nonce, sycl::uint2 *mix_hash, sycl::nd_ite
158
162
// ////sycl::uint2 shuffle[8];
159
163
// ////for (int j = 0; j < 8; j++)
160
164
// ////{
161
- shuffle_0.x () = item_ct1.get_sub_group (). shuffle ( state[0 ].x (), i + p + iShuffleOffset);
162
- shuffle_0.y () = item_ct1.get_sub_group (). shuffle ( state[0 ].y (), i + p + iShuffleOffset);
165
+ shuffle_0.x () = sycl::select_from_group ( item_ct1.get_sub_group (), state[0 ].x (), i + p + iShuffleOffset);
166
+ shuffle_0.y () = sycl::select_from_group ( item_ct1.get_sub_group (), state[0 ].y (), i + p + iShuffleOffset);
163
167
164
- shuffle_1.x () = item_ct1.get_sub_group (). shuffle ( state[1 ].x (), i + p + iShuffleOffset);
165
- shuffle_1.y () = item_ct1.get_sub_group (). shuffle ( state[1 ].y (), i + p + iShuffleOffset);
168
+ shuffle_1.x () = sycl::select_from_group ( item_ct1.get_sub_group (), state[1 ].x (), i + p + iShuffleOffset);
169
+ shuffle_1.y () = sycl::select_from_group ( item_ct1.get_sub_group (), state[1 ].y (), i + p + iShuffleOffset);
166
170
167
- shuffle_2.x () = item_ct1.get_sub_group (). shuffle ( state[2 ].x (), i + p + iShuffleOffset);
168
- shuffle_2.y () = item_ct1.get_sub_group (). shuffle ( state[2 ].y (), i + p + iShuffleOffset);
171
+ shuffle_2.x () = sycl::select_from_group ( item_ct1.get_sub_group (), state[2 ].x (), i + p + iShuffleOffset);
172
+ shuffle_2.y () = sycl::select_from_group ( item_ct1.get_sub_group (), state[2 ].y (), i + p + iShuffleOffset);
169
173
170
- shuffle_3.x () = item_ct1.get_sub_group (). shuffle ( state[3 ].x (), i + p + iShuffleOffset);
171
- shuffle_3.y () = item_ct1.get_sub_group (). shuffle ( state[3 ].y (), i + p + iShuffleOffset);
174
+ shuffle_3.x () = sycl::select_from_group ( item_ct1.get_sub_group (), state[3 ].x (), i + p + iShuffleOffset);
175
+ shuffle_3.y () = sycl::select_from_group ( item_ct1.get_sub_group (), state[3 ].y (), i + p + iShuffleOffset);
172
176
173
- shuffle_4.x () = item_ct1.get_sub_group (). shuffle ( state[4 ].x (), i + p + iShuffleOffset);
174
- shuffle_4.y () = item_ct1.get_sub_group (). shuffle ( state[4 ].y (), i + p + iShuffleOffset);
177
+ shuffle_4.x () = sycl::select_from_group ( item_ct1.get_sub_group (), state[4 ].x (), i + p + iShuffleOffset);
178
+ shuffle_4.y () = sycl::select_from_group ( item_ct1.get_sub_group (), state[4 ].y (), i + p + iShuffleOffset);
175
179
176
- shuffle_5.x () = item_ct1.get_sub_group (). shuffle ( state[5 ].x (), i + p + iShuffleOffset);
177
- shuffle_5.y () = item_ct1.get_sub_group (). shuffle ( state[5 ].y (), i + p + iShuffleOffset);
180
+ shuffle_5.x () = sycl::select_from_group ( item_ct1.get_sub_group (), state[5 ].x (), i + p + iShuffleOffset);
181
+ shuffle_5.y () = sycl::select_from_group ( item_ct1.get_sub_group (), state[5 ].y (), i + p + iShuffleOffset);
178
182
179
- shuffle_6.x () = item_ct1.get_sub_group (). shuffle ( state[6 ].x (), i + p + iShuffleOffset);
180
- shuffle_6.y () = item_ct1.get_sub_group (). shuffle ( state[6 ].y (), i + p + iShuffleOffset);
183
+ shuffle_6.x () = sycl::select_from_group ( item_ct1.get_sub_group (), state[6 ].x (), i + p + iShuffleOffset);
184
+ shuffle_6.y () = sycl::select_from_group ( item_ct1.get_sub_group (), state[6 ].y (), i + p + iShuffleOffset);
181
185
182
- shuffle_7.x () = item_ct1.get_sub_group (). shuffle ( state[7 ].x (), i + p + iShuffleOffset);
183
- shuffle_7.y () = item_ct1.get_sub_group (). shuffle ( state[7 ].y (), i + p + iShuffleOffset);
186
+ shuffle_7.x () = sycl::select_from_group ( item_ct1.get_sub_group (), state[7 ].x (), i + p + iShuffleOffset);
187
+ shuffle_7.y () = sycl::select_from_group ( item_ct1.get_sub_group (), state[7 ].y (), i + p + iShuffleOffset);
184
188
185
189
// ///}
186
190
assert (mix_idx <= 3 );
@@ -200,7 +204,7 @@ DEV_INLINE bool compute_hash(uint64_t nonce, sycl::uint2 *mix_hash, sycl::nd_ite
200
204
break ;
201
205
}
202
206
203
- init0[p] = item_ct1.get_sub_group (). shuffle ( shuffle_0.x (), iShuffleOffset);
207
+ init0[p] = sycl::select_from_group ( item_ct1.get_sub_group (), shuffle_0.x (), iShuffleOffset);
204
208
}
205
209
206
210
// ////for (uint32_t a = 0; a < ACCESSES; a += 4)
@@ -554,14 +558,14 @@ DEV_INLINE bool compute_hash(uint64_t nonce, sycl::uint2 *mix_hash, sycl::nd_ite
554
558
555
559
// update mix across threads
556
560
557
- shuffle_0.x () = item_ct1.get_sub_group (). shuffle ( thread_mix, 0 + iShuffleOffset);
558
- shuffle_0.y () = item_ct1.get_sub_group (). shuffle ( thread_mix, 1 + iShuffleOffset);
559
- shuffle_1.x () = item_ct1.get_sub_group (). shuffle ( thread_mix, 2 + iShuffleOffset);
560
- shuffle_1.y () = item_ct1.get_sub_group (). shuffle ( thread_mix, 3 + iShuffleOffset);
561
- shuffle_2.x () = item_ct1.get_sub_group (). shuffle ( thread_mix, 4 + iShuffleOffset);
562
- shuffle_2.y () = item_ct1.get_sub_group (). shuffle ( thread_mix, 5 + iShuffleOffset);
563
- shuffle_3.x () = item_ct1.get_sub_group (). shuffle ( thread_mix, 6 + iShuffleOffset);
564
- shuffle_3.y () = item_ct1.get_sub_group (). shuffle ( thread_mix, 7 + iShuffleOffset);
561
+ shuffle_0.x () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 0 + iShuffleOffset);
562
+ shuffle_0.y () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 1 + iShuffleOffset);
563
+ shuffle_1.x () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 2 + iShuffleOffset);
564
+ shuffle_1.y () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 3 + iShuffleOffset);
565
+ shuffle_2.x () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 4 + iShuffleOffset);
566
+ shuffle_2.y () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 5 + iShuffleOffset);
567
+ shuffle_3.x () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 6 + iShuffleOffset);
568
+ shuffle_3.y () = sycl::select_from_group ( item_ct1.get_sub_group (), thread_mix, 7 + iShuffleOffset);
565
569
566
570
if ((i + p) == thread_id) {
567
571
// move mix into state:
0 commit comments