Skip to content

Commit e4b15ce

Browse files
authored
Improve block find performance (llvm#412)
1 parent af88610 commit e4b15ce

File tree

1 file changed

+17
-25
lines changed
  • amd/device-libs/ockl/src

1 file changed

+17
-25
lines changed

amd/device-libs/ockl/src/dm.cl

Lines changed: 17 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -51,26 +51,25 @@ struct kind_info_s {
5151
uint first_unusable;
5252
uint gap_unusable;
5353
uint pattern_unusable;
54-
uint spread_factor;
5554
};
5655

5756
static const __constant struct kind_info_s kinfo[NUM_KINDS] = {
58-
{ /* 0: 16 */ 130054, 129546, 110114, 16288, 6, 256, 0x00000000, 4195 },
59-
{ /* 1: 24 */ 86927, 86758, 73744, 10904, 399, 512, 0x00000000, 2804 },
60-
{ /* 2: 32 */ 65280, 64770, 55054, 8192, 0, 128, 0x00000000, 2107 },
61-
{ /* 3: 48 */ 43576, 43406, 36895, 5504, 56, 256, 0x00000000, 1405 },
62-
{ /* 4: 64 */ 32703, 32193, 27364, 4160, 63, 64, 0x00000000, 1054 },
63-
{ /* 5: 96 */ 21816, 21646, 18399, 2816, 56, 128, 0x00000000, 703 },
64-
{ /* 6: 128 */ 16367, 15856, 13477, 2176, 15, 32, 0x00008000, 527 },
65-
{ /* 7: 192 */ 10915, 10745, 9133, 1472, 35, 64, 0x00000000, 352 },
66-
{ /* 8: 256 */ 8187, 7676, 6524, 1280, 11, 16, 0x08000800, 265 },
67-
{ /* 9: 384 */ 5459, 5289, 4495, 896, 19, 32, 0x00080000, 176 },
68-
{ /* 10: 512 */ 4094, 3583, 3045, 1024, 6, 8, 0x40404040, 133 },
69-
{ /* 11: 768 */ 2730, 2560, 2176, 512, 10, 16, 0x04000400, 89 },
70-
{ /* 12: 1024 */ 2047, 1536, 1305, 1024, 3, 4, 0x88888888, 66 },
71-
{ /* 13: 1536 */ 1365, 1195, 1015, 512, 5, 8, 0x20202020, 44 },
72-
{ /* 14: 2048 */ 1023, 512, 435, 2048, 1, 2, 0xaaaaaaaa, 34 },
73-
{ /* 15: 3072 */ 682, 512, 435, 2048, 2, 4, 0x44444444, 35 },
57+
{ /* 0: 16 */ 130054, 129546, 110114, 16288, 6, 256, 0x00000000 },
58+
{ /* 1: 24 */ 86927, 86758, 73744, 10904, 399, 512, 0x00000000 },
59+
{ /* 2: 32 */ 65280, 64770, 55054, 8192, 0, 128, 0x00000000 },
60+
{ /* 3: 48 */ 43576, 43406, 36895, 5504, 56, 256, 0x00000000 },
61+
{ /* 4: 64 */ 32703, 32193, 27364, 4160, 63, 64, 0x00000000 },
62+
{ /* 5: 96 */ 21816, 21646, 18399, 2816, 56, 128, 0x00000000 },
63+
{ /* 6: 128 */ 16367, 15856, 13477, 2176, 15, 32, 0x00008000 },
64+
{ /* 7: 192 */ 10915, 10745, 9133, 1472, 35, 64, 0x00000000 },
65+
{ /* 8: 256 */ 8187, 7676, 6524, 1280, 11, 16, 0x08000800 },
66+
{ /* 9: 384 */ 5459, 5289, 4495, 896, 19, 32, 0x00080000 },
67+
{ /* 10: 512 */ 4094, 3583, 3045, 1024, 6, 8, 0x40404040 },
68+
{ /* 11: 768 */ 2730, 2560, 2176, 512, 10, 16, 0x04000400 },
69+
{ /* 12: 1024 */ 2047, 1536, 1305, 1024, 3, 4, 0x88888888 },
70+
{ /* 13: 1536 */ 1365, 1195, 1015, 512, 5, 8, 0x20202020 },
71+
{ /* 14: 2048 */ 1023, 512, 435, 2048, 1, 2, 0xaaaaaaaa },
72+
{ /* 15: 3072 */ 682, 512, 435, 2048, 2, 4, 0x44444444 },
7473
};
7574

7675
// A slab is a chunk of memory used to provide "block"s whose addresses are
@@ -241,13 +240,6 @@ pattern_unusable(kind_t k)
241240
return kinfo[k].pattern_unusable;
242241
}
243242

244-
// The multiplier used to spread out the probes of individual lanes while searching a slab of kind k
245-
static uint
246-
spread_factor(kind_t k)
247-
{
248-
return kinfo[k].spread_factor;
249-
}
250-
251243
// The number of active lanes at this point
252244
static uint
253245
active_lane_count(void)
@@ -849,7 +841,7 @@ block_find(__global sdata_t *sdp)
849841
uint i = 0;
850842
if (aid == 0)
851843
i = AFA(&sp->start, nactive, memory_order_relaxed);
852-
i = ((first(i) + aid) * spread_factor(k) % num_blocks(k)) >> 5;
844+
i = (((first(i) + aid) << 5) % num_blocks(k)) >> 5;
853845

854846
uint n = (num_blocks(k) + 31) >> 5;
855847

0 commit comments

Comments
 (0)