Skip to content

Commit b6ad7e6

Browse files
committed
Add macro for m_o_relaxed checking in extrinsic_storage
1 parent 1a87372 commit b6ad7e6

File tree

2 files changed

+54
-42
lines changed

2 files changed

+54
-42
lines changed

experimental/extrinsic_storage.h

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,13 @@
1010

1111
// *****************************************************************
1212
// Enable/disable debug instrumentation and statistics printing here
13-
constexpr inline auto debug_instrumentation = false;
13+
constexpr inline auto debug_instrumentation = true;
14+
15+
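// Try with/without m_o_relaxed: to test without it, comment out the two macros below and enable the empty pair instead, so the atomic calls use their default (seq_cst) ordering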
16+
#define M_O_RELAXED , std::memory_order_relaxed
17+
#define M_O_RELAXED_NOCOMMA std::memory_order_relaxed
18+
//#define M_O_RELAXED
19+
//#define M_O_RELAXED_NOCOMMA
1420
// *****************************************************************
1521

1622
#include <algorithm>
@@ -125,7 +131,7 @@ class extrinsic_storage {
125131
auto find_or_insert(void* pobj) noexcept -> Value* {
126132
if constexpr (debug_instrumentation) {
127133
// m_o_relaxed is enough, inc order doesn't matter for totals
128-
instrument_access_count.fetch_add(1, std::memory_order_relaxed);
134+
instrument_access_count.fetch_add(1 M_O_RELAXED);
129135
}
130136
return lookup(pobj, lookup_mode::find_or_insert);
131137
}
@@ -136,7 +142,7 @@ class extrinsic_storage {
136142
auto find(void* pobj) noexcept -> Value* {
137143
if constexpr (debug_instrumentation) {
138144
// m_o_relaxed is enough, inc order doesn't matter for totals
139-
instrument_access_count.fetch_add(1, std::memory_order_relaxed);
145+
instrument_access_count.fetch_add(1 M_O_RELAXED);
140146
}
141147
return lookup(pobj, lookup_mode::find);
142148
}
@@ -147,7 +153,7 @@ class extrinsic_storage {
147153
auto erase(void* pobj) noexcept -> void {
148154
if constexpr (debug_instrumentation) {
149155
// m_o_relaxed is enough, inc order doesn't matter for totals
150-
instrument_erase_count.fetch_add(1, std::memory_order_relaxed);
156+
instrument_erase_count.fetch_add(1 M_O_RELAXED);
151157
}
152158
lookup(pobj, lookup_mode::erase);
153159
}
@@ -202,7 +208,7 @@ class extrinsic_storage {
202208
assert( 0 <= hash && hash < Buckets );
203209
if constexpr (debug_instrumentation) {
204210
// m_o_relaxed is enough, inc order doesn't matter for totals
205-
instrument_bucket_access[hash].fetch_add(1, std::memory_order_relaxed);
211+
instrument_bucket_access[hash].fetch_add(1 M_O_RELAXED);
206212
}
207213

208214
// 1. If we find key==pobj, we're done
@@ -212,9 +218,9 @@ class extrinsic_storage {
212218
// (*) m_o_relaxed is enough, equality means we own the slot
213219
// and so this thread already has exclusive access to *pobj
214220
// and its .values data
215-
if (pchunk->keys[i].load(std::memory_order_relaxed) == pobj) {
221+
if (pchunk->keys[i].load(M_O_RELAXED_NOCOMMA) == pobj) {
216222
if (mode == lookup_mode::erase) {
217-
pchunk->keys[i].store(nullptr, std::memory_order_relaxed);
223+
pchunk->keys[i].store(nullptr M_O_RELAXED);
218224
return nullptr;
219225
}
220226
// Else
@@ -225,7 +231,7 @@ class extrinsic_storage {
225231
// it is first set to non-null, and if a new chunk(s) was just
226232
// concurrently added by a different thread then that new
227233
// chunk(s) cannot contain an entry for pobj
228-
pchunk = pchunk->next.load(std::memory_order_relaxed);
234+
pchunk = pchunk->next.load(M_O_RELAXED_NOCOMMA);
229235
}
230236

231237
// 2. Otherwise, if we're not allowed to insert we're done
@@ -234,7 +240,7 @@ class extrinsic_storage {
234240
if constexpr (debug_instrumentation) {
235241
if (mode == lookup_mode::erase) {
236242
// m_o_relaxed is enough, inc order doesn't matter for totals
237-
instrument_erase_fail_count.fetch_add(1, std::memory_order_relaxed);
243+
instrument_erase_fail_count.fetch_add(1 M_O_RELAXED);
238244
}
239245
}
240246
return nullptr;
@@ -249,22 +255,22 @@ class extrinsic_storage {
249255
void* null = nullptr;
250256
if (
251257
// m_o_relaxed is enough for this first load...
252-
pchunk->keys[i].load(std::memory_order_relaxed) == nullptr
258+
pchunk->keys[i].load(M_O_RELAXED_NOCOMMA) == nullptr
253259
// ... because it's just a best-effort optimization to
254260
// avoid this maybe-unneeded c_e_weak (which is safely SC)
255261
&& pchunk->keys[i].compare_exchange_weak( null, pobj )
256262
) {
257263
if constexpr (debug_instrumentation) {
258264
// m_o_relaxed is enough, inc order doesn't matter for totals
259-
instrument_insert_count.fetch_add(1, std::memory_order_relaxed);
265+
instrument_insert_count.fetch_add(1 M_O_RELAXED);
260266
}
261267
return &pchunk->values[i];
262268
}
263269
}
264270
// (*) m_o_relaxed is enough here, because if a new chunk(s)
265271
// was just concurrently added by a different thread then we'll
266272
// just add an extra chunk which is fine
267-
if ( pchunk->next.load(std::memory_order_relaxed) == nullptr ) {
273+
if ( pchunk->next.load(M_O_RELAXED_NOCOMMA) == nullptr ) {
268274
break;
269275
}
270276
pchunk = pchunk->next.load();
@@ -290,7 +296,7 @@ class extrinsic_storage {
290296

291297
if constexpr (debug_instrumentation) {
292298
// m_o_relaxed is enough, inc order doesn't matter for totals
293-
instrument_alloc_count.fetch_add(1, std::memory_order_relaxed);
299+
instrument_alloc_count.fetch_add(1 M_O_RELAXED);
294300
}
295301
return ret;
296302
}

experimental/union_test.cpp

Lines changed: 35 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <cstdint>
1313
#include <fstream>
1414
#include <iostream>
15+
#include <map>
1516
#include <source_location>
1617
#include <thread>
1718
#include <vector>
@@ -151,14 +152,9 @@ auto test(int threads = 1) -> void
151152
}
152153
};
153154

154-
if (threads == 1) {
155-
run();
156-
}
157-
else {
158-
std::vector<std::jthread> thds;
159-
for (auto i = 0; i < threads; ++i) {
160-
thds.emplace_back( run );
161-
}
155+
std::vector<std::jthread> thds;
156+
for (auto i = 0; i < threads; ++i) {
157+
thds.emplace_back( run );
162158
}
163159
}
164160

@@ -176,29 +172,39 @@ int main()
176172
auto tot_raw = int64_t{0};
177173
auto tot_chk = int64_t{0};
178174

179-
// Repeat test sequence a couple of times
180-
for (auto reps = 0; reps < 2; ++reps)
175+
auto stats = std::map<int,uint64_t>{};
176+
177+
// Repeat test sequence a few times
178+
for (auto reps = 0; reps < 5; ++reps)
181179
{
182-
// Run "raw" vs "checked" test for 1, 2, 4, 8, 16, 32, and 64 threads
183-
for (auto i = 1; i <= 64; i *= 2)
184-
{
185-
std::cout << "# threads: " << i << "\n";
186-
187-
// First without checks
188-
auto t = timer{};
189-
test(i);
190-
auto raw_time = t.microseconds();
191-
std::cout << " raw: " << print(raw_time) << "\n";
192-
tot_raw += raw_time;
193-
194-
// Then with checks, via specifying <true>
195-
t = timer{};
196-
test<true>(i);
197-
auto chk_time = t.microseconds();
198-
std::cout << " checked: " << print(chk_time) << "\n";
199-
tot_chk += chk_time;
200-
}
180+
// Run "raw" vs "checked" test for 1, 2, 4, 8, 16, 32, 64, and 128 threads
181+
for (auto i = 1; i <= 128; i *= 2)
182+
{
183+
////std::cout << "# threads: " << i << "\n";
184+
185+
// First without checks
186+
auto t = timer{};
187+
test(i);
188+
auto raw_time = t.microseconds();
189+
////std::cout << " raw: " << print(raw_time) << "\n";
190+
tot_raw += raw_time;
191+
192+
// Then with checks, via specifying <true>
193+
t = timer{};
194+
test<true>(i);
195+
auto chk_time = t.microseconds();
196+
////std::cout << " checked: " << print(chk_time) << "\n";
197+
stats[i] += chk_time-raw_time;
198+
tot_chk += chk_time;
199+
}
200+
}
201+
202+
// Print the accumulated (checked - raw) time difference for each thread count, space-separated on one line (Excel-friendly)
203+
for (auto [threads, timings] : stats) {
204+
if (threads != 1) { std::cout << " "; }
205+
std::cout << timings;
201206
}
207+
std::cout << "\n";
202208

203209
std::cout << "totals\n"
204210
<< " raw: " << print(tot_raw) << "\n"

0 commit comments

Comments
 (0)