Skip to content

Commit 50d93cb

Browse files
committed
trivial destructor req
1 parent d810109 commit 50d93cb

File tree

6 files changed

+86
-338
lines changed

6 files changed

+86
-338
lines changed

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
55
# Note: update this to your new project's name and version
66
project(
77
RiftenDeque
8-
VERSION 1.2.0
8+
VERSION 2.0.0
99
LANGUAGES CXX
1010
)
1111

@@ -29,7 +29,7 @@ CPMAddPackage("gh:TheLartians/PackageProject.cmake@1.6.0")
2929

3030
add_library(RiftenDeque INTERFACE "include/riften/deque.hpp")
3131

32-
target_compile_features(RiftenDeque INTERFACE cxx_std_17)
32+
target_compile_features(RiftenDeque INTERFACE cxx_std_20)
3333

3434
# Enforce standards conformance on MSVC
3535
target_compile_options(RiftenDeque INTERFACE "$<$<COMPILE_LANG_AND_ID:CXX,MSVC>:/permissive>")

README.md

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,23 +7,22 @@ This implementation is based on:
77
- https://github.com/taskflow/work-stealing-queue
88
- https://github.com/ssbl/concurrent-deque
99

10-
`riften::Deque` places no constraint on the types which can be placed in the deque and has no memory overhead associated with buffer recycling. Furthermore, when possible
10+
`riften::Deque` places very few constraints on the types which can be placed in the deque (they must be trivially destructible and have nothrow move constructor/assignment operators) and has no memory overhead associated with buffer recycling.
1111

1212
## Usage
1313

1414
```C++
15-
// #include <string>
1615
// #include <thread>
1716

1817
// #include "riften/deque.hpp"
1918

20-
// Work-stealing deque of strings
21-
riften::Deque<std::string> deque;
19+
// Work-stealing deque of ints
20+
riften::Deque<int> deque;
2221

2322
// One thread can push and pop items from one end (like a stack)
2423
std::thread owner([&]() {
2524
for (int i = 0; i < 10000; i = i + 1) {
26-
deque.emplace(std::to_string(i));
25+
deque.emplace(i);
2726
}
2827
while (!deque.empty()) {
2928
std::optional item = deque.pop();

include/riften/deque.hpp

Lines changed: 74 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,18 @@
22

33
#include <atomic>
44
#include <cassert>
5+
#include <cstddef>
56
#include <cstdint>
67
#include <memory>
8+
#include <new>
79
#include <optional>
810
#include <type_traits>
911
#include <utility>
1012
#include <vector>
1113

12-
// This (standalone) file implements the deque described in the papers, "Correct and Efficient
13-
// Work-Stealing for Weak Memory Models," and "Dynamic Circular Work-Stealing Deque". Both are avaliable
14-
// in 'reference/'.
14+
// This (stand-alone) file implements the deque described in the papers, "Correct and Efficient
15+
// Work-Stealing for Weak Memory Models," and "Dynamic Circular Work-Stealing Deque". Both are
16+
// available in 'reference/'.
1517

1618
namespace riften {
1719

@@ -27,11 +29,15 @@ template <typename T> struct RingBuff {
2729

2830
std::int64_t capacity() const noexcept { return _cap; }
2931

30-
// Relaxed store at modulo index
31-
void store(std::int64_t i, T x) noexcept { _buff[i & _mask].store(x, std::memory_order_relaxed); }
32+
// Store at modulo index
33+
void store(std::int64_t i, T&& x) noexcept requires std::is_nothrow_move_assignable_v<T> {
34+
_buff[i & _mask] = std::move(x);
35+
}
3236

33-
// Relaxed load at modulo index
34-
T load(std::int64_t i) const noexcept { return _buff[i & _mask].load(std::memory_order_relaxed); }
37+
// Load at modulo index
38+
T load(std::int64_t i) const noexcept requires std::is_nothrow_move_constructible_v<T> {
39+
return _buff[i & _mask];
40+
}
3541

3642
// Allocates and returns a new ring buffer, copies elements in range [b, t) into the new buffer.
3743
RingBuff<T>* resize(std::int64_t b, std::int64_t t) const {
@@ -44,34 +50,28 @@ template <typename T> struct RingBuff {
4450

4551
private:
4652
std::int64_t _cap; // Capacity of the buffer
47-
std::int64_t _mask; // Bitmask to perform modulo capacity operations
53+
std::int64_t _mask; // Bit mask to perform modulo capacity operations
4854

49-
#if !__cpp_lib_smart_ptr_for_overwrite
50-
std::unique_ptr<std::atomic<T>[]> _buff = std::make_unique<std::atomic<T>[]>(_cap);
51-
#else
52-
std::unique_ptr<std::atomic<T>[]> _buff = std::make_unique_for_overwrite<std::atomic<T>[]>(_cap);
53-
#endif
55+
std::unique_ptr<T[]> _buff = std::make_unique_for_overwrite<T[]>(_cap);
5456
};
5557

56-
template <typename T> struct is_always_lock_free {
57-
static constexpr bool value = std::atomic<T>::is_always_lock_free;
58-
};
58+
} // namespace detail
5959

60-
template <typename T> static constexpr bool lock_free_v
61-
= std::conjunction_v<std::is_trivially_copyable<T>,
62-
std::is_copy_constructible<T>,
63-
std::is_move_constructible<T>,
64-
std::is_copy_assignable<T>,
65-
std::is_move_assignable<T>,
66-
is_always_lock_free<T>>;
60+
#ifdef __cpp_lib_hardware_interference_size
61+
using std::hardware_destructive_interference_size;
62+
#else
63+
// 64 bytes on x86-64 │ L1_CACHE_BYTES │ L1_CACHE_SHIFT │ __cacheline_aligned │ ...
64+
inline constexpr std::size_t hardware_destructive_interference_size = 2 * sizeof(std::max_align_t);
65+
#endif
6766

68-
} // namespace detail
67+
template <typename T>
68+
concept trivially_destructible = std::is_trivially_destructible_v<T>;
6969

70-
// Lock-free single-producer multiple-consumer deque. There are no constraints on the type `T` that can
71-
// be stored. Only the deque owner can perform pop and push operations where the deque behaves like a
72-
// stack. Others can (only) steal data from the deque, they see a FIFO queue. All threads must have
73-
// finished using the deque before it is destructed.
74-
template <typename T> class Deque {
70+
// Lock-free single-producer multiple-consumer deque. Only the deque owner can perform pop and push
71+
// operations where the deque behaves like a stack. Others can (only) steal data from the deque, they see
72+
// a FIFO queue. All threads must have finished using the deque before it is destructed. T must be
73+
// trivially destructible and have nothrow move constructor/assignment operators.
74+
template <trivially_destructible T> class Deque {
7575
public:
7676
// Constructs the deque with a given capacity (must be a power of 2)
7777
explicit Deque(std::int64_t cap = 1024);
@@ -89,155 +89,133 @@ template <typename T> class Deque {
8989
// Test if empty at instance of call
9090
bool empty() const noexcept;
9191

92-
// Emplace an item to the deque. Only the owner thread can insert an item to the deque. The operation
93-
// can trigger the deque to resize its cap if more space is required. Provides the strong exception
94-
// garantee.
92+
// Emplace an item to the deque. Only the owner thread can insert an item to the deque. The
93+
// operation can trigger the deque to resize its cap if more space is required. Provides the
94+
// strong exception guarantee.
9595
template <typename... Args> void emplace(Args&&... args);
9696

97-
// Pops out an item from the deque. Only the owner thread can pop out an item from the deque. The
98-
// return can be a std::nullopt if this operation failed (empty deque).
97+
// Pops out an item from the deque. Only the owner thread can pop out an item from the deque.
98+
// The return can be a std::nullopt if this operation fails (empty deque).
9999
std::optional<T> pop() noexcept;
100100

101-
// Steals an item from the deque Any threads can try to steal an item from the deque. The return can
102-
// be a std::nullopt if this operation failed (not necessary empty).
101+
// Steals an item from the deque. Any thread can try to steal an item from the deque. The return
102+
// can be a std::nullopt if this operation failed (not necessarily empty).
103103
std::optional<T> steal() noexcept;
104104

105105
// Destruct the deque, all threads must have finished using the deque.
106106
~Deque() noexcept;
107107

108-
// If true elements of type `T` are stored directly in the ring buffer.
109-
static constexpr bool no_alloc = std::is_trivially_destructible_v<T> && detail::lock_free_v<T>;
110-
111108
private:
112-
using buffer_t = detail::RingBuff<std::conditional_t<no_alloc, T, T*>>;
109+
alignas(hardware_destructive_interference_size) std::atomic<std::int64_t> _top;
110+
alignas(hardware_destructive_interference_size) std::atomic<std::int64_t> _bottom;
111+
alignas(hardware_destructive_interference_size) std::atomic<detail::RingBuff<T>*> _buffer;
113112

114-
std::atomic<std::int64_t> _top; // Top of deque
115-
std::atomic<std::int64_t> _bottom; // Bottom of deque.
116-
std::atomic<buffer_t*> _buffer; // Current buffer.
117-
std::vector<std::unique_ptr<buffer_t>> _garbage; // Store old buffers here.
113+
std::vector<std::unique_ptr<detail::RingBuff<T>>> _garbage; // Store old buffers here.
118114

119-
// Convinience aliases.
115+
// Convenience aliases.
120116
static constexpr std::memory_order relaxed = std::memory_order_relaxed;
121117
static constexpr std::memory_order consume = std::memory_order_consume;
122118
static constexpr std::memory_order acquire = std::memory_order_acquire;
123119
static constexpr std::memory_order release = std::memory_order_release;
124120
static constexpr std::memory_order seq_cst = std::memory_order_seq_cst;
125121
};
126122

127-
template <typename T> Deque<T>::Deque(std::int64_t cap)
128-
: _top(0), _bottom(0), _buffer(new buffer_t{cap}) {
123+
template <trivially_destructible T> Deque<T>::Deque(std::int64_t cap)
124+
: _top(0), _bottom(0), _buffer(new detail::RingBuff<T>{cap}) {
129125
_garbage.reserve(32);
130126
}
131127

132-
template <typename T> std::size_t Deque<T>::size() const noexcept {
128+
template <trivially_destructible T> std::size_t Deque<T>::size() const noexcept {
133129
int64_t b = _bottom.load(relaxed);
134130
int64_t t = _top.load(relaxed);
135131
return static_cast<std::size_t>(b >= t ? b - t : 0);
136132
}
137133

138-
template <typename T> int64_t Deque<T>::capacity() const noexcept {
134+
template <trivially_destructible T> int64_t Deque<T>::capacity() const noexcept {
139135
return _buffer.load(relaxed)->capacity();
140136
}
141137

142-
template <typename T> bool Deque<T>::empty() const noexcept { return !size(); }
138+
template <trivially_destructible T> bool Deque<T>::empty() const noexcept { return !size(); }
139+
140+
template <trivially_destructible T> template <typename... Args> void Deque<T>::emplace(Args&&... args) {
141+
// Construct before acquiring slot in case constructor throws
142+
T object(std::forward<Args>(args)...);
143143

144-
template <typename T> template <typename... Args> void Deque<T>::emplace(Args&&... args) {
145144
std::int64_t b = _bottom.load(relaxed);
146145
std::int64_t t = _top.load(acquire);
147-
buffer_t* buf = _buffer.load(relaxed);
146+
detail::RingBuff<T>* buf = _buffer.load(relaxed);
148147

149148
if (buf->capacity() < (b - t) + 1) {
150149
// Queue is full, build a new one
151150
_garbage.emplace_back(std::exchange(buf, buf->resize(b, t)));
152151
_buffer.store(buf, relaxed);
153152
}
154153

155-
// Construct new object
156-
if constexpr (no_alloc) {
157-
buf->store(b, {std::forward<Args>(args)...});
158-
} else {
159-
buf->store(b, new T{std::forward<Args>(args)...});
160-
}
154+
// Construct new object, this does not have to be atomic as no one can steal this item until after we
155+
// store the new value of bottom, ordering is maintained by surrounding atomics.
156+
buf->store(b, std::move(object));
161157

162158
std::atomic_thread_fence(release);
163159
_bottom.store(b + 1, relaxed);
164160
}
165161

166-
template <typename T> std::optional<T> Deque<T>::pop() noexcept {
162+
template <trivially_destructible T> std::optional<T> Deque<T>::pop() noexcept {
167163
std::int64_t b = _bottom.load(relaxed) - 1;
168-
buffer_t* buf = _buffer.load(relaxed);
169-
_bottom.store(b, relaxed);
164+
detail::RingBuff<T>* buf = _buffer.load(relaxed);
165+
166+
_bottom.store(b, relaxed); // Stealers can no longer steal
167+
170168
std::atomic_thread_fence(seq_cst);
171169
std::int64_t t = _top.load(relaxed);
172170

173171
if (t <= b) {
174172
// Non-empty deque
175173
if (t == b) {
176-
// The last item could get stolen
174+
// The last item could get stolen, by a stealer that loaded bottom before our write above
177175
if (!_top.compare_exchange_strong(t, t + 1, seq_cst, relaxed)) {
178-
// Failed race.
176+
// Failed race, thief got the last item.
179177
_bottom.store(b + 1, relaxed);
180178
return std::nullopt;
181179
}
182180
_bottom.store(b + 1, relaxed);
183181
}
184182

185-
// Can delay load until after aquiring slot as only this thread can push()
186-
auto x = buf->load(b);
187-
188-
if constexpr (no_alloc) {
189-
return x;
190-
} else {
191-
std::optional tmp{std::move(*x)};
192-
delete x;
193-
return tmp;
194-
}
183+
// Can delay load until after acquiring slot as only this thread can push(), this load is not
184+
// required to be atomic as we are the exclusive writer.
185+
return buf->load(b);
195186

196187
} else {
197188
_bottom.store(b + 1, relaxed);
198189
return std::nullopt;
199190
}
200191
}
201192

202-
template <typename T> std::optional<T> Deque<T>::steal() noexcept {
193+
template <trivially_destructible T> std::optional<T> Deque<T>::steal() noexcept {
203194
std::int64_t t = _top.load(acquire);
204195
std::atomic_thread_fence(seq_cst);
205196
std::int64_t b = _bottom.load(acquire);
206197

207198
if (t < b) {
208-
// Must load *before* aquiring the slot as slot may be overwritten immidiatly after aquiring.
209-
auto x = _buffer.load(consume)->load(t);
199+
// Must load *before* acquiring the slot as slot may be overwritten immediately after acquiring.
200+
// This load is NOT required to be atomic even though it may race with an overwrite as we only
201+
// return the value if we win the race below guaranteeing we had no race during our read. If we
202+
// lose the race then 'x' could be corrupt due to read-during-write race but as T is trivially
203+
// destructible this does not matter.
204+
T x = _buffer.load(consume)->load(t);
210205

211206
if (!_top.compare_exchange_strong(t, t + 1, seq_cst, relaxed)) {
212207
// Failed race.
213208
return std::nullopt;
214209
}
215210

216-
if constexpr (no_alloc) {
217-
return x;
218-
} else {
219-
std::optional tmp{std::move(*x)};
220-
delete x;
221-
return tmp;
222-
}
211+
return x;
223212

224213
} else {
225214
// Empty deque.
226215
return std::nullopt;
227216
}
228217
}
229218

230-
template <typename T> Deque<T>::~Deque() noexcept {
231-
if constexpr (!no_alloc) {
232-
// Clean up all remaining items in the deque.
233-
while (!empty()) {
234-
pop();
235-
}
236-
237-
assert(empty() && "Busy during destruction"); // Check for interupts.
238-
}
239-
240-
delete _buffer.load();
241-
}
219+
template <trivially_destructible T> Deque<T>::~Deque() noexcept { delete _buffer.load(); }
242220

243-
} // namespace riften
221+
} // namespace riften

test/deque_test.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,6 @@ TEST_CASE("emplace against steals, [deque]") {
9090
// Dummy work struct.
9191
struct work {
9292
int label;
93-
std::string path;
9493
};
9594

9695
TEST_CASE("pop and steal, [deque]") {
@@ -104,7 +103,7 @@ TEST_CASE("pop and steal, [deque]") {
104103
std::vector<std::thread> threads;
105104
std::atomic<int> remaining(max);
106105

107-
for (auto i = 0; i < max; ++i) worker.emplace(work{1, "/some/random/path"});
106+
for (auto i = 0; i < max; ++i) worker.emplace(work{1});
108107

109108
for (auto i = 0; i < nthreads; ++i) {
110109
threads.emplace_back([&stealer, &remaining]() {

test/example.cpp

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,30 @@
1-
#include <string>
21
#include <thread>
32

43
#include "doctest/doctest.h"
54
#include "riften/deque.hpp"
65

76
TEST_CASE("Examples") {
8-
// #include <string>
97
// #include <thread>
108

119
// #include "riften/deque.hpp"
1210

13-
// Work-stealing deque of strings
14-
riften::Deque<std::string> deque;
11+
// Work-stealing deque of ints
12+
riften::Deque<int> deque;
1513

1614
// One thread can push and pop items from one end (like a stack)
1715
std::thread owner([&]() {
1816
for (int i = 0; i < 10000; i = i + 1) {
19-
deque.emplace(std::to_string(i));
17+
deque.emplace(i);
2018
}
2119
while (!deque.empty()) {
22-
std::optional item = deque.pop();
20+
[[maybe_unused]] std::optional item = deque.pop();
2321
}
2422
});
2523

2624
// While multiple (any) threads can steal items from the other end
2725
std::thread thief([&]() {
2826
while (!deque.empty()) {
29-
std::optional item = deque.steal();
27+
[[maybe_unused]] std::optional item = deque.steal();
3028
}
3129
});
3230

0 commit comments

Comments
 (0)