#include <trace/events/lock.h>

/*
- * Include queued spinlock statistics code
+ * Include queued spinlock definitions and statistics code
 */
+ #include "qspinlock.h"
#include "qspinlock_stat.h"

/*
@@ -67,36 +68,6 @@
 */

#include "mcs_spinlock.h"
- #define MAX_NODES 4
-
- /*
- * On 64-bit architectures, the mcs_spinlock structure will be 16 bytes in
- * size and four of them will fit nicely in one 64-byte cacheline. For
- * pvqspinlock, however, we need more space for extra data. To accommodate
- * that, we insert two more long words to pad it up to 32 bytes. IOW, only
- * two of them can fit in a cacheline in this case. That is OK as it is rare
- * to have more than 2 levels of slowpath nesting in actual use. We don't
- * want to penalize pvqspinlocks to optimize for a rare case in native
- * qspinlocks.
- */
- struct qnode {
-         struct mcs_spinlock mcs;
- #ifdef CONFIG_PARAVIRT_SPINLOCKS
-         long reserved[2];
- #endif
- };
-
- /*
- * The pending bit spinning loop count.
- * This heuristic is used to limit the number of lockword accesses
- * made by atomic_cond_read_relaxed when waiting for the lock to
- * transition out of the "== _Q_PENDING_VAL" state. We don't spin
- * indefinitely because there's no guarantee that we'll make forward
- * progress.
- */
- #ifndef _Q_PENDING_LOOPS
- #define _Q_PENDING_LOOPS 1
- #endif

/*
 * Per-CPU queue node structures; we can never have more than 4 nested
@@ -106,161 +77,7 @@ struct qnode {
 *
 * PV doubles the storage and uses the second cacheline for PV state.
 */
- static DEFINE_PER_CPU_ALIGNED(struct qnode, qnodes[MAX_NODES]);
-
- /*
- * We must be able to distinguish between no-tail and the tail at 0:0,
- * therefore increment the cpu number by one.
- */
-
- static inline __pure u32 encode_tail(int cpu, int idx)
- {
-         u32 tail;
-
-         tail  = (cpu + 1) << _Q_TAIL_CPU_OFFSET;
-         tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */
-
-         return tail;
- }
-
- static inline __pure struct mcs_spinlock *decode_tail(u32 tail)
- {
-         int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
-         int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
-
-         return per_cpu_ptr(&qnodes[idx].mcs, cpu);
- }
-
- static inline __pure
- struct mcs_spinlock *grab_mcs_node(struct mcs_spinlock *base, int idx)
- {
-         return &((struct qnode *)base + idx)->mcs;
- }
-
- #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
-
- #if _Q_PENDING_BITS == 8
- /**
- * clear_pending - clear the pending bit.
- * @lock: Pointer to queued spinlock structure
- *
- * *,1,* -> *,0,*
- */
- static __always_inline void clear_pending(struct qspinlock *lock)
- {
-         WRITE_ONCE(lock->pending, 0);
- }
-
- /**
- * clear_pending_set_locked - take ownership and clear the pending bit.
- * @lock: Pointer to queued spinlock structure
- *
- * *,1,0 -> *,0,1
- *
- * Lock stealing is not allowed if this function is used.
- */
- static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
- {
-         WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL);
- }
-
- /*
- * xchg_tail - Put in the new queue tail code word & retrieve previous one
- * @lock : Pointer to queued spinlock structure
- * @tail : The new queue tail code word
- * Return: The previous queue tail code word
- *
- * xchg(lock, tail), which heads an address dependency
- *
- * p,*,* -> n,*,* ; prev = xchg(lock, node)
- */
- static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
- {
-         /*
-          * We can use relaxed semantics since the caller ensures that the
-          * MCS node is properly initialized before updating the tail.
-          */
-         return (u32)xchg_relaxed(&lock->tail,
-                                  tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
- }
-
- #else /* _Q_PENDING_BITS == 8 */
-
- /**
- * clear_pending - clear the pending bit.
- * @lock: Pointer to queued spinlock structure
- *
- * *,1,* -> *,0,*
- */
- static __always_inline void clear_pending(struct qspinlock *lock)
- {
-         atomic_andnot(_Q_PENDING_VAL, &lock->val);
- }
-
- /**
- * clear_pending_set_locked - take ownership and clear the pending bit.
- * @lock: Pointer to queued spinlock structure
- *
- * *,1,0 -> *,0,1
- */
- static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
- {
-         atomic_add(-_Q_PENDING_VAL + _Q_LOCKED_VAL, &lock->val);
- }
-
- /**
- * xchg_tail - Put in the new queue tail code word & retrieve previous one
- * @lock : Pointer to queued spinlock structure
- * @tail : The new queue tail code word
- * Return: The previous queue tail code word
- *
- * xchg(lock, tail)
- *
- * p,*,* -> n,*,* ; prev = xchg(lock, node)
- */
- static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
- {
-         u32 old, new;
-
-         old = atomic_read(&lock->val);
-         do {
-                 new = (old & _Q_LOCKED_PENDING_MASK) | tail;
-                 /*
-                  * We can use relaxed semantics since the caller ensures that
-                  * the MCS node is properly initialized before updating the
-                  * tail.
-                  */
-         } while (!atomic_try_cmpxchg_relaxed(&lock->val, &old, new));
-
-         return old;
- }
- #endif /* _Q_PENDING_BITS == 8 */
-
- /**
- * queued_fetch_set_pending_acquire - fetch the whole lock value and set pending
- * @lock : Pointer to queued spinlock structure
- * Return: The previous lock value
- *
- * *,*,* -> *,1,*
- */
- #ifndef queued_fetch_set_pending_acquire
- static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock)
- {
-         return atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val);
- }
- #endif
-
- /**
- * set_locked - Set the lock bit and own the lock
- * @lock: Pointer to queued spinlock structure
- *
- * *,*,0 -> *,0,1
- */
- static __always_inline void set_locked(struct qspinlock *lock)
- {
-         WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
- }
-
+ static DEFINE_PER_CPU_ALIGNED(struct qnode, qnodes[_Q_MAX_NODES]);

/*
 * Generate the native code for queued_spin_unlock_slowpath(); provide NOPs for
@@ -410,7 +227,7 @@ void __lockfunc queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
         * any MCS node. This is not the most elegant solution, but is
         * simple enough.
         */
-         if (unlikely(idx >= MAX_NODES)) {
+         if (unlikely(idx >= _Q_MAX_NODES)) {
                lockevent_inc(lock_no_node);
                while (!queued_spin_trylock(lock))
                        cpu_relax();
@@ -465,7 +282,7 @@ void __lockfunc queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
         * head of the waitqueue.
         */
        if (old & _Q_TAIL_MASK) {
-                 prev = decode_tail(old);
+                 prev = decode_tail(old, qnodes);

                /* Link @node into the waitqueue. */
                WRITE_ONCE(prev->next, node);
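
Side note on the decode_tail() change in the last hunk: the tail code word packs the waiter's CPU number plus one together with its per-CPU node index, so a value of zero can still mean "no queue tail" while CPU 0 / node index 0 stays representable. Below is a minimal user-space sketch of that encoding; the bit offsets are illustrative stand-ins for the kernel's _Q_TAIL_* constants, and the decoder returns a (cpu, idx) pair instead of resolving a per-CPU MCS node pointer.

/*
 * Standalone sketch of the qspinlock tail code word (not kernel code).
 * Assumed layout: bits [17:16] hold the node index, bits [31:18] hold
 * cpu + 1, so a tail of 0 is reserved for "no queue tail".
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define TAIL_IDX_OFFSET 16                      /* stand-in for _Q_TAIL_IDX_OFFSET */
#define TAIL_IDX_MASK   (0x3U << TAIL_IDX_OFFSET)
#define TAIL_CPU_OFFSET 18                      /* stand-in for _Q_TAIL_CPU_OFFSET */

/* Mirrors the shape of the kernel helper, but is only a demo. */
static uint32_t encode_tail_demo(int cpu, int idx)
{
        /* cpu + 1 keeps (cpu 0, idx 0) distinct from the empty tail 0 */
        return ((uint32_t)(cpu + 1) << TAIL_CPU_OFFSET) |
               ((uint32_t)idx << TAIL_IDX_OFFSET);
}

static void decode_tail_demo(uint32_t tail, int *cpu, int *idx)
{
        *cpu = (int)(tail >> TAIL_CPU_OFFSET) - 1;
        *idx = (int)((tail & TAIL_IDX_MASK) >> TAIL_IDX_OFFSET);
}

int main(void)
{
        int cpu, idx;

        assert(encode_tail_demo(0, 0) != 0);    /* distinguishable from "no tail" */
        decode_tail_demo(encode_tail_demo(5, 2), &cpu, &idx);
        printf("cpu=%d idx=%d\n", cpu, idx);    /* prints cpu=5 idx=2 */
        return 0;
}

With the helpers now shared through qspinlock.h, the per-CPU qnodes array is no longer visible to decode_tail() itself, which is why the call site above passes it explicitly.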
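The qnode comment removed above also carries a sizing argument that is easy to check in isolation: a three-field MCS node is 16 bytes on a typical 64-bit (LP64) ABI, so four of them fit in one 64-byte cacheline, while the paravirt layout padded with two extra longs grows to 32 bytes and only two fit per line. A compile-time sketch with stand-in types (not the kernel structures):

/*
 * Sizing sketch for the qnode padding argument (stand-in types only;
 * sizes assume an LP64 64-bit ABI).
 */
#include <assert.h>

struct mcs_node {                       /* stand-in for struct mcs_spinlock */
        struct mcs_node *next;          /* 8 bytes */
        int locked;                     /* 4 bytes */
        int count;                      /* 4 bytes */
};

struct qnode_native {
        struct mcs_node mcs;            /* 16 bytes: 4 per 64-byte cacheline */
};

struct qnode_pv {
        struct mcs_node mcs;
        long reserved[2];               /* pad to 32 bytes: 2 per cacheline */
};

static_assert(sizeof(struct qnode_native) == 16, "four nodes per cacheline");
static_assert(sizeof(struct qnode_pv) == 32, "two nodes per cacheline");

int main(void) { return 0; }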
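Similarly, the clear_pending_set_locked() variant for _Q_PENDING_BITS != 8 relies on a small arithmetic identity: when the lock word is in the *,1,0 state, adding (-_Q_PENDING_VAL + _Q_LOCKED_VAL) clears pending and sets locked in a single atomic operation. A standalone sketch with illustrative bit positions (not the kernel's _Q_* constants):

/*
 * Sketch of the *,1,0 -> *,0,1 transition done with one atomic add
 * (illustrative bit values: locked = bit 0, pending = bit 8).
 */
#include <assert.h>
#include <stdatomic.h>

#define LOCKED_VAL  (1U << 0)           /* stand-in for _Q_LOCKED_VAL */
#define PENDING_VAL (1U << 8)           /* stand-in for _Q_PENDING_VAL */

int main(void)
{
        atomic_uint val = PENDING_VAL;  /* *,1,0: pending set, unlocked */

        /* a single add turns *,1,0 into *,0,1 */
        atomic_fetch_add(&val, -PENDING_VAL + LOCKED_VAL);

        assert(atomic_load(&val) == LOCKED_VAL);
        return 0;
}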