
Commit affef66

heiher authored and chenhuacai committed
LoongArch: Relax memory ordering for atomic operations
This patch relaxes the implementation while satisfying the memory
ordering requirements for atomic operations, which will help improve
performance on LA664+.

Unixbench with full threads (8)

                                             before         after
Dhrystone 2 using register variables    203910714.2   203909539.8    0.00%
Double-Precision Whetstone                  37930.9         37931    0.00%
Execl Throughput                            29431.5       29545.8    0.39%
File Copy 1024 bufsize 2000 maxblocks     6645759.5       6676320    0.46%
File Copy 256 bufsize 500 maxblocks       2138772.4     2144182.4    0.25%
File Copy 4096 bufsize 8000 maxblocks    11640698.4      11602703   -0.33%
Pipe Throughput                           8849077.7     8917009.4    0.77%
Pipe-based Context Switching              1255108.5     1287277.3    2.56%
Process Creation                            50825.9       50442.1   -0.76%
Shell Scripts (1 concurrent)                25795.8       25942.3    0.57%
Shell Scripts (8 concurrent)                 3812.6        3835.2    0.59%
System Call Overhead                      9248212.6     9353348.6    1.14%
=======
System Benchmarks Index Score                8076.6        8114.4    0.47%

Signed-off-by: WANG Rui <wangrui@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
1 parent 7194596 commit affef66
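Note (not part of the commit): the reworked ATOMIC_OP_RETURN macro in the diff below is instantiated twice per operation, once with (_db, ) and once with ( , _relaxed). Roughly, for ATOMIC_OPS(add, i, add, +) this yields the following two functions, where the default variant keeps the barrier-carrying amadd_db.w instruction and the new _relaxed variant uses plain amadd.w (approximate expansion; whitespace differs from real preprocessor output):

/* ATOMIC_OP_RETURN(add, i, add, +, _db, ) -> fully ordered */
static inline int arch_atomic_add_return(int i, atomic_t *v)
{
	int result;

	__asm__ __volatile__(
	"amadd_db.w %1, %2, %0 \n"	/* AM add with barrier (_db) */
	: "+ZB" (v->counter), "=&r" (result)
	: "r" (i)
	: "memory");

	return result + i;
}

/* ATOMIC_OP_RETURN(add, i, add, +, , _relaxed) -> no ordering */
static inline int arch_atomic_add_return_relaxed(int i, atomic_t *v)
{
	int result;

	__asm__ __volatile__(
	"amadd.w %1, %2, %0 \n"		/* plain AM add, no barrier */
	: "+ZB" (v->counter), "=&r" (result)
	: "r" (i)
	: "memory");

	return result + i;
}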

File tree

1 file changed: +68 -20 lines changed

arch/loongarch/include/asm/atomic.h

Lines changed: 68 additions & 20 deletions
@@ -36,33 +36,33 @@
 static inline void arch_atomic_##op(int i, atomic_t *v) \
 { \
 	__asm__ __volatile__( \
-	"am"#asm_op"_db.w" " $zero, %1, %0 \n" \
+	"am"#asm_op".w" " $zero, %1, %0 \n" \
 	: "+ZB" (v->counter) \
 	: "r" (I) \
 	: "memory"); \
 }
 
-#define ATOMIC_OP_RETURN(op, I, asm_op, c_op) \
-static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \
+#define ATOMIC_OP_RETURN(op, I, asm_op, c_op, mb, suffix) \
+static inline int arch_atomic_##op##_return##suffix(int i, atomic_t *v) \
 { \
 	int result; \
 	\
 	__asm__ __volatile__( \
-	"am"#asm_op"_db.w" " %1, %2, %0 \n" \
+	"am"#asm_op#mb".w" " %1, %2, %0 \n" \
 	: "+ZB" (v->counter), "=&r" (result) \
 	: "r" (I) \
 	: "memory"); \
 	\
 	return result c_op I; \
 }
 
-#define ATOMIC_FETCH_OP(op, I, asm_op) \
-static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
+#define ATOMIC_FETCH_OP(op, I, asm_op, mb, suffix) \
+static inline int arch_atomic_fetch_##op##suffix(int i, atomic_t *v) \
 { \
 	int result; \
 	\
 	__asm__ __volatile__( \
-	"am"#asm_op"_db.w" " %1, %2, %0 \n" \
+	"am"#asm_op#mb".w" " %1, %2, %0 \n" \
 	: "+ZB" (v->counter), "=&r" (result) \
 	: "r" (I) \
 	: "memory"); \
@@ -72,29 +72,53 @@ static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
 
 #define ATOMIC_OPS(op, I, asm_op, c_op) \
 	ATOMIC_OP(op, I, asm_op) \
-	ATOMIC_OP_RETURN(op, I, asm_op, c_op) \
-	ATOMIC_FETCH_OP(op, I, asm_op)
+	ATOMIC_OP_RETURN(op, I, asm_op, c_op, _db, ) \
+	ATOMIC_OP_RETURN(op, I, asm_op, c_op, , _relaxed) \
+	ATOMIC_FETCH_OP(op, I, asm_op, _db, ) \
+	ATOMIC_FETCH_OP(op, I, asm_op, , _relaxed)
 
 ATOMIC_OPS(add, i, add, +)
 ATOMIC_OPS(sub, -i, add, +)
 
+#define arch_atomic_add_return arch_atomic_add_return
+#define arch_atomic_add_return_acquire arch_atomic_add_return
+#define arch_atomic_add_return_release arch_atomic_add_return
 #define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
+#define arch_atomic_sub_return arch_atomic_sub_return
+#define arch_atomic_sub_return_acquire arch_atomic_sub_return
+#define arch_atomic_sub_return_release arch_atomic_sub_return
 #define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
+#define arch_atomic_fetch_add arch_atomic_fetch_add
+#define arch_atomic_fetch_add_acquire arch_atomic_fetch_add
+#define arch_atomic_fetch_add_release arch_atomic_fetch_add
 #define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed
+#define arch_atomic_fetch_sub arch_atomic_fetch_sub
+#define arch_atomic_fetch_sub_acquire arch_atomic_fetch_sub
+#define arch_atomic_fetch_sub_release arch_atomic_fetch_sub
 #define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
 
 #undef ATOMIC_OPS
 
 #define ATOMIC_OPS(op, I, asm_op) \
 	ATOMIC_OP(op, I, asm_op) \
-	ATOMIC_FETCH_OP(op, I, asm_op)
+	ATOMIC_FETCH_OP(op, I, asm_op, _db, ) \
+	ATOMIC_FETCH_OP(op, I, asm_op, , _relaxed)
 
 ATOMIC_OPS(and, i, and)
 ATOMIC_OPS(or, i, or)
 ATOMIC_OPS(xor, i, xor)
 
+#define arch_atomic_fetch_and arch_atomic_fetch_and
+#define arch_atomic_fetch_and_acquire arch_atomic_fetch_and
+#define arch_atomic_fetch_and_release arch_atomic_fetch_and
 #define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
+#define arch_atomic_fetch_or arch_atomic_fetch_or
+#define arch_atomic_fetch_or_acquire arch_atomic_fetch_or
+#define arch_atomic_fetch_or_release arch_atomic_fetch_or
 #define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed
+#define arch_atomic_fetch_xor arch_atomic_fetch_xor
+#define arch_atomic_fetch_xor_acquire arch_atomic_fetch_xor
+#define arch_atomic_fetch_xor_release arch_atomic_fetch_xor
 #define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
 
 #undef ATOMIC_OPS
@@ -172,32 +196,32 @@ static inline int arch_atomic_sub_if_positive(int i, atomic_t *v)
 static inline void arch_atomic64_##op(long i, atomic64_t *v) \
 { \
 	__asm__ __volatile__( \
-	"am"#asm_op"_db.d " " $zero, %1, %0 \n" \
+	"am"#asm_op".d " " $zero, %1, %0 \n" \
 	: "+ZB" (v->counter) \
 	: "r" (I) \
 	: "memory"); \
 }
 
-#define ATOMIC64_OP_RETURN(op, I, asm_op, c_op) \
-static inline long arch_atomic64_##op##_return_relaxed(long i, atomic64_t *v) \
+#define ATOMIC64_OP_RETURN(op, I, asm_op, c_op, mb, suffix) \
+static inline long arch_atomic64_##op##_return##suffix(long i, atomic64_t *v) \
 { \
 	long result; \
 	__asm__ __volatile__( \
-	"am"#asm_op"_db.d " " %1, %2, %0 \n" \
+	"am"#asm_op#mb".d " " %1, %2, %0 \n" \
 	: "+ZB" (v->counter), "=&r" (result) \
 	: "r" (I) \
 	: "memory"); \
 	\
 	return result c_op I; \
 }
 
-#define ATOMIC64_FETCH_OP(op, I, asm_op) \
-static inline long arch_atomic64_fetch_##op##_relaxed(long i, atomic64_t *v) \
+#define ATOMIC64_FETCH_OP(op, I, asm_op, mb, suffix) \
+static inline long arch_atomic64_fetch_##op##suffix(long i, atomic64_t *v) \
 { \
 	long result; \
 	\
 	__asm__ __volatile__( \
-	"am"#asm_op"_db.d " " %1, %2, %0 \n" \
+	"am"#asm_op#mb".d " " %1, %2, %0 \n" \
 	: "+ZB" (v->counter), "=&r" (result) \
 	: "r" (I) \
 	: "memory"); \
@@ -207,29 +231,53 @@ static inline long arch_atomic64_fetch_##op##_relaxed(long i, atomic64_t *v) \
 
 #define ATOMIC64_OPS(op, I, asm_op, c_op) \
 	ATOMIC64_OP(op, I, asm_op) \
-	ATOMIC64_OP_RETURN(op, I, asm_op, c_op) \
-	ATOMIC64_FETCH_OP(op, I, asm_op)
+	ATOMIC64_OP_RETURN(op, I, asm_op, c_op, _db, ) \
+	ATOMIC64_OP_RETURN(op, I, asm_op, c_op, , _relaxed) \
+	ATOMIC64_FETCH_OP(op, I, asm_op, _db, ) \
+	ATOMIC64_FETCH_OP(op, I, asm_op, , _relaxed)
 
 ATOMIC64_OPS(add, i, add, +)
 ATOMIC64_OPS(sub, -i, add, +)
 
+#define arch_atomic64_add_return arch_atomic64_add_return
+#define arch_atomic64_add_return_acquire arch_atomic64_add_return
+#define arch_atomic64_add_return_release arch_atomic64_add_return
 #define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed
+#define arch_atomic64_sub_return arch_atomic64_sub_return
+#define arch_atomic64_sub_return_acquire arch_atomic64_sub_return
+#define arch_atomic64_sub_return_release arch_atomic64_sub_return
 #define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed
+#define arch_atomic64_fetch_add arch_atomic64_fetch_add
+#define arch_atomic64_fetch_add_acquire arch_atomic64_fetch_add
+#define arch_atomic64_fetch_add_release arch_atomic64_fetch_add
 #define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed
+#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub
+#define arch_atomic64_fetch_sub_acquire arch_atomic64_fetch_sub
+#define arch_atomic64_fetch_sub_release arch_atomic64_fetch_sub
 #define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed
 
 #undef ATOMIC64_OPS
 
 #define ATOMIC64_OPS(op, I, asm_op) \
 	ATOMIC64_OP(op, I, asm_op) \
-	ATOMIC64_FETCH_OP(op, I, asm_op)
+	ATOMIC64_FETCH_OP(op, I, asm_op, _db, ) \
+	ATOMIC64_FETCH_OP(op, I, asm_op, , _relaxed)
 
 ATOMIC64_OPS(and, i, and)
 ATOMIC64_OPS(or, i, or)
 ATOMIC64_OPS(xor, i, xor)
 
+#define arch_atomic64_fetch_and arch_atomic64_fetch_and
+#define arch_atomic64_fetch_and_acquire arch_atomic64_fetch_and
+#define arch_atomic64_fetch_and_release arch_atomic64_fetch_and
 #define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed
+#define arch_atomic64_fetch_or arch_atomic64_fetch_or
+#define arch_atomic64_fetch_or_acquire arch_atomic64_fetch_or
+#define arch_atomic64_fetch_or_release arch_atomic64_fetch_or
 #define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed
+#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
+#define arch_atomic64_fetch_xor_acquire arch_atomic64_fetch_xor
+#define arch_atomic64_fetch_xor_release arch_atomic64_fetch_xor
 #define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed
 
 #undef ATOMIC64_OPS
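Note (not part of the commit): the added #defines above point the default and the _acquire/_release names at the fully ordered (_db) implementation, while only the _relaxed names use the barrier-free instructions. Callers pick these up through the generic atomic API; a minimal, hypothetical caller-side sketch (nr_events, record_event and publish_event are made-up names for illustration only):

#include <linux/atomic.h>

static atomic_t nr_events = ATOMIC_INIT(0);

/* Pure counting, no ordering against surrounding accesses required:
 * the relaxed variant suffices and now avoids the _db barrier. */
static int record_event(void)
{
	return atomic_add_return_relaxed(1, &nr_events);
}

/* When the update must also order surrounding accesses, the default
 * (fully ordered) variant maps to the amadd_db.w form on LoongArch. */
static int publish_event(void)
{
	return atomic_add_return(1, &nr_events);
}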
