Skip to content

Commit dddf475

Browse files
committed
powerpc,s390x: Delay calculation of CAS results
1 parent d50bc0f commit dddf475

File tree

5 files changed

+54
-97
lines changed

5 files changed

+54
-97
lines changed

.github/.cspell/project-dictionary.txt

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,14 @@ casp
1313
cbnz
1414
cdsg
1515
cfgs
16+
mfcr
1617
cinc
1718
cirruslabs
1819
clrex
1920
clrlwi
2021
cmpd
2122
cmpw
2223
cmpxchg
23-
cntlz
24-
cntlzd
25-
cntlzw
2624
cset
2725
daddiu
2826
DWCAS
@@ -62,7 +60,6 @@ quadword
6260
qword
6361
rcpc
6462
risbg
65-
rldicl
6663
rldicr
6764
rlwinm
6865
sched
@@ -76,7 +73,6 @@ sllw
7673
sltiu
7774
srlv
7875
srlw
79-
srwi
8076
stlxp
8177
stpq
8278
stqcx

src/arch/powerpc.rs

Lines changed: 25 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@
77
// - portable-atomic https://github.com/taiki-e/portable-atomic
88
//
99
// Generated asm:
10-
// - powerpc https://godbolt.org/z/1ha4GGj6n
11-
// - powerpc64 https://godbolt.org/z/G4Y4x9Exs
12-
// - powerpc64 (pwr8) https://godbolt.org/z/fvq3q56d8
13-
// - powerpc64le https://godbolt.org/z/1re3YWxTd
14-
// - powerpc64le (pwr7) https://godbolt.org/z/sc6hbGqMM
10+
// - powerpc https://godbolt.org/z/rcesG8Tds
11+
// - powerpc64 https://godbolt.org/z/b9h54fr7r
12+
// - powerpc64 (pwr8) https://godbolt.org/z/95qGn9xv4
13+
// - powerpc64le https://godbolt.org/z/GEoqbdK49
14+
// - powerpc64le (pwr7) https://godbolt.org/z/9r9eGbs9T
1515

1616
use core::{
1717
arch::asm,
@@ -21,14 +21,6 @@ use core::{
2121

2222
use crate::raw::{AtomicCompareExchange, AtomicLoad, AtomicStore, AtomicSwap};
2323

24-
macro_rules! if_d {
25-
("d", $then:expr, $else:expr) => {
26-
$then
27-
};
28-
($asm_suffix:tt, $then:expr, $else:expr) => {
29-
$else
30-
};
31-
}
3224
#[cfg(target_arch = "powerpc64")]
3325
#[cfg(any(
3426
target_feature = "quadword-atomics",
@@ -87,6 +79,11 @@ macro_rules! atomic_rmw {
8779
};
8880
}
8981

82+
#[inline]
83+
fn extract_cr0(r: usize) -> bool {
84+
r & 0x20000000 != 0
85+
}
86+
9087
macro_rules! atomic_load_store {
9188
($int_type:ident, $ld_suffix:tt, $asm_suffix:tt) => {
9289
impl AtomicLoad for $int_type {
@@ -291,12 +288,10 @@ macro_rules! atomic {
291288
concat!("st", $asm_suffix, "cx. {new_tmp}, 0, {dst}"),
292289
"bne %cr0, 2b", // continue loop if store failed
293290
"3:",
294-
"xor {r}, {out_tmp}, {old_tmp}",
291+
"mfcr {r}",
295292
$acquire,
296293
// store out_tmp to out
297294
concat!("st", $asm_suffix, " {out_tmp}, 0({out})"),
298-
concat!("cntlz", $cmp_suffix, " {r}, {r}"),
299-
if_d!($asm_suffix, "rldicl {r}, {r}, 58, 63", "srwi {r}, {r}, 5"),
300295
dst = inout(reg_nonzero) dst => _,
301296
old = in(reg_nonzero) old,
302297
old_tmp = out(reg_nonzero) _,
@@ -311,8 +306,7 @@ macro_rules! atomic {
311306
};
312307
}
313308
atomic_rmw!(cmpxchg, order);
314-
debug_assert!(r == 0 || r == 1, "r={}", r);
315-
r != 0
309+
extract_cr0(r)
316310
}
317311
}
318312
}
@@ -511,10 +505,8 @@ macro_rules! atomic8 {
511505
"bne %cr0, 2b",
512506
"3:",
513507
"srw %r5, %r5, %r7",
508+
"mfcr %r3",
514509
$acquire,
515-
"xor %r3, %r5, %r4",
516-
"cntlzw %r3, %r3",
517-
"srwi %r3, %r3, 5",
518510
"stb %r5, 0(%r6)",
519511
inout("r3") dst => r,
520512
inout("r4") old => _,
@@ -557,11 +549,9 @@ macro_rules! atomic8 {
557549
"bne %cr0, 2b",
558550
"3:",
559551
"srw %r5, %r9, %r5",
552+
"mfcr %r3",
560553
$acquire,
561-
"xor %r3, %r5, %r4",
562554
"stb %r5, 0(%r6)",
563-
"cntlzw %r3, %r3",
564-
"srwi %r3, %r3, 5",
565555
inout("r3") dst => r,
566556
inout("r4") old => _,
567557
inout("r5") new => _,
@@ -602,15 +592,8 @@ macro_rules! atomic8 {
602592
"stwcx. %r11, 0, %r5",
603593
"bne %cr0, 2b",
604594
"3:",
605-
"srw %r5, %r9, %r3",
606-
"li %r3, 0",
607-
"cmpw 5, 4",
608-
"li %r4, 1",
609-
"bc 12, 2, 5f",
610-
"b 6f",
611-
"5:",
612-
"addi %r3, %r4, 0",
613-
"6:",
595+
"srw %r5, %r9, %r3",
596+
"mfcr %r3",
614597
$acquire,
615598
"stb %r5, 0(%r6)",
616599
inout("r3") dst => r,
@@ -628,8 +611,7 @@ macro_rules! atomic8 {
628611
};
629612
}
630613
atomic_rmw!(cmpxchg, order);
631-
debug_assert!(r == 0 || r == 1, "r={}", r);
632-
r != 0
614+
extract_cr0(r)
633615
}
634616
}
635617
}
@@ -832,10 +814,8 @@ macro_rules! atomic16 {
832814
"bne %cr0, 2b",
833815
"3:",
834816
"srw %r5, %r3, %r7",
817+
"mfcr %r3",
835818
$acquire,
836-
"xor %r3, %r5, %r4",
837-
"cntlzw %r3, %r3",
838-
"srwi %r3, %r3, 5",
839819
"sth %r5, 0(%r6)",
840820
inout("r3") dst => r,
841821
inout("r4") old => _,
@@ -879,11 +859,9 @@ macro_rules! atomic16 {
879859
"bne %cr0, 2b",
880860
"3:",
881861
"srw %r5, %r9, %r5",
862+
"mfcr %r3",
882863
$acquire,
883-
"xor %r3, %r5, %r4",
884864
"sth %r5, 0(%r6)",
885-
"cntlzw %r3, %r3",
886-
"srwi %r3, %r3, 5",
887865
inout("r3") dst => r,
888866
inout("r4") old => _,
889867
inout("r5") new => _,
@@ -925,15 +903,8 @@ macro_rules! atomic16 {
925903
"stwcx. %r11, 0, %r3",
926904
"bne %cr0, 2b",
927905
"3:",
928-
"srw %r5, %r9, %r5",
929-
"li %r3, 0",
930-
"cmpw %r5, %r4",
931-
"li %r4, 1",
932-
"bc 12, 2, 5f",
933-
"b 6f",
934-
"5:",
935-
"addi %r3, %r4, 0",
936-
"6:",
906+
"srw %r5, %r9, %r5",
907+
"mfcr %r3",
937908
$acquire,
938909
"sth %r5, 0(%r6)",
939910
inout("r3") dst => r,
@@ -951,8 +922,7 @@ macro_rules! atomic16 {
951922
};
952923
}
953924
atomic_rmw!(cmpxchg, order);
954-
debug_assert!(r == 0 || r == 1, "r={}", r);
955-
r != 0
925+
extract_cr0(r)
956926
}
957927
}
958928
}
@@ -1226,20 +1196,14 @@ macro_rules! atomic128 {
12261196
"xor %r10, %r8, %r4",
12271197
"or. %r11, %r11, %r10",
12281198
"bne %cr0, 3f", // jump if compare failed
1229-
"mr %r11, %r7",
1230-
"mr %r10, %r6",
1231-
"stqcx. %r10, 0, {dst}",
1199+
"stqcx. %r6, 0, {dst}",
12321200
"bne %cr0, 2b", // continue loop if store failed
12331201
"3:",
1202+
"mfcr %r11",
12341203
$acquire,
12351204
// store r8-r9 pair to out
12361205
concat!("std %r8, ", p128h!(), "({out})"),
12371206
concat!("std %r9, ", p128l!(), "({out})"),
1238-
"xor %r11, %r9, %r5",
1239-
"xor %r10, %r8, %r4",
1240-
"or. %r11, %r11, %r10",
1241-
"cntlzd %r11, %r11",
1242-
"rldicl %r11, %r11, 58, 63",
12431207
dst = inout(reg_nonzero) dst => _,
12441208
old = in(reg_nonzero) old,
12451209
new = in(reg_nonzero) new,
@@ -1260,8 +1224,7 @@ macro_rules! atomic128 {
12601224
};
12611225
}
12621226
atomic_rmw!(cmpxchg, order);
1263-
debug_assert!(r == 0 || r == 1, "r={}", r);
1264-
r != 0
1227+
extract_cr0(r)
12651228
}
12661229
}
12671230
}

src/arch/s390x.rs

Lines changed: 25 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
// s390x
22
//
33
// Refs:
4+
// - z/Architecture Principles of Operation https://publibfp.dhe.ibm.com/epubs/pdf/a227832d.pdf
45
// - z/Architecture Reference Summary https://www.ibm.com/support/pages/zarchitecture-reference-summary
56
// - portable-atomic https://github.com/taiki-e/portable-atomic
67
//
78
// Generated asm:
8-
// - s390x https://godbolt.org/z/8eEYWr6f8
9+
// - s390x https://godbolt.org/z/7KzrMqW9e
910

1011
use core::{
1112
arch::asm,
@@ -17,6 +18,14 @@ use core::{
1718
use crate::raw::{AtomicCompareExchange, AtomicSwap};
1819
use crate::raw::{AtomicLoad, AtomicStore};
1920

21+
#[cfg(atomic_maybe_uninit_s390x_asm_cc_clobbered)]
22+
#[inline]
23+
fn extract_cc(r: i64) -> bool {
24+
let r = r.wrapping_add(-268435456) & (1 << 31);
25+
debug_assert!(r == 0 || r == 2147483648, "r={r}");
26+
r != 0
27+
}
28+
2029
macro_rules! atomic_load_store {
2130
($int_type:ident, $asm_suffix:tt, $st_suffix:tt) => {
2231
impl AtomicLoad for $int_type {
@@ -149,18 +158,16 @@ macro_rules! atomic {
149158

150159
// SAFETY: the caller must uphold the safety contract.
151160
unsafe {
152-
let mut r: usize;
161+
let mut r: i64;
153162
// compare_exchange is always SeqCst.
154163
asm!(
155164
// load from old/new to r0/tmp
156165
concat!("l", $asm_suffix, " %r0, 0({old})"),
157166
concat!("l", $asm_suffix, " {tmp}, 0({new})"),
158167
// (atomic) compare and exchange
159168
concat!("cs", $asm_suffix, " %r0, {tmp}, 0({dst})"),
160-
// store result to r as true(1) or false(0)
161-
"ipm {tmp}",
162-
"afi {tmp}, -268435456",
163-
"risbg {r}, {tmp}, 63, 191, 33",
169+
// store condition code to r
170+
"ipm {r}",
164171
// store r0 to out
165172
concat!("st", $st_suffix, " %r0, 0({out})"),
166173
dst = in(reg) dst,
@@ -172,8 +179,7 @@ macro_rules! atomic {
172179
out("r0") _,
173180
options(nostack),
174181
);
175-
debug_assert!(r == 0 || r == 1, "r={}", r);
176-
r != 0
182+
extract_cc(r)
177183
}
178184
}
179185
}
@@ -248,7 +254,7 @@ macro_rules! atomic8 {
248254

249255
// SAFETY: the caller must uphold the safety contract.
250256
unsafe {
251-
let mut r: usize;
257+
let mut r: i64;
252258
// compare_exchange is always SeqCst.
253259
asm!(
254260
// Implement sub-word atomic operations using word-sized CAS loop.
@@ -271,10 +277,8 @@ macro_rules! atomic8 {
271277
"cs %r4, %r12, 0(%r3)",
272278
"jl 2b",
273279
"3:",
274-
// store result to r2 as true(1) or false(0)
275-
"ipm %r0",
276-
"afi %r0, -268435456",
277-
"risbg %r2, %r0, 63, 191, 33",
280+
// store condition code to r2
281+
"ipm %r2",
278282
"stc %r13, 0(%r5)",
279283
out("r0") _,
280284
out("r1") _,
@@ -287,8 +291,7 @@ macro_rules! atomic8 {
287291
out("r14") _,
288292
options(nostack),
289293
);
290-
debug_assert!(r == 0 || r == 1, "r={}", r);
291-
r != 0
294+
extract_cc(r)
292295
}
293296
}
294297
}
@@ -363,7 +366,7 @@ macro_rules! atomic16 {
363366

364367
// SAFETY: the caller must uphold the safety contract.
365368
unsafe {
366-
let mut r: usize;
369+
let mut r: i64;
367370
// compare_exchange is always SeqCst.
368371
asm!(
369372
// Implement sub-word atomic operations using word-sized CAS loop.
@@ -386,10 +389,8 @@ macro_rules! atomic16 {
386389
"cs %r4, %r12, 0(%r3)",
387390
"jl 2b",
388391
"3:",
389-
// store result to r2 as true(1) or false(0)
390-
"ipm %r0",
391-
"afi %r0, -268435456",
392-
"risbg %r2, %r0, 63, 191, 33",
392+
// store condition code to r2
393+
"ipm %r2",
393394
"sth %r13, 0(%r5)",
394395
out("r0") _,
395396
out("r1") _,
@@ -402,8 +403,7 @@ macro_rules! atomic16 {
402403
out("r14") _,
403404
options(nostack),
404405
);
405-
debug_assert!(r == 0 || r == 1, "r={}", r);
406-
r != 0
406+
extract_cc(r)
407407
}
408408
}
409409
}
@@ -562,7 +562,7 @@ macro_rules! atomic128 {
562562

563563
// SAFETY: the caller must uphold the safety contract.
564564
unsafe {
565-
let mut r: usize;
565+
let mut r: i64;
566566
// compare_exchange is always SeqCst.
567567
asm!(
568568
// load from old/new to r0-r1/r12-r13 pairs
@@ -572,10 +572,8 @@ macro_rules! atomic128 {
572572
"lg %r12, 0({new})",
573573
// (atomic) compare and exchange
574574
"cdsg %r0, %r12, 0({dst})",
575-
// store result to r as true(1) or false(0)
575+
// store condition code to r
576576
"ipm {r}",
577-
"afi {r}, -268435456",
578-
"risbg {r}, {r}, 63, 191, 33",
579577
// store r0-r1 pair to out
580578
"stg %r1, 8({out})",
581579
"stg %r0, 0({out})",
@@ -591,8 +589,7 @@ macro_rules! atomic128 {
591589
out("r13") _, // new (hi)
592590
options(nostack),
593591
);
594-
debug_assert!(r == 0 || r == 1, "r={}", r);
595-
r != 0
592+
extract_cc(r)
596593
}
597594
}
598595
}

0 commit comments

Comments
 (0)