Skip to content

Commit 37cddcf

Browse files
committed
s390x: Optimize SeqCst store on z196 (arch9) or later
1 parent dddf475 commit 37cddcf

File tree

4 files changed

+68
-40
lines changed

4 files changed

+68
-40
lines changed

.github/workflows/ci.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,13 @@ jobs:
215215
RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-cpu=pwr8
216216
RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-cpu=pwr8
217217
if: startsWith(matrix.target, 'powerpc64-')
218+
# z196 (arch9)
219+
- run: tools/test.sh -vv --tests $TARGET $DOCTEST_XCOMPILE $BUILD_STD
220+
env:
221+
# TODO: the high-word LLVM target-feature is disabled here to work around an LLVM codegen bug; remove this workaround once the bug is fixed.
222+
RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-cpu=z196 -C target-feature=-high-word
223+
RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-cpu=z196 -C target-feature=-high-word
224+
if: startsWith(matrix.target, 's390x')
218225

219226
- run: cargo minimal-versions build -vvv --workspace --all-features --ignore-private $TARGET $BUILD_STD
220227

build.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,21 @@ fn main() {
238238
if version.probe(71, 2023, 5, 8) {
239239
println!("cargo:rustc-cfg=atomic_maybe_uninit_s390x_asm_cc_clobbered");
240240
}
241+
242+
// https://github.com/llvm/llvm-project/blob/llvmorg-16.0.0/llvm/lib/Target/SystemZ/SystemZFeatures.td#L37
243+
let mut has_arch9_features = false;
244+
if let Some(cpu) = target_cpu() {
245+
// https://github.com/llvm/llvm-project/blob/llvmorg-16.0.0/llvm/lib/Target/SystemZ/SystemZProcessors.td
246+
match &*cpu {
247+
"arch9" | "z196" | "arch10" | "zEC12" | "arch11" | "z13" | "arch12" | "z14"
248+
| "arch13" | "z15" | "arch14" | "z16" => has_arch9_features = true,
249+
_ => {}
250+
}
251+
}
252+
// Note: As of rustc 1.69, rustc does not recognize the "fast-serialization" target_feature:
253+
// https://github.com/rust-lang/rust/blob/1.69.0/compiler/rustc_codegen_ssa/src/target_features.rs
254+
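// "fast-serialization" gates the use of `bcr 14,0` (fast-BCR-serialization) instead of `bcr 15,0` as the SeqCst store fence.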
255+
target_feature_if("fast-serialization", has_arch9_features, &version, None, false);
241256
}
242257
_ => {}
243258
}

src/arch/s390x.rs

Lines changed: 42 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
// - portable-atomic https://github.com/taiki-e/portable-atomic
77
//
88
// Generated asm:
9-
// - s390x https://godbolt.org/z/7KzrMqW9e
9+
// - s390x https://godbolt.org/z/vh6x8rTjb
10+
// - s390x (z196) https://godbolt.org/z/fve3761z3
1011

1112
use core::{
1213
arch::asm,
@@ -66,33 +67,35 @@ macro_rules! atomic_load_store {
6667

6768
// SAFETY: the caller must uphold the safety contract.
6869
unsafe {
69-
match order {
70-
// Relaxed and Release stores are equivalent.
71-
Ordering::Relaxed | Ordering::Release => {
72-
asm!(
73-
// load from val to r0
74-
concat!("l", $asm_suffix, " %r0, 0({val})"),
75-
// (atomic) store r0 to dst
76-
concat!("st", $st_suffix, " %r0, 0({dst})"),
77-
dst = in(reg) dst,
78-
val = in(reg) val,
79-
out("r0") _,
80-
options(nostack, preserves_flags),
81-
);
82-
}
83-
Ordering::SeqCst => {
70+
macro_rules! atomic_store {
71+
($fence:tt) => {
8472
asm!(
8573
// load from val to r0
8674
concat!("l", $asm_suffix, " %r0, 0({val})"),
8775
// (atomic) store r0 to dst
8876
concat!("st", $st_suffix, " %r0, 0({dst})"),
89-
"bcr 15, %r0",
77+
$fence,
9078
dst = in(reg) dst,
9179
val = in(reg) val,
9280
out("r0") _,
9381
options(nostack, preserves_flags),
94-
);
95-
},
82+
)
83+
};
84+
}
85+
match order {
86+
// Relaxed and Release stores are equivalent.
87+
Ordering::Relaxed | Ordering::Release => atomic_store!(""),
88+
// bcr 14,0 (fast-BCR-serialization) requires z196 or later.
89+
#[cfg(any(
90+
target_feature = "fast-serialization",
91+
atomic_maybe_uninit_target_feature = "fast-serialization",
92+
))]
93+
Ordering::SeqCst => atomic_store!("bcr 14, 0"),
94+
#[cfg(not(any(
95+
target_feature = "fast-serialization",
96+
atomic_maybe_uninit_target_feature = "fast-serialization",
97+
)))]
98+
Ordering::SeqCst => atomic_store!("bcr 15, 0"),
9699
_ => unreachable!("{:?}", order),
97100
}
98101
}
@@ -465,39 +468,38 @@ macro_rules! atomic128 {
465468

466469
// SAFETY: the caller must uphold the safety contract.
467470
unsafe {
468-
match order {
469-
// Relaxed and Release stores are equivalent.
470-
Ordering::Relaxed | Ordering::Release => {
471+
macro_rules! atomic_store {
472+
($fence:tt) => {
471473
asm!(
472474
// load from val to r0-r1 pair
473475
"lg %r1, 8({val})",
474476
"lg %r0, 0({val})",
475477
// (atomic) store r0-r1 pair to dst
476478
"stpq %r0, 0({dst})",
479+
$fence,
477480
dst = in(reg) dst,
478481
val = in(reg) val,
479482
// Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
480483
out("r0") _,
481484
out("r1") _,
482485
options(nostack, preserves_flags),
483-
);
484-
}
485-
Ordering::SeqCst => {
486-
asm!(
487-
// load from val to r0-r1 pair
488-
"lg %r1, 8({val})",
489-
"lg %r0, 0({val})",
490-
// (atomic) store r0-r1 pair to dst
491-
"stpq %r0, 0({dst})",
492-
"bcr 15, %r0",
493-
dst = in(reg) dst,
494-
val = in(reg) val,
495-
// Quadword atomic instructions work with even/odd pair of specified register and subsequent register.
496-
out("r0") _,
497-
out("r1") _,
498-
options(nostack, preserves_flags),
499-
);
500-
}
486+
)
487+
};
488+
}
489+
match order {
490+
// Relaxed and Release stores are equivalent.
491+
Ordering::Relaxed | Ordering::Release => atomic_store!(""),
492+
// bcr 14,0 (fast-BCR-serialization) requires z196 or later.
493+
#[cfg(any(
494+
target_feature = "fast-serialization",
495+
atomic_maybe_uninit_target_feature = "fast-serialization",
496+
))]
497+
Ordering::SeqCst => atomic_store!("bcr 14, 0"),
498+
#[cfg(not(any(
499+
target_feature = "fast-serialization",
500+
atomic_maybe_uninit_target_feature = "fast-serialization",
501+
)))]
502+
Ordering::SeqCst => atomic_store!("bcr 15, 0"),
501503
_ => unreachable!("{:?}", order),
502504
}
503505
}

tools/build.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,10 @@ build() {
234234
RUSTFLAGS="${target_rustflags} -C target-cpu=pwr7" \
235235
x_cargo "${args[@]}" --target-dir target/pwr7 "$@"
236236
;;
237+
s390x*)
238+
RUSTFLAGS="${target_rustflags} -C target-cpu=z196" \
239+
x_cargo "${args[@]}" --target-dir target/z196 "$@"
240+
;;
237241
esac
238242
}
239243

0 commit comments

Comments
 (0)