diff --git a/src/concurrency/data_race.rs b/src/concurrency/data_race.rs
index 5cb68634b7..4cdc9348dc 100644
--- a/src/concurrency/data_race.rs
+++ b/src/concurrency/data_race.rs
@@ -874,16 +874,14 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
                         // Either Acquire | AcqRel | SeqCst
                         clocks.apply_acquire_fence();
                     }
-                    if atomic != AtomicFenceOrd::Acquire {
-                        // Either Release | AcqRel | SeqCst
-                        clocks.apply_release_fence();
-                    }
                     if atomic == AtomicFenceOrd::SeqCst {
                         // Behave like an RMW on the global fence location. This takes full care of
                         // all the SC fence requirements, including C++17 §32.4 [atomics.order]
                         // paragraph 6 (which would limit what future reads can see). It also rules
                         // out many legal behaviors, but we don't currently have a model that would
                         // be more precise.
+                        // Also see the second bullet on page 10 of
+                        // <https://www.cs.tau.ac.il/~orilahav/papers/popl21_robustness.pdf>.
                         let mut sc_fence_clock = data_race.last_sc_fence.borrow_mut();
                         sc_fence_clock.join(&clocks.clock);
                         clocks.clock.join(&sc_fence_clock);
@@ -891,6 +889,12 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
                         // (but this is only respected by future reads).
                         clocks.write_seqcst.join(&data_race.last_sc_write_per_thread.borrow());
                     }
+                    // The release fence is last, since both of the above could alter our clock,
+                    // which should be part of what is being released.
+                    if atomic != AtomicFenceOrd::Acquire {
+                        // Either Release | AcqRel | SeqCst
+                        clocks.apply_release_fence();
+                    }
 
                     // Increment timestamp in case of release semantics.
                     interp_ok(atomic != AtomicFenceOrd::Acquire)
diff --git a/tests/pass/0weak_memory_consistency.rs b/tests/pass/0weak_memory_consistency.rs
index 840dd56939..c0d0287855 100644
--- a/tests/pass/0weak_memory_consistency.rs
+++ b/tests/pass/0weak_memory_consistency.rs
@@ -1,4 +1,4 @@
-//@compile-flags: -Zmiri-ignore-leaks -Zmiri-disable-stacked-borrows -Zmiri-provenance-gc=10000
+//@compile-flags: -Zmiri-ignore-leaks -Zmiri-disable-stacked-borrows -Zmiri-disable-validation -Zmiri-provenance-gc=10000
 // This test's runtime explodes if the GC interval is set to 1 (which we do in CI), so we
 // override it internally back to the default frequency.
 
@@ -34,14 +34,10 @@ unsafe impl<T> Sync for EvilSend<T> {}
 // We can't create static items because we need to run each test
 // multiple times
 fn static_atomic(val: i32) -> &'static AtomicI32 {
-    let ret = Box::leak(Box::new(AtomicI32::new(val)));
-    ret.store(val, Relaxed); // work around https://github.com/rust-lang/miri/issues/2164
-    ret
+    Box::leak(Box::new(AtomicI32::new(val)))
 }
 fn static_atomic_bool(val: bool) -> &'static AtomicBool {
-    let ret = Box::leak(Box::new(AtomicBool::new(val)));
-    ret.store(val, Relaxed); // work around https://github.com/rust-lang/miri/issues/2164
-    ret
+    Box::leak(Box::new(AtomicBool::new(val)))
 }
 
 // Spins until it acquires a pre-determined value.
@@ -365,6 +361,45 @@ fn test_cpp20_rwc_syncs() {
     assert!((b, c) != (0, 0));
 }
 
+/// This checks that the *last* thing the SC fence does is act like a release fence.
+/// See <https://github.com/rust-lang/miri/pull/4057#issuecomment-2522296601>.
+fn test_sc_fence_release() {
+    let x = static_atomic(0);
+    let y = static_atomic(0);
+    let z = static_atomic(0);
+    let k = static_atomic(0);
+
+    let j1 = spawn(move || {
+        x.store(1, Relaxed);
+        fence(SeqCst);
+        k.store(1, Relaxed);
+    });
+    let j2 = spawn(move || {
+        y.store(1, Relaxed);
+        fence(SeqCst);
+        z.store(1, Relaxed);
+    });
+
+    let j3 = spawn(move || {
+        let kval = k.load(Acquire);
+        let yval = y.load(Relaxed);
+        (kval, yval)
+    });
+    let j4 = spawn(move || {
+        let zval = z.load(Acquire);
+        let xval = x.load(Relaxed);
+        (zval, xval)
+    });
+
+    j1.join().unwrap();
+    j2.join().unwrap();
+    let (kval, yval) = j3.join().unwrap();
+    let (zval, xval) = j4.join().unwrap();
+
+    let bad = kval == 1 && yval == 0 && zval == 1 && xval == 0;
+    assert!(!bad);
+}
+
 pub fn main() {
     for _ in 0..50 {
         test_single_thread();
@@ -378,5 +413,6 @@ pub fn main() {
         test_iriw_sc_rlx();
         test_cpp20_sc_fence_fix();
         test_cpp20_rwc_syncs();
+        test_sc_fence_release();
     }
 }