Skip to content

Commit e4f4784

Browse files
authored
Remove unnecessary fences from Heap2Local output (#7309)
We were previously over conservative when optimizing sequentially consistent accesses to non-escaping structs and emitted fences in their places. Since accesses to non-escaping structs cannot possibly synchronize with accesses in other threads, these fences are not necessary. Stop emitting them.
1 parent fab77b9 commit e4f4784

File tree

3 files changed

+6
-45
lines changed

3 files changed

+6
-45
lines changed

src/passes/Heap2Local.cpp

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -857,13 +857,8 @@ struct Struct2Local : PostWalker<Struct2Local> {
857857
builder.makeLocalSet(localIndexes[curr->index], curr->value));
858858

859859
// This struct.set cannot possibly synchronize with other threads via the
860-
// read value, since the struct never escapes this function. But if the set
861-
// is sequentially consistent, it still participates in the global order of
862-
// sequentially consistent operations. Preserve this effect on the global
863-
// ordering by inserting a fence.
864-
if (curr->order == MemoryOrder::SeqCst) {
865-
replacement = builder.blockify(replacement, builder.makeAtomicFence());
866-
}
860+
// read value, since the struct never escapes this function, so we don't
861+
// need a fence.
867862
replaceCurrent(replacement);
868863
}
869864

@@ -897,11 +892,8 @@ struct Struct2Local : PostWalker<Struct2Local> {
897892
// for other opts to handle.
898893
value = Bits::makePackedFieldGet(value, field, curr->signed_, wasm);
899894
auto* replacement = builder.blockify(builder.makeDrop(curr->ref));
900-
// See the note on seqcst struct.set. It is ok to insert the fence before
901-
// the value here since we know the value is just a local.get.
902-
if (curr->order == MemoryOrder::SeqCst) {
903-
replacement = builder.blockify(replacement, builder.makeAtomicFence());
904-
}
895+
// Just like optimized struct.set, this struct.get cannot synchronize with
896+
// anything, so we don't need a fence.
905897
replaceCurrent(builder.blockify(replacement, value));
906898
}
907899

@@ -964,11 +956,6 @@ struct Struct2Local : PostWalker<Struct2Local> {
964956
}
965957
block->list.push_back(builder.makeLocalSet(local, newVal));
966958

967-
// See the notes on seqcst struct.get and struct.set.
968-
if (curr->order == MemoryOrder::SeqCst) {
969-
block->list.push_back(builder.makeAtomicFence());
970-
}
971-
972959
// Unstash the old value.
973960
block->list.push_back(builder.makeLocalGet(oldScratch, type));
974961
block->type = type;
@@ -1020,11 +1007,6 @@ struct Struct2Local : PostWalker<Struct2Local> {
10201007
builder.makeLocalSet(
10211008
local, builder.makeLocalGet(replacementScratch, type))));
10221009

1023-
// See the notes on seqcst struct.get and struct.set.
1024-
if (curr->order == MemoryOrder::SeqCst) {
1025-
block->list.push_back(builder.makeAtomicFence());
1026-
}
1027-
10281010
// Unstash the old value.
10291011
block->list.push_back(builder.makeLocalGet(oldScratch, type));
10301012
block->type = type;

test/lit/passes/heap2local-rmw.wast

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,6 @@
9393
;; CHECK-NEXT: (local.get $2)
9494
;; CHECK-NEXT: )
9595
;; CHECK-NEXT: )
96-
;; CHECK-NEXT: (atomic.fence)
9796
;; CHECK-NEXT: (local.get $1)
9897
;; CHECK-NEXT: )
9998
(func $rmw-add-i32 (result i32)
@@ -127,7 +126,6 @@
127126
;; CHECK-NEXT: (local.get $2)
128127
;; CHECK-NEXT: )
129128
;; CHECK-NEXT: )
130-
;; CHECK-NEXT: (atomic.fence)
131129
;; CHECK-NEXT: (local.get $1)
132130
;; CHECK-NEXT: )
133131
(func $rmw-sub-i32 (result i32)
@@ -161,7 +159,6 @@
161159
;; CHECK-NEXT: (local.get $2)
162160
;; CHECK-NEXT: )
163161
;; CHECK-NEXT: )
164-
;; CHECK-NEXT: (atomic.fence)
165162
;; CHECK-NEXT: (local.get $1)
166163
;; CHECK-NEXT: )
167164
(func $rmw-and-i32 (result i32)
@@ -195,7 +192,6 @@
195192
;; CHECK-NEXT: (local.get $2)
196193
;; CHECK-NEXT: )
197194
;; CHECK-NEXT: )
198-
;; CHECK-NEXT: (atomic.fence)
199195
;; CHECK-NEXT: (local.get $1)
200196
;; CHECK-NEXT: )
201197
(func $rmw-or-i32 (result i32)
@@ -229,7 +225,6 @@
229225
;; CHECK-NEXT: (local.get $2)
230226
;; CHECK-NEXT: )
231227
;; CHECK-NEXT: )
232-
;; CHECK-NEXT: (atomic.fence)
233228
;; CHECK-NEXT: (local.get $1)
234229
;; CHECK-NEXT: )
235230
(func $rmw-xor-i32 (result i32)
@@ -260,7 +255,6 @@
260255
;; CHECK-NEXT: (local.set $0
261256
;; CHECK-NEXT: (local.get $2)
262257
;; CHECK-NEXT: )
263-
;; CHECK-NEXT: (atomic.fence)
264258
;; CHECK-NEXT: (local.get $1)
265259
;; CHECK-NEXT: )
266260
(func $rmw-xchg-i32 (result i32)
@@ -303,7 +297,6 @@
303297
;; CHECK-NEXT: )
304298
;; CHECK-NEXT: )
305299
;; CHECK-NEXT: )
306-
;; CHECK-NEXT: (atomic.fence)
307300
;; CHECK-NEXT: (local.get $1)
308301
;; CHECK-NEXT: )
309302
(func $rmw-cmpxchg-i32 (result i32)
@@ -338,7 +331,6 @@
338331
;; CHECK-NEXT: (local.get $2)
339332
;; CHECK-NEXT: )
340333
;; CHECK-NEXT: )
341-
;; CHECK-NEXT: (atomic.fence)
342334
;; CHECK-NEXT: (local.get $1)
343335
;; CHECK-NEXT: )
344336
(func $rmw-add-i64 (result i64)
@@ -372,7 +364,6 @@
372364
;; CHECK-NEXT: (local.get $2)
373365
;; CHECK-NEXT: )
374366
;; CHECK-NEXT: )
375-
;; CHECK-NEXT: (atomic.fence)
376367
;; CHECK-NEXT: (local.get $1)
377368
;; CHECK-NEXT: )
378369
(func $rmw-sub-i64 (result i64)
@@ -406,7 +397,6 @@
406397
;; CHECK-NEXT: (local.get $2)
407398
;; CHECK-NEXT: )
408399
;; CHECK-NEXT: )
409-
;; CHECK-NEXT: (atomic.fence)
410400
;; CHECK-NEXT: (local.get $1)
411401
;; CHECK-NEXT: )
412402
(func $rmw-and-i64 (result i64)
@@ -440,7 +430,6 @@
440430
;; CHECK-NEXT: (local.get $2)
441431
;; CHECK-NEXT: )
442432
;; CHECK-NEXT: )
443-
;; CHECK-NEXT: (atomic.fence)
444433
;; CHECK-NEXT: (local.get $1)
445434
;; CHECK-NEXT: )
446435
(func $rmw-or-i64 (result i64)
@@ -474,7 +463,6 @@
474463
;; CHECK-NEXT: (local.get $2)
475464
;; CHECK-NEXT: )
476465
;; CHECK-NEXT: )
477-
;; CHECK-NEXT: (atomic.fence)
478466
;; CHECK-NEXT: (local.get $1)
479467
;; CHECK-NEXT: )
480468
(func $rmw-xor-i64 (result i64)
@@ -505,7 +493,6 @@
505493
;; CHECK-NEXT: (local.set $0
506494
;; CHECK-NEXT: (local.get $2)
507495
;; CHECK-NEXT: )
508-
;; CHECK-NEXT: (atomic.fence)
509496
;; CHECK-NEXT: (local.get $1)
510497
;; CHECK-NEXT: )
511498
(func $rmw-xchg-i64 (result i64)
@@ -548,7 +535,6 @@
548535
;; CHECK-NEXT: )
549536
;; CHECK-NEXT: )
550537
;; CHECK-NEXT: )
551-
;; CHECK-NEXT: (atomic.fence)
552538
;; CHECK-NEXT: (local.get $1)
553539
;; CHECK-NEXT: )
554540
(func $rmw-cmpxchg-i64 (result i64)
@@ -580,7 +566,6 @@
580566
;; CHECK-NEXT: (local.set $1
581567
;; CHECK-NEXT: (local.get $3)
582568
;; CHECK-NEXT: )
583-
;; CHECK-NEXT: (atomic.fence)
584569
;; CHECK-NEXT: (local.get $2)
585570
;; CHECK-NEXT: )
586571
(func $rmw-xchg-ref (param (ref null $struct)) (result (ref null $struct))
@@ -623,7 +608,6 @@
623608
;; CHECK-NEXT: )
624609
;; CHECK-NEXT: )
625610
;; CHECK-NEXT: )
626-
;; CHECK-NEXT: (atomic.fence)
627611
;; CHECK-NEXT: (local.get $3)
628612
;; CHECK-NEXT: )
629613
(func $rmw-cmpxchg-ref (param (ref null $struct) (ref null $struct)) (result (ref null $struct))
@@ -661,7 +645,6 @@
661645
;; CHECK-NEXT: (local.get $1)
662646
;; CHECK-NEXT: )
663647
(func $rmw-acqrel (result i32)
664-
;; The replacement for an acqrel rmw does not need a fence.
665648
(struct.atomic.rmw.add acqrel acqrel $i32 0
666649
(struct.new_default $i32)
667650
(i32.const 1)
@@ -704,7 +687,6 @@
704687
;; CHECK-NEXT: (local.get $1)
705688
;; CHECK-NEXT: )
706689
(func $cmpxchg-acqrel (result i32)
707-
;; The replacement for an acqrel rmw does not need a fence.
708690
(struct.atomic.rmw.cmpxchg acqrel acqrel $i32 0
709691
(struct.new_default $i32)
710692
(i32.const 1)

test/lit/passes/heap2local.wast

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4678,7 +4678,6 @@
46784678
;; CHECK-NEXT: (drop
46794679
;; CHECK-NEXT: (ref.null (shared none))
46804680
;; CHECK-NEXT: )
4681-
;; CHECK-NEXT: (atomic.fence)
46824681
;; CHECK-NEXT: (local.get $1)
46834682
;; CHECK-NEXT: )
46844683
;; CHECK-NEXT: )
@@ -4689,17 +4688,15 @@
46894688
;; CHECK-NEXT: (local.set $1
46904689
;; CHECK-NEXT: (i32.const 0)
46914690
;; CHECK-NEXT: )
4692-
;; CHECK-NEXT: (atomic.fence)
46934691
;; CHECK-NEXT: )
46944692
;; CHECK-NEXT: )
46954693
(func $seqcst
46964694
(local (ref null $struct))
46974695
(local.set 0
46984696
(struct.new_default $struct)
46994697
)
4700-
;; seqcst accesses participate in the global ordering of seqcst operations,
4701-
;; so they need to be replaced with a seqcst fence to maintain that
4702-
;; ordering.
4698+
;; seqcst accesses also cannot synchronize with other threads, so we can
4699+
;; still optimize normally.
47034700
(drop
47044701
(struct.atomic.get $struct 0
47054702
(local.get 0)

0 commit comments

Comments
 (0)