Skip to content

Commit 6754be3

Browse files
authored
Move emscripten_futex_wait_non_blocking to native code. NFC (#16145)
This should be a pure transliteration of the JS function. Good to see the native code cost here is only ~100 bytes but the JS savings are ~800.
1 parent 81d05e1 commit 6754be3

9 files changed

+99
-119
lines changed

emcc.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2028,7 +2028,6 @@ def default_setting(name, new_default):
20282028
# Functions needs to be exported from the module since they are used in worker.js
20292029
settings.REQUIRED_EXPORTS += [
20302030
'emscripten_dispatch_to_thread_',
2031-
'_emscripten_main_thread_futex',
20322031
'_emscripten_thread_free_data',
20332032
'_emscripten_allow_main_runtime_queued_calls',
20342033
'emscripten_main_browser_thread_id',

src/library_pthread.js

Lines changed: 0 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -567,8 +567,6 @@ var LibraryPThread = {
567567
);
568568
#if ASSERTIONS
569569
PThread.mainRuntimeThread = true;
570-
// Verify that this native symbol used by futex_wait/wake is exported correctly.
571-
assert(__emscripten_main_thread_futex > 0);
572570
#endif
573571
PThread.threadInit();
574572
},
@@ -783,115 +781,6 @@ var LibraryPThread = {
783781
return 0;
784782
},
785783

786-
// Returns 0 on success, or one of the values -ETIMEDOUT, -EWOULDBLOCK or -EINVAL on error.
787-
_emscripten_futex_wait_non_blocking__deps: ['emscripten_main_thread_process_queued_calls'],
788-
_emscripten_futex_wait_non_blocking: function(addr, val, timeout) {
789-
#if ASSERTIONS
790-
// Should only be called from the main web thread where atomics.wait is not allowed.
791-
assert(ENVIRONMENT_IS_WEB);
792-
#endif
793-
794-
// Atomics.wait is not available in the main browser thread, so simulate it via busy spinning.
795-
var tNow = performance.now();
796-
var tEnd = tNow + timeout;
797-
798-
// Register globally which address the main thread is simulating to be
799-
// waiting on. When zero, the main thread is not waiting on anything, and on
800-
// nonzero, the contents of the address pointed by __emscripten_main_thread_futex
801-
// tell which address the main thread is simulating its wait on.
802-
// We need to be careful of recursion here: If we wait on a futex, and
803-
// then call _emscripten_main_thread_process_queued_calls() below, that
804-
// will call code that takes the proxying mutex - which can once more
805-
// reach this code in a nested call. To avoid interference between the
806-
// two (there is just a single __emscripten_main_thread_futex at a time), unmark
807-
// ourselves before calling the potentially-recursive call. See below for
808-
// how we handle the case of our futex being notified during the time in
809-
// between when we are not set as the value of __emscripten_main_thread_futex.
810-
#if ASSERTIONS
811-
assert(__emscripten_main_thread_futex > 0);
812-
#endif
813-
var lastAddr = Atomics.exchange(HEAP32, __emscripten_main_thread_futex >> 2, addr);
814-
#if ASSERTIONS
815-
// We must not have already been waiting.
816-
assert(lastAddr == 0);
817-
#endif
818-
819-
while (1) {
820-
// Check for a timeout.
821-
tNow = performance.now();
822-
if (tNow > tEnd) {
823-
// We timed out, so stop marking ourselves as waiting.
824-
lastAddr = Atomics.exchange(HEAP32, __emscripten_main_thread_futex >> 2, 0);
825-
#if ASSERTIONS
826-
// The current value must have been our address which we set, or
827-
// in a race it was set to 0 which means another thread just allowed
828-
// us to run, but (tragically) that happened just a bit too late.
829-
assert(lastAddr == addr || lastAddr == 0);
830-
#endif
831-
return -{{{ cDefine('ETIMEDOUT') }}};
832-
}
833-
// We are performing a blocking loop here, so we must handle proxied
834-
// events from pthreads, to avoid deadlocks.
835-
// Note that we have to do so carefully, as we may take a lock while
836-
// doing so, which can recurse into this function; stop marking
837-
// ourselves as waiting while we do so.
838-
lastAddr = Atomics.exchange(HEAP32, __emscripten_main_thread_futex >> 2, 0);
839-
#if ASSERTIONS
840-
assert(lastAddr == addr || lastAddr == 0);
841-
#endif
842-
if (lastAddr == 0) {
843-
// We were told to stop waiting, so stop.
844-
break;
845-
}
846-
_emscripten_main_thread_process_queued_calls();
847-
848-
// Check the value, as if we were starting the futex all over again.
849-
// This handles the following case:
850-
//
851-
// * wait on futex A
852-
// * recurse into emscripten_main_thread_process_queued_calls(),
853-
// which waits on futex B. that sets the __emscripten_main_thread_futex address to
854-
// futex B, and there is no longer any mention of futex A.
855-
// * a worker is done with futex A. it checks __emscripten_main_thread_futex but does
856-
// not see A, so it does nothing special for the main thread.
857-
// * a worker is done with futex B. it flips __emscripten_main_thread_futex from B
858-
// to 0, ending the wait on futex B.
859-
// * we return to the wait on futex A. __emscripten_main_thread_futex is 0, but that
860-
// is because of futex B being done - we can't tell from
861-
// __emscripten_main_thread_futex whether A is done or not. therefore, check the
862-
// memory value of the futex.
863-
//
864-
// That case motivates the design here. Given that, checking the memory
865-
// address is also necessary for other reasons: we unset and re-set our
866-
// address in __emscripten_main_thread_futex around calls to
867-
// emscripten_main_thread_process_queued_calls(), and a worker could
868-
// attempt to wake us up right before/after such times.
869-
//
870-
// Note that checking the memory value of the futex is valid to do: we
871-
// could easily have been delayed (relative to the worker holding on
872-
// to futex A), which means we could be starting all of our work at the
873-
// later time when there is no need to block. The only "odd" thing is
874-
// that we may have caused side effects in that "delay" time. But the
875-
// only side effects we can have are to call
876-
// emscripten_main_thread_process_queued_calls(). That is always ok to
877-
// do on the main thread (it's why it is ok for us to call it in the
878-
// middle of this function, and elsewhere). So if we check the value
879-
// here and return, it's the same as if what happened on the main thread
880-
// was the same as calling emscripten_main_thread_process_queued_calls()
881-
// a few times before calling emscripten_futex_wait().
882-
if (Atomics.load(HEAP32, addr >> 2) != val) {
883-
return -{{{ cDefine('EWOULDBLOCK') }}};
884-
}
885-
886-
// Mark us as waiting once more, and continue the loop.
887-
lastAddr = Atomics.exchange(HEAP32, __emscripten_main_thread_futex >> 2, addr);
888-
#if ASSERTIONS
889-
assert(lastAddr == 0);
890-
#endif
891-
}
892-
return 0;
893-
},
894-
895784
__call_main__deps: ['exit', '$exitOnMainThread'],
896785
__call_main: function(argc, argv) {
897786
var returnCode = {{{ exportedAsmFunc('_main') }}}(argc, argv);

system/lib/pthread/emscripten_futex_wait.c

Lines changed: 95 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,103 @@
88
#include <errno.h>
99
#include <math.h>
1010
#include <emscripten/threading.h>
11+
#include "atomic.h"
1112
#include "threading_internal.h"
1213

14+
extern void* _emscripten_main_thread_futex;
15+
1316
int _emscripten_thread_supports_atomics_wait(void);
14-
int _emscripten_futex_wait_non_blocking(volatile void *addr, uint32_t val, double max_wait_ms);
17+
18+
static int futex_wait_busy(volatile void *addr, uint32_t val, double timeout) {
19+
// Atomics.wait is not available in the main browser thread, so simulate it via busy spinning.
20+
double now = emscripten_get_now();
21+
double end = now + timeout;
22+
23+
// Register globally which address the main thread is simulating to be
24+
// waiting on. When zero, the main thread is not waiting on anything, and on
25+
// nonzero, the contents of the address pointed by __emscripten_main_thread_futex
26+
// tell which address the main thread is simulating its wait on.
27+
// We need to be careful of recursion here: If we wait on a futex, and
28+
// then call _emscripten_main_thread_process_queued_calls() below, that
29+
// will call code that takes the proxying mutex - which can once more
30+
// reach this code in a nested call. To avoid interference between the
31+
// two (there is just a single __emscripten_main_thread_futex at a time), unmark
32+
// ourselves before calling the potentially-recursive call. See below for
33+
// how we handle the case of our futex being notified during the time in
34+
// between when we are not set as the value of __emscripten_main_thread_futex.
35+
void* last_addr = a_cas_p(&_emscripten_main_thread_futex, 0, (void*)addr);
36+
// We must not have already been waiting.
37+
assert(last_addr == 0);
38+
39+
while (1) {
40+
// Check for a timeout.
41+
now = emscripten_get_now();
42+
if (now > end) {
43+
// We timed out, so stop marking ourselves as waiting.
44+
last_addr = a_cas_p(&_emscripten_main_thread_futex, (void*)addr, 0);
45+
// The current value must have been our address which we set, or
46+
// in a race it was set to 0 which means another thread just allowed
47+
// us to run, but (tragically) that happened just a bit too late.
48+
assert(last_addr == addr || last_addr == 0);
49+
return -ETIMEDOUT;
50+
}
51+
// We are performing a blocking loop here, so we must handle proxied
52+
// events from pthreads, to avoid deadlocks.
53+
// Note that we have to do so carefully, as we may take a lock while
54+
// doing so, which can recurse into this function; stop marking
55+
// ourselves as waiting while we do so.
56+
last_addr = a_cas_p(&_emscripten_main_thread_futex, (void*)addr, 0);
57+
assert(last_addr == addr || last_addr == 0);
58+
if (last_addr == 0) {
59+
// We were told to stop waiting, so stop.
60+
break;
61+
}
62+
emscripten_main_thread_process_queued_calls();
63+
64+
// Check the value, as if we were starting the futex all over again.
65+
// This handles the following case:
66+
//
67+
// * wait on futex A
68+
// * recurse into emscripten_main_thread_process_queued_calls(),
69+
// which waits on futex B. that sets the __emscripten_main_thread_futex address to
70+
// futex B, and there is no longer any mention of futex A.
71+
// * a worker is done with futex A. it checks __emscripten_main_thread_futex but does
72+
// not see A, so it does nothing special for the main thread.
73+
// * a worker is done with futex B. it flips mainThreadMutex from B
74+
// to 0, ending the wait on futex B.
75+
// * we return to the wait on futex A. __emscripten_main_thread_futex is 0, but that
76+
// is because of futex B being done - we can't tell from
77+
// __emscripten_main_thread_futex whether A is done or not. therefore, check the
78+
// memory value of the futex.
79+
//
80+
// That case motivates the design here. Given that, checking the memory
81+
// address is also necessary for other reasons: we unset and re-set our
82+
// address in __emscripten_main_thread_futex around calls to
83+
// emscripten_main_thread_process_queued_calls(), and a worker could
84+
// attempt to wake us up right before/after such times.
85+
//
86+
// Note that checking the memory value of the futex is valid to do: we
87+
// could easily have been delayed (relative to the worker holding on
88+
// to futex A), which means we could be starting all of our work at the
89+
// later time when there is no need to block. The only "odd" thing is
90+
// that we may have caused side effects in that "delay" time. But the
91+
// only side effects we can have are to call
92+
// emscripten_main_thread_process_queued_calls(). That is always ok to
93+
// do on the main thread (it's why it is ok for us to call it in the
94+
// middle of this function, and elsewhere). So if we check the value
95+
// here and return, it's the same is if what happened on the main thread
96+
// was the same as calling emscripten_main_thread_process_queued_calls()
97+
// a few times before calling emscripten_futex_wait().
98+
if (__c11_atomic_load((_Atomic uintptr_t*)addr, __ATOMIC_SEQ_CST) != val) {
99+
return -EWOULDBLOCK;
100+
}
101+
102+
// Mark us as waiting once more, and continue the loop.
103+
last_addr = a_cas_p(&_emscripten_main_thread_futex, 0, (void*)addr);
104+
assert(last_addr == 0);
105+
}
106+
return 0;
107+
}
15108

16109
int emscripten_futex_wait(volatile void *addr, uint32_t val, double max_wait_ms) {
17110
if ((((intptr_t)addr)&3) != 0) {
@@ -25,7 +118,7 @@ int emscripten_futex_wait(volatile void *addr, uint32_t val, double max_wait_ms)
25118
// __builtin_wasm_memory_atomic_wait32 so we call futex_wait_busy(), which
26119
// will busy wait.
27120
if (!_emscripten_thread_supports_atomics_wait()) {
28-
ret = _emscripten_futex_wait_non_blocking(addr, val, max_wait_ms);
121+
ret = futex_wait_busy(addr, val, max_wait_ms);
29122
emscripten_conditional_set_current_thread_status(EM_THREAD_STATUS_WAITFUTEX, EM_THREAD_STATUS_RUNNING);
30123
return ret;
31124
}

tests/other/metadce/minimal_main_Oz_USE_PTHREADS_PROXY_TO_PTHREAD.exports

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ F
77
G
88
H
99
I
10-
J
10+
o
1111
p
1212
q
1313
r

tests/other/metadce/minimal_main_Oz_USE_PTHREADS_PROXY_TO_PTHREAD.funcs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ $_emscripten_thread_free_data
2121
$_emscripten_thread_init
2222
$_main_thread
2323
$a_cas
24+
$a_cas_p.1
2425
$a_dec
2526
$a_fetch_add.1
2627
$a_inc

tests/other/metadce/minimal_main_Oz_USE_PTHREADS_PROXY_TO_PTHREAD.imports

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,3 @@ a.k
1212
a.l
1313
a.m
1414
a.n
15-
a.o
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
48044
1+
47209

tests/other/metadce/minimal_main_Oz_USE_PTHREADS_PROXY_TO_PTHREAD.sent

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,3 @@ k
1212
l
1313
m
1414
n
15-
o
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
17404
1+
17498

0 commit comments

Comments
 (0)