Skip to content

Commit 8e4efd6

Browse files
committed
Merge pull request #1477 from pguyot/w03/fix-crash-in-sockets
Fix crash in socket refc handling Ensure refcount of socket resources is properly handled on demonitor Also ensure that demonitor happens when selected process is cleared These changes are made under both the "Apache 2.0" and the "GNU Lesser General Public License 2.1 or later" license terms (dual license). SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
2 parents f802020 + dd552b0 commit 8e4efd6

File tree

2 files changed

+43
-27
lines changed

2 files changed

+43
-27
lines changed

src/libAtomVM/globalcontext.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,7 @@ COLD_FUNC void globalcontext_destroy(GlobalContext *glb)
227227
struct RefcBinary *refc = GET_LIST_ENTRY(item, struct RefcBinary, head);
228228
#ifndef NDEBUG
229229
if (refc->resource_type) {
230-
fprintf(stderr, "Warning, dangling resource of type %s, ref_count = %d\n", refc->resource_type->name, (int) refc->ref_count);
230+
fprintf(stderr, "Warning, dangling resource of type %s, ref_count = %d, data = %p\n", refc->resource_type->name, (int) refc->ref_count, refc->data);
231231
} else {
232232
fprintf(stderr, "Warning, dangling refc binary, ref_count = %d\n", (int) refc->ref_count);
233233
}

src/libAtomVM/otp_socket.c

Lines changed: 42 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include <otp_socket.h>
3333
#include <port.h>
3434
#include <posix_nifs.h>
35+
#include <refc_binary.h>
3536
#include <scheduler.h>
3637
#include <smp.h>
3738
#include <sys.h>
@@ -285,6 +286,8 @@ static void socket_stop(ErlNifEnv *caller_env, void *obj, ErlNifEvent event, int
285286
if (rsrc_obj->selecting_process_id != INVALID_PROCESS_ID) {
286287
enif_demonitor_process(caller_env, rsrc_obj, &rsrc_obj->selecting_process_monitor);
287288
rsrc_obj->selecting_process_id = INVALID_PROCESS_ID;
289+
struct RefcBinary *rsrc_refc = refc_binary_from_data(rsrc_obj);
290+
refc_binary_decrement_refcount(rsrc_refc, caller_env->global);
288291
}
289292

290293
TRACE("socket_stop called on fd=%i\n", rsrc_obj->fd);
@@ -305,42 +308,43 @@ static void socket_down(ErlNifEnv *caller_env, void *obj, ErlNifPid *pid, ErlNif
305308
TRACE("socket_down called on process_id=%i\n", (int) *pid);
306309
#endif
307310

308-
// Increment the reference count so the resource doesn't go away
309-
// (enif_select will decrement the ref count)
310311
struct RefcBinary *rsrc_refc = refc_binary_from_data(rsrc_obj);
311-
refc_binary_increment_refcount(rsrc_refc);
312312
SMP_RWLOCK_WRLOCK(rsrc_obj->socket_lock);
313313

314-
#if OTP_SOCKET_BSD
315-
if (rsrc_obj->selecting_process_id != INVALID_PROCESS_ID) {
316-
// Monitor fired, so make sure we don't try to demonitor in select_stop
317-
// as it could crash trying to reacquire lock on process table
318-
rsrc_obj->selecting_process_id = INVALID_PROCESS_ID;
319-
enif_select(caller_env, rsrc_obj->fd, ERL_NIF_SELECT_STOP, rsrc_obj, NULL, term_nil());
314+
if (rsrc_obj->selecting_process_id == INVALID_PROCESS_ID) {
315+
SMP_RWLOCK_UNLOCK(rsrc_obj->socket_lock);
316+
return;
320317
}
318+
319+
#if OTP_SOCKET_BSD
320+
// Monitor fired, so make sure we don't try to demonitor in select_stop
321+
// as it could crash trying to reacquire lock on process table
322+
// enif_select can decrement ref count but it's at least 2 in this case (1 for monitor and 1 for select)
323+
rsrc_obj->selecting_process_id = INVALID_PROCESS_ID;
324+
enif_select(caller_env, rsrc_obj->fd, ERL_NIF_SELECT_STOP, rsrc_obj, NULL, term_nil());
321325
#elif OTP_SOCKET_LWIP
322326
// Monitor can be called when we're selecting, accepting or connecting.
323-
if (rsrc_obj->selecting_process_id != INVALID_PROCESS_ID) {
324-
LWIP_BEGIN();
325-
rsrc_obj->selecting_process_id = INVALID_PROCESS_ID;
326-
if (rsrc_obj->socket_state & SocketStateTCP) {
327-
if (rsrc_obj->socket_state & SocketStateTCPListening) {
328-
(void) tcp_close(rsrc_obj->tcp_pcb);
329-
} else {
330-
tcp_abort(rsrc_obj->tcp_pcb);
331-
}
332-
rsrc_obj->tcp_pcb = NULL;
333-
rsrc_obj->socket_state = SocketStateClosed;
334-
} else if (rsrc_obj->socket_state & SocketStateUDP) {
335-
udp_remove(rsrc_obj->udp_pcb);
336-
rsrc_obj->udp_pcb = NULL;
337-
rsrc_obj->socket_state = SocketStateClosed;
327+
LWIP_BEGIN();
328+
rsrc_obj->selecting_process_id = INVALID_PROCESS_ID;
329+
if (rsrc_obj->socket_state & SocketStateTCP) {
330+
if (rsrc_obj->socket_state & SocketStateTCPListening) {
331+
(void) tcp_close(rsrc_obj->tcp_pcb);
332+
} else {
333+
tcp_abort(rsrc_obj->tcp_pcb);
338334
}
339-
LWIP_END();
335+
rsrc_obj->tcp_pcb = NULL;
336+
rsrc_obj->socket_state = SocketStateClosed;
337+
} else if (rsrc_obj->socket_state & SocketStateUDP) {
338+
udp_remove(rsrc_obj->udp_pcb);
339+
rsrc_obj->udp_pcb = NULL;
340+
rsrc_obj->socket_state = SocketStateClosed;
340341
}
342+
LWIP_END();
341343
#endif
342344

343345
SMP_RWLOCK_UNLOCK(rsrc_obj->socket_lock);
346+
347+
// We're no longer monitoring so we can decrement ref count
344348
refc_binary_decrement_refcount(rsrc_refc, caller_env->global);
345349
}
346350

@@ -948,13 +952,16 @@ static term nif_socket_select_read(Context *ctx, int argc, term argv[])
948952
RAISE_ERROR(BADARG_ATOM);
949953
}
950954

955+
struct RefcBinary *rsrc_refc = refc_binary_from_data(rsrc_obj);
951956
SMP_RWLOCK_WRLOCK(rsrc_obj->socket_lock);
952957

953958
ErlNifEnv *env = erl_nif_env_from_context(ctx);
954959
if (rsrc_obj->selecting_process_id != ctx->process_id && rsrc_obj->selecting_process_id != INVALID_PROCESS_ID) {
955960
// demonitor can fail if process is gone.
956961
enif_demonitor_process(env, rsrc_obj, &rsrc_obj->selecting_process_monitor);
957962
rsrc_obj->selecting_process_id = INVALID_PROCESS_ID;
963+
// decrement ref count as we are demonitoring
964+
refc_binary_decrement_refcount(rsrc_refc, ctx->global);
958965
}
959966
// Monitor first as select is less likely to fail and it's less expensive to demonitor
960967
// if select fails than to stop select if monitor fails
@@ -963,6 +970,8 @@ static term nif_socket_select_read(Context *ctx, int argc, term argv[])
963970
SMP_RWLOCK_UNLOCK(rsrc_obj->socket_lock);
964971
RAISE_ERROR(NOPROC_ATOM);
965972
}
973+
// increment ref count so the resource doesn't go away until monitor is fired
974+
refc_binary_increment_refcount(rsrc_refc);
966975
rsrc_obj->selecting_process_id = ctx->process_id;
967976
}
968977

@@ -979,6 +988,7 @@ static term nif_socket_select_read(Context *ctx, int argc, term argv[])
979988
enif_demonitor_process(env, rsrc_obj, &rsrc_obj->selecting_process_monitor);
980989
rsrc_obj->selecting_process_id = INVALID_PROCESS_ID;
981990
SMP_RWLOCK_UNLOCK(rsrc_obj->socket_lock);
991+
refc_binary_decrement_refcount(rsrc_refc, ctx->global);
982992
RAISE_ERROR(BADARG_ATOM);
983993
}
984994
}
@@ -1025,6 +1035,7 @@ static term nif_socket_select_read(Context *ctx, int argc, term argv[])
10251035
enif_demonitor_process(env, rsrc_obj, &rsrc_obj->selecting_process_monitor);
10261036
LWIP_END();
10271037
SMP_RWLOCK_UNLOCK(rsrc_obj->socket_lock);
1038+
refc_binary_decrement_refcount(rsrc_refc, ctx->global);
10281039
RAISE_ERROR(BADARG_ATOM);
10291040
}
10301041
LWIP_END();
@@ -1047,7 +1058,12 @@ static term nif_socket_select_stop(Context *ctx, int argc, term argv[])
10471058
}
10481059
// Avoid the race condition with select object here.
10491060
SMP_RWLOCK_WRLOCK(rsrc_obj->socket_lock);
1050-
rsrc_obj->selecting_process_id = INVALID_PROCESS_ID;
1061+
if (rsrc_obj->selecting_process_id != INVALID_PROCESS_ID) {
1062+
enif_demonitor_process(erl_nif_env_from_context(ctx), rsrc_obj, &rsrc_obj->selecting_process_monitor);
1063+
rsrc_obj->selecting_process_id = INVALID_PROCESS_ID;
1064+
struct RefcBinary *rsrc_refc = refc_binary_from_data(rsrc_obj);
1065+
refc_binary_decrement_refcount(rsrc_refc, ctx->global);
1066+
}
10511067
#if OTP_SOCKET_BSD
10521068
if (UNLIKELY(enif_select(erl_nif_env_from_context(ctx), rsrc_obj->fd, ERL_NIF_SELECT_STOP, rsrc_obj, NULL, term_nil()) < 0)) {
10531069
RAISE_ERROR(BADARG_ATOM);

0 commit comments

Comments
 (0)