Skip to content

Commit ed97478

Browse files
committed
Merge pull request #1498 from pguyot/w04/fix-refcount-calculation
Fix refcount calculation when copying data to heaps Ref count should be incremented whenever the ref counted binary is added to the mso list and decremented when the mso list is swept. Also refc_binaries are allocated with a ref count of 0 until they are made a term, with the exception of enif_alloc_resource that should be balanced by an enif_release_resource following OTP semantic. Also add a new test for heap operations including gc. Also fix race conditions with demonitor in both otp_socket and emscripten code that yielded a crash by over decrementing ref count. These changes are made under both the "Apache 2.0" and the "GNU Lesser General Public License 2.1 or later" license terms (dual license). SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
2 parents 9513847 + 5f9e1df commit ed97478

19 files changed

+274
-90
lines changed

.github/workflows/build-and-test-macos.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,11 @@ jobs:
9595
run: |
9696
./tests/test-enif
9797
98+
- name: "Test: test-heap"
99+
working-directory: build
100+
run: |
101+
./tests/test-heap
102+
98103
- name: "Test: test-mailbox"
99104
working-directory: build
100105
run: |

.github/workflows/build-and-test-on-freebsd.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,11 @@ jobs:
104104
echo "%%"
105105
./tests/test-enif
106106
107+
echo "%%"
108+
echo "%% Running test-heap ..."
109+
echo "%%"
110+
./tests/test-heap
111+
107112
echo "%%"
108113
echo "%% Running test-mailbox ..."
109114
echo "%%"

.github/workflows/build-and-test-other.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,12 +169,15 @@ jobs:
169169
make AtomVM &&
170170
make test-erlang &&
171171
make test-enif &&
172+
make test-heap &&
172173
make test-mailbox &&
173174
make test-structs &&
174175
file ./tests/test-erlang &&
175176
./tests/test-erlang -s prime_smp &&
176177
file ./tests/test-enif &&
177178
./tests/test-enif &&
179+
file ./tests/test-heap &&
180+
./tests/test-heap &&
178181
file ./tests/test-mailbox &&
179182
./tests/test-mailbox &&
180183
file ./tests/test-structs &&

.github/workflows/build-and-test.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,13 @@ jobs:
342342
./tests/test-enif
343343
valgrind ./tests/test-enif
344344
345+
- name: "Test: test-heap"
346+
working-directory: build
347+
run: |
348+
ulimit -c unlimited
349+
./tests/test-heap
350+
valgrind ./tests/test-heap
351+
345352
- name: "Test: test-mailbox"
346353
working-directory: build
347354
run: |

.github/workflows/build-linux-artifacts.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,12 +226,15 @@ jobs:
226226
VERBOSE=1 make AtomVM &&
227227
make test-erlang &&
228228
make test-enif &&
229+
make test-heap &&
229230
make test-mailbox &&
230231
make test-structs &&
231232
file ./tests/test-erlang &&
232233
./tests/test-erlang -s prime_smp &&
233234
file ./tests/test-enif &&
234235
./tests/test-enif &&
236+
file ./tests/test-heap &&
237+
./tests/test-heap &&
235238
file ./tests/test-mailbox &&
236239
./tests/test-mailbox &&
237240
file ./tests/test-structs &&

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ bug when handling errors from BIFs used as NIFs (when called with `CALL_EXT` and
4747
- Fixed matching of binaries on unaligned boundaries for code compiled with older versions of OTP
4848
- Added missing out of memory handling in binary_to_atom
4949
- Fixed call to funs such as fun erlang:'not'/1, that make use of BIFs
50+
- Fixed potential crashes or memory leaks caused by a mistake in calculation of reference counts
51+
and a race condition in otp_socket code
5052

5153
## [0.6.5] - 2024-10-15
5254

src/libAtomVM/erl_nif_priv.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
#include "context.h"
2525
#include "memory.h"
26+
#include "resources.h"
2627

2728
#ifdef __cplusplus
2829
extern "C" {
@@ -67,6 +68,19 @@ static inline void erl_nif_env_partial_init_from_globalcontext(ErlNifEnv *env, G
6768
env->x[1] = term_nil();
6869
}
6970

71+
static inline void erl_nif_env_partial_init_from_resource(ErlNifEnv *env, void *resource)
72+
{
73+
struct RefcBinary *refc = refc_binary_from_data(resource);
74+
env->global = refc->resource_type->global;
75+
env->heap.root = NULL;
76+
env->heap.heap_start = NULL;
77+
env->heap.heap_ptr = NULL;
78+
env->heap.heap_end = NULL;
79+
env->stack_pointer = NULL;
80+
env->x[0] = term_nil();
81+
env->x[1] = term_nil();
82+
}
83+
7084
#ifdef __cplusplus
7185
}
7286
#endif

src/libAtomVM/memory.c

Lines changed: 45 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ static inline enum MemoryGCResult memory_heap_alloc_new_fragment(Heap *heap, siz
9999
term *old_end = heap->heap_end;
100100
term mso_list = root_fragment->mso_list;
101101
if (UNLIKELY(memory_init_heap(heap, size) != MEMORY_GC_OK)) {
102-
TRACE("Unable to allocate memory fragment. size=%u\n", size);
102+
TRACE("Unable to allocate memory fragment. size=%u\n", (unsigned int) size);
103103
return MEMORY_GC_ERROR_FAILED_ALLOCATION;
104104
}
105105
// Convert root fragment to non-root fragment.
@@ -123,7 +123,7 @@ enum MemoryGCResult memory_erl_nif_env_ensure_free(ErlNifEnv *env, size_t size)
123123
}
124124
} else {
125125
if (UNLIKELY(memory_init_heap(&env->heap, size) != MEMORY_GC_OK)) {
126-
TRACE("Unable to allocate memory fragment. size=%u\n", size);
126+
TRACE("Unable to allocate memory fragment. size=%u\n", (unsigned int) size);
127127
return MEMORY_GC_ERROR_FAILED_ALLOCATION;
128128
}
129129
}
@@ -203,44 +203,42 @@ enum MemoryGCResult memory_ensure_free_with_roots(Context *c, size_t size, size_
203203
if (UNLIKELY(c->has_max_heap_size && (target_size > c->max_heap_size))) {
204204
return MEMORY_GC_DENIED_ALLOCATION;
205205
}
206-
if (target_size != memory_size) {
207-
if (UNLIKELY(memory_gc(c, target_size, num_roots, roots) != MEMORY_GC_OK)) {
208-
// TODO: handle this more gracefully
209-
TRACE("Unable to allocate memory for GC. target_size=%zu\n", target_size);
210-
return MEMORY_GC_ERROR_FAILED_ALLOCATION;
211-
}
212-
should_gc = alloc_mode == MEMORY_FORCE_SHRINK;
213-
size_t new_memory_size = memory_heap_memory_size(&c->heap);
214-
size_t new_target_size = new_memory_size;
215-
size_t new_free_space = context_avail_free_memory(c);
216-
switch (c->heap_growth_strategy) {
217-
case BoundedFreeHeapGrowth: {
218-
size_t maximum_free_space = 2 * (size + MIN_FREE_SPACE_SIZE);
219-
should_gc = should_gc || (alloc_mode != MEMORY_NO_SHRINK && new_free_space > maximum_free_space);
220-
if (should_gc) {
221-
new_target_size = (new_memory_size - new_free_space) + maximum_free_space;
222-
}
223-
} break;
224-
case MinimumHeapGrowth:
225-
should_gc = should_gc || (alloc_mode != MEMORY_NO_SHRINK && new_free_space > 0);
226-
if (should_gc) {
227-
new_target_size = new_memory_size - new_free_space + size;
228-
}
229-
break;
230-
case FibonacciHeapGrowth:
231-
should_gc = should_gc || (new_memory_size > FIBONACCI_HEAP_GROWTH_REDUCTION_THRESHOLD && new_free_space >= 3 * new_memory_size / 4);
232-
if (should_gc) {
233-
new_target_size = next_fibonacci_heap_size(new_memory_size - new_free_space + size);
234-
}
235-
break;
236-
}
237-
if (should_gc) {
238-
new_target_size = MAX(c->has_min_heap_size ? c->min_heap_size : 0, new_target_size);
239-
if (new_target_size != new_memory_size) {
240-
if (UNLIKELY(MEMORY_SHRINK(c, new_target_size, num_roots, roots) != MEMORY_GC_OK)) {
241-
TRACE("Unable to allocate memory for GC shrink. new_memory_size=%zu new_free_space=%zu size=%u\n", new_memory_size, new_free_space, size);
242-
return MEMORY_GC_ERROR_FAILED_ALLOCATION;
243-
}
206+
if (UNLIKELY(memory_gc(c, target_size, num_roots, roots) != MEMORY_GC_OK)) {
207+
// TODO: handle this more gracefully
208+
TRACE("Unable to allocate memory for GC. target_size=%zu\n", target_size);
209+
return MEMORY_GC_ERROR_FAILED_ALLOCATION;
210+
}
211+
should_gc = alloc_mode == MEMORY_FORCE_SHRINK;
212+
size_t new_memory_size = memory_heap_memory_size(&c->heap);
213+
size_t new_target_size = new_memory_size;
214+
size_t new_free_space = context_avail_free_memory(c);
215+
switch (c->heap_growth_strategy) {
216+
case BoundedFreeHeapGrowth: {
217+
size_t maximum_free_space = 2 * (size + MIN_FREE_SPACE_SIZE);
218+
should_gc = should_gc || (alloc_mode != MEMORY_NO_SHRINK && new_free_space > maximum_free_space);
219+
if (should_gc) {
220+
new_target_size = (new_memory_size - new_free_space) + maximum_free_space;
221+
}
222+
} break;
223+
case MinimumHeapGrowth:
224+
should_gc = should_gc || (alloc_mode != MEMORY_NO_SHRINK && new_free_space > 0);
225+
if (should_gc) {
226+
new_target_size = new_memory_size - new_free_space + size;
227+
}
228+
break;
229+
case FibonacciHeapGrowth:
230+
should_gc = should_gc || (new_memory_size > FIBONACCI_HEAP_GROWTH_REDUCTION_THRESHOLD && new_free_space >= 3 * new_memory_size / 4);
231+
if (should_gc) {
232+
new_target_size = next_fibonacci_heap_size(new_memory_size - new_free_space + size);
233+
}
234+
break;
235+
}
236+
if (should_gc) {
237+
new_target_size = MAX(c->has_min_heap_size ? c->min_heap_size : 0, new_target_size);
238+
if (new_target_size != new_memory_size) {
239+
if (UNLIKELY(MEMORY_SHRINK(c, new_target_size, num_roots, roots) != MEMORY_GC_OK)) {
240+
TRACE("Unable to allocate memory for GC shrink. new_memory_size=%zu new_free_space=%zu size=%u\n", new_memory_size, new_free_space, (unsigned int) size);
241+
return MEMORY_GC_ERROR_FAILED_ALLOCATION;
244242
}
245243
}
246244
}
@@ -301,7 +299,7 @@ static enum MemoryGCResult memory_gc(Context *ctx, size_t new_size, size_t num_r
301299

302300
TRACE("- Running copy GC on provided roots\n");
303301
for (size_t i = 0; i < num_roots; i++) {
304-
roots[i] = memory_shallow_copy_term(old_root_fragment, roots[i], &ctx->heap.heap_ptr, 1);
302+
roots[i] = memory_shallow_copy_term(old_root_fragment, roots[i], &ctx->heap.heap_ptr, true);
305303
}
306304

307305
term *temp_start = new_heap;
@@ -641,6 +639,7 @@ static void memory_scan_and_copy(HeapFragment *old_fragment, term *mem_start, co
641639
term ref = ((term) ptr) | TERM_BOXED_VALUE_TAG;
642640
if (!term_refc_binary_is_const(ref)) {
643641
*mso_list = term_list_init_prepend(ptr + REFC_BINARY_CONS_OFFSET, ref, *mso_list);
642+
refc_binary_increment_refcount((struct RefcBinary *) term_refc_binary_ptr(ref));
644643
}
645644
break;
646645
}
@@ -851,10 +850,6 @@ HOT_FUNC static term memory_shallow_copy_term(HeapFragment *old_fragment, term t
851850

852851
if (move) {
853852
memory_replace_with_moved_marker(boxed_value, new_term);
854-
} else if (term_is_refc_binary(t)) { // copy, not a move; increment refcount
855-
if (!term_refc_binary_is_const(t)) {
856-
refc_binary_increment_refcount((struct RefcBinary *) term_refc_binary_ptr(t));
857-
}
858853
}
859854

860855
return new_term;
@@ -930,15 +925,16 @@ void memory_sweep_mso_list(term mso_list, GlobalContext *global, bool from_task)
930925
TERM_DEBUG_ASSERT(term_is_boxed(h))
931926
term *boxed_value = term_to_term_ptr(h);
932927
if (memory_is_moved_marker(boxed_value)) {
933-
// it has been moved, so it is referenced
934-
} else if (term_is_refc_binary(h) && !term_refc_binary_is_const(h)) {
928+
h = memory_dereference_moved_marker(boxed_value);
929+
}
930+
if (term_is_refc_binary(h) && !term_refc_binary_is_const(h)) {
935931
// unreferenced binary; decrement reference count
936932
#ifdef AVM_TASK_DRIVER_ENABLED
937933
if (from_task) {
938-
globalcontext_refc_decrement_refcount_from_task(global, (struct RefcBinary *) term_refc_binary_ptr(h));
934+
globalcontext_refc_decrement_refcount_from_task(global, term_refc_binary_ptr(h));
939935
} else {
940936
#endif
941-
refc_binary_decrement_refcount((struct RefcBinary *) term_refc_binary_ptr(h), global);
937+
refc_binary_decrement_refcount(term_refc_binary_ptr(h), global);
942938
#ifdef AVM_TASK_DRIVER_ENABLED
943939
}
944940
#endif

src/libAtomVM/otp_socket.c

Lines changed: 25 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -284,10 +284,11 @@ static void socket_stop(ErlNifEnv *caller_env, void *obj, ErlNifEvent event, int
284284
struct SocketResource *rsrc_obj = (struct SocketResource *) obj;
285285

286286
if (rsrc_obj->selecting_process_id != INVALID_PROCESS_ID) {
287-
enif_demonitor_process(caller_env, rsrc_obj, &rsrc_obj->selecting_process_monitor);
287+
if (LIKELY(enif_demonitor_process(caller_env, rsrc_obj, &rsrc_obj->selecting_process_monitor) == 0)) {
288+
struct RefcBinary *rsrc_refc = refc_binary_from_data(rsrc_obj);
289+
refc_binary_decrement_refcount(rsrc_refc, caller_env->global);
290+
}
288291
rsrc_obj->selecting_process_id = INVALID_PROCESS_ID;
289-
struct RefcBinary *rsrc_refc = refc_binary_from_data(rsrc_obj);
290-
refc_binary_decrement_refcount(rsrc_refc, caller_env->global);
291292
}
292293

293294
TRACE("socket_stop called on fd=%i\n", rsrc_obj->fd);
@@ -958,10 +959,11 @@ static term nif_socket_select_read(Context *ctx, int argc, term argv[])
958959
ErlNifEnv *env = erl_nif_env_from_context(ctx);
959960
if (rsrc_obj->selecting_process_id != ctx->process_id && rsrc_obj->selecting_process_id != INVALID_PROCESS_ID) {
960961
// demonitor can fail if process is gone.
961-
enif_demonitor_process(env, rsrc_obj, &rsrc_obj->selecting_process_monitor);
962+
if (LIKELY(enif_demonitor_process(env, rsrc_obj, &rsrc_obj->selecting_process_monitor) == 0)) {
963+
// decrement ref count as we are demonitoring
964+
refc_binary_decrement_refcount(rsrc_refc, ctx->global);
965+
}
962966
rsrc_obj->selecting_process_id = INVALID_PROCESS_ID;
963-
// decrement ref count as we are demonitoring
964-
refc_binary_decrement_refcount(rsrc_refc, ctx->global);
965967
}
966968
// Monitor first as select is less likely to fail and it's less expensive to demonitor
967969
// if select fails than to stop select if monitor fails
@@ -985,10 +987,11 @@ static term nif_socket_select_read(Context *ctx, int argc, term argv[])
985987
send_closed_notification(ctx, argv[0], ctx->process_id, rsrc_obj);
986988
} else {
987989
if (UNLIKELY(enif_select(erl_nif_env_from_context(ctx), rsrc_obj->fd, ERL_NIF_SELECT_READ, rsrc_obj, &ctx->process_id, select_ref_term) < 0)) {
988-
enif_demonitor_process(env, rsrc_obj, &rsrc_obj->selecting_process_monitor);
990+
if (LIKELY(enif_demonitor_process(env, rsrc_obj, &rsrc_obj->selecting_process_monitor) == 0)) {
991+
refc_binary_decrement_refcount(rsrc_refc, ctx->global);
992+
}
989993
rsrc_obj->selecting_process_id = INVALID_PROCESS_ID;
990994
SMP_RWLOCK_UNLOCK(rsrc_obj->socket_lock);
991-
refc_binary_decrement_refcount(rsrc_refc, ctx->global);
992995
RAISE_ERROR(BADARG_ATOM);
993996
}
994997
}
@@ -1032,10 +1035,11 @@ static term nif_socket_select_read(Context *ctx, int argc, term argv[])
10321035
// noop
10331036
break;
10341037
default:
1035-
enif_demonitor_process(env, rsrc_obj, &rsrc_obj->selecting_process_monitor);
1038+
if (LIKELY(enif_demonitor_process(env, rsrc_obj, &rsrc_obj->selecting_process_monitor) == 0)) {
1039+
refc_binary_decrement_refcount(rsrc_refc, ctx->global);
1040+
}
10361041
LWIP_END();
10371042
SMP_RWLOCK_UNLOCK(rsrc_obj->socket_lock);
1038-
refc_binary_decrement_refcount(rsrc_refc, ctx->global);
10391043
RAISE_ERROR(BADARG_ATOM);
10401044
}
10411045
LWIP_END();
@@ -1059,10 +1063,11 @@ static term nif_socket_select_stop(Context *ctx, int argc, term argv[])
10591063
// Avoid the race condition with select object here.
10601064
SMP_RWLOCK_WRLOCK(rsrc_obj->socket_lock);
10611065
if (rsrc_obj->selecting_process_id != INVALID_PROCESS_ID) {
1062-
enif_demonitor_process(erl_nif_env_from_context(ctx), rsrc_obj, &rsrc_obj->selecting_process_monitor);
1066+
if (LIKELY(enif_demonitor_process(erl_nif_env_from_context(ctx), rsrc_obj, &rsrc_obj->selecting_process_monitor) == 0)) {
1067+
struct RefcBinary *rsrc_refc = refc_binary_from_data(rsrc_obj);
1068+
refc_binary_decrement_refcount(rsrc_refc, ctx->global);
1069+
}
10631070
rsrc_obj->selecting_process_id = INVALID_PROCESS_ID;
1064-
struct RefcBinary *rsrc_refc = refc_binary_from_data(rsrc_obj);
1065-
refc_binary_decrement_refcount(rsrc_refc, ctx->global);
10661071
}
10671072
#if OTP_SOCKET_BSD
10681073
if (UNLIKELY(enif_select(erl_nif_env_from_context(ctx), rsrc_obj->fd, ERL_NIF_SELECT_STOP, rsrc_obj, NULL, term_nil()) < 0)) {
@@ -1555,13 +1560,14 @@ static term nif_socket_listen(Context *ctx, int argc, term argv[])
15551560
//
15561561

15571562
#if OTP_SOCKET_LWIP
1558-
static term make_accepted_socket_term(struct SocketResource *conn_rsrc_obj, Heap *heap, GlobalContext *global)
1563+
static term make_accepted_socket_term(Context *ctx, struct SocketResource *conn_rsrc_obj)
15591564
{
1560-
term obj = term_from_resource(conn_rsrc_obj, heap);
1565+
term obj = enif_make_resource(erl_nif_env_from_context(ctx), conn_rsrc_obj);
1566+
enif_release_resource(conn_rsrc_obj);
15611567

1562-
term socket_term = term_alloc_tuple(2, heap);
1563-
uint64_t ref_ticks = globalcontext_get_ref_ticks(global);
1564-
term ref = term_from_ref_ticks(ref_ticks, heap);
1568+
term socket_term = term_alloc_tuple(2, &ctx->heap);
1569+
uint64_t ref_ticks = globalcontext_get_ref_ticks(ctx->global);
1570+
term ref = term_from_ref_ticks(ref_ticks, &ctx->heap);
15651571
term_put_tuple_element(socket_term, 0, obj);
15661572
term_put_tuple_element(socket_term, 1, ref);
15671573
return socket_term;
@@ -1674,7 +1680,7 @@ static term nif_socket_accept(Context *ctx, int argc, term argv[])
16741680
list_remove(&first_item->list_head);
16751681
free(first_item);
16761682

1677-
term socket_term = make_accepted_socket_term(new_resource, &ctx->heap, global);
1683+
term socket_term = make_accepted_socket_term(ctx, new_resource);
16781684
result = term_alloc_tuple(2, &ctx->heap);
16791685
term_put_tuple_element(result, 0, OK_ATOM);
16801686
term_put_tuple_element(result, 1, socket_term);

src/libAtomVM/posix_nifs.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,8 @@ static term nif_atomvm_posix_open(Context *ctx, int argc, term argv[])
314314
if (UNLIKELY(memory_ensure_free_opt(ctx, TUPLE_SIZE(2) + TERM_BOXED_RESOURCE_SIZE, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) {
315315
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
316316
}
317-
term obj = term_from_resource(fd_obj, &ctx->heap);
317+
term obj = enif_make_resource(erl_nif_env_from_context(ctx), fd_obj);
318+
enif_release_resource(fd_obj);
318319
result = term_alloc_tuple(2, &ctx->heap);
319320
term_put_tuple_element(result, 0, OK_ATOM);
320321
term_put_tuple_element(result, 1, obj);
@@ -675,7 +676,8 @@ static term nif_atomvm_posix_opendir(Context *ctx, int argc, term argv[])
675676
!= MEMORY_GC_OK)) {
676677
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
677678
}
678-
term obj = term_from_resource(dir_obj, &ctx->heap);
679+
term obj = enif_make_resource(erl_nif_env_from_context(ctx), dir_obj);
680+
enif_release_resource(dir_obj);
679681
result = term_alloc_tuple(2, &ctx->heap);
680682
term_put_tuple_element(result, 0, OK_ATOM);
681683
term_put_tuple_element(result, 1, obj);

0 commit comments

Comments
 (0)