Skip to content

Commit 92d10b4

Browse files
committed
Merge pull request #1487 from petermm/fix-network-races
Fix network.erl race conditions As evidenced by failing simtest CI on release-0.6 etc. 1. startup should use continue/handle_continue and avoid races + cleaner DRY code. 2. network:stop was using nonblocking call to stop network_port- so rapid stop/start would give the last network:start an old whereis(network_port) that was still shutting down, and lead to errors. 3. somewhat unrelated: sta_rssi was using get_port(), and would crash on no network started - guarded it erlang side, added test. These changes are made under both the "Apache 2.0" and the "GNU Lesser General Public License 2.1 or later" license terms (dual license). SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
2 parents d6f17ad + 6920c66 commit 92d10b4

File tree

4 files changed

+68
-53
lines changed

4 files changed

+68
-53
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ certain VM instructions are used.
3838
shallow comparison was performed, so it was working just for plain values such as atoms and small
3939
integers
4040
- Fixed support for setting esp32 boot_path in NVS.
41+
- Fixed race conditions in network:start/stop.
42+
- Fixed crash calling network:sta_rssi(), when network not up.
4143

4244
## [0.6.5] - 2024-10-15
4345

libs/eavmlib/src/network.erl

Lines changed: 41 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
init/1,
3333
handle_call/3,
3434
handle_cast/2,
35+
handle_continue/2,
3536
handle_info/2,
3637
terminate/2
3738
]).
@@ -270,31 +271,11 @@ wait_for_ap(ApConfig, Timeout) ->
270271
%%-----------------------------------------------------------------------------
271272
-spec start(Config :: network_config()) -> {ok, pid()} | {error, Reason :: term()}.
272273
start(Config) ->
273-
case gen_server:start({local, ?MODULE}, ?MODULE, Config, []) of
274-
{ok, Pid} = R ->
275-
case gen_server:call(Pid, start) of
276-
ok ->
277-
R;
278-
Error ->
279-
Error
280-
end;
281-
Error ->
282-
Error
283-
end.
274+
gen_server:start({local, ?MODULE}, ?MODULE, Config, []).
284275

285276
-spec start_link(Config :: network_config()) -> {ok, pid()} | {error, Reason :: term()}.
286277
start_link(Config) ->
287-
case gen_server:start_link({local, ?MODULE}, ?MODULE, Config, []) of
288-
{ok, Pid} = R ->
289-
case gen_server:call(Pid, start) of
290-
ok ->
291-
R;
292-
Error ->
293-
Error
294-
end;
295-
Error ->
296-
Error
297-
end.
278+
gen_server:start_link({local, ?MODULE}, ?MODULE, Config, []).
298279

299280
%%-----------------------------------------------------------------------------
300281
%% @returns ok, if the network interface was stopped, or {error, Reason} if
@@ -314,13 +295,17 @@ stop() ->
314295
%%-----------------------------------------------------------------------------
315296
-spec sta_rssi() -> {ok, Rssi :: db()} | {error, Reason :: term()}.
316297
sta_rssi() ->
317-
Port = get_port(),
318-
Ref = make_ref(),
319-
Port ! {self(), Ref, rssi},
320-
receive
321-
{Ref, {error, Reason}} -> {error, Reason};
322-
{Ref, {rssi, Rssi}} -> {ok, Rssi};
323-
Other -> {error, Other}
298+
case whereis(network_port) of
299+
undefined ->
300+
{error, network_down};
301+
Port ->
302+
Ref = make_ref(),
303+
Port ! {self(), Ref, rssi},
304+
receive
305+
{Ref, {error, Reason}} -> {error, Reason};
306+
{Ref, {rssi, Rssi}} -> {ok, Rssi};
307+
Other -> {error, Other}
308+
end
324309
end.
325310

326311
%%
@@ -329,28 +314,23 @@ sta_rssi() ->
329314

330315
%% @hidden
331316
init(Config) ->
332-
{ok, #state{config = Config}}.
333-
334-
%% @hidden
335-
handle_call(start, From, #state{config = Config} = State) ->
336317
Port = get_port(),
337318
Ref = make_ref(),
338-
Port ! {self(), Ref, {start, Config}},
339-
wait_start_reply(Ref, From, Port, State);
340-
handle_call(_Msg, _From, State) ->
341-
{reply, {error, unknown_message}, State}.
319+
{ok, #state{config = Config, port = Port, ref = Ref}, {continue, start_port}}.
342320

343-
%% @private
344-
wait_start_reply(Ref, From, Port, State) ->
321+
handle_continue(start_port, #state{config = Config, port = Port, ref = Ref} = State) ->
322+
Port ! {self(), Ref, {start, Config}},
345323
receive
346324
{Ref, ok} ->
347-
gen_server:reply(From, ok),
348-
{noreply, State#state{port = Port, ref = Ref}};
349-
{Ref, {error, Reason} = ER} ->
350-
gen_server:reply(From, {error, Reason}),
351-
{stop, {start_failed, Reason}, ER, State}
325+
{noreply, State};
326+
{Ref, {error, Reason}} ->
327+
{stop, {start_port_failed, Reason}, State}
352328
end.
353329

330+
%% @hidden
331+
handle_call(_Msg, _From, State) ->
332+
{reply, {error, unknown_message}, State}.
333+
354334
%% @hidden
355335
handle_cast(_Msg, State) ->
356336
{noreply, State}.
@@ -390,10 +370,25 @@ handle_info(Msg, State) ->
390370
{noreply, State}.
391371

392372
%% @hidden
393-
terminate(_Reason, _State) ->
373+
%% Wait for port to be closed
374+
terminate(_Reason, State) ->
394375
Ref = make_ref(),
376+
Port = State#state.port,
377+
PortMonitor = erlang:monitor(port, Port),
395378
network_port ! {?SERVER, Ref, stop},
396-
ok.
379+
wait_for_port_close(PortMonitor, Port).
380+
381+
wait_for_port_close(PortMonitor, Port) ->
382+
receive
383+
{'DOWN', PortMonitor, port, Port, _DownReason} ->
384+
ok;
385+
_Other ->
386+
% Handle unexpected messages if necessary
387+
wait_for_port_close(PortMonitor, Port)
388+
% Timeout after 1 second just in case.
389+
after 1000 ->
390+
{error, timeout}
391+
end.
397392

398393
%%
399394
%% Internal operations

src/platforms/esp32/components/avm_builtins/network_driver.c

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ static const char *const sta_connected_atom = ATOM_STR("\xD", "sta_connected");
7979
static const char *const sta_beacon_timeout_atom = ATOM_STR("\x12", "sta_beacon_timeout");
8080
static const char *const sta_disconnected_atom = ATOM_STR("\x10", "sta_disconnected");
8181
static const char *const sta_got_ip_atom = ATOM_STR("\xA", "sta_got_ip");
82+
static const char *const network_down_atom = ATOM_STR("\x0C", "network_down");
8283

8384
ESP_EVENT_DECLARE_BASE(sntp_event_base);
8485
ESP_EVENT_DEFINE_BASE(sntp_event_base);
@@ -847,13 +848,24 @@ static void get_sta_rssi(Context *ctx, term pid, term ref)
847848
size_t tuple_reply_size = PORT_REPLY_SIZE + TUPLE_SIZE(2);
848849

849850
int sta_rssi = 0;
850-
esp_err_t err = esp_wifi_sta_get_rssi(&sta_rssi);
851-
if (UNLIKELY(err != ESP_OK)) {
852-
term error_term = term_from_int(err);
853-
ESP_LOGE(TAG, "error obtaining RSSI: [%i] %u", err, error_term);
854-
// Reply: {Ref, {error, Reason}}
851+
wifi_ap_record_t ap_info;
852+
esp_err_t status = esp_wifi_sta_get_ap_info(&ap_info);
853+
if (status == ESP_OK) {
854+
esp_err_t err = esp_wifi_sta_get_rssi(&sta_rssi);
855+
if (UNLIKELY(err != ESP_OK)) {
856+
term error_term = term_from_int(err);
857+
ESP_LOGE(TAG, "error obtaining RSSI: [%i] %u", err, error_term);
858+
// Reply: {Ref, {error, Reason}}
859+
port_ensure_available(ctx, tuple_reply_size);
860+
term error = port_create_error_tuple(ctx, error_term);
861+
port_send_reply(ctx, pid, ref, error);
862+
return;
863+
}
864+
} else {
865+
ESP_LOGE(TAG, "Device is not connected to any AP.");
866+
// Reply: {Ref, {error, network_down}}
855867
port_ensure_available(ctx, tuple_reply_size);
856-
term error = port_create_error_tuple(ctx, error_term);
868+
term error = port_create_error_tuple(ctx, make_atom(ctx->global, network_down_atom));
857869
port_send_reply(ctx, pid, ref, error);
858870
return;
859871
}

src/platforms/esp32/test/main/test_erl_sources/test_wifi_example.erl

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
start() ->
2626
case verify_platform(atomvm:platform()) of
2727
ok ->
28+
% test that sta_rssi() can be safely called, when network is not started
29+
{error, network_down} = network:sta_rssi(),
2830
ok = start_network(),
2931
ok = network:stop(),
3032
start_network(),
@@ -59,7 +61,11 @@ start_network() ->
5961
],
6062
case network:start(Config) of
6163
{ok, _Pid} ->
62-
io:format("Network started.~n");
64+
% test that sta_rssi() can be safely called
65+
% when network is just started, but may not yet be connected.
66+
network:sta_rssi(),
67+
io:format("Network started.~n"),
68+
ok;
6369
Error ->
6470
Error
6571
end.

0 commit comments

Comments
 (0)