Skip to content

Commit d5f6c4e

Browse files
committed
nifs: implement binary:match/2 and binary:match/3
Signed-off-by: Jakub Gonet <jakub.gonet@swmansion.com>
1 parent 58fe3ae commit d5f6c4e

File tree

9 files changed

+270
-2
lines changed

9 files changed

+270
-2
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
5050
- Added WiFi support for ESP32P4 via esp-wifi-external for build with ESP-IDF v5.4 and later
5151
- Added Process.link/1 and unlink/1 to Elixir Process.ex
5252
- Added `erlang:module_loaded/1`
53+
- Added `binary:match/2` and `binary:match/3`
5354

5455
### Changed
5556

src/libAtomVM/defaultatoms.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,3 +185,6 @@ X(CONNECT_ATOM, "\x7", "connect")
185185
X(SYSTEM_VERSION_ATOM, "\xE", "system_version")
186186
X(OTP_RELEASE_ATOM, "\xB", "otp_release")
187187
X(BREAK_IGNORED_ATOM, "\xD", "break_ignored")
188+
189+
X(SCOPE_ATOM, "\x5", "scope")
190+
X(NOMATCH_ATOM, "\x7", "nomatch")

src/libAtomVM/nifs.c

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ static term nif_binary_first_1(Context *ctx, int argc, term argv[]);
9595
static term nif_binary_last_1(Context *ctx, int argc, term argv[]);
9696
static term nif_binary_part_3(Context *ctx, int argc, term argv[]);
9797
static term nif_binary_split(Context *ctx, int argc, term argv[]);
98+
static term nif_binary_match(Context *ctx, int argc, term argv[]);
9899
static term nif_calendar_system_time_to_universal_time_2(Context *ctx, int argc, term argv[]);
99100
static term nif_erlang_delete_element_2(Context *ctx, int argc, term argv[]);
100101
static term nif_erlang_atom_to_binary(Context *ctx, int argc, term argv[]);
@@ -259,6 +260,12 @@ static const struct Nif binary_split_nif =
259260
.nif_ptr = nif_binary_split
260261
};
261262

263+
// NIF descriptor whose function pointer is nif_binary_match.
static const struct Nif binary_match_nif =
264+
{
265+
.base.type = NIFFunctionType,
266+
.nif_ptr = nif_binary_match
267+
};
268+
262269
static const struct Nif make_ref_nif =
263270
{
264271
.base.type = NIFFunctionType,
@@ -3280,6 +3287,142 @@ static term nif_binary_split(Context *ctx, int argc, term argv[])
32803287
return result_list;
32813288
}
32823289

3290+
/**
 * @brief Work out which slice of \p binary a match is allowed to look at.
 *
 * @details Walks \p options looking for `{scope, {Pos, Len}}` entries. Every
 * element must be a 2-tuple tagged with the `scope` atom; when several are
 * present the last one wins. Without any scope option the whole binary is
 * used. The resulting position/length pair is handed to
 * term_normalize_binary_pos_len(), which decides whether it is acceptable
 * for \p binary and fills \p scope_slice.
 *
 * @param binary      the subject binary
 * @param options     option list (third argument of binary:match/3, or nil)
 * @param scope_slice out: the slice produced by term_normalize_binary_pos_len()
 * @return false when an option is malformed or normalization rejects the
 *         scope, true otherwise
 */
static bool get_binary_scope_slice(term binary, term options, BinaryPosLen *scope_slice)
{
    term scope = term_invalid_term();

    // The final tail is deliberately not inspected: BEAM ignores improper
    // option lists, and so do we.
    for (term rest = options; term_is_nonempty_list(rest); rest = term_get_list_tail(rest)) {
        term option = term_get_list_head(rest);
        bool is_scope_option = term_is_tuple(option)
            && term_get_tuple_arity(option) == 2
            && term_get_tuple_element(option, 0) == SCOPE_ATOM;
        if (UNLIKELY(!is_scope_option)) {
            return false;
        }
        scope = term_get_tuple_element(option, 1);
    }

    avm_int_t pos;
    avm_int_t len;
    if (term_is_invalid_term(scope)) {
        // No scope given: search the entire subject.
        pos = 0;
        len = (avm_int_t) term_binary_size(binary);
    } else {
        if (UNLIKELY(!term_is_tuple(scope) || term_get_tuple_arity(scope) != 2)) {
            return false;
        }
        term pos_term = term_get_tuple_element(scope, 0);
        term len_term = term_get_tuple_element(scope, 1);
        if (UNLIKELY(!(term_is_integer(pos_term) && term_is_integer(len_term)))) {
            return false;
        }
        pos = term_to_int(pos_term);
        len = term_to_int(len_term);
    }

    return term_normalize_binary_pos_len(binary, pos, len, scope_slice);
}
3323+
3324+
/**
 * @brief Check that \p t is an acceptable pattern argument for binary:match.
 *
 * @details Accepted shapes are a non-empty binary, or a proper, non-empty
 * list whose elements are all non-empty binaries.
 *
 * @param t the pattern (or list of patterns) to check
 * @return true when \p t has one of the accepted shapes
 */
static bool is_valid_pattern(term t)
{
    if (term_is_binary(t)) {
        return term_binary_size(t) != 0;
    }

    if (!term_is_nonempty_list(t)) {
        return false;
    }

    term rest = t;
    do {
        term element = term_get_list_head(rest);
        if (UNLIKELY(!term_is_binary(element) || term_binary_size(element) == 0)) {
            return false;
        }
        rest = term_get_list_tail(rest);
    } while (term_is_nonempty_list(rest));

    // Whatever is left must be nil, otherwise the list was improper.
    return term_is_nil(rest);
}
3350+
3351+
/**
 * @brief Locate the first occurrence of one pattern inside the scoped part
 *        of a binary.
 *
 * @param binary_term  the subject binary
 * @param scope_slice  region of the subject to search (offset and length)
 * @param pattern_term the pattern binary to look for
 * @return position (relative to the start of the subject, not of the scope)
 *         and length of the first occurrence, or the "nomatch" value when
 *         memmem() finds nothing
 */
static BinaryPosLen find_pattern_in_binary(term binary_term, BinaryPosLen scope_slice, term pattern_term)
{
    const char *haystack = term_binary_data(binary_term) + scope_slice.pos;
    const char *needle = term_binary_data(pattern_term);
    size_t needle_size = term_binary_size(pattern_term);

    const char *hit = memmem(haystack, scope_slice.len, needle, needle_size);
    if (hit == NULL) {
        return term_nomatch_binary_pos_len();
    }

    // memmem() reports an address inside the scope; shift it back so the
    // position is expressed from the beginning of the subject binary.
    return (BinaryPosLen) { .pos = (hit - haystack) + scope_slice.pos, .len = needle_size };
}
3366+
3367+
/**
 * @brief Pick the better of two match results.
 *
 * @details A real match always beats "nomatch". Between two real matches
 * the one starting earlier wins; when both start at the same position the
 * longer one wins. In every other case \p old_slice is kept.
 *
 * @param old_slice best match found so far (may be "nomatch")
 * @param new_slice freshly computed match (may be "nomatch")
 * @return the preferred slice
 */
static BinaryPosLen select_earlier_slice(BinaryPosLen old_slice, BinaryPosLen new_slice)
{
    if (term_is_nomatch_binary_pos_len(new_slice)) {
        return old_slice;
    }
    if (term_is_nomatch_binary_pos_len(old_slice)) {
        return new_slice;
    }

    bool starts_earlier = new_slice.pos < old_slice.pos;
    bool same_start_but_longer = new_slice.pos == old_slice.pos && new_slice.len > old_slice.len;

    return (starts_earlier || same_start_but_longer) ? new_slice : old_slice;
}
3383+
3384+
/**
 * @brief NIF behind binary:match/2 and binary:match/3.
 *
 * @details argv[0] is the subject binary, argv[1] a pattern or list of
 * patterns, and (for arity 3) argv[2] the option list. With several patterns
 * each one is searched for and the best hit is chosen by
 * select_earlier_slice().
 *
 * @return a `{Pos, Len}` tuple for the selected match, or the `nomatch` atom.
 *         Raises `badarg` when the subject, pattern or options are invalid,
 *         and `out_of_memory` when the result tuple cannot be allocated.
 */
static term nif_binary_match(Context *ctx, int argc, term argv[])
{
    term subject = argv[0];
    term pattern_arg = argv[1];
    term options = term_nil();
    if (argc == 3) {
        options = argv[2];
    }

    VALIDATE_VALUE(subject, term_is_binary);
    VALIDATE_VALUE(options, term_is_list);
    VALIDATE_VALUE(pattern_arg, is_valid_pattern);

    BinaryPosLen scope;
    if (UNLIKELY(!get_binary_scope_slice(subject, options, &scope))) {
        RAISE_ERROR(BADARG_ATOM);
    }

    BinaryPosLen best = term_nomatch_binary_pos_len();
    if (term_is_binary(pattern_arg)) {
        best = find_pattern_in_binary(subject, scope, pattern_arg);
    } else {
        // List of patterns: try each one and keep the preferred hit.
        for (term rest = pattern_arg; term_is_nonempty_list(rest); rest = term_get_list_tail(rest)) {
            BinaryPosLen candidate = find_pattern_in_binary(subject, scope, term_get_list_head(rest));
            best = select_earlier_slice(best, candidate);
        }
    }

    if (term_is_nomatch_binary_pos_len(best)) {
        return NOMATCH_ATOM;
    }

    if (UNLIKELY(memory_ensure_free_opt(ctx, TUPLE_SIZE(2), MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) {
        RAISE_ERROR(OUT_OF_MEMORY_ATOM);
    }
    term match_tuple = term_alloc_tuple(2, &ctx->heap);
    term_put_tuple_element(match_tuple, 0, term_from_int(best.pos));
    term_put_tuple_element(match_tuple, 1, term_from_int(best.len));
    return match_tuple;
}
3425+
32833426
static term nif_erlang_throw(Context *ctx, int argc, term argv[])
32843427
{
32853428
UNUSED(argc);

src/libAtomVM/nifs.gperf

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ binary:last/1, &binary_last_nif
3939
binary:part/3, &binary_part_nif
4040
binary:split/2, &binary_split_nif
4141
binary:split/3, &binary_split_nif
42+
binary:match/2, &binary_match_nif
43+
binary:match/3, &binary_match_nif
4244
calendar:system_time_to_universal_time/2, &system_time_to_universal_time_nif
4345
erlang:atom_to_binary/1, &atom_to_binary_nif
4446
erlang:atom_to_binary/2, &atom_to_binary_nif

src/libAtomVM/term.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1342,12 +1342,12 @@ static inline bool term_normalize_binary_pos_len(term binary, avm_int_t pos, avm
13421342
return true;
13431343
}
13441344

1345-
static inline bool term_is_invalid_binary_pos_len(BinaryPosLen pos_len)
1345+
/**
 * @brief Tell whether \p pos_len is the "nomatch" marker.
 *
 * @param pos_len the position/length pair to inspect
 * @return true only when both the position and the length are -1
 */
static inline bool term_is_nomatch_binary_pos_len(BinaryPosLen pos_len)
{
    if (pos_len.pos != -1) {
        return false;
    }
    return pos_len.len == -1;
}
13491349

1350-
static inline BinaryPosLen term_invalid_binary_pos_len(void)
1350+
static inline BinaryPosLen term_nomatch_binary_pos_len()
13511351
{
13521352
return (BinaryPosLen) { .pos = -1, .len = -1 };
13531353
}

tests/erlang_tests/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,7 @@ compile_erlang(test_unicode)
284284
compile_erlang(test_binary_part)
285285
compile_erlang(test_binary_split)
286286
compile_erlang(test_split_binary)
287+
compile_erlang(test_binary_match)
287288

288289
compile_erlang(plusone)
289290
compile_erlang(plusone2)
@@ -775,6 +776,7 @@ add_custom_target(erlang_test_modules DEPENDS
775776
test_binary_part.beam
776777
test_binary_split.beam
777778
test_split_binary.beam
779+
test_binary_match.beam
778780

779781
plusone.beam
780782
plusone2.beam
tests/erlang_tests/test_binary_match.erl

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
%
2+
% This file is part of AtomVM.
3+
%
4+
% Copyright 2025 Jakub Gonet <jakub.gonet@swmansion.com>
5+
%
6+
% Licensed under the Apache License, Version 2.0 (the "License");
7+
% you may not use this file except in compliance with the License.
8+
% You may obtain a copy of the License at
9+
%
10+
% http://www.apache.org/licenses/LICENSE-2.0
11+
%
12+
% Unless required by applicable law or agreed to in writing, software
13+
% distributed under the License is distributed on an "AS IS" BASIS,
14+
% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
% See the License for the specific language governing permissions and
16+
% limitations under the License.
17+
%
18+
% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
19+
%
20+
21+
-module(test_binary_match).
22+
23+
-export([start/0, id/1, fail_with_badarg/1]).
24+
-define(ID(Arg), ?MODULE:id(Arg)).
25+
26+
% Test entry point. Runs every group of expectations in order and returns 0
% when all of them hold; a failed expectation crashes with a badmatch.
start() ->
    ok = test_single_pattern(),
    ok = test_empty_subject(),
    ok = test_pattern_list(),
    ok = test_scope_option(),
    ok = test_bad_arguments(),
    ok = test_bad_scope(),
    0.

% A single binary pattern against a non-empty subject.
test_single_pattern() ->
    ok = fail_with_badarg(fun() -> binary:match(?ID(<<"a">>), ?ID(<<"">>)) end),
    {0, 1} = binary:match(?ID(<<"a">>), ?ID(<<"a">>)),
    {0, 1} = binary:match(?ID(<<"aa">>), ?ID(<<"a">>)),
    {0, 2} = binary:match(?ID(<<"aba">>), ?ID(<<"ab">>)),
    ok.

% Empty subject: OTP up to 26 short-circuits to nomatch before validating the
% pattern, while newer OTP and AtomVM validate first.
test_empty_subject() ->
    case get_otp_version() of
        OTP when OTP =< 26 ->
            nomatch = binary:match(?ID(<<"">>), ?ID(<<"">>)),
            nomatch = binary:match(?ID(<<"">>), ?ID(<<"a">>)),
            nomatch = binary:match(?ID(<<"">>), ?ID([])),
            % for /3, nomatch only if empty subject + empty options
            nomatch = binary:match(?ID(<<"">>), ?ID(not_binary), ?ID([]));
        _AVM_or_newer_OTP ->
            ok = fail_with_badarg(fun() -> binary:match(?ID(<<"">>), ?ID(<<"">>)) end),
            nomatch = binary:match(?ID(<<"">>), ?ID(<<"a">>)),
            ok = fail_with_badarg(fun() -> binary:match(?ID(<<"">>), ?ID([])) end),
            ok = fail_with_badarg(fun() ->
                binary:match(?ID(<<"">>), ?ID(not_binary), ?ID([]))
            end)
    end,
    ok.

% A list of patterns: empty lists and empty elements are rejected, and on a
% tie at the same position the longest pattern is reported.
test_pattern_list() ->
    ok = fail_with_badarg(fun() -> binary:match(?ID(<<"a">>), ?ID([])) end),
    ok = fail_with_badarg(fun() -> binary:match(?ID(<<"a">>), ?ID([<<"">>])) end),
    {0, 1} = binary:match(?ID(<<"a">>), ?ID([<<"a">>])),
    {0, 1} = binary:match(?ID(<<"a">>), ?ID([<<"a">>, <<"a">>])),
    {0, 2} = binary:match(?ID(<<"aa">>), ?ID([<<"a">>, <<"aa">>])),
    ok.

% The {scope, {Pos, Len}} option restricts where the search happens.
test_scope_option() ->
    nomatch = binary:match(?ID(<<"a">>), ?ID(<<"a">>), ?ID([{scope, {0, 0}}])),
    nomatch = binary:match(?ID(<<"a">>), ?ID(<<"a">>), ?ID([{scope, {1, 0}}])),
    nomatch = binary:match(?ID(<<"bab">>), ?ID(<<"b">>), ?ID([{scope, {1, 0}}])),
    {2, 1} = binary:match(?ID(<<"bab">>), ?ID(<<"b">>), ?ID([{scope, {1, 2}}])),
    % {scope, {1, -1}}: starts at 0, 1 byte long
    {0, 1} = binary:match(?ID(<<"bab">>), ?ID(<<"b">>), ?ID([{scope, {1, -1}}])),
    ok.

% Bad inputs; subjects must be non-empty so that nothing short-circuits.
test_bad_arguments() ->
    ok = fail_with_badarg(fun() -> binary:match(?ID(not_binary), ?ID(<<"a">>)) end),
    ok = fail_with_badarg(fun() -> binary:match(?ID(<<"a">>), ?ID(not_binary)) end),
    ok = fail_with_badarg(fun() -> binary:match(?ID(<<"a">>), ?ID([<<"a">> | <<"a">>])) end),
    ok = fail_with_badarg(fun() -> binary:match(?ID(not_binary), ?ID(<<"a">>), ?ID([])) end),
    ok = fail_with_badarg(fun() -> binary:match(?ID(<<"a">>), ?ID(not_binary), ?ID([])) end),
    ok = fail_with_badarg(fun() -> binary:match(?ID(<<"a">>), ?ID(<<"a">>), ?ID(not_list)) end),
    ok = fail_with_badarg(fun() ->
        binary:match(?ID(<<"a">>), ?ID(<<"a">>), ?ID([{scope, {1, 0}}, badopt]))
    end),
    % following line should raise but BEAM doesn't do that
    nomatch = binary:match(?ID(<<"a">>), ?ID(<<"a">>), ?ID([{scope, {1, 0}} | badopt])),
    ok = fail_with_badarg(fun() ->
        binary:match(?ID(<<"a">>), ?ID(<<"a">>), ?ID([{scope, not_tuple}]))
    end),
    ok = fail_with_badarg(fun() ->
        binary:match(?ID(<<"a">>), ?ID(<<"a">>), ?ID([{scope, {1}}]))
    end),
    ok = fail_with_badarg(fun() ->
        binary:match(?ID(<<"a">>), ?ID(<<"a">>), ?ID([{scope, {1, 2, 3}}]))
    end),
    ok.

% Well-formed scope tuples that fall outside the subject.
test_bad_scope() ->
    ok = fail_with_badarg(fun() ->
        binary:match(?ID(<<"a">>), ?ID(<<"a">>), ?ID([{scope, {0, 2}}]))
    end),
    ok = fail_with_badarg(fun() ->
        binary:match(?ID(<<"a">>), ?ID(<<"a">>), ?ID([{scope, {0, -1}}]))
    end),
    ok = fail_with_badarg(fun() ->
        binary:match(?ID(<<"a">>), ?ID(<<"a">>), ?ID([{scope, {-1, 1}}]))
    end),
    ok.
93+
94+
% Identity function. It is invoked through ?MODULE:id/1 (the ?ID macro), i.e.
% as a fully qualified call.
id(Term) -> Term.
96+
97+
% Runs Fun and checks that it fails with error:badarg.
% Returns ok in that case, {unexpected, Value} when Fun returns normally, and
% {unexpected, Class, Reason} when it raises anything else.
fail_with_badarg(Fun) ->
    try Fun() of
        Value -> {unexpected, Value}
    catch
        Class:Reason -> classify_exception(Class, Reason)
    end.

% Maps a caught exception to the result reported by fail_with_badarg/1.
classify_exception(error, badarg) -> ok;
classify_exception(Class, Reason) -> {unexpected, Class, Reason}.
104+
105+
% Returns the OTP release as an integer when running on BEAM, and the atom
% atomvm on any other machine.
get_otp_version() ->
    otp_version(erlang:system_info(machine)).

% Dispatches on the machine name reported by erlang:system_info(machine).
otp_version("BEAM") ->
    list_to_integer(erlang:system_info(otp_release));
otp_version(_OtherMachine) ->
    atomvm.

tests/libs/estdlib/tests.erl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,11 @@ get_non_networking_tests(OTPVersion) when
5959
(is_integer(OTPVersion) andalso OTPVersion >= 27) orelse OTPVersion =:= atomvm
6060
->
6161
[test_sets | get_non_networking_tests(undefined)];
62+
% test_binary uses encode_hex/1 (OTP-24), encode_hex/2 (OTP-26)
63+
get_non_networking_tests(OTPVersion) when
64+
(is_integer(OTPVersion) andalso OTPVersion >= 26) orelse OTPVersion =:= atomvm
65+
->
66+
[test_binary | get_non_networking_tests(undefined)];
6267
get_non_networking_tests(_OTPVersion) ->
6368
[
6469
test_apply,

tests/test.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,7 @@ struct Test tests[] = {
325325
TEST_CASE(test_binary_part),
326326
TEST_CASE(test_binary_split),
327327
TEST_CASE(test_split_binary),
328+
TEST_CASE(test_binary_match),
328329

329330
TEST_CASE_COND(plusone, 134217728, LONG_MAX != 9223372036854775807),
330331

0 commit comments

Comments
 (0)