Skip to content

Commit fb8b95a

Browse files
committed
Merge pull request #1621 from jgonet/jgonet/binary-match
Implement binary:match/2 and binary:match/3. These changes are made under both the "Apache 2.0" and the "GNU Lesser General Public License 2.1 or later" license terms (dual license). SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
2 parents 0d5b0de + 846fa18 commit fb8b95a

File tree

9 files changed

+282
-9
lines changed

9 files changed

+282
-9
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
5151
- Added Process.link/1 and unlink/1 to Elixir Process.ex
5252
- Added `erlang:module_loaded/1`
5353
- Added `binary:replace/3`, `binary:replace/4`
54+
- Added `binary:match/2` and `binary:match/3`
5455

5556
### Changed
5657

src/libAtomVM/defaultatoms.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,3 +185,6 @@ X(CONNECT_ATOM, "\x7", "connect")
185185
X(SYSTEM_VERSION_ATOM, "\xE", "system_version")
186186
X(OTP_RELEASE_ATOM, "\xB", "otp_release")
187187
X(BREAK_IGNORED_ATOM, "\xD", "break_ignored")
188+
189+
X(SCOPE_ATOM, "\x5", "scope")
190+
X(NOMATCH_ATOM, "\x7", "nomatch")

src/libAtomVM/nifs.c

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ static term nif_binary_last_1(Context *ctx, int argc, term argv[]);
9696
static term nif_binary_part_3(Context *ctx, int argc, term argv[]);
9797
static term nif_binary_split(Context *ctx, int argc, term argv[]);
9898
static term nif_binary_replace(Context *ctx, int argc, term argv[]);
99+
static term nif_binary_match(Context *ctx, int argc, term argv[]);
99100
static term nif_calendar_system_time_to_universal_time_2(Context *ctx, int argc, term argv[]);
100101
static term nif_erlang_delete_element_2(Context *ctx, int argc, term argv[]);
101102
static term nif_erlang_atom_to_binary(Context *ctx, int argc, term argv[]);
@@ -266,6 +267,12 @@ static const struct Nif binary_replace_nif =
266267
.nif_ptr = nif_binary_replace
267268
};
268269

270+
static const struct Nif binary_match_nif =
271+
{
272+
.base.type = NIFFunctionType,
273+
.nif_ptr = nif_binary_match
274+
};
275+
269276
static const struct Nif make_ref_nif =
270277
{
271278
.base.type = NIFFunctionType,
@@ -3387,6 +3394,142 @@ static term nif_binary_replace(Context *ctx, int argc, term argv[])
33873394
return result_binary;
33883395
}
33893396

3397+
/**
 * Works out which slice of `binary` a match should be restricted to.
 *
 * `options` is walked as a list; every element must be a `{scope, Value}` tuple,
 * anything else makes the function fail. When several scope options are given the
 * last one wins. Without a scope option the slice covers the whole binary.
 *
 * @param binary the subject binary
 * @param options the option list passed to binary:match/3
 * @param scope_slice output, filled in by term_normalize_binary_pos_len
 * @return false when an option is malformed or the scope is not a 2-tuple of
 *         integers, otherwise whatever term_normalize_binary_pos_len returns
 */
static bool get_binary_scope_slice(term binary, term options, BinaryPosLen *scope_slice)
{
    term scope_value = term_invalid_term();

    // The walk simply stops at the first non-cons cell: an improper tail is not
    // rejected, which mirrors what BEAM does.
    for (term cursor = options; term_is_nonempty_list(cursor); cursor = term_get_list_tail(cursor)) {
        term option = term_get_list_head(cursor);
        bool is_scope_option = term_is_tuple(option)
            && term_get_tuple_arity(option) == 2
            && term_get_tuple_element(option, 0) == SCOPE_ATOM;
        if (UNLIKELY(!is_scope_option)) {
            return false;
        }
        scope_value = term_get_tuple_element(option, 1);
    }

    // No scope option at all: use the full binary.
    if (term_is_invalid_term(scope_value)) {
        size_t whole_size = term_binary_size(binary);
        return term_normalize_binary_pos_len(binary, 0, (avm_int_t) whole_size, scope_slice);
    }

    bool is_pair = term_is_tuple(scope_value) && term_get_tuple_arity(scope_value) == 2;
    if (UNLIKELY(!is_pair)) {
        return false;
    }

    term start_term = term_get_tuple_element(scope_value, 0);
    term length_term = term_get_tuple_element(scope_value, 1);
    if (UNLIKELY(!(term_is_integer(start_term) && term_is_integer(length_term)))) {
        return false;
    }

    avm_int_t start = term_to_int(start_term);
    avm_int_t length = term_to_int(length_term);
    return term_normalize_binary_pos_len(binary, start, length, scope_slice);
}
3430+
3431+
/**
 * Checks whether `t` is an acceptable pattern argument.
 *
 * Accepted shapes are a non-empty binary, or a non-empty proper list whose
 * elements are all non-empty binaries.
 *
 * @param t the term to check
 * @return true when `t` has one of the accepted shapes, false otherwise
 */
static bool is_valid_pattern(term t)
{
    if (term_is_binary(t)) {
        return term_binary_size(t) > 0;
    }

    // An empty list (or any other non-list term) is not a pattern.
    if (!term_is_nonempty_list(t)) {
        return false;
    }

    term rest = t;
    do {
        term candidate = term_get_list_head(rest);
        if (UNLIKELY(!term_is_binary(candidate))) {
            return false;
        }
        if (UNLIKELY(term_binary_size(candidate) == 0)) {
            return false;
        }
        rest = term_get_list_tail(rest);
    } while (term_is_nonempty_list(rest));

    // The list must be proper, i.e. terminated by nil.
    return term_is_nil(rest);
}
3457+
3458+
/**
 * Searches for the first occurrence of one pattern inside the scoped part of a binary.
 *
 * @param binary_term the subject binary
 * @param scope_slice the part of the subject to search (offset and length)
 * @param pattern_term the pattern binary
 * @return the position (relative to the start of the subject, not of the scope)
 *         and length of the first occurrence, or the "nomatch" value when memmem
 *         finds nothing
 */
static BinaryPosLen find_pattern_in_binary(term binary_term, BinaryPosLen scope_slice, term pattern_term)
{
    const char *haystack = term_binary_data(binary_term) + scope_slice.pos;
    size_t haystack_size = scope_slice.len;
    const char *needle = term_binary_data(pattern_term);
    size_t needle_size = term_binary_size(pattern_term);

    BinaryPosLen found = term_nomatch_binary_pos_len();
    const char *hit = memmem(haystack, haystack_size, needle, needle_size);
    if (hit == NULL) {
        return found;
    }

    // `hit - haystack` is an offset inside the scope; shift it back so the
    // reported position is relative to the whole subject.
    found.pos = (hit - haystack) + scope_slice.pos;
    found.len = needle_size;
    return found;
}
3473+
3474+
/**
 * Picks the better of two match results.
 *
 * A real match always beats "nomatch". Between two real matches the one that
 * starts earlier wins; when both start at the same position the longer one wins.
 * In every other case `old_slice` is kept.
 *
 * @param old_slice the best match found so far (may be "nomatch")
 * @param new_slice the newly found match (may be "nomatch")
 * @return the preferred slice
 */
static BinaryPosLen select_earlier_slice(BinaryPosLen old_slice, BinaryPosLen new_slice)
{
    if (term_is_nomatch_binary_pos_len(new_slice)) {
        return old_slice;
    }
    if (term_is_nomatch_binary_pos_len(old_slice)) {
        return new_slice;
    }

    bool starts_earlier = new_slice.pos < old_slice.pos;
    bool same_start_but_longer = new_slice.pos == old_slice.pos && new_slice.len > old_slice.len;
    return (starts_earlier || same_start_but_longer) ? new_slice : old_slice;
}
3490+
3491+
/**
 * NIF entry point shared by the 2- and 3-argument forms of binary match.
 *
 * argv[0] is the subject binary, argv[1] a pattern or list of patterns, and,
 * when argc is 3, argv[2] the option list (an empty list is used otherwise).
 *
 * @return NOMATCH_ATOM when no pattern occurs in the scoped subject, otherwise
 *         a freshly allocated 2-tuple holding the position and length of the
 *         selected match. Invalid arguments go through VALIDATE_VALUE or raise
 *         BADARG_ATOM; a failed allocation raises OUT_OF_MEMORY_ATOM.
 */
static term nif_binary_match(Context *ctx, int argc, term argv[])
{
    term subject = argv[0];
    term pattern_arg = argv[1];
    term options = (argc == 3) ? argv[2] : term_nil();

    VALIDATE_VALUE(subject, term_is_binary);
    VALIDATE_VALUE(options, term_is_list);
    VALIDATE_VALUE(pattern_arg, is_valid_pattern);

    BinaryPosLen scope;
    if (UNLIKELY(!get_binary_scope_slice(subject, options, &scope))) {
        RAISE_ERROR(BADARG_ATOM);
    }

    BinaryPosLen best = term_nomatch_binary_pos_len();
    if (term_is_binary(pattern_arg)) {
        best = find_pattern_in_binary(subject, scope, pattern_arg);
    } else {
        // Several patterns: search each one and keep the preferred hit.
        for (term rest = pattern_arg; term_is_nonempty_list(rest); rest = term_get_list_tail(rest)) {
            term single_pattern = term_get_list_head(rest);
            BinaryPosLen candidate = find_pattern_in_binary(subject, scope, single_pattern);
            best = select_earlier_slice(best, candidate);
        }
    }

    if (term_is_nomatch_binary_pos_len(best)) {
        return NOMATCH_ATOM;
    }

    // Make room for the result tuple before allocating it.
    if (UNLIKELY(memory_ensure_free_opt(ctx, TUPLE_SIZE(2), MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) {
        RAISE_ERROR(OUT_OF_MEMORY_ATOM);
    }
    term result = term_alloc_tuple(2, &ctx->heap);
    term_put_tuple_element(result, 0, term_from_int(best.pos));
    term_put_tuple_element(result, 1, term_from_int(best.len));
    return result;
}
3532+
33903533
static term nif_erlang_throw(Context *ctx, int argc, term argv[])
33913534
{
33923535
UNUSED(argc);

src/libAtomVM/nifs.gperf

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ binary:split/2, &binary_split_nif
4141
binary:split/3, &binary_split_nif
4242
binary:replace/3, &binary_replace_nif
4343
binary:replace/4, &binary_replace_nif
44+
binary:match/2, &binary_match_nif
45+
binary:match/3, &binary_match_nif
4446
calendar:system_time_to_universal_time/2, &system_time_to_universal_time_nif
4547
erlang:atom_to_binary/1, &atom_to_binary_nif
4648
erlang:atom_to_binary/2, &atom_to_binary_nif

src/libAtomVM/term.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1342,12 +1342,12 @@ static inline bool term_normalize_binary_pos_len(term binary, avm_int_t pos, avm
13421342
return true;
13431343
}
13441344

1345-
static inline bool term_is_invalid_binary_pos_len(BinaryPosLen pos_len)
1345+
/**
 * Tells whether `pos_len` is the "nomatch" marker, i.e. both its position and
 * its length are -1.
 *
 * @param pos_len the value to inspect
 * @return true when both fields equal -1
 */
static inline bool term_is_nomatch_binary_pos_len(BinaryPosLen pos_len)
{
    bool pos_is_marker = pos_len.pos == -1;
    bool len_is_marker = pos_len.len == -1;
    return pos_is_marker && len_is_marker;
}
13491349

1350-
static inline BinaryPosLen term_invalid_binary_pos_len(void)
1350+
static inline BinaryPosLen term_nomatch_binary_pos_len()
13511351
{
13521352
return (BinaryPosLen) { .pos = -1, .len = -1 };
13531353
}

tests/erlang_tests/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,7 @@ compile_erlang(test_binary_part)
285285
compile_erlang(test_binary_split)
286286
compile_erlang(test_split_binary)
287287
compile_erlang(test_binary_replace)
288+
compile_erlang(test_binary_match)
288289

289290
compile_erlang(plusone)
290291
compile_erlang(plusone2)
@@ -777,6 +778,7 @@ add_custom_target(erlang_test_modules DEPENDS
777778
test_binary_split.beam
778779
test_split_binary.beam
779780
test_binary_replace.beam
781+
test_binary_match.beam
780782

781783
plusone.beam
782784
plusone2.beam
tests/erlang_tests/test_binary_match.erl

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
%
2+
% This file is part of AtomVM.
3+
%
4+
% Copyright 2025 Jakub Gonet <jakub.gonet@swmansion.com>
5+
%
6+
% Licensed under the Apache License, Version 2.0 (the "License");
7+
% you may not use this file except in compliance with the License.
8+
% You may obtain a copy of the License at
9+
%
10+
% http://www.apache.org/licenses/LICENSE-2.0
11+
%
12+
% Unless required by applicable law or agreed to in writing, software
13+
% distributed under the License is distributed on an "AS IS" BASIS,
14+
% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
% See the License for the specific language governing permissions and
16+
% limitations under the License.
17+
%
18+
% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
19+
%
20+
21+
-module(test_binary_match).
22+
23+
-export([start/0, id/1, fail_with_badarg/1]).
24+
-define(ID(Arg), ?MODULE:id(Arg)).
25+
26+
%% Test entry point: exercises binary:match/2 and binary:match/3 and returns 0
%% when every expectation holds (a failed expectation raises badmatch).
start() ->
    % single binary pattern
    ok = fail_with_badarg(fun() -> binary:match(?ID(<<"a">>), ?ID(<<"">>)) end),
    {0, 1} = binary:match(?ID(<<"a">>), ?ID(<<"a">>)),
    {0, 1} = binary:match(?ID(<<"aa">>), ?ID(<<"a">>)),
    {0, 2} = binary:match(?ID(<<"aba">>), ?ID(<<"ab">>)),

    % empty subject: OTP up to 26 answers nomatch without validating the pattern,
    % the other branch (AtomVM or newer OTP) expects validation to happen first
    case get_otp_version() of
        OtpRelease when OtpRelease =< 26 ->
            nomatch = binary:match(?ID(<<"">>), ?ID(<<"">>)),
            nomatch = binary:match(?ID(<<"">>), ?ID(<<"a">>)),
            nomatch = binary:match(?ID(<<"">>), ?ID([])),
            % for /3, nomatch only if empty subject + empty options
            nomatch = binary:match(?ID(<<"">>), ?ID(not_binary), ?ID([]));
        _AtomVMOrNewerOtp ->
            ok = fail_with_badarg(fun() -> binary:match(?ID(<<"">>), ?ID(<<"">>)) end),
            nomatch = binary:match(?ID(<<"">>), ?ID(<<"a">>)),
            ok = fail_with_badarg(fun() -> binary:match(?ID(<<"">>), ?ID([])) end),
            ok = fail_with_badarg(fun() -> binary:match(?ID(<<"">>), ?ID(not_binary), ?ID([])) end)
    end,

    % list of patterns
    ok = fail_with_badarg(fun() -> binary:match(?ID(<<"a">>), ?ID([])) end),
    ok = fail_with_badarg(fun() -> binary:match(?ID(<<"a">>), ?ID([<<"">>])) end),
    {0, 1} = binary:match(?ID(<<"a">>), ?ID([<<"a">>])),
    {0, 1} = binary:match(?ID(<<"a">>), ?ID([<<"a">>, <<"a">>])),
    {0, 2} = binary:match(?ID(<<"aa">>), ?ID([<<"a">>, <<"aa">>])),

    % scope option
    nomatch = binary:match(?ID(<<"a">>), ?ID(<<"a">>), ?ID([{scope, {0, 0}}])),
    nomatch = binary:match(?ID(<<"a">>), ?ID(<<"a">>), ?ID([{scope, {1, 0}}])),
    nomatch = binary:match(?ID(<<"bab">>), ?ID(<<"b">>), ?ID([{scope, {1, 0}}])),
    {2, 1} = binary:match(?ID(<<"bab">>), ?ID(<<"b">>), ?ID([{scope, {1, 2}}])),
    % {scope, {1, -1}}: starts at 0, 1 byte long
    {0, 1} = binary:match(?ID(<<"bab">>), ?ID(<<"b">>), ?ID([{scope, {1, -1}}])),

    % bad inputs, subjects must be non-empty to not short-circuit
    ok = fail_with_badarg(fun() -> binary:match(?ID(not_binary), ?ID(<<"a">>)) end),
    ok = fail_with_badarg(fun() -> binary:match(?ID(<<"a">>), ?ID(not_binary)) end),
    ok = fail_with_badarg(fun() -> binary:match(?ID(<<"a">>), ?ID([<<"a">> | <<"a">>])) end),
    ok = fail_with_badarg(fun() -> binary:match(?ID(not_binary), ?ID(<<"a">>), ?ID([])) end),
    ok = fail_with_badarg(fun() -> binary:match(?ID(<<"a">>), ?ID(not_binary), ?ID([])) end),
    ok = fail_with_badarg(fun() -> binary:match(?ID(<<"a">>), ?ID(<<"a">>), ?ID(not_list)) end),
    ok = fail_with_badarg(fun() ->
        binary:match(?ID(<<"a">>), ?ID(<<"a">>), ?ID([{scope, {1, 0}}, badopt]))
    end),
    % an improper option list arguably should raise, but BEAM accepts it
    nomatch = binary:match(?ID(<<"a">>), ?ID(<<"a">>), ?ID([{scope, {1, 0}} | badopt])),
    ok = fail_with_badarg(fun() ->
        binary:match(?ID(<<"a">>), ?ID(<<"a">>), ?ID([{scope, not_tuple}]))
    end),
    ok = fail_with_badarg(fun() -> binary:match(?ID(<<"a">>), ?ID(<<"a">>), ?ID([{scope, {1}}])) end),
    ok = fail_with_badarg(fun() ->
        binary:match(?ID(<<"a">>), ?ID(<<"a">>), ?ID([{scope, {1, 2, 3}}]))
    end),

    % scope reaching outside of the subject
    ok = fail_with_badarg(fun() ->
        binary:match(?ID(<<"a">>), ?ID(<<"a">>), ?ID([{scope, {0, 2}}]))
    end),
    ok = fail_with_badarg(fun() ->
        binary:match(?ID(<<"a">>), ?ID(<<"a">>), ?ID([{scope, {0, -1}}]))
    end),
    ok = fail_with_badarg(fun() ->
        binary:match(?ID(<<"a">>), ?ID(<<"a">>), ?ID([{scope, {-1, 1}}]))
    end),
    0.
93+
94+
%% Identity function, called through the ?ID macro as a fully qualified
%% ?MODULE:id/1 call (presumably so the arguments are not folded at compile
%% time -- confirm).
id(Term) -> Term.
96+
97+
%% Calls Fun and expects it to raise error:badarg.
%% Returns ok in that case, {unexpected, Value} when Fun returns normally and
%% {unexpected, Class, Reason} when it raises anything else.
fail_with_badarg(Fun) ->
    try Fun() of
        Value ->
            {unexpected, Value}
    catch
        error:badarg ->
            ok;
        Class:Reason ->
            {unexpected, Class, Reason}
    end.
104+
105+
%% Returns the OTP release as an integer when running on BEAM, and the atom
%% atomvm on any other machine.
get_otp_version() ->
    Machine = erlang:system_info(machine),
    if
        Machine =:= "BEAM" ->
            list_to_integer(erlang:system_info(otp_release));
        true ->
            atomvm
    end.

tests/libs/estdlib/tests.erl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,11 @@ get_non_networking_tests(OTPVersion) when
5959
(is_integer(OTPVersion) andalso OTPVersion >= 27) orelse OTPVersion =:= atomvm
6060
->
6161
[test_sets | get_non_networking_tests(undefined)];
62+
% test_binary uses encode_hex/1 (OTP-24), encode_hex/2 (OTP-26)
63+
get_non_networking_tests(OTPVersion) when
64+
(is_integer(OTPVersion) andalso OTPVersion >= 26) orelse OTPVersion =:= atomvm
65+
->
66+
[test_binary | get_non_networking_tests(undefined)];
6267
get_non_networking_tests(_OTPVersion) ->
6368
[
6469
test_apply,

tests/test.c

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,7 @@ struct Test tests[] = {
326326
TEST_CASE(test_binary_split),
327327
TEST_CASE(test_split_binary),
328328
TEST_CASE(test_binary_replace),
329+
TEST_CASE(test_binary_match),
329330

330331
TEST_CASE_COND(plusone, 134217728, LONG_MAX != 9223372036854775807),
331332

@@ -634,18 +635,23 @@ static int test_atom(struct Test *test)
634635
static int test_beam(struct Test *test)
635636
{
636637
char command[512];
637-
snprintf(command, sizeof(command),
638+
size_t written = snprintf(command, sizeof(command),
638639
"erl -pa . -eval '"
639-
"erlang:process_flag(trap_exit, false), " /* init(3) traps exists */
640-
"R = %s:start(), "
641-
"S = if"
642-
" R =:= %i -> 0;"
643-
" true -> io:format(\"Expected ~B, got ~p\n\", [%i, R]) "
644-
"end, "
640+
"erlang:process_flag(trap_exit, false), \n" /* init(3) traps exits */
641+
"S = try %s:start() of\n"
642+
" R when R =:= %i -> 0;\n"
643+
" R -> io:format(\"Expected ~B, got ~p\\n\", [%i, R]), 1\n"
644+
"catch\n"
645+
" _C:E:ST -> io:format(\"Raised ~p, stacktrace:\\n~p\\n\", [E, ST]), 1\n"
646+
"end,\n"
645647
"erlang:halt(S).' -noshell",
646648
test->test_module,
647649
test->expected_value,
648650
test->expected_value);
651+
if (written >= sizeof(command) - 1) {
652+
fprintf(stderr, "Exceeded buffer size for module %s\n", test->test_module);
653+
return 1;
654+
}
649655
return system(command);
650656
}
651657

0 commit comments

Comments
 (0)