Skip to content

Commit 0d5b0de

Browse files
committed
Merge pull request #1704 from jgonet/jgonet/binary-replace
Add binary:replace/4 These changes are made under both the "Apache 2.0" and the "GNU Lesser General Public License 2.1 or later" license terms (dual license). SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
2 parents 58fe3ae + cf3caf9 commit 0d5b0de

File tree

7 files changed

+241
-1
lines changed

7 files changed

+241
-1
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
5050
- Added WiFi support for ESP32P4 via esp-wifi-external for build with ESP-IDF v5.4 and later
5151
- Added Process.link/1 and unlink/1 to Elixir Process.ex
5252
- Added `erlang:module_loaded/1`
53+
- Added `binary:replace/3`, `binary:replace/4`
5354

5455
### Changed
5556

libs/estdlib/src/binary.erl

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,9 @@
2525
%%-----------------------------------------------------------------------------
2626
-module(binary).
2727

28-
-export([at/2, decode_hex/1, encode_hex/1, encode_hex/2, part/3, split/2, split/3]).
28+
-export([
29+
at/2, decode_hex/1, encode_hex/1, encode_hex/2, part/3, split/2, split/3, replace/3, replace/4
30+
]).
2931

3032
%%-----------------------------------------------------------------------------
3133
%% @param Binary binary to get a byte from
@@ -115,3 +117,37 @@ split(_Binary, _Pattern) ->
115117
-spec split(Binary :: binary(), Pattern :: binary(), Option :: [global]) -> [binary()].
116118
split(_Binary, _Pattern, _Option) ->
117119
erlang:nif_error(undefined).
120+
121+
%%-----------------------------------------------------------------------------
122+
%% @equiv replace(Binary, Pattern, Replacement, [])
123+
%% @param Binary binary where the replacements should occur
124+
%% @param Pattern binary pattern which is to be replaced
125+
%% @param Replacement binary which will replace the pattern
126+
%% @return resulting binary after replacements
127+
%% @doc Replaces first occurrence of Pattern in Binary with Replacement.
128+
%% If Pattern is not found, returns the original binary unchanged.
129+
%% Pattern and Replacement must be binaries.
130+
%% @end
131+
%%-----------------------------------------------------------------------------
132+
-spec replace(Binary :: binary(), Pattern :: binary(), Replacement :: binary()) -> binary().
133+
replace(_Binary, _Pattern, _Replacement) ->
134+
erlang:nif_error(undefined).
135+
136+
%%-----------------------------------------------------------------------------
%% @param   Binary binary where the replacements should occur
%% @param   Pattern binary pattern which is to be replaced
%% @param   Replacement binary which will replace the pattern
%% @param   Options list of options for the replacement operation
%% @returns resulting binary after replacements
%% @doc     Replaces occurrences of Pattern in Binary with Replacement.
%%          If Options includes `global', replaces all occurrences; otherwise,
%%          replaces just the first occurrence.
%%          If Pattern is not found, returns the original binary unchanged.
%%          Binary, Pattern and Replacement must be binaries and Pattern must
%%          not be empty; otherwise the call fails with `badarg'.
%%          The only implemented option is `global'; any other option
%%          (including `scope' and `insert_replaced') fails with `badarg'.
%% @end
%%-----------------------------------------------------------------------------
-spec replace(
    Binary :: binary(), Pattern :: binary(), Replacement :: binary(), Options :: [global]
) -> binary().
replace(_Binary, _Pattern, _Replacement, _Options) ->
    %% Placeholder body: the real implementation is provided by the VM as a NIF.
    erlang:nif_error(undefined).

src/libAtomVM/nifs.c

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ static term nif_binary_first_1(Context *ctx, int argc, term argv[]);
9595
static term nif_binary_last_1(Context *ctx, int argc, term argv[]);
9696
static term nif_binary_part_3(Context *ctx, int argc, term argv[]);
9797
static term nif_binary_split(Context *ctx, int argc, term argv[]);
98+
static term nif_binary_replace(Context *ctx, int argc, term argv[]);
9899
static term nif_calendar_system_time_to_universal_time_2(Context *ctx, int argc, term argv[]);
99100
static term nif_erlang_delete_element_2(Context *ctx, int argc, term argv[]);
100101
static term nif_erlang_atom_to_binary(Context *ctx, int argc, term argv[]);
@@ -259,6 +260,12 @@ static const struct Nif binary_split_nif =
259260
.nif_ptr = nif_binary_split
260261
};
261262

263+
static const struct Nif binary_replace_nif =
264+
{
265+
.base.type = NIFFunctionType,
266+
.nif_ptr = nif_binary_replace
267+
};
268+
262269
static const struct Nif make_ref_nif =
263270
{
264271
.base.type = NIFFunctionType,
@@ -3280,6 +3287,106 @@ static term nif_binary_split(Context *ctx, int argc, term argv[])
32803287
return result_list;
32813288
}
32823289

3290+
/*
 * NIF backing binary:replace/3 and binary:replace/4.
 *
 * argv[0] is the subject binary, argv[1] the pattern, argv[2] the replacement
 * and, for arity 4, argv[3] the option list. All arguments are type-checked
 * through VALIDATE_VALUE.
 *
 * The only supported option is `global` (replace every non-overlapping
 * occurrence, scanning left to right); without it only the first occurrence is
 * replaced. Raises badarg when the pattern is empty, when the option list
 * contains anything other than `global`, or when the option list is improper.
 *
 * Returns the subject binary itself when the pattern does not occur in it
 * (which includes the empty subject), otherwise a newly allocated binary.
 */
static term nif_binary_replace(Context *ctx, int argc, term argv[])
{
    term bin_term = argv[0];
    term pattern = argv[1];
    term replacement = argv[2];
    // binary:replace/3 behaves like binary:replace/4 with an empty option list
    term options = argc == 4 ? argv[3] : term_nil();

    VALIDATE_VALUE(bin_term, term_is_binary);
    VALIDATE_VALUE(pattern, term_is_binary);
    VALIDATE_VALUE(replacement, term_is_binary);
    VALIDATE_VALUE(options, term_is_list);

    bool global = false;
    while (term_is_nonempty_list(options)) {
        term head = term_get_list_head(options);
        if (UNLIKELY(head != GLOBAL_ATOM)) {
            RAISE_ERROR(BADARG_ATOM);
        }
        global = true;
        options = term_get_list_tail(options);
    }
    // The walk must end on the empty list: reject improper lists such as [global | foo]
    if (UNLIKELY(options != term_nil())) {
        RAISE_ERROR(BADARG_ATOM);
    }

    size_t bin_size = term_binary_size(bin_term);
    size_t pattern_size = term_binary_size(pattern);
    size_t repl_size = term_binary_size(replacement);

    // An empty pattern is invalid; an empty subject is not, it simply never matches
    if (UNLIKELY(pattern_size == 0)) {
        RAISE_ERROR(BADARG_ATOM);
    }

    if (bin_size < pattern_size) {
        return bin_term;
    }

    const char *bin_data = term_binary_data(bin_term);
    const char *pattern_data = term_binary_data(pattern);

    // First pass: count the occurrences that will be replaced, so that the
    // result can be allocated with its exact size.
    size_t match_count = 0;
    size_t scan_idx = 0;
    while (bin_size - scan_idx >= pattern_size) {
        const char *match = memmem(bin_data + scan_idx, bin_size - scan_idx, pattern_data, pattern_size);
        if (match == NULL) {
            break;
        }
        ++match_count;
        if (!global) {
            break;
        }
        scan_idx = (size_t) (match - bin_data) + pattern_size;
    }

    // Nothing to replace: hand back the subject as is, without allocating
    if (match_count == 0) {
        return bin_term;
    }

    // Matches do not overlap, so match_count * pattern_size <= bin_size and the
    // subtraction cannot underflow.
    size_t result_size = bin_size - match_count * pattern_size + match_count * repl_size;

    // NOTE(review): kept as in the original; confirm that this reservation also
    // covers the header words of the new binary term.
    size_t size_binary = term_binary_data_size_in_terms(result_size);
    term roots[3] = { bin_term, pattern, replacement };
    if (UNLIKELY(memory_ensure_free_with_roots(ctx, size_binary, 3, roots, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) {
        RAISE_ERROR(OUT_OF_MEMORY_ATOM);
    }
    // The roots may have been updated by a GC: re-read every data pointer from them
    bin_data = term_binary_data(roots[0]);
    pattern_data = term_binary_data(roots[1]);
    const char *repl_data = term_binary_data(roots[2]);

    term result_binary = term_create_uninitialized_binary(result_size, &ctx->heap, ctx->global);
    char *result_data = (char *) term_binary_data(result_binary);

    // Second pass: repeat the same search and copy, for each match, the bytes
    // preceding it followed by the replacement. Bounding the loop by
    // match_count keeps the writes within result_size.
    size_t bin_idx = 0;
    size_t result_idx = 0;
    for (size_t i = 0; i < match_count; i++) {
        const char *match = memmem(bin_data + bin_idx, bin_size - bin_idx, pattern_data, pattern_size);
        if (UNLIKELY(match == NULL)) {
            // not expected: the counting pass found this occurrence
            break;
        }
        size_t prefix_size = (size_t) (match - (bin_data + bin_idx));
        memcpy(result_data + result_idx, bin_data + bin_idx, prefix_size);
        result_idx += prefix_size;
        memcpy(result_data + result_idx, repl_data, repl_size);
        result_idx += repl_size;
        bin_idx += prefix_size + pattern_size;
    }

    // Copy whatever follows the last replaced occurrence (possibly nothing)
    memcpy(result_data + result_idx, bin_data + bin_idx, bin_size - bin_idx);

    return result_binary;
}
3389+
32833390
static term nif_erlang_throw(Context *ctx, int argc, term argv[])
32843391
{
32853392
UNUSED(argc);

src/libAtomVM/nifs.gperf

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ binary:last/1, &binary_last_nif
3939
binary:part/3, &binary_part_nif
4040
binary:split/2, &binary_split_nif
4141
binary:split/3, &binary_split_nif
42+
binary:replace/3, &binary_replace_nif
43+
binary:replace/4, &binary_replace_nif
4244
calendar:system_time_to_universal_time/2, &system_time_to_universal_time_nif
4345
erlang:atom_to_binary/1, &atom_to_binary_nif
4446
erlang:atom_to_binary/2, &atom_to_binary_nif

tests/erlang_tests/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,7 @@ compile_erlang(test_unicode)
284284
compile_erlang(test_binary_part)
285285
compile_erlang(test_binary_split)
286286
compile_erlang(test_split_binary)
287+
compile_erlang(test_binary_replace)
287288

288289
compile_erlang(plusone)
289290
compile_erlang(plusone2)
@@ -775,6 +776,7 @@ add_custom_target(erlang_test_modules DEPENDS
775776
test_binary_part.beam
776777
test_binary_split.beam
777778
test_split_binary.beam
779+
test_binary_replace.beam
778780

779781
plusone.beam
780782
plusone2.beam
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
%
2+
% This file is part of AtomVM.
3+
%
4+
% Copyright 2024 Tomasz Sobkiewicz <tomasz.sobkiewicz@swmansion.com>
5+
%
6+
% Licensed under the Apache License, Version 2.0 (the "License");
7+
% you may not use this file except in compliance with the License.
8+
% You may obtain a copy of the License at
9+
%
10+
% http://www.apache.org/licenses/LICENSE-2.0
11+
%
12+
% Unless required by applicable law or agreed to in writing, software
13+
% distributed under the License is distributed on an "AS IS" BASIS,
14+
% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
% See the License for the specific language governing permissions and
16+
% limitations under the License.
17+
%
18+
% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
19+
%
20+
21+
-module(test_binary_replace).
22+
23+
-export([start/0]).
24+
25+
%% Test entry point: runs each test group in order, asserting that it returns
%% `ok', and evaluates to 0 once all of them have passed.
start() ->
26+
ok = replace(),
27+
ok = global_replace(),
28+
ok = invalid_args(),
29+
0.
30+
31+
%% Exercises binary:replace/3 and binary:replace/4 with an empty option list:
%% only the first occurrence of the pattern is replaced.
replace() ->
    %% Replacement of the same, greater and smaller length than the pattern.
    <<"barbar">> = binary:replace(<<"foobar">>, <<"foo">>, <<"bar">>),
    <<"foooobar">> = binary:replace(<<"foobar">>, <<"o">>, <<"ooo">>),
    <<"fobar">> = binary:replace(<<"foobar">>, <<"oo">>, <<"o">>),
    %% Pattern covering the whole subject.
    <<"">> = binary:replace(<<"foobar">>, <<"foobar">>, <<"">>),
    <<"foobar">> = binary:replace(<<"o">>, <<"o">>, <<"foobar">>),
    %% Pattern located at the very end of the subject.
    <<"fof">> = binary:replace(<<"foobar">>, <<"obar">>, <<"f">>),
    %% Pattern absent: longer than the subject, then short enough to fit in it.
    <<"o">> = binary:replace(<<"o">>, <<"foobar">>, <<"o">>),
    <<"foobar">> = binary:replace(<<"foobar">>, <<"baz">>, <<"x">>),
    %% An explicit empty option list behaves like replace/3.
    <<"fobar">> = binary:replace(<<"foobar">>, <<"oo">>, <<"o">>, []),
    <<"foo">> = binary:replace(<<"foofoo">>, <<"foo">>, <<"">>, []),
    ok.
43+
44+
%% Exercises binary:replace/4 with the `global' option: every non-overlapping
%% occurrence of the pattern must be replaced.
global_replace() ->
    %% Each entry is {Subject, Pattern, Replacement, Expected}.
    check_global([
        {<<"foooobar">>, <<"oo">>, <<"o">>, <<"foobar">>},
        {<<"foobar">>, <<"o">>, <<"oo">>, <<"foooobar">>},
        {<<"foofoo">>, <<"foo">>, <<"">>, <<"">>},
        {<<"oo">>, <<"o">>, <<"foo">>, <<"foofoo">>}
    ]).

%% Asserts, entry by entry, that the global replacement yields Expected.
%% Written as plain recursion so that no library module is required.
check_global([]) ->
    ok;
check_global([{Subject, Pattern, Replacement, Expected} | Rest]) ->
    Expected = binary:replace(Subject, Pattern, Replacement, [global]),
    check_global(Rest).
50+
51+
%% Checks that malformed calls to binary:replace/3,4 fail with `badarg'.
invalid_args() ->
    %% Empty subject together with an empty pattern: OTP up to 26 returns the
    %% empty binary, whereas newer OTP releases and AtomVM reject the call.
    case vm_info() of
        {beam, V} when V =< 26 ->
            <<"">> = binary:replace(<<"">>, <<"">>, <<"">>),
            <<"">> = binary:replace(<<"">>, <<"">>, <<"">>, [global]);
        _Otp27OrAtomVm ->
            ok = raises(badarg, fun() -> binary:replace(<<"">>, <<"">>, <<"">>) end),
            ok = raises(badarg, fun() -> binary:replace(<<"">>, <<"">>, <<"">>, [global]) end)
    end,
    %% Empty pattern with a non-empty subject.
    ok = raises(badarg, fun() -> binary:replace(<<"o">>, <<"">>, <<"">>) end),
    ok = raises(badarg, fun() -> binary:replace(<<"o">>, <<"">>, <<"">>, [global]) end),
    %% One non-binary argument at a time; the remaining arguments are valid so
    %% that the failure can only come from the argument under test.
    ok = raises(badarg, fun() -> binary:replace(not_binary, <<"o">>, <<"">>) end),
    ok = raises(badarg, fun() -> binary:replace(<<"o">>, not_binary, <<"">>) end),
    ok = raises(badarg, fun() -> binary:replace(<<"o">>, <<"o">>, not_binary) end),
    %% Options that are not a list, or that hold an unknown entry.
    ok = raises(badarg, fun() -> binary:replace(<<"o">>, <<"o">>, <<"">>, global) end),
    ok = raises(badarg, fun() -> binary:replace(<<"o">>, <<"o">>, <<"">>, [{global, true}]) end),
    % insert_replaced not supported
    ok = raises(badarg, fun() ->
        binary:replace(<<"o">>, <<"o">>, <<"">>, [global, {insert_replaced, 1}])
    end),
    ok = raises(badarg, fun() ->
        binary:replace(<<"o">>, <<"o">>, <<"">>, [{insert_replaced, 1}])
    end),
    ok.
74+
75+
%% Calls F and checks that it fails with an exception of class `error' whose
%% reason is exactly Error.
%% Returns `ok' in that case, `{unexpected, Value}' when F returns normally and
%% `{unexpected, Class, Reason}' when F fails in any other way.
raises(Error, F) ->
    try F() of
        Value ->
            {unexpected, Value}
    catch
        error:Reason when Reason =:= Error ->
            ok;
        Class:Reason ->
            {unexpected, Class, Reason}
    end.
83+
84+
%% Identifies the virtual machine running the test: `{beam, OtpRelease}' with
%% the OTP release as an integer when the machine reports "BEAM", and `atomvm'
%% for any other machine name.
vm_info() ->
    machine_info(erlang:system_info(machine)).

%% Maps the machine name to the value returned by vm_info/0.
machine_info("BEAM") ->
    {beam, list_to_integer(erlang:system_info(otp_release))};
machine_info(_Other) ->
    atomvm.

tests/test.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,7 @@ struct Test tests[] = {
325325
TEST_CASE(test_binary_part),
326326
TEST_CASE(test_binary_split),
327327
TEST_CASE(test_split_binary),
328+
TEST_CASE(test_binary_replace),
328329

329330
TEST_CASE_COND(plusone, 134217728, LONG_MAX != 9223372036854775807),
330331

0 commit comments

Comments
 (0)