Skip to content

Commit f629e56

Browse files
committed
Merge pull request atomvm#1708 from jgonet/jgonet/unicode-option
Add unicode alias for utf8 option. These changes are made under both the "Apache 2.0" and the "GNU Lesser General Public License 2.1 or later" license terms (dual license). SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
2 parents aa0c85b + 9187aff commit f629e56

File tree

3 files changed

+46
-15
lines changed

3 files changed

+46
-15
lines changed

libs/estdlib/src/unicode.erl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747
char() | unicode_binary() | charlist(), unicode_binary() | []
4848
).
4949

50-
-type encoding() :: utf8 | latin1.
50+
-type encoding() :: utf8 | unicode | latin1.
5151

5252
-export_type([
5353
unicode_binary/0,

src/libAtomVM/nifs.c

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5341,13 +5341,28 @@ static term nif_maps_next(Context *ctx, int argc, term argv[])
53415341
return ret;
53425342
}
53435343

5344+
/*
 * Translate an encoding atom term into a CharDataEncoding value.
 *
 * encoding_atom: term compared against LATIN1_ATOM, UTF8_ATOM and UNICODE_ATOM.
 * encoding:      out-parameter; written only when the atom is recognised.
 *
 * Returns true and stores the result in *encoding for a recognised atom.
 * Returns false and leaves *encoding untouched for any other term, so the
 * caller decides how to report the error.
 */
static bool encoding_from_atom(term encoding_atom, enum CharDataEncoding *encoding)
5345+
{
5346+
switch (encoding_atom) {
5347+
case LATIN1_ATOM:
5348+
*encoding = Latin1Encoding;
5349+
return true;
5350+
case UTF8_ATOM: /* falls through: shares the UTF8Encoding result below */
5351+
case UNICODE_ATOM: /* accepted as an alias for the utf8 option */
5352+
*encoding = UTF8Encoding;
5353+
return true;
5354+
default: /* unrecognised term: *encoding is not modified */
5355+
return false;
5356+
}
5357+
}
5358+
53445359
static term nif_unicode_characters_to_list(Context *ctx, int argc, term argv[])
53455360
{
53465361
enum CharDataEncoding in_encoding = UTF8Encoding;
5347-
if (argc == 2) {
5348-
if (argv[1] == LATIN1_ATOM) {
5349-
in_encoding = Latin1Encoding;
5350-
} else if (UNLIKELY((argv[1] != UTF8_ATOM))) {
5362+
bool has_in_encoding = argc == 2;
5363+
if (has_in_encoding) {
5364+
term in_encoding_atom = argv[1];
5365+
if (UNLIKELY(!encoding_from_atom(in_encoding_atom, &in_encoding))) {
53515366
RAISE_ERROR(BADARG_ATOM);
53525367
}
53535368
}
@@ -5406,18 +5421,18 @@ static term nif_unicode_characters_to_binary(Context *ctx, int argc, term argv[]
54065421
{
54075422
enum CharDataEncoding in_encoding = UTF8Encoding;
54085423
enum CharDataEncoding out_encoding = UTF8Encoding;
5409-
if (argc > 1) {
5410-
if (argv[1] == LATIN1_ATOM) {
5411-
in_encoding = Latin1Encoding;
5412-
} else if (UNLIKELY((argv[1] != UTF8_ATOM))) {
5424+
bool has_in_encoding = argc > 1;
5425+
bool has_out_encoding = argc == 3;
5426+
if (has_in_encoding) {
5427+
term in_encoding_atom = argv[1];
5428+
if (UNLIKELY(!encoding_from_atom(in_encoding_atom, &in_encoding))) {
54135429
RAISE_ERROR(BADARG_ATOM);
54145430
}
5415-
if (argc == 3) {
5416-
if (argv[2] == LATIN1_ATOM) {
5417-
out_encoding = Latin1Encoding;
5418-
} else if (UNLIKELY((argv[2] != UTF8_ATOM))) {
5419-
RAISE_ERROR(BADARG_ATOM);
5420-
}
5431+
}
5432+
if (has_out_encoding) {
5433+
term out_encoding_atom = argv[2];
5434+
if (UNLIKELY(!encoding_from_atom(out_encoding_atom, &out_encoding))) {
5435+
RAISE_ERROR(BADARG_ATOM);
54215436
}
54225437
}
54235438
size_t len;

tests/erlang_tests/test_unicode.erl

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,12 +52,17 @@ test_to_list_latin1() ->
5252
test_to_list_utf8() ->
5353
"hello" = unicode:characters_to_list(<<"hello">>),
5454
"hello" = unicode:characters_to_list("hello", utf8),
55+
"hello" = unicode:characters_to_list("hello", unicode),
5556
"" = unicode:characters_to_list(<<""/utf8>>),
5657
"" = unicode:characters_to_list(""),
5758
"" = unicode:characters_to_list(<<""/utf8>>, utf8),
59+
"" = unicode:characters_to_list(<<""/utf8>>, unicode),
5860
{error, "h", [-1]} = unicode:characters_to_list([$h, -1], utf8),
61+
{error, "h", [-1]} = unicode:characters_to_list([$h, -1], unicode),
5962
[$h, 16#10ffff] = unicode:characters_to_list([$h, 16#10ffff], utf8),
63+
[$h, 16#10ffff] = unicode:characters_to_list([$h, 16#10ffff], unicode),
6064
{error, "h", [16#110000]} = unicode:characters_to_list([$h, 16#110000], utf8),
65+
{error, "h", [16#110000]} = unicode:characters_to_list([$h, 16#110000], unicode),
6166
{incomplete, "h", <<"é">>} = unicode:characters_to_list(<<"">>),
6267
{error, [], <<16#A0, 16#A1>>} = unicode:characters_to_list(<<16#A0, 16#A1>>),
6368
% Erlang/OTP documentation writes: "The last part is mostly for debugging"
@@ -131,10 +136,21 @@ test_to_binary_latin1() ->
131136

132137
test_to_binary_utf8() ->
133138
<<"hello">> = unicode:characters_to_binary("hello", utf8, utf8),
139+
<<"hello">> = unicode:characters_to_binary("hello", unicode, unicode),
140+
<<"hello">> = unicode:characters_to_binary("hello", utf8, unicode),
141+
<<"hello">> = unicode:characters_to_binary("hello", unicode, utf8),
134142
<<"hello">> = unicode:characters_to_binary(<<"hello">>, utf8, utf8),
143+
<<"hello">> = unicode:characters_to_binary(<<"hello">>, unicode, unicode),
144+
<<"hello">> = unicode:characters_to_binary(<<"hello">>, utf8, unicode),
145+
<<"hello">> = unicode:characters_to_binary(<<"hello">>, unicode, utf8),
135146
<<""/utf8>> = unicode:characters_to_binary("", latin1, utf8),
147+
<<""/utf8>> = unicode:characters_to_binary("", latin1, unicode),
136148
<<""/utf8>> = unicode:characters_to_binary(<<""/utf8>>, utf8, utf8),
149+
<<""/utf8>> = unicode:characters_to_binary(<<""/utf8>>, unicode, unicode),
150+
<<""/utf8>> = unicode:characters_to_binary(<<""/utf8>>, utf8, unicode),
151+
<<""/utf8>> = unicode:characters_to_binary(<<""/utf8>>, unicode, utf8),
137152
<<""/utf8>> = unicode:characters_to_binary(<<""/utf8>>, utf8),
153+
<<""/utf8>> = unicode:characters_to_binary(<<""/utf8>>, unicode),
138154
<<""/utf8>> = unicode:characters_to_binary(<<""/utf8>>),
139155
{error, <<"h">>, [-1]} = unicode:characters_to_binary([$h, -1]),
140156
<<"h", 244, 143, 191, 191>> = unicode:characters_to_binary([$h, 16#10FFFF]),

0 commit comments

Comments
 (0)