Skip to content

Commit 9187aff

Browse files
FKubisSWM authored and jgonet committed
Add unicode alias for utf8 option
Signed-off-by: Jakub Gonet <jakub.gonet@swmansion.com>
1 parent f42fb23 commit 9187aff

File tree

3 files changed

+46
-15
lines changed

3 files changed

+46
-15
lines changed

libs/estdlib/src/unicode.erl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@
4747
char() | unicode_binary() | charlist(), unicode_binary() | []
4848
).
4949

50-
-type encoding() :: utf8 | latin1.
50+
-type encoding() :: utf8 | unicode | latin1.
5151

5252
-export_type([
5353
unicode_binary/0,

src/libAtomVM/nifs.c

Lines changed: 29 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -5341,13 +5341,28 @@ static term nif_maps_next(Context *ctx, int argc, term argv[])
53415341
return ret;
53425342
}
53435343

5344+
static bool encoding_from_atom(term encoding_atom, enum CharDataEncoding *encoding)
5345+
{
5346+
switch (encoding_atom) {
5347+
case LATIN1_ATOM:
5348+
*encoding = Latin1Encoding;
5349+
return true;
5350+
case UTF8_ATOM:
5351+
case UNICODE_ATOM:
5352+
*encoding = UTF8Encoding;
5353+
return true;
5354+
default:
5355+
return false;
5356+
}
5357+
}
5358+
53445359
static term nif_unicode_characters_to_list(Context *ctx, int argc, term argv[])
53455360
{
53465361
enum CharDataEncoding in_encoding = UTF8Encoding;
5347-
if (argc == 2) {
5348-
if (argv[1] == LATIN1_ATOM) {
5349-
in_encoding = Latin1Encoding;
5350-
} else if (UNLIKELY((argv[1] != UTF8_ATOM))) {
5362+
bool has_in_encoding = argc == 2;
5363+
if (has_in_encoding) {
5364+
term in_encoding_atom = argv[1];
5365+
if (UNLIKELY(!encoding_from_atom(in_encoding_atom, &in_encoding))) {
53515366
RAISE_ERROR(BADARG_ATOM);
53525367
}
53535368
}
@@ -5406,18 +5421,18 @@ static term nif_unicode_characters_to_binary(Context *ctx, int argc, term argv[]
54065421
{
54075422
enum CharDataEncoding in_encoding = UTF8Encoding;
54085423
enum CharDataEncoding out_encoding = UTF8Encoding;
5409-
if (argc > 1) {
5410-
if (argv[1] == LATIN1_ATOM) {
5411-
in_encoding = Latin1Encoding;
5412-
} else if (UNLIKELY((argv[1] != UTF8_ATOM))) {
5424+
bool has_in_encoding = argc > 1;
5425+
bool has_out_encoding = argc == 3;
5426+
if (has_in_encoding) {
5427+
term in_encoding_atom = argv[1];
5428+
if (UNLIKELY(!encoding_from_atom(in_encoding_atom, &in_encoding))) {
54135429
RAISE_ERROR(BADARG_ATOM);
54145430
}
5415-
if (argc == 3) {
5416-
if (argv[2] == LATIN1_ATOM) {
5417-
out_encoding = Latin1Encoding;
5418-
} else if (UNLIKELY((argv[2] != UTF8_ATOM))) {
5419-
RAISE_ERROR(BADARG_ATOM);
5420-
}
5431+
}
5432+
if (has_out_encoding) {
5433+
term out_encoding_atom = argv[2];
5434+
if (UNLIKELY(!encoding_from_atom(out_encoding_atom, &out_encoding))) {
5435+
RAISE_ERROR(BADARG_ATOM);
54215436
}
54225437
}
54235438
size_t len;

tests/erlang_tests/test_unicode.erl

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -52,12 +52,17 @@ test_to_list_latin1() ->
5252
test_to_list_utf8() ->
5353
"hello" = unicode:characters_to_list(<<"hello">>),
5454
"hello" = unicode:characters_to_list("hello", utf8),
55+
"hello" = unicode:characters_to_list("hello", unicode),
5556
"" = unicode:characters_to_list(<<""/utf8>>),
5657
"" = unicode:characters_to_list(""),
5758
"" = unicode:characters_to_list(<<""/utf8>>, utf8),
59+
"" = unicode:characters_to_list(<<""/utf8>>, unicode),
5860
{error, "h", [-1]} = unicode:characters_to_list([$h, -1], utf8),
61+
{error, "h", [-1]} = unicode:characters_to_list([$h, -1], unicode),
5962
[$h, 16#10ffff] = unicode:characters_to_list([$h, 16#10ffff], utf8),
63+
[$h, 16#10ffff] = unicode:characters_to_list([$h, 16#10ffff], unicode),
6064
{error, "h", [16#110000]} = unicode:characters_to_list([$h, 16#110000], utf8),
65+
{error, "h", [16#110000]} = unicode:characters_to_list([$h, 16#110000], unicode),
6166
{incomplete, "h", <<"é">>} = unicode:characters_to_list(<<"">>),
6267
{error, [], <<16#A0, 16#A1>>} = unicode:characters_to_list(<<16#A0, 16#A1>>),
6368
% Erlang/OTP documentation writes: "The last part is mostly for debugging"
@@ -131,10 +136,21 @@ test_to_binary_latin1() ->
131136

132137
test_to_binary_utf8() ->
133138
<<"hello">> = unicode:characters_to_binary("hello", utf8, utf8),
139+
<<"hello">> = unicode:characters_to_binary("hello", unicode, unicode),
140+
<<"hello">> = unicode:characters_to_binary("hello", utf8, unicode),
141+
<<"hello">> = unicode:characters_to_binary("hello", unicode, utf8),
134142
<<"hello">> = unicode:characters_to_binary(<<"hello">>, utf8, utf8),
143+
<<"hello">> = unicode:characters_to_binary(<<"hello">>, unicode, unicode),
144+
<<"hello">> = unicode:characters_to_binary(<<"hello">>, utf8, unicode),
145+
<<"hello">> = unicode:characters_to_binary(<<"hello">>, unicode, utf8),
135146
<<""/utf8>> = unicode:characters_to_binary("", latin1, utf8),
147+
<<""/utf8>> = unicode:characters_to_binary("", latin1, unicode),
136148
<<""/utf8>> = unicode:characters_to_binary(<<""/utf8>>, utf8, utf8),
149+
<<""/utf8>> = unicode:characters_to_binary(<<""/utf8>>, unicode, unicode),
150+
<<""/utf8>> = unicode:characters_to_binary(<<""/utf8>>, utf8, unicode),
151+
<<""/utf8>> = unicode:characters_to_binary(<<""/utf8>>, unicode, utf8),
137152
<<""/utf8>> = unicode:characters_to_binary(<<""/utf8>>, utf8),
153+
<<""/utf8>> = unicode:characters_to_binary(<<""/utf8>>, unicode),
138154
<<""/utf8>> = unicode:characters_to_binary(<<""/utf8>>),
139155
{error, <<"h">>, [-1]} = unicode:characters_to_binary([$h, -1]),
140156
<<"h", 244, 143, 191, 191>> = unicode:characters_to_binary([$h, 16#10FFFF]),

0 commit comments

Comments
 (0)