Skip to content

Commit 2c82f47

Browse files
committed
Merge pull request #1490 from bettio/otp28-support
Add support for OTP-28 (master branch)

Re-enable tests against OTP from the master branch.

In order to support what will be released as OTP-28, some changes were required:
- New encoding for atoms
- Uncompressed literals
- to_atom NIFs (`binary_to_atom`, etc...) are now BIFs when used in guards (hence support for GCBIFs in CALL_EXT related opcodes has been introduced, both for OTP < 28 support and for when this optimization is not applied)

These changes are made under both the "Apache 2.0" and the "GNU Lesser General Public License 2.1 or later" license terms (dual license).

SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
2 parents 652d60e + c079677 commit 2c82f47

File tree

14 files changed

+445
-193
lines changed

14 files changed

+445
-193
lines changed

.github/workflows/build-and-test.yaml

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -226,15 +226,12 @@ jobs:
226226
elixir_version: "1.14"
227227
rebar3_version: "3.23.0"
228228

229-
# TODO: enable master again
230-
# master will not work until we don't adapt to atom table changes
231-
# # master/main version of OTP/Elixir
232-
# - os: "ubuntu-24.04"
233-
# cc: "cc"
234-
# cxx: "c++"
235-
# otp: "master"
236-
# elixir_version: "main"
237-
# rebar3_version: "3.24.0"
229+
- os: "ubuntu-24.04"
230+
cc: "cc"
231+
cxx: "c++"
232+
otp: "master"
233+
elixir_version: "main"
234+
rebar3_version: "3.24.0"
238235

239236
# Additional default compiler builds
240237
- os: "ubuntu-20.04"

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1717
- Added `supervisor:terminate_child/2`, `supervisor:restart_child/2` and `supervisor:delete_child/2`
1818
- Added `esp:partition_read/3`, and documentation for `esp:partition_erase_range/2/3` and `esp:partition_write/3`
1919
- Added support for list insertion in 'ets:insert/2'.
20+
- Added support for OTP-28
2021

2122
### Fixed
2223
- ESP32: improved sntp sync speed from a cold boot.

doc/release-notes.md.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ AtomVM will run BEAM files that have been compiled using the following Erlang an
3333
| ✅ OTP 24 | ✅ 1.14 |
3434
| ✅ OTP 25 | ✅ 1.14 |
3535
| ✅ OTP 26 | ✅ 1.15 |
36+
| ✅ OTP 28 | ✅ 1.17 |
3637

3738
```{note}
3839
Versions of Elixir that are compatible with a particular OTP version may work. This table reflects the versions that are tested.

src/libAtomVM/atom_table.c

Lines changed: 78 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -445,26 +445,71 @@ long atom_table_ensure_atom(struct AtomTable *table, AtomString string, enum Ato
445445
return new_index;
446446
}
447447

448-
int atom_table_ensure_atoms(
449-
struct AtomTable *table, const void *atoms, int count, int *translate_table)
448+
/**
 * @brief Decode a variable-length encoded atom length and advance the cursor.
 *
 * @details Two forms are understood, selected by bits 3 and 4 of the first byte:
 *  - bit 3 clear: one byte, the length is the upper 4 bits (0..15);
 *  - bit 3 set, bit 4 clear: two bytes, the length is an 11-bit value made of
 *    the upper 3 bits of the first byte followed by the whole second byte.
 * Any other form is rejected. The low 3 bits of the first byte are ignored.
 *
 * @param len_bytes in/out pointer to the first byte of the encoded length; on
 * success it is moved just past the encoded length, on failure it is left
 * untouched.
 * @return the decoded length, or -1 when the encoding is not supported.
 */
static inline int read_encoded_len(const uint8_t **len_bytes)
{
    const uint8_t *cursor = *len_bytes;
    uint8_t first = cursor[0];

    if (first & 0x8) {
        if (first & 0x10) {
            // wider encodings are not handled
            return -1;
        }

        // two bytes form: 3 high bits in the first byte, 8 low bits in the second
        int high_bits = first >> 5;
        int low_bits = cursor[1];
        *len_bytes = cursor + 2;
        return (high_bits << 8) | low_bits;
    }

    // single byte form: the value is the upper nibble
    *len_bytes = cursor + 1;
    return first >> 4;
}
465+
466+
int atom_table_ensure_atoms(struct AtomTable *table, const void *atoms, int count,
467+
int *translate_table, enum EnsureAtomsOpt opt)
468+
{
469+
bool is_long_format = (opt & EnsureLongEncoding) != 0;
470+
451471
SMP_WRLOCK(table);
452472

453473
int new_atoms_count = 0;
454474

455475
const uint8_t *current_atom = atoms;
456476

457477
for (int i = 0; i < count; i++) {
458-
struct HNode *node = get_node(table, current_atom);
478+
struct HNode *node;
479+
if (is_long_format) {
480+
int atom_len = read_encoded_len(&current_atom);
481+
if (UNLIKELY(atom_len < 0)) {
482+
fprintf(stderr, "Found invalid atom len.");
483+
SMP_UNLOCK(table);
484+
return ATOM_TABLE_INVALID_LEN;
485+
} else if (UNLIKELY(atom_len > 255)) {
486+
fprintf(stderr,
487+
"Unsupported atom length %i bytes.\n"
488+
"Unlike OTP >= 28, AtomVM supports a maximum of 255 bytes"
489+
"regardeless the number of codepoints.\n"
490+
"If you are seeing this error please open an issue on GitHub:\n"
491+
"https://github.com/atomvm/AtomVM/issues\n",
492+
atom_len);
493+
SMP_UNLOCK(table);
494+
return ATOM_TABLE_INVALID_LEN;
495+
}
496+
char tmp_old_fmt[256];
497+
tmp_old_fmt[0] = atom_len;
498+
memcpy(tmp_old_fmt + 1, current_atom, atom_len);
499+
node = get_node(table, tmp_old_fmt);
500+
current_atom += atom_len;
501+
} else {
502+
node = get_node(table, current_atom);
503+
uint8_t atom_len = current_atom[0];
504+
current_atom += 1 + atom_len;
505+
}
506+
459507
if (node) {
460508
translate_table[i] = node->index;
461509
} else {
462510
new_atoms_count++;
463511
translate_table[i] = ATOM_TABLE_NOT_FOUND;
464512
}
465-
466-
uint8_t atom_len = current_atom[0];
467-
current_atom += 1 + atom_len;
468513
}
469514

470515
maybe_rehash(table, new_atoms_count);
@@ -473,6 +518,19 @@ int atom_table_ensure_atoms(
473518
int remaining_atoms = new_atoms_count;
474519
struct HNodeGroup *node_group = table->last_node_group;
475520
for (int i = 0; i < count; i++) {
521+
522+
const uint8_t *to_be_copied = NULL;
523+
const uint8_t *next_atom = current_atom;
524+
uint8_t atom_len;
525+
if (is_long_format) {
526+
atom_len = read_encoded_len(&next_atom);
527+
to_be_copied = next_atom;
528+
next_atom += atom_len;
529+
} else {
530+
atom_len = current_atom[0];
531+
next_atom += 1 + atom_len;
532+
}
533+
476534
if (translate_table[i] == ATOM_TABLE_NOT_FOUND) {
477535
if (!table->last_node_group_avail) {
478536
node_group = new_node_group(table, remaining_atoms);
@@ -482,7 +540,19 @@ int atom_table_ensure_atoms(
482540
}
483541
}
484542

485-
unsigned long hash = sdbm_hash(current_atom, atom_string_len(current_atom));
543+
if (is_long_format) {
544+
uint8_t *atom_copy = malloc(atom_len + 1);
545+
if (IS_NULL_PTR(atom_copy)) {
546+
// we are not going to remove atoms that have already been added up to this one
547+
SMP_UNLOCK(table);
548+
return ATOM_TABLE_ALLOC_FAIL;
549+
}
550+
atom_copy[0] = atom_len;
551+
memcpy(atom_copy + 1, to_be_copied, atom_len);
552+
current_atom = atom_copy;
553+
}
554+
555+
unsigned long hash = sdbm_hash(current_atom, atom_len);
486556
unsigned long bucket_index = hash % table->capacity;
487557

488558
translate_table[i] = insert_node(table, node_group, bucket_index, current_atom);
@@ -491,8 +561,7 @@ int atom_table_ensure_atoms(
491561
break;
492562
}
493563
}
494-
uint8_t atom_len = current_atom[0];
495-
current_atom += 1 + atom_len;
564+
current_atom = next_atom;
496565
}
497566

498567
SMP_UNLOCK(table);

src/libAtomVM/atom_table.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,16 @@ extern "C" {
3131

3232
#define ATOM_TABLE_NOT_FOUND -1
3333
#define ATOM_TABLE_ALLOC_FAIL -2
34+
#define ATOM_TABLE_INVALID_LEN -3
3435

3536
struct AtomTable;
3637

38+
/**
 * Options accepted by atom_table_ensure_atoms.
 */
enum EnsureAtomsOpt
{
    /** No option: every atom is prefixed by a single length byte. */
    EnsureAtomsNoOpts = 0,
    /** Atom lengths use the variable-length (one or two bytes) encoding. */
    EnsureLongEncoding = 1 << 0
};
43+
3744
enum AtomTableCopyOpt
3845
{
3946
AtomTableNoOpts = 0,
@@ -56,8 +63,8 @@ AtomString atom_table_get_atom_string(struct AtomTable *table, long index);
5663

5764
long atom_table_get_index(struct AtomTable *table, AtomString string);
5865

59-
int atom_table_ensure_atoms(
60-
struct AtomTable *table, const void *atoms, int count, int *translate_table);
66+
int atom_table_ensure_atoms(struct AtomTable *table, const void *atoms, int count,
67+
int *translate_table, enum EnsureAtomsOpt opts);
6168

6269
int atom_table_cmp_using_atom_index(
6370
struct AtomTable *table, int t_atom_index, int other_atom_index);

src/libAtomVM/bif.c

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,14 @@
2424
#include <math.h>
2525

2626
#include "atom.h"
27+
#include "bitstring.h"
2728
#include "defaultatoms.h"
2829
#include "dictionary.h"
30+
#include "interop.h"
2931
#include "overflow_helpers.h"
3032
#include "term.h"
3133
#include "trace.h"
34+
#include "unicode.h"
3235
#include "utils.h"
3336

3437
//Ignore warning caused by gperf generated code
@@ -1512,3 +1515,176 @@ term bif_erlang_size_1(Context *ctx, uint32_t fail_label, int live, term arg1)
15121515

15131516
RAISE_ERROR_BIF(fail_label, BADARG_ATOM);
15141517
}
1518+
1519+
static term list_to_atom(Context *ctx, term a_list, bool create_new, term *error_reason);
1520+
1521+
/**
 * BIF flavour of list_to_atom/1: converts arg1 to an atom, creating the atom
 * when it does not exist yet.
 *
 * @param ctx the calling context.
 * @param fail_label label forwarded to RAISE_ERROR_BIF on failure.
 * @param live unused.
 * @param arg1 the list to convert.
 * @return the atom term; on failure the error reason reported by list_to_atom
 * is raised through RAISE_ERROR_BIF.
 */
term bif_erlang_list_to_atom_1(Context *ctx, uint32_t fail_label, int live, term arg1)
{
    UNUSED(live);

    term reason;
    term atom = list_to_atom(ctx, arg1, true, &reason);
    if (UNLIKELY(term_is_invalid_term(atom))) {
        RAISE_ERROR_BIF(fail_label, reason);
    }

    return atom;
}
1532+
1533+
/**
 * BIF flavour of list_to_existing_atom/1: converts arg1 to an atom, without
 * creating a new atom when it is not already known.
 *
 * @param ctx the calling context.
 * @param fail_label label forwarded to RAISE_ERROR_BIF on failure.
 * @param live unused.
 * @param arg1 the list to convert.
 * @return the atom term; on failure the error reason reported by list_to_atom
 * is raised through RAISE_ERROR_BIF.
 */
term bif_erlang_list_to_existing_atom_1(Context *ctx, uint32_t fail_label, int live, term arg1)
{
    UNUSED(live);

    term reason;
    term atom = list_to_atom(ctx, arg1, false, &reason);
    if (UNLIKELY(term_is_invalid_term(atom))) {
        RAISE_ERROR_BIF(fail_label, reason);
    }

    return atom;
}
1544+
1545+
/**
 * Shared implementation of the list to atom conversions.
 *
 * @param ctx the calling context (its global atom table is used).
 * @param a_list the term to convert.
 * @param create_new when false, the lookup is done with
 * AtomTableAlreadyExisting, so an unknown atom is reported as an error.
 * @param error_reason set to the error atom when the invalid term is returned:
 *  - BADARG_ATOM: a_list is not a list, or the atom table returned
 *    ATOM_TABLE_NOT_FOUND;
 *  - OUT_OF_MEMORY_ATOM: the list could not be converted to a string, or an
 *    allocation failed;
 *  - SYSTEM_LIMIT_ATOM: the converted string is longer than 255 bytes.
 * @return the atom term, or the invalid term on failure.
 */
static term list_to_atom(Context *ctx, term a_list, bool create_new, term *error_reason)
{
    if (UNLIKELY(!term_is_list(a_list))) {
        *error_reason = BADARG_ATOM;
        return term_invalid_term();
    }

    int ok;
    char *utf8 = interop_list_to_utf8_string(a_list, &ok);
    if (UNLIKELY(!ok)) {
        *error_reason = OUT_OF_MEMORY_ATOM;
        return term_invalid_term();
    }

    int utf8_len = strlen(utf8);
    if (UNLIKELY(utf8_len > 255)) {
        free(utf8);
        *error_reason = SYSTEM_LIMIT_ATOM;
        return term_invalid_term();
    }

    // build a length-prefixed copy: 1 length byte followed by the atom bytes
    uint8_t *prefixed = malloc(utf8_len + 1);
    if (IS_NULL_PTR(prefixed)) {
        free(utf8);
        *error_reason = OUT_OF_MEMORY_ATOM;
        return term_invalid_term();
    }
    prefixed[0] = utf8_len;
    memcpy(prefixed + 1, utf8, utf8_len);
    free(utf8);

    enum AtomTableCopyOpt lookup_opts = create_new
        ? AtomTableCopyAtom
        : (AtomTableCopyAtom | AtomTableAlreadyExisting);

    long atom_index
        = atom_table_ensure_atom(ctx->global->atom_table, (AtomString) prefixed, lookup_opts);
    // the temporary is released right away; presumably AtomTableCopyAtom makes
    // the table keep its own copy (to be confirmed against atom_table.c)
    free(prefixed);

    switch (atom_index) {
        case ATOM_TABLE_NOT_FOUND:
            *error_reason = BADARG_ATOM;
            return term_invalid_term();

        case ATOM_TABLE_ALLOC_FAIL:
            *error_reason = OUT_OF_MEMORY_ATOM;
            return term_invalid_term();

        default:
            return term_from_atom_index(atom_index);
    }
}
1590+
1591+
/**
 * BIF flavour of binary_to_atom/2: converts arg1 to an atom using the encoding
 * given as arg2, creating the atom when it does not exist yet.
 *
 * @param ctx the calling context.
 * @param fail_label label forwarded to RAISE_ERROR_BIF on failure.
 * @param live unused.
 * @param arg1 the binary to convert.
 * @param arg2 the encoding atom.
 * @return the atom term; on failure the error reason reported by
 * binary_to_atom is raised through RAISE_ERROR_BIF.
 */
term bif_erlang_binary_to_atom_2(Context *ctx, uint32_t fail_label, int live, term arg1, term arg2)
{
    UNUSED(live);

    term reason;
    term atom = binary_to_atom(ctx, arg1, arg2, true, &reason);
    if (UNLIKELY(term_is_invalid_term(atom))) {
        RAISE_ERROR_BIF(fail_label, reason);
    }

    return atom;
}
1602+
1603+
/**
 * BIF flavour of binary_to_existing_atom/2: converts arg1 to an atom using the
 * encoding given as arg2, without creating a new atom when it is not already
 * known.
 *
 * @param ctx the calling context.
 * @param fail_label label forwarded to RAISE_ERROR_BIF on failure.
 * @param live unused.
 * @param arg1 the binary to convert.
 * @param arg2 the encoding atom.
 * @return the atom term; on failure the error reason reported by
 * binary_to_atom is raised through RAISE_ERROR_BIF.
 */
term bif_erlang_binary_to_existing_atom_2(Context *ctx, uint32_t fail_label, int live, term arg1, term arg2)
{
    UNUSED(live);

    term reason;
    term atom = binary_to_atom(ctx, arg1, arg2, false, &reason);
    if (UNLIKELY(term_is_invalid_term(atom))) {
        RAISE_ERROR_BIF(fail_label, reason);
    }

    return atom;
}
1614+
1615+
/**
 * Shared implementation of the binary to atom conversions.
 *
 * @param ctx the calling context (its global atom table is used).
 * @param a_binary the term to convert.
 * @param encoding LATIN1_ATOM, UNICODE_ATOM or UTF8_ATOM.
 * @param create_new when false, the lookup is done with
 * AtomTableAlreadyExisting, so an unknown atom is reported as an error.
 * @param error_reason set to the error atom when the invalid term is returned:
 *  - BADARG_ATOM: a_binary is not a binary, the encoding is unknown, the bytes
 *    are not valid UTF-8, or the atom table returned ATOM_TABLE_NOT_FOUND;
 *  - SYSTEM_LIMIT_ATOM: the atom would be longer than 255 bytes (checked both
 *    on the input and on the UTF-8 output of the latin1 conversion);
 *  - OUT_OF_MEMORY_ATOM: an allocation failed.
 * @return the atom term, or the invalid term on failure.
 */
term binary_to_atom(Context *ctx, term a_binary, term encoding, bool create_new, term *error_reason)
{
    if (UNLIKELY(!term_is_binary(a_binary))) {
        *error_reason = BADARG_ATOM;
        return term_invalid_term();
    }

    const char *atom_string = term_binary_data(a_binary);
    size_t atom_string_len = term_binary_size(a_binary);
    if (UNLIKELY(atom_string_len > 255)) {
        *error_reason = SYSTEM_LIMIT_ATOM;
        return term_invalid_term();
    }

    // latin1 input needs a conversion only when it has non ASCII bytes:
    // pure ASCII is stored as it is
    bool encode_latin1_to_utf8 = false;
    if (UNLIKELY((encoding == LATIN1_ATOM)
            && !unicode_buf_is_ascii((const uint8_t *) atom_string, atom_string_len))) {
        encode_latin1_to_utf8 = true;
    } else if (UNLIKELY((encoding != LATIN1_ATOM) && (encoding != UNICODE_ATOM)
                   && (encoding != UTF8_ATOM))) {
        *error_reason = BADARG_ATOM;
        return term_invalid_term();
    }

    AtomString atom;
    if (LIKELY(!encode_latin1_to_utf8)) {
        // validate that the whole buffer is a sequence of well formed codepoints
        size_t i = 0;
        while (i < atom_string_len) {
            uint32_t codepoint;
            size_t codepoint_size;
            // only the bytes from i up to the end of the binary are available:
            // the decoder must not be told that more bytes can be read
            if (UNLIKELY(bitstring_utf8_decode((uint8_t *) atom_string + i, atom_string_len - i,
                             &codepoint, &codepoint_size)
                    != UnicodeTransformDecodeSuccess)) {
                *error_reason = BADARG_ATOM;
                return term_invalid_term();
            }
            i += codepoint_size;
        }

        // length-prefixed copy: 1 length byte followed by the atom bytes
        atom = malloc(atom_string_len + 1);
        if (IS_NULL_PTR(atom)) {
            *error_reason = OUT_OF_MEMORY_ATOM;
            return term_invalid_term();
        }
        ((uint8_t *) atom)[0] = atom_string_len;
        memcpy(((char *) atom) + 1, atom_string, atom_string_len);
    } else {
        // * 2 is the worst case size
        size_t buf_len = atom_string_len * 2;
        atom = malloc(buf_len + 1);
        if (IS_NULL_PTR(atom)) {
            *error_reason = OUT_OF_MEMORY_ATOM;
            return term_invalid_term();
        }
        uint8_t *atom_data = ((uint8_t *) atom) + 1;
        size_t out_pos = 0;
        for (size_t i = 0; i < atom_string_len; i++) {
            size_t out_size;
            bitstring_utf8_encode(((uint8_t) atom_string[i]), &atom_data[out_pos], &out_size);
            out_pos += out_size;
        }
        // the limit applies to the encoded size, that may exceed the input size
        if (out_pos > 255) {
            free((void *) atom);
            *error_reason = SYSTEM_LIMIT_ATOM;
            return term_invalid_term();
        }
        ((uint8_t *) atom)[0] = out_pos;
    }

    enum AtomTableCopyOpt atom_opts = AtomTableCopyAtom;
    if (!create_new) {
        atom_opts |= AtomTableAlreadyExisting;
    }
    long global_atom_index = atom_table_ensure_atom(ctx->global->atom_table, atom, atom_opts);
    // the temporary is released right away; presumably AtomTableCopyAtom makes
    // the table keep its own copy (to be confirmed against atom_table.c)
    free((void *) atom);
    if (UNLIKELY(global_atom_index == ATOM_TABLE_NOT_FOUND)) {
        *error_reason = BADARG_ATOM;
        return term_invalid_term();
    } else if (UNLIKELY(global_atom_index == ATOM_TABLE_ALLOC_FAIL)) {
        *error_reason = OUT_OF_MEMORY_ATOM;
        return term_invalid_term();
    }
    return term_from_atom_index(global_atom_index);
}

0 commit comments

Comments
 (0)