Skip to content

Commit 30d9077

Browse files
committed
Add support for the new atom chunk format introduced with OTP-28
The atom length prefix is now a variable-length integer, so atoms of up to 255 codepoints (which can take more than 255 bytes) can be encoded. AtomVM will not support this: it keeps a maximum of 255 bytes per atom regardless of the number of codepoints. When going embedded some features should be left out, and this is one of them. See also: - erlang/otp#8913 - erlang/otp#9336 Signed-off-by: Davide Bettio <davide@uninstall.it>
1 parent 312f7c6 commit 30d9077

File tree

4 files changed

+100
-14
lines changed

4 files changed

+100
-14
lines changed

src/libAtomVM/atom_table.c

Lines changed: 78 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -445,26 +445,71 @@ long atom_table_ensure_atom(struct AtomTable *table, AtomString string, enum Ato
445445
return new_index;
446446
}
447447

448-
int atom_table_ensure_atoms(
449-
struct AtomTable *table, const void *atoms, int count, int *translate_table)
448+
/**
 * @brief Decode a variable-length atom length prefix and advance the cursor.
 *
 * @details Two encodings are accepted, selected by bits 3 and 4 of the first
 * byte:
 *  - bit 3 (0x8) clear: one-byte form, the value is the upper 4 bits of the
 *    first byte (0..15);
 *  - bit 3 set and bit 4 (0x10) clear: two-byte form, the upper 3 bits of the
 *    first byte are the high bits and the second byte is the low 8 bits
 *    (0..2047).
 * Any other first byte is rejected.
 * NOTE(review): this looks like the BEAM compact-term integer encoding; the
 * low 3 tag bits are not inspected here -- confirm against the OTP format.
 *
 * The function has no way to know how many bytes are readable: the caller must
 * guarantee that the bytes it inspects (up to 2) are inside the buffer.
 *
 * @param len_bytes in/out pointer to the first byte of the encoded length; on
 *        success it is advanced past the prefix (by 1 or 2 bytes), on failure
 *        it is left untouched.
 * @return the decoded length, or -1 if the encoding is not supported.
 */
static inline int read_encoded_len(const uint8_t **len_bytes)
{
    const uint8_t *cursor = *len_bytes;
    uint8_t byte0 = cursor[0];

    if ((byte0 & 0x8) == 0) {
        // one-byte form: value lives in the upper nibble
        *len_bytes = cursor + 1;
        return byte0 >> 4;
    }

    if ((byte0 & 0x10) == 0) {
        // two-byte form: 3 high bits from byte0, 8 low bits from byte1
        uint8_t byte1 = cursor[1];
        *len_bytes = cursor + 2;
        return ((byte0 >> 5) << 8) | byte1;
    }

    // longer encodings are not supported; cursor is intentionally not advanced
    return -1;
}
465+
466+
int atom_table_ensure_atoms(struct AtomTable *table, const void *atoms, int count,
467+
int *translate_table, enum EnsureAtomsOpt opt)
468+
{
469+
bool is_long_format = (opt & EnsureLongEncoding) != 0;
470+
451471
SMP_WRLOCK(table);
452472

453473
int new_atoms_count = 0;
454474

455475
const uint8_t *current_atom = atoms;
456476

457477
for (int i = 0; i < count; i++) {
458-
struct HNode *node = get_node(table, current_atom);
478+
struct HNode *node;
479+
if (is_long_format) {
480+
int atom_len = read_encoded_len(&current_atom);
481+
if (UNLIKELY(atom_len < 0)) {
482+
fprintf(stderr, "Found invalid atom len.");
483+
SMP_UNLOCK(table);
484+
return ATOM_TABLE_INVALID_LEN;
485+
} else if (UNLIKELY(atom_len > 255)) {
486+
fprintf(stderr,
487+
"Unsupported atom length %i bytes.\n"
488+
"Unlike OTP >= 28, AtomVM supports a maximum of 255 bytes"
489+
"regardeless the number of codepoints.\n"
490+
"If you are seeing this error please open an issue on GitHub:\n"
491+
"https://github.com/atomvm/AtomVM/issues\n",
492+
atom_len);
493+
SMP_UNLOCK(table);
494+
return ATOM_TABLE_INVALID_LEN;
495+
}
496+
char tmp_old_fmt[256];
497+
tmp_old_fmt[0] = atom_len;
498+
memcpy(tmp_old_fmt + 1, current_atom, atom_len);
499+
node = get_node(table, tmp_old_fmt);
500+
current_atom += atom_len;
501+
} else {
502+
node = get_node(table, current_atom);
503+
uint8_t atom_len = current_atom[0];
504+
current_atom += 1 + atom_len;
505+
}
506+
459507
if (node) {
460508
translate_table[i] = node->index;
461509
} else {
462510
new_atoms_count++;
463511
translate_table[i] = ATOM_TABLE_NOT_FOUND;
464512
}
465-
466-
uint8_t atom_len = current_atom[0];
467-
current_atom += 1 + atom_len;
468513
}
469514

470515
maybe_rehash(table, new_atoms_count);
@@ -473,6 +518,19 @@ int atom_table_ensure_atoms(
473518
int remaining_atoms = new_atoms_count;
474519
struct HNodeGroup *node_group = table->last_node_group;
475520
for (int i = 0; i < count; i++) {
521+
522+
const uint8_t *to_be_copied = NULL;
523+
const uint8_t *next_atom = current_atom;
524+
uint8_t atom_len;
525+
if (is_long_format) {
526+
atom_len = read_encoded_len(&next_atom);
527+
to_be_copied = next_atom;
528+
next_atom += atom_len;
529+
} else {
530+
atom_len = current_atom[0];
531+
next_atom += 1 + atom_len;
532+
}
533+
476534
if (translate_table[i] == ATOM_TABLE_NOT_FOUND) {
477535
if (!table->last_node_group_avail) {
478536
node_group = new_node_group(table, remaining_atoms);
@@ -482,7 +540,19 @@ int atom_table_ensure_atoms(
482540
}
483541
}
484542

485-
unsigned long hash = sdbm_hash(current_atom, atom_string_len(current_atom));
543+
if (is_long_format) {
544+
uint8_t *atom_copy = malloc(atom_len + 1);
545+
if (IS_NULL_PTR(atom_copy)) {
546+
// we are not going to remove atoms that have already been added up to this one
547+
SMP_UNLOCK(table);
548+
return ATOM_TABLE_ALLOC_FAIL;
549+
}
550+
atom_copy[0] = atom_len;
551+
memcpy(atom_copy + 1, to_be_copied, atom_len);
552+
current_atom = atom_copy;
553+
}
554+
555+
unsigned long hash = sdbm_hash(current_atom, atom_len);
486556
unsigned long bucket_index = hash % table->capacity;
487557

488558
translate_table[i] = insert_node(table, node_group, bucket_index, current_atom);
@@ -491,8 +561,7 @@ int atom_table_ensure_atoms(
491561
break;
492562
}
493563
}
494-
uint8_t atom_len = current_atom[0];
495-
current_atom += 1 + atom_len;
564+
current_atom = next_atom;
496565
}
497566

498567
SMP_UNLOCK(table);

src/libAtomVM/atom_table.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,16 @@ extern "C" {
3131

3232
#define ATOM_TABLE_NOT_FOUND -1
3333
#define ATOM_TABLE_ALLOC_FAIL -2
34+
#define ATOM_TABLE_INVALID_LEN -3
3435

3536
struct AtomTable;
3637

38+
/**
 * @brief Options accepted by atom_table_ensure_atoms().
 *
 * @details The values are tested as bit flags (the implementation checks
 * `opt & EnsureLongEncoding`).
 */
enum EnsureAtomsOpt
{
    /** No option set: each atom is prefixed by a single length byte. */
    EnsureAtomsNoOpts = 0,
    /** Each atom is prefixed by a variable-length encoded length
     *  (atom chunk format introduced with OTP-28). */
    EnsureLongEncoding = 1
};
43+
3744
enum AtomTableCopyOpt
3845
{
3946
AtomTableNoOpts = 0,
@@ -56,8 +63,8 @@ AtomString atom_table_get_atom_string(struct AtomTable *table, long index);
5663

5764
long atom_table_get_index(struct AtomTable *table, AtomString string);
5865

59-
int atom_table_ensure_atoms(
60-
struct AtomTable *table, const void *atoms, int count, int *translate_table);
66+
int atom_table_ensure_atoms(struct AtomTable *table, const void *atoms, int count,
67+
int *translate_table, enum EnsureAtomsOpt opts);
6168

6269
int atom_table_cmp_using_atom_index(
6370
struct AtomTable *table, int t_atom_index, int other_atom_index);

src/libAtomVM/module.c

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,13 @@ static void parse_line_table(uint16_t **line_refs, struct ModuleFilename **filen
7272
static enum ModuleLoadResult module_populate_atoms_table(Module *this_module, uint8_t *table_data, GlobalContext *glb)
7373
{
7474
int atoms_count = READ_32_ALIGNED(table_data + 8);
75+
76+
enum EnsureAtomsOpt ensure_opts = EnsureAtomsNoOpts;
77+
if (atoms_count < 0) {
78+
ensure_opts = EnsureLongEncoding;
79+
atoms_count = -atoms_count;
80+
}
81+
7582
const char *current_atom = (const char *) table_data + 12;
7683

7784
this_module->local_atoms_to_global_table = calloc(atoms_count + 1, sizeof(int));
@@ -81,10 +88,12 @@ static enum ModuleLoadResult module_populate_atoms_table(Module *this_module, ui
8188
}
8289

8390
long ensure_result = atom_table_ensure_atoms(
84-
glb->atom_table, current_atom, atoms_count, this_module->local_atoms_to_global_table + 1);
85-
if (ensure_result == ATOM_TABLE_ALLOC_FAIL) {
91+
glb->atom_table, current_atom, atoms_count, this_module->local_atoms_to_global_table + 1, ensure_opts);
92+
if (UNLIKELY(ensure_result == ATOM_TABLE_ALLOC_FAIL)) {
8693
fprintf(stderr, "Cannot allocate memory while loading module (line: %i).\n", __LINE__);
8794
return MODULE_ERROR_FAILED_ALLOCATION;
95+
} else if (UNLIKELY(ensure_result == ATOM_TABLE_INVALID_LEN)) {
96+
return MODULE_ERROR_INVALID;
8897
}
8998

9099
return MODULE_LOAD_OK;

src/libAtomVM/module.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,8 @@ typedef struct Module Module;
144144
enum ModuleLoadResult
145145
{
146146
MODULE_LOAD_OK = 0,
147-
MODULE_ERROR_FAILED_ALLOCATION = 1
147+
MODULE_ERROR_FAILED_ALLOCATION = 1,
148+
MODULE_ERROR_INVALID = 2
148149
};
149150

150151
#ifdef ENABLE_ADVANCED_TRACE

0 commit comments

Comments
 (0)