atomvm
diff --git a/‎src/libAtomVM/atom_table.c
+67-147 b/‎src/libAtomVM/atom_table.c
+67-147
@@ -42,15 +42,17 @@
 
 #define DEFAULT_SIZE 8
 #define CAPACITY_INCREASE 8
+#define MAX_ATOM_LEN ((1 << 12) - 1)
 
 #define ATOM_TABLE_THRESHOLD(capacity) (capacity + (capacity >> 2))
 #define ATOM_TABLE_NEW_CAPACITY(new_count) (new_count + CAPACITY_INCREASE)
 
 struct HNode
 {
     struct HNode *next;
-    AtomString key;
-    atom_index_t index;
+    const uint8_t *key;
+    uint32_t index : 20;
+    uint32_t len : 10;
 };
 
 struct HNodeGroup
@@ -148,7 +150,7 @@ static struct HNodeGroup *new_node_group(struct AtomTable *table, int len)
 
 static unsigned long sdbm_hash(const unsigned char *str, int len)
 {
-    unsigned long hash = 0;
+    unsigned long hash = len;
     int c;
 
     for (int i = 0; i < len; i++) {
@@ -160,11 +162,11 @@ static unsigned long sdbm_hash(const unsigned char *str, int len)
 }
 
 static inline struct HNode *get_node_from_bucket(
-    const struct AtomTable *hash_table, unsigned long bucket_index, AtomString string)
+    const struct AtomTable *hash_table, unsigned long bucket_index, const uint8_t *string, size_t string_len)
 {
     struct HNode *node = hash_table->buckets[bucket_index];
     while (node) {
-        if (atom_are_equals(string, node->key)) {
+        if (node->len == string_len && memcmp(node->key, string, string_len) == 0) {
             return node;
         }
 
@@ -175,17 +177,17 @@ static inline struct HNode *get_node_from_bucket(
 }
 
 static inline struct HNode *get_node_with_hash(
-    const struct AtomTable *hash_table, AtomString string, unsigned long hash)
+    const struct AtomTable *hash_table, const uint8_t *string, size_t string_len, unsigned long hash)
 {
     unsigned long bucket_index = hash % hash_table->capacity;
-    return get_node_from_bucket(hash_table, bucket_index, string);
+    return get_node_from_bucket(hash_table, bucket_index, string, string_len);
 }
 
-static inline struct HNode *get_node(const struct AtomTable *hash_table, AtomString string)
+static inline struct HNode *get_node(const struct AtomTable *hash_table, const uint8_t *string, size_t string_len)
 {
-    unsigned long hash = sdbm_hash(string, atom_string_len(string));
+    unsigned long hash = sdbm_hash(string, string_len);
 
-    return get_node_with_hash(hash_table, string, hash);
+    return get_node_with_hash(hash_table, string, string_len, hash);
 }
 
 // TODO: this function needs use an efficient structure such as a skip list
@@ -208,33 +210,47 @@ static struct HNode *get_node_using_index(struct AtomTable *table, atom_index_t
     return NULL;
 }
 
-AtomString atom_table_get_atom_string(struct AtomTable *table, atom_index_t index)
+const uint8_t *atom_table_get_atom_string(struct AtomTable *table, atom_index_t index, size_t *out_size)
 {
+    const uint8_t *result;
     SMP_RDLOCK(table);
 
     struct HNode *node = get_node_using_index(table, index);
     if (IS_NULL_PTR(node)) {
         SMP_UNLOCK(table);
         return NULL;
     }
-
-    AtomString found_key = node->key;
+    result = node->key;
+    *out_size = node->len;
 
     SMP_UNLOCK(table);
-    return found_key;
+    return result;
 }
 
-int atom_table_cmp_using_atom_index(struct AtomTable *table, int t_atom_index, int other_atom_index)
+bool atom_table_is_equal_to_atom_string(struct AtomTable *table, atom_index_t t_atom_index, AtomString string)
 {
-    AtomString t_atom_string = atom_table_get_atom_string(table, t_atom_index);
+    size_t t_atom_len;
+    const uint8_t *t_atom_data = atom_table_get_atom_string(table, t_atom_index, &t_atom_len);
+    if (IS_NULL_PTR(t_atom_data)) {
+        return false;
+    }
 
-    int t_atom_len = atom_string_len(t_atom_string);
-    const char *t_atom_data = (const char *) atom_string_data(t_atom_string);
+    return (t_atom_len == atom_string_len(string)) && (memcmp(t_atom_data, atom_string_data(string), t_atom_len) == 0);
+}
 
-    AtomString other_atom_string = atom_table_get_atom_string(table, other_atom_index);
+int atom_table_cmp_using_atom_index(struct AtomTable *table, atom_index_t t_atom_index, atom_index_t other_atom_index)
+{
+    size_t t_atom_len;
+    const uint8_t *t_atom_data = atom_table_get_atom_string(table, t_atom_index, &t_atom_len);
+    if (IS_NULL_PTR(t_atom_data)) {
+        return -1;
+    }
 
-    int other_atom_len = atom_string_len(other_atom_string);
-    const char *other_atom_data = (const char *) atom_string_data(other_atom_string);
+    size_t other_atom_len;
+    const uint8_t *other_atom_data = atom_table_get_atom_string(table, other_atom_index, &other_atom_len);
+    if (IS_NULL_PTR(other_atom_data)) {
+        return 1;
+    }
 
     int cmp_size = (t_atom_len > other_atom_len) ? other_atom_len : t_atom_len;
 
@@ -267,73 +283,10 @@ atom_ref_t atom_table_get_atom_ptr_and_len(struct AtomTable *table, atom_index_t
     return node;
 }
 
-char *atom_table_atom_to_new_cstring(struct AtomTable *table, atom_index_t atom_index, const char *suffix)
-{
-    AtomString atom_string = atom_table_get_atom_string(table, atom_index);
-    size_t atom_len = atom_string_len(atom_string);
-    const uint8_t *atom_data = atom_string_data(atom_string);
-    size_t suffix_len = 0;
-    if (suffix) {
-        suffix_len = strlen(suffix);
-    }
-
-    char *result = malloc(atom_len + suffix_len + 1);
-    if (IS_NULL_PTR(result)) {
-        return NULL;
-    }
-
-    memcpy(result, atom_data, atom_len);
-    if (suffix) {
-        memcpy(result + atom_len, suffix, suffix_len);
-    }
-    result[atom_len + suffix_len] = 0;
-
-    return result;
-}
-
-bool atom_table_is_atom_ref_ascii(struct AtomTable *table, atom_ref_t atom)
+static inline void init_node(struct HNode *node, const uint8_t *atom_data, size_t atom_len, long index)
 {
-    SMP_RDLOCK(table);
-
-    struct HNode *node = (struct HNode *) atom;
-    const uint8_t *data = atom_string_data(node->key);
-    size_t len = atom_string_len(node->key);
-
-    bool result = unicode_buf_is_ascii(data, len);
-
-    SMP_UNLOCK(table);
-    return result;
-}
-
-void atom_table_write_bytes(struct AtomTable *table, atom_ref_t atom, size_t buf_len, void *outbuf)
-{
-    SMP_RDLOCK(table);
-
-    struct HNode *node = (struct HNode *) atom;
-    size_t len = atom_string_len(node->key);
-    if (len > buf_len) {
-        len = buf_len;
-    }
-
-    memcpy(outbuf, atom_string_data(node->key), len);
-
-    SMP_UNLOCK(table);
-}
-
-void atom_table_write_cstring(
-    struct AtomTable *table, atom_ref_t atom, size_t buf_len, char *outbuf)
-{
-    SMP_RDLOCK(table);
-
-    struct HNode *node = (struct HNode *) atom;
-    atom_string_to_c(node->key, outbuf, buf_len);
-
-    SMP_UNLOCK(table);
-}
-
-static inline void init_node(struct HNode *node, AtomString atom, long index)
-{
-    node->key = atom;
+    node->key = atom_data;
+    node->len = atom_len;
     node->index = index;
 }
 
@@ -346,14 +299,14 @@ static inline void insert_node_into_bucket(
 }
 
 static inline atom_index_t insert_node(struct AtomTable *table, struct HNodeGroup *node_group,
-    unsigned long bucket_index, AtomString string)
+    unsigned long bucket_index, const uint8_t *atom_data, size_t atom_len)
 {
     atom_index_t new_index = table->count;
     table->count++;
 
     struct HNode *node = &node_group->nodes[new_index - node_group->first_index];
     table->last_node_group_avail--;
-    init_node(node, string, new_index);
+    init_node(node, atom_data, atom_len, new_index);
     insert_node_into_bucket(table, bucket_index, node);
 
     return new_index;
@@ -383,9 +336,7 @@ static bool do_rehash(struct AtomTable *table, int new_capacity)
 
         for (int i = 0; i < group_count; i++) {
             struct HNode *node = &group->nodes[i];
-            AtomString key = node->key;
-
-            unsigned long hash = sdbm_hash(key, atom_string_len(key));
+            unsigned long hash = sdbm_hash(node->key, node->len);
             unsigned long bucket_index = hash % table->capacity;
 
             insert_node_into_bucket(table, bucket_index, node);
@@ -409,13 +360,13 @@ static inline bool maybe_rehash(struct AtomTable *table, int new_entries)
     return do_rehash(table, new_capacity);
 }
 
-enum AtomTableEnsureAtomResult atom_table_ensure_atom(struct AtomTable *table, AtomString string, enum AtomTableCopyOpt opts, atom_index_t *result)
+enum AtomTableEnsureAtomResult atom_table_ensure_atom(struct AtomTable *table, const uint8_t *atom_data, size_t atom_len, enum AtomTableCopyOpt opts, atom_index_t *result)
 {
-    unsigned long hash = sdbm_hash(string, atom_string_len(string));
+    unsigned long hash = sdbm_hash(atom_data, atom_len);
     SMP_WRLOCK(table);
     unsigned long bucket_index = hash % table->capacity;
 
-    struct HNode *node = get_node_from_bucket(table, bucket_index, string);
+    struct HNode *node = get_node_from_bucket(table, bucket_index, atom_data, atom_len);
     if (node) {
         SMP_UNLOCK(table);
         *result = node->index;
@@ -435,29 +386,27 @@ enum AtomTableEnsureAtomResult atom_table_ensure_atom(struct AtomTable *table, A
         }
     }
 
-    AtomString maybe_copied = string;
     if (opts & AtomTableCopyAtom) {
-        uint8_t len = *((uint8_t *) string);
-        uint8_t *buf = malloc(1 + len);
+        uint8_t *buf = malloc(atom_len);
         if (IS_NULL_PTR(buf)) {
             SMP_UNLOCK(table);
             return AtomTableEnsureAtomAllocFail;
         }
-        memcpy(buf, string, 1 + len);
-        maybe_copied = buf;
+        memcpy(buf, atom_data, atom_len);
+        atom_data = buf;
     }
 
     if (maybe_rehash(table, 1)) {
         bucket_index = hash % table->capacity;
     }
 
-    *result = insert_node(table, node_group, bucket_index, maybe_copied);
+    *result = insert_node(table, node_group, bucket_index, atom_data, atom_len);
 
     SMP_UNLOCK(table);
     return AtomTableEnsureAtomOk;
 }
 
-static inline int read_encoded_len(const uint8_t **len_bytes)
+static inline ssize_t read_encoded_len(const uint8_t **len_bytes)
 {
     uint8_t byte0 = (*len_bytes)[0];
 
@@ -478,7 +427,7 @@ static inline int read_encoded_len(const uint8_t **len_bytes)
 // -1 is not a valid atom index as we're limited to 2^20
 #define ATOM_TABLE_NOT_FOUND_MARKER ((atom_index_t) -1)
 
-enum AtomTableEnsureAtomResult atom_table_ensure_atoms(struct AtomTable *table, const void *atoms, int count,
+enum AtomTableEnsureAtomResult atom_table_ensure_atoms(struct AtomTable *table, const void *atoms, size_t count,
     atom_index_t *translate_table, enum EnsureAtomsOpt opt)
 {
     bool is_long_format = (opt & EnsureLongEncoding) != 0;
@@ -489,35 +438,22 @@ enum AtomTableEnsureAtomResult atom_table_ensure_atoms(struct AtomTable *table,
 
     const uint8_t *current_atom = atoms;
 
-    for (int i = 0; i < count; i++) {
+    for (size_t i = 0; i < count; i++) {
         struct HNode *node;
+        ssize_t atom_len;
         if (is_long_format) {
-            int atom_len = read_encoded_len(&current_atom);
-            if (UNLIKELY(atom_len < 0)) {
+            atom_len = read_encoded_len(&current_atom);
+            if (UNLIKELY(atom_len < 0 || atom_len > MAX_ATOM_LEN)) {
                 fprintf(stderr, "Found invalid atom len.");
                 SMP_UNLOCK(table);
                 return AtomTableEnsureAtomInvalidLen;
-            } else if (UNLIKELY(atom_len > 255)) {
-                fprintf(stderr,
-                    "Unsupported atom length %i bytes.\n"
-                    "Unlike OTP >= 28, AtomVM supports a maximum of 255 bytes"
-                    "regardeless the number of codepoints.\n"
-                    "If you are seeing this error please open an issue on GitHub:\n"
-                    "https://github.com/atomvm/AtomVM/issues\n",
-                    atom_len);
-                SMP_UNLOCK(table);
-                return AtomTableEnsureAtomInvalidLen;
             }
-            char tmp_old_fmt[256];
-            tmp_old_fmt[0] = atom_len;
-            memcpy(tmp_old_fmt + 1, current_atom, atom_len);
-            node = get_node(table, tmp_old_fmt);
-            current_atom += atom_len;
         } else {
-            node = get_node(table, current_atom);
-            uint8_t atom_len = current_atom[0];
-            current_atom += 1 + atom_len;
+            atom_len = current_atom[0];
+            current_atom++;
         }
+        node = get_node(table, current_atom, atom_len);
+        current_atom += atom_len;
 
         if (node) {
             translate_table[i] = node->index;
@@ -532,18 +468,14 @@ enum AtomTableEnsureAtomResult atom_table_ensure_atoms(struct AtomTable *table,
     current_atom = atoms;
     int remaining_atoms = new_atoms_count;
     struct HNodeGroup *node_group = table->last_node_group;
-    for (int i = 0; i < count; i++) {
-
-        const uint8_t *to_be_copied = NULL;
-        const uint8_t *next_atom = current_atom;
-        uint8_t atom_len;
+    for (size_t i = 0; i < count; i++) {
+        size_t atom_len;
         if (is_long_format) {
-            atom_len = read_encoded_len(&next_atom);
-            to_be_copied = next_atom;
-            next_atom += atom_len;
+            // Size was checked above
+            atom_len = (size_t) read_encoded_len(&current_atom);
         } else {
             atom_len = current_atom[0];
-            next_atom += 1 + atom_len;
+            current_atom++;
         }
 
         if (translate_table[i] == ATOM_TABLE_NOT_FOUND_MARKER) {
@@ -555,28 +487,16 @@ enum AtomTableEnsureAtomResult atom_table_ensure_atoms(struct AtomTable *table,
                 }
             }
 
-            if (is_long_format) {
-                uint8_t *atom_copy = malloc(atom_len + 1);
-                if (IS_NULL_PTR(atom_copy)) {
-                    // we are not going to remove atoms that have already been added up to this one
-                    SMP_UNLOCK(table);
-                    return AtomTableEnsureAtomAllocFail;
-                }
-                atom_copy[0] = atom_len;
-                memcpy(atom_copy + 1, to_be_copied, atom_len);
-                current_atom = atom_copy;
-            }
-
             unsigned long hash = sdbm_hash(current_atom, atom_len);
             unsigned long bucket_index = hash % table->capacity;
 
-            translate_table[i] = insert_node(table, node_group, bucket_index, current_atom);
+            translate_table[i] = insert_node(table, node_group, bucket_index, current_atom, atom_len);
             remaining_atoms--;
             if (remaining_atoms == 0) {
                 break;
             }
         }
-        current_atom = next_atom;
+        current_atom += atom_len;
     }
 
     SMP_UNLOCK(table);