 
 #define DEFAULT_SIZE 8
 #define CAPACITY_INCREASE 8
+#define MAX_ATOM_LEN ((1 << 12) - 1)
 
 #define ATOM_TABLE_THRESHOLD(capacity) (capacity + (capacity >> 2))
 #define ATOM_TABLE_NEW_CAPACITY(new_count) (new_count + CAPACITY_INCREASE)
 
 struct HNode
 {
     struct HNode *next;
-    AtomString key;
-    atom_index_t index;
+    const uint8_t *key;
+    uint32_t index : 20;
+    uint32_t len : 10;
 };
 
 struct HNodeGroup
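With `index : 20` and `len : 10`, both fields normally pack into a single 32-bit storage unit next to the `key` pointer, so dropping the length-prefixed `AtomString` does not grow the node. A minimal standalone sketch of the same packing (illustrative only, not part of the patch):

```c
#include <stdint.h>
#include <stdio.h>

/* Illustrative only: 20 bits of index and 10 bits of length normally share
 * one uint32_t storage unit, so the node stays at pointer + 4 bytes
 * (plus padding) on common ABIs. */
struct packed_node
{
    const uint8_t *key;
    uint32_t index : 20;
    uint32_t len : 10;
};

int main(void)
{
    struct packed_node n = { .key = (const uint8_t *) "ok", .index = 3, .len = 2 };
    printf("index=%u len=%u sizeof=%zu\n", (unsigned) n.index, (unsigned) n.len,
        sizeof(struct packed_node));
    return 0;
}
```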
@@ -148,7 +150,7 @@ static struct HNodeGroup *new_node_group(struct AtomTable *table, int len)
 
 static unsigned long sdbm_hash(const unsigned char *str, int len)
 {
-    unsigned long hash = 0;
+    unsigned long hash = len;
     int c;
 
     for (int i = 0; i < len; i++) {
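Seeding the hash with `len` rather than `0` folds the byte count into the state up front, so the empty atom no longer hashes to 0 and same-prefix keys of different lengths diverge from the first byte. A self-contained sketch of the seeded variant, assuming the loop body is the classic sdbm step (the body itself is outside this hunk):

```c
#include <stddef.h>

/* sdbm hash seeded with the input length; the loop body is assumed to be the
 * classic recurrence hash = c + (hash << 6) + (hash << 16) - hash. */
static unsigned long sdbm_hash_seeded(const unsigned char *str, size_t len)
{
    unsigned long hash = len; /* seed with the length instead of 0 */
    for (size_t i = 0; i < len; i++) {
        hash = str[i] + (hash << 6) + (hash << 16) - hash;
    }
    return hash;
}
```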
@@ -160,11 +162,11 @@ static unsigned long sdbm_hash(const unsigned char *str, int len)
 }
 
 static inline struct HNode *get_node_from_bucket(
-    const struct AtomTable *hash_table, unsigned long bucket_index, AtomString string)
+    const struct AtomTable *hash_table, unsigned long bucket_index, const uint8_t *string, size_t string_len)
 {
     struct HNode *node = hash_table->buckets[bucket_index];
     while (node) {
-        if (atom_are_equals(string, node->key)) {
+        if (node->len == string_len && memcmp(node->key, string, string_len) == 0) {
             return node;
         }
 
@@ -175,17 +177,17 @@ static inline struct HNode *get_node_from_bucket(
 }
 
 static inline struct HNode *get_node_with_hash(
-    const struct AtomTable *hash_table, AtomString string, unsigned long hash)
+    const struct AtomTable *hash_table, const uint8_t *string, size_t string_len, unsigned long hash)
 {
     unsigned long bucket_index = hash % hash_table->capacity;
-    return get_node_from_bucket(hash_table, bucket_index, string);
+    return get_node_from_bucket(hash_table, bucket_index, string, string_len);
 }
 
-static inline struct HNode *get_node(const struct AtomTable *hash_table, AtomString string)
+static inline struct HNode *get_node(const struct AtomTable *hash_table, const uint8_t *string, size_t string_len)
 {
-    unsigned long hash = sdbm_hash(string, atom_string_len(string));
+    unsigned long hash = sdbm_hash(string, string_len);
 
-    return get_node_with_hash(hash_table, string, hash);
+    return get_node_with_hash(hash_table, string, string_len, hash);
 }
 
 // TODO: this function needs use an efficient structure such as a skip list
@@ -208,33 +210,47 @@ static struct HNode *get_node_using_index(struct AtomTable *table, atom_index_t
     return NULL;
 }
 
-AtomString atom_table_get_atom_string(struct AtomTable *table, atom_index_t index)
+const uint8_t *atom_table_get_atom_string(struct AtomTable *table, atom_index_t index, size_t *out_size)
 {
+    const uint8_t *result;
     SMP_RDLOCK(table);
 
     struct HNode *node = get_node_using_index(table, index);
     if (IS_NULL_PTR(node)) {
         SMP_UNLOCK(table);
         return NULL;
     }
-
-    AtomString found_key = node->key;
+    result = node->key;
+    *out_size = node->len;
 
     SMP_UNLOCK(table);
-    return found_key;
+    return result;
 }
 
-int atom_table_cmp_using_atom_index(struct AtomTable *table, int t_atom_index, int other_atom_index)
+bool atom_table_is_equal_to_atom_string(struct AtomTable *table, atom_index_t t_atom_index, AtomString string)
 {
-    AtomString t_atom_string = atom_table_get_atom_string(table, t_atom_index);
+    size_t t_atom_len;
+    const uint8_t *t_atom_data = atom_table_get_atom_string(table, t_atom_index, &t_atom_len);
+    if (IS_NULL_PTR(t_atom_data)) {
+        return false;
+    }
 
-    int t_atom_len = atom_string_len(t_atom_string);
-    const char *t_atom_data = (const char *) atom_string_data(t_atom_string);
+    return (t_atom_len == atom_string_len(string)) && (memcmp(t_atom_data, atom_string_data(string), t_atom_len) == 0);
+}
 
-    AtomString other_atom_string = atom_table_get_atom_string(table, other_atom_index);
+int atom_table_cmp_using_atom_index(struct AtomTable *table, atom_index_t t_atom_index, atom_index_t other_atom_index)
+{
+    size_t t_atom_len;
+    const uint8_t *t_atom_data = atom_table_get_atom_string(table, t_atom_index, &t_atom_len);
+    if (IS_NULL_PTR(t_atom_data)) {
+        return -1;
+    }
 
-    int other_atom_len = atom_string_len(other_atom_string);
-    const char *other_atom_data = (const char *) atom_string_data(other_atom_string);
+    size_t other_atom_len;
+    const uint8_t *other_atom_data = atom_table_get_atom_string(table, other_atom_index, &other_atom_len);
+    if (IS_NULL_PTR(other_atom_data)) {
+        return 1;
+    }
 
     int cmp_size = (t_atom_len > other_atom_len) ? other_atom_len : t_atom_len;
 
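Callers of `atom_table_get_atom_string` now receive the raw bytes plus the length through an out parameter instead of a length-prefixed `AtomString`. A hedged usage sketch, assuming the declarations from `atom_table.h` are in scope (`print_atom` is a hypothetical helper, not part of the patch):

```c
#include <stdio.h>

/* Hypothetical helper: print an atom's text, which is not NUL-terminated,
 * using the length reported through the out parameter. */
static void print_atom(struct AtomTable *table, atom_index_t index)
{
    size_t len;
    const uint8_t *data = atom_table_get_atom_string(table, index, &len);
    if (data == NULL) {
        return; /* unknown index */
    }
    fwrite(data, 1, len, stdout);
}
```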
@@ -269,9 +285,11 @@ atom_ref_t atom_table_get_atom_ptr_and_len(struct AtomTable *table, atom_index_t
 
 char *atom_table_atom_to_new_cstring(struct AtomTable *table, atom_index_t atom_index)
 {
-    AtomString atom_string = atom_table_get_atom_string(table, atom_index);
-    size_t atom_len = atom_string_len(atom_string);
-    const uint8_t *atom_data = atom_string_data(atom_string);
+    size_t atom_len;
+    const uint8_t *atom_data = atom_table_get_atom_string(table, atom_index, &atom_len);
+    if (IS_NULL_PTR(atom_data)) {
+        return NULL;
+    }
 
     char *result = malloc(atom_len + 1);
     if (IS_NULL_PTR(result)) {
@@ -280,53 +298,13 @@ char *atom_table_atom_to_new_cstring(struct AtomTable *table, atom_index_t atom_
 
     memcpy(result, atom_data, atom_len);
     result[atom_len] = 0;
-
     return result;
 }
 
-bool atom_table_is_atom_ref_ascii(struct AtomTable *table, atom_ref_t atom)
+static inline void init_node(struct HNode *node, const uint8_t *atom_data, size_t atom_len, long index)
 {
-    SMP_RDLOCK(table);
-
-    struct HNode *node = (struct HNode *) atom;
-    const uint8_t *data = atom_string_data(node->key);
-    size_t len = atom_string_len(node->key);
-
-    bool result = unicode_buf_is_ascii(data, len);
-
-    SMP_UNLOCK(table);
-    return result;
-}
-
-void atom_table_write_bytes(struct AtomTable *table, atom_ref_t atom, size_t buf_len, void *outbuf)
-{
-    SMP_RDLOCK(table);
-
-    struct HNode *node = (struct HNode *) atom;
-    size_t len = atom_string_len(node->key);
-    if (len > buf_len) {
-        len = buf_len;
-    }
-
-    memcpy(outbuf, atom_string_data(node->key), len);
-
-    SMP_UNLOCK(table);
-}
-
-void atom_table_write_cstring(
-    struct AtomTable *table, atom_ref_t atom, size_t buf_len, char *outbuf)
-{
-    SMP_RDLOCK(table);
-
-    struct HNode *node = (struct HNode *) atom;
-    atom_string_to_c(node->key, outbuf, buf_len);
-
-    SMP_UNLOCK(table);
-}
-
-static inline void init_node(struct HNode *node, AtomString atom, long index)
-{
-    node->key = atom;
+    node->key = atom_data;
+    node->len = atom_len;
     node->index = index;
 }
 
@@ -339,14 +317,14 @@ static inline void insert_node_into_bucket(
 }
 
 static inline atom_index_t insert_node(struct AtomTable *table, struct HNodeGroup *node_group,
-    unsigned long bucket_index, AtomString string)
+    unsigned long bucket_index, const uint8_t *atom_data, size_t atom_len)
 {
     atom_index_t new_index = table->count;
     table->count++;
 
     struct HNode *node = &node_group->nodes[new_index - node_group->first_index];
     table->last_node_group_avail--;
-    init_node(node, string, new_index);
+    init_node(node, atom_data, atom_len, new_index);
     insert_node_into_bucket(table, bucket_index, node);
 
     return new_index;
@@ -376,9 +354,7 @@ static bool do_rehash(struct AtomTable *table, int new_capacity)
 
         for (int i = 0; i < group_count; i++) {
             struct HNode *node = &group->nodes[i];
-            AtomString key = node->key;
-
-            unsigned long hash = sdbm_hash(key, atom_string_len(key));
+            unsigned long hash = sdbm_hash(node->key, node->len);
             unsigned long bucket_index = hash % table->capacity;
 
             insert_node_into_bucket(table, bucket_index, node);
@@ -402,13 +378,13 @@ static inline bool maybe_rehash(struct AtomTable *table, int new_entries)
     return do_rehash(table, new_capacity);
 }
 
-enum AtomTableEnsureAtomResult atom_table_ensure_atom(struct AtomTable *table, AtomString string, enum AtomTableCopyOpt opts, atom_index_t *result)
+enum AtomTableEnsureAtomResult atom_table_ensure_atom(struct AtomTable *table, const uint8_t *atom_data, size_t atom_len, enum AtomTableCopyOpt opts, atom_index_t *result)
 {
-    unsigned long hash = sdbm_hash(string, atom_string_len(string));
+    unsigned long hash = sdbm_hash(atom_data, atom_len);
     SMP_WRLOCK(table);
     unsigned long bucket_index = hash % table->capacity;
 
-    struct HNode *node = get_node_from_bucket(table, bucket_index, string);
+    struct HNode *node = get_node_from_bucket(table, bucket_index, atom_data, atom_len);
     if (node) {
         SMP_UNLOCK(table);
         *result = node->index;
@@ -428,29 +404,27 @@ enum AtomTableEnsureAtomResult atom_table_ensure_atom(struct AtomTable *table, A
         }
     }
 
-    AtomString maybe_copied = string;
     if (opts & AtomTableCopyAtom) {
-        uint8_t len = *((uint8_t *) string);
-        uint8_t *buf = malloc(1 + len);
+        uint8_t *buf = malloc(atom_len);
         if (IS_NULL_PTR(buf)) {
             SMP_UNLOCK(table);
             return AtomTableEnsureAtomAllocFail;
         }
-        memcpy(buf, string, 1 + len);
-        maybe_copied = buf;
+        memcpy(buf, atom_data, atom_len);
+        atom_data = buf;
     }
 
     if (maybe_rehash(table, 1)) {
         bucket_index = hash % table->capacity;
     }
 
-    *result = insert_node(table, node_group, bucket_index, maybe_copied);
+    *result = insert_node(table, node_group, bucket_index, atom_data, atom_len);
 
     SMP_UNLOCK(table);
     return AtomTableEnsureAtomOk;
 }
 
-static inline int read_encoded_len(const uint8_t **len_bytes)
+static inline ssize_t read_encoded_len(const uint8_t **len_bytes)
 {
     uint8_t byte0 = (*len_bytes)[0];
 
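With the new signature, callers pass a plain byte pointer and a length; `AtomTableCopyAtom` still asks the table to take its own copy when the caller's buffer may not outlive the entry. A hedged usage sketch (hypothetical caller, assuming the `atom_table.h` declarations are in scope):

```c
#include <stdbool.h>

/* Hypothetical caller: intern a stack buffer, asking the table to copy it. */
static bool intern_example(struct AtomTable *table, atom_index_t *out_index)
{
    const uint8_t name[] = { 'h', 'e', 'l', 'l', 'o' };
    enum AtomTableEnsureAtomResult r
        = atom_table_ensure_atom(table, name, sizeof(name), AtomTableCopyAtom, out_index);
    return r == AtomTableEnsureAtomOk;
}
```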
@@ -471,7 +445,7 @@ static inline int read_encoded_len(const uint8_t **len_bytes)
 // -1 is not a valid atom index as we're limited to 2^20
 #define ATOM_TABLE_NOT_FOUND_MARKER ((atom_index_t) -1)
 
-enum AtomTableEnsureAtomResult atom_table_ensure_atoms(struct AtomTable *table, const void *atoms, int count,
+enum AtomTableEnsureAtomResult atom_table_ensure_atoms(struct AtomTable *table, const void *atoms, size_t count,
     atom_index_t *translate_table, enum EnsureAtomsOpt opt)
 {
     bool is_long_format = (opt & EnsureLongEncoding) != 0;
@@ -482,35 +456,22 @@ enum AtomTableEnsureAtomResult atom_table_ensure_atoms(struct AtomTable *table,
 
     const uint8_t *current_atom = atoms;
 
-    for (int i = 0; i < count; i++) {
+    for (size_t i = 0; i < count; i++) {
         struct HNode *node;
+        ssize_t atom_len;
         if (is_long_format) {
-            int atom_len = read_encoded_len(&current_atom);
-            if (UNLIKELY(atom_len < 0)) {
+            atom_len = read_encoded_len(&current_atom);
+            if (UNLIKELY(atom_len < 0 || atom_len > MAX_ATOM_LEN)) {
                 fprintf(stderr, "Found invalid atom len.");
                 SMP_UNLOCK(table);
                 return AtomTableEnsureAtomInvalidLen;
-            } else if (UNLIKELY(atom_len > 255)) {
-                fprintf(stderr,
-                    "Unsupported atom length %i bytes.\n"
-                    "Unlike OTP >= 28, AtomVM supports a maximum of 255 bytes"
-                    "regardeless the number of codepoints.\n"
-                    "If you are seeing this error please open an issue on GitHub:\n"
-                    "https://github.com/atomvm/AtomVM/issues\n",
-                    atom_len);
-                SMP_UNLOCK(table);
-                return AtomTableEnsureAtomInvalidLen;
             }
-            char tmp_old_fmt[256];
-            tmp_old_fmt[0] = atom_len;
-            memcpy(tmp_old_fmt + 1, current_atom, atom_len);
-            node = get_node(table, tmp_old_fmt);
-            current_atom += atom_len;
         } else {
-            node = get_node(table, current_atom);
-            uint8_t atom_len = current_atom[0];
-            current_atom += 1 + atom_len;
+            atom_len = current_atom[0];
+            current_atom++;
         }
+        node = get_node(table, current_atom, atom_len);
+        current_atom += atom_len;
 
         if (node) {
             translate_table[i] = node->index;
@@ -525,18 +486,14 @@ enum AtomTableEnsureAtomResult atom_table_ensure_atoms(struct AtomTable *table,
     current_atom = atoms;
     int remaining_atoms = new_atoms_count;
     struct HNodeGroup *node_group = table->last_node_group;
-    for (int i = 0; i < count; i++) {
-
-        const uint8_t *to_be_copied = NULL;
-        const uint8_t *next_atom = current_atom;
-        uint8_t atom_len;
+    for (size_t i = 0; i < count; i++) {
+        size_t atom_len;
         if (is_long_format) {
-            atom_len = read_encoded_len(&next_atom);
-            to_be_copied = next_atom;
-            next_atom += atom_len;
+            // Size was checked above
+            atom_len = (size_t) read_encoded_len(&current_atom);
         } else {
             atom_len = current_atom[0];
-            next_atom += 1 + atom_len;
+            current_atom++;
         }
 
         if (translate_table[i] == ATOM_TABLE_NOT_FOUND_MARKER) {
@@ -548,28 +505,16 @@ enum AtomTableEnsureAtomResult atom_table_ensure_atoms(struct AtomTable *table,
                 }
             }
 
-            if (is_long_format) {
-                uint8_t *atom_copy = malloc(atom_len + 1);
-                if (IS_NULL_PTR(atom_copy)) {
-                    // we are not going to remove atoms that have already been added up to this one
-                    SMP_UNLOCK(table);
-                    return AtomTableEnsureAtomAllocFail;
-                }
-                atom_copy[0] = atom_len;
-                memcpy(atom_copy + 1, to_be_copied, atom_len);
-                current_atom = atom_copy;
-            }
-
             unsigned long hash = sdbm_hash(current_atom, atom_len);
             unsigned long bucket_index = hash % table->capacity;
 
-            translate_table[i] = insert_node(table, node_group, bucket_index, current_atom);
+            translate_table[i] = insert_node(table, node_group, bucket_index, current_atom, atom_len);
             remaining_atoms--;
             if (remaining_atoms == 0) {
                 break;
             }
         }
-        current_atom = next_atom;
+        current_atom += atom_len;
     }
 
     SMP_UNLOCK(table);
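`atom_table_ensure_atoms` keeps its two-pass shape: the first loop resolves atoms that already exist and marks the rest with `ATOM_TABLE_NOT_FOUND_MARKER`, and the second loop inserts only the marked ones, now handing `insert_node` the data pointer and length directly instead of building a temporary length-prefixed copy. A hedged usage sketch for the short (length-byte) encoding shown above; the "no options" value is written as a cast because its enum constant name does not appear in this diff:

```c
#include <stdbool.h>

/* Hypothetical caller: two atoms in the short encoding, i.e. a length byte
 * followed by that many bytes of atom text. */
static bool ensure_two_atoms(struct AtomTable *table, atom_index_t out[2])
{
    const uint8_t atoms[] = { 2, 'o', 'k', 5, 'e', 'r', 'r', 'o', 'r' };
    enum AtomTableEnsureAtomResult r = atom_table_ensure_atoms(
        table, atoms, 2, out, (enum EnsureAtomsOpt) 0 /* no long encoding */);
    return r == AtomTableEnsureAtomOk;
}
```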