@@ -445,26 +445,71 @@ long atom_table_ensure_atom(struct AtomTable *table, AtomString string, enum Ato
445
445
return new_index ;
446
446
}
447
447
448
- int atom_table_ensure_atoms (
449
- struct AtomTable * table , const void * atoms , int count , int * translate_table )
448
/**
 * Decode a variable-width length prefix and step the caller's read pointer
 * past it.
 *
 * Two layouts are recognised, selected by bits 3 and 4 of the first byte:
 *   - bit 3 clear: one byte, the length is the upper four bits (0..15);
 *   - bit 3 set, bit 4 clear: two bytes, the upper three bits of the first
 *     byte are the high part and the second byte is the low eight bits
 *     (0..2047).
 *
 * NOTE(review): this looks like the length prefix of the BEAM compact term
 * encoding used by the long atom format - confirm against the caller.
 *
 * @param len_bytes in/out pointer to the first byte of the length prefix; on
 *                  success it is advanced by the number of bytes consumed,
 *                  on failure it is left untouched.
 * @return the decoded length, or -1 when both bit 3 and bit 4 are set (a
 *         wider layout this function does not decode).
 */
static inline int read_encoded_len(const uint8_t **len_bytes)
{
    const uint8_t *cursor = *len_bytes;
    const uint8_t head = cursor[0];

    // Single-byte layout: the value is the upper nibble.
    if (!(head & 0x8)) {
        *len_bytes = cursor + 1;
        return head >> 4;
    }

    // Bits 3 and 4 both set: not decoded here, pointer is not moved.
    if (head & 0x10) {
        return -1;
    }

    // Two-byte layout: 3 high bits from the first byte, 8 low bits from the second.
    const int high_bits = head >> 5;
    const int low_bits = cursor[1];
    *len_bytes = cursor + 2;
    return (high_bits << 8) | low_bits;
}
465
+
466
+ int atom_table_ensure_atoms (struct AtomTable * table , const void * atoms , int count ,
467
+ int * translate_table , enum EnsureAtomsOpt opt )
468
+ {
469
+ bool is_long_format = (opt & EnsureLongEncoding ) != 0 ;
470
+
451
471
SMP_WRLOCK (table );
452
472
453
473
int new_atoms_count = 0 ;
454
474
455
475
const uint8_t * current_atom = atoms ;
456
476
457
477
for (int i = 0 ; i < count ; i ++ ) {
458
- struct HNode * node = get_node (table , current_atom );
478
+ struct HNode * node ;
479
+ if (is_long_format ) {
480
+ int atom_len = read_encoded_len (& current_atom );
481
+ if (UNLIKELY (atom_len < 0 )) {
482
+ fprintf (stderr , "Found invalid atom len." );
483
+ SMP_UNLOCK (table );
484
+ return ATOM_TABLE_INVALID_LEN ;
485
+ } else if (UNLIKELY (atom_len > 255 )) {
486
+ fprintf (stderr ,
487
+ "Unsupported atom length %i bytes.\n"
488
+ "Unlike OTP >= 28, AtomVM supports a maximum of 255 bytes"
489
+ "regardeless the number of codepoints.\n"
490
+ "If you are seeing this error please open an issue on GitHub:\n"
491
+ "https://github.com/atomvm/AtomVM/issues\n" ,
492
+ atom_len );
493
+ SMP_UNLOCK (table );
494
+ return ATOM_TABLE_INVALID_LEN ;
495
+ }
496
+ char tmp_old_fmt [256 ];
497
+ tmp_old_fmt [0 ] = atom_len ;
498
+ memcpy (tmp_old_fmt + 1 , current_atom , atom_len );
499
+ node = get_node (table , tmp_old_fmt );
500
+ current_atom += atom_len ;
501
+ } else {
502
+ node = get_node (table , current_atom );
503
+ uint8_t atom_len = current_atom [0 ];
504
+ current_atom += 1 + atom_len ;
505
+ }
506
+
459
507
if (node ) {
460
508
translate_table [i ] = node -> index ;
461
509
} else {
462
510
new_atoms_count ++ ;
463
511
translate_table [i ] = ATOM_TABLE_NOT_FOUND ;
464
512
}
465
-
466
- uint8_t atom_len = current_atom [0 ];
467
- current_atom += 1 + atom_len ;
468
513
}
469
514
470
515
maybe_rehash (table , new_atoms_count );
@@ -473,6 +518,19 @@ int atom_table_ensure_atoms(
473
518
int remaining_atoms = new_atoms_count ;
474
519
struct HNodeGroup * node_group = table -> last_node_group ;
475
520
for (int i = 0 ; i < count ; i ++ ) {
521
+
522
+ const uint8_t * to_be_copied = NULL ;
523
+ const uint8_t * next_atom = current_atom ;
524
+ uint8_t atom_len ;
525
+ if (is_long_format ) {
526
+ atom_len = read_encoded_len (& next_atom );
527
+ to_be_copied = next_atom ;
528
+ next_atom += atom_len ;
529
+ } else {
530
+ atom_len = current_atom [0 ];
531
+ next_atom += 1 + atom_len ;
532
+ }
533
+
476
534
if (translate_table [i ] == ATOM_TABLE_NOT_FOUND ) {
477
535
if (!table -> last_node_group_avail ) {
478
536
node_group = new_node_group (table , remaining_atoms );
@@ -482,7 +540,19 @@ int atom_table_ensure_atoms(
482
540
}
483
541
}
484
542
485
- unsigned long hash = sdbm_hash (current_atom , atom_string_len (current_atom ));
543
+ if (is_long_format ) {
544
+ uint8_t * atom_copy = malloc (atom_len + 1 );
545
+ if (IS_NULL_PTR (atom_copy )) {
546
+ // we are not going to remove atoms that have already been added up to this one
547
+ SMP_UNLOCK (table );
548
+ return ATOM_TABLE_ALLOC_FAIL ;
549
+ }
550
+ atom_copy [0 ] = atom_len ;
551
+ memcpy (atom_copy + 1 , to_be_copied , atom_len );
552
+ current_atom = atom_copy ;
553
+ }
554
+
555
+ unsigned long hash = sdbm_hash (current_atom , atom_len );
486
556
unsigned long bucket_index = hash % table -> capacity ;
487
557
488
558
translate_table [i ] = insert_node (table , node_group , bucket_index , current_atom );
@@ -491,8 +561,7 @@ int atom_table_ensure_atoms(
491
561
break ;
492
562
}
493
563
}
494
- uint8_t atom_len = current_atom [0 ];
495
- current_atom += 1 + atom_len ;
564
+ current_atom = next_atom ;
496
565
}
497
566
498
567
SMP_UNLOCK (table );
0 commit comments