 
 #define DEFAULT_SIZE 8
 #define CAPACITY_INCREASE 8
+#define MAX_ATOM_LEN ((1 << 12) - 1)
 
 #define ATOM_TABLE_THRESHOLD(capacity) (capacity + (capacity >> 2))
 #define ATOM_TABLE_NEW_CAPACITY(new_count) (new_count + CAPACITY_INCREASE)
 
 struct HNode
 {
     struct HNode *next;
-    AtomString key;
-    atom_index_t index;
+    const uint8_t *key;
+    uint32_t index : 20;
+    uint32_t len : 12;
 };
 
 struct HNodeGroup
@@ -148,7 +150,7 @@ static struct HNodeGroup *new_node_group(struct AtomTable *table, int len)
 
 static unsigned long sdbm_hash(const unsigned char *str, int len)
 {
-    unsigned long hash = 0;
+    unsigned long hash = len;
     int c;
 
     for (int i = 0; i < len; i++) {
@@ -160,11 +162,11 @@ static unsigned long sdbm_hash(const unsigned char *str, int len)
 }
 
 static inline struct HNode *get_node_from_bucket(
-    const struct AtomTable *hash_table, unsigned long bucket_index, AtomString string)
+    const struct AtomTable *hash_table, unsigned long bucket_index, const uint8_t *string, size_t string_len)
 {
     struct HNode *node = hash_table->buckets[bucket_index];
     while (node) {
-        if (atom_are_equals(string, node->key)) {
+        if (node->len == string_len && memcmp(node->key, string, string_len) == 0) {
             return node;
         }
 
@@ -175,17 +177,17 @@ static inline struct HNode *get_node_from_bucket(
 }
 
 static inline struct HNode *get_node_with_hash(
-    const struct AtomTable *hash_table, AtomString string, unsigned long hash)
+    const struct AtomTable *hash_table, const uint8_t *string, size_t string_len, unsigned long hash)
 {
     unsigned long bucket_index = hash % hash_table->capacity;
-    return get_node_from_bucket(hash_table, bucket_index, string);
+    return get_node_from_bucket(hash_table, bucket_index, string, string_len);
 }
 
-static inline struct HNode *get_node(const struct AtomTable *hash_table, AtomString string)
+static inline struct HNode *get_node(const struct AtomTable *hash_table, const uint8_t *string, size_t string_len)
 {
-    unsigned long hash = sdbm_hash(string, atom_string_len(string));
+    unsigned long hash = sdbm_hash(string, string_len);
 
-    return get_node_with_hash(hash_table, string, hash);
+    return get_node_with_hash(hash_table, string, string_len, hash);
 }
 
 // TODO: this function needs use an efficient structure such as a skip list
@@ -208,33 +210,47 @@ static struct HNode *get_node_using_index(struct AtomTable *table, atom_index_t
     return NULL;
 }
 
-AtomString atom_table_get_atom_string(struct AtomTable *table, atom_index_t index)
+const uint8_t *atom_table_get_atom_string(struct AtomTable *table, atom_index_t index, size_t *out_size)
 {
+    const uint8_t *result;
     SMP_RDLOCK(table);
 
     struct HNode *node = get_node_using_index(table, index);
     if (IS_NULL_PTR(node)) {
         SMP_UNLOCK(table);
         return NULL;
     }
-
-    AtomString found_key = node->key;
+    result = node->key;
+    *out_size = node->len;
 
     SMP_UNLOCK(table);
-    return found_key;
+    return result;
 }
 
-int atom_table_cmp_using_atom_index(struct AtomTable *table, int t_atom_index, int other_atom_index)
+bool atom_table_is_equal_to_atom_string(struct AtomTable *table, atom_index_t t_atom_index, AtomString string)
 {
-    AtomString t_atom_string = atom_table_get_atom_string(table, t_atom_index);
+    size_t t_atom_len;
+    const uint8_t *t_atom_data = atom_table_get_atom_string(table, t_atom_index, &t_atom_len);
+    if (IS_NULL_PTR(t_atom_data)) {
+        return false;
+    }
 
-    int t_atom_len = atom_string_len(t_atom_string);
-    const char *t_atom_data = (const char *) atom_string_data(t_atom_string);
+    return (t_atom_len == atom_string_len(string)) && (memcmp(t_atom_data, atom_string_data(string), t_atom_len) == 0);
+}
 
-    AtomString other_atom_string = atom_table_get_atom_string(table, other_atom_index);
+int atom_table_cmp_using_atom_index(struct AtomTable *table, atom_index_t t_atom_index, atom_index_t other_atom_index)
+{
+    size_t t_atom_len;
+    const uint8_t *t_atom_data = atom_table_get_atom_string(table, t_atom_index, &t_atom_len);
+    if (IS_NULL_PTR(t_atom_data)) {
+        return -1;
+    }
 
-    int other_atom_len = atom_string_len(other_atom_string);
-    const char *other_atom_data = (const char *) atom_string_data(other_atom_string);
+    size_t other_atom_len;
+    const uint8_t *other_atom_data = atom_table_get_atom_string(table, other_atom_index, &other_atom_len);
+    if (IS_NULL_PTR(other_atom_data)) {
+        return 1;
+    }
 
     int cmp_size = (t_atom_len > other_atom_len) ? other_atom_len : t_atom_len;
 
@@ -267,73 +283,10 @@ atom_ref_t atom_table_get_atom_ptr_and_len(struct AtomTable *table, atom_index_t
     return node;
 }
 
-char *atom_table_atom_to_new_cstring(struct AtomTable *table, atom_index_t atom_index, const char *suffix)
-{
-    AtomString atom_string = atom_table_get_atom_string(table, atom_index);
-    size_t atom_len = atom_string_len(atom_string);
-    const uint8_t *atom_data = atom_string_data(atom_string);
-    size_t suffix_len = 0;
-    if (suffix) {
-        suffix_len = strlen(suffix);
-    }
-
-    char *result = malloc(atom_len + suffix_len + 1);
-    if (IS_NULL_PTR(result)) {
-        return NULL;
-    }
-
-    memcpy(result, atom_data, atom_len);
-    if (suffix) {
-        memcpy(result + atom_len, suffix, suffix_len);
-    }
-    result[atom_len + suffix_len] = 0;
-
-    return result;
-}
-
-bool atom_table_is_atom_ref_ascii(struct AtomTable *table, atom_ref_t atom)
+static inline void init_node(struct HNode *node, const uint8_t *atom_data, size_t atom_len, long index)
 {
-    SMP_RDLOCK(table);
-
-    struct HNode *node = (struct HNode *) atom;
-    const uint8_t *data = atom_string_data(node->key);
-    size_t len = atom_string_len(node->key);
-
-    bool result = unicode_buf_is_ascii(data, len);
-
-    SMP_UNLOCK(table);
-    return result;
-}
-
-void atom_table_write_bytes(struct AtomTable *table, atom_ref_t atom, size_t buf_len, void *outbuf)
-{
-    SMP_RDLOCK(table);
-
-    struct HNode *node = (struct HNode *) atom;
-    size_t len = atom_string_len(node->key);
-    if (len > buf_len) {
-        len = buf_len;
-    }
-
-    memcpy(outbuf, atom_string_data(node->key), len);
-
-    SMP_UNLOCK(table);
-}
-
-void atom_table_write_cstring(
-    struct AtomTable *table, atom_ref_t atom, size_t buf_len, char *outbuf)
-{
-    SMP_RDLOCK(table);
-
-    struct HNode *node = (struct HNode *) atom;
-    atom_string_to_c(node->key, outbuf, buf_len);
-
-    SMP_UNLOCK(table);
-}
-
-static inline void init_node(struct HNode *node, AtomString atom, long index)
-{
-    node->key = atom;
+    node->key = atom_data;
+    node->len = atom_len;
     node->index = index;
 }
 
@@ -346,14 +299,14 @@ static inline void insert_node_into_bucket(
 }
 
 static inline atom_index_t insert_node(struct AtomTable *table, struct HNodeGroup *node_group,
-    unsigned long bucket_index, AtomString string)
+    unsigned long bucket_index, const uint8_t *atom_data, size_t atom_len)
 {
     atom_index_t new_index = table->count;
     table->count++;
 
     struct HNode *node = &node_group->nodes[new_index - node_group->first_index];
     table->last_node_group_avail--;
-    init_node(node, string, new_index);
+    init_node(node, atom_data, atom_len, new_index);
     insert_node_into_bucket(table, bucket_index, node);
 
     return new_index;
@@ -383,9 +336,7 @@ static bool do_rehash(struct AtomTable *table, int new_capacity)
 
     for (int i = 0; i < group_count; i++) {
         struct HNode *node = &group->nodes[i];
-        AtomString key = node->key;
-
-        unsigned long hash = sdbm_hash(key, atom_string_len(key));
+        unsigned long hash = sdbm_hash(node->key, node->len);
         unsigned long bucket_index = hash % table->capacity;
 
         insert_node_into_bucket(table, bucket_index, node);
@@ -409,13 +360,13 @@ static inline bool maybe_rehash(struct AtomTable *table, int new_entries)
     return do_rehash(table, new_capacity);
 }
 
-enum AtomTableEnsureAtomResult atom_table_ensure_atom(struct AtomTable *table, AtomString string, enum AtomTableCopyOpt opts, atom_index_t *result)
+enum AtomTableEnsureAtomResult atom_table_ensure_atom(struct AtomTable *table, const uint8_t *atom_data, size_t atom_len, enum AtomTableCopyOpt opts, atom_index_t *result)
 {
-    unsigned long hash = sdbm_hash(string, atom_string_len(string));
+    unsigned long hash = sdbm_hash(atom_data, atom_len);
     SMP_WRLOCK(table);
     unsigned long bucket_index = hash % table->capacity;
 
-    struct HNode *node = get_node_from_bucket(table, bucket_index, string);
+    struct HNode *node = get_node_from_bucket(table, bucket_index, atom_data, atom_len);
     if (node) {
         SMP_UNLOCK(table);
         *result = node->index;
@@ -435,29 +386,27 @@ enum AtomTableEnsureAtomResult atom_table_ensure_atom(struct AtomTable *table, A
         }
     }
 
-    AtomString maybe_copied = string;
     if (opts & AtomTableCopyAtom) {
-        uint8_t len = *((uint8_t *) string);
-        uint8_t *buf = malloc(1 + len);
+        uint8_t *buf = malloc(atom_len);
         if (IS_NULL_PTR(buf)) {
             SMP_UNLOCK(table);
             return AtomTableEnsureAtomAllocFail;
         }
-        memcpy(buf, string, 1 + len);
-        maybe_copied = buf;
+        memcpy(buf, atom_data, atom_len);
+        atom_data = buf;
     }
 
     if (maybe_rehash(table, 1)) {
         bucket_index = hash % table->capacity;
     }
 
-    *result = insert_node(table, node_group, bucket_index, maybe_copied);
+    *result = insert_node(table, node_group, bucket_index, atom_data, atom_len);
 
     SMP_UNLOCK(table);
     return AtomTableEnsureAtomOk;
 }
 
-static inline int read_encoded_len(const uint8_t **len_bytes)
+static inline ssize_t read_encoded_len(const uint8_t **len_bytes)
 {
     uint8_t byte0 = (*len_bytes)[0];
 
@@ -478,7 +427,7 @@ static inline int read_encoded_len(const uint8_t **len_bytes)
 // -1 is not a valid atom index as we're limited to 2^20
 #define ATOM_TABLE_NOT_FOUND_MARKER ((atom_index_t) -1)
 
-enum AtomTableEnsureAtomResult atom_table_ensure_atoms(struct AtomTable *table, const void *atoms, int count,
+enum AtomTableEnsureAtomResult atom_table_ensure_atoms(struct AtomTable *table, const void *atoms, size_t count,
     atom_index_t *translate_table, enum EnsureAtomsOpt opt)
 {
     bool is_long_format = (opt & EnsureLongEncoding) != 0;
@@ -489,35 +438,22 @@ enum AtomTableEnsureAtomResult atom_table_ensure_atoms(struct AtomTable *table,
 
     const uint8_t *current_atom = atoms;
 
-    for (int i = 0; i < count; i++) {
+    for (size_t i = 0; i < count; i++) {
         struct HNode *node;
+        ssize_t atom_len;
         if (is_long_format) {
-            int atom_len = read_encoded_len(&current_atom);
-            if (UNLIKELY(atom_len < 0)) {
+            atom_len = read_encoded_len(&current_atom);
+            if (UNLIKELY(atom_len < 0 || atom_len > MAX_ATOM_LEN)) {
                 fprintf(stderr, "Found invalid atom len.");
                 SMP_UNLOCK(table);
                 return AtomTableEnsureAtomInvalidLen;
-            } else if (UNLIKELY(atom_len > 255)) {
-                fprintf(stderr,
-                    "Unsupported atom length %i bytes.\n"
-                    "Unlike OTP >= 28, AtomVM supports a maximum of 255 bytes"
-                    "regardeless the number of codepoints.\n"
-                    "If you are seeing this error please open an issue on GitHub:\n"
-                    "https://github.com/atomvm/AtomVM/issues\n",
-                    atom_len);
-                SMP_UNLOCK(table);
-                return AtomTableEnsureAtomInvalidLen;
             }
-            char tmp_old_fmt[256];
-            tmp_old_fmt[0] = atom_len;
-            memcpy(tmp_old_fmt + 1, current_atom, atom_len);
-            node = get_node(table, tmp_old_fmt);
-            current_atom += atom_len;
         } else {
-            node = get_node(table, current_atom);
-            uint8_t atom_len = current_atom[0];
-            current_atom += 1 + atom_len;
+            atom_len = current_atom[0];
+            current_atom++;
         }
+        node = get_node(table, current_atom, atom_len);
+        current_atom += atom_len;
 
         if (node) {
             translate_table[i] = node->index;
@@ -532,18 +468,14 @@ enum AtomTableEnsureAtomResult atom_table_ensure_atoms(struct AtomTable *table,
     current_atom = atoms;
     int remaining_atoms = new_atoms_count;
     struct HNodeGroup *node_group = table->last_node_group;
-    for (int i = 0; i < count; i++) {
-
-        const uint8_t *to_be_copied = NULL;
-        const uint8_t *next_atom = current_atom;
-        uint8_t atom_len;
+    for (size_t i = 0; i < count; i++) {
+        size_t atom_len;
         if (is_long_format) {
-            atom_len = read_encoded_len(&next_atom);
-            to_be_copied = next_atom;
-            next_atom += atom_len;
+            // Size was checked above
+            atom_len = (size_t) read_encoded_len(&current_atom);
         } else {
             atom_len = current_atom[0];
-            next_atom += 1 + atom_len;
+            current_atom++;
         }
 
         if (translate_table[i] == ATOM_TABLE_NOT_FOUND_MARKER) {
@@ -555,28 +487,16 @@ enum AtomTableEnsureAtomResult atom_table_ensure_atoms(struct AtomTable *table,
                 }
             }
 
-            if (is_long_format) {
-                uint8_t *atom_copy = malloc(atom_len + 1);
-                if (IS_NULL_PTR(atom_copy)) {
-                    // we are not going to remove atoms that have already been added up to this one
-                    SMP_UNLOCK(table);
-                    return AtomTableEnsureAtomAllocFail;
-                }
-                atom_copy[0] = atom_len;
-                memcpy(atom_copy + 1, to_be_copied, atom_len);
-                current_atom = atom_copy;
-            }
-
             unsigned long hash = sdbm_hash(current_atom, atom_len);
             unsigned long bucket_index = hash % table->capacity;
 
-            translate_table[i] = insert_node(table, node_group, bucket_index, current_atom);
+            translate_table[i] = insert_node(table, node_group, bucket_index, current_atom, atom_len);
             remaining_atoms--;
             if (remaining_atoms == 0) {
                 break;
             }
         }
-        current_atom = next_atom;
+        current_atom += atom_len;
     }
 
     SMP_UNLOCK(table);
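Side note on usage (a sketch, not part of the commit): after this change the atom table API takes a plain byte pointer plus an explicit length instead of a length-prefixed AtomString. The snippet below shows how a caller might intern an atom and read it back using only the signatures visible in this diff; the "atom_table.h" header name is an assumption, and the helper function is hypothetical.

    #include <stdbool.h>
    #include <stdint.h>
    #include <string.h>

    #include "atom_table.h" /* assumed header exposing the declarations changed above */

    /* Hypothetical helper: interns "hello" and reads it back through the new pointer+length API. */
    static bool intern_and_lookup(struct AtomTable *table)
    {
        const uint8_t *name = (const uint8_t *) "hello";
        atom_index_t index;

        /* AtomTableCopyAtom asks the table to copy the bytes, so `name` need not outlive the call. */
        enum AtomTableEnsureAtomResult r
            = atom_table_ensure_atom(table, name, 5, AtomTableCopyAtom, &index);
        if (r != AtomTableEnsureAtomOk) {
            return false;
        }

        /* Lookup now returns a raw pointer and writes the length to an out parameter,
           instead of returning a \x05hello-style length-prefixed AtomString. */
        size_t len;
        const uint8_t *data = atom_table_get_atom_string(table, index, &len);
        return data != NULL && len == 5 && memcmp(data, "hello", 5) == 0;
    }

The sketch assumes atom indices stay valid for the lifetime of the table, which is consistent with the insert-only code paths above; callers then only need to keep the 20-bit index packed into the HNode bitfield.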