19
19
import com .oracle .truffle .api .CompilerDirectives ;
20
20
import org .truffleruby .RubyContext ;
21
21
import org .truffleruby .RubyLanguage ;
22
- import org .truffleruby .annotations .SuppressFBWarnings ;
23
22
import org .truffleruby .collections .PEBiFunction ;
24
23
import org .truffleruby .core .array .ArrayHelpers ;
25
24
import org .truffleruby .core .array .ArrayUtils ;
@@ -112,7 +111,6 @@ public final class CompactHashStore {
112
111
* <li>Offset == -1: Deleted, the data in the hash field and the offset field is NOT valid, but the slot was
113
112
* occupied with valid data before.</li> */
114
113
private static final int INDEX_SLOT_UNUSED = 0 ;
115
- private static final int INDEX_SLOT_DELETED = -1 ;
116
114
117
115
// Returned by methods doing array search which don't find what they are looking for
118
116
static final int KEY_NOT_FOUND = -2 ;
@@ -333,7 +331,8 @@ void rehash(RubyHash hash,
333
331
@ ExportMessage
334
332
boolean verify (RubyHash hash ) {
335
333
assert hash .store == this ;
336
- assert kvStoreInsertionPos > 0 && kvStoreInsertionPos < kvStore .length ;
334
+ assert kvStoreInsertionPos > 0 ;
335
+ assert kvStoreInsertionPos <= kvStore .length ;
337
336
assert kvStoreInsertionPos % 2 == 0 ;
338
337
assert kvStoreInsertionPos >= hash .size ; // because deletes only decrease hash.size
339
338
@@ -348,24 +347,68 @@ boolean verify(RubyHash hash) {
348
347
return true ;
349
348
}
350
349
351
- @ SuppressFBWarnings ("IM_BAD_CHECK_FOR_ODD" )
352
350
private void assertAllKvPositionsAreValid () {
353
- for (int i = 1 ; i < index .length ; i += 2 ) {
354
- if (index [i ] != INDEX_SLOT_UNUSED && index [i ] != INDEX_SLOT_DELETED ) {
355
- assert index [i ] > 0 ;
356
- assert index [i ] < kvStoreInsertionPos ;
357
- assert index [i ] % 2 == 1 ;
358
- assert kvStore [index [i ] - 1 ] != null ;
351
+ boolean foundAtLeastOneUnusedSlot = false ;
352
+ for (int indexPos = 0 ; indexPos < index .length ; indexPos += 2 ) {
353
+ int valuePos = indexPosToValuePos (index , indexPos );
354
+ if (valuePos == INDEX_SLOT_UNUSED ) {
355
+ foundAtLeastOneUnusedSlot = true ;
356
+ }
357
+ if (valuePos != INDEX_SLOT_UNUSED ) {
358
+ assert valuePos > 0 ;
359
+ assert valuePos < kvStoreInsertionPos ;
360
+ assert (valuePos & 1 ) == 1 ;
361
+ int keyPos = valuePos - 1 ;
362
+ assert kvStore [keyPos ] != null ;
363
+ assert kvStore [valuePos ] != null ;
364
+
365
+ int hashCode = index [indexPos ];
366
+ int startPos = indexPosFromHashCode (hashCode , index .length );
367
+ for (int i = startPos ; i != indexPos ; i = incrementIndexPos (i , index .length )) {
368
+ assert indexPosToValuePos (index , i ) > INDEX_SLOT_UNUSED ;
369
+ }
359
370
}
360
371
}
372
+ assert foundAtLeastOneUnusedSlot ;
361
373
}
362
374
363
375
private Object deleteKvAndGetV (RubyHash hash , int indexPos , int keyPos ) {
364
376
assert verify (hash );
365
377
366
- Object deletedValue = kvStore [keyPos + 1 ];
378
+ // Remove the index slot, move entries following it to avoid tombstones which would make the code
379
+ // more complex, lookup slower and could fill the entire index array with tombstones,
380
+ // in which case lookup would become linear.
381
+
382
+ // From https://en.wikipedia.org/wiki/Open_addressing#Example_pseudocode,
383
+ // https://stackoverflow.com/a/60709252/388803 and https://stackoverflow.com/a/60644631/388803
384
+
385
+ // First let's override this slot so in case nothing follows it is correctly marked as unused.
386
+ // An alternative would be to clear at reusePos after the loop but then the intermediate state is less clear.
387
+ index [indexPos ] = 0 ;
388
+ index [indexPos + 1 ] = INDEX_SLOT_UNUSED ;
389
+
390
+ int reusePos = indexPos ; // i
391
+ int nextPos = incrementIndexPos (indexPos , index .length ); // j
392
+ while (indexPosToValuePos (index , nextPos ) != INDEX_SLOT_UNUSED ) { // not found an unused slot
393
+ int hashCode = index [nextPos ];
394
+ int startPos = indexPosFromHashCode (hashCode , index .length ); // k
395
+ if (nextPos < reusePos ^ startPos <= reusePos ^ startPos > nextPos ) {
396
+ // move the slot to the reuse slot and keep going
397
+ assert index [reusePos + 1 ] == INDEX_SLOT_UNUSED : "safe to move the slot to reuse slot" ;
398
+ index [reusePos ] = index [nextPos ];
399
+ index [reusePos + 1 ] = index [nextPos + 1 ];
400
+ index [nextPos ] = 0 ;
401
+ index [nextPos + 1 ] = INDEX_SLOT_UNUSED ;
402
+ reusePos = nextPos ;
403
+ }
404
+
405
+ nextPos = incrementIndexPos (nextPos , index .length );
406
+ assert nextPos != indexPos ;
407
+ }
367
408
368
- index [indexPos + 1 ] = INDEX_SLOT_DELETED ;
409
+ // Remove the kvStore slot
410
+
411
+ Object deletedValue = kvStore [keyPos + 1 ];
369
412
370
413
// TODO: Instead of naively nulling out the key-value, which can produce long gaps of nulls in the kvStore,
371
414
// See if we can annotate each gap with its length so that iteration code can "jump" over it
@@ -436,7 +479,6 @@ abstract static class GetIndexPosForKeyNode extends RubyBaseNode {
436
479
@ Specialization
437
480
int findIndexPos (Object key , int hash , boolean compareByIdentity , int [] index , Object [] kvStore ,
438
481
@ Cached CompareHashKeysNode .AssumingEqualHashes compareHashKeysNode ,
439
- @ Cached @ Exclusive InlinedConditionProfile notDeleted ,
440
482
@ Cached @ Exclusive InlinedConditionProfile unused ,
441
483
@ Cached @ Exclusive InlinedConditionProfile sameHash ,
442
484
@ Bind ("$node" ) Node node ) {
@@ -446,8 +488,7 @@ int findIndexPos(Object key, int hash, boolean compareByIdentity, int[] index, O
446
488
int valuePos = indexPosToValuePos (index , indexPos );
447
489
if (unused .profile (node , valuePos == INDEX_SLOT_UNUSED )) {
448
490
return KEY_NOT_FOUND ;
449
- } else if (notDeleted .profile (node , valuePos != INDEX_SLOT_DELETED ) &&
450
- sameHash .profile (node , index [indexPos ] == hash ) &&
491
+ } else if (sameHash .profile (node , index [indexPos ] == hash ) &&
451
492
compareHashKeysNode .execute (compareByIdentity , key , kvStore [valuePos - 1 ])) { // keyPos == valuePos - 1
452
493
return indexPos ;
453
494
}
@@ -467,21 +508,15 @@ abstract static class GetInsertionIndexPosForKeyNode extends RubyBaseNode {
467
508
@ Specialization
468
509
int findIndexPos (Object key , int hash , boolean compareByIdentity , int [] index , Object [] kvStore ,
469
510
@ Cached CompareHashKeysNode .AssumingEqualHashes compareHashKeysNode ,
470
- @ Cached @ Exclusive InlinedConditionProfile deleted ,
471
511
@ Cached @ Exclusive InlinedConditionProfile unused ,
472
512
@ Cached @ Exclusive InlinedConditionProfile sameHash ,
473
513
@ Bind ("$node" ) Node node ) {
474
514
int startPos = indexPosFromHashCode (hash , index .length );
475
515
int indexPos = startPos ;
476
- int insertionPos = -1 ;
477
516
while (true ) {
478
517
int valuePos = indexPosToValuePos (index , indexPos );
479
518
if (unused .profile (node , valuePos == INDEX_SLOT_UNUSED )) {
480
- return insertionPos != -1 ? insertionPos : indexPos ;
481
- } else if (deleted .profile (node , valuePos == INDEX_SLOT_DELETED )) {
482
- if (insertionPos == -1 ) {
483
- insertionPos = indexPos ;
484
- }
519
+ return indexPos ;
485
520
} else {
486
521
int keyPos = valuePos - 1 ;
487
522
if (sameHash .profile (node , index [indexPos ] == hash ) &&
@@ -502,7 +537,6 @@ abstract static class GetHashNextPosInIndexNode extends RubyBaseNode {
502
537
503
538
@ Specialization
504
539
int getHashNextPos (int startingFromPos , int hash , int [] index , int stop ,
505
- @ Cached @ Exclusive InlinedConditionProfile slotIsNotDeleted ,
506
540
@ Cached @ Exclusive InlinedConditionProfile slotIsUnused ,
507
541
@ Cached @ Exclusive InlinedConditionProfile hashFound ,
508
542
@ Cached @ Exclusive InlinedLoopConditionProfile stopNotYetReached ,
@@ -514,10 +548,8 @@ int getHashNextPos(int startingFromPos, int hash, int[] index, int stop,
514
548
return HASH_NOT_FOUND ;
515
549
}
516
550
517
- if (slotIsNotDeleted .profile (node , index [nextHashPos + 1 ] != INDEX_SLOT_DELETED )) {
518
- if (hashFound .profile (node , index [nextHashPos ] == hash )) {
519
- return nextHashPos ;
520
- }
551
+ if (hashFound .profile (node , index [nextHashPos ] == hash )) {
552
+ return nextHashPos ;
521
553
}
522
554
523
555
nextHashPos = incrementIndexPos (nextHashPos , index .length );
@@ -625,7 +657,6 @@ private static void insertIntoKv(CompactHashStore store, Object key, Object valu
625
657
@ TruffleBoundary
626
658
private static void resizeIndex (CompactHashStore store , Node node ) {
627
659
int [] oldIndex = store .index ;
628
- // TODO: doubling size is not good if many deletes
629
660
int [] newIndex = new int [2 * oldIndex .length ];
630
661
int newIndexCapacity = newIndex .length >> 1 ;
631
662
0 commit comments