Skip to content

Commit 455d7fc

Browse files
committed
Use eager deletion for CompactHashStore to avoid tombstones in index array
1 parent 7c22393 commit 455d7fc

File tree

1 file changed

+59
-28
lines changed

1 file changed

+59
-28
lines changed

src/main/java/org/truffleruby/core/hash/library/CompactHashStore.java

Lines changed: 59 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
import com.oracle.truffle.api.CompilerDirectives;
2020
import org.truffleruby.RubyContext;
2121
import org.truffleruby.RubyLanguage;
22-
import org.truffleruby.annotations.SuppressFBWarnings;
2322
import org.truffleruby.collections.PEBiFunction;
2423
import org.truffleruby.core.array.ArrayHelpers;
2524
import org.truffleruby.core.array.ArrayUtils;
@@ -112,7 +111,6 @@ public final class CompactHashStore {
112111
* <li>Offset == -1: Deleted, the data in the hash field and the offset field is NOT valid, but the slot was
113112
* occupied with valid data before.</li> */
114113
private static final int INDEX_SLOT_UNUSED = 0;
115-
private static final int INDEX_SLOT_DELETED = -1;
116114

117115
// Returned by methods doing array search which don't find what they are looking for
118116
static final int KEY_NOT_FOUND = -2;
@@ -333,7 +331,8 @@ void rehash(RubyHash hash,
333331
@ExportMessage
334332
boolean verify(RubyHash hash) {
335333
assert hash.store == this;
336-
assert kvStoreInsertionPos > 0 && kvStoreInsertionPos < kvStore.length;
334+
assert kvStoreInsertionPos > 0;
335+
assert kvStoreInsertionPos <= kvStore.length;
337336
assert kvStoreInsertionPos % 2 == 0;
338337
assert kvStoreInsertionPos >= hash.size; // because deletes only decrease hash.size
339338

@@ -348,24 +347,68 @@ boolean verify(RubyHash hash) {
348347
return true;
349348
}
350349

351-
@SuppressFBWarnings("IM_BAD_CHECK_FOR_ODD")
352350
private void assertAllKvPositionsAreValid() {
353-
for (int i = 1; i < index.length; i += 2) {
354-
if (index[i] != INDEX_SLOT_UNUSED && index[i] != INDEX_SLOT_DELETED) {
355-
assert index[i] > 0;
356-
assert index[i] < kvStoreInsertionPos;
357-
assert index[i] % 2 == 1;
358-
assert kvStore[index[i] - 1] != null;
351+
boolean foundAtLeastOneUnusedSlot = false;
352+
for (int indexPos = 0; indexPos < index.length; indexPos += 2) {
353+
int valuePos = indexPosToValuePos(index, indexPos);
354+
if (valuePos == INDEX_SLOT_UNUSED) {
355+
foundAtLeastOneUnusedSlot = true;
356+
}
357+
if (valuePos != INDEX_SLOT_UNUSED) {
358+
assert valuePos > 0;
359+
assert valuePos < kvStoreInsertionPos;
360+
assert (valuePos & 1) == 1;
361+
int keyPos = valuePos - 1;
362+
assert kvStore[keyPos] != null;
363+
assert kvStore[valuePos] != null;
364+
365+
int hashCode = index[indexPos];
366+
int startPos = indexPosFromHashCode(hashCode, index.length);
367+
for (int i = startPos; i != indexPos; i = incrementIndexPos(i, index.length)) {
368+
assert indexPosToValuePos(index, i) > INDEX_SLOT_UNUSED;
369+
}
359370
}
360371
}
372+
assert foundAtLeastOneUnusedSlot;
361373
}
362374

363375
private Object deleteKvAndGetV(RubyHash hash, int indexPos, int keyPos) {
364376
assert verify(hash);
365377

366-
Object deletedValue = kvStore[keyPos + 1];
378+
// Remove the index slot, move entries following it to avoid tombstones which would make the code
379+
// more complex, lookup slower and could fill the entire index array with tombstones,
380+
// in which case lookup would become linear.
381+
382+
// From https://en.wikipedia.org/wiki/Open_addressing#Example_pseudocode,
383+
// https://stackoverflow.com/a/60709252/388803 and https://stackoverflow.com/a/60644631/388803
384+
385+
// First let's override this slot so in case nothing follows it is correctly marked as unused.
386+
// An alternative would be to clear at reusePos after the loop but then the intermediate state is less clear.
387+
index[indexPos] = 0;
388+
index[indexPos + 1] = INDEX_SLOT_UNUSED;
389+
390+
int reusePos = indexPos; // i
391+
int nextPos = incrementIndexPos(indexPos, index.length); // j
392+
while (indexPosToValuePos(index, nextPos) != INDEX_SLOT_UNUSED) { // not found an unused slot
393+
int hashCode = index[nextPos];
394+
int startPos = indexPosFromHashCode(hashCode, index.length); // k
395+
if (nextPos < reusePos ^ startPos <= reusePos ^ startPos > nextPos) {
396+
// move the slot to the reuse slot and keep going
397+
assert index[reusePos + 1] == INDEX_SLOT_UNUSED : "safe to move the slot to reuse slot";
398+
index[reusePos] = index[nextPos];
399+
index[reusePos + 1] = index[nextPos + 1];
400+
index[nextPos] = 0;
401+
index[nextPos + 1] = INDEX_SLOT_UNUSED;
402+
reusePos = nextPos;
403+
}
404+
405+
nextPos = incrementIndexPos(nextPos, index.length);
406+
assert nextPos != indexPos;
407+
}
367408

368-
index[indexPos + 1] = INDEX_SLOT_DELETED;
409+
// Remove the kvStore slot
410+
411+
Object deletedValue = kvStore[keyPos + 1];
369412

370413
// TODO: Instead of naively nulling out the key-value, which can produce long gaps of nulls in the kvStore,
371414
// See if we can annotate each gap with its length so that iteration code can "jump" over it
@@ -436,7 +479,6 @@ abstract static class GetIndexPosForKeyNode extends RubyBaseNode {
436479
@Specialization
437480
int findIndexPos(Object key, int hash, boolean compareByIdentity, int[] index, Object[] kvStore,
438481
@Cached CompareHashKeysNode.AssumingEqualHashes compareHashKeysNode,
439-
@Cached @Exclusive InlinedConditionProfile notDeleted,
440482
@Cached @Exclusive InlinedConditionProfile unused,
441483
@Cached @Exclusive InlinedConditionProfile sameHash,
442484
@Bind("$node") Node node) {
@@ -446,8 +488,7 @@ int findIndexPos(Object key, int hash, boolean compareByIdentity, int[] index, O
446488
int valuePos = indexPosToValuePos(index, indexPos);
447489
if (unused.profile(node, valuePos == INDEX_SLOT_UNUSED)) {
448490
return KEY_NOT_FOUND;
449-
} else if (notDeleted.profile(node, valuePos != INDEX_SLOT_DELETED) &&
450-
sameHash.profile(node, index[indexPos] == hash) &&
491+
} else if (sameHash.profile(node, index[indexPos] == hash) &&
451492
compareHashKeysNode.execute(compareByIdentity, key, kvStore[valuePos - 1])) { // keyPos == valuePos - 1
452493
return indexPos;
453494
}
@@ -467,21 +508,15 @@ abstract static class GetInsertionIndexPosForKeyNode extends RubyBaseNode {
467508
@Specialization
468509
int findIndexPos(Object key, int hash, boolean compareByIdentity, int[] index, Object[] kvStore,
469510
@Cached CompareHashKeysNode.AssumingEqualHashes compareHashKeysNode,
470-
@Cached @Exclusive InlinedConditionProfile deleted,
471511
@Cached @Exclusive InlinedConditionProfile unused,
472512
@Cached @Exclusive InlinedConditionProfile sameHash,
473513
@Bind("$node") Node node) {
474514
int startPos = indexPosFromHashCode(hash, index.length);
475515
int indexPos = startPos;
476-
int insertionPos = -1;
477516
while (true) {
478517
int valuePos = indexPosToValuePos(index, indexPos);
479518
if (unused.profile(node, valuePos == INDEX_SLOT_UNUSED)) {
480-
return insertionPos != -1 ? insertionPos : indexPos;
481-
} else if (deleted.profile(node, valuePos == INDEX_SLOT_DELETED)) {
482-
if (insertionPos == -1) {
483-
insertionPos = indexPos;
484-
}
519+
return indexPos;
485520
} else {
486521
int keyPos = valuePos - 1;
487522
if (sameHash.profile(node, index[indexPos] == hash) &&
@@ -502,7 +537,6 @@ abstract static class GetHashNextPosInIndexNode extends RubyBaseNode {
502537

503538
@Specialization
504539
int getHashNextPos(int startingFromPos, int hash, int[] index, int stop,
505-
@Cached @Exclusive InlinedConditionProfile slotIsNotDeleted,
506540
@Cached @Exclusive InlinedConditionProfile slotIsUnused,
507541
@Cached @Exclusive InlinedConditionProfile hashFound,
508542
@Cached @Exclusive InlinedLoopConditionProfile stopNotYetReached,
@@ -514,10 +548,8 @@ int getHashNextPos(int startingFromPos, int hash, int[] index, int stop,
514548
return HASH_NOT_FOUND;
515549
}
516550

517-
if (slotIsNotDeleted.profile(node, index[nextHashPos + 1] != INDEX_SLOT_DELETED)) {
518-
if (hashFound.profile(node, index[nextHashPos] == hash)) {
519-
return nextHashPos;
520-
}
551+
if (hashFound.profile(node, index[nextHashPos] == hash)) {
552+
return nextHashPos;
521553
}
522554

523555
nextHashPos = incrementIndexPos(nextHashPos, index.length);
@@ -625,7 +657,6 @@ private static void insertIntoKv(CompactHashStore store, Object key, Object valu
625657
@TruffleBoundary
626658
private static void resizeIndex(CompactHashStore store, Node node) {
627659
int[] oldIndex = store.index;
628-
// TODO: doubling size is not good if many deletes
629660
int[] newIndex = new int[2 * oldIndex.length];
630661
int newIndexCapacity = newIndex.length >> 1;
631662

0 commit comments

Comments
 (0)