From 2064c3cad13f97a5905be49a82e536e1b6259a77 Mon Sep 17 00:00:00 2001 From: AlexanderSaydakov Date: Mon, 8 Jul 2024 23:41:33 -0700 Subject: [PATCH] constructor takes fingerprint length and load factor --- .../quotientfilter/QuotientFilter.java | 78 ++++++++++--------- .../filters/quotientfilter/DeletionTests.java | 56 ++++++------- .../quotientfilter/QuotientFilterTest.java | 75 ++++++------------ 3 files changed, 93 insertions(+), 116 deletions(-) diff --git a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java index a47bb5f6a..7a860f857 100644 --- a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java +++ b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java @@ -31,10 +31,11 @@ public class QuotientFilter extends Filter { - public static final double LOAD_FACTOR = 0.9; + public static final float DEFAULT_LOAD_FACTOR = 0.8f; - int numBitsPerEntry_; - int powerOfTwoSize_; + int lgQ_; + int numFingerprintBits_; + float loadFactor_; int numEntries_; int numExpansions_; BitArray bitArray_; @@ -45,10 +46,15 @@ public class QuotientFilter extends Filter { public double avgRunLength_; public double avgClusterLength_; - public QuotientFilter(final int powerOfTwo, final int numBitsPerEntry) { - powerOfTwoSize_ = powerOfTwo; - numBitsPerEntry_ = numBitsPerEntry; - bitArray_ = makeFilter(getNumSlots(), numBitsPerEntry); + public QuotientFilter(final int lgQ, final int numFingerprintBits) { + this(lgQ, numFingerprintBits, DEFAULT_LOAD_FACTOR); + } + + public QuotientFilter(final int lgQ, final int numFingerprintBits, final float loadFactor) { + lgQ_ = lgQ; + numFingerprintBits_ = numFingerprintBits; + loadFactor_ = loadFactor; + bitArray_ = makeFilter(getNumSlots(), getNumBitsPerEntry()); numExpansions_ = 0; //hash_type = XxHash.hashLong ; //HashType.xxh; } @@ -66,7 +72,7 @@ public int getNumExpansions() { } public long getMaxEntriesBeforeExpansion() { - return (long)(getNumSlots() * LOAD_FACTOR); + return (long)(getNumSlots() * loadFactor_); } BitArray makeFilter(final long initSize, final int bitsPerEntry) { @@ -74,23 +80,23 @@ BitArray makeFilter(final long initSize, final int bitsPerEntry) { } public int getFingerprintLength() { - return numBitsPerEntry_ - 3; + return numFingerprintBits_; } - QuotientFilter(final int powerOfTwo, final int numBitsPerEntry, final BitArray bitArray) { - powerOfTwoSize_ = powerOfTwo; - numBitsPerEntry_ = numBitsPerEntry; - bitArray_ = bitArray; - } +// QuotientFilter(final int powerOfTwo, final int numBitsPerEntry, final BitArray bitArray) { +// powerOfTwoSize_ = powerOfTwo; +// numBitsPerEntry_ = numBitsPerEntry; +// bitArray_ = bitArray; +// } void expand() { if (getFingerprintLength() < 2) throw new SketchesException("for expansion value must have at least 2 bits"); - QuotientFilter other = new QuotientFilter(powerOfTwoSize_ + 1, numBitsPerEntry_ - 1); + final QuotientFilter other = new QuotientFilter(lgQ_ + 1, numFingerprintBits_ - 1, loadFactor_); long i = 0; if (!isSlotEmpty(i)) { i = findClusterStart(i); } - Queue fifo = new LinkedList(); + final Queue fifo = new LinkedList(); long count = 0; while (count < numEntries_) { if (!isSlotEmpty(i)) { @@ -104,8 +110,8 @@ void expand() { i = (i + 1) & getSlotMask(); if (!fifo.isEmpty() && ! isContinuation(i)) { fifo.remove(); } } - powerOfTwoSize_++; - numBitsPerEntry_--; + lgQ_++; + numFingerprintBits_--; bitArray_ = other.bitArray_; numExpansions_++; } @@ -146,7 +152,7 @@ public double getUtilization() { // returns the number of slots in the filter without the extension/buffer slots public long getNumSlots() { - return 1L << powerOfTwoSize_; + return 1L << lgQ_; } long getSlotMask() { @@ -166,18 +172,18 @@ void modifySlot(final boolean isOccupied, final boolean isContinuation, final bo // sets the fingerprint for a given slot index void setFingerprint(final long index, final long fingerprint) { - bitArray_.setBits(index * numBitsPerEntry_ + 3, getFingerprintLength(), fingerprint); + bitArray_.setBits(index * getNumBitsPerEntry() + 3, getFingerprintLength(), fingerprint); } // print a nice representation of the filter that can be understood. // if vertical is on, each line will represent a slot public String getPrettyStr(final boolean vertical) { final StringBuffer sbr = new StringBuffer(); - final long numBits = getNumSlots() * numBitsPerEntry_; + final long numBits = getNumSlots() * getNumBitsPerEntry(); for (long i = 0; i < numBits; i++) { - final long remainder = i % numBitsPerEntry_; + final long remainder = i % getNumBitsPerEntry(); if (remainder == 0) { - final long slot = i / numBitsPerEntry_; + final long slot = i / getNumBitsPerEntry(); sbr.append(" "); if (vertical) { sbr.append("\n" + String.format("%-10d", slot) + "\t"); @@ -199,12 +205,12 @@ public void prettyPrint() { // return a fingerprint in a given slot index long getFingerprint(final long index) { - return bitArray_.getBits(index * numBitsPerEntry_ + 3, getFingerprintLength()); + return bitArray_.getBits(index * getNumBitsPerEntry() + 3, getFingerprintLength()); } // return an entire slot representation, including metadata flags and fingerprint long getSlot(final long index) { - return bitArray_.getBits(index * numBitsPerEntry_, numBitsPerEntry_); + return bitArray_.getBits(index * getNumBitsPerEntry(), getNumBitsPerEntry()); } // compare a fingerprint input to the fingerprint in some slot index @@ -222,7 +228,7 @@ void modifySlot(final boolean isOccupied, final boolean isContinuation, final bo // summarize some statistical measures about the filter public void printFilterSummary() { final long slots = getNumSlots(); - final long numBits = slots * numBitsPerEntry_; + final long numBits = slots * getNumBitsPerEntry(); System.out.println("slots: " + slots); System.out.println("bits: " + numBits); System.out.println("bits/entry: " + numBits / (double)numEntries_); @@ -242,35 +248,35 @@ public void printFilterSummary() { */ @Override public long getSpaceUse() { - return getNumSlots() * numBitsPerEntry_; + return getNumSlots() * getNumBitsPerEntry(); } public int getNumBitsPerEntry() { - return numBitsPerEntry_; + return numFingerprintBits_ + 3; } boolean isOccupied(final long index) { - return bitArray_.getBit(index * numBitsPerEntry_); + return bitArray_.getBit(index * getNumBitsPerEntry()); } boolean isContinuation(final long index) { - return bitArray_.getBit(index * numBitsPerEntry_ + 1); + return bitArray_.getBit(index * getNumBitsPerEntry() + 1); } boolean isShifted(final long index) { - return bitArray_.getBit(index * numBitsPerEntry_ + 2); + return bitArray_.getBit(index * getNumBitsPerEntry() + 2); } void setOccupied(final long index, final boolean val) { - bitArray_.assignBit(index * numBitsPerEntry_, val); + bitArray_.assignBit(index * getNumBitsPerEntry(), val); } void setContinuation(final long index, final boolean val) { - bitArray_.assignBit(index * numBitsPerEntry_ + 1, val); + bitArray_.assignBit(index * getNumBitsPerEntry() + 1, val); } void setShifted(final long index, final boolean val) { - bitArray_.assignBit(index * numBitsPerEntry_ + 2, val); + bitArray_.assignBit(index * getNumBitsPerEntry() + 2, val); } boolean isSlotEmpty(final long index) { @@ -432,7 +438,7 @@ void insertFingerprintAndPushAllElse(long fingerprint, long index, final long ca numEntries_++; } - boolean delete(final long fingerprint, final long canonicalSlot, long runStartIndex, long matchingFingerprintIndex) { + boolean delete(final long canonicalSlot, long runStartIndex, long matchingFingerprintIndex) { long runEnd = findRunEnd(matchingFingerprintIndex); // the run has only one entry, we need to disable its is_occupied flag @@ -524,7 +530,7 @@ boolean delete(final long fingerprint, final long canonicalSlot) { // we didn't find a matching fingerprint return false; } - return delete(fingerprint, canonicalSlot, runStartIndex, matchingFingerprintIndex); + return delete(canonicalSlot, runStartIndex, matchingFingerprintIndex); } long getSlotFromHash(final long largeHash) { diff --git a/src/test/java/org/apache/datasketches/filters/quotientfilter/DeletionTests.java b/src/test/java/org/apache/datasketches/filters/quotientfilter/DeletionTests.java index 6e1beb9f2..432e5a6df 100644 --- a/src/test/java/org/apache/datasketches/filters/quotientfilter/DeletionTests.java +++ b/src/test/java/org/apache/datasketches/filters/quotientfilter/DeletionTests.java @@ -35,10 +35,10 @@ public class DeletionTests { */ @Test static public void BasicDeletions() { - int bits_per_entry = 8; + int fingerprint_len_bits = 5; int num_entries_power = 3; - int num_entries = (int)Math.pow(2, num_entries_power); - QuotientFilter qf = new QuotientFilter(num_entries_power, bits_per_entry); + int num_entries = 1 << num_entries_power; + QuotientFilter qf = new QuotientFilter(num_entries_power, fingerprint_len_bits); long fp1 = 1 << 4; long fp2 = 1 << 3; @@ -60,9 +60,9 @@ static public void BasicDeletions() { qf.delete(fp1, 1); qf.delete(fp1, 1); - BitSet result = new BitSet(num_entries * bits_per_entry); - result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 2, true, false, false, fp2); - result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 4, true, false, false, fp3); + BitSet result = new BitSet(num_entries * qf.getNumBitsPerEntry()); + result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 2, true, false, false, fp2); + result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 4, true, false, false, fp3); assertTrue(QuotientFilterTest.check_equality(qf, result, true)); } @@ -76,10 +76,10 @@ static public void BasicDeletions() { */ @Test static public void Deletions() { - int bits_per_entry = 8; + int fingerprint_len_bits = 5; int num_entries_power = 3; int num_entries = (int)Math.pow(2, num_entries_power); - QuotientFilter qf = new QuotientFilter(num_entries_power, bits_per_entry); + QuotientFilter qf = new QuotientFilter(num_entries_power, fingerprint_len_bits); qf.insert(1, 1); qf.insert(2, 1); @@ -96,15 +96,15 @@ static public void Deletions() { qf.delete(3, 2); qf.delete(5, 3); - BitSet result = new BitSet(num_entries * bits_per_entry); - result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 0, false, false, false, 0); - result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 1, true, false, false, 1); - result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 2, true, true, true, 2); - result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 3, true, false, true, 4); - result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 4, false, false, true, 6); - result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 5, false, true, true, 7); - result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 6, true, false, false, 8); - result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 7, false, false, false, 0); + BitSet result = new BitSet(num_entries * qf.getNumBitsPerEntry()); + result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 0, false, false, false, 0); + result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 1, true, false, false, 1); + result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 2, true, true, true, 2); + result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 3, true, false, true, 4); + result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 4, false, false, true, 6); + result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 5, false, true, true, 7); + result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 6, true, false, false, 8); + result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 7, false, false, false, 0); assertTrue(QuotientFilterTest.check_equality(qf, result, true)); } @@ -121,10 +121,10 @@ static public void Deletions() { * The expected outcome is that after deletion, the remaining keys should be in their canonical slots. */ static public void DeletionsWithWrap() { - int bits_per_entry = 8; + int fingerprint_len_bits = 5; int num_entries_power = 3; int num_entries = (int)Math.pow(2, num_entries_power); - QuotientFilter qf = new QuotientFilter(num_entries_power, bits_per_entry); + QuotientFilter qf = new QuotientFilter(num_entries_power, fingerprint_len_bits); qf.insert(1, 1); qf.insert(2, 1); @@ -139,15 +139,15 @@ static public void DeletionsWithWrap() { qf.delete(5, 3); //qf.pretty_print(); - BitSet result = new BitSet(num_entries * bits_per_entry); - result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 0, false, false, false, 0); - result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 1, true, false, false, 1); - result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 2, true, true, true, 2); - result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 3, false, false, true, 3); - result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 4, true, true, true, 4); - result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 5, true, false, true, 6); - result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 6, false, true, true, 7); - result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 7, false, false, true, 8); + BitSet result = new BitSet(num_entries * qf.getNumBitsPerEntry()); + result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 0, false, false, false, 0); + result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 1, true, false, false, 1); + result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 2, true, true, true, 2); + result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 3, false, false, true, 3); + result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 4, true, true, true, 4); + result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 5, true, false, true, 6); + result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 6, false, true, true, 7); + result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 7, false, false, true, 8); assertTrue(QuotientFilterTest.check_equality(qf, result, true)); } } diff --git a/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterTest.java b/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterTest.java index 3e9644dfe..00eb994a9 100644 --- a/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterTest.java +++ b/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterTest.java @@ -44,9 +44,9 @@ public static boolean get_fingerprint_bit(long index, long fingerprint) { */ @Test public void WikiInsertionTest() { - int bits_per_entry = 6; // 6 bits per entry => 3 bits fingerprint, resolved internally in the filter. + int fingerprint_len_bits = 3; // 3 bits fingerprint => 6 bits per entry, resolved internally in the filter. int num_entries_power = 3; - QuotientFilter qf = new QuotientFilter(num_entries_power, bits_per_entry); + QuotientFilter qf = new QuotientFilter(num_entries_power, fingerprint_len_bits, 1.0f); final int A = 1; final int B = 2; @@ -93,10 +93,10 @@ public int getState(QuotientFilter filter, int slot) { */ @Test public void PaperInsertionTest() { - int bits_per_entry = 8; + int fingerprint_len_bits = 5; int num_entries_power = 4; int num_entries = (int)Math.pow(2, num_entries_power); - QuotientFilter qf = new QuotientFilter(4, 8); + QuotientFilter qf = new QuotientFilter(num_entries_power, fingerprint_len_bits); final int A = 1; final int B = 2; @@ -117,57 +117,30 @@ public void PaperInsertionTest() { qf.insert(G, 6); qf.insert(H, 6); - BitSet result = new BitSet(num_entries * bits_per_entry); - result = set_slot_in_test(result, bits_per_entry, 0, false, false, false, 0); - result = set_slot_in_test(result, bits_per_entry, 1, true, false, false, A); - result = set_slot_in_test(result, bits_per_entry, 2, false, true, true, B); - result = set_slot_in_test(result, bits_per_entry, 3, true, false, false, C); - result = set_slot_in_test(result, bits_per_entry, 4, true, true, true, D); - result = set_slot_in_test(result, bits_per_entry, 5, false, true, true, E); - result = set_slot_in_test(result, bits_per_entry, 6, true, false, true, F); - result = set_slot_in_test(result, bits_per_entry, 7, false, false, true, G); - result = set_slot_in_test(result, bits_per_entry, 8, false, true, true, H); + BitSet result = new BitSet(num_entries * qf.getNumBitsPerEntry()); + result = set_slot_in_test(result, qf.getNumBitsPerEntry(), 0, false, false, false, 0); + result = set_slot_in_test(result, qf.getNumBitsPerEntry(), 1, true, false, false, A); + result = set_slot_in_test(result, qf.getNumBitsPerEntry(), 2, false, true, true, B); + result = set_slot_in_test(result, qf.getNumBitsPerEntry(), 3, true, false, false, C); + result = set_slot_in_test(result, qf.getNumBitsPerEntry(), 4, true, true, true, D); + result = set_slot_in_test(result, qf.getNumBitsPerEntry(), 5, false, true, true, E); + result = set_slot_in_test(result, qf.getNumBitsPerEntry(), 6, true, false, true, F); + result = set_slot_in_test(result, qf.getNumBitsPerEntry(), 7, false, false, true, G); + result = set_slot_in_test(result, qf.getNumBitsPerEntry(), 8, false, true, true, H); assertTrue(check_equality(qf, result, false)); } // test we don't get any false negatives for quotient filter @Test public void FalseNegativeTest() { - int bits_per_entry = 10; + int fingerprint_len_bits = 7; int num_entries_power = 10; - QuotientFilter filter = new QuotientFilter(num_entries_power, bits_per_entry); - int num_entries = (int) (Math.pow(2, num_entries_power) * 0.9 ); + QuotientFilter filter = new QuotientFilter(num_entries_power, fingerprint_len_bits); + int num_entries = (int) ((1 << num_entries_power) * 0.8); assertTrue(test_no_false_negatives(filter, num_entries)); } - /* - * Adds two entries to the end of the filter, causing an overflow into the extension slots. - * Checks this can be handled by the internal data structure and then deletes one of the keys from the filter. - */ - @Test - public void OverflowTest() { - final int bits_per_entry = 8; - final int num_entries_power = 3; - final int num_entries = (int)Math.pow(2, num_entries_power); - final int fingerprint_size = bits_per_entry - 3; - final QuotientFilter qf = new QuotientFilter(num_entries_power, bits_per_entry); - - final long fp1 = 1; - final long fp2 = 1 << fingerprint_size - 1; - qf.insert(fp1, num_entries - 1); - assertEquals(qf.getFingerprint(num_entries - 1), fp1); - assertEquals(getState(qf, num_entries - 1), 0b100); - qf.insert(fp2, num_entries - 1); - assertEquals(qf.getFingerprint(0), fp2); - assertEquals(getState(qf, 0), 0b011); - qf.delete(fp2, num_entries - 1); - assertEquals(qf.getFingerprint(0), 0); - assertEquals(getState(qf, 0), 0b000); - final boolean found = qf.search(fp1, num_entries - 1); - assertTrue(found); - } - /** * This method tests the functionality of the QuotientFilter and Iterator classes. It creates a QuotientFilter and inserts * six entries into it. An Iterator is then used to traverse the entries in the QuotientFilter. The method checks if the @@ -177,11 +150,9 @@ public void OverflowTest() { @Test public void testQuotientFilterInsertionAndIteration() { - int bits_per_entry = 8; + int fingerprint_len_bits = 5; int num_entries_power = 4; - //int num_entries = (int)Math.pow(2, num_entries_power); - //int fingerprint_size = bits_per_entry - 3; - QuotientFilter qf = new QuotientFilter(num_entries_power, bits_per_entry); + QuotientFilter qf = new QuotientFilter(num_entries_power, fingerprint_len_bits); qf.insert(0x1F, 2); qf.insert(0x1F, 3); @@ -200,9 +171,9 @@ public void testQuotientFilterInsertionAndIteration() { @Test public void testQuotientFilterIterator() { - int bits_per_entry = 8; + int fingerprint_len_bits = 5; int num_entries_power = 4; - QuotientFilter qf = new QuotientFilter(num_entries_power, bits_per_entry); + QuotientFilter qf = new QuotientFilter(num_entries_power, fingerprint_len_bits); qf.insert(0, 1); qf.insert(0, 4); @@ -299,7 +270,7 @@ static public boolean test_no_false_negatives(QuotientFilter filter, int num_ent @Test public void smallExpansion() { - final QuotientFilter qf = new QuotientFilter(5, 12); + final QuotientFilter qf = new QuotientFilter(5, 9); final int n = 30; for (int i = 0; i < n; i++) { qf.insert(i); } qf.printFilterSummary(); @@ -319,7 +290,7 @@ public void smallExpansion() { @Test public void expansion() { - final QuotientFilter qf = new QuotientFilter(16, 16); + final QuotientFilter qf = new QuotientFilter(16, 13); final int n = 60000; for (int i = 0; i < n; i++) { qf.insert(i); } // qf.printFilterSummary();