Skip to content

constructor takes fingerprint length and load factor #581

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,11 @@

public class QuotientFilter extends Filter {

public static final double LOAD_FACTOR = 0.9;
public static final float DEFAULT_LOAD_FACTOR = 0.8f;

int numBitsPerEntry_;
int powerOfTwoSize_;
int lgQ_;
int numFingerprintBits_;
float loadFactor_;
int numEntries_;
int numExpansions_;
BitArray bitArray_;
Expand All @@ -45,10 +46,15 @@ public class QuotientFilter extends Filter {
public double avgRunLength_;
public double avgClusterLength_;

public QuotientFilter(final int powerOfTwo, final int numBitsPerEntry) {
powerOfTwoSize_ = powerOfTwo;
numBitsPerEntry_ = numBitsPerEntry;
bitArray_ = makeFilter(getNumSlots(), numBitsPerEntry);
/**
 * Creates a filter that uses {@code DEFAULT_LOAD_FACTOR} as its load factor.
 * Delegates to the three-argument constructor.
 *
 * @param lgQ base-2 logarithm of the number of slots (the slot count is {@code 1L << lgQ})
 * @param numFingerprintBits number of fingerprint bits stored in each slot
 */
public QuotientFilter(final int lgQ, final int numFingerprintBits) {
this(lgQ, numFingerprintBits, DEFAULT_LOAD_FACTOR);
}

/**
 * Creates a filter with an explicit load factor and allocates its backing bit array.
 * The arguments are stored as given; no range validation is performed here.
 *
 * @param lgQ base-2 logarithm of the number of slots (the slot count is {@code 1L << lgQ})
 * @param numFingerprintBits number of fingerprint bits stored in each slot
 * @param loadFactor multiplier applied to the slot count by getMaxEntriesBeforeExpansion()
 */
public QuotientFilter(final int lgQ, final int numFingerprintBits, final float loadFactor) {
lgQ_ = lgQ;
numFingerprintBits_ = numFingerprintBits;
loadFactor_ = loadFactor;
// lgQ_ and numFingerprintBits_ must already be assigned: getNumSlots() and
// getNumBitsPerEntry() read them to size the bit array.
bitArray_ = makeFilter(getNumSlots(), getNumBitsPerEntry());
numExpansions_ = 0;
//hash_type = XxHash.hashLong ; //HashType.xxh;
}
Expand All @@ -66,31 +72,31 @@ public int getNumExpansions() {
}

// Computes the number of slots multiplied by the load factor; the cast to long
// discards the fractional part.
// NOTE(review): presumably the entry count at which expand() is triggered - the caller
// is not visible here, confirm.
public long getMaxEntriesBeforeExpansion() {
return (long)(getNumSlots() * LOAD_FACTOR);
return (long)(getNumSlots() * loadFactor_);
}

// Builds the backing storage for the filter: a HeapBitArray constructed with
// initSize * bitsPerEntry (computed in long arithmetic).
BitArray makeFilter(final long initSize, final int bitsPerEntry) {
final long totalBits = initSize * bitsPerEntry;
return new HeapBitArray(totalBits);
}

// Returns the number of fingerprint bits held in each slot, i.e. the slot width
// without the 3 leading metadata bits.
public int getFingerprintLength() {
return numBitsPerEntry_ - 3;
return numFingerprintBits_;
}

QuotientFilter(final int powerOfTwo, final int numBitsPerEntry, final BitArray bitArray) {
powerOfTwoSize_ = powerOfTwo;
numBitsPerEntry_ = numBitsPerEntry;
bitArray_ = bitArray;
}
// QuotientFilter(final int powerOfTwo, final int numBitsPerEntry, final BitArray bitArray) {
// powerOfTwoSize_ = powerOfTwo;
// numBitsPerEntry_ = numBitsPerEntry;
// bitArray_ = bitArray;
// }

void expand() {
if (getFingerprintLength() < 2) throw new SketchesException("for expansion value must have at least 2 bits");
QuotientFilter other = new QuotientFilter(powerOfTwoSize_ + 1, numBitsPerEntry_ - 1);
final QuotientFilter other = new QuotientFilter(lgQ_ + 1, numFingerprintBits_ - 1, loadFactor_);

long i = 0;
if (!isSlotEmpty(i)) { i = findClusterStart(i); }

Queue<Long> fifo = new LinkedList<Long>();
final Queue<Long> fifo = new LinkedList<Long>();
long count = 0;
while (count < numEntries_) {
if (!isSlotEmpty(i)) {
Expand All @@ -104,8 +110,8 @@ void expand() {
i = (i + 1) & getSlotMask();
if (!fifo.isEmpty() && ! isContinuation(i)) { fifo.remove(); }
}
powerOfTwoSize_++;
numBitsPerEntry_--;
lgQ_++;
numFingerprintBits_--;
bitArray_ = other.bitArray_;
numExpansions_++;
}
Expand Down Expand Up @@ -146,7 +152,7 @@ public double getUtilization() {

// Returns the number of slots in the filter, without the extension/buffer slots.
// The count is a power of two: 1 shifted left by the stored log2 size (as a long).
public long getNumSlots() {
return 1L << powerOfTwoSize_;
return 1L << lgQ_;
}

long getSlotMask() {
Expand All @@ -166,18 +172,18 @@ void modifySlot(final boolean isOccupied, final boolean isContinuation, final bo

// Sets the fingerprint for a given slot index.
// The fingerprint is written as getFingerprintLength() bits starting 3 bits into the
// slot, i.e. right after the bits at offsets 0, 1 and 2 used by the flag accessors.
void setFingerprint(final long index, final long fingerprint) {
bitArray_.setBits(index * numBitsPerEntry_ + 3, getFingerprintLength(), fingerprint);
bitArray_.setBits(index * getNumBitsPerEntry() + 3, getFingerprintLength(), fingerprint);
}

// print a nice representation of the filter that can be understood.
// if vertical is on, each line will represent a slot
public String getPrettyStr(final boolean vertical) {
final StringBuffer sbr = new StringBuffer();
final long numBits = getNumSlots() * numBitsPerEntry_;
final long numBits = getNumSlots() * getNumBitsPerEntry();
for (long i = 0; i < numBits; i++) {
final long remainder = i % numBitsPerEntry_;
final long remainder = i % getNumBitsPerEntry();
if (remainder == 0) {
final long slot = i / numBitsPerEntry_;
final long slot = i / getNumBitsPerEntry();
sbr.append(" ");
if (vertical) {
sbr.append("\n" + String.format("%-10d", slot) + "\t");
Expand All @@ -199,12 +205,12 @@ public void prettyPrint() {

// Returns the fingerprint stored in a given slot index.
// Reads getFingerprintLength() bits starting 3 bits into the slot, skipping the
// bits at offsets 0, 1 and 2 used by the flag accessors.
long getFingerprint(final long index) {
return bitArray_.getBits(index * numBitsPerEntry_ + 3, getFingerprintLength());
return bitArray_.getBits(index * getNumBitsPerEntry() + 3, getFingerprintLength());
}

// Returns the raw bits of an entire slot as a long: the full slot width is read
// starting at the slot's first bit, so the value includes the metadata flags as well
// as the fingerprint.
long getSlot(final long index) {
return bitArray_.getBits(index * numBitsPerEntry_, numBitsPerEntry_);
return bitArray_.getBits(index * getNumBitsPerEntry(), getNumBitsPerEntry());
}

// compare a fingerprint input to the fingerprint in some slot index
Expand All @@ -222,7 +228,7 @@ void modifySlot(final boolean isOccupied, final boolean isContinuation, final bo
// summarize some statistical measures about the filter
public void printFilterSummary() {
final long slots = getNumSlots();
final long numBits = slots * numBitsPerEntry_;
final long numBits = slots * getNumBitsPerEntry();
System.out.println("slots: " + slots);
System.out.println("bits: " + numBits);
System.out.println("bits/entry: " + numBits / (double)numEntries_);
Expand All @@ -242,35 +248,35 @@ public void printFilterSummary() {
*/
@Override
public long getSpaceUse() {
return getNumSlots() * numBitsPerEntry_;
return getNumSlots() * getNumBitsPerEntry();
}

// Returns the total width of one slot in bits: the fingerprint bits plus the
// 3 metadata bits that precede them.
public int getNumBitsPerEntry() {
return numBitsPerEntry_;
return numFingerprintBits_ + 3;
}

// Reads the is-occupied flag of the slot at the given index.
// The flag is the first bit of the slot (bit offset 0 within the slot).
boolean isOccupied(final long index) {
return bitArray_.getBit(index * numBitsPerEntry_);
return bitArray_.getBit(index * getNumBitsPerEntry());
}

// Reads the is-continuation flag of the slot at the given index.
// The flag is the second bit of the slot (bit offset 1 within the slot).
boolean isContinuation(final long index) {
return bitArray_.getBit(index * numBitsPerEntry_ + 1);
return bitArray_.getBit(index * getNumBitsPerEntry() + 1);
}

// Reads the is-shifted flag of the slot at the given index.
// The flag is the third bit of the slot (bit offset 2 within the slot).
boolean isShifted(final long index) {
return bitArray_.getBit(index * numBitsPerEntry_ + 2);
return bitArray_.getBit(index * getNumBitsPerEntry() + 2);
}

// Assigns the is-occupied flag (bit offset 0 within the slot) of the slot at the
// given index to val.
void setOccupied(final long index, final boolean val) {
bitArray_.assignBit(index * numBitsPerEntry_, val);
bitArray_.assignBit(index * getNumBitsPerEntry(), val);
}

// Assigns the is-continuation flag (bit offset 1 within the slot) of the slot at the
// given index to val.
void setContinuation(final long index, final boolean val) {
bitArray_.assignBit(index * numBitsPerEntry_ + 1, val);
bitArray_.assignBit(index * getNumBitsPerEntry() + 1, val);
}

// Assigns the is-shifted flag (bit offset 2 within the slot) of the slot at the
// given index to val.
void setShifted(final long index, final boolean val) {
bitArray_.assignBit(index * numBitsPerEntry_ + 2, val);
bitArray_.assignBit(index * getNumBitsPerEntry() + 2, val);
}

boolean isSlotEmpty(final long index) {
Expand Down Expand Up @@ -432,7 +438,7 @@ void insertFingerprintAndPushAllElse(long fingerprint, long index, final long ca
numEntries_++;
}

boolean delete(final long fingerprint, final long canonicalSlot, long runStartIndex, long matchingFingerprintIndex) {
boolean delete(final long canonicalSlot, long runStartIndex, long matchingFingerprintIndex) {
long runEnd = findRunEnd(matchingFingerprintIndex);

// the run has only one entry, we need to disable its is_occupied flag
Expand Down Expand Up @@ -524,7 +530,7 @@ boolean delete(final long fingerprint, final long canonicalSlot) {
// we didn't find a matching fingerprint
return false;
}
return delete(fingerprint, canonicalSlot, runStartIndex, matchingFingerprintIndex);
return delete(canonicalSlot, runStartIndex, matchingFingerprintIndex);
}

long getSlotFromHash(final long largeHash) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ public class DeletionTests {
*/
@Test
static public void BasicDeletions() {
int bits_per_entry = 8;
int fingerprint_len_bits = 5;
int num_entries_power = 3;
int num_entries = (int)Math.pow(2, num_entries_power);
QuotientFilter qf = new QuotientFilter(num_entries_power, bits_per_entry);
int num_entries = 1 << num_entries_power;
QuotientFilter qf = new QuotientFilter(num_entries_power, fingerprint_len_bits);

long fp1 = 1 << 4;
long fp2 = 1 << 3;
Expand All @@ -60,9 +60,9 @@ static public void BasicDeletions() {
qf.delete(fp1, 1);
qf.delete(fp1, 1);

BitSet result = new BitSet(num_entries * bits_per_entry);
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 2, true, false, false, fp2);
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 4, true, false, false, fp3);
BitSet result = new BitSet(num_entries * qf.getNumBitsPerEntry());
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 2, true, false, false, fp2);
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 4, true, false, false, fp3);
assertTrue(QuotientFilterTest.check_equality(qf, result, true));
}

Expand All @@ -76,10 +76,10 @@ static public void BasicDeletions() {
*/
@Test
static public void Deletions() {
int bits_per_entry = 8;
int fingerprint_len_bits = 5;
int num_entries_power = 3;
int num_entries = (int)Math.pow(2, num_entries_power);
QuotientFilter qf = new QuotientFilter(num_entries_power, bits_per_entry);
QuotientFilter qf = new QuotientFilter(num_entries_power, fingerprint_len_bits);

qf.insert(1, 1);
qf.insert(2, 1);
Expand All @@ -96,15 +96,15 @@ static public void Deletions() {
qf.delete(3, 2);
qf.delete(5, 3);

BitSet result = new BitSet(num_entries * bits_per_entry);
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 0, false, false, false, 0);
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 1, true, false, false, 1);
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 2, true, true, true, 2);
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 3, true, false, true, 4);
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 4, false, false, true, 6);
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 5, false, true, true, 7);
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 6, true, false, false, 8);
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 7, false, false, false, 0);
BitSet result = new BitSet(num_entries * qf.getNumBitsPerEntry());
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 0, false, false, false, 0);
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 1, true, false, false, 1);
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 2, true, true, true, 2);
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 3, true, false, true, 4);
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 4, false, false, true, 6);
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 5, false, true, true, 7);
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 6, true, false, false, 8);
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 7, false, false, false, 0);

assertTrue(QuotientFilterTest.check_equality(qf, result, true));
}
Expand All @@ -121,10 +121,10 @@ static public void Deletions() {
* The expected outcome is that after deletion, the remaining keys should be in their canonical slots.
*/
static public void DeletionsWithWrap() {
int bits_per_entry = 8;
int fingerprint_len_bits = 5;
int num_entries_power = 3;
int num_entries = (int)Math.pow(2, num_entries_power);
QuotientFilter qf = new QuotientFilter(num_entries_power, bits_per_entry);
QuotientFilter qf = new QuotientFilter(num_entries_power, fingerprint_len_bits);

qf.insert(1, 1);
qf.insert(2, 1);
Expand All @@ -139,15 +139,15 @@ static public void DeletionsWithWrap() {
qf.delete(5, 3);
//qf.pretty_print();

BitSet result = new BitSet(num_entries * bits_per_entry);
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 0, false, false, false, 0);
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 1, true, false, false, 1);
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 2, true, true, true, 2);
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 3, false, false, true, 3);
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 4, true, true, true, 4);
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 5, true, false, true, 6);
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 6, false, true, true, 7);
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 7, false, false, true, 8);
BitSet result = new BitSet(num_entries * qf.getNumBitsPerEntry());
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 0, false, false, false, 0);
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 1, true, false, false, 1);
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 2, true, true, true, 2);
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 3, false, false, true, 3);
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 4, true, true, true, 4);
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 5, true, false, true, 6);
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 6, false, true, true, 7);
result = QuotientFilterTest.set_slot_in_test(result, qf.getNumBitsPerEntry(), 7, false, false, true, 8);
assertTrue(QuotientFilterTest.check_equality(qf, result, true));
}
}
Loading
Loading