From 9a93a94eaae5fdd75308eb851e4bffd2ea252537 Mon Sep 17 00:00:00 2001 From: AlexanderSaydakov Date: Thu, 1 Aug 2024 17:42:06 -0700 Subject: [PATCH] cleanup --- .../quotientfilter/QuotientFilter.java | 154 ++++-------------- .../QuotientFilterBuilderTest.java | 26 +-- .../quotientfilter/QuotientFilterTest.java | 2 +- 3 files changed, 39 insertions(+), 143 deletions(-) diff --git a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java index 11f87bf77..b6ff86731 100644 --- a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java +++ b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java @@ -19,7 +19,8 @@ package org.apache.datasketches.filters.quotientfilter; -import java.util.ArrayList; +import static org.apache.datasketches.common.Util.LS; + import java.util.HashSet; import java.util.LinkedList; import java.util.Queue; @@ -41,12 +42,6 @@ public class QuotientFilter extends Filter { int numExpansions_; BitArray bitArray_; - // statistics, computed in the compute_statistics method. method should be called before these are used - long numRuns_; - long numClusters_; - public double avgRunLength_; - public double avgClusterLength_; - public QuotientFilter(final int lgQ, final int numFingerprintBits) { this(lgQ, numFingerprintBits, DEFAULT_LOAD_FACTOR); } @@ -57,7 +52,6 @@ public QuotientFilter(final int lgQ, final int numFingerprintBits, final float l loadFactor_ = loadFactor; bitArray_ = makeFilter(getNumSlots(), getNumBitsPerEntry()); numExpansions_ = 0; - //hash_type = XxHash.hashLong ; //HashType.xxh; } public boolean rejuvenate(final long key) { @@ -85,7 +79,9 @@ public int getFingerprintLength() { } void expand() { - if (getFingerprintLength() < 2) throw new SketchesException("for expansion value must have at least 2 bits"); + if (getFingerprintLength() < 2) { + throw new SketchesException("for expansion value must have at least 2 bits"); + } final QuotientFilter other = new QuotientFilter(lgQ_ + 1, numFingerprintBits_ - 1, loadFactor_); long i = 0; @@ -111,40 +107,6 @@ void expand() { numExpansions_++; } - // measures the number of bits per entry for the filter - public double measureNumBitsPerEntry() { - return measureNumBitsPerEntry(this, new ArrayList()); - } - - // measures the number of bits per entry for the filter - // it takes an array of filters as a parameter since some filter implementations here consist of multiple filter objects - protected static double measureNumBitsPerEntry(final QuotientFilter current, final ArrayList otherFilters) { - //System.out.println("--------------------------"); - //current.print_filter_summary(); - //System.out.println(); - double numEntries = current.getNumEntries(); - for (QuotientFilter q : otherFilters) { - //q.print_filter_summary(); - //System.out.println(); - numEntries += q.getNumEntries(); - } - long numBits = current.getNumBitsPerEntry() * current.getNumSlots(); - for (final QuotientFilter q : otherFilters) { - numBits += q.getNumBitsPerEntry() * q.getNumSlots(); - } - //System.out.println("total entries: \t\t" + num_entries); - //System.out.println("total bits: \t\t" + num_bits); - final double bits_per_entry = numBits / numEntries; - //System.out.println("total bits/entry: \t" + bits_per_entry); - //System.out.println(); - return bits_per_entry; - } - - // returns the fraction of occupied slots in the filter - public double getUtilization() { - return numEntries_ / (double) getNumSlots(); - } - public int getLgQ() { return lgQ_; } @@ -228,23 +190,21 @@ void modifySlot(final boolean isOccupied, final boolean isContinuation, final bo setFingerprint(index, fingerprint); } - // summarize some statistical measures about the filter - public void printFilterSummary() { + public String toString() { + final StringBuilder sb = new StringBuilder(); final long slots = getNumSlots(); final long numBits = slots * getNumBitsPerEntry(); - System.out.println("lgQ: " + lgQ_); - System.out.println("FP length: " + getFingerprintLength()); - System.out.println("load factor: " + getLoadFactor()); - System.out.println("bits: " + numBits); - System.out.println("bits/entry: " + numBits / (double)numEntries_); - System.out.println("entries: " + numEntries_); - System.out.println("expansions: " + numExpansions_); - System.out.println("load: " + numEntries_ / (double)(slots)); - computeStatistics(); - //System.out.println("num runs: \t\t" + num_runs); - //System.out.println("avg run length: \t" + avg_run_length); - //System.out.println("num clusters: \t\t" + num_clusters); - //System.out.println("avg cluster length: \t" + avg_cluster_length); + sb.append("***Quotient Filter Summary***").append(LS); + sb.append("lgQ: " + lgQ_).append(LS); + sb.append("FP length: " + getFingerprintLength()).append(LS); + sb.append("load factor: " + getLoadFactor()).append(LS); + sb.append("bits: " + numBits).append(LS); + sb.append("bits/entry: " + numBits / (double)numEntries_).append(LS); + sb.append("entries: " + numEntries_).append(LS); + sb.append("expansions: " + numExpansions_).append(LS); + sb.append("load: " + numEntries_ / (double)(slots)).append(LS); + sb.append("*********End Summary*********").append(LS); + return sb.toString(); } /* @@ -442,16 +402,16 @@ void insertFingerprintAndPushAllElse(long fingerprint, long index, final long ca numEntries_++; } - boolean delete(final long canonicalSlot, long runStartIndex, long matchingFingerprintIndex) { + boolean delete(final long canonicalSlot, final long runStartIndex, final long matchingFingerprintIndex) { long runEnd = findRunEnd(matchingFingerprintIndex); // the run has only one entry, we need to disable its is_occupied flag // we just remember we need to do this here, and we do it later to not interfere with counts - boolean turnOffOccupied = runStartIndex == runEnd; + final boolean turnOffOccupied = runStartIndex == runEnd; // First thing to do is move everything else in the run back by one slot for (long i = matchingFingerprintIndex; i != runEnd; i = (i + 1) & getSlotMask()) { - long f = getFingerprint((i + 1) & getSlotMask()); + final long f = getFingerprint((i + 1) & getSlotMask()); setFingerprint(i, f); } @@ -459,7 +419,7 @@ boolean delete(final long canonicalSlot, long runStartIndex, long matchingFinger // we can do this by counting the number of continuation flags set to true // and the number of occupied flags set to false from the start of the cluster to the given cell // and then subtracting: num_shifted_count - num_non_occupied = number of slots by which an entry is shifted - long clusterStart = findClusterStart(canonicalSlot); + final long clusterStart = findClusterStart(canonicalSlot); long numShiftedCount = 0; long numNonOccupied = 0; for (long i = clusterStart; i != ((runEnd + 1) & getSlotMask()); i = (i + 1) & getSlotMask()) { @@ -496,7 +456,8 @@ boolean delete(final long canonicalSlot, long runStartIndex, long matchingFinger runEnd = findRunEnd(nextRunStart); // before we start processing the next run, we check whether the previous run we shifted is now back to its canonical slot - // The condition num_shifted_count - num_non_occupied == 1 ensures that the run was shifted by only 1 slot, meaning it is now back in its proper place + // The condition num_shifted_count - num_non_occupied == 1 ensures that the run was shifted by only 1 slot, + // meaning it is now back in its proper place if (isOccupied((nextRunStart - 1) & getSlotMask()) && numShiftedCount - numNonOccupied == 1) { setShifted((nextRunStart - 1) & getSlotMask(), false); } else { @@ -504,7 +465,7 @@ boolean delete(final long canonicalSlot, long runStartIndex, long matchingFinger } for (long i = nextRunStart; i != ((runEnd + 1) & getSlotMask()); i = (i + 1) & getSlotMask()) { - long f = getFingerprint(i); + final long f = getFingerprint(i); setFingerprint((i - 1) & getSlotMask(), f); if (isContinuation(i)) { setContinuation((i - 1) & getSlotMask(), true); @@ -524,12 +485,12 @@ boolean delete(final long canonicalSlot, long runStartIndex, long matchingFinger boolean delete(final long fingerprint, final long canonicalSlot) { // if the run doesn't exist, the key can't have possibly been inserted - boolean doesRunExist = isOccupied(canonicalSlot); + final boolean doesRunExist = isOccupied(canonicalSlot); if (!doesRunExist) { return false; } - long runStartIndex = findRunStart(canonicalSlot); - long matchingFingerprintIndex = decideWhichFingerprintToDelete(runStartIndex, fingerprint); + final long runStartIndex = findRunStart(canonicalSlot); + final long matchingFingerprintIndex = decideWhichFingerprintToDelete(runStartIndex, fingerprint); if (matchingFingerprintIndex == -1) { // we didn't find a matching fingerprint return false; @@ -565,8 +526,8 @@ protected boolean _insert(final long largeHash) { protected boolean _delete(final long largeHash) { final long slotIndex = getSlotFromHash(largeHash); - long fingerprint = getFingerprintFromHash(largeHash); - boolean success = delete(fingerprint, slotIndex); + final long fingerprint = getFingerprintFromHash(largeHash); + final boolean success = delete(fingerprint, slotIndex); if (success) { numEntries_--; } @@ -583,59 +544,6 @@ public boolean getBitAtOffset(final int offset) { return bitArray_.getBit(offset); } - public void computeStatistics() { - numRuns_ = 0; - numClusters_ = 0; - double sumRunLengths = 0; - double sumClusterLengths = 0; - - int currentRunLength = 0; - int currentCluster_length = 0; - - final long numSlots = getNumSlots(); - for (long i = 0; i < numSlots; i++) { - final boolean occupied = isOccupied(i); - final boolean continuation = isContinuation(i); - final boolean shifted = isShifted(i); - - if (!occupied && !continuation && !shifted) { // empty slot - sumClusterLengths += currentCluster_length; - currentCluster_length = 0; - sumRunLengths += currentRunLength; - currentRunLength = 0; - } else if ( !occupied && !continuation && shifted ) { // start of new run - numRuns_++; - sumRunLengths += currentRunLength; - currentRunLength = 1; - currentCluster_length++; - } else if ( !occupied && continuation && !shifted ) { - // not used - } else if ( !occupied && continuation && shifted ) { // continuation of run - currentCluster_length++; - currentRunLength++; - } else if ( occupied && !continuation && !shifted ) { // start of new cluster & run - numRuns_++; - numClusters_++; - sumClusterLengths += currentCluster_length; - sumRunLengths += currentRunLength; - currentCluster_length = 1; - currentRunLength = 1; - } else if (occupied && !continuation && shifted ) { // start of new run - numRuns_++; - sumRunLengths += currentRunLength; - currentRunLength = 1; - currentCluster_length++; - } else if (occupied && continuation && !shifted ) { - // not used - } else if (occupied && continuation && shifted ) { // continuation of run - currentCluster_length++; - currentRunLength++; - } - } - avgRunLength_ = sumRunLengths / numRuns_; - avgClusterLength_ = sumClusterLengths / numClusters_; - } - public void merge(final QuotientFilter other) { if (lgQ_ + numFingerprintBits_ != other.lgQ_ + other.numFingerprintBits_) { throw new SketchesArgumentException("incompatible sketches in merge"); @@ -658,4 +566,4 @@ public void merge(final QuotientFilter other) { if (!fifo.isEmpty() && ! other.isContinuation(i)) { fifo.remove(); } } } -} \ No newline at end of file +} diff --git a/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterBuilderTest.java b/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterBuilderTest.java index f3920a70c..8f708455b 100644 --- a/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterBuilderTest.java +++ b/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterBuilderTest.java @@ -20,8 +20,6 @@ package org.apache.datasketches.filters.quotientfilter; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.filters.quotientfilter.QuotientFilterBuilder; -import org.apache.datasketches.memory.WritableMemory; import org.testng.annotations.Test; import static org.testng.Assert.*; @@ -43,8 +41,6 @@ public void testSuggestFingerprintLengthFromFPP(){ @Test public static void testSuggestLgNumSlots(){ - QuotientFilterBuilder qfb = new QuotientFilterBuilder(); - // invalid number of items assertThrows(SketchesArgumentException.class, () -> QuotientFilterBuilder.suggestLgNumSlots(0,0.9f)); assertThrows(SketchesArgumentException.class, () -> QuotientFilterBuilder.suggestLgNumSlots(-1, 0.9f)); @@ -58,17 +54,15 @@ public static void testSuggestLgNumSlots(){ for (int i = 0; i < numItems.length; i++) { long num = numItems[i]; - byte result = qfb.suggestLgNumSlots(num, 0.9f); + byte result = QuotientFilterBuilder.suggestLgNumSlots(num, 0.9f); assertEquals(result, results[i]); - result = qfb.suggestLgNumSlots(num); + result = QuotientFilterBuilder.suggestLgNumSlots(num); assertEquals(result, results[i]); } } @Test public static void testSuggestMaxNumItems(){ - QuotientFilterBuilder qfb = new QuotientFilterBuilder(); - // invalid number of slots assertThrows(SketchesArgumentException.class, () -> QuotientFilterBuilder.suggestMaxNumItemsFromNumSlots((byte)-127)); assertThrows(SketchesArgumentException.class, () -> QuotientFilterBuilder.suggestMaxNumItemsFromNumSlots((byte)0)); @@ -83,8 +77,8 @@ public static void testSuggestMaxNumItems(){ float eighty_pc_appx = 820f / 1024f; // ≈ 0.8 for (int i = 0; i < lgNumSlots.length; i++) { - long result_ninety = qfb.suggestMaxNumItemsFromNumSlots(lgNumSlots[i], ninety_pc_appx); - long result_eighty = qfb.suggestMaxNumItemsFromNumSlots(lgNumSlots[i], eighty_pc_appx); + long result_ninety = QuotientFilterBuilder.suggestMaxNumItemsFromNumSlots(lgNumSlots[i], ninety_pc_appx); + long result_eighty = QuotientFilterBuilder.suggestMaxNumItemsFromNumSlots(lgNumSlots[i], eighty_pc_appx); assertEquals(result_ninety, results_ninety_pc[i]); assertEquals(result_eighty, results_eighty_pc[i]); } @@ -92,15 +86,12 @@ public static void testSuggestMaxNumItems(){ @Test public static void testSuggestParamsFromMaxDistinctsFPP(){ - // invalid number of slots assertThrows(SketchesArgumentException.class, () -> QuotientFilterBuilder.suggestParamsFromMaxDistinctsFPP(5000000000L, 0.0001)); assertThrows(SketchesArgumentException.class, () -> QuotientFilterBuilder.suggestParamsFromMaxDistinctsFPP(100000000, 0.)); assertThrows(SketchesArgumentException.class, () -> QuotientFilterBuilder.suggestParamsFromMaxDistinctsFPP(100000000, 1.5)); assertThrows(SketchesArgumentException.class, () -> QuotientFilterBuilder.suggestParamsFromMaxDistinctsFPP(5000000000L, -1.)); - - QuotientFilterBuilder qfb = new QuotientFilterBuilder(); byte lgNumSlots ; byte fingerprintLength ; long[] numItems = {1L, 900L, 500_000_000L} ; @@ -112,21 +103,18 @@ public static void testSuggestParamsFromMaxDistinctsFPP(){ byte[] expected_fingerprintLength = {34, 7, 24} ; for (int i = 0; i < numItems.length; i++) { - QuotientFilterBuilder.QFPair pair = qfb.suggestParamsFromMaxDistinctsFPP(numItems[i], 0.9f, fpp[i]); + QuotientFilterBuilder.QFPair pair = QuotientFilterBuilder.suggestParamsFromMaxDistinctsFPP(numItems[i], 0.9f, fpp[i]); lgNumSlots = pair.lgNumSlots; fingerprintLength = pair.fingerprintLength; assertEquals(expected_lgNumSlotsNinety[i], lgNumSlots); assertEquals(expected_fingerprintLength[i], fingerprintLength); // 80% load - pair = qfb.suggestParamsFromMaxDistinctsFPP(numItems[i], fpp[i]); + pair = QuotientFilterBuilder.suggestParamsFromMaxDistinctsFPP(numItems[i], fpp[i]); lgNumSlots = pair.lgNumSlots; fingerprintLength = pair.fingerprintLength; assertEquals(expected_lgNumSlotsEighty[i], lgNumSlots); assertEquals(expected_fingerprintLength[i], fingerprintLength); } } - - - -} \ No newline at end of file +} diff --git a/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterTest.java b/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterTest.java index 5dc775b0c..9ddf9a28c 100644 --- a/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterTest.java +++ b/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterTest.java @@ -274,7 +274,7 @@ public void smallExpansion() { final QuotientFilter qf = new QuotientFilter(5, 9); final int n = 30; for (int i = 0; i < n; i++) { qf.insert(i); } - qf.printFilterSummary(); + System.out.println(qf.toString()); assertEquals(qf.getNumExpansions(), 1); assertEquals(qf.getNumEntries(), n);