From 5c7f5abf9f58bebb9af0732bcec09dd7688d3dbf Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Fri, 31 May 2024 17:53:39 -0700 Subject: [PATCH 01/22] Changes in ds-java required to move to java 17 as ds-java 7.x and compatible with the new ds-memory branch integrateJava17_v3 -> ds-memory 3.X --- pom.xml | 10 +- .../apache/datasketches/cpc/CpcSketch.java | 2 +- .../datasketches/hll/DirectHllArray.java | 8 +- .../org/apache/datasketches/theta/Sketch.java | 6 +- .../apache/datasketches/theta/Sketches.java | 12 +- .../filters/bloomfilter/BloomFilterTest.java | 13 +- .../bloomfilter/DirectBitArrayRTest.java | 4 +- .../bloomfilter/DirectBitArrayTest.java | 12 +- ...ash3v2Test.java => MurmurHash3v3Test.java} | 75 ++++---- .../hll/DirectAuxHashMapTest.java | 14 +- .../hll/DirectCouponListTest.java | 11 +- .../datasketches/hll/PreambleUtilTest.java | 8 +- .../quantiles/DebugUnionTest.java | 12 +- .../DirectQuantilesMemoryRequestTest.java | 58 +++--- .../quantiles/DoublesSketchTest.java | 39 ++-- .../quantiles/PreambleUtilTest.java | 13 +- .../datasketches/theta/CompactSketchTest.java | 12 +- .../theta/DirectQuickSelectSketchTest.java | 166 +++++++++-------- .../theta/HeapifyWrapSerVer1and2Test.java | 171 +++++++++--------- .../datasketches/theta/SketchesTest.java | 2 +- .../datasketches/theta/UnionImplTest.java | 26 ++- .../datasketches/theta/UpdateSketchTest.java | 4 +- 22 files changed, 364 insertions(+), 314 deletions(-) rename src/test/java/org/apache/datasketches/hash/{MurmurHash3v2Test.java => MurmurHash3v3Test.java} (82%) diff --git a/pom.xml b/pom.xml index 35107f24c..e43a426ba 100644 --- a/pom.xml +++ b/pom.xml @@ -88,7 +88,7 @@ under the License. - 7.5.1 + 7.10.0 generate_java_files check_cpp_files @@ -96,7 +96,7 @@ under the License. 3.6.3 - 1.8 + 17 ${java.version} ${java.version} -Xmx4g -Duser.language=en -Duser.country=US -Dfile.encoding=UTF-8 @@ -134,12 +134,12 @@ under the License. - + @@ -193,7 +193,7 @@ under the License. 
- [1.8.0,9.0),[11.0,12.0) + [1.8.0,9.0),[11.0,12.0), [17,18) [${maven.version},) diff --git a/src/main/java/org/apache/datasketches/cpc/CpcSketch.java b/src/main/java/org/apache/datasketches/cpc/CpcSketch.java index a374b5745..4ed89dd19 100644 --- a/src/main/java/org/apache/datasketches/cpc/CpcSketch.java +++ b/src/main/java/org/apache/datasketches/cpc/CpcSketch.java @@ -300,7 +300,7 @@ public byte[] toByteArray() { final long cap = state.getRequiredSerializedBytes(); final WritableMemory wmem = WritableMemory.allocate((int) cap); state.exportToMemory(wmem); - return (byte[]) wmem.getArray(); + return wmem.getArray(); } /** diff --git a/src/main/java/org/apache/datasketches/hll/DirectHllArray.java b/src/main/java/org/apache/datasketches/hll/DirectHllArray.java index 5b3e1a4ff..07835da69 100644 --- a/src/main/java/org/apache/datasketches/hll/DirectHllArray.java +++ b/src/main/java/org/apache/datasketches/hll/DirectHllArray.java @@ -56,7 +56,7 @@ abstract class DirectHllArray extends AbstractHllArray { WritableMemory wmem; Memory mem; Object memObj; - long memAdd; + //long memAdd; final boolean compact; private static int checkMemCompactFlag(final WritableMemory wmem, final int lgConfigK) { @@ -70,7 +70,7 @@ private static int checkMemCompactFlag(final WritableMemory wmem, final int lgCo this.wmem = wmem; mem = wmem; memObj = wmem.getArray(); - memAdd = wmem.getCumulativeOffset(0L); + //memAdd = wmem.getCumulativeOffset(0L); compact = extractCompactFlag(mem); insertEmptyFlag(wmem, false); } @@ -81,7 +81,7 @@ private static int checkMemCompactFlag(final WritableMemory wmem, final int lgCo wmem = null; this.mem = mem; memObj = ((WritableMemory) mem).getArray(); - memAdd = mem.getCumulativeOffset(0L); + //memAdd = mem.getCumulativeOffset(0L); compact = extractCompactFlag(mem); } @@ -90,7 +90,7 @@ final void updateMemory(final WritableMemory newWmem) { wmem = newWmem; mem = newWmem; memObj = wmem.getArray(); - memAdd = wmem.getCumulativeOffset(0L); + //memAdd = 
wmem.getCumulativeOffset(0L); } @Override diff --git a/src/main/java/org/apache/datasketches/theta/Sketch.java b/src/main/java/org/apache/datasketches/theta/Sketch.java index cc1fd4d23..d13fe9b80 100644 --- a/src/main/java/org/apache/datasketches/theta/Sketch.java +++ b/src/main/java/org/apache/datasketches/theta/Sketch.java @@ -311,11 +311,11 @@ public static int getMaxCompactSketchBytes(final int numberOfEntries) { * log_base2 of the number of nominal entries, which is a power of 2. * @param lgNomEntries Nominal Entries * @return the maximum number of storage bytes required for a CompactSketch with the given - * nomEntries. + * lgNomEntries. */ public static int getCompactSketchMaxBytes(final int lgNomEntries) { - return (int)((2 << lgNomEntries) * ThetaUtil.REBUILD_THRESHOLD) - + Family.QUICKSELECT.getMaxPreLongs() * Long.BYTES; + return (int)((2 << lgNomEntries) * ThetaUtil.REBUILD_THRESHOLD + + Family.QUICKSELECT.getMaxPreLongs()) * Long.BYTES; } /** diff --git a/src/main/java/org/apache/datasketches/theta/Sketches.java b/src/main/java/org/apache/datasketches/theta/Sketches.java index 4b1461876..077d9d5ae 100644 --- a/src/main/java/org/apache/datasketches/theta/Sketches.java +++ b/src/main/java/org/apache/datasketches/theta/Sketches.java @@ -86,7 +86,7 @@ public static int getMaxAnotBResultBytes(final int maxNomEntries) { * @return the maximum number of storage bytes required for a CompactSketch with the given number * of entries. * @see Sketch#getMaxCompactSketchBytes(int) - * @deprecated as a public method. Use {@link #getCompactSketchMaxBytes(int) instead} + * @deprecated as a public method. Use {@link #getCompactSketchMaxBytes(int) getCompactSketchMaxBytes(int)} instead. 
*/ @Deprecated public static int getMaxCompactSketchBytes(final int numberOfEntries) { @@ -95,14 +95,14 @@ public static int getMaxCompactSketchBytes(final int numberOfEntries) { /** * Returns the maximum number of storage bytes required for a CompactSketch given the configured - * number of nominal entries (power of 2). - * @param nomEntries Nominal Entries + * log_base2 of the number of nominal entries, which is a power of 2. + * @param lgNomEntries Nominal Entries * @return the maximum number of storage bytes required for a CompactSketch with the given - * nomEntries. + * lgNomEntries. * @see Sketch#getCompactSketchMaxBytes(int) */ - public static int getCompactSketchMaxBytes(final int nomEntries) { - return Sketch.getCompactSketchMaxBytes(nomEntries); + public static int getCompactSketchMaxBytes(final int lgNomEntries) { + return Sketch.getCompactSketchMaxBytes(lgNomEntries); } /** diff --git a/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterTest.java b/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterTest.java index dedaf9db3..7a0fbb7ff 100644 --- a/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterTest.java +++ b/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterTest.java @@ -24,14 +24,19 @@ import static org.testng.Assert.assertThrows; import static org.testng.Assert.assertTrue; +import java.nio.ByteOrder; + import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesReadOnlyException; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableHandle; +//import org.apache.datasketches.memory.WritableHandle; import org.apache.datasketches.memory.WritableMemory; import org.testng.annotations.Test; +import jdk.incubator.foreign.ResourceScope; + public class BloomFilterTest { @Test @@ -51,8 
+56,10 @@ public void createNewFilterTest() throws Exception { assertFalse(bf1.isDirect()); assertFalse(bf1.isReadOnly()); - try (WritableHandle wh = WritableMemory.allocateDirect(sizeBytes)) { - final WritableMemory wmem = wh.getWritable(); + WritableMemory wmem; + try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(sizeBytes, + new DefaultMemoryRequestServer())).scope()) { + final BloomFilter bf2 = new BloomFilter(numBits, numHashes, seed, wmem); assertTrue(bf2.isEmpty()); assertTrue(bf2.hasMemory()); diff --git a/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayRTest.java b/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayRTest.java index 521019e62..bbedd2fb7 100644 --- a/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayRTest.java +++ b/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayRTest.java @@ -142,8 +142,8 @@ public void bitAddressOutOfBoundsNonEmptyTest() { final Memory mem = bitArrayToMemory(hba); DirectBitArrayR dba = DirectBitArrayR.wrap(mem, hba.isEmpty()); - assertThrows(AssertionError.class, () -> dba.getBit(-10)); - assertThrows(AssertionError.class, () -> dba.getBit(2048)); + assertThrows(IndexOutOfBoundsException.class, () -> dba.getBit(-10)); + assertThrows(IndexOutOfBoundsException.class, () -> dba.getBit(2048)); } @Test diff --git a/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayTest.java b/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayTest.java index a45bcbb82..1df6cc9d9 100644 --- a/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayTest.java +++ b/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayTest.java @@ -172,12 +172,12 @@ public void bitAddresOutOfBoundsNonEmptyTest() { dba.getAndSetBit(i); } - assertThrows(AssertionError.class, () -> dba.getBit(-10)); - assertThrows(AssertionError.class, () -> dba.getBit(2048)); - 
assertThrows(AssertionError.class, () -> dba.setBit(-20)); - assertThrows(AssertionError.class, () -> dba.setBit(4096)); - assertThrows(AssertionError.class, () -> dba.getAndSetBit(-30)); - assertThrows(AssertionError.class, () -> dba.getAndSetBit(8192)); + assertThrows(IndexOutOfBoundsException.class, () -> dba.getBit(-10)); + assertThrows(IndexOutOfBoundsException.class, () -> dba.getBit(2048)); + assertThrows(IndexOutOfBoundsException.class, () -> dba.setBit(-20)); + assertThrows(IndexOutOfBoundsException.class, () -> dba.setBit(4096)); + assertThrows(IndexOutOfBoundsException.class, () -> dba.getAndSetBit(-30)); + assertThrows(IndexOutOfBoundsException.class, () -> dba.getAndSetBit(8192)); } @Test diff --git a/src/test/java/org/apache/datasketches/hash/MurmurHash3v2Test.java b/src/test/java/org/apache/datasketches/hash/MurmurHash3v3Test.java similarity index 82% rename from src/test/java/org/apache/datasketches/hash/MurmurHash3v2Test.java rename to src/test/java/org/apache/datasketches/hash/MurmurHash3v3Test.java index 23f369e63..8699a091a 100644 --- a/src/test/java/org/apache/datasketches/hash/MurmurHash3v2Test.java +++ b/src/test/java/org/apache/datasketches/hash/MurmurHash3v3Test.java @@ -28,13 +28,13 @@ import org.testng.annotations.Test; import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.MurmurHash3v2; +import org.apache.datasketches.memory.internal.MurmurHash3v3; import org.apache.datasketches.memory.WritableMemory; /** * @author Lee Rhodes */ -public class MurmurHash3v2Test { +public class MurmurHash3v3Test { private Random rand = new Random(); private static final int trials = 1 << 20; @@ -154,33 +154,33 @@ private static final long[] hashV1(byte[] key, long seed) { } private static final long[] hashV2(long[] key, long seed) { - return MurmurHash3v2.hash(key, seed); + return MurmurHash3v3.hash(key, seed); } private static final long[] hashV2(int[] key2, long seed) { - return MurmurHash3v2.hash(key2, seed); + return 
MurmurHash3v3.hash(key2, seed); } private static final long[] hashV2(char[] key, long seed) { - return MurmurHash3v2.hash(key, seed); + return MurmurHash3v3.hash(key, seed); } private static final long[] hashV2(byte[] key, long seed) { - return MurmurHash3v2.hash(key, seed); + return MurmurHash3v3.hash(key, seed); } //V2 single primitives private static final long[] hashV2(long key, long seed, long[] out) { - return MurmurHash3v2.hash(key, seed, out); + return MurmurHash3v3.hash(key, seed, out); } // private static final long[] hashV2(double key, long seed, long[] out) { -// return MurmurHash3v2.hash(key, seed, out); +// return MurmurHash3v3.hash(key, seed, out); // } // private static final long[] hashV2(String key, long seed, long[] out) { -// return MurmurHash3v2.hash(key, seed, out); +// return MurmurHash3v3.hash(key, seed, out); // } @@ -199,7 +199,7 @@ public void offsetChecks() { for (int offset = 0; offset < 16; offset++) { int arrLen = cap - offset; - hash1 = MurmurHash3v2.hash(wmem, offset, arrLen, seed, hash1); + hash1 = MurmurHash3v3.hash(wmem, offset, arrLen, seed, hash1); byte[] byteArr2 = new byte[arrLen]; wmem.getByteArray(offset, byteArr2, 0, arrLen); hash2 = MurmurHash3.hash(byteArr2, seed); @@ -222,8 +222,8 @@ public void byteArrChecks() { for (int i = 0; i < j; i++) { wmem.putByte(i, (byte) (-128 + i)); } long[] hash1 = MurmurHash3.hash(in, 0); - hash2 = MurmurHash3v2.hash(wmem, offset, bytes, seed, hash2); - long[] hash3 = MurmurHash3v2.hash(in, seed); + hash2 = MurmurHash3v3.hash(wmem, offset, bytes, seed, hash2); + long[] hash3 = MurmurHash3v3.hash(in, seed); assertEquals(hash1, hash2); assertEquals(hash1, hash3); @@ -246,8 +246,8 @@ public void charArrChecks() { for (int i = 0; i < j; i++) { wmem.putInt(i, i); } long[] hash1 = MurmurHash3.hash(in, 0); - hash2 = MurmurHash3v2.hash(wmem, offset, bytes, seed, hash2); - long[] hash3 = MurmurHash3v2.hash(in, seed); + hash2 = MurmurHash3v3.hash(wmem, offset, bytes, seed, hash2); + long[] hash3 = 
MurmurHash3v3.hash(in, seed); assertEquals(hash1, hash2); assertEquals(hash1, hash3); @@ -270,8 +270,8 @@ public void intArrChecks() { for (int i = 0; i < j; i++) { wmem.putInt(i, i); } long[] hash1 = MurmurHash3.hash(in, 0); - hash2 = MurmurHash3v2.hash(wmem, offset, bytes, seed, hash2); - long[] hash3 = MurmurHash3v2.hash(in, seed); + hash2 = MurmurHash3v3.hash(wmem, offset, bytes, seed, hash2); + long[] hash3 = MurmurHash3v3.hash(in, seed); assertEquals(hash1, hash2); assertEquals(hash1, hash3); @@ -294,8 +294,8 @@ public void longArrChecks() { for (int i = 0; i < j; i++) { wmem.putLong(i, i); } long[] hash1 = MurmurHash3.hash(in, 0); - hash2 = MurmurHash3v2.hash(wmem, offset, bytes, seed, hash2); - long[] hash3 = MurmurHash3v2.hash(in, seed); + hash2 = MurmurHash3v3.hash(wmem, offset, bytes, seed, hash2); + long[] hash3 = MurmurHash3v3.hash(in, seed); assertEquals(hash1, hash2); assertEquals(hash1, hash3); @@ -313,8 +313,8 @@ public void longCheck() { WritableMemory wmem = WritableMemory.writableWrap(in); long[] hash1 = MurmurHash3.hash(in, 0); - hash2 = MurmurHash3v2.hash(wmem, offset, bytes, seed, hash2); - long[] hash3 = MurmurHash3v2.hash(in, seed); + hash2 = MurmurHash3v3.hash(wmem, offset, bytes, seed, hash2); + long[] hash3 = MurmurHash3v3.hash(in, seed); assertEquals(hash1, hash2); assertEquals(hash1, hash3); @@ -325,62 +325,57 @@ public void checkEmptiesNulls() { long seed = 123; long[] hashOut = new long[2]; try { - MurmurHash3v2.hash(Memory.wrap(new long[0]), 0, 0, seed, hashOut); //mem empty - fail(); - } catch (final IllegalArgumentException e) { } //OK - try { - Memory mem = null; - MurmurHash3v2.hash(mem, 0, 0, seed, hashOut); //mem null + MurmurHash3v3.hash(Memory.wrap(new long[0]), 0, 0, seed, hashOut); //mem empty fail(); } catch (final IllegalArgumentException e) { } //OK try { String s = ""; - MurmurHash3v2.hash(s, seed, hashOut); //string empty + MurmurHash3v3.hash(s, seed, hashOut); //string empty fail(); } catch (final 
IllegalArgumentException e) { } //OK try { String s = null; - MurmurHash3v2.hash(s, seed, hashOut); //string null + MurmurHash3v3.hash(s, seed, hashOut); //string null fail(); } catch (final IllegalArgumentException e) { } //OK try { byte[] barr = new byte[0]; - MurmurHash3v2.hash(barr, seed); //byte[] empty + MurmurHash3v3.hash(barr, seed); //byte[] empty fail(); } catch (final IllegalArgumentException e) { } //OK try { byte[] barr = null; - MurmurHash3v2.hash(barr, seed); //byte[] null + MurmurHash3v3.hash(barr, seed); //byte[] null fail(); } catch (final IllegalArgumentException e) { } //OK try { char[] carr = new char[0]; - MurmurHash3v2.hash(carr, seed); //char[] empty + MurmurHash3v3.hash(carr, seed); //char[] empty fail(); } catch (final IllegalArgumentException e) { } //OK try { char[] carr = null; - MurmurHash3v2.hash(carr, seed); //char[] null + MurmurHash3v3.hash(carr, seed); //char[] null fail(); } catch (final IllegalArgumentException e) { } //OK try { int[] iarr = new int[0]; - MurmurHash3v2.hash(iarr, seed); //int[] empty + MurmurHash3v3.hash(iarr, seed); //int[] empty fail(); } catch (final IllegalArgumentException e) { } //OK try { int[] iarr = null; - MurmurHash3v2.hash(iarr, seed); //int[] null + MurmurHash3v3.hash(iarr, seed); //int[] null fail(); } catch (final IllegalArgumentException e) { } //OK try { long[] larr = new long[0]; - MurmurHash3v2.hash(larr, seed); //long[] empty + MurmurHash3v3.hash(larr, seed); //long[] empty fail(); } catch (final IllegalArgumentException e) { } //OK try { long[] larr = null; - MurmurHash3v2.hash(larr, seed); //long[] null + MurmurHash3v3.hash(larr, seed); //long[] null fail(); } catch (final IllegalArgumentException e) { } //OK } @@ -390,9 +385,9 @@ public void checkStringLong() { long seed = 123; long[] hashOut = new long[2]; String s = "123"; - assertTrue(MurmurHash3v2.hash(s, seed, hashOut)[0] != 0); + assertTrue(MurmurHash3v3.hash(s, seed, hashOut)[0] != 0); long v = 123; - 
assertTrue(MurmurHash3v2.hash(v, seed, hashOut)[0] != 0); + assertTrue(MurmurHash3v3.hash(v, seed, hashOut)[0] != 0); } @Test @@ -420,8 +415,8 @@ private static long[] checkDouble(double dbl) { WritableMemory wmem = WritableMemory.writableWrap(dataArr); long[] hash1 = MurmurHash3.hash(dataArr, 0); - hash2 = MurmurHash3v2.hash(wmem, offset, bytes, seed, hash2); - long[] hash3 = MurmurHash3v2.hash(dbl, seed, hash2); + hash2 = MurmurHash3v3.hash(wmem, offset, bytes, seed, hash2); + long[] hash3 = MurmurHash3v3.hash(dbl, seed, hash2); assertEquals(hash1, hash2); assertEquals(hash1, hash3); diff --git a/src/test/java/org/apache/datasketches/hll/DirectAuxHashMapTest.java b/src/test/java/org/apache/datasketches/hll/DirectAuxHashMapTest.java index 78b18c1cc..7b3cd5c67 100644 --- a/src/test/java/org/apache/datasketches/hll/DirectAuxHashMapTest.java +++ b/src/test/java/org/apache/datasketches/hll/DirectAuxHashMapTest.java @@ -31,12 +31,11 @@ import org.apache.datasketches.common.SketchesStateException; import org.apache.datasketches.memory.DefaultMemoryRequestServer; -import org.testng.annotations.Test; - import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableHandle; import org.apache.datasketches.memory.WritableMemory; +import org.testng.annotations.Test; +import jdk.incubator.foreign.ResourceScope; /** * @author Lee Rhodes @@ -48,11 +47,12 @@ public void checkGrow() { int lgConfigK = 4; TgtHllType tgtHllType = TgtHllType.HLL_4; int n = 8; //put lgConfigK == 4 into HLL mode - int bytes = HllSketch.getMaxUpdatableSerializationBytes(lgConfigK, tgtHllType); + long bytes = HllSketch.getMaxUpdatableSerializationBytes(lgConfigK, tgtHllType); HllSketch hllSketch; - try (WritableHandle handle = WritableMemory.allocateDirect(bytes, - ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())) { - WritableMemory wmem = handle.getWritable(); + WritableMemory wmem; + try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(bytes, 1, + 
ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())).scope()) { + hllSketch = new HllSketch(lgConfigK, tgtHllType, wmem); for (int i = 0; i < n; i++) { hllSketch.update(i); diff --git a/src/test/java/org/apache/datasketches/hll/DirectCouponListTest.java b/src/test/java/org/apache/datasketches/hll/DirectCouponListTest.java index 38cbc4977..c6c006f29 100644 --- a/src/test/java/org/apache/datasketches/hll/DirectCouponListTest.java +++ b/src/test/java/org/apache/datasketches/hll/DirectCouponListTest.java @@ -27,8 +27,11 @@ import org.testng.annotations.Test; +import jdk.incubator.foreign.ResourceScope; + +import org.apache.datasketches.memory.DefaultMemoryRequestServer; import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableHandle; +//import org.apache.datasketches.memory.WritableHandle; import org.apache.datasketches.memory.WritableMemory; /** @@ -69,9 +72,9 @@ private static void promotions(int lgConfigK, int n, TgtHllType tgtHllType, bool //println("DIRECT"); byte[] barr1; - WritableMemory wmem = null; - try (WritableHandle hand = WritableMemory.allocateDirect(bytes)) { - wmem = hand.getWritable(); + WritableMemory wmem; + try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(bytes, + new DefaultMemoryRequestServer())).scope()) { //byte[] byteArr = new byte[bytes]; //WritableMemory wmem = WritableMemory.wrap(byteArr); hllSketch = new HllSketch(lgConfigK, tgtHllType, wmem); diff --git a/src/test/java/org/apache/datasketches/hll/PreambleUtilTest.java b/src/test/java/org/apache/datasketches/hll/PreambleUtilTest.java index cd1e0cbcf..deb8c5be5 100644 --- a/src/test/java/org/apache/datasketches/hll/PreambleUtilTest.java +++ b/src/test/java/org/apache/datasketches/hll/PreambleUtilTest.java @@ -109,7 +109,7 @@ public void checkCorruptMemoryInput() { HllSketch sk = new HllSketch(12); byte[] memObj = sk.toCompactByteArray(); WritableMemory wmem = WritableMemory.writableWrap(memObj); - long memAdd = 
wmem.getCumulativeOffset(0); + //long memAdd = wmem.getCumulativeOffset(0); HllSketch bad; //checkFamily @@ -148,7 +148,7 @@ public void checkCorruptMemoryInput() { for (int i = 1; i <= 15; i++) { sk.update(i); } memObj = sk.toCompactByteArray(); wmem = WritableMemory.writableWrap(memObj); - memAdd = wmem.getCumulativeOffset(0); + //memAdd = wmem.getCumulativeOffset(0); //check wrong PreInts and SET try { @@ -162,7 +162,7 @@ public void checkCorruptMemoryInput() { for (int i = 15; i <= 1000; i++) { sk.update(i); } memObj = sk.toCompactByteArray(); wmem = WritableMemory.writableWrap(memObj); - memAdd = wmem.getCumulativeOffset(0); + //memAdd = wmem.getCumulativeOffset(0); //check wrong PreInts and HLL try { @@ -179,7 +179,7 @@ public void checkExtractFlags() { int bytes = HllSketch.getMaxUpdatableSerializationBytes(4, TgtHllType.HLL_4); WritableMemory wmem = WritableMemory.allocate(bytes); Object memObj = wmem.getArray(); - long memAdd = wmem.getCumulativeOffset(0L); + //long memAdd = wmem.getCumulativeOffset(0L); HllSketch sk = new HllSketch(4, TgtHllType.HLL_4, wmem); int flags = extractFlags(wmem); assertEquals(flags, EMPTY_FLAG_MASK); diff --git a/src/test/java/org/apache/datasketches/quantiles/DebugUnionTest.java b/src/test/java/org/apache/datasketches/quantiles/DebugUnionTest.java index bbf89f338..d19d3b471 100644 --- a/src/test/java/org/apache/datasketches/quantiles/DebugUnionTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/DebugUnionTest.java @@ -23,10 +23,15 @@ import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; +import java.nio.ByteOrder; import java.util.HashSet; import org.testng.annotations.Test; -import org.apache.datasketches.memory.WritableHandle; + +import jdk.incubator.foreign.ResourceScope; + +import org.apache.datasketches.memory.DefaultMemoryRequestServer; +//import org.apache.datasketches.memory.WritableHandle; import org.apache.datasketches.memory.WritableMemory; import 
org.apache.datasketches.quantilescommon.QuantilesDoublesSketchIterator; @@ -61,8 +66,9 @@ public void test() { DoublesSketch.setRandom(1); //make deterministic for test DoublesUnion dUnion; DoublesSketch dSketch; - try ( WritableHandle wdh = WritableMemory.allocateDirect(10_000_000) ) { - WritableMemory wmem = wdh.getWritable(); + WritableMemory wmem; + try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(10_000_000, + new DefaultMemoryRequestServer())).scope()) { dUnion = DoublesUnion.builder().setMaxK(8).build(wmem); for (int s = 0; s < numSketches; s++) { dUnion.union(sketchArr[s]); } dSketch = dUnion.getResult(); //result is on heap diff --git a/src/test/java/org/apache/datasketches/quantiles/DirectQuantilesMemoryRequestTest.java b/src/test/java/org/apache/datasketches/quantiles/DirectQuantilesMemoryRequestTest.java index 5c2882c5e..84145239e 100644 --- a/src/test/java/org/apache/datasketches/quantiles/DirectQuantilesMemoryRequestTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/DirectQuantilesMemoryRequestTest.java @@ -30,9 +30,11 @@ import org.testng.annotations.Test; import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableHandle; +//import org.apache.datasketches.memory.WritableHandle; import org.apache.datasketches.memory.WritableMemory; +import jdk.incubator.foreign.ResourceScope; + /** * The concept for these tests is that the "MemoryManager" classes below are proxies for the * implementation that owns the native memory allocations, thus is responsible for @@ -47,10 +49,11 @@ public void checkLimitedMemoryScenarios() { //Requesting application final int initBytes = ((2 * k) + 4) << 3; //just the BB //########## Owning Implementation - // This part would actually be part of the Memory owning implemention so it is faked here - try (WritableHandle wdh = WritableMemory.allocateDirect(initBytes, - ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())) { - final WritableMemory wmem = 
wdh.getWritable(); + // This part would actually be part of the Memory owning implementation so it is faked here + WritableMemory wmem; + try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(initBytes, 1, + ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())).scope()) { + println("Initial mem size: " + wmem.getCapacity()); //########## Receiving Application @@ -84,13 +87,12 @@ public void checkGrowBaseBuf() { final int u = 32; // don't need the BB to fill here final int initBytes = (4 + (u / 2)) << 3; // not enough to hold everything - try (WritableHandle memHandler = WritableMemory.allocateDirect(initBytes, - ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())) { - //final MemoryManager memMgr = new MemoryManager(); - //final WritableMemory mem1 = memMgr.request(initBytes); - final WritableMemory mem1 = memHandler.getWritable(); - println("Initial mem size: " + mem1.getCapacity()); - final UpdateDoublesSketch usk1 = DoublesSketch.builder().setK(k).build(mem1); + WritableMemory wmem; + try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(initBytes, 1, + ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())).scope()) { + + println("Initial mem size: " + wmem.getCapacity()); + final UpdateDoublesSketch usk1 = DoublesSketch.builder().setK(k).build(wmem); for (int i = 1; i <= u; i++) { usk1.update(i); } @@ -108,13 +110,12 @@ public void checkGrowCombBuf() { final int u = (2 * k) - 1; //just to fill the BB final int initBytes = ((2 * k) + 4) << 3; //just room for BB - try (WritableHandle memHandler = WritableMemory.allocateDirect(initBytes, - ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())) { - //final MemoryManager memMgr = new MemoryManager(); - //final WritableMemory mem1 = memMgr.request(initBytes); - final WritableMemory mem1 = memHandler.getWritable(); - println("Initial mem size: " + mem1.getCapacity()); - final UpdateDoublesSketch usk1 = DoublesSketch.builder().setK(k).build(mem1); + WritableMemory wmem; + 
try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(initBytes, 1, + ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())).scope()) { + + println("Initial mem size: " + wmem.getCapacity()); + final UpdateDoublesSketch usk1 = DoublesSketch.builder().setK(k).build(wmem); for (int i = 1; i <= u; i++) { usk1.update(i); } @@ -138,21 +139,22 @@ public void checkGrowFromWrappedEmptySketch() { final UpdateDoublesSketch usk1 = DoublesSketch.builder().setK(k).build(); final Memory origSketchMem = Memory.wrap(usk1.toByteArray()); - try (WritableHandle memHandle = WritableMemory.allocateDirect(initBytes, - ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())) { - WritableMemory mem = memHandle.getWritable(); - origSketchMem.copyTo(0, mem, 0, initBytes); - UpdateDoublesSketch usk2 = DirectUpdateDoublesSketch.wrapInstance(mem); - assertTrue(mem.isSameResource(usk2.getMemory())); - assertEquals(mem.getCapacity(), initBytes); - assertTrue(mem.isDirect()); + WritableMemory wmem; + try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(initBytes, 1, + ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())).scope()) { + + origSketchMem.copyTo(0, wmem, 0, initBytes); + UpdateDoublesSketch usk2 = DirectUpdateDoublesSketch.wrapInstance(wmem); + assertTrue(wmem.isSameResource(usk2.getMemory())); + assertEquals(wmem.getCapacity(), initBytes); + assertTrue(wmem.isDirect()); assertTrue(usk2.isEmpty()); //update the sketch forcing it to grow on-heap for (int i = 1; i <= 5; i++) { usk2.update(i); } assertEquals(usk2.getN(), 5); WritableMemory mem2 = usk2.getMemory(); - assertFalse(mem.isSameResource(mem2)); + assertFalse(wmem.isSameResource(mem2)); assertFalse(mem2.isDirect()); //should now be on-heap final int expectedSize = COMBINED_BUFFER + ((2 * k) << 3); diff --git a/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java b/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java index 5bc5b4eaa..fbac2af1e 100644 --- 
a/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java @@ -28,13 +28,15 @@ import java.nio.ByteOrder; import org.apache.datasketches.memory.DefaultMemoryRequestServer; -import org.apache.datasketches.memory.WritableHandle; +//import org.apache.datasketches.memory.WritableHandle; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.quantilescommon.DoublesSortedView; import org.apache.datasketches.quantilescommon.DoublesSortedViewIterator; import org.testng.Assert; import org.testng.annotations.Test; +import jdk.incubator.foreign.ResourceScope; + public class DoublesSketchTest { @Test @@ -140,15 +142,17 @@ public void checkEmptyExceptions() { @Test public void directSketchShouldMoveOntoHeapEventually() { - try (WritableHandle wdh = WritableMemory.allocateDirect(1000, - ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())) { - WritableMemory mem = wdh.getWritable(); - UpdateDoublesSketch sketch = DoublesSketch.builder().build(mem); - Assert.assertTrue(sketch.isSameResource(mem)); + + WritableMemory wmem; + try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(1000, 1, + ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())).scope()) { + + UpdateDoublesSketch sketch = DoublesSketch.builder().build(wmem); + Assert.assertTrue(sketch.isSameResource(wmem)); for (int i = 0; i < 1000; i++) { sketch.update(i); } - Assert.assertFalse(sketch.isSameResource(mem)); + Assert.assertFalse(sketch.isSameResource(wmem)); } catch (final Exception e) { throw new RuntimeException(e); } @@ -157,13 +161,14 @@ public void directSketchShouldMoveOntoHeapEventually() { @Test public void directSketchShouldMoveOntoHeapEventually2() { int i = 0; - try (WritableHandle wdh = - WritableMemory.allocateDirect(50, ByteOrder.LITTLE_ENDIAN, new DefaultMemoryRequestServer())) { - WritableMemory mem = wdh.getWritable(); - UpdateDoublesSketch sketch = 
DoublesSketch.builder().build(mem); - Assert.assertTrue(sketch.isSameResource(mem)); + WritableMemory wmem; + try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(50, 1, + ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())).scope()) { + + UpdateDoublesSketch sketch = DoublesSketch.builder().build(wmem); + Assert.assertTrue(sketch.isSameResource(wmem)); for (; i < 1000; i++) { - if (sketch.isSameResource(mem)) { + if (sketch.isSameResource(wmem)) { sketch.update(i); } else { //println("MOVED OUT at i = " + i); @@ -177,9 +182,11 @@ public void directSketchShouldMoveOntoHeapEventually2() { @Test public void checkEmptyDirect() { - try (WritableHandle wdh = WritableMemory.allocateDirect(1000)) { - WritableMemory mem = wdh.getWritable(); - UpdateDoublesSketch sketch = DoublesSketch.builder().build(mem); + WritableMemory wmem; + try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(1000, 1, + ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())).scope()) { + + UpdateDoublesSketch sketch = DoublesSketch.builder().build(wmem); sketch.toByteArray(); //exercises a specific path } catch (final Exception e) { throw new RuntimeException(e); diff --git a/src/test/java/org/apache/datasketches/quantiles/PreambleUtilTest.java b/src/test/java/org/apache/datasketches/quantiles/PreambleUtilTest.java index be2f328b2..adf916ef3 100644 --- a/src/test/java/org/apache/datasketches/quantiles/PreambleUtilTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/PreambleUtilTest.java @@ -37,19 +37,24 @@ import static org.apache.datasketches.quantiles.PreambleUtil.insertSerVer; import static org.testng.Assert.assertEquals; -import org.testng.annotations.Test; +import java.nio.ByteOrder; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableHandle; import org.apache.datasketches.memory.WritableMemory; +import org.testng.annotations.Test; + +import 
jdk.incubator.foreign.ResourceScope; public class PreambleUtilTest { @Test public void checkInsertsAndExtracts() { final int bytes = 32; - try (WritableHandle offHeapMemHandler = WritableMemory.allocateDirect(bytes)) { - final WritableMemory offHeapMem = offHeapMemHandler.getWritable(); + WritableMemory offHeapMem; + try (ResourceScope scope = (offHeapMem = WritableMemory.allocateDirect(bytes, 1, + ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())).scope()) { + final WritableMemory onHeapMem = WritableMemory.writableWrap(new byte[bytes]); onHeapMem.clear(); diff --git a/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java b/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java index 42b6069da..1ad55696c 100644 --- a/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java @@ -26,13 +26,18 @@ import static org.testng.Assert.assertNull; import static org.testng.Assert.assertTrue; +import java.nio.ByteOrder; + import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableHandle; +//import org.apache.datasketches.memory.WritableHandle; import org.apache.datasketches.memory.WritableMemory; import org.testng.annotations.Test; +import jdk.incubator.foreign.ResourceScope; + /** * @author Lee Rhodes */ @@ -78,8 +83,9 @@ public void checkHeapifyWrap(int k, int u, boolean ordered) { //Prepare Memory for direct int bytes = usk.getCompactBytes(); //for Compact - try (WritableHandle wdh = WritableMemory.allocateDirect(bytes)) { - WritableMemory directMem = wdh.getWritable(); + WritableMemory directMem; + try (ResourceScope scope = (directMem = WritableMemory.allocateDirect(bytes, + new DefaultMemoryRequestServer())).scope()) { /**Via 
CompactSketch.compact**/ refSk = usk.compact(ordered, directMem); diff --git a/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java index 5191c7c68..1f1bfa9ec 100644 --- a/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java @@ -38,19 +38,23 @@ import static org.testng.Assert.assertTrue; import static org.testng.Assert.fail; +import java.nio.ByteOrder; import java.util.Arrays; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesReadOnlyException; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableHandle; +//import org.apache.datasketches.memory.WritableHandle; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.HashOperations; import org.apache.datasketches.thetacommon.ThetaUtil; import org.testng.annotations.Test; +import jdk.incubator.foreign.ResourceScope; + /** * @author Lee Rhodes */ @@ -59,10 +63,10 @@ public class DirectQuickSelectSketchTest { @Test//(expectedExceptions = SketchesArgumentException.class) public void checkBadSerVer() { int k = 512; - try (WritableHandle h = makeNativeMemory(k)) { - WritableMemory mem = h.getWritable(); + WritableMemory wmem; + try (ResourceScope scope = (wmem = makeNativeMemory(k)).scope()) { - UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(mem); + UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(wmem); DirectQuickSelectSketch sk1 = (DirectQuickSelectSketch)usk; //for internal checks assertTrue(usk.isEmpty()); @@ -73,9 +77,9 @@ public void checkBadSerVer() { 
assertEquals(usk.getEstimate(), k, 0.0); assertEquals(sk1.getRetainedEntries(false), k); - mem.putByte(SER_VER_BYTE, (byte) 0); //corrupt the SerVer byte + wmem.putByte(SER_VER_BYTE, (byte) 0); //corrupt the SerVer byte - Sketch.wrap(mem); + Sketch.wrap(wmem); } catch (final Exception e) { if (e instanceof SketchesArgumentException) {} else { throw new RuntimeException(e); } @@ -85,9 +89,9 @@ public void checkBadSerVer() { @Test//(expectedExceptions = SketchesArgumentException.class) public void checkConstructorKtooSmall() { int k = 8; - try (WritableHandle h = makeNativeMemory(k)) { - WritableMemory mem = h.getWritable(); - UpdateSketch.builder().setNominalEntries(k).build(mem); + WritableMemory wmem; + try (ResourceScope scope = (wmem = makeNativeMemory(k)).scope()) { + UpdateSketch.builder().setNominalEntries(k).build(wmem); } catch (final Exception e) { if (e instanceof SketchesArgumentException) {} else { throw new RuntimeException(e); } @@ -97,9 +101,9 @@ public void checkConstructorKtooSmall() { @Test//(expectedExceptions = SketchesArgumentException.class) public void checkConstructorMemTooSmall() { int k = 16; - try (WritableHandle h = makeNativeMemory(k/2)) { - WritableMemory mem = h.getWritable(); - UpdateSketch.builder().setNominalEntries(k).build(mem); + WritableMemory wmem; + try (ResourceScope scope = (wmem = makeNativeMemory(k/2)).scope()) { + UpdateSketch.builder().setNominalEntries(k).build(wmem); } catch (final Exception e) { if (e instanceof SketchesArgumentException) {} else { throw new RuntimeException(e); } @@ -124,10 +128,10 @@ public void checkHeapifyMemoryEstimating() { int k = 512; int u = 2*k; //thus estimating - try (WritableHandle h = makeNativeMemory(k)) { - WritableMemory mem = h.getWritable(); + WritableMemory wmem; + try (ResourceScope scope = (wmem = makeNativeMemory(k)).scope()) { - UpdateSketch sk1 = UpdateSketch.builder().setNominalEntries(k).build(mem); + UpdateSketch sk1 = 
UpdateSketch.builder().setNominalEntries(k).build(wmem); for (int i=0; i Date: Fri, 14 Jun 2024 17:12:19 -0700 Subject: [PATCH 02/22] Fixes to align with modified Memory --- .github/workflows/auto-jdk-matrix.yml | 41 ++--- .../hll/DirectAuxHashMapTest.java | 83 +++++----- .../DirectQuantilesMemoryRequestTest.java | 147 ++++++++---------- .../quantiles/DoublesSketchTest.java | 43 ++--- .../theta/DirectQuickSelectSketchTest.java | 64 ++++---- .../datasketches/theta/UnionImplTest.java | 25 ++- 6 files changed, 179 insertions(+), 224 deletions(-) diff --git a/.github/workflows/auto-jdk-matrix.yml b/.github/workflows/auto-jdk-matrix.yml index 83da580ca..ce15bb57e 100644 --- a/.github/workflows/auto-jdk-matrix.yml +++ b/.github/workflows/auto-jdk-matrix.yml @@ -7,49 +7,51 @@ on: workflow_dispatch: env: - MAVEN_OPTS: -Xmx4g -Xms1g + MAVEN_OPTS: -Xmx1g -Xms1g jobs: build: name: Build, Test, Install runs-on: ubuntu-latest + strategy: fail-fast: false - matrix: - jdk: [ 8,11 ] + env: - JDK_VERSION: ${{ matrix.jdk }} + JDK_VERSION: 17 steps: - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: persist-credentials: false + - name: Print Current workflow + run: > + cat .github/workflows/auto-jdk-matrix.yml + + - name: Setup Java + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-package: jdk + architecture: x64 + java-version: 17 + - name: Cache local Maven repository - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.m2/repository key: build-${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} restore-keys: build-${{ runner.os }}-maven- - - name: Install Matrix JDK - uses: actions/setup-java@v3 - with: - java-version: ${{ matrix.jdk }} - distribution: 'temurin' - java-package: jdk - architecture: x64 -# Architecture options: x86, x64, armv7, aarch64, ppc64le -# setup-java@v3 has a "with cache" option - - name: Echo Java Version run: > java -version - name: Test run: > - mvn 
clean test + mvn clean test -B -Dmaven.javadoc.skip=true -Dgpg.skip=true @@ -59,7 +61,10 @@ jobs: -DskipTests=true -Dgpg.skip=true +# Architecture options: x86, x64, armv7, aarch64, ppc64le +# setup-java@v4 has a "with cache" option # Lifecycle: validate, compile, test, package, verify, install, deploy -# -B batch mode +# -B batch mode, never stops for user input # -V show Version without stopping +# -X debug mode # -q quiet, only show errors diff --git a/src/test/java/org/apache/datasketches/hll/DirectAuxHashMapTest.java b/src/test/java/org/apache/datasketches/hll/DirectAuxHashMapTest.java index 7b3cd5c67..98039616f 100644 --- a/src/test/java/org/apache/datasketches/hll/DirectAuxHashMapTest.java +++ b/src/test/java/org/apache/datasketches/hll/DirectAuxHashMapTest.java @@ -49,52 +49,47 @@ public void checkGrow() { int n = 8; //put lgConfigK == 4 into HLL mode long bytes = HllSketch.getMaxUpdatableSerializationBytes(lgConfigK, tgtHllType); HllSketch hllSketch; - WritableMemory wmem; - try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(bytes, 1, - ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())).scope()) { + WritableMemory wmem = WritableMemory.allocateDirect(bytes, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); - hllSketch = new HllSketch(lgConfigK, tgtHllType, wmem); - for (int i = 0; i < n; i++) { - hllSketch.update(i); - } - hllSketch.couponUpdate(HllUtil.pair(7, 15)); //mock extreme values - hllSketch.couponUpdate(HllUtil.pair(8, 15)); - hllSketch.couponUpdate(HllUtil.pair(9, 15)); - //println(hllSketch.toString(true, true, true, true)); - DirectHllArray dha = (DirectHllArray) hllSketch.hllSketchImpl; - assertEquals(dha.getAuxHashMap().getLgAuxArrInts(), 2); - assertTrue(hllSketch.isMemory()); - assertTrue(hllSketch.isOffHeap()); - assertTrue(hllSketch.isSameResource(wmem)); - - //Check heapify - byte[] byteArray = hllSketch.toCompactByteArray(); - HllSketch hllSketch2 = HllSketch.heapify(byteArray); - HllArray ha = 
(HllArray) hllSketch2.hllSketchImpl; - assertEquals(ha.getAuxHashMap().getLgAuxArrInts(), 2); - assertEquals(ha.getAuxHashMap().getAuxCount(), 3); - - //Check wrap - byteArray = hllSketch.toUpdatableByteArray(); - WritableMemory wmem2 = WritableMemory.writableWrap(byteArray); - hllSketch2 = HllSketch.writableWrap(wmem2); - //println(hllSketch2.toString(true, true, true, true)); - DirectHllArray dha2 = (DirectHllArray) hllSketch2.hllSketchImpl; - assertEquals(dha2.getAuxHashMap().getLgAuxArrInts(), 2); - assertEquals(dha2.getAuxHashMap().getAuxCount(), 3); - - //Check grow to on-heap - hllSketch.couponUpdate(HllUtil.pair(10, 15)); //puts it over the edge, must grow - //println(hllSketch.toString(true, true, true, true)); - dha = (DirectHllArray) hllSketch.hllSketchImpl; - assertEquals(dha.getAuxHashMap().getLgAuxArrInts(), 3); - assertEquals(dha.getAuxHashMap().getAuxCount(), 4); - assertTrue(hllSketch.isMemory()); - assertFalse(hllSketch.isOffHeap()); - assertFalse(hllSketch.isSameResource(wmem)); - } catch (final Exception e) { - throw new RuntimeException(e); + hllSketch = new HllSketch(lgConfigK, tgtHllType, wmem); + for (int i = 0; i < n; i++) { + hllSketch.update(i); } + hllSketch.couponUpdate(HllUtil.pair(7, 15)); //mock extreme values + hllSketch.couponUpdate(HllUtil.pair(8, 15)); + hllSketch.couponUpdate(HllUtil.pair(9, 15)); + //println(hllSketch.toString(true, true, true, true)); + DirectHllArray dha = (DirectHllArray) hllSketch.hllSketchImpl; + assertEquals(dha.getAuxHashMap().getLgAuxArrInts(), 2); + assertTrue(hllSketch.isMemory()); + assertTrue(hllSketch.isOffHeap()); + assertTrue(hllSketch.isSameResource(wmem)); + + //Check heapify + byte[] byteArray = hllSketch.toCompactByteArray(); + HllSketch hllSketch2 = HllSketch.heapify(byteArray); + HllArray ha = (HllArray) hllSketch2.hllSketchImpl; + assertEquals(ha.getAuxHashMap().getLgAuxArrInts(), 2); + assertEquals(ha.getAuxHashMap().getAuxCount(), 3); + + //Check wrap + byteArray = 
hllSketch.toUpdatableByteArray(); + WritableMemory wmem2 = WritableMemory.writableWrap(byteArray); + hllSketch2 = HllSketch.writableWrap(wmem2); + //println(hllSketch2.toString(true, true, true, true)); + DirectHllArray dha2 = (DirectHllArray) hllSketch2.hllSketchImpl; + assertEquals(dha2.getAuxHashMap().getLgAuxArrInts(), 2); + assertEquals(dha2.getAuxHashMap().getAuxCount(), 3); + + //Check grow to on-heap + hllSketch.couponUpdate(HllUtil.pair(10, 15)); //puts it over the edge, must grow + //println(hllSketch.toString(true, true, true, true)); + dha = (DirectHllArray) hllSketch.hllSketchImpl; + assertEquals(dha.getAuxHashMap().getLgAuxArrInts(), 3); + assertEquals(dha.getAuxHashMap().getAuxCount(), 4); + assertTrue(hllSketch.isMemory()); + assertFalse(hllSketch.isOffHeap()); + assertFalse(hllSketch.isSameResource(wmem)); } @Test diff --git a/src/test/java/org/apache/datasketches/quantiles/DirectQuantilesMemoryRequestTest.java b/src/test/java/org/apache/datasketches/quantiles/DirectQuantilesMemoryRequestTest.java index 84145239e..6552d0ae3 100644 --- a/src/test/java/org/apache/datasketches/quantiles/DirectQuantilesMemoryRequestTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/DirectQuantilesMemoryRequestTest.java @@ -46,39 +46,34 @@ public class DirectQuantilesMemoryRequestTest { public void checkLimitedMemoryScenarios() { //Requesting application final int k = 128; final int u = 40 * k; - final int initBytes = ((2 * k) + 4) << 3; //just the BB + final int initBytes = ((2 * k) + 4) << 3; //just the BaseBuffer //########## Owning Implementation // This part would actually be part of the Memory owning implementation so it is faked here - WritableMemory wmem; - try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(initBytes, 1, - ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())).scope()) { - - println("Initial mem size: " + wmem.getCapacity()); - - //########## Receiving Application - // The receiving application has been given wmem 
to use for a sketch, - // but alas, it is not ultimately large enough. - final UpdateDoublesSketch usk1 = DoublesSketch.builder().setK(k).build(wmem); - assertTrue(usk1.isEmpty()); - - //Load the sketch - for (int i = 0; i < u; i++) { - // The sketch uses The MemoryRequest, acquired from wmem, to acquire more memory as - // needed, and requests via the MemoryRequest to free the old allocations. - usk1.update(i); - } - final double result = usk1.getQuantile(0.5); - println("Result: " + result); - assertEquals(result, u / 2.0, 0.05 * u); //Success - - //########## Owning Implementation - //The actual Memory has been re-allocated several times, - // so the above wmem reference is invalid. - println("\nFinal mem size: " + wmem.getCapacity()); - } catch (Exception e) { - throw new RuntimeException(e); + WritableMemory wmem = WritableMemory.allocateDirect(initBytes, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); + + println("Initial mem size: " + wmem.getCapacity()); + + //########## Receiving Application + // The receiving application has been given wmem to use for a sketch, + // but alas, it is not ultimately large enough. + final UpdateDoublesSketch usk = DoublesSketch.builder().setK(k).build(wmem); + assertTrue(usk.isEmpty()); + + //Load the sketch + for (int i = 0; i < u; i++) { + // The sketch uses The MemoryRequest, acquired from wmem, to acquire more memory as + // needed, and requests via the MemoryRequest to free the old allocations. + usk.update(i); } + final double result = usk.getQuantile(0.5); + println("Result: " + result); + assertEquals(result, u / 2.0, 0.05 * u); //Success + + //########## Owning Implementation + //The actual Memory has been re-allocated several times, + // so the above wmem reference is invalid. 
+ println("\nFinal mem size: " + wmem.getCapacity()); } @Test @@ -87,21 +82,16 @@ public void checkGrowBaseBuf() { final int u = 32; // don't need the BB to fill here final int initBytes = (4 + (u / 2)) << 3; // not enough to hold everything - WritableMemory wmem; - try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(initBytes, 1, - ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())).scope()) { - - println("Initial mem size: " + wmem.getCapacity()); - final UpdateDoublesSketch usk1 = DoublesSketch.builder().setK(k).build(wmem); - for (int i = 1; i <= u; i++) { - usk1.update(i); - } - final int currentSpace = usk1.getCombinedBufferItemCapacity(); - println("curCombBufItemCap: " + currentSpace); - assertEquals(currentSpace, 2 * k); - } catch (final Exception e) { - throw new RuntimeException(e); + WritableMemory wmem = WritableMemory.allocateDirect(initBytes, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); + + println("Initial mem size: " + wmem.getCapacity()); + final UpdateDoublesSketch usk1 = DoublesSketch.builder().setK(k).build(wmem); + for (int i = 1; i <= u; i++) { + usk1.update(i); } + final int currentSpace = usk1.getCombinedBufferItemCapacity(); + println("curCombBufItemCap: " + currentSpace); + assertEquals(currentSpace, 2 * k); } @Test @@ -110,25 +100,19 @@ public void checkGrowCombBuf() { final int u = (2 * k) - 1; //just to fill the BB final int initBytes = ((2 * k) + 4) << 3; //just room for BB - WritableMemory wmem; - try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(initBytes, 1, - ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())).scope()) { - - println("Initial mem size: " + wmem.getCapacity()); - final UpdateDoublesSketch usk1 = DoublesSketch.builder().setK(k).build(wmem); - for (int i = 1; i <= u; i++) { - usk1.update(i); - } - final int currentSpace = usk1.getCombinedBufferItemCapacity(); - println("curCombBufItemCap: " + currentSpace); - final double[] newCB = 
usk1.growCombinedBuffer(currentSpace, 3 * k); - final int newSpace = usk1.getCombinedBufferItemCapacity(); - println("newCombBurItemCap: " + newSpace); - assertEquals(newCB.length, 3 * k); - //memMgr.free(mem1); - } catch (final Exception e) { - throw new RuntimeException(e); + WritableMemory wmem = WritableMemory.allocateDirect(initBytes, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); + + println("Initial mem size: " + wmem.getCapacity()); + final UpdateDoublesSketch usk1 = DoublesSketch.builder().setK(k).build(wmem); + for (int i = 1; i <= u; i++) { + usk1.update(i); } + final int currentSpace = usk1.getCombinedBufferItemCapacity(); + println("curCombBufItemCap: " + currentSpace); + final double[] newCB = usk1.growCombinedBuffer(currentSpace, 3 * k); + final int newSpace = usk1.getCombinedBufferItemCapacity(); + println("newCombBurItemCap: " + newSpace); + assertEquals(newCB.length, 3 * k); } @Test @@ -139,29 +123,24 @@ public void checkGrowFromWrappedEmptySketch() { final UpdateDoublesSketch usk1 = DoublesSketch.builder().setK(k).build(); final Memory origSketchMem = Memory.wrap(usk1.toByteArray()); - WritableMemory wmem; - try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(initBytes, 1, - ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())).scope()) { - - origSketchMem.copyTo(0, wmem, 0, initBytes); - UpdateDoublesSketch usk2 = DirectUpdateDoublesSketch.wrapInstance(wmem); - assertTrue(wmem.isSameResource(usk2.getMemory())); - assertEquals(wmem.getCapacity(), initBytes); - assertTrue(wmem.isDirect()); - assertTrue(usk2.isEmpty()); - - //update the sketch forcing it to grow on-heap - for (int i = 1; i <= 5; i++) { usk2.update(i); } - assertEquals(usk2.getN(), 5); - WritableMemory mem2 = usk2.getMemory(); - assertFalse(wmem.isSameResource(mem2)); - assertFalse(mem2.isDirect()); //should now be on-heap - - final int expectedSize = COMBINED_BUFFER + ((2 * k) << 3); - assertEquals(mem2.getCapacity(), expectedSize); - } catch 
(final Exception e) { - throw new RuntimeException(e); - } + WritableMemory wmem = WritableMemory.allocateDirect(initBytes, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); + + origSketchMem.copyTo(0, wmem, 0, initBytes); + UpdateDoublesSketch usk2 = DirectUpdateDoublesSketch.wrapInstance(wmem); + assertTrue(wmem.isSameResource(usk2.getMemory())); + assertEquals(wmem.getCapacity(), initBytes); + assertTrue(wmem.isDirect()); + assertTrue(usk2.isEmpty()); + + //update the sketch forcing it to grow on-heap + for (int i = 1; i <= 5; i++) { usk2.update(i); } + assertEquals(usk2.getN(), 5); + WritableMemory mem2 = usk2.getMemory(); + assertFalse(wmem.isSameResource(mem2)); + assertFalse(mem2.isDirect()); //should now be on-heap + + final int expectedSize = COMBINED_BUFFER + ((2 * k) << 3); + assertEquals(mem2.getCapacity(), expectedSize); } @Test diff --git a/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java b/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java index fbac2af1e..8f4243bfe 100644 --- a/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java @@ -143,46 +143,35 @@ public void checkEmptyExceptions() { @Test public void directSketchShouldMoveOntoHeapEventually() { - WritableMemory wmem; - try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(1000, 1, - ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())).scope()) { - - UpdateDoublesSketch sketch = DoublesSketch.builder().build(wmem); - Assert.assertTrue(sketch.isSameResource(wmem)); - for (int i = 0; i < 1000; i++) { - sketch.update(i); - } - Assert.assertFalse(sketch.isSameResource(wmem)); - } catch (final Exception e) { - throw new RuntimeException(e); + WritableMemory wmem = WritableMemory.allocateDirect(1000, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); + UpdateDoublesSketch sketch = DoublesSketch.builder().build(wmem); + 
Assert.assertTrue(sketch.isSameResource(wmem)); + for (int i = 0; i < 1000; i++) { + sketch.update(i); } + Assert.assertFalse(sketch.isSameResource(wmem)); } @Test public void directSketchShouldMoveOntoHeapEventually2() { int i = 0; - WritableMemory wmem; - try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(50, 1, - ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())).scope()) { + WritableMemory wmem = WritableMemory.allocateDirect(50, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); - UpdateDoublesSketch sketch = DoublesSketch.builder().build(wmem); - Assert.assertTrue(sketch.isSameResource(wmem)); - for (; i < 1000; i++) { - if (sketch.isSameResource(wmem)) { - sketch.update(i); - } else { - //println("MOVED OUT at i = " + i); - break; - } + UpdateDoublesSketch sketch = DoublesSketch.builder().build(wmem); + Assert.assertTrue(sketch.isSameResource(wmem)); + for (; i < 1000; i++) { + if (sketch.isSameResource(wmem)) { + sketch.update(i); + } else { + //println("MOVED OUT at i = " + i); + break; } - } catch (final Exception e) { - throw new RuntimeException(e); } } @Test public void checkEmptyDirect() { - WritableMemory wmem; + WritableMemory wmem ; try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(1000, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())).scope()) { diff --git a/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java index 1f1bfa9ec..32b9bbb44 100644 --- a/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java @@ -876,16 +876,12 @@ public void checkMoveAndResize() { int u = 2 * k; int bytes = Sketches.getMaxUpdateSketchBytes(k); - WritableMemory wmem; - try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(bytes / 2, 1, - ByteOrder.nativeOrder(), new 
DefaultMemoryRequestServer())).scope()) { - UpdateSketch sketch = Sketches.updateSketchBuilder().setNominalEntries(k).build(wmem); - assertTrue(sketch.isSameResource(wmem)); - for (int i = 0; i < u; i++) { sketch.update(i); } - assertFalse(sketch.isSameResource(wmem)); - } catch (final Exception e) { - throw new RuntimeException(e); - } + WritableMemory wmem = WritableMemory.allocateDirect(bytes / 2, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); + + UpdateSketch sketch = Sketches.updateSketchBuilder().setNominalEntries(k).build(wmem); + assertTrue(sketch.isSameResource(wmem)); + for (int i = 0; i < u; i++) { sketch.update(i); } + assertFalse(sketch.isSameResource(wmem)); } @Test @@ -893,36 +889,30 @@ public void checkReadOnlyRebuildResize() { int k = 1 << 12; int u = 2 * k; int bytes = Sketches.getMaxUpdateSketchBytes(k); - WritableMemory wmem; - try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(bytes / 2, 1, - ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())).scope()) { - UpdateSketch sketch = Sketches.updateSketchBuilder().setNominalEntries(k).build(wmem); - for (int i = 0; i < u; i++) { sketch.update(i); } - double est1 = sketch.getEstimate(); - byte[] ser = sketch.toByteArray(); - Memory mem = Memory.wrap(ser); - UpdateSketch roSketch = (UpdateSketch) Sketches.wrapSketch(mem); - double est2 = roSketch.getEstimate(); - assertEquals(est2, est1); - try { - roSketch.rebuild(); - fail(); - } catch (SketchesReadOnlyException e) { - //expected - } - try { - roSketch.reset(); - fail(); - } catch (SketchesReadOnlyException e) { - //expected - } - } catch (final Exception e) { - throw new RuntimeException(e); + WritableMemory wmem = WritableMemory.allocateDirect(bytes / 2, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); + + UpdateSketch sketch = Sketches.updateSketchBuilder().setNominalEntries(k).build(wmem); + for (int i = 0; i < u; i++) { sketch.update(i); } + double est1 = sketch.getEstimate(); + byte[] ser = 
sketch.toByteArray(); + Memory mem = Memory.wrap(ser); + UpdateSketch roSketch = (UpdateSketch) Sketches.wrapSketch(mem); + double est2 = roSketch.getEstimate(); + assertEquals(est2, est1); + try { + roSketch.rebuild(); + fail(); + } catch (SketchesReadOnlyException e) { + //expected + } + try { + roSketch.reset(); + fail(); + } catch (SketchesReadOnlyException e) { + //expected } - } - @Test public void printlnTest() { println("PRINTING: "+this.getClass().getName()); diff --git a/src/test/java/org/apache/datasketches/theta/UnionImplTest.java b/src/test/java/org/apache/datasketches/theta/UnionImplTest.java index ee0fed44c..fe76240a7 100644 --- a/src/test/java/org/apache/datasketches/theta/UnionImplTest.java +++ b/src/test/java/org/apache/datasketches/theta/UnionImplTest.java @@ -197,24 +197,21 @@ public void checkMoveAndResize() { final int k = 1 << 12; final int u = 2 * k; final int bytes = Sketches.getMaxUpdateSketchBytes(k); - try (ResourceScope scope = ResourceScope.newConfinedScope()) { + ResourceScope scope = ResourceScope.newConfinedScope(); - final WritableMemory wmem = WritableMemory.allocateDirect(bytes / 2, 1, scope, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); - final UpdateSketch sketch = Sketches.updateSketchBuilder().setNominalEntries(k).build(wmem); - assertTrue(sketch.isSameResource(wmem)); + final WritableMemory wmem = WritableMemory.allocateDirect(bytes / 2, 1, scope, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); + final UpdateSketch sketch = Sketches.updateSketchBuilder().setNominalEntries(k).build(wmem); + assertTrue(sketch.isSameResource(wmem)); - final WritableMemory wmem2 = WritableMemory.allocateDirect(bytes / 2, 1, scope, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); - final Union union = SetOperation.builder().buildUnion(wmem2); - assertTrue(union.isSameResource(wmem2)); + final WritableMemory wmem2 = WritableMemory.allocateDirect(bytes / 2, 1, scope, ByteOrder.nativeOrder(), new 
DefaultMemoryRequestServer()); + final Union union = SetOperation.builder().buildUnion(wmem2); + assertTrue(union.isSameResource(wmem2)); - for (int i = 0; i < u; i++) { union.update(i); } - assertFalse(union.isSameResource(wmem)); + for (int i = 0; i < u; i++) { union.update(i); } + assertFalse(union.isSameResource(wmem)); - final Union union2 = SetOperation.builder().buildUnion(); //on-heap union - assertFalse(union2.isSameResource(wmem2)); //obviously not - } catch (final Exception e) { - throw new RuntimeException(e); - } + final Union union2 = SetOperation.builder().buildUnion(); //on-heap union + assertFalse(union2.isSameResource(wmem2)); //obviously not } @Test From b984e6523343ad6b5143c6b17b461d91a57c45e5 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Thu, 20 Jun 2024 08:33:58 -0700 Subject: [PATCH 03/22] Changes required for ds-java to move to java17 and also depend on ds-Memory 4.0.0, which is built around FFM/Panama. --- pom.xml | 157 +++++------------- .../quantiles/DirectUpdateDoublesSketch.java | 3 +- .../filters/bloomfilter/BloomFilterTest.java | 3 +- .../hll/DirectCouponListTest.java | 3 +- .../quantiles/DebugUnionTest.java | 3 +- .../datasketches/theta/CompactSketchTest.java | 3 +- .../theta/DirectQuickSelectSketchTest.java | 5 +- .../theta/HeapifyWrapSerVer1and2Test.java | 2 +- .../datasketches/theta/UnionImplTest.java | 3 +- 9 files changed, 48 insertions(+), 134 deletions(-) diff --git a/pom.xml b/pom.xml index e43a426ba..949152680 100644 --- a/pom.xml +++ b/pom.xml @@ -18,19 +18,11 @@ specific language governing permissions and limitations under the License. --> - + 4.0.0 - - org.apache - apache - 32 - - org.apache.datasketches datasketches-java 6.1.0-SNAPSHOT @@ -66,8 +58,8 @@ under the License. - + + GitHub https://github.com/apache/${project.artifactId}/issues @@ -83,9 +75,7 @@ under the License. - 2.2.0 - 7.10.0 @@ -95,52 +85,51 @@ under the License. 
check_cpp_historical_files - 3.6.3 + 3.6.3 17 ${java.version} ${java.version} - -Xmx4g -Duser.language=en -Duser.country=US -Dfile.encoding=UTF-8 + -Xmx4g -Duser.language=en -Duser.country=US -Dfile.encoding=UTF-8 --add-modules=jdk.incubator.foreign UTF-8 ${charset.encoding} ${charset.encoding} ${charset.encoding} yyyy-MM-dd'T'HH-mm-ss'Z' - - 2.16.2 - - 3.7.1 - 3.13.0 - 3.1.1 - 3.4.1 - 3.2.3 - 3.4.0 - 3.6.3 - 3.0.1 - 3.2.0 - 3.3.1 - 3.2.5 + 3.7.1 + 3.13.0 + 3.1.1 + 3.4.1 + 3.2.3 + 3.4.0 + 3.7.0 + 3.0.1 + 3.2.0 + 3.3.1 + 3.2.5 + 3.1.2 - 0.16.1 + 0.16.1 0.8.12 4.3.0 + + 2.16.2 + - 1.0.0 + 1.0.0 4.9.10 - - + org.testng @@ -170,6 +159,11 @@ under the License. org.apache.maven.plugins maven-compiler-plugin ${maven-compiler-plugin.version} + + + --add-modules=jdk.incubator.foreign + + @@ -193,7 +187,7 @@ under the License. - [1.8.0,9.0),[11.0,12.0), [17,18) + [17,18) [${maven.version},) @@ -212,7 +206,6 @@ under the License. - org.apache.maven.plugins maven-jar-plugin ${maven-jar-plugin.version} @@ -235,12 +228,15 @@ under the License. - org.apache.maven.plugins maven-javadoc-plugin ${maven-javadoc-plugin.version} true + public + + --add-modules=jdk.incubator.foreign + @@ -253,14 +249,12 @@ under the License. - org.apache.maven.plugins maven-release-plugin ${maven-release-plugin.version} - org.apache.maven.plugins maven-source-plugin ${maven-source-plugin.version} @@ -283,11 +277,11 @@ under the License. - org.apache.maven.plugins maven-surefire-plugin ${maven-surefire-failsafe-plugins.version} + --add-modules=jdk.incubator.foreign false false true @@ -297,7 +291,6 @@ under the License. - org.apache.rat apache-rat-plugin ${apache-rat-plugin.version} @@ -423,44 +416,8 @@ under the License. 
- - - - only-eclipse - - - m2e.version - - - - - - - org.apache.maven.plugins - maven-remote-resources-plugin - ${maven-remote-resources-plugin.version} - - - process-resource-bundles - none - - - - - - - - org.apache.maven.plugins - maven-remote-resources-plugin - - - - + + This is performed from a script outside Maven apache-release @@ -624,44 +582,7 @@ under the License. - - - - java8only - - [1.8,1.9),8 - - - - - java11plus - - [11,14) - - - 8 - - - - - - - org.apache.maven.plugins - maven-surefire-plugin - - @{argLine} - --add-exports java.base/jdk.internal.misc=ALL-UNNAMED - --add-exports java.base/jdk.internal.ref=ALL-UNNAMED - --add-opens java.base/java.nio=ALL-UNNAMED - --add-opens java.base/sun.nio.ch=ALL-UNNAMED - - ${testng.generate-java-files},${testng.check-cpp-files} - - - - - - + --> generate-java-files diff --git a/src/main/java/org/apache/datasketches/quantiles/DirectUpdateDoublesSketch.java b/src/main/java/org/apache/datasketches/quantiles/DirectUpdateDoublesSketch.java index 96c01d931..0998223e8 100644 --- a/src/main/java/org/apache/datasketches/quantiles/DirectUpdateDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/quantiles/DirectUpdateDoublesSketch.java @@ -268,8 +268,7 @@ private WritableMemory growCombinedMemBuffer(final int itemSpaceNeeded) { final WritableMemory newMem = memReqSvr.request(mem_, needBytes); mem_.copyTo(0, newMem, 0, memBytes); - - memReqSvr.requestClose(mem_, newMem); + if (mem_.isCloseable()) { memReqSvr.requestClose(mem_, newMem); } return newMem; } diff --git a/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterTest.java b/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterTest.java index 7a0fbb7ff..81f9e0d2e 100644 --- a/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterTest.java +++ b/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterTest.java @@ -57,8 +57,7 @@ public void createNewFilterTest() throws Exception { 
assertFalse(bf1.isReadOnly()); WritableMemory wmem; - try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(sizeBytes, - new DefaultMemoryRequestServer())).scope()) { + try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(sizeBytes)).scope()) { final BloomFilter bf2 = new BloomFilter(numBits, numHashes, seed, wmem); assertTrue(bf2.isEmpty()); diff --git a/src/test/java/org/apache/datasketches/hll/DirectCouponListTest.java b/src/test/java/org/apache/datasketches/hll/DirectCouponListTest.java index c6c006f29..985cdf798 100644 --- a/src/test/java/org/apache/datasketches/hll/DirectCouponListTest.java +++ b/src/test/java/org/apache/datasketches/hll/DirectCouponListTest.java @@ -73,8 +73,7 @@ private static void promotions(int lgConfigK, int n, TgtHllType tgtHllType, bool //println("DIRECT"); byte[] barr1; WritableMemory wmem; - try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(bytes, - new DefaultMemoryRequestServer())).scope()) { + try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(bytes)).scope()) { //byte[] byteArr = new byte[bytes]; //WritableMemory wmem = WritableMemory.wrap(byteArr); hllSketch = new HllSketch(lgConfigK, tgtHllType, wmem); diff --git a/src/test/java/org/apache/datasketches/quantiles/DebugUnionTest.java b/src/test/java/org/apache/datasketches/quantiles/DebugUnionTest.java index d19d3b471..b1bd5818d 100644 --- a/src/test/java/org/apache/datasketches/quantiles/DebugUnionTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/DebugUnionTest.java @@ -67,8 +67,7 @@ public void test() { DoublesUnion dUnion; DoublesSketch dSketch; WritableMemory wmem; - try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(10_000_000, - new DefaultMemoryRequestServer())).scope()) { + try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(10_000_000)).scope()) { dUnion = DoublesUnion.builder().setMaxK(8).build(wmem); for (int s = 0; s < numSketches; s++) { dUnion.union(sketchArr[s]); } dSketch 
= dUnion.getResult(); //result is on heap diff --git a/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java b/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java index 1ad55696c..1e2089b51 100644 --- a/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java @@ -84,8 +84,7 @@ public void checkHeapifyWrap(int k, int u, boolean ordered) { int bytes = usk.getCompactBytes(); //for Compact WritableMemory directMem; - try (ResourceScope scope = (directMem = WritableMemory.allocateDirect(bytes, - new DefaultMemoryRequestServer())).scope()) { + try (ResourceScope scope = (directMem = WritableMemory.allocateDirect(bytes)).scope()) { /**Via CompactSketch.compact**/ refSk = usk.compact(ordered, directMem); diff --git a/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java index 32b9bbb44..63957c3cd 100644 --- a/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java @@ -683,8 +683,7 @@ public void checkEstModeNativeMemory() { int memCapacity = (k << 4) + (Family.QUICKSELECT.getMinPreLongs() << 3); WritableMemory wmem; - try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(memCapacity, - new DefaultMemoryRequestServer())).scope()) { + try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(memCapacity)).scope()) { UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(wmem); DirectQuickSelectSketch sk1 = (DirectQuickSelectSketch)usk; //for internal checks @@ -930,7 +929,7 @@ private static final int getMaxBytes(int k) { } private static WritableMemory makeNativeMemory(int k) { - return WritableMemory.allocateDirect(getMaxBytes(k), new DefaultMemoryRequestServer()); + return WritableMemory.allocateDirect(getMaxBytes(k)); } } diff 
--git a/src/test/java/org/apache/datasketches/theta/HeapifyWrapSerVer1and2Test.java b/src/test/java/org/apache/datasketches/theta/HeapifyWrapSerVer1and2Test.java index d3617c67e..6c90eafe4 100644 --- a/src/test/java/org/apache/datasketches/theta/HeapifyWrapSerVer1and2Test.java +++ b/src/test/java/org/apache/datasketches/theta/HeapifyWrapSerVer1and2Test.java @@ -566,7 +566,7 @@ public void checkWrapSketchGivenDifferentSeed() { private static WritableMemory putOffHeap(Memory heapMem) { final long cap = heapMem.getCapacity(); - WritableMemory wmem = WritableMemory.allocateDirect(cap, new DefaultMemoryRequestServer()); + WritableMemory wmem = WritableMemory.allocateDirect(cap); heapMem.copyTo(0, wmem, 0, cap); return wmem; } diff --git a/src/test/java/org/apache/datasketches/theta/UnionImplTest.java b/src/test/java/org/apache/datasketches/theta/UnionImplTest.java index fe76240a7..f28212061 100644 --- a/src/test/java/org/apache/datasketches/theta/UnionImplTest.java +++ b/src/test/java/org/apache/datasketches/theta/UnionImplTest.java @@ -234,8 +234,7 @@ public void checkUnionCompactOrderedSource() { final int bytes = Sketches.getCompactSketchMaxBytes(lgK); WritableMemory wmem; - try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(bytes, - new DefaultMemoryRequestServer())).scope()) { + try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(bytes)).scope()) { final CompactSketch csk = sk.compact(true, wmem); //ordered, direct final Union union = Sketches.setOperationBuilder().buildUnion(); From 59a3ff46fe966c98466ffb9f2b1925245952f38d Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Wed, 9 Oct 2024 14:25:35 -0700 Subject: [PATCH 04/22] Add kll main 8 longs --- .../apache/datasketches/kll/KllSketch.java | 9 +- .../kll/KllDirectCompactLongsSketchTest.java | 224 +++++ .../kll/KllDirectLongsSketchIteratorTest.java | 78 ++ .../kll/KllDirectLongsSketchTest.java | 686 +++++++++++++++ .../kll/KllLongsSketchIteratorTest.java | 114 +++ 
.../kll/KllLongsSketchSerDeTest.java | 131 +++ .../datasketches/kll/KllLongsSketchTest.java | 719 ++++++++++++++++ .../kll/KllMiscDirectLongsTest.java | 459 ++++++++++ .../datasketches/kll/KllMiscLongsTest.java | 790 ++++++++++++++++++ 9 files changed, 3209 insertions(+), 1 deletion(-) create mode 100644 src/test/java/org/apache/datasketches/kll/KllDirectCompactLongsSketchTest.java create mode 100644 src/test/java/org/apache/datasketches/kll/KllDirectLongsSketchIteratorTest.java create mode 100644 src/test/java/org/apache/datasketches/kll/KllDirectLongsSketchTest.java create mode 100644 src/test/java/org/apache/datasketches/kll/KllLongsSketchIteratorTest.java create mode 100644 src/test/java/org/apache/datasketches/kll/KllLongsSketchSerDeTest.java create mode 100644 src/test/java/org/apache/datasketches/kll/KllLongsSketchTest.java create mode 100644 src/test/java/org/apache/datasketches/kll/KllMiscDirectLongsTest.java create mode 100644 src/test/java/org/apache/datasketches/kll/KllMiscLongsTest.java diff --git a/src/main/java/org/apache/datasketches/kll/KllSketch.java b/src/main/java/org/apache/datasketches/kll/KllSketch.java index c398ed8ce..03b3a2bee 100644 --- a/src/main/java/org/apache/datasketches/kll/KllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllSketch.java @@ -34,6 +34,7 @@ import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH; import static org.apache.datasketches.kll.KllSketch.SketchType.ITEMS_SKETCH; +import static org.apache.datasketches.kll.KllSketch.SketchType.LONGS_SKETCH; import java.util.Arrays; import java.util.Random; @@ -450,6 +451,8 @@ final boolean isCompactSingleItem() { boolean isFloatsSketch() { return sketchType == FLOATS_SKETCH; } + boolean isLongsSketch() { return sketchType == LONGS_SKETCH; } + boolean isItemsSketch() { return sketchType == ITEMS_SKETCH; } abstract boolean isLevelZeroSorted(); @@ -503,7 +506,11 @@ 
public enum SketchType { /** * KllItemsSketch */ - ITEMS_SKETCH(0, "KllItemsSketch"); + ITEMS_SKETCH(0, "KllItemsSketch"), + /** + * KllLongsSketch + */ + LONGS_SKETCH(Long.BYTES, "KllLongsSketch"); private int typeBytes; private String name; diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectCompactLongsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectCompactLongsSketchTest.java new file mode 100644 index 000000000..6b57fccac --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/KllDirectCompactLongsSketchTest.java @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + +package org.apache.datasketches.kll; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.kll.KllDirectLongsSketch.KllDirectCompactLongsSketch; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; +import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.memory.WritableMemory; +import org.testng.annotations.Test; + +public class KllDirectCompactLongsSketchTest { + private static final DefaultMemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); + + @Test + public void checkRODirectUpdatable_ROandWritable() { + int k = 20; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + byte[] byteArr = KllHelper.toByteArray(sk, true); //request updatable + Memory srcMem = Memory.wrap(byteArr); //cast to Memory -> read only + KllLongsSketch sk2 = KllLongsSketch.wrap(srcMem); + assertTrue(sk2 instanceof KllDirectLongsSketch); + + assertTrue(sk2.isMemoryUpdatableFormat()); + assertTrue(sk2.isReadOnly()); + assertEquals(sk2.getMinItem(), 1L); + assertEquals(sk2.getMaxItem(), 21L); + + WritableMemory srcWmem = WritableMemory.writableWrap(byteArr); + KllLongsSketch sk3 = KllLongsSketch.writableWrap(srcWmem, memReqSvr); + assertTrue(sk3 instanceof KllDirectLongsSketch); + println(sk3.toString(true, false)); + assertFalse(sk3.isReadOnly()); + sk3.update(22); + assertEquals(sk2.getMinItem(), 1L); + assertEquals(sk2.getMaxItem(), 22L); + } + + @Test + public void checkRODirectCompact() { + int k = 20; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + Memory srcMem = Memory.wrap(sk.toByteArray()); //compact RO fmt + KllLongsSketch sk2 = KllLongsSketch.wrap(srcMem); + assertTrue(sk2 instanceof 
KllDirectCompactLongsSketch); + //println(sk2.toString(true, false)); + assertFalse(sk2.isMemoryUpdatableFormat()); + assertTrue(sk2.isReadOnly()); + assertEquals(sk2.getMinItem(), 1L); + assertEquals(sk2.getMaxItem(), 21L); + Memory srcMem2 = Memory.wrap(sk2.toByteArray()); + KllLongsSketch sk3 = KllLongsSketch.writableWrap((WritableMemory)srcMem2, memReqSvr); + assertTrue(sk3 instanceof KllDirectCompactLongsSketch); + assertFalse(sk2.isMemoryUpdatableFormat()); + //println(sk3.toString(true, false)); + assertTrue(sk3.isReadOnly()); + assertEquals(sk3.getMinItem(), 1L); + assertEquals(sk3.getMaxItem(), 21L); + } + + @Test + public void checkDirectCompactSingleItem() { + int k = 20; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + + sk.update(1); + KllLongsSketch sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + assertTrue(sk2 instanceof KllDirectCompactLongsSketch); + //println(sk2.toString(true, false)); + assertTrue(sk2.isReadOnly()); + assertEquals(sk2.getLongSingleItem(), 1L); + + sk.update(2); + sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + assertEquals(sk2.getN(), 2); + try { + sk2.getLongSingleItem(); + fail(); + } catch (SketchesArgumentException e) { } + } + + @Test + public void checkDirectCompactGetLongItemsArray() { + int k = 20; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + + KllLongsSketch sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + long[] itemsArr = sk2.getLongItemsArray(); + for (int i = 0; i < 20; i++) { assertEquals(itemsArr[i], 0); } + + sk.update(1); + sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + itemsArr = sk2.getLongItemsArray(); + for (int i = 0; i < 19; i++) { assertEquals(itemsArr[i], 0); } + assertEquals(itemsArr[19], 1L); + + for (int i = 2; i <= 21; i++) { sk.update(i); } + sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + itemsArr = sk2.getLongItemsArray(); + assertEquals(itemsArr.length, 33); + assertEquals(itemsArr[22], 21); + } + + @Test + public 
void checkHeapAndDirectCompactGetRetainedItemsArray() { + int k = 20; + + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + long[] retArr = sk.getLongRetainedItemsArray(); + assertEquals(retArr.length, 0); + + KllLongsSketch sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + retArr = sk2.getLongRetainedItemsArray(); + assertEquals(retArr.length, sk.getNumRetained()); + assertEquals(retArr.length, 0); + + sk.update(1); + retArr = sk.getLongRetainedItemsArray(); + assertEquals(retArr.length, sk.getNumRetained()); + assertEquals(retArr.length, 1); + assertEquals(retArr[0], 1L); + + sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + retArr = sk2.getLongRetainedItemsArray(); + assertEquals(retArr.length, sk.getNumRetained()); + assertEquals(retArr.length, 1); + assertEquals(retArr[0], 1L); + + for (int i = 2; i <= 21; i++) { sk.update(i); } + retArr = sk.getLongRetainedItemsArray(); + assertEquals(retArr.length, sk.getNumRetained()); + assertEquals(retArr.length, 11); + + sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + assertEquals(retArr.length, sk2.getNumRetained()); + assertEquals(retArr.length, 11); + } + + @Test + public void checkMinAndMax() { + int k = 20; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + KllLongsSketch sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + try { sk2.getMinItem(); fail(); } catch (SketchesArgumentException e) {} + try { sk2.getMaxItem(); fail(); } catch (SketchesArgumentException e) {} + sk.update(1); + sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + assertEquals(sk2.getMaxItem(),1L); + assertEquals(sk2.getMinItem(),1L); + for (int i = 2; i <= 21; i++) { sk.update(i); } + sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + assertEquals(sk2.getMaxItem(),21L); + assertEquals(sk2.getMinItem(),1L); + } + + @Test + public void checkQuantile() { + KllLongsSketch sk1 = KllLongsSketch.newHeapInstance(); + for (int i = 1; i <= 1000; i++) { sk1.update(i); } + 
KllLongsSketch sk2 = KllLongsSketch.wrap(Memory.wrap(sk1.toByteArray())); + long med2 = sk2.getQuantile(0.5); + long med1 = sk1.getQuantile(0.5); + assertEquals(med1, med2); + println("Med1: " + med1); + println("Med2: " + med2); + } + + @Test + public void checkCompactSingleItemMerge() { + int k = 20; + KllLongsSketch skH1 = KllLongsSketch.newHeapInstance(k); //Heap with 1 (single) + skH1.update(21); + KllLongsSketch skDC1 = KllLongsSketch.wrap(Memory.wrap(skH1.toByteArray())); //Direct Compact with 1 (single) + KllLongsSketch skH20 = KllLongsSketch.newHeapInstance(k); //Heap with 20 + for (int i = 1; i <= 20; i++) { skH20.update(i); } + skH20.merge(skDC1); + assertEquals(skH20.getN(), 21); + + WritableMemory wmem = WritableMemory.allocate(1000); + KllLongsSketch skDU20 = KllLongsSketch.newDirectInstance(k, wmem, memReqSvr);//Direct Updatable with 20 + for (int i = 1; i <= 20; i++) { skDU20.update(i); } + skDU20.merge(skDC1); + assertEquals(skDU20.getN(), 21); + } + + @Test + public void printlnTest() { + println("PRINTING: " + this.getClass().getName()); + } + + private final static boolean enablePrinting = false; + + /** + * @param o the Object to println + */ + private static final void println(final Object o) { + if (enablePrinting) { System.out.println(o.toString()); } + } + +} diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectLongsSketchIteratorTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectLongsSketchIteratorTest.java new file mode 100644 index 000000000..8be509f10 --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/KllDirectLongsSketchIteratorTest.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.kll; + +import org.apache.datasketches.memory.DefaultMemoryRequestServer; +import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.quantilescommon.QuantilesLongsSketchIterator; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class KllDirectLongsSketchIteratorTest { + private static final DefaultMemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); + + @Test + public void emptySketch() { + final KllLongsSketch sketch = getDLSketch(200, 0); + QuantilesLongsSketchIterator it = sketch.iterator(); + Assert.assertFalse(it.next()); + } + + @Test + public void oneItemSketch() { + final KllLongsSketch sketch = getDLSketch(200, 0); + sketch.update(0); + QuantilesLongsSketchIterator it = sketch.iterator(); + Assert.assertTrue(it.next()); + Assert.assertEquals(it.getQuantile(), 0); + Assert.assertEquals(it.getWeight(), 1); + Assert.assertFalse(it.next()); + } + + @Test + public void bigSketches() { + for (int n = 1000; n < 100000; n += 2000) { + final KllLongsSketch sketch = getDLSketch(200, 0); + for (int i = 0; i < n; i++) { + sketch.update(i); + } + QuantilesLongsSketchIterator it = sketch.iterator(); + int count = 0; + int weight = 0; + while (it.next()) { + count++; + weight += (int)it.getWeight(); + } + Assert.assertEquals(count, sketch.getNumRetained()); + Assert.assertEquals(weight, 
n); + } + } + + private static KllLongsSketch getDLSketch(final int k, final int n) { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= n; i++) { sk.update(i); } + byte[] byteArr = KllHelper.toByteArray(sk, true); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + + KllLongsSketch dlsk = KllLongsSketch.writableWrap(wmem, memReqSvr); + return dlsk; + } + +} diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectLongsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectLongsSketchTest.java new file mode 100644 index 000000000..f1784b7ce --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/KllDirectLongsSketchTest.java @@ -0,0 +1,686 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.kll; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.kll.KllSketch.SketchStructure; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; +import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.memory.WritableMemory; +import org.testng.annotations.Test; + +import static org.apache.datasketches.kll.KllSketch.SketchType.LONGS_SKETCH; +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.EXCLUSIVE; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +public class KllDirectLongsSketchTest { + + private static final double PMF_EPS_FOR_K_8 = 0.35; // PMF rank error (epsilon) for k=8 + private static final double PMF_EPS_FOR_K_128 = 0.025; // PMF rank error (epsilon) for k=128 + private static final double PMF_EPS_FOR_K_256 = 0.013; // PMF rank error (epsilon) for k=256 + private static final double NUMERIC_NOISE_TOLERANCE = 1E-6; + private static final DefaultMemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); + + @Test + public void empty() { + final KllLongsSketch sketch = getUpdatableDirectLongSketch(200, 0); + assertTrue(sketch.isEmpty()); + assertEquals(sketch.getN(), 0); + assertEquals(sketch.getNumRetained(), 0); + try { sketch.getRank(0); fail(); } catch (SketchesArgumentException e) {} + try { sketch.getMinItem(); fail(); } catch (SketchesArgumentException e) {} + try { sketch.getMaxItem(); fail(); } catch (SketchesArgumentException e) {} + try { sketch.getQuantile(0.5); fail(); } catch (SketchesArgumentException e) {} + try { sketch.getQuantiles(new double[] {0.0, 1.0}); fail(); } catch (SketchesArgumentException e) {} + try { sketch.getPMF(new long[] {0}); fail(); } catch (SketchesArgumentException e) {} + try { 
sketch.getCDF(new long[0]); fail(); } catch (SketchesArgumentException e) {} + assertNotNull(sketch.toString(true, true)); + assertNotNull(sketch.toString()); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void getQuantileInvalidArg() { + final KllLongsSketch sketch = getUpdatableDirectLongSketch(200, 0); + sketch.update(1); + sketch.getQuantile(-1.0); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void getQuantilesInvalidArg() { + final KllLongsSketch sketch = getUpdatableDirectLongSketch(200, 0); + sketch.update(1); + sketch.getQuantiles(new double[] {2.0}); + } + + @Test + public void oneValue() { + final KllLongsSketch sketch = getUpdatableDirectLongSketch(200, 0); + sketch.update(1); + assertFalse(sketch.isEmpty()); + assertEquals(sketch.getN(), 1); + assertEquals(sketch.getNumRetained(), 1); + assertEquals(sketch.getRank(1, EXCLUSIVE), 0.0); + assertEquals(sketch.getRank(2, EXCLUSIVE), 1.0); + assertEquals(sketch.getMinItem(), 1L); + assertEquals(sketch.getMaxItem(), 1L); + assertEquals(sketch.getQuantile(0.5, EXCLUSIVE), 1L); + } + + @Test + public void manyValuesEstimationMode() { + final KllLongsSketch sketch = getUpdatableDirectLongSketch(200, 0); + final int n = 1_000_000; + + for (int i = 0; i < n; i++) { + sketch.update(i); + } + assertEquals(sketch.getN(), n); + + // test getRank + for (int i = 0; i < n; i++) { + final double trueRank = (double) i / n; + assertEquals(sketch.getRank(i), trueRank, PMF_EPS_FOR_K_256, "for value " + i); + } + + // test getPMF + final double[] pmf = sketch.getPMF(new long[] {n / 2}); // split at median + assertEquals(pmf.length, 2); + assertEquals(pmf[0], 0.5, PMF_EPS_FOR_K_256); + assertEquals(pmf[1], 0.5, PMF_EPS_FOR_K_256); + assertEquals(sketch.getMinItem(), 0); // min value is exact + assertEquals(sketch.getMaxItem(), n - 1L); // max value is exact + + // check at every 0.1 percentage point + final double[] ranks = new double[1001]; + final double[] 
reverseRanks = new double[1001]; // check that ordering doesn't matter + for (int i = 0; i <= 1000; i++) { + ranks[i] = (double) i / 1000; + reverseRanks[1000 - i] = ranks[i]; + } + final long[] quantiles = sketch.getQuantiles(ranks); + final long[] reverseQuantiles = sketch.getQuantiles(reverseRanks); + long previousQuantile = 0; + for (int i = 0; i <= 1000; i++) { + final long quantile = sketch.getQuantile(ranks[i]); + assertEquals(quantile, quantiles[i]); + assertEquals(quantile, reverseQuantiles[1000 - i]); + assertTrue(previousQuantile <= quantile); + previousQuantile = quantile; + } + } + + @Test + public void getRankGetCdfGetPmfConsistency() { + final KllLongsSketch sketch = getUpdatableDirectLongSketch(200, 0); + final int n = 1000; + final long[] values = new long[n]; + for (int i = 0; i < n; i++) { + sketch.update(i); + values[i] = i; + } + final double[] ranks = sketch.getCDF(values); + final double[] pmf = sketch.getPMF(values); + double sumPmf = 0; + for (int i = 0; i < n; i++) { + assertEquals(ranks[i], sketch.getRank(values[i]), NUMERIC_NOISE_TOLERANCE, + "rank vs CDF for value " + i); + sumPmf += pmf[i]; + assertEquals(ranks[i], sumPmf, NUMERIC_NOISE_TOLERANCE, "CDF vs PMF for value " + i); + } + sumPmf += pmf[n]; + assertEquals(sumPmf, 1.0, NUMERIC_NOISE_TOLERANCE); + assertEquals(ranks[n], 1.0, NUMERIC_NOISE_TOLERANCE); + } + + @Test + public void merge() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(200, 0); + final KllLongsSketch sketch2 = getUpdatableDirectLongSketch(200, 0); + final int n = 10_000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + sketch2.update(2 * n - i - 1); + } + + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), n - 1); + + assertEquals(sketch2.getMinItem(), n); + assertEquals(sketch2.getMaxItem(), 2 * n - 1); + + sketch1.merge(sketch2); + + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), 2L * n); + assertEquals(sketch1.getMinItem(), 0); + 
assertEquals(sketch1.getMaxItem(), 2 * n - 1L); + assertEquals(sketch1.getQuantile(0.5), n, n * PMF_EPS_FOR_K_256); + } + + @Test + public void mergeLowerK() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(256, 0); + final KllLongsSketch sketch2 = getUpdatableDirectLongSketch(128, 0); + final int n = 10_000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + sketch2.update(2 * n - i - 1); + } + + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), n - 1); + + assertEquals(sketch2.getMinItem(), n); + assertEquals(sketch2.getMaxItem(), 2 * n - 1); + + assertTrue(sketch1.getNormalizedRankError(false) < sketch2.getNormalizedRankError(false)); + assertTrue(sketch1.getNormalizedRankError(true) < sketch2.getNormalizedRankError(true)); + sketch1.merge(sketch2); + + // sketch1 must get "contaminated" by the lower K in sketch2 + assertEquals(sketch1.getNormalizedRankError(false), sketch2.getNormalizedRankError(false)); + assertEquals(sketch1.getNormalizedRankError(true), sketch2.getNormalizedRankError(true)); + + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), 2 * n); + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), 2 * n - 1); + assertEquals(sketch1.getQuantile(0.5), n, n * PMF_EPS_FOR_K_128); + } + + @Test + public void mergeEmptyLowerK() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(256, 0); + final KllLongsSketch sketch2 = getUpdatableDirectLongSketch(128, 0); + final int n = 10_000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + } + + // rank error should not be affected by a merge with an empty sketch with lower K + final double rankErrorBeforeMerge = sketch1.getNormalizedRankError(true); + sketch1.merge(sketch2); + assertEquals(sketch1.getNormalizedRankError(true), rankErrorBeforeMerge); + + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), n); + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), n - 1); + 
assertEquals(sketch1.getQuantile(0.5), n / 2, n / 2 * PMF_EPS_FOR_K_256); + + //merge the other way + sketch2.merge(sketch1); + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), n); + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), n - 1); + assertEquals(sketch1.getQuantile(0.5), n / 2, n / 2 * PMF_EPS_FOR_K_256); + } + + @Test + public void mergeExactModeLowerK() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(256, 0); + final KllLongsSketch sketch2 = getUpdatableDirectLongSketch(128, 0); + final int n = 10000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + } + sketch2.update(1); + + // rank error should not be affected by a merge with a sketch in exact mode with lower K + final double rankErrorBeforeMerge = sketch1.getNormalizedRankError(true); + sketch1.merge(sketch2); + assertEquals(sketch1.getNormalizedRankError(true), rankErrorBeforeMerge); + } + + @Test + public void mergeMinMinValueFromOther() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(200, 0); + final KllLongsSketch sketch2 = getUpdatableDirectLongSketch(200, 0); + sketch1.update(1); + sketch2.update(2); + sketch2.merge(sketch1); + assertEquals(sketch2.getMinItem(), 1L); + } + + @Test + public void mergeMinAndMaxFromOther() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(200, 0); + final KllLongsSketch sketch2 = getUpdatableDirectLongSketch(200, 0); + int n = 1_000_000; + for (int i = 1; i <= n; i++) { + sketch1.update(i); + } + sketch2.merge(sketch1); + assertEquals(sketch2.getMinItem(), 1L); + assertEquals(sketch2.getMaxItem(), 1_000_000L); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void kTooSmall() { + getUpdatableDirectLongSketch(KllSketch.DEFAULT_M - 1, 0); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void kTooLarge() { + getUpdatableDirectLongSketch(KllSketch.MAX_K + 1, 0); + } + + @Test + public void minK() { + final KllLongsSketch 
sketch = getUpdatableDirectLongSketch(KllSketch.DEFAULT_M, 0); + for (int i = 0; i < 1000; i++) { + sketch.update(i); + } + assertEquals(sketch.getK(), KllSketch.DEFAULT_M); + assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_8); + } + + @Test + public void maxK() { + final KllLongsSketch sketch = getUpdatableDirectLongSketch(KllSketch.MAX_K, 0); + for (int i = 0; i < 1000; i++) { + sketch.update(i); + } + assertEquals(sketch.getK(), KllSketch.MAX_K); + assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_256); + } + + @Test + public void serializeDeserializeEmptyViaCompactHeapify() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(200, 0); + final byte[] bytes = sketch1.toByteArray(); //compact + final KllLongsSketch sketch2 = KllLongsSketch.heapify(Memory.wrap(bytes)); + assertEquals(bytes.length, sketch1.currentSerializedSizeBytes(false)); + assertTrue(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); + assertEquals(sketch2.getN(), sketch1.getN()); + assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + try { sketch2.getMinItem(); fail(); } catch (SketchesArgumentException e) {} + try { sketch2.getMaxItem(); fail(); } catch (SketchesArgumentException e) {} + assertEquals(sketch2.currentSerializedSizeBytes(false), + sketch1.currentSerializedSizeBytes(false)); + } + + @Test + public void serializeDeserializeEmptyViaUpdatableWritableWrap() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(200, 0); + final byte[] bytes = KllHelper.toByteArray(sketch1, true); + final KllLongsSketch sketch2 = + KllLongsSketch.writableWrap(WritableMemory.writableWrap(bytes),memReqSvr); + assertEquals(bytes.length, sketch1.currentSerializedSizeBytes(true)); + assertTrue(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); + assertEquals(sketch2.getN(), sketch1.getN()); + assertEquals(sketch2.getNormalizedRankError(false), 
sketch1.getNormalizedRankError(false)); + try { sketch2.getMinItem(); fail(); } catch (SketchesArgumentException e) {} + try { sketch2.getMaxItem(); fail(); } catch (SketchesArgumentException e) {} + assertEquals(sketch2.currentSerializedSizeBytes(true), + sketch1.currentSerializedSizeBytes(true)); + } + + @Test + public void serializeDeserializeOneValueViaCompactHeapify() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(200, 0); + sketch1.update(1); + final byte[] bytes = sketch1.toByteArray(); + final KllLongsSketch sketch2 = KllLongsSketch.heapify(Memory.wrap(bytes)); + assertEquals(bytes.length, sketch1.currentSerializedSizeBytes(false)); + assertFalse(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), 1); + assertEquals(sketch2.getN(), 1); + assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + assertEquals(sketch2.getMinItem(), 1L); //exact: the only item updated is 1 + assertEquals(sketch2.getMaxItem(), 1L); //exact: the only item updated is 1 + assertEquals(sketch2.currentSerializedSizeBytes(false), 8 + Long.BYTES); + } + + @Test + public void serializeDeserializeOneValueViaUpdatableWritableWrap() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(200, 0); + sketch1.update(1); + final byte[] bytes = KllHelper.toByteArray(sketch1, true); + final KllLongsSketch sketch2 = + KllLongsSketch.writableWrap(WritableMemory.writableWrap(bytes),memReqSvr); + assertEquals(bytes.length, sketch1.currentSerializedSizeBytes(true)); + assertFalse(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), 1); + assertEquals(sketch2.getN(), 1); + assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + assertEquals(sketch2.getMinItem(), 1L); + assertEquals(sketch2.getMaxItem(), 1L); + assertEquals(sketch2.currentSerializedSizeBytes(false), 8 + Long.BYTES); + assertEquals(sketch2.currentSerializedSizeBytes(true), bytes.length); + } + + @Test + public void serializeDeserializeFullViaCompactHeapify() { + final
KllLongsSketch sketch1 = getUpdatableDirectLongSketch(200, 1000); + final byte[] byteArr1 = sketch1.toByteArray(); //compact + final KllLongsSketch sketch2 = KllLongsSketch.heapify(Memory.wrap(byteArr1)); + assertEquals(byteArr1.length, sketch1.currentSerializedSizeBytes(false)); + assertFalse(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); + assertEquals(sketch2.getN(), sketch1.getN()); + assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + assertEquals(sketch2.getMinItem(), sketch1.getMinItem()); + assertEquals(sketch2.getMaxItem(), sketch1.getMaxItem()); + assertEquals(sketch2.currentSerializedSizeBytes(false), sketch1.currentSerializedSizeBytes(false)); + } + + @Test + public void serializeDeserializeFullViaUpdatableWritableWrap() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(200, 0); + final int n = 1000; + for (int i = 1; i <= n; i++) { + sketch1.update(i); + } + final byte[] bytes = KllHelper.toByteArray(sketch1, true); //updatable + final KllLongsSketch sketch2 = + KllLongsSketch.writableWrap(WritableMemory.writableWrap(bytes), memReqSvr); + assertEquals(bytes.length, sketch1.currentSerializedSizeBytes(true)); + assertFalse(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); + assertEquals(sketch2.getN(), sketch1.getN()); + assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + assertEquals(sketch2.getMinItem(), sketch1.getMinItem()); + assertEquals(sketch2.getMaxItem(), sketch1.getMaxItem()); + assertEquals(sketch2.currentSerializedSizeBytes(true), sketch1.currentSerializedSizeBytes(true)); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void outOfOrderSplitPoints() { + final KllLongsSketch sketch = getUpdatableDirectLongSketch(200, 0); + sketch.update(0); + sketch.getCDF(new long[] {1, 0}); + } + + @Test + public void checkSimpleMergeDirect() { //used for 
troubleshooting + int k = 20; + int n1 = 21; + int n2 = 43; + KllLongsSketch sk1 = KllLongsSketch.newHeapInstance(k); + KllLongsSketch sk2 = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= n1; i++) { + sk1.update(i); + } + for (int i = 1; i <= n2; i++) { + sk2.update(i + 100); + } + println("SK1:"); + println(sk1.toString(true, true)); + println("SK2:"); + println(sk2.toString(true, true)); + WritableMemory wmem1 = WritableMemory.writableWrap(KllHelper.toByteArray(sk1, true)); + WritableMemory wmem2 = WritableMemory.writableWrap(KllHelper.toByteArray(sk2, true)); + KllLongsSketch dsk1 = KllLongsSketch.writableWrap(wmem1, memReqSvr); + KllLongsSketch dsk2 = KllLongsSketch.writableWrap(wmem2, memReqSvr); + println("BEFORE MERGE"); + println(dsk1.toString(true, true)); + dsk1.merge(dsk2); + println("AFTER MERGE"); + println(dsk1.toString(true, true)); + } + + @Test + public void checkSketchInitializeDirectLongUpdatableMem() { + int k = 20; //don't change this + KllLongsSketch sk; + KllLongsSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + println("#### CASE: LONG FULL DIRECT FROM UPDATABLE"); + sk2 = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + //println(sk2.toString(true, true)); + compBytes = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(compBytes, LONGS_SKETCH, true)); + sk = KllLongsSketch.writableWrap(wmem, memReqSvr); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 33); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 3); + assertEquals(sk.getMaxItem(), 21); + assertEquals(sk.getMinItem(), 1); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: LONG EMPTY HEAPIFIED FROM 
UPDATABLE"); + sk2 = KllLongsSketch.newHeapInstance(k); + //println(sk.toString(true, true)); + compBytes = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(compBytes, LONGS_SKETCH, true)); + sk = KllLongsSketch.writableWrap(wmem, memReqSvr); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + try { sk.getMaxItem(); fail(); } catch (SketchesArgumentException e) {} + try { sk.getMinItem(); fail(); } catch (SketchesArgumentException e) {} + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: LONG SINGLE HEAPIFIED FROM UPDATABLE"); + sk2 = KllLongsSketch.newHeapInstance(k); + sk2.update(1); + //println(sk.toString(true, true)); + compBytes = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(compBytes, LONGS_SKETCH, true)); + sk = KllLongsSketch.writableWrap(wmem, memReqSvr); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + assertEquals(sk.getMaxItem(), 1L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkGetWritableMemory() { + final KllLongsSketch sketch = getUpdatableDirectLongSketch(200, 200); + assertEquals(sketch.getK(), 200); + assertEquals(sketch.getN(), 200); + assertFalse(sketch.isEmpty()); + assertTrue(sketch.isMemoryUpdatableFormat()); + 
assertFalse(sketch.isEstimationMode()); + assertTrue(sketch.isLongsSketch()); + assertFalse(sketch.isLevelZeroSorted()); + assertFalse(sketch.isDoublesSketch()); + + final WritableMemory wmem = sketch.getWritableMemory(); + final KllLongsSketch sk = KllHeapLongsSketch.heapifyImpl(wmem); + assertEquals(sk.getK(), 200); + assertEquals(sk.getN(), 200); + assertFalse(sk.isEmpty()); + assertFalse(sk.isMemoryUpdatableFormat()); + assertFalse(sk.isEstimationMode()); + assertTrue(sk.isLongsSketch()); + assertFalse(sk.isLevelZeroSorted()); + assertFalse(sk.isDoublesSketch()); + } + + @Test + public void checkReset() { + WritableMemory dstMem = WritableMemory.allocate(3000); + KllLongsSketch sk = KllLongsSketch.newDirectInstance(20, dstMem, memReqSvr); + for (int i = 1; i <= 100; i++) { sk.update(i); } + long n1 = sk.getN(); + long min1 = sk.getMinItem(); + long max1 = sk.getMaxItem(); + sk.reset(); + for (int i = 1; i <= 100; i++) { sk.update(i); } + long n2 = sk.getN(); + long min2 = sk.getMinItem(); + long max2 = sk.getMaxItem(); + assertEquals(n2, n1); + assertEquals(min2, min1); + assertEquals(max2, max1); + } + + @Test + public void checkHeapify() { + WritableMemory dstMem = WritableMemory.allocate(6000); + KllLongsSketch sk = KllLongsSketch.newDirectInstance(20, dstMem, memReqSvr); + for (int i = 1; i <= 100; i++) { sk.update(i); } + KllLongsSketch sk2 = KllHeapLongsSketch.heapifyImpl(dstMem); + assertEquals(sk2.getMinItem(), 1L); + assertEquals(sk2.getMaxItem(), 100L); + } + + @Test + public void checkMergeKllLongsSketch() { + WritableMemory dstMem = WritableMemory.allocate(6000); + KllLongsSketch sk = KllLongsSketch.newDirectInstance(20, dstMem, memReqSvr); + for (int i = 1; i <= 21; i++) { sk.update(i); } + KllLongsSketch sk2 = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 21; i++ ) { sk2.update(i + 100); } + sk.merge(sk2); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getMaxItem(), 121L); + } + + @Test + public void 
checkReverseMergeKllLongsSketch() { + WritableMemory dstMem = WritableMemory.allocate(6000); + KllLongsSketch sk = KllLongsSketch.newDirectInstance(20, dstMem, memReqSvr); + for (int i = 1; i <= 21; i++) { sk.update(i); } + KllLongsSketch sk2 = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 21; i++ ) { sk2.update(i + 100); } + sk2.merge(sk); + assertEquals(sk2.getMinItem(), 1L); + assertEquals(sk2.getMaxItem(), 121L); + } + + @Test + public void checkWritableWrapOfCompactForm() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 21; i++ ) { sk.update(i); } + WritableMemory srcMem = WritableMemory.writableWrap(sk.toByteArray()); + KllLongsSketch sk2 = KllLongsSketch.writableWrap(srcMem, memReqSvr); + assertEquals(sk2.getMinItem(), 1L); + assertEquals(sk2.getMaxItem(), 21L); + } + + @Test + public void checkReadOnlyExceptions() { + int k = 20; + long[] fltArr = new long[0]; + long fltV = 1; + int idx = 1; + boolean bool = true; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + KllLongsSketch sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + try { sk2.incN(1); fail(); } catch (SketchesArgumentException e) { } + try { sk2.incNumLevels(); fail(); } catch (SketchesArgumentException e) { } + try { sk2.setLongItemsArray(fltArr); fail(); } catch (SketchesArgumentException e) { } + try { sk2.setLongItemsArrayAt(idx, fltV); fail(); } catch (SketchesArgumentException e) { } + try { sk2.setLevelZeroSorted(bool); fail(); } catch (SketchesArgumentException e) { } + try { sk2.setMaxItem(fltV); fail(); } catch (SketchesArgumentException e) { } + try { sk2.setMinItem(fltV); fail(); } catch (SketchesArgumentException e) { } + try { sk2.setMinK(idx); fail(); } catch (SketchesArgumentException e) { } + try { sk2.setN(idx); fail(); } catch (SketchesArgumentException e) { } + try { sk2.setNumLevels(idx); fail(); } catch (SketchesArgumentException e) { } + } + + @Test(expectedExceptions = SketchesArgumentException.class) + 
public void checkMergeExceptions() { + KllLongsSketch sk1 = KllLongsSketch.newHeapInstance(20); + WritableMemory srcMem1 = WritableMemory.writableWrap(sk1.toByteArray()); + KllLongsSketch sk2 = KllLongsSketch.writableWrap(srcMem1, memReqSvr); + sk2.merge(sk1); + } + + @Test + public void checkVectorUpdate() { + WritableMemory dstMem = WritableMemory.allocate(6000); + KllLongsSketch sk = KllLongsSketch.newDirectInstance(20, dstMem, memReqSvr); + long[] v = new long[21]; + for (int i = 0; i < 21; i++) { v[i] = i + 1; } + sk.update(v, 0, 21); + println(sk.toString(true, true)); + int[] levelsArr = sk.getLevelsArray(SketchStructure.UPDATABLE); + assertEquals(levelsArr[0], 22); + long[] longsArr = sk.getLongItemsArray(); + assertEquals(longsArr[22], 21); + } + + @Test + public void checkWeightedUpdate() { + WritableMemory dstMem = WritableMemory.allocate(6000); + KllLongsSketch sk = KllLongsSketch.newDirectInstance(8, dstMem, memReqSvr); + for (int i = 0; i < 16; i++) { + sk.update(i + 1, 16); + } + println(sk.toString(true, true)); + assertEquals(sk.getN(), 256); + assertEquals(sk.getMaxItem(), 16L); + assertEquals(sk.getMinItem(), 1L); + } + + private static KllLongsSketch getUpdatableDirectLongSketch(int k, int n) { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= n; i++) { sk.update(i); } + byte[] byteArr = KllHelper.toByteArray(sk, true); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + KllLongsSketch dfsk = KllLongsSketch.writableWrap(wmem, memReqSvr); + return dfsk; + } + + @Test + public void checkMergeExceptionsWrongType() { + KllLongsSketch sk1 = KllLongsSketch.newHeapInstance(20); + KllDoublesSketch sk2 = KllDoublesSketch.newHeapInstance(20); + try { sk1.merge(sk2); fail(); } catch (ClassCastException e) { } + try { sk2.merge(sk1); fail(); } catch (ClassCastException e) { } + } + + private final static boolean enablePrinting = false; + + /** + * @param o the Object to println + */ + private static final void 
println(final Object o) { + if (enablePrinting) { System.out.println(o.toString()); } + } + +} diff --git a/src/test/java/org/apache/datasketches/kll/KllLongsSketchIteratorTest.java b/src/test/java/org/apache/datasketches/kll/KllLongsSketchIteratorTest.java new file mode 100644 index 000000000..a98c32c9e --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/KllLongsSketchIteratorTest.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.kll; + +import org.apache.datasketches.quantilescommon.LongsSortedViewIterator; +import org.apache.datasketches.quantilescommon.QuantilesLongsSketchIterator; +import org.testng.Assert; +import org.testng.annotations.Test; + +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.EXCLUSIVE; +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +public class KllLongsSketchIteratorTest { + + @Test + public void emptySketch() { + KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + QuantilesLongsSketchIterator it = sketch.iterator(); + Assert.assertFalse(it.next()); + } + + @Test + public void oneItemSketch() { + KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + sketch.update(1); + QuantilesLongsSketchIterator it = sketch.iterator(); + Assert.assertTrue(it.next()); + Assert.assertEquals(it.getQuantile(), 1L); + Assert.assertEquals(it.getWeight(), 1); + Assert.assertFalse(it.next()); + } + + @Test + public void twoItemSketchForIterator() { + KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + sketch.update(1); + sketch.update(2); + QuantilesLongsSketchIterator itr = sketch.iterator(); + assertTrue(itr.next()); + + assertEquals(itr.getQuantile(), 2L); + assertEquals(itr.getWeight(), 1); + + assertTrue(itr.next()); + + assertEquals(itr.getQuantile(), 1L); + assertEquals(itr.getWeight(), 1); + } + + @Test + public void twoItemSketchForSortedViewIterator() { + KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + sketch.update(1); + sketch.update(2); + LongsSortedViewIterator itr = sketch.getSortedView().iterator(); + + assertTrue(itr.next()); + + assertEquals(itr.getQuantile(), 1L); + assertEquals(itr.getWeight(), 1); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 0); + assertEquals(itr.getNaturalRank(INCLUSIVE), 1); + 
assertEquals(itr.getNormalizedRank(EXCLUSIVE), 0); + assertEquals(itr.getNormalizedRank(INCLUSIVE), 0.5); + + assertTrue(itr.next()); + + assertEquals(itr.getQuantile(), 2L); + assertEquals(itr.getWeight(), 1); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 1); + assertEquals(itr.getNaturalRank(INCLUSIVE), 2); + assertEquals(itr.getNormalizedRank(EXCLUSIVE), 0.5); + assertEquals(itr.getNormalizedRank(INCLUSIVE), 1.0); + } + + @Test + public void bigSketches() { + for (int n = 1000; n < 100000; n += 2000) { + KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + for (int i = 0; i < n; i++) { + sketch.update(i); + } + QuantilesLongsSketchIterator it = sketch.iterator(); + int count = 0; + int weight = 0; + while (it.next()) { + count++; + weight += (int)it.getWeight(); + } + Assert.assertEquals(count, sketch.getNumRetained()); + Assert.assertEquals(weight, n); + } + } + +} diff --git a/src/test/java/org/apache/datasketches/kll/KllLongsSketchSerDeTest.java b/src/test/java/org/apache/datasketches/kll/KllLongsSketchSerDeTest.java new file mode 100644 index 000000000..b9b0f800d --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/KllLongsSketchSerDeTest.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.kll; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.memory.Memory; +import org.testng.annotations.Test; + +public class KllLongsSketchSerDeTest { + + @Test + public void serializeDeserializeEmpty() { + final int N = 20; + + final KllLongsSketch sk1 = KllLongsSketch.newHeapInstance(N); + //Empty: from heap -> byte[] -> heap + final byte[] bytes = sk1.toByteArray(); + final KllLongsSketch sk2 = KllLongsSketch.heapify(Memory.wrap(bytes)); + assertEquals(bytes.length, sk1.getSerializedSizeBytes()); + assertTrue(sk2.isEmpty()); + assertEquals(sk2.getNumRetained(), sk1.getNumRetained()); + assertEquals(sk2.getN(), sk1.getN()); + assertEquals(sk2.getNormalizedRankError(false), sk1.getNormalizedRankError(false)); + try { sk2.getMinItem(); fail(); } catch (SketchesArgumentException e) {} + try { sk2.getMaxItem(); fail(); } catch (SketchesArgumentException e) {} + assertEquals(sk2.getSerializedSizeBytes(), sk1.getSerializedSizeBytes()); + + //Empty: from heap -> byte[] -> off heap + final KllLongsSketch sk3 = KllLongsSketch.wrap(Memory.wrap(bytes)); + assertTrue(sk3.isEmpty()); + assertEquals(sk3.getNumRetained(), sk1.getNumRetained()); + assertEquals(sk3.getN(), sk1.getN()); + assertEquals(sk3.getNormalizedRankError(false), sk1.getNormalizedRankError(false)); + try { sk3.getMinItem(); fail(); } catch (SketchesArgumentException e) {} + try { sk3.getMaxItem(); fail(); } catch (SketchesArgumentException e) {} + assertEquals(sk3.getSerializedSizeBytes(), sk1.getSerializedSizeBytes()); + //from heap -> byte[] -> off heap -> byte[] -> compare byte[] + final byte[] bytes2 = sk3.toByteArray(); + 
assertEquals(bytes, bytes2); + } + + @Test + public void serializeDeserializeOneValue() { + final KllLongsSketch sk1 = KllLongsSketch.newHeapInstance(); + sk1.update(1); + + //from heap -> byte[] -> heap + final byte[] bytes = sk1.toByteArray(); + final KllLongsSketch sk2 = KllLongsSketch.heapify(Memory.wrap(bytes)); + assertEquals(bytes.length, sk1.getSerializedSizeBytes()); + assertFalse(sk2.isEmpty()); + assertEquals(sk2.getNumRetained(), 1); + assertEquals(sk2.getN(), 1); + assertEquals(sk2.getNormalizedRankError(false), sk1.getNormalizedRankError(false)); + assertEquals(sk2.getMinItem(), 1L); + assertEquals(sk2.getMaxItem(), 1L); + assertEquals(sk2.getSerializedSizeBytes(), Long.BYTES + Long.BYTES); + + //from heap -> byte[] -> off heap + final KllLongsSketch sk3 = KllLongsSketch.wrap(Memory.wrap(bytes)); + assertFalse(sk3.isEmpty()); + assertEquals(sk3.getNumRetained(), 1); + assertEquals(sk3.getN(), 1); + assertEquals(sk3.getNormalizedRankError(false), sk1.getNormalizedRankError(false)); + assertEquals(sk3.getMinItem(), 1L); + assertEquals(sk3.getMaxItem(), 1L); + assertEquals(sk3.getSerializedSizeBytes(), sk1.getSerializedSizeBytes()); + //from heap -> byte[] -> off heap -> byte[] -> compare byte[] + final byte[] bytes2 = sk3.toByteArray(); + assertEquals(bytes, bytes2); + } + + @Test + public void serializeDeserializeMultipleValues() { + final KllLongsSketch sk1 = KllLongsSketch.newHeapInstance(); + final int n = 1000; + for (int i = 0; i < n; i++) { + sk1.update(i); + } + assertEquals(sk1.getMinItem(), 0); + assertEquals(sk1.getMaxItem(), 999L); + + //from heap -> byte[] -> heap + final byte[] bytes = sk1.toByteArray(); + final KllLongsSketch sk2 = KllLongsSketch.heapify(Memory.wrap(bytes)); + assertEquals(bytes.length, sk1.getSerializedSizeBytes()); + assertFalse(sk2.isEmpty()); + assertEquals(sk2.getNumRetained(), sk1.getNumRetained()); + assertEquals(sk2.getN(), sk1.getN()); + assertEquals(sk2.getNormalizedRankError(false), 
sk1.getNormalizedRankError(false)); + assertEquals(sk2.getMinItem(), sk1.getMinItem()); + assertEquals(sk2.getMaxItem(), sk1.getMaxItem()); + assertEquals(sk2.getSerializedSizeBytes(), sk1.getSerializedSizeBytes()); + + //from heap -> byte[] -> off heap + final KllLongsSketch sk3 = KllLongsSketch.wrap(Memory.wrap(bytes)); + assertFalse(sk3.isEmpty()); + assertEquals(sk3.getNumRetained(), sk1.getNumRetained()); + assertEquals(sk3.getN(), sk1.getN()); + assertEquals(sk3.getNormalizedRankError(false), sk1.getNormalizedRankError(false)); + assertEquals(sk3.getMinItem(), sk1.getMinItem()); + assertEquals(sk3.getMaxItem(), sk1.getMaxItem()); + assertEquals(sk3.getSerializedSizeBytes(), sk1.getSerializedSizeBytes()); + //from heap -> byte[] -> off heap -> byte[] -> compare byte[] + final byte[] bytes2 = sk3.toByteArray(); + assertEquals(bytes, bytes2); + } + +} diff --git a/src/test/java/org/apache/datasketches/kll/KllLongsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllLongsSketchTest.java new file mode 100644 index 000000000..1e4c1004a --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/KllLongsSketchTest.java @@ -0,0 +1,719 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.kll; + +import static java.lang.Math.min; +import static org.apache.datasketches.kll.KllSketch.SketchType.LONGS_SKETCH; +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.EXCLUSIVE; +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; +import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.quantilescommon.LongsSortedView; +import org.apache.datasketches.quantilescommon.LongsSortedViewIterator; +import org.testng.annotations.Test; + +public class KllLongsSketchTest { + private static final String LS = System.getProperty("line.separator"); + private static final double PMF_EPS_FOR_K_8 = KllSketch.getNormalizedRankError(8, true); + private static final double PMF_EPS_FOR_K_128 = KllSketch.getNormalizedRankError(128, true); + private static final double PMF_EPS_FOR_K_256 = KllSketch.getNormalizedRankError(256, true); + private static final double NUMERIC_NOISE_TOLERANCE = 1E-6; + private static final DefaultMemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); + + @Test + public void empty() { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + assertTrue(sketch.isEmpty()); + assertEquals(sketch.getN(), 0); + assertEquals(sketch.getNumRetained(), 0); + try { sketch.getRank(0); fail(); } catch (SketchesArgumentException e) {} + try { sketch.getMinItem(); fail(); } catch (SketchesArgumentException e) {} + try { sketch.getMaxItem(); fail(); } catch (SketchesArgumentException e) {} + try { sketch.getQuantile(0.5); 
fail(); } catch (SketchesArgumentException e) {} + try { sketch.getQuantiles(new double[] {0}); fail(); } catch (SketchesArgumentException e) {} + try { sketch.getPMF(new long[] {0}); fail(); } catch (SketchesArgumentException e) {} + try { sketch.getCDF(new long[] {0}); fail(); } catch (SketchesArgumentException e) {} + assertNotNull(sketch.toString(true, true)); + assertNotNull(sketch.toString()); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void getQuantileInvalidArg() { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + sketch.update(1); + sketch.getQuantile(-1.0); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void getQuantilesInvalidArg() { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + sketch.update(1); + sketch.getQuantiles(new double[] {2.0}); + } + + @Test + public void oneValue() { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + sketch.update(1); + assertFalse(sketch.isEmpty()); + assertEquals(sketch.getN(), 1); + assertEquals(sketch.getNumRetained(), 1); + assertEquals(sketch.getRank(0L, EXCLUSIVE), 0.0); + assertEquals(sketch.getRank(1L, EXCLUSIVE), 0.0); + assertEquals(sketch.getRank(2L, EXCLUSIVE), 1.0); + assertEquals(sketch.getRank(0L, INCLUSIVE), 0.0); + assertEquals(sketch.getRank(1L, INCLUSIVE), 1.0); + assertEquals(sketch.getRank(2L, INCLUSIVE), 1.0); + assertEquals(sketch.getMinItem(), 1L); + assertEquals(sketch.getMaxItem(), 1L); + assertEquals(sketch.getQuantile(0.5, EXCLUSIVE), 1L); + assertEquals(sketch.getQuantile(0.5, INCLUSIVE), 1L); + } + + @Test + public void tenValues() { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 10; i++) { sketch.update(i); } + assertFalse(sketch.isEmpty()); + assertEquals(sketch.getN(), 10); + assertEquals(sketch.getNumRetained(), 10); + for (int i = 1; i <= 10; i++) { + assertEquals(sketch.getRank(i, EXCLUSIVE), (i - 1) / 10.0); + 
assertEquals(sketch.getRank(i, INCLUSIVE), i / 10.0); + } + final long[] qArr = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + double[] rOut = sketch.getRanks(qArr); //inclusive + for (int i = 0; i < qArr.length; i++) { + assertEquals(rOut[i], (i + 1) / 10.0); + } + rOut = sketch.getRanks(qArr, EXCLUSIVE); //exclusive + for (int i = 0; i < qArr.length; i++) { + assertEquals(rOut[i], i / 10.0); + } + + for (int i = 0; i <= 10; i++) { //ranks 0.0, 0.1, ..., 1.0; was "i >= 10", which never ran + double rank = i/10.0; + double q = rank == 1.0 ? i : i + 1; + assertEquals(sketch.getQuantile(rank, EXCLUSIVE), q); + q = rank == 0 ? i + 1 : i; + assertEquals(sketch.getQuantile(rank, INCLUSIVE), q); + } + + { + // getQuantile() and getQuantiles() equivalence EXCLUSIVE + final long[] quantiles = + sketch.getQuantiles(new double[] {0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0}, EXCLUSIVE); + for (int i = 0; i <= 10; i++) { + assertEquals(sketch.getQuantile(i / 10.0, EXCLUSIVE), quantiles[i]); + } + } + { + // getQuantile() and getQuantiles() equivalence INCLUSIVE + final long[] quantiles = + sketch.getQuantiles(new double[] {0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0}, INCLUSIVE); + for (int i = 0; i <= 10; i++) { + assertEquals(sketch.getQuantile(i / 10.0, INCLUSIVE), quantiles[i]); + } + } + } + + @Test + public void manyValuesEstimationMode() { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + final int n = 1_000_000; + + for (int i = 0; i < n; i++) { + sketch.update(i); + } + assertEquals(sketch.getN(), n); + + // test getRank + for (int i = 0; i < n; i++) { + final double trueRank = (double) i / n; + assertEquals(sketch.getRank(i), trueRank, PMF_EPS_FOR_K_256, "for value " + i); + } + + // test getPMF + final double[] pmf = sketch.getPMF(new long[] {n / 2}); // split at median + assertEquals(pmf.length, 2); + assertEquals(pmf[0], 0.5, PMF_EPS_FOR_K_256); + assertEquals(pmf[1], 0.5, PMF_EPS_FOR_K_256); + + assertEquals(sketch.getMinItem(), 0); // min value is exact + assertEquals(sketch.getMaxItem(), n - 1);
// max value is exact + + // check at every 0.1 percentage point + final double[] fractions = new double[1001]; + final double[] reverseFractions = new double[1001]; // check that ordering doesn't matter + for (int i = 0; i <= 1000; i++) { + fractions[i] = (double) i / 1000; + reverseFractions[1000 - i] = fractions[i]; + } + final long[] quantiles = sketch.getQuantiles(fractions); + final long[] reverseQuantiles = sketch.getQuantiles(reverseFractions); + double previousQuantile = 0; + for (int i = 0; i <= 1000; i++) { /* single lookups must agree with both batch results, and quantiles must be non-decreasing in rank */ + final double quantile = sketch.getQuantile(fractions[i]); + assertEquals(quantile, quantiles[i]); + assertEquals(quantile, reverseQuantiles[1000 - i]); + assertTrue(previousQuantile <= quantile); + previousQuantile = quantile; + } + } + + @Test + public void getRankGetCdfGetPmfConsistency() { /* feeds 0..n-1 and uses the same values as split points; each CDF entry must match getRank and the running sum of the PMF, and both must end at 1.0 */ + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + final int n = 1000; + final long[] values = new long[n]; + for (int i = 0; i < n; i++) { + sketch.update(i); + values[i] = i; + } + { // default search criterion; NOTE(review): originally labelled "inclusive = false (default)", but the no-criterion getRanks(qArr) call elsewhere in this file is annotated inclusive - confirm which criterion is the default + final double[] ranks = sketch.getCDF(values); + final double[] pmf = sketch.getPMF(values); + double sumPmf = 0; + for (int i = 0; i < n; i++) { + assertEquals(ranks[i], sketch.getRank(values[i]), NUMERIC_NOISE_TOLERANCE, + "rank vs CDF for value " + i); + sumPmf += pmf[i]; + assertEquals(ranks[i], sumPmf, NUMERIC_NOISE_TOLERANCE, "CDF vs PMF for value " + i); + } + sumPmf += pmf[n]; + assertEquals(sumPmf, 1.0, NUMERIC_NOISE_TOLERANCE); + assertEquals(ranks[n], 1.0, NUMERIC_NOISE_TOLERANCE); + } + { // inclusive = true + final double[] ranks = sketch.getCDF(values, INCLUSIVE); + final double[] pmf = sketch.getPMF(values, INCLUSIVE); + double sumPmf = 0; + for (int i = 0; i < n; i++) { + assertEquals(ranks[i], sketch.getRank(values[i], INCLUSIVE), NUMERIC_NOISE_TOLERANCE, + "rank vs CDF for value " + i); + sumPmf += pmf[i]; + assertEquals(ranks[i], sumPmf, NUMERIC_NOISE_TOLERANCE, "CDF vs PMF for value " + i); + } + sumPmf += 
pmf[n]; + assertEquals(sumPmf, 1.0, NUMERIC_NOISE_TOLERANCE); + assertEquals(ranks[n], 1.0, NUMERIC_NOISE_TOLERANCE); + } + } + + @Test + public void merge() { /* sketch1 holds 0..n-1 and sketch2 holds n..2n-1, both with default K; after the merge sketch1 must report N = 2n, min 0, max 2n-1 and a median near n */ + final KllLongsSketch sketch1 = KllLongsSketch.newHeapInstance(); + final KllLongsSketch sketch2 = KllLongsSketch.newHeapInstance(); + final int n = 10000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + sketch2.update(2 * n - i - 1); + } + + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), (n - 1)); + + assertEquals(sketch2.getMinItem(), n); + assertEquals(sketch2.getMaxItem(), (2 * n - 1)); + + sketch1.merge(sketch2); + + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), 2L * n); + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), (2 * n - 1)); + assertEquals(sketch1.getQuantile(0.5), n, 2 * n * PMF_EPS_FOR_K_256); + } + + @Test + public void mergeLowerK() { /* same data split as merge(), but sketch1 uses K=256 and sketch2 uses K=128; after the merge sketch1's normalized rank error must equal sketch2's */ + final KllLongsSketch sketch1 = KllLongsSketch.newHeapInstance(256); + final KllLongsSketch sketch2 = KllLongsSketch.newHeapInstance(128); + final int n = 10000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + sketch2.update(2 * n - i - 1); + } + + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), n - 1L); + + assertEquals(sketch2.getMinItem(), n); + assertEquals(sketch2.getMaxItem(), 2L * n - 1L); + + assertTrue(sketch1.getNormalizedRankError(false) < sketch2.getNormalizedRankError(false)); + assertTrue(sketch1.getNormalizedRankError(true) < sketch2.getNormalizedRankError(true)); + sketch1.merge(sketch2); + + // sketch1 must get "contaminated" by the lower K in sketch2 + assertEquals(sketch1.getNormalizedRankError(false), sketch2.getNormalizedRankError(false)); + assertEquals(sketch1.getNormalizedRankError(true), sketch2.getNormalizedRankError(true)); + + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), 2 * n); + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), 2L * n - 1L); + 
assertEquals(sketch1.getQuantile(0.5), n, 2L * n * PMF_EPS_FOR_K_128); + } + + @Test + public void mergeEmptyLowerK() { /* merging an empty K=128 sketch into a populated K=256 sketch must leave its rank error, N, min, max and median unchanged */ + final KllLongsSketch sketch1 = KllLongsSketch.newHeapInstance(256); + final KllLongsSketch sketch2 = KllLongsSketch.newHeapInstance(128); + final int n = 10_000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + } + + // rank error should not be affected by a merge with an empty sketch with lower K + final double rankErrorBeforeMerge = sketch1.getNormalizedRankError(true); + sketch1.merge(sketch2); + assertEquals(sketch1.getNormalizedRankError(true), rankErrorBeforeMerge); + + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), n); + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), n - 1); + assertEquals(sketch1.getQuantile(0.5), n / 2, n * PMF_EPS_FOR_K_256); + + //merge the other way; NOTE(review): the assertions that follow still inspect sketch1, so sketch2 is not checked after this merge - confirm intent + sketch2.merge(sketch1); + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), n); + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), n - 1); + assertEquals(sketch1.getQuantile(0.5), n / 2, n * PMF_EPS_FOR_K_256); + } + + @Test + public void mergeExactModeLowerK() { /* sketch2 (K=128) holds a single item; merging it into the populated K=256 sketch must not change that sketch's rank error */ + final KllLongsSketch sketch1 = KllLongsSketch.newHeapInstance(256); + final KllLongsSketch sketch2 = KllLongsSketch.newHeapInstance(128); + final int n = 10000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + } + sketch2.update(1); + + // rank error should not be affected by a merge with a sketch in exact mode with lower K + final double rankErrorBeforeMerge = sketch1.getNormalizedRankError(true); + sketch1.merge(sketch2); + assertEquals(sketch1.getNormalizedRankError(true), rankErrorBeforeMerge); + } + + @Test + public void mergeMinMinValueFromOther() { /* sketch2 holds 2 and merges a sketch holding 1, so its min item must become 1 */ + final KllLongsSketch sketch1 = KllLongsSketch.newHeapInstance(); + final KllLongsSketch sketch2 = KllLongsSketch.newHeapInstance(); + sketch1.update(1); + sketch2.update(2); + sketch2.merge(sketch1); + assertEquals(sketch2.getMinItem(), 1); + } + + @Test + 
public void mergeMinAndMaxFromOther() { + final KllLongsSketch sketch1 = KllLongsSketch.newHeapInstance(); + for (int i = 1; i <= 1_000_000; i++) { + sketch1.update(i); + } + final KllLongsSketch sketch2 = KllLongsSketch.newHeapInstance(10); + sketch2.merge(sketch1); + assertEquals(sketch2.getMinItem(), 1); + assertEquals(sketch2.getMaxItem(), 1_000_000); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void kTooSmall() { + KllLongsSketch.newHeapInstance(KllSketch.DEFAULT_M - 1); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void kTooLarge() { + KllLongsSketch.newHeapInstance(KllSketch.MAX_K + 1); + } + + @Test + public void minK() { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(KllSketch.DEFAULT_M); + for (int i = 0; i < 1000; i++) { + sketch.update(i); + } + assertEquals(sketch.getK(), KllSketch.DEFAULT_M); + assertEquals(sketch.getQuantile(0.5), 500, 1000 * PMF_EPS_FOR_K_8); + } + + @Test + public void maxK() { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(KllSketch.MAX_K); + for (int i = 0; i < 1000; i++) { + sketch.update(i); + } + assertEquals(sketch.getK(), KllSketch.MAX_K); + assertEquals(sketch.getQuantile(0.5), 500, 1000 * PMF_EPS_FOR_K_256); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void outOfOrderSplitPoints() { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + sketch.update(0); + sketch.getCDF(new long[] {1L, 0L}); + } + + @Test + public void checkReset() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 100; i++) { sk.update(i); } + long n1 = sk.getN(); + double min1 = sk.getMinItem(); + double max1 = sk.getMaxItem(); + sk.reset(); + for (int i = 1; i <= 100; i++) { sk.update(i); } + long n2 = sk.getN(); + double min2 = sk.getMinItem(); + double max2 = sk.getMaxItem(); + assertEquals(n2, n1); + assertEquals(min2, min1); + assertEquals(max2, max1); + } + + @Test + public void 
checkReadOnlyUpdate() { + KllLongsSketch sk1 = KllLongsSketch.newHeapInstance(20); + Memory mem = Memory.wrap(sk1.toByteArray()); + KllLongsSketch sk2 = KllLongsSketch.wrap(mem); + try { sk2.update(1); fail(); } catch (SketchesArgumentException e) { } + } + + @Test + public void checkNewDirectInstanceAndSize() { + WritableMemory wmem = WritableMemory.allocate(3000); + KllLongsSketch.newDirectInstance(wmem, memReqSvr); + try { KllLongsSketch.newDirectInstance(null, memReqSvr); fail(); } + catch (NullPointerException e) { } + try { KllLongsSketch.newDirectInstance(wmem, null); fail(); } + catch (NullPointerException e) { } + int updateSize = KllSketch.getMaxSerializedSizeBytes(200, 0, LONGS_SKETCH, true); + int compactSize = KllSketch.getMaxSerializedSizeBytes(200, 0, LONGS_SKETCH, false); + assertTrue(compactSize < updateSize); + } + + @Test + public void sortedView() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(); + sk.update(3); + sk.update(1); + sk.update(2); + + LongsSortedView view = sk.getSortedView(); + LongsSortedViewIterator itr = view.iterator(); + assertEquals(itr.next(), true); + assertEquals(itr.getQuantile(), 1); + assertEquals(itr.getWeight(), 1); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 0); + assertEquals(itr.getNaturalRank(INCLUSIVE), 1); + assertEquals(itr.next(), true); + assertEquals(itr.getQuantile(), 2); + assertEquals(itr.getWeight(), 1); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 1); + assertEquals(itr.getNaturalRank(INCLUSIVE), 2); + assertEquals(itr.next(), true); + assertEquals(itr.getQuantile(), 3); + assertEquals(itr.getWeight(), 1); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 2); + assertEquals(itr.getNaturalRank(INCLUSIVE), 3); + assertEquals(itr.next(), false); + } + + @Test //also visual + public void checkCDF_PDF() { + final double[] cdfI = {.25, .50, .75, 1.0, 1.0 }; + final double[] cdfE = {0.0, .25, .50, .75, 1.0 }; + final double[] pmfI = {.25, .25, .25, .25, 0.0 }; + final double[] pmfE = {0.0, .25, .25, 
.25, .25 }; + final double toll = 1E-10; + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + final long[] doublesIn = {10, 20, 30, 40}; + for (int i = 0; i < doublesIn.length; i++) { sketch.update(doublesIn[i]); } + long[] sp = new long[] { 10, 20, 30, 40 }; + println("SplitPoints:"); + for (int i = 0; i < sp.length; i++) { + printf("%10d", sp[i]); + } + println(""); + println("INCLUSIVE:"); + double[] cdf = sketch.getCDF(sp, INCLUSIVE); + double[] pmf = sketch.getPMF(sp, INCLUSIVE); + printf("%10s%10s" + LS, "CDF", "PMF"); + for (int i = 0; i < cdf.length; i++) { + printf("%10.2f%10.2f" + LS, cdf[i], pmf[i]); + assertEquals(cdf[i], cdfI[i], toll); + assertEquals(pmf[i], pmfI[i], toll); + } + println("EXCLUSIVE"); + cdf = sketch.getCDF(sp, EXCLUSIVE); + pmf = sketch.getPMF(sp, EXCLUSIVE); + printf("%10s%10s" + LS, "CDF", "PMF"); + for (int i = 0; i < cdf.length; i++) { + printf("%10.2f%10.2f" + LS, cdf[i], pmf[i]); + assertEquals(cdf[i], cdfE[i], toll); + assertEquals(pmf[i], pmfE[i], toll); + } + } + + @Test + public void checkWrapCase1Doubles() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 21; i++) { sk.update(i); } + + Memory mem = Memory.wrap(sk.toByteArray()); + KllLongsSketch sk2 = KllLongsSketch.wrap(mem); + + assertTrue(mem.isReadOnly()); + assertTrue(sk2.isReadOnly()); + assertFalse(sk2.isDirect()); + } + + @Test + public void checkWritableWrapCase6And2Doubles() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 21; i++) { sk.update(i); } + + WritableMemory wmem = WritableMemory.writableWrap(KllHelper.toByteArray(sk, true)); + KllLongsSketch sk2 = KllLongsSketch.writableWrap(wmem, memReqSvr); + + assertFalse(wmem.isReadOnly()); + assertFalse(sk2.isReadOnly()); + assertFalse(sk2.isDirect()); + } + + @Test + public void checkKllSketchCase5Doubles() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 21; i++) { sk.update(i); } + + WritableMemory 
wmem = WritableMemory.writableWrap(sk.toByteArray()); + KllLongsSketch sk2 = KllLongsSketch.writableWrap(wmem, memReqSvr); + + assertFalse(wmem.isReadOnly()); + assertTrue(sk2.isReadOnly()); + assertFalse(sk2.isDirect()); + } + + @Test + public void checkKllSketchCase3Doubles() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 21; i++) { sk.update(i); } + + Memory mem = Memory.wrap(KllHelper.toByteArray(sk, true)); + WritableMemory wmem = (WritableMemory) mem; + KllLongsSketch sk2 = KllLongsSketch.writableWrap(wmem, memReqSvr); + + assertTrue(wmem.isReadOnly()); + assertTrue(sk2.isReadOnly()); + assertFalse(sk2.isDirect()); + } + + @Test + public void checkKllSketchCase7Doubles() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 21; i++) { sk.update(i); } + + Memory mem = Memory.wrap(KllHelper.toByteArray(sk, true)); + WritableMemory wmem = (WritableMemory) mem; + KllLongsSketch sk2 = KllLongsSketch.writableWrap(wmem, memReqSvr); + + assertTrue(wmem.isReadOnly()); + assertTrue(sk2.isReadOnly()); + assertFalse(sk2.isDirect()); + } + + @Test + public void checkReadOnlyExceptions() { + int[] intArr = new int[0]; + int intV = 2; + int idx = 1; + KllLongsSketch sk1 = KllLongsSketch.newHeapInstance(20); + Memory mem = Memory.wrap(sk1.toByteArray()); + KllLongsSketch sk2 = KllLongsSketch.wrap(mem); + try { sk2.setLevelsArray(intArr); fail(); } catch (SketchesArgumentException e) { } + try { sk2.setLevelsArrayAt(idx,intV); fail(); } catch (SketchesArgumentException e) { } + } + + @Test + public void checkIsSameResource() { + int cap = 128; + WritableMemory wmem = WritableMemory.allocate(cap); + WritableMemory reg1 = wmem.writableRegion(0, 64); + WritableMemory reg2 = wmem.writableRegion(64, 64); + assertFalse(reg1 == reg2); + assertFalse(reg1.isSameResource(reg2)); + + WritableMemory reg3 = wmem.writableRegion(0, 64); + assertFalse(reg1 == reg3); + assertTrue(reg1.isSameResource(reg3)); + + byte[] byteArr1 = 
KllLongsSketch.newHeapInstance(20).toByteArray(); + reg1.putByteArray(0, byteArr1, 0, byteArr1.length); + KllLongsSketch sk1 = KllLongsSketch.wrap(reg1); + + byte[] byteArr2 = KllLongsSketch.newHeapInstance(20).toByteArray(); + reg2.putByteArray(0, byteArr2, 0, byteArr2.length); + assertFalse(sk1.isSameResource(reg2)); + + byte[] byteArr3 = KllLongsSketch.newHeapInstance(20).toByteArray(); + reg3.putByteArray(0, byteArr3, 0, byteArr3.length); + assertTrue(sk1.isSameResource(reg3)); + } + + @Test + public void checkSortedViewAfterReset() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + sk.update(1L); + LongsSortedView sv = sk.getSortedView(); + long dsv = sv.getQuantile(1.0, INCLUSIVE); + assertEquals(dsv, 1L); + sk.reset(); + try { sk.getSortedView(); fail(); } catch (SketchesArgumentException e) { } + } + + @Test + public void checkVectorUpdate() { + boolean withLevels = false; + boolean withLevelsAndItems = true; + int k = 20; + int n = 108; + int maxVsz = 40; //max vector size + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + int j = 1; + int rem; + while ((rem = n - j + 1) > 0) { + int vecSz = min(rem, maxVsz); + long[] v = new long[vecSz]; + for (int i = 0; i < vecSz; i++) { v[i] = j++; } + sk.update(v, 0, vecSz); + } + println(LS + "#<<< END STATE # >>>"); + println(sk.toString(withLevels, withLevelsAndItems)); + println(""); + assertEquals(sk.getN(), 108); + assertEquals(sk.getMaxItem(), 108L); + assertEquals(sk.getMinItem(), 1L); + } + + @Test + public void vectorizedUpdates() { + final int trials = 1; + final int M = 1; //number of vectors + final int N = 1000; //vector size + final int K = 256; + final long[] values = new long[N]; + long vIn = 1L; + long totN = 0; + final long startTime = System.nanoTime(); + for (int t = 0; t < trials; t++) { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(K); + for (int m = 0; m < M; m++) { + for (int n = 0; n < N; n++) { + values[n] = vIn++; //fill vector + } + sketch.update(values, 
0, N); //vector input + } + totN = sketch.getN(); + assertEquals(totN, M * N); + assertEquals(sketch.getMinItem(), 1L); + assertEquals(sketch.getMaxItem(), totN); + assertEquals(sketch.getQuantile(0.5), totN / 2, totN * PMF_EPS_FOR_K_256 * 2.0); //wider tolerance + } + final long runTime = System.nanoTime() - startTime; + println("Vectorized Updates"); + printf(" Vector size : %,12d" + LS, N); + printf(" Num Vectors : %,12d" + LS, M); + printf(" Total Input : %,12d" + LS, totN); + printf(" Run Time mS : %,12.3f" + LS, runTime / 1e6); + final double trialTime = runTime / (1e6 * trials); + printf(" mS / Trial : %,12.3f" + LS, trialTime); + final double updateTime = runTime / (1.0 * totN * trials); + printf(" nS / Update : %,12.3f" + LS, updateTime); + } + + @Test + public void nonVectorizedUpdates() { + final int trials = 1; + final int M = 1; //number of vectors + final int N = 1000; //vector size + final int K = 256; + final long[] values = new long[N]; + long vIn = 1L; + long totN = 0; + final long startTime = System.nanoTime(); + for (int t = 0; t < trials; t++) { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(K); + for (int m = 0; m < M; m++) { + for (int n = 0; n < N; n++) { + values[n] = vIn++; //fill vector + } + for (int i = 0; i < N; i++) { + sketch.update(values[i]); //single item input + } + } + totN = sketch.getN(); + assertEquals(totN, M * N); + assertEquals(sketch.getMinItem(), 1L); + assertEquals(sketch.getMaxItem(), totN); + assertEquals(sketch.getQuantile(0.5), totN / 2, totN * PMF_EPS_FOR_K_256 * 2.0); //wider tolerance + } + final long runTime = System.nanoTime() - startTime; + println("Vectorized Updates"); + printf(" Vector size : %,12d" + LS, N); + printf(" Num Vectors : %,12d" + LS, M); + printf(" Total Input : %,12d" + LS, totN); + printf(" Run Time mS : %,12.3f" + LS, runTime / 1e6); + final double trialTime = runTime / (1e6 * trials); + printf(" mS / Trial : %,12.3f" + LS, trialTime); + final double updateTime = runTime / 
(1.0 * totN * trials); + printf(" nS / Update : %,12.3f" + LS, updateTime); + } + + private final static boolean enablePrinting = false; + + /** + * @param format the format + * @param args the args + */ + private static final void printf(final String format, final Object ...args) { + if (enablePrinting) { System.out.printf(format, args); } + } + + /** + * @param o the Object to println + */ + private static final void println(final Object o) { + if (enablePrinting) { System.out.println(o.toString()); } + } +} diff --git a/src/test/java/org/apache/datasketches/kll/KllMiscDirectLongsTest.java b/src/test/java/org/apache/datasketches/kll/KllMiscDirectLongsTest.java new file mode 100644 index 000000000..366f93f7d --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/KllMiscDirectLongsTest.java @@ -0,0 +1,459 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.kll; + +import static org.apache.datasketches.kll.KllSketch.SketchType.LONGS_SKETCH; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; +import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.quantilescommon.LongsSortedView; +import org.apache.datasketches.quantilescommon.LongsSortedViewIterator; +import org.testng.annotations.Test; + +public class KllMiscDirectLongsTest { + static final String LS = System.getProperty("line.separator"); + private static final DefaultMemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); + + @Test + public void checkBounds() { + final KllLongsSketch kll = getDirectLongsSketch(200, 0); + for (int i = 0; i < 1000; i++) { + kll.update(i); + } + final double eps = kll.getNormalizedRankError(false); + final long est = kll.getQuantile(0.5); + final long ub = kll.getQuantileUpperBound(0.5); + final long lb = kll.getQuantileLowerBound(0.5); + assertEquals(ub, kll.getQuantile(.5 + eps)); + assertEquals(lb, kll.getQuantile(0.5 - eps)); + println("Ext : " + est); + println("UB : " + ub); + println("LB : " + lb); + final double rest = kll.getRank(est); + final double restUB = kll.getRankUpperBound(rest); + final double restLB = kll.getRankLowerBound(rest); + assertTrue(restUB - rest < (2 * eps)); + assertTrue(rest - restLB < (2 * eps)); + } + + //@Test //enable static println(..) 
for visual checking + public void visualCheckToString() { + final int k = 20; + final KllLongsSketch sk = getDirectLongsSketch(k, 0); + for (int i = 0; i < 10; i++) { sk.update(i + 1); } + println(sk.toString(true, true)); + + final KllLongsSketch sk2 = getDirectLongsSketch(k, 0); + for (int i = 0; i < 400; i++) { sk2.update(i + 1); } + println("\n" + sk2.toString(true, true)); + + sk2.merge(sk); + final String s2 = sk2.toString(true, true); + println(LS + s2); + } + + @Test + public void viewDirectCompactions() { + int k = 20; + int u = 108; + KllLongsSketch sk = getDirectLongsSketch(k, 0); + for (int i = 1; i <= u; i++) { + sk.update(i); + if (sk.levelsArr[0] == 0) { + println(sk.toString(true, true)); + sk.update(++i); + println(sk.toString(true, true)); + assertEquals(sk.getLongItemsArray()[sk.levelsArr[0]], i); + } + } + } + + @Test + public void viewCompactionAndSortedView() { + int k = 20; + KllLongsSketch sk = getDirectLongsSketch(k, 0); + show(sk, 20); + LongsSortedView sv = sk.getSortedView(); + LongsSortedViewIterator itr = sv.iterator(); + printf("%12s%12s\n", "Value", "CumWeight"); + while (itr.next()) { + long v = itr.getQuantile(); + long wt = itr.getWeight(); + printf("%12d%12d\n", v, wt); + } + } + + private static void show(final KllLongsSketch sk, int limit) { + int i = (int) sk.getN(); + for ( ; i < limit; i++) { sk.update(i + 1); } + println(sk.toString(true, true)); + } + + @Test + public void checkSketchInitializeLongHeap() { + int k = 20; //don't change this + KllLongsSketch sk; + + //println("#### CASE: LONG FULL HEAP"); + sk = getDirectLongsSketch(k, 0); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 33); + 
assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 3); + assertEquals(sk.getMaxItem(), 21L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: LONG HEAP EMPTY"); + sk = getDirectLongsSketch(k, 0); + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + try { sk.getMaxItem(); fail(); } catch (SketchesArgumentException e) { } + try { sk.getMinItem(); fail(); } catch (SketchesArgumentException e) { } + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: LONG HEAP SINGLE"); + sk = getDirectLongsSketch(k, 0); + sk.update(1); + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + assertEquals(sk.getMaxItem(), 1L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkSketchInitializeLongHeapifyCompactMem() { + int k = 20; //don't change this + KllLongsSketch sk; + KllLongsSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + //println("#### CASE: LONG FULL HEAPIFIED FROM COMPACT"); + sk2 = getDirectLongsSketch(k, 0); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + //println(sk.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = 
KllLongsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 33); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 3); + assertEquals(sk.getMaxItem(), 21L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: LONG EMPTY HEAPIFIED FROM COMPACT"); + sk2 = getDirectLongsSketch(k, 0); + //println(sk.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllLongsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + try { sk.getMaxItem(); fail(); } catch (SketchesArgumentException e) { } + try { sk.getMinItem(); fail(); } catch (SketchesArgumentException e) { } + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: LONG SINGLE HEAPIFIED FROM COMPACT"); + sk2 = getDirectLongsSketch(k, 0); + sk2.update(1); + //println(sk2.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllLongsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + 
assertEquals(sk.getMaxItem(), 1L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkSketchInitializeLongHeapifyUpdatableMem() { + int k = 20; //don't change this + KllLongsSketch sk; + KllLongsSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + //println("#### CASE: LONG FULL HEAPIFIED FROM UPDATABLE"); + sk2 = getDirectLongsSketch(k, 0); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + //println(sk2.toString(true, true)); + compBytes = KllHelper.toByteArray(sk2,true); + wmem = WritableMemory.writableWrap(compBytes); + sk = KllHeapLongsSketch.heapifyImpl(wmem); + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 33); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 3); + assertEquals(sk.getMaxItem(), 21L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + // println("#### CASE: LONG EMPTY HEAPIFIED FROM UPDATABLE"); + sk2 = getDirectLongsSketch(k, 0); + //println(sk.toString(true, true)); + compBytes = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllHeapLongsSketch.heapifyImpl(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + try { sk.getMaxItem(); fail(); } catch (SketchesArgumentException e) { } + try { sk.getMinItem(); fail(); } catch (SketchesArgumentException e) { } + 
assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: LONG SINGLE HEAPIFIED FROM UPDATABLE"); + sk2 = getDirectLongsSketch(k, 0); + sk2.update(1); + //println(sk.toString(true, true)); + compBytes = KllHelper.toByteArray(sk2,true); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllHeapLongsSketch.heapifyImpl(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + assertEquals(sk.getMaxItem(), 1L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkMemoryToStringLongUpdatable() { + int k = 20; //don't change this + KllLongsSketch sk; + KllLongsSketch sk2; + byte[] upBytes; + byte[] upBytes2; + WritableMemory wmem; + String s; + + println("#### CASE: LONG FULL UPDATABLE"); + sk = getDirectLongsSketch(k, 0); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + upBytes = KllHelper.toByteArray(sk, true); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllLongsSketch.writableWrap(wmem, memReqSvr); + upBytes2 = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. 
Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + + println("#### CASE: LONG EMPTY UPDATABLE"); + sk = getDirectLongsSketch(k, 0); + upBytes = KllHelper.toByteArray(sk, true); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllLongsSketch.writableWrap(wmem, memReqSvr); + upBytes2 = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + + println("#### CASE: LONG SINGLE UPDATABL"); + sk = getDirectLongsSketch(k, 0); + sk.update(1); + upBytes = KllHelper.toByteArray(sk, true); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllLongsSketch.writableWrap(wmem, memReqSvr); + upBytes2 = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. 
Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + } + + @Test + public void checkSimpleMerge() { + int k = 20; + int n1 = 21; + int n2 = 21; + KllLongsSketch sk1 = getDirectLongsSketch(k, 0); + KllLongsSketch sk2 = getDirectLongsSketch(k, 0); + for (int i = 1; i <= n1; i++) { + sk1.update(i); + } + for (int i = 1; i <= n2; i++) { + sk2.update(i + 100); + } + println(sk1.toString(true, true)); + println(sk2.toString(true, true)); + sk1.merge(sk2); + println(sk1.toString(true, true)); + assertEquals(sk1.getMaxItem(), 121L); + assertEquals(sk1.getMinItem(), 1L); + } + + @Test + public void checkSizes() { + KllLongsSketch sk = getDirectLongsSketch(20, 0); + for (int i = 1; i <= 21; i++) { sk.update(i); } + //println(sk.toString(true, true)); + byte[] byteArr1 = KllHelper.toByteArray(sk, true); + int size1 = sk.currentSerializedSizeBytes(true); + assertEquals(size1, byteArr1.length); + byte[] byteArr2 = sk.toByteArray(); + int size2 = sk.currentSerializedSizeBytes(false); + assertEquals(size2, byteArr2.length); + } + + @Test + public void checkNewInstance() { + int k = 200; + WritableMemory dstMem = WritableMemory.allocate(3000); + KllLongsSketch sk = KllLongsSketch.newDirectInstance(k, dstMem, memReqSvr); + for (int i = 1; i <= 10_000; i++) {sk.update(i); } + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getMaxItem(), 10000L); + //println(sk.toString(true, true)); + } + + @Test + public void checkDifferentM() { + int k = 20; + int m = 4; + WritableMemory dstMem = WritableMemory.allocate(1000); + KllLongsSketch sk = KllDirectLongsSketch.newDirectUpdatableInstance(k, m, dstMem, memReqSvr); + for (int i = 1; i <= 200; i++) {sk.update(i); } + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getMaxItem(), 200L); + } + + private static KllLongsSketch getDirectLongsSketch(final int k, final int n) { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= n; i++) { sk.update(i); } + byte[] byteArr = 
KllHelper.toByteArray(sk, true); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + KllLongsSketch dfsk = KllLongsSketch.writableWrap(wmem, memReqSvr); + return dfsk; + } + + @Test + public void printlnTest() { + String s = "PRINTING: printf in " + this.getClass().getName(); + println(s); + printf("%s\n", s); + } + + private final static boolean enablePrinting = false; + + /** + * @param format the format + * @param args the args + */ + private static final void printf(final String format, final Object ...args) { + if (enablePrinting) { System.out.printf(format, args); } + } + + /** + * @param o the Object to println + */ + private static final void println(final Object o) { + if (enablePrinting) { System.out.println(o.toString()); } + } + +} diff --git a/src/test/java/org/apache/datasketches/kll/KllMiscLongsTest.java b/src/test/java/org/apache/datasketches/kll/KllMiscLongsTest.java new file mode 100644 index 000000000..018ad91db --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/KllMiscLongsTest.java @@ -0,0 +1,790 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.kll; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.kll.KllDirectLongsSketch.KllDirectCompactLongsSketch; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; +import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.memory.MemoryRequestServer; +import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.quantilescommon.LongsSortedView; +import org.apache.datasketches.quantilescommon.LongsSortedViewIterator; +import org.testng.annotations.Test; + +import static org.apache.datasketches.common.Util.LS; +import static org.apache.datasketches.common.Util.bitAt; +import static org.apache.datasketches.kll.KllSketch.SketchType.LONGS_SKETCH; +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +/** + * @author Lee Rhodes + */ +public class KllMiscLongsTest { + private final MemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); + + @Test + public void checkSortedViewConstruction() { + final KllLongsSketch kll = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 20; i++) { kll.update(i); } + LongsSortedView fsv = kll.getSortedView(); + long[] cumWeights = fsv.getCumulativeWeights(); + long[] values = fsv.getQuantiles(); + assertEquals(cumWeights.length, 20); + assertEquals(values.length, 20); + for (int i = 0; i < 20; i++) { + assertEquals(cumWeights[i], i + 1); + assertEquals(values[i], i + 1); + } + } + + @Test //set static enablePrinting = true for visual checking + public void checkBounds() { + final KllLongsSketch kll = KllLongsSketch.newHeapInstance(); //default k = 200 + for (int i = 0; i < 1000; i++) { + kll.update(i); + } + final double eps = kll.getNormalizedRankError(false); + final 
long est = kll.getQuantile(0.5); + final long ub = kll.getQuantileUpperBound(0.5); + final long lb = kll.getQuantileLowerBound(0.5); + assertEquals(ub, kll.getQuantile(.5 + eps)); + assertEquals(lb, kll.getQuantile(0.5 - eps)); + println("Ext : " + est); + println("UB : " + ub); + println("LB : " + lb); + final double rest = kll.getRank(est); + final double restUB = kll.getRankUpperBound(rest); + final double restLB = kll.getRankLowerBound(rest); + assertTrue(restUB - rest < (2 * eps)); + assertTrue(rest - restLB < (2 * eps)); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkHeapifyExceptions1() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(); + WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray()); + wmem.putByte(6, (byte) 3); //corrupt with odd M + KllLongsSketch.heapify(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkHeapifyExceptions2() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(); + WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray()); + wmem.putByte(0, (byte) 1); //corrupt preamble ints, should be 2 + KllLongsSketch.heapify(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkHeapifyExceptions3() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(); + sk.update(1); + sk.update(2); + WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray()); + wmem.putByte(0, (byte) 1); //corrupt preamble ints, should be 5 + KllLongsSketch.heapify(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkHeapifyExceptions4() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(); + WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray()); + wmem.putByte(1, (byte) 0); //corrupt SerVer, should be 1 or 2 + KllLongsSketch.heapify(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkHeapifyExceptions5() { + 
KllLongsSketch sk = KllLongsSketch.newHeapInstance(); + WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray()); + wmem.putByte(2, (byte) 0); //corrupt FamilyID, should be 15 + KllLongsSketch.heapify(wmem); + } + + @Test //set static enablePrinting = true for visual checking + public void checkMisc() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(8); + try { sk.getMaxItem(); fail(); } catch (SketchesArgumentException e) {} //empty + println(sk.toString(true, true)); + for (int i = 0; i < 20; i++) { sk.update(i); } + println(sk.toString(true, true)); + sk.toByteArray(); + final long[] items = sk.getLongItemsArray(); + assertEquals(items.length, 16); + final int[] levels = sk.getLevelsArray(sk.sketchStructure); + assertEquals(levels.length, 3); + assertEquals(sk.getNumLevels(), 2); + } + + @Test //set static enablePrinting = true for visual checking + public void visualCheckToString() { + final KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + int n = 21; + for (int i = 1; i <= n; i++) { sk.update(i); } + println(sk.toString(true, true)); + assertEquals(sk.getNumLevels(), 2); + assertEquals(sk.getMinItem(), 1); + assertEquals(sk.getMaxItem(), 21); + assertEquals(sk.getNumRetained(), 11); + + final KllLongsSketch sk2 = KllLongsSketch.newHeapInstance(20); + n = 400; + for (int i = 101; i <= n + 100; i++) { sk2.update(i); } + println(LS + sk2.toString(true, true)); + assertEquals(sk2.getNumLevels(), 5); + assertEquals(sk2.getMinItem(), 101); + assertEquals(sk2.getMaxItem(), 500); + assertEquals(sk2.getNumRetained(), 52); + + sk2.merge(sk); + println(LS + sk2.toString(true, true)); + assertEquals(sk2.getNumLevels(), 5); + assertEquals(sk2.getMinItem(), 1); + assertEquals(sk2.getMaxItem(), 500); + assertEquals(sk2.getNumRetained(), 56); + } + + @Test //set static enablePrinting = true for visual checking + public void viewHeapCompactions() { + int k = 20; + int n = 108; + boolean withLevels = false; + boolean withLevelsAndItems = true; + int 
compaction = 0; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= n; i++) { + sk.update(i); + if (sk.levelsArr[0] == 0) { + println(LS + "#<<< BEFORE COMPACTION # " + (++compaction) + " >>>"); + println(sk.toString(withLevels, withLevelsAndItems)); + sk.update(++i); + println(LS + "#<<< AFTER COMPACTION # " + (compaction) + " >>>"); + println(sk.toString(withLevels, withLevelsAndItems)); + assertEquals(sk.getLongItemsArray()[sk.levelsArr[0]], i); + } + } + println(LS + "#<<< END STATE # >>>"); + println(sk.toString(withLevels, withLevelsAndItems)); + println(""); + } + + @Test //set static enablePrinting = true for visual checking + public void viewDirectCompactions() { + int k = 20; + int n = 108; + boolean withLevels = false; + boolean withLevelsAndItems = true; + int compaction = 0; + int sizeBytes = KllSketch.getMaxSerializedSizeBytes(k, n, LONGS_SKETCH, true); + WritableMemory wmem = WritableMemory.allocate(sizeBytes); + KllLongsSketch sk = KllLongsSketch.newDirectInstance(k, wmem, memReqSvr); + for (int i = 1; i <= n; i++) { + sk.update(i); + if (sk.levelsArr[0] == 0) { + println(LS + "#<<< BEFORE COMPACTION # " + (++compaction) + " >>>"); + println(sk.toString(withLevels, withLevelsAndItems)); + sk.update(++i); + println(LS + "#<<< AFTER COMPACTION # " + (compaction) + " >>>"); + println(sk.toString(withLevels, withLevelsAndItems)); + assertEquals(sk.getLongItemsArray()[sk.levelsArr[0]], i); + } + } + println(LS + "#<<< END STATE # >>>"); + println(sk.toString(withLevels, withLevelsAndItems)); + println(""); + } + + @Test //set static enablePrinting = true for visual checking + public void viewCompactionAndSortedView() { + int n = 43; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= n; i++) { sk.update(i); } + println(sk.toString(true, true)); + LongsSortedView sv = sk.getSortedView(); + LongsSortedViewIterator itr = sv.iterator(); + println("### SORTED VIEW"); + printf("%6s %12s %12s" + LS, 
"Idx", "Value", "Weight"); + int i = 0; + while (itr.next()) { + long v = itr.getQuantile(); + long wt = itr.getWeight(); + printf("%6d %12d %12d" + LS, i, v, wt); + i++; + } + assertEquals(sv.getMinItem(), 1L); + assertEquals(sv.getMaxItem(), n); + } + + @Test //set static enablePrinting = true for visual checking + public void checkWeightedUpdates1() { + int k = 20; + int weight = 127; + long item = 10; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + println(sk.toString(true, true)); + sk.update(item, weight); + println(sk.toString(true, true)); + assertEquals(sk.getNumRetained(), 7); + assertEquals(sk.getN(), weight); + sk.update(item, weight); + println(sk.toString(true, true)); + assertEquals(sk.getNumRetained(), 14); + assertEquals(sk.getN(), 254); + } + + @Test //set static enablePrinting = true for visual checking + public void checkWeightedUpdates2() { + int k = 20; + int initial = 1000; + int weight = 127; + long item = 10; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= initial; i++) { sk.update(i + 1000); } + println(sk.toString(true, true)); + sk.update(item, weight); + println(sk.toString(true, true)); + assertEquals(sk.getNumRetained(), 65); + assertEquals(sk.getN(), 1127); + + LongsSortedViewIterator itr = sk.getSortedView().iterator(); + println("### SORTED VIEW"); + printf("%12s %12s %12s" + LS, "Value", "Weight", "NaturalRank"); + long cumWt = 0; + while (itr.next()) { + long v = itr.getQuantile(); + long wt = itr.getWeight(); + long natRank = itr.getNaturalRank(INCLUSIVE); + cumWt += wt; + assertEquals(cumWt, natRank); + printf("%12d %12d %12d" + LS, v, wt, natRank); + } + assertEquals(cumWt, sk.getN()); + } + + @Test //set static enablePrinting = true for visual checking + public void checkCreateItemsArray() { //used with weighted updates + long item = 10; + int weight = 108; + long[] itemsArr = KllLongsHelper.createItemsArray(item, weight); + assertEquals(itemsArr.length, 4); + for (int i = 0; i < 
itemsArr.length; i++) { itemsArr[i] = item; } + outputItems(itemsArr); + } + + private static void outputItems(long[] itemsArr) { + String[] hdr2 = {"Index", "Value"}; + String hdr2fmt = "%6s %15s" + LS; + String d2fmt = "%6d %15d" + LS; + println("ItemsArr"); + printf(hdr2fmt, (Object[]) hdr2); + for (int i = 0; i < itemsArr.length; i++) { + printf(d2fmt, i, itemsArr[i]); + } + println(""); + } + + @Test //set static enablePrinting = true for visual checking + public void checkCreateLevelsArray() { //used with weighted updates + int weight = 108; + int[] levelsArr = KllHelper.createLevelsArray(weight); + assertEquals(levelsArr.length, 8); + int[] correct = {0,0,0,1,2,2,3,4}; + for (int i = 0; i < levelsArr.length; i++) { + assertEquals(levelsArr[i], correct[i]); + } + outputLevels(weight, levelsArr); + } + + private static void outputLevels(int weight, int[] levelsArr) { + String[] hdr = {"Lvl", "StartAdr", "BitPattern", "Weight"}; + String hdrfmt = "%3s %9s %10s %s" + LS; + String dfmt = "%3d %9d %10d %d" + LS; + String dfmt_2 = "%3d %9d %s" + LS; + println("Count = " + weight + " => " + (Integer.toBinaryString(weight))); + println("LevelsArr"); + printf(hdrfmt, (Object[]) hdr); + for (int i = 0; i < levelsArr.length; i++) { + if (i == levelsArr.length - 1) { printf(dfmt_2, i, levelsArr[i], "ItemsArr.length"); } + else { + int j = bitAt(weight, i); + printf(dfmt, i, levelsArr[i], j, 1 << (i)); + } + } + println(""); + } + + @Test + public void viewMemorySketchData() { + int k = 20; + int n = 109; + boolean withLevels = true; + boolean withLevelsAndItems = true; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= n; i++) { sk.update(i); } + byte[] byteArr = sk.toByteArray(); + Memory mem = Memory.wrap(byteArr); + KllLongsSketch fltSk = KllLongsSketch.wrap(mem); + println(fltSk.toString(withLevels, withLevelsAndItems)); + assertEquals(fltSk.getN(), n); + } + + @Test //set static enablePrinting = true for visual checking + public void 
checkIntCapAux() { + String[] hdr = {"level", "depth", "wt", "cap", "(end)", "MaxN"}; + String hdrFmt = "%6s %6s %28s %10s %10s %34s" + LS; + String dataFmt = "%6d %6d %,28d %,10d %,10d %,34d" + LS; + int k = 1000; + int m = 8; + int numLevels = 20; + println("k=" + k + ", m=" + m + ", numLevels=" + numLevels); + printf(hdrFmt, (Object[]) hdr); + long maxN = 0; + long[] correct = {0,1,1,2,2,3,5,8,12,17,26,39,59,88,132,198,296,444,667,1000}; + for (int i = 0; i < numLevels; i++) { + int depth = numLevels - i - 1; + long cap = KllHelper.intCapAux(k, depth); + long end = Math.max(m, cap); + long wt = 1L << i; + maxN += wt * end; + printf(dataFmt, i, depth, wt, cap, end, maxN); + assertEquals(cap, correct[i]); + } + } + + @Test //set static enablePrinting = true for visual checking + public void checkIntCapAuxAux() { + String[] hdr = {"d","twoK","2k*2^d","3^d","tmp=2k*2^d/3^d","(tmp + 1)/2", "(end)"}; + String hdrFmt = "%6s %10s %20s %20s %15s %12s %10s" + LS; + String dataFmt = "%6d %10d %,20d %,20d %15d %12d %10d" + LS; + long k = (1L << 16) - 1L; + long m = 8; + println("k = " + k + ", m = " + m); + printf(hdrFmt, (Object[]) hdr); + long[] correct = + {65535,43690,29127,19418,12945,8630,5753,3836,2557,1705,1136,758,505,337,224,150,100,67,44,30,20,13,9,6,4,3,2,1,1,1,0}; + for (int i = 0; i < 31; i++) { + long twoK = k << 1; + long twoKxtwoD = twoK << i; + long threeToD = KllHelper.powersOfThree[i]; + long tmp = twoKxtwoD / threeToD; + long result = (tmp + 1L) >>> 1; + long end = Math.max(m, result); //performed later + printf(dataFmt, i, twoK, twoKxtwoD, threeToD, tmp, result, end); + assertEquals(result,correct[i]); + assertEquals(result, KllHelper.intCapAuxAux(k, i)); + } + } + + @Test + public void checkGrowLevels() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 21; i++) { sk.update(i); } + assertEquals(sk.getNumLevels(), 2); + assertEquals(sk.getLongItemsArray().length, 33); + 
assertEquals(sk.getLevelsArray(sk.sketchStructure)[2], 33); + } + + @Test //set static enablePrinting = true for visual checking + public void checkSketchInitializeLongHeap() { + int k = 20; //don't change this + KllLongsSketch sk; + + println("#### CASE: LONG FULL HEAP"); + sk = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 33); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 3); + assertEquals(sk.getMaxItem(), 21L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: LONG HEAP EMPTY"); + sk = KllLongsSketch.newHeapInstance(k); + println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + try { sk.getMaxItem(); fail(); } catch (SketchesArgumentException e) { } + try { sk.getMinItem(); fail(); } catch (SketchesArgumentException e) { } + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: LONG HEAP SINGLE"); + sk = KllLongsSketch.newHeapInstance(k); + sk.update(1); + println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + 
assertEquals(sk.getMaxItem(), 1L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test //set static enablePrinting = true for visual checking + public void checkSketchInitializeLongHeapifyCompactMem() { + int k = 20; //don't change this + KllLongsSketch sk; + KllLongsSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + println("#### CASE: LONG FULL HEAPIFIED FROM COMPACT"); + sk2 = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + //println(sk2.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem, LONGS_SKETCH, true)); + sk = KllLongsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 33); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 3); + assertEquals(sk.getMaxItem(), 21); + assertEquals(sk.getMinItem(), 1); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: LONG EMPTY HEAPIFIED FROM COMPACT"); + sk2 = KllLongsSketch.newHeapInstance(k); + //println(sk.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem, LONGS_SKETCH, true)); + sk = KllLongsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + try { sk.getMaxItem(); fail(); } catch (SketchesArgumentException e) { } + try { sk.getMinItem(); fail(); } 
catch (SketchesArgumentException e) { } + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: LONG SINGLE HEAPIFIED FROM COMPACT"); + sk2 = KllLongsSketch.newHeapInstance(k); + sk2.update(1); + //println(sk2.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem, LONGS_SKETCH, true)); + sk = KllLongsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + assertEquals(sk.getMaxItem(), 1L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test //set static enablePrinting = true for visual checking + public void checkSketchInitializeLongHeapifyUpdatableMem() { + int k = 20; //don't change this + KllLongsSketch sk; + KllLongsSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + println("#### CASE: LONG FULL HEAPIFIED FROM UPDATABLE"); + sk2 = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + compBytes = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem, LONGS_SKETCH, true)); + sk = KllHeapLongsSketch.heapifyImpl(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 33); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 3); + assertEquals(sk.getMaxItem(), 21); + assertEquals(sk.getMinItem(), 1); + assertEquals(sk.getNumLevels(), 2); + 
assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: LONG EMPTY HEAPIFIED FROM UPDATABLE"); + sk2 = KllLongsSketch.newHeapInstance(k); + //println(sk.toString(true, true)); + compBytes = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem, LONGS_SKETCH, true)); + sk = KllHeapLongsSketch.heapifyImpl(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + try { sk.getMaxItem(); fail(); } catch (SketchesArgumentException e) { } + try { sk.getMinItem(); fail(); } catch (SketchesArgumentException e) { } + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: LONG SINGLE HEAPIFIED FROM UPDATABLE"); + sk2 = KllLongsSketch.newHeapInstance(k); + sk2.update(1); + //println(sk.toString(true, true)); + compBytes = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem, LONGS_SKETCH, true)); + sk = KllHeapLongsSketch.heapifyImpl(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + assertEquals(sk.getMaxItem(), 1L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test //set static enablePrinting = true for visual checking + public void checkMemoryToStringLongCompact() { + int k = 20; //don't change this + KllLongsSketch sk; + KllLongsSketch sk2; + byte[] compBytes; + byte[] 
compBytes2; + WritableMemory wmem; + String s; + + println("#### CASE: LONG FULL COMPACT"); + sk = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + compBytes = sk.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllLongsSketch.heapify(wmem); + compBytes2 = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes2); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above"); + println(s); + assertEquals(compBytes, compBytes2); + + println("#### CASE: LONG EMPTY COMPACT"); + sk = KllLongsSketch.newHeapInstance(k); + compBytes = sk.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllLongsSketch.heapify(wmem); + compBytes2 = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes2); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above"); + println(s); + assertEquals(compBytes, compBytes2); + + println("#### CASE: LONG SINGLE COMPACT"); + sk = KllLongsSketch.newHeapInstance(k); + sk.update(1); + compBytes = sk.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllLongsSketch.heapify(wmem); + compBytes2 = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes2); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. 
Should match above"); + println(s); + assertEquals(compBytes, compBytes2); + } + + @Test //set static enablePrinting = true for visual checking + public void checkMemoryToStringLongUpdatable() { + int k = 20; //don't change this + KllLongsSketch sk; + KllLongsSketch sk2; + byte[] upBytes; + byte[] upBytes2; + WritableMemory wmem; + String s; + + println("#### CASE: LONG FULL UPDATABLE"); + sk = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + upBytes = KllHelper.toByteArray(sk, true); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllHeapLongsSketch.heapifyImpl(wmem); + upBytes2 = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above"); + println(s); //note: heapify does not copy free space, while toUpdatableByteArray does + assertEquals(sk.getN(), sk2.getN()); + assertEquals(sk.getMinItem(), sk2.getMinItem()); + assertEquals(sk.getMaxItem(), sk2.getMaxItem()); + assertEquals(sk.getNumRetained(), sk2.getNumRetained()); + + println("#### CASE: LONG EMPTY UPDATABLE"); + sk = KllLongsSketch.newHeapInstance(k); + upBytes = KllHelper.toByteArray(sk, true); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllHeapLongsSketch.heapifyImpl(wmem); + upBytes2 = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. 
Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + + println("#### CASE: LONG SINGLE UPDATABLE"); + sk = KllLongsSketch.newHeapInstance(k); + sk.update(1); + upBytes = KllHelper.toByteArray(sk, true); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllHeapLongsSketch.heapifyImpl(wmem); + upBytes2 = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + } + + @Test + public void checkSimpleMerge() { + int k = 20; + int m = 8; + int n1 = 21; + int n2 = 43; + WritableMemory wmem = WritableMemory.allocate(3000); + WritableMemory wmem2 = WritableMemory.allocate(3000); + + KllLongsSketch sk1 = KllDirectLongsSketch.newDirectUpdatableInstance(k, m, wmem, memReqSvr); + KllLongsSketch sk2 = KllDirectLongsSketch.newDirectUpdatableInstance(k, m, wmem2, memReqSvr); + for (int i = 1; i <= n1; i++) { + sk1.update(i); + } + for (int i = 1; i <= n2; i++) { + sk2.update(i + 100); + } + sk1.merge(sk2); + assertEquals(sk1.getMinItem(), 1L); + assertEquals(sk1.getMaxItem(), 143L); + } + + @Test + public void checkGetSingleItem() { + int k = 20; + KllLongsSketch skHeap = KllLongsSketch.newHeapInstance(k); + skHeap.update(1); + assertTrue(skHeap instanceof KllHeapLongsSketch); + assertEquals(skHeap.getLongSingleItem(), 1L); + + WritableMemory srcMem = WritableMemory.writableWrap(KllHelper.toByteArray(skHeap, true)); + KllLongsSketch skDirect = KllLongsSketch.writableWrap(srcMem, memReqSvr); + assertTrue(skDirect instanceof KllDirectLongsSketch); + assertEquals(skDirect.getLongSingleItem(), 1L); + + Memory srcMem2 = Memory.wrap(skHeap.toByteArray()); + KllLongsSketch skCompact = 
KllLongsSketch.wrap(srcMem2); + assertTrue(skCompact instanceof KllDirectCompactLongsSketch); + assertEquals(skCompact.getLongSingleItem(), 1L); + } + + @Test + public void printlnTest() { + String s = "PRINTING: printf in " + this.getClass().getName(); + println(s); + printf("%s" + LS, s); + } + + private final static boolean enablePrinting = false; + + /** + * @param format the format + * @param args the args + */ + private static final void printf(final String format, final Object ... args) { + if (enablePrinting) { System.out.printf(format, args); } + } + + /** + * @param o the Object to println + */ + private static final void println(final Object o) { + if (enablePrinting) { System.out.println(o.toString()); } + } + +} From 9c54687ca1ed79a847f3514de3b7529cbc3fd102 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Wed, 9 Oct 2024 14:44:47 -0700 Subject: [PATCH 05/22] Add all new kll longs files --- .../kll/KllDirectLongsSketch.java | 391 ++++++++++ .../datasketches/kll/KllHeapLongsSketch.java | 315 +++++++++ .../datasketches/kll/KllLongsHelper.java | 483 +++++++++++++ .../datasketches/kll/KllLongsSketch.java | 669 ++++++++++++++++++ .../kll/KllLongsSketchIterator.java | 40 ++ .../LongsSketchSortedView.java | 137 ++++ .../quantilescommon/LongsSortedView.java | 175 +++++ .../LongsSortedViewIterator.java | 51 ++ .../quantilescommon/QuantilesLongsAPI.java | 300 ++++++++ .../QuantilesLongsSketchIterator.java | 40 ++ 10 files changed, 2601 insertions(+) create mode 100644 src/main/java/org/apache/datasketches/kll/KllDirectLongsSketch.java create mode 100644 src/main/java/org/apache/datasketches/kll/KllHeapLongsSketch.java create mode 100644 src/main/java/org/apache/datasketches/kll/KllLongsHelper.java create mode 100644 src/main/java/org/apache/datasketches/kll/KllLongsSketch.java create mode 100644 src/main/java/org/apache/datasketches/kll/KllLongsSketchIterator.java create mode 100644 src/main/java/org/apache/datasketches/quantilescommon/LongsSketchSortedView.java 
create mode 100644 src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java create mode 100644 src/main/java/org/apache/datasketches/quantilescommon/LongsSortedViewIterator.java create mode 100644 src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsAPI.java create mode 100644 src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsSketchIterator.java diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectLongsSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectLongsSketch.java new file mode 100644 index 000000000..bf91baabd --- /dev/null +++ b/src/main/java/org/apache/datasketches/kll/KllDirectLongsSketch.java @@ -0,0 +1,391 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.kll; + +import static org.apache.datasketches.common.ByteArrayUtil.copyBytes; +import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR; +import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryK; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryLevelZeroSortedFlag; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryM; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryMinK; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryN; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryNumLevels; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFamilyID; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryK; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryLevelZeroSortedFlag; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryM; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryMinK; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryN; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryNumLevels; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryPreInts; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemorySerVer; +import static org.apache.datasketches.kll.KllSketch.SketchStructure.COMPACT_EMPTY; +import static org.apache.datasketches.kll.KllSketch.SketchStructure.COMPACT_FULL; +import static org.apache.datasketches.kll.KllSketch.SketchStructure.COMPACT_SINGLE; +import static org.apache.datasketches.kll.KllSketch.SketchStructure.UPDATABLE; +import static org.apache.datasketches.kll.KllSketch.SketchType.LONGS_SKETCH; + +import org.apache.datasketches.common.ByteArrayUtil; +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.SketchesArgumentException; 
+import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.memory.MemoryRequestServer; +import org.apache.datasketches.memory.WritableMemory; + +/** + * This class implements an off-heap, updatable KllLongsSketch using WritableMemory. + * + *

<p>Please refer to the documentation in the package-info:<br>
+ * {@link org.apache.datasketches.kll}</p>

+ * + * @author Lee Rhodes, Kevin Lang + */ +class KllDirectLongsSketch extends KllLongsSketch { + private WritableMemory wmem; + private MemoryRequestServer memReqSvr; + + /** + * Constructs from Memory or WritableMemory already initialized with a sketch image and validated. + * @param wmem the current WritableMemory + * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory + * @param memVal the KllMemoryValidate object + */ + KllDirectLongsSketch( + final SketchStructure sketchStructure, + final WritableMemory wmem, + final MemoryRequestServer memReqSvr, + final KllMemoryValidate memVal) { + super(sketchStructure); + this.wmem = wmem; + this.memReqSvr = memReqSvr; + readOnly = (wmem != null && wmem.isReadOnly()) || sketchStructure != UPDATABLE; + levelsArr = memVal.levelsArr; //always converted to writable form. + } + + /** + * Create a new updatable, direct instance of this sketch. + * @param k parameter that controls size of the sketch and accuracy of estimates + * @param m parameter that controls the minimum level width in items. 
+ * @param dstMem the given destination WritableMemory object for use by the sketch + * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory + * @return a new instance of this sketch + */ + static KllDirectLongsSketch newDirectUpdatableInstance( + final int k, + final int m, + final WritableMemory dstMem, + final MemoryRequestServer memReqSvr) { + setMemoryPreInts(dstMem, UPDATABLE.getPreInts()); + setMemorySerVer(dstMem, UPDATABLE.getSerVer()); + setMemoryFamilyID(dstMem, Family.KLL.getID()); + setMemoryK(dstMem, k); + setMemoryM(dstMem, m); + setMemoryN(dstMem, 0); + setMemoryMinK(dstMem, k); + setMemoryNumLevels(dstMem, 1); + int offset = DATA_START_ADR; + //new Levels array + dstMem.putIntArray(offset, new int[] {k, k}, 0, 2); + offset += 2 * Integer.BYTES; + //new min/max array + dstMem.putLongArray(offset, new long[] {Long.MAX_VALUE, Long.MIN_VALUE}, 0, 2); + offset += 2 * ITEM_BYTES; + //new empty items array + dstMem.putLongArray(offset, new long[k], 0, k); + + final KllMemoryValidate memVal = new KllMemoryValidate(dstMem, LONGS_SKETCH, null); + final WritableMemory wMem = dstMem; + return new KllDirectLongsSketch(UPDATABLE, wMem, memReqSvr, memVal); + } + + //End of Constructors + + @Override + String getItemAsString(final int index) { + if (isEmpty()) { return "Null"; } + return Long.toString(getLongItemsArray()[index]); + } + + @Override + public int getK() { + return getMemoryK(wmem); + } + + //MinMax Methods + + @Override + public long getMaxItem() { + if (sketchStructure == COMPACT_EMPTY || isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } + if (sketchStructure == COMPACT_SINGLE) { return getLongSingleItem(); } + //either compact-full or updatable + final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure) + ITEM_BYTES; + return wmem.getLong(offset); + } + + @Override + long getMaxItemInternal() { + if (sketchStructure == COMPACT_EMPTY || isEmpty()) { return Long.MIN_VALUE; } //empty max sentinel + if 
(sketchStructure == COMPACT_SINGLE) { return getLongSingleItem(); } + //either compact-full or updatable + final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure) + ITEM_BYTES; + return wmem.getLong(offset); + } + + @Override + String getMaxItemAsString() { + final long maxItem = getMaxItemInternal(); + return Long.toString(maxItem); + } + + @Override + public long getMinItem() { + if (sketchStructure == COMPACT_EMPTY || isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } + if (sketchStructure == COMPACT_SINGLE) { return getLongSingleItem(); } + //either compact-full or updatable + final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure); + return wmem.getLong(offset); + } + + @Override + long getMinItemInternal() { + if (sketchStructure == COMPACT_EMPTY || isEmpty()) { return Long.MAX_VALUE; } + if (sketchStructure == COMPACT_SINGLE) { return getLongSingleItem(); } + //either compact-full or updatable + final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure); + return wmem.getLong(offset); + } + + @Override + String getMinItemAsString() { + final long minItem = getMinItemInternal(); + return Long.toString(minItem); + } + + @Override + void setMaxItem(final long item) { + if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } + final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure) + ITEM_BYTES; + wmem.putLong(offset, item); + } + + @Override + void setMinItem(final long item) { + if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } + final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure); + wmem.putLong(offset, item); + } + + //END MinMax Methods + + @Override + public long getN() { + if (sketchStructure == COMPACT_EMPTY) { return 0; } + else if (sketchStructure == COMPACT_SINGLE) { return 1; } + else { return getMemoryN(wmem); } + } + + //other restricted + + @Override //returns updatable, expanded array 
including free space at bottom + long[] getLongItemsArray() { + final int k = getK(); + if (sketchStructure == COMPACT_EMPTY) { return new long[k]; } + if (sketchStructure == COMPACT_SINGLE) { + final long[] itemsArr = new long[k]; + itemsArr[k - 1] = getLongSingleItem(); + return itemsArr; + } + final int capacityItems = KllHelper.computeTotalItemCapacity(k, getM(), getNumLevels()); + final long[] longItemsArr = new long[capacityItems]; + final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure) + 2 * ITEM_BYTES; + final int shift = (sketchStructure == COMPACT_FULL) ? levelsArr[0] : 0; + final int numItems = (sketchStructure == COMPACT_FULL) ? getNumRetained() : capacityItems; + wmem.getLongArray(offset, longItemsArr, shift, numItems); + return longItemsArr; + } + + @Override //returns compact items array of retained items, no free space. + long[] getLongRetainedItemsArray() { + if (sketchStructure == COMPACT_EMPTY) { return new long[0]; } + if (sketchStructure == COMPACT_SINGLE) { return new long[] { getLongSingleItem() }; } + final int numRetained = getNumRetained(); + final long[] longItemsArr = new long[numRetained]; + final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure) + 2 * ITEM_BYTES + + (sketchStructure == COMPACT_FULL ? 
0 : levelsArr[0] * ITEM_BYTES); + wmem.getLongArray(offset, longItemsArr, 0, numRetained); + return longItemsArr; + } + + @Override + long getLongSingleItem() { + if (!isSingleItem()) { throw new SketchesArgumentException(NOT_SINGLE_ITEM_MSG); } + if (sketchStructure == COMPACT_SINGLE) { + return wmem.getLong(DATA_START_ADR_SINGLE_ITEM); + } + final int offset; + if (sketchStructure == COMPACT_FULL) { + offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure) + 2 * ITEM_BYTES; + } else { //sketchStructure == UPDATABLE + offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure) + (2 + getK() - 1) * ITEM_BYTES; + } + return wmem.getLong(offset); + } + + @Override + int getM() { + return getMemoryM(wmem); + } + + @Override + MemoryRequestServer getMemoryRequestServer() { return memReqSvr; } + + @Override + int getMinK() { + if (sketchStructure == COMPACT_FULL || sketchStructure == UPDATABLE) { return getMemoryMinK(wmem); } + return getK(); + } + + @Override + byte[] getMinMaxByteArr() { + final byte[] bytesOut = new byte[2 * ITEM_BYTES]; + if (sketchStructure == COMPACT_EMPTY) { + ByteArrayUtil.putLongLE(bytesOut, 0, Long.MAX_VALUE); + ByteArrayUtil.putLongLE(bytesOut, ITEM_BYTES, Long.MIN_VALUE); + return bytesOut; + } + final int offset; + if (sketchStructure == COMPACT_SINGLE) { + offset = DATA_START_ADR_SINGLE_ITEM; + wmem.getByteArray(offset, bytesOut, 0, ITEM_BYTES); + copyBytes(bytesOut, 0, bytesOut, ITEM_BYTES, ITEM_BYTES); + return bytesOut; + } + //sketchStructure == UPDATABLE OR COMPACT_FULL + offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure); + wmem.getByteArray(offset, bytesOut, 0, ITEM_BYTES); + wmem.getByteArray(offset + ITEM_BYTES, bytesOut, ITEM_BYTES, ITEM_BYTES); + return bytesOut; + } + + @Override + byte[] getRetainedItemsByteArr() { + if (sketchStructure == COMPACT_EMPTY) { return new byte[0]; } + final long[] lngArr = getLongRetainedItemsArray(); + final byte[] lngByteArr = new byte[lngArr.length * ITEM_BYTES]; + 
final WritableMemory wmem2 = WritableMemory.writableWrap(lngByteArr); + wmem2.putLongArray(0, lngArr, 0, lngArr.length); + return lngByteArr; + } + + @Override + byte[] getTotalItemsByteArr() { + final long[] lngArr = getLongItemsArray(); + final byte[] lngByteArr = new byte[lngArr.length * ITEM_BYTES]; + final WritableMemory wmem2 = WritableMemory.writableWrap(lngByteArr); + wmem2.putLongArray(0, lngArr, 0, lngArr.length); + return lngByteArr; + } + + @Override + WritableMemory getWritableMemory() { + return wmem; + } + + @Override + void incN(final int increment) { + if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } + setMemoryN(wmem, getMemoryN(wmem) + increment); + } + + @Override + void incNumLevels() { + if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } + int numLevels = getMemoryNumLevels(wmem); + setMemoryNumLevels(wmem, ++numLevels); + } + + @Override + boolean isLevelZeroSorted() { + return getMemoryLevelZeroSortedFlag(wmem); + } + + @Override + void setLongItemsArray(final long[] longItems) { + if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } + final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure) + 2 * ITEM_BYTES; + wmem.putLongArray(offset, longItems, 0, longItems.length); + } + + @Override + void setLongItemsArrayAt(final int index, final long item) { + if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } + final int offset = + DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure) + (index + 2) * ITEM_BYTES; + wmem.putLong(offset, item); + } + + @Override + void setLongItemsArrayAt(final int index, final long[] items, final int srcOffset, final int length) { + if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } + final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure) + (index + 2) * ITEM_BYTES; + wmem.putLongArray(offset, items, srcOffset, length); + } + + @Override + void 
setLevelZeroSorted(final boolean sorted) { + if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } + setMemoryLevelZeroSortedFlag(wmem, sorted); + } + + @Override + void setMinK(final int minK) { + if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } + setMemoryMinK(wmem, minK); + } + + @Override + void setN(final long n) { + if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } + setMemoryN(wmem, n); + } + + @Override + void setNumLevels(final int numLevels) { + if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } + setMemoryNumLevels(wmem, numLevels); + } + + @Override + void setWritableMemory(final WritableMemory wmem) { + this.wmem = wmem; + } + + final static class KllDirectCompactLongsSketch extends KllDirectLongsSketch { + + KllDirectCompactLongsSketch( + final SketchStructure sketchStructure, + final Memory srcMem, + final KllMemoryValidate memVal) { + super(sketchStructure, (WritableMemory) srcMem, null, memVal); + } + } + +} diff --git a/src/main/java/org/apache/datasketches/kll/KllHeapLongsSketch.java b/src/main/java/org/apache/datasketches/kll/KllHeapLongsSketch.java new file mode 100644 index 000000000..c4e091342 --- /dev/null +++ b/src/main/java/org/apache/datasketches/kll/KllHeapLongsSketch.java @@ -0,0 +1,315 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.kll; + +import static org.apache.datasketches.common.ByteArrayUtil.putLongLE; +import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR; +import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM; +import static org.apache.datasketches.kll.KllSketch.SketchStructure.COMPACT_EMPTY; +import static org.apache.datasketches.kll.KllSketch.SketchStructure.COMPACT_FULL; +import static org.apache.datasketches.kll.KllSketch.SketchStructure.COMPACT_SINGLE; +import static org.apache.datasketches.kll.KllSketch.SketchStructure.UPDATABLE; +import static org.apache.datasketches.kll.KllSketch.SketchType.LONGS_SKETCH; + +import java.util.Arrays; +import java.util.Objects; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.memory.MemoryRequestServer; +import org.apache.datasketches.memory.WritableMemory; + +/** + * This class implements an on-heap longs KllSketch. + * + *

<p>Please refer to the documentation in the package-info:<br>
+ * {@link org.apache.datasketches.kll}</p>

+ * + * @author Lee Rhodes, Kevin Lang + */ +final class KllHeapLongsSketch extends KllLongsSketch { + private final int k; // configured size of K. + private final int m; // configured size of M. + private long n; // number of items input into this sketch. + private int minK; // dynamic minK for error estimation after merging with different k. + private boolean isLevelZeroSorted; + private long minLongItem; + private long maxLongItem; + private long[] longItems; + + /** + * New instance heap constructor with the given parameters k and m. + * + * @param k parameter that controls size of the sketch and accuracy of estimates. + * k can be between m and 65535, inclusive. + * @param m parameter that controls the minimum level width in items. It can be 2, 4, 6 or 8. + * The DEFAULT_M, which is 8, is recommended. Other sizes of m should be considered + * experimental as they have not been as well characterized. + */ + KllHeapLongsSketch(final int k, final int m) { + super(UPDATABLE); + KllHelper.checkM(m); + KllHelper.checkK(k, m); + this.levelsArr = new int[] {k, k}; + this.readOnly = false; + this.k = k; + this.m = m; + this.n = 0; + this.minK = k; + this.isLevelZeroSorted = false; + this.minLongItem = Long.MAX_VALUE; + this.maxLongItem = Long.MIN_VALUE; + this.longItems = new long[k]; + } + + /** + * Used for creating a temporary sketch for use with weighted updates. + */ + KllHeapLongsSketch(final int k, final int m, final long item, final long weight) { + super(UPDATABLE); + KllHelper.checkM(m); + KllHelper.checkK(k, m); + this.levelsArr = KllHelper.createLevelsArray(weight); + this.readOnly = false; + this.k = k; + this.m = m; + this.n = weight; + this.minK = k; + this.isLevelZeroSorted = false; + this.minLongItem = item; + this.maxLongItem = item; + this.longItems = KllLongsHelper.createItemsArray(item, weight); + } + + /** + * Heapify constructor. + * @param srcMem Memory object that contains data serialized by this sketch. 
+ * @param memValidate the MemoryValidate object + */ + private KllHeapLongsSketch( + final Memory srcMem, + final KllMemoryValidate memValidate) { + super(UPDATABLE); + final SketchStructure memStructure = memValidate.sketchStructure; + this.k = memValidate.k; + this.m = memValidate.m; + this.n = memValidate.n; + this.minK = memValidate.minK; + this.levelsArr = memValidate.levelsArr; //normalized to full + this.isLevelZeroSorted = memValidate.level0SortedFlag; + + if (memStructure == COMPACT_EMPTY) { + minLongItem = Long.MAX_VALUE; + maxLongItem = Long.MIN_VALUE; + longItems = new long[k]; + } + else if (memStructure == COMPACT_SINGLE) { + final long item = srcMem.getLong(DATA_START_ADR_SINGLE_ITEM); + minLongItem = maxLongItem = item; + longItems = new long[k]; + longItems[k - 1] = item; + } + else if (memStructure == COMPACT_FULL) { + int offsetBytes = DATA_START_ADR; + offsetBytes += (levelsArr.length - 1) * Integer.BYTES; //shortened levelsArr + minLongItem = srcMem.getLong(offsetBytes); + offsetBytes += Long.BYTES; + maxLongItem = srcMem.getLong(offsetBytes); + offsetBytes += Long.BYTES; + final int capacityItems = levelsArr[getNumLevels()]; + final int freeSpace = levelsArr[0]; + final int retainedItems = capacityItems - freeSpace; + longItems = new long[capacityItems]; + srcMem.getLongArray(offsetBytes, longItems, freeSpace, retainedItems); + } + else { //(memStructure == UPDATABLE) + int offsetBytes = DATA_START_ADR; + offsetBytes += levelsArr.length * Integer.BYTES; //full levelsArr + minLongItem = srcMem.getLong(offsetBytes); + offsetBytes += Long.BYTES; + maxLongItem = srcMem.getLong(offsetBytes); + offsetBytes += Long.BYTES; + final int capacityItems = levelsArr[getNumLevels()]; + longItems = new long[capacityItems]; + srcMem.getLongArray(offsetBytes, longItems, 0, capacityItems); + } + } + + static KllHeapLongsSketch heapifyImpl(final Memory srcMem) { + Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null"); + final KllMemoryValidate 
memVal = new KllMemoryValidate(srcMem, LONGS_SKETCH); + return new KllHeapLongsSketch(srcMem, memVal); + } + + //End of constructors + + @Override + String getItemAsString(final int index) { + if (isEmpty()) { return "Null"; } + return Long.toString(longItems[index]); + } + + @Override + public int getK() { return k; } + + //MinMax Methods + + @Override + long getMaxItemInternal() { return maxLongItem; } + + @Override + public long getMaxItem() { + if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } + return maxLongItem; + } + + @Override + String getMaxItemAsString() { + return Long.toString(maxLongItem); + } + + @Override + long getMinItemInternal() { return minLongItem; } + + @Override + public long getMinItem() { + if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } + return minLongItem; + } + + @Override + String getMinItemAsString() { + return Long.toString(minLongItem); + } + + @Override + byte[] getMinMaxByteArr() { + final byte[] bytesOut = new byte[2 * Long.BYTES]; + putLongLE(bytesOut, 0, minLongItem); + putLongLE(bytesOut, Long.BYTES, maxLongItem); + return bytesOut; + } + + @Override + void setMaxItem(final long item) { this.maxLongItem = item; } + + @Override + void setMinItem(final long item) { this.minLongItem = item; } + + //END MinMax Methods + + @Override + public long getN() { return n; } + + //other restricted + + @Override + long[] getLongItemsArray() { return longItems; } + + @Override + long getLongSingleItem() { + if (n != 1L) { throw new SketchesArgumentException(NOT_SINGLE_ITEM_MSG); } + return longItems[k - 1]; + } + + @Override + int getM() { return m; } + + @Override + MemoryRequestServer getMemoryRequestServer() { return null; } + + @Override + int getMinK() { return minK; } + + @Override + byte[] getRetainedItemsByteArr() { + if (isEmpty()) { return new byte[0]; } + final byte[] bytesOut; + if (isSingleItem()) { + bytesOut = new byte[Long.BYTES]; + putLongLE(bytesOut, 0, getLongSingleItem()); + return 
bytesOut; + } + final int retained = getNumRetained(); + final int bytes = retained * Long.BYTES; + bytesOut = new byte[bytes]; + final WritableMemory wmem = WritableMemory.writableWrap(bytesOut); + wmem.putLongArray(0, longItems, levelsArr[0], retained); + return bytesOut; + } + + @Override + byte[] getTotalItemsByteArr() { + final byte[] byteArr = new byte[longItems.length * Long.BYTES]; + final WritableMemory wmem = WritableMemory.writableWrap(byteArr); + wmem.putLongArray(0, longItems, 0, longItems.length); + return byteArr; + } + + @Override + WritableMemory getWritableMemory() { + return null; + } + + @Override + void incN(final int increment) { n += increment; } + + @Override + void incNumLevels() { + //the heap sketch computes num levels from the array itself, so this is not used on-heap + } + + @Override + boolean isLevelZeroSorted() { return this.isLevelZeroSorted; } + + @Override + void setLongItemsArray(final long[] longItems) { this.longItems = longItems; } + + @Override + void setLongItemsArrayAt(final int index, final long item) { this.longItems[index] = item; } + + @Override + void setLongItemsArrayAt(final int dstIndex, final long[] srcItems, final int srcOffset, final int length) { + System.arraycopy(srcItems, srcOffset, longItems, dstIndex, length); + } + + @Override + void setLevelZeroSorted(final boolean sorted) { this.isLevelZeroSorted = sorted; } + + @Override + void setMinK(final int minK) { this.minK = minK; } + + @Override + void setN(final long n) { this.n = n; } + + @Override + void setNumLevels(final int numLevels) { + //the heap sketch computes num levels from the array itself, so this is not used on-heap + } + + @Override + long[] getLongRetainedItemsArray() { + return Arrays.copyOfRange(longItems, levelsArr[0], levelsArr[getNumLevels()]); + } + + @Override + void setWritableMemory(final WritableMemory wmem) { } + +} diff --git a/src/main/java/org/apache/datasketches/kll/KllLongsHelper.java 
b/src/main/java/org/apache/datasketches/kll/KllLongsHelper.java new file mode 100644 index 000000000..ec67b55d4 --- /dev/null +++ b/src/main/java/org/apache/datasketches/kll/KllLongsHelper.java @@ -0,0 +1,483 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.kll; + +import static java.lang.Math.max; +import static java.lang.Math.min; +import static org.apache.datasketches.common.Util.isEven; +import static org.apache.datasketches.common.Util.isOdd; +import static org.apache.datasketches.kll.KllHelper.findLevelToCompact; + +import java.util.Arrays; +import java.util.Random; + +import org.apache.datasketches.memory.WritableMemory; + +/** + * Static methods to support KllLongsSketch + * @author Lee Rhodes + * @author Zac Blanco + */ +final class KllLongsHelper { + + /** + * Create Items Array from given item and weight. + * Used with weighted update only. + * @param item the given item + * @param weight the given weight + * @return the Items Array. 
+ */ + static long[] createItemsArray(final long item, final long weight) { + final int itemsArrLen = Long.bitCount(weight); + final long[] itemsArr = new long[itemsArrLen]; + Arrays.fill(itemsArr, item); + return itemsArr; + } + + /** + * The following code is only valid in the special case of exactly reaching capacity while updating. + * It cannot be used while merging, while reducing k, or anything else. + * @param lngSk the current KllLongsSketch + */ + static void compressWhileUpdatingSketch(final KllLongsSketch lngSk) { + final int level = + findLevelToCompact(lngSk.getK(), lngSk.getM(), lngSk.getNumLevels(), lngSk.levelsArr); + if (level == lngSk.getNumLevels() - 1) { + //The level to compact is the top level, thus we need to add a level. + //Be aware that this operation grows the items array, + //shifts the items data and the level boundaries of the data, + //and grows the levels array and increments numLevels_. + KllHelper.addEmptyTopLevelToCompletelyFullSketch(lngSk); + } + //after this point, the levelsArray will not be expanded, only modified. + final int[] myLevelsArr = lngSk.levelsArr; + final int rawBeg = myLevelsArr[level]; + final int rawEnd = myLevelsArr[level + 1]; + // +2 is OK because we already added a new top level if necessary + final int popAbove = myLevelsArr[level + 2] - rawEnd; + final int rawPop = rawEnd - rawBeg; + final boolean oddPop = isOdd(rawPop); + final int adjBeg = oddPop ? rawBeg + 1 : rawBeg; + final int adjPop = oddPop ? 
rawPop - 1 : rawPop; + final int halfAdjPop = adjPop / 2; + + //the following is specific to longs + final long[] myLongItemsArray = lngSk.getLongItemsArray(); + if (level == 0) { // level zero might not be sorted, so we must sort it if we wish to compact it + Arrays.sort(myLongItemsArray, adjBeg, adjBeg + adjPop); + } + if (popAbove == 0) { + KllLongsHelper.randomlyHalveUpLongs(myLongItemsArray, adjBeg, adjPop, KllSketch.random); + } else { + KllLongsHelper.randomlyHalveDownLongs(myLongItemsArray, adjBeg, adjPop, KllSketch.random); + KllLongsHelper.mergeSortedLongArrays( + myLongItemsArray, adjBeg, halfAdjPop, + myLongItemsArray, rawEnd, popAbove, + myLongItemsArray, adjBeg + halfAdjPop); + } + + int newIndex = myLevelsArr[level + 1] - halfAdjPop; // adjust boundaries of the level above + lngSk.setLevelsArrayAt(level + 1, newIndex); + + if (oddPop) { + lngSk.setLevelsArrayAt(level, myLevelsArr[level + 1] - 1); // the current level now contains one item + myLongItemsArray[myLevelsArr[level]] = myLongItemsArray[rawBeg]; // namely this leftover guy + } else { + lngSk.setLevelsArrayAt(level, myLevelsArr[level + 1]); // the current level is now empty + } + + // verify that we freed up halfAdjPop array slots just below the current level + assert myLevelsArr[level] == rawBeg + halfAdjPop; + + // finally, we need to shift up the data in the levels below + // so that the freed-up space can be used by level zero + if (level > 0) { + final int amount = rawBeg - myLevelsArr[0]; + System.arraycopy(myLongItemsArray, myLevelsArr[0], myLongItemsArray, myLevelsArr[0] + halfAdjPop, amount); + } + for (int lvl = 0; lvl < level; lvl++) { + newIndex = myLevelsArr[lvl] + halfAdjPop; //adjust boundary + lngSk.setLevelsArrayAt(lvl, newIndex); + } + lngSk.setLongItemsArray(myLongItemsArray); + } + + //assumes readOnly = false and UPDATABLE, called from KllLongsSketch::merge + static void mergeLongsImpl(final KllLongsSketch mySketch, final KllLongsSketch otherLngSk) { + if 
(otherLngSk.isEmpty()) { return; } + + //capture my key mutable fields before doing any merging + final boolean myEmpty = mySketch.isEmpty(); + final long myMin = mySketch.getMinItemInternal(); + final long myMax = mySketch.getMaxItemInternal(); + final int myMinK = mySketch.getMinK(); + final long finalN = Math.addExact(mySketch.getN(), otherLngSk.getN()); + + //buffers that are referenced multiple times + final int otherNumLevels = otherLngSk.getNumLevels(); + final int[] otherLevelsArr = otherLngSk.levelsArr; + final long[] otherLongItemsArray; + + //MERGE: update this sketch with level0 items from the other sketch + if (otherLngSk.isCompactSingleItem()) { + KllLongsSketch.updateLong(mySketch, otherLngSk.getLongSingleItem()); + otherLongItemsArray = new long[0]; + } else { + otherLongItemsArray = otherLngSk.getLongItemsArray(); + for (int i = otherLevelsArr[0]; i < otherLevelsArr[1]; i++) { + KllLongsSketch.updateLong(mySketch, otherLongItemsArray[i]); + } + } + + //After the level 0 update, we capture the intermediate state of my levels and items arrays... 
+ final int myCurNumLevels = mySketch.getNumLevels(); + final int[] myCurLevelsArr = mySketch.levelsArr; + final long[] myCurLongItemsArray = mySketch.getLongItemsArray(); + + // create aliases in case there are no higher levels + int myNewNumLevels = myCurNumLevels; + int[] myNewLevelsArr = myCurLevelsArr; + long[] myNewLongItemsArray = myCurLongItemsArray; + + //merge higher levels if they exist + if (otherNumLevels > 1 && !otherLngSk.isCompactSingleItem()) { + final int tmpSpaceNeeded = mySketch.getNumRetained() + + KllHelper.getNumRetainedAboveLevelZero(otherNumLevels, otherLevelsArr); + final long[] workbuf = new long[tmpSpaceNeeded]; + + final int provisionalNumLevels = max(myCurNumLevels, otherNumLevels); + + final int ub = max(KllHelper.ubOnNumLevels(finalN), provisionalNumLevels); + final int[] worklevels = new int[ub + 2]; // ub+1 does not work + final int[] outlevels = new int[ub + 2]; + + populateLongWorkArrays(workbuf, worklevels, provisionalNumLevels, + myCurNumLevels, myCurLevelsArr, myCurLongItemsArray, + otherNumLevels, otherLevelsArr, otherLongItemsArray); + + // notice that workbuf is being used as both the input and output + final int[] result = generalLongsCompress(mySketch.getK(), mySketch.getM(), provisionalNumLevels, + workbuf, worklevels, workbuf, outlevels, mySketch.isLevelZeroSorted(), KllSketch.random); + final int targetItemCount = result[1]; //was finalCapacity. Max size given k, m, numLevels + final int curItemCount = result[2]; //was finalPop + + // now we need to finalize the results for mySketch + + //THE NEW NUM LEVELS + myNewNumLevels = result[0]; + assert myNewNumLevels <= ub; // ub may be much bigger + + // THE NEW ITEMS ARRAY + myNewLongItemsArray = (targetItemCount == myCurLongItemsArray.length) + ? 
myCurLongItemsArray + : new long[targetItemCount]; + final int freeSpaceAtBottom = targetItemCount - curItemCount; + + //shift the new items array create space at bottom + System.arraycopy(workbuf, outlevels[0], myNewLongItemsArray, freeSpaceAtBottom, curItemCount); + final int theShift = freeSpaceAtBottom - outlevels[0]; + + //calculate the new levels array length + final int finalLevelsArrLen; + if (myCurLevelsArr.length < myNewNumLevels + 1) { finalLevelsArrLen = myNewNumLevels + 1; } + else { finalLevelsArrLen = myCurLevelsArr.length; } + + //THE NEW LEVELS ARRAY + myNewLevelsArr = new int[finalLevelsArrLen]; + for (int lvl = 0; lvl < myNewNumLevels + 1; lvl++) { // includes the "extra" index + myNewLevelsArr[lvl] = outlevels[lvl] + theShift; + } + + //MEMORY SPACE MANAGEMENT + if (mySketch.getWritableMemory() != null) { + final WritableMemory wmem = + KllHelper.memorySpaceMgmt(mySketch, myNewLevelsArr.length, myNewLongItemsArray.length); + mySketch.setWritableMemory(wmem); + } + } //end of updating levels above level 0 + + //Update Preamble: + mySketch.setN(finalN); + if (otherLngSk.isEstimationMode()) { //otherwise the merge brings over exact items. 
+ mySketch.setMinK(min(myMinK, otherLngSk.getMinK())); + } + + //Update numLevels, levelsArray, items + mySketch.setNumLevels(myNewNumLevels); + mySketch.setLevelsArray(myNewLevelsArr); + mySketch.setLongItemsArray(myNewLongItemsArray); + + //Update min, max items + final long otherMin = otherLngSk.getMinItemInternal(); + final long otherMax = otherLngSk.getMaxItemInternal(); + if (myEmpty) { + mySketch.setMinItem(otherMin); + mySketch.setMaxItem(otherMax); + } else { + mySketch.setMinItem(min(myMin, otherMin)); + mySketch.setMaxItem(max(myMax, otherMax)); + } + assert KllHelper.sumTheSampleWeights(mySketch.getNumLevels(), mySketch.levelsArr) == mySketch.getN(); + } + + private static void mergeSortedLongArrays( //only bufC is modified + final long[] bufA, final int startA, final int lenA, + final long[] bufB, final int startB, final int lenB, + final long[] bufC, final int startC) { + final int lenC = lenA + lenB; + final int limA = startA + lenA; + final int limB = startB + lenB; + final int limC = startC + lenC; + + int a = startA; + int b = startB; + + for (int c = startC; c < limC; c++) { + if (a == limA) { + bufC[c] = bufB[b]; + b++; + } else if (b == limB) { + bufC[c] = bufA[a]; + a++; + } else if (bufA[a] < bufB[b]) { + bufC[c] = bufA[a]; + a++; + } else { + bufC[c] = bufB[b]; + b++; + } + } + assert a == limA; + assert b == limB; + } + + /** + * Validation Method. This must be modified to use the validation test + * @param buf the items array + * @param start data start + * @param length items array length + * @param random instance of Random + */ + //NOTE For validation Method: Need to modify to run. 
+ private static void randomlyHalveDownLongs(final long[] buf, final int start, final int length, + final Random random) { + assert isEven(length); + final int half_length = length / 2; + final int offset = random.nextInt(2); // disable for validation + //final int offset = deterministicOffset(); // enable for validation + int j = start + offset; + for (int i = start; i < (start + half_length); i++) { + buf[i] = buf[j]; + j += 2; + } + } + + /** + * Validation Method. This must be modified to use the validation test + * @param buf the items array + * @param start data start + * @param length items array length + * @param random instance of Random + */ + //NOTE For validation Method: Need to modify to run. + private static void randomlyHalveUpLongs(final long[] buf, final int start, final int length, + final Random random) { + assert isEven(length); + final int half_length = length / 2; + final int offset = random.nextInt(2); // disable for validation + //final int offset = deterministicOffset(); // enable for validation + int j = (start + length) - 1 - offset; + for (int i = (start + length) - 1; i >= (start + half_length); i--) { + buf[i] = buf[j]; + j -= 2; + } + } + + /** + * Compression algorithm used to merge higher levels. + *

<p>Here is what we do for each level:</p>
+   * <ul><li>If it does not need to be compacted, then simply copy it over.</li>
+   * <li>Otherwise, it does need to be compacted, so...
+   *   <ul><li>Copy zero or one guy over.</li>
+   *   <li>If the level above is empty, halve up.</li>
+   *   <li>Else the level above is nonempty, so halve down, then merge up.</li>
+   *   </ul></li>
+   * <li>Adjust the boundaries of the level above.</li>
+   * </ul>
+   *
+   * <p>It can be proved that generalCompress returns a sketch that satisfies the space constraints
+   * no matter how much data is passed in.
+   * We are pretty sure that it works correctly when inBuf and outBuf are the same.
+   * All levels except for level zero must be sorted before calling this, and will still be
+   * sorted afterwards.
+   * Level zero is not required to be sorted before, and may not be sorted afterwards.</p>
+   *
+   * <p>This trashes inBuf and inLevels and modifies outBuf and outLevels.</p>

+   *
+   * @param k The sketch parameter k
+   * @param m The minimum level size
+   * @param numLevelsIn provisional number of levels = max(this.numLevels, other.numLevels)
+   * @param inBuf work buffer of size = this.getNumRetained() + other.getNumRetainedAboveLevelZero().
+   * This contains the long[] of the other sketch
+   * @param inLevels work levels array size = ubOnNumLevels(this.n + other.n) + 2
+   * @param outBuf the same array as inBuf
+   * @param outLevels the same size as inLevels
+   * @param isLevelZeroSorted true if this.level 0 is sorted
+   * @param random instance of java.util.Random
+   * @return int array of: {numLevels, targetItemCount, currentItemCount}
+   */
+  // The output levels are packed starting at outBuf index 0 (outLevels[0] is set to 0 below).
+  private static int[] generalLongsCompress(
+      final int k,
+      final int m,
+      final int numLevelsIn,
+      final long[] inBuf,
+      final int[] inLevels,
+      final long[] outBuf,
+      final int[] outLevels,
+      final boolean isLevelZeroSorted,
+      final Random random) {
+    assert numLevelsIn > 0; // things are too weird if zero levels are allowed
+    int numLevels = numLevelsIn;
+    int currentItemCount = inLevels[numLevels] - inLevels[0]; // decreases with each compaction
+    int targetItemCount = KllHelper.computeTotalItemCapacity(k, m, numLevels); // increases if we add levels
+    boolean doneYet = false;
+    outLevels[0] = 0;
+    int curLevel = -1;
+    while (!doneYet) {
+      curLevel++; // start out at level 0
+
+      // If we are at the current top level, add an empty level above it for convenience,
+      // but do not actually increment numLevels until later
+      if (curLevel == (numLevels - 1)) {
+        inLevels[curLevel + 2] = inLevels[curLevel + 1];
+      }
+
+      final int rawBeg = inLevels[curLevel];
+      final int rawLim = inLevels[curLevel + 1];
+      final int rawPop = rawLim - rawBeg;
+
+      if ((currentItemCount < targetItemCount) || (rawPop < KllHelper.levelCapacity(k, numLevels, curLevel, m))) {
+        // copy level over as is
+        // because inBuf and outBuf could be the same, make sure we are not moving data upwards!
+        assert (rawBeg >= outLevels[curLevel]);
+        System.arraycopy(inBuf, rawBeg, outBuf, outLevels[curLevel], rawPop);
+        outLevels[curLevel + 1] = outLevels[curLevel] + rawPop;
+      }
+      else {
+        // The sketch is too full AND this level is too full, so we compact it
+        // Note: this can add a level and thus change the sketch's capacity
+
+        final int popAbove = inLevels[curLevel + 2] - rawLim;
+        final boolean oddPop = isOdd(rawPop);
+        final int adjBeg = oddPop ? 1 + rawBeg : rawBeg;
+        final int adjPop = oddPop ? rawPop - 1 : rawPop;
+        final int halfAdjPop = adjPop / 2;
+
+        if (oddPop) { // copy one guy over
+          outBuf[outLevels[curLevel]] = inBuf[rawBeg];
+          outLevels[curLevel + 1] = outLevels[curLevel] + 1;
+        } else { // copy zero guys over
+          outLevels[curLevel + 1] = outLevels[curLevel];
+        }
+
+        // level zero might not be sorted, so we must sort it if we wish to compact it
+        if ((curLevel == 0) && !isLevelZeroSorted) {
+          Arrays.sort(inBuf, adjBeg, adjBeg + adjPop);
+        }
+
+        if (popAbove == 0) { // Level above is empty, so halve up
+          randomlyHalveUpLongs(inBuf, adjBeg, adjPop, random);
+        } else { // Level above is nonempty, so halve down, then merge up
+          randomlyHalveDownLongs(inBuf, adjBeg, adjPop, random);
+          mergeSortedLongArrays(inBuf, adjBeg, halfAdjPop, inBuf, rawLim, popAbove, inBuf, adjBeg + halfAdjPop);
+        }
+
+        // track the fact that we just eliminated some data
+        currentItemCount -= halfAdjPop;
+
+        // Adjust the boundaries of the level above
+        inLevels[curLevel + 1] = inLevels[curLevel + 1] - halfAdjPop;
+
+        // Increment numLevels if we just compacted the old top level
+        // This creates some more capacity (the size of the new bottom level)
+        if (curLevel == (numLevels - 1)) {
+          numLevels++;
+          targetItemCount += KllHelper.levelCapacity(k, numLevels, 0, m);
+        }
+      } // end of code for compacting a level
+
+      // determine whether we have processed all levels yet (including any new levels that we created)
+      if (curLevel == (numLevels - 1)) { doneYet = true; }
+    } // end of loop over levels
+
+    assert (outLevels[numLevels] - outLevels[0]) == currentItemCount;
+    return new int[] {numLevels, targetItemCount, currentItemCount};
+  }
+
+  private static void populateLongWorkArrays( //workBuf and workLevels are modified
+      final long[] workBuf, final int[] workLevels, final int provisionalNumLevels,
+      final int myCurNumLevels, final int[] myCurLevelsArr, final long[] myCurLongItemsArr,
+      final int otherNumLevels, final int[] otherLevelsArr, final long[] otherLongItemsArr) {
+
+    workLevels[0] = 0; //the work buffer is packed starting at index 0
+
+    // Note: the level zero data from "other" was already inserted into "self".
+    // This copies into workbuf.
+    final int selfPopZero = KllHelper.currentLevelSizeItems(0, myCurNumLevels, myCurLevelsArr);
+    System.arraycopy(myCurLongItemsArr, myCurLevelsArr[0], workBuf, workLevels[0], selfPopZero);
+    workLevels[1] = workLevels[0] + selfPopZero;
+
+    for (int lvl = 1; lvl < provisionalNumLevels; lvl++) { //levels 1 and above: copy or merge self and other
+      final int selfPop = KllHelper.currentLevelSizeItems(lvl, myCurNumLevels, myCurLevelsArr);
+      final int otherPop = KllHelper.currentLevelSizeItems(lvl, otherNumLevels, otherLevelsArr);
+      workLevels[lvl + 1] = workLevels[lvl] + selfPop + otherPop;
+      assert selfPop >= 0 && otherPop >= 0;
+      if (selfPop == 0 && otherPop == 0) { continue; }
+      if (selfPop > 0 && otherPop == 0) { //only self has items at this level
+        System.arraycopy(myCurLongItemsArr, myCurLevelsArr[lvl], workBuf, workLevels[lvl], selfPop);
+      }
+      else if (selfPop == 0 && otherPop > 0) { //only other has items at this level
+        System.arraycopy(otherLongItemsArr, otherLevelsArr[lvl], workBuf, workLevels[lvl], otherPop);
+      }
+      else if (selfPop > 0 && otherPop > 0) { //both have items at this level
+        mergeSortedLongArrays( //only workBuf is modified
+            myCurLongItemsArr, myCurLevelsArr[lvl], selfPop,
+            otherLongItemsArr, otherLevelsArr[lvl], otherPop,
+            workBuf, workLevels[lvl]);
+      }
+    }
+  }
+
+  /*
+   * Validation Method.
+   * The following must be enabled for use with the KllDoublesValidationTest,
+   * which is only enabled for manual testing.
In addition, two Validation Methods + * above need to be modified. + */ //NOTE Validation Method: Need to uncomment to use + // static int nextOffset = 0; + // + // private static int deterministicOffset() { + // final int result = nextOffset; + // nextOffset = 1 - nextOffset; + // return result; + // } + +} diff --git a/src/main/java/org/apache/datasketches/kll/KllLongsSketch.java b/src/main/java/org/apache/datasketches/kll/KllLongsSketch.java new file mode 100644 index 000000000..0f6fa7d89 --- /dev/null +++ b/src/main/java/org/apache/datasketches/kll/KllLongsSketch.java @@ -0,0 +1,669 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.apache.datasketches.kll;
+
+import static java.lang.Math.max;
+import static java.lang.Math.min;
+import static org.apache.datasketches.common.ByteArrayUtil.putLongLE;
+import static org.apache.datasketches.kll.KllSketch.SketchStructure.UPDATABLE;
+import static org.apache.datasketches.kll.KllSketch.SketchType.LONGS_SKETCH;
+
+import java.util.Arrays;
+import java.util.Objects;
+
+import org.apache.datasketches.common.ArrayOfItemsSerDe;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.common.SuppressFBWarnings;
+import org.apache.datasketches.kll.KllDirectLongsSketch.KllDirectCompactLongsSketch;
+import org.apache.datasketches.memory.DefaultMemoryRequestServer;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.memory.MemoryRequestServer;
+import org.apache.datasketches.memory.WritableMemory;
+import org.apache.datasketches.quantilescommon.LongsSketchSortedView;
+import org.apache.datasketches.quantilescommon.QuantileSearchCriteria;
+import org.apache.datasketches.quantilescommon.QuantilesLongsAPI;
+import org.apache.datasketches.quantilescommon.QuantilesLongsSketchIterator;
+
+/**
+ * This variation of the KllSketch implements primitive longs.
+ *
+ * @see org.apache.datasketches.kll.KllSketch
+ */
+public abstract class KllLongsSketch extends KllSketch implements QuantilesLongsAPI {
+  private LongsSketchSortedView longsSV = null; //cached sorted view, set to null by every mutating method
+  final static int ITEM_BYTES = Long.BYTES;
+
+  KllLongsSketch(
+      final SketchStructure sketchStructure) {
+    super(SketchType.LONGS_SKETCH, sketchStructure); //the sketch type is always LONGS_SKETCH
+  }
+
+  //Factories for new heap instances.
+
+  /**
+   * Create a new heap instance of this sketch with the default k = 200.
+   * The default k = 200 results in a normalized rank error of about
+   * 1.65%. Larger k will have smaller error but the sketch will be larger (and slower).
+   * @return new KllLongsSketch on the Java heap.
+   */
+  public static KllLongsSketch newHeapInstance() {
+    return newHeapInstance(DEFAULT_K);
+  }
+
+  /**
+   * Create a new heap instance of this sketch with a given parameter k.
+   * k can be between 8, inclusive, and 65535, inclusive.
+   * The default k = 200 results in a normalized rank error of about
+   * 1.65%. Larger k will have smaller error but the sketch will be larger (and slower).
+   * @param k parameter that controls size of the sketch and accuracy of estimates.
+   * @return new KllLongsSketch on the Java heap.
+   */
+  public static KllLongsSketch newHeapInstance(final int k) {
+    return new KllHeapLongsSketch(k, DEFAULT_M);
+  }
+
+  //Factories for new direct instances.
+
+  /**
+   * Create a new direct updatable instance of this sketch with the default k.
+   * The default k = 200 results in a normalized rank error of about
+   * 1.65%. Larger k will have smaller error but the sketch will be larger (and slower).
+   * @param dstMem the given destination WritableMemory object for use by the sketch, must not be null
+   * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory, must not be null
+   * @return a new direct instance of this sketch
+   */
+  public static KllLongsSketch newDirectInstance(
+      final WritableMemory dstMem,
+      final MemoryRequestServer memReqSvr) {
+    return newDirectInstance(DEFAULT_K, dstMem, memReqSvr);
+  }
+
+  /**
+   * Create a new direct updatable instance of this sketch with a given k.
+   * @param k parameter that controls size of the sketch and accuracy of estimates.
+   * @param dstMem the given destination WritableMemory object for use by the sketch, must not be null
+   * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory, must not be null
+   * @return a new direct instance of this sketch
+   */
+  public static KllLongsSketch newDirectInstance(
+      final int k,
+      final WritableMemory dstMem,
+      final MemoryRequestServer memReqSvr) {
+    Objects.requireNonNull(dstMem, "Parameter 'dstMem' must not be null");
+    Objects.requireNonNull(memReqSvr, "Parameter 'memReqSvr' must not be null");
+    return KllDirectLongsSketch.newDirectUpdatableInstance(k, DEFAULT_M, dstMem, memReqSvr);
+  }
+
+  //Factory to create a heap instance from a Memory image
+
+  /**
+   * Factory heapify takes a compact sketch image in Memory and instantiates an on-heap sketch.
+   * The resulting sketch will not retain any link to the source Memory.
+   * @param srcMem a compact Memory image of a sketch serialized by this sketch, must not be null.
+   * See Memory
+   * @return a heap-based sketch based on the given Memory.
+   */
+  public static KllLongsSketch heapify(final Memory srcMem) {
+    Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null");
+    return KllHeapLongsSketch.heapifyImpl(srcMem);
+  }
+
+  //Factory to wrap a Read-Only Memory
+
+  /**
+   * Wrap a sketch around the given read only compact source Memory containing sketch data
+   * that originated from this sketch.
+   * @param srcMem the read only source Memory, must not be null
+   * @return instance of this sketch
+   */
+  public static KllLongsSketch wrap(final Memory srcMem) {
+    Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null");
+    final KllMemoryValidate memVal = new KllMemoryValidate(srcMem, LONGS_SKETCH, null);
+    if (memVal.sketchStructure == UPDATABLE) {
+      final MemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); //dummy
+      return new KllDirectLongsSketch(memVal.sketchStructure, (WritableMemory)srcMem, memReqSvr, memVal);
+    } else {
+      return new KllDirectCompactLongsSketch(memVal.sketchStructure, srcMem, memVal);
+    }
+  }
+
+  //Factory to wrap a WritableMemory image
+
+  /**
+   * Wrap a sketch around the given source Writable Memory containing sketch data
+   * that originated from this sketch.
+   * @param srcMem a WritableMemory that contains data, must not be null.
+   * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory, must not be null
+   * @return instance of this sketch
+   */
+  public static KllLongsSketch writableWrap(
+      final WritableMemory srcMem,
+      final MemoryRequestServer memReqSvr) {
+    Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null");
+    Objects.requireNonNull(memReqSvr, "Parameter 'memReqSvr' must not be null");
+    final KllMemoryValidate memVal = new KllMemoryValidate(srcMem, LONGS_SKETCH);
+    if (memVal.sketchStructure == UPDATABLE) {
+      return new KllDirectLongsSketch(UPDATABLE, srcMem, memReqSvr, memVal);
+    } else { //the image is not updatable: a compact sketch is returned and memReqSvr is not used
+      return new KllDirectCompactLongsSketch(memVal.sketchStructure, srcMem, memVal);
+    }
+  }
+
+  //END of Constructors
+
+  @Override
+  public double[] getCDF(final long[] splitPoints, final QuantileSearchCriteria searchCrit) {
+    if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
+    refreshSortedView(); //builds the cached sorted view if it is null
+    return longsSV.getCDF(splitPoints, searchCrit);
+  }
+
+  @Override
+  public double[] getPMF(final long[] splitPoints, final QuantileSearchCriteria searchCrit) {
+    if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
+    refreshSortedView();
+    return longsSV.getPMF(splitPoints, searchCrit);
+  }
+
+  @Override
+  public long getQuantile(final double rank, final QuantileSearchCriteria searchCrit) {
+    if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
+    refreshSortedView();
+    return longsSV.getQuantile(rank, searchCrit);
+  }
+
+  @Override
+  public long[] getQuantiles(final double[] ranks, final QuantileSearchCriteria searchCrit) {
+    if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
+    refreshSortedView(); //one sorted view serves every rank in the array
+    final int len = ranks.length;
+    final long[] quantiles = new long[len];
+    for (int i = 0; i < len; i++) {
+      quantiles[i] = longsSV.getQuantile(ranks[i], searchCrit);
+    }
+    return quantiles;
+  }
+
+  /**
+   * {@inheritDoc}
+   * The approximate probability that the true quantile is within the confidence interval
+   * specified by the upper and lower quantile bounds for this sketch is 0.99.
+   */
+  @Override
+  public long getQuantileLowerBound(final double rank) {
+    return getQuantile(max(0, rank - KllHelper.getNormalizedRankError(getMinK(), false))); //rank floored at 0
+  }
+
+  /**
+   * {@inheritDoc}
+   * The approximate probability that the true quantile is within the confidence interval
+   * specified by the upper and lower quantile bounds for this sketch is 0.99.
+   */
+  @Override
+  public long getQuantileUpperBound(final double rank) {
+    return getQuantile(min(1.0, rank + KllHelper.getNormalizedRankError(getMinK(), false))); //rank capped at 1.0
+  }
+
+  @Override
+  public double getRank(final long quantile, final QuantileSearchCriteria searchCrit) {
+    if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
+    refreshSortedView();
+    return longsSV.getRank(quantile, searchCrit);
+  }
+
+  /**
+   * {@inheritDoc}
+   * The approximate probability that the true rank is within the confidence interval
+   * specified by the upper and lower rank bounds for this sketch is 0.99.
+   */
+  @Override
+  public double getRankLowerBound(final double rank) {
+    return max(0.0, rank - KllHelper.getNormalizedRankError(getMinK(), false)); //floored at 0.0
+  }
+
+  /**
+   * {@inheritDoc}
+   * The approximate probability that the true rank is within the confidence interval
+   * specified by the upper and lower rank bounds for this sketch is 0.99.
+   */
+  @Override
+  public double getRankUpperBound(final double rank) {
+    return min(1.0, rank + KllHelper.getNormalizedRankError(getMinK(), false)); //capped at 1.0
+  }
+
+  @Override
+  public double[] getRanks(final long[] quantiles, final QuantileSearchCriteria searchCrit) {
+    if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
+    refreshSortedView(); //one sorted view serves every quantile in the array
+    final int len = quantiles.length;
+    final double[] ranks = new double[len];
+    for (int i = 0; i < len; i++) {
+      ranks[i] = longsSV.getRank(quantiles[i], searchCrit);
+    }
+    return ranks;
+  }
+
+  @Override
+  public QuantilesLongsSketchIterator iterator() {
+    return new KllLongsSketchIterator(
+        getLongItemsArray(), getLevelsArray(SketchStructure.UPDATABLE), getNumLevels());
+  }
+
+  @Override
+  public final void merge(final KllSketch other) {
+    if (readOnly || sketchStructure != UPDATABLE) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); }
+    if (this == other) { throw new SketchesArgumentException(SELF_MERGE_MSG); }
+    final KllLongsSketch otherLngSk = (KllLongsSketch)other; //ClassCastException if other is not a KllLongsSketch
+    if (otherLngSk.isEmpty()) { return; } //nothing to merge, the cached sorted view stays valid
+    KllLongsHelper.mergeLongsImpl(this, otherLngSk);
+    longsSV = null; //invalidate the cached sorted view
+  }
+
+  /**
+   * {@inheritDoc}
+   *

<p>The parameter k will not change.</p>

+   */
+  @Override
+  public final void reset() {
+    if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); }
+    final int k = getK();
+    setN(0);
+    setMinK(k);
+    setNumLevels(1);
+    setLevelZeroSorted(false);
+    setLevelsArray(new int[] {k, k}); //single level, start == end == k, so level 0 holds no items
+    setMinItem(Long.MAX_VALUE); //min(MAX_VALUE, x) == x for any long x
+    setMaxItem(Long.MIN_VALUE); //max(MIN_VALUE, x) == x for any long x
+    setLongItemsArray(new long[k]);
+    longsSV = null; //invalidate the cached sorted view
+  }
+
+  @Override
+  public byte[] toByteArray() {
+    return KllHelper.toByteArray(this, false);
+  }
+
+  @Override
+  public String toString(final boolean withLevels, final boolean withLevelsAndItems) {
+    KllSketch sketch = this;
+    if (withLevelsAndItems && sketchStructure != UPDATABLE) {
+      final Memory mem = getWritableMemory();
+      assert mem != null;
+      sketch = KllLongsSketch.heapify(getWritableMemory()); //a heapified copy of the non-updatable image is printed
+    }
+    return KllHelper.toStringImpl(sketch, withLevels, withLevelsAndItems, getSerDe());
+  }
+
+  //SINGLE UPDATE
+
+  @Override
+  public void update(final long item) {
+    // Align with KllDoublesSketch
+    if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); }
+    updateLong(this, item);
+    longsSV = null; //invalidate the cached sorted view
+  }
+
+  //Also Called from KllLongsHelper::merge
+  static void updateLong(final KllLongsSketch lngSk, final long item) {
+    lngSk.updateMinMax(item); //must precede incN: updateMinMax branches on isEmpty()
+    int freeSpace = lngSk.levelsArr[0]; //start index of level 0 == number of free slots below it
+    assert (freeSpace >= 0);
+    if (freeSpace == 0) {
+      KllLongsHelper.compressWhileUpdatingSketch(lngSk);
+      freeSpace = lngSk.levelsArr[0];
+      assert (freeSpace > 0);
+    }
+    lngSk.incN(1);
+    lngSk.setLevelZeroSorted(false);
+    final int nextPos = freeSpace - 1; //level 0 grows downward toward index 0
+    lngSk.setLevelsArrayAt(0, nextPos);
+    lngSk.setLongItemsArrayAt(nextPos, item);
+  }
+
+  /**
+   * Single update of min and max
+   * @param item the source item.
+   */
+  final void updateMinMax(final long item) {
+    if (isEmpty()) {
+      setMinItem(item);
+      setMaxItem(item);
+    } else {
+      setMinItem(min(getMinItemInternal(), item));
+      setMaxItem(max(getMaxItemInternal(), item));
+    }
+  }
+
+  //WEIGHTED UPDATE
+
+  /**
+   * Weighted update.
Updates this sketch with the given item the number of times specified by the given integer weight.
+   * @param item the item to be repeated.
+   * @param weight the number of times the update of item is to be repeated. It must be &ge; one.
+   */
+  public void update(final long item, final long weight) {
+    // Small weights are applied item by item, larger weights through a temporary weighted sketch.
+    if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); }
+    if (weight < 1L) { throw new SketchesArgumentException("Weight is less than one."); }
+    if (weight == 1L) { updateLong(this, item); }
+    else {
+      if (weight < levelsArr[0]) { //weight is below the level 0 start index, so the int cast below cannot overflow
+        for (int i = 0; i < (int)weight; i++) { updateLong(this, item); }
+      } else {
+        final KllHeapLongsSketch tmpSk = new KllHeapLongsSketch(getK(), DEFAULT_M, item, weight);
+        merge(tmpSk);
+      }
+    }
+    longsSV = null; //invalidate the cached sorted view
+  }
+
+  // VECTOR UPDATE
+
+  /**
+   * Vector update. Updates this sketch with the given array (vector) of items, starting at the items
+   * offset for a length number of items. This is not supported for direct sketches.
+   * @param items the vector of items
+   * @param offset the starting index of the items[] array
+   * @param length the number of items
+   */
+  public void update(final long[] items, final int offset, final int length) {
+    if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); }
+    if (length == 0) { return; }
+    updateLong(items, offset, length);
+    longsSV = null; //invalidate the cached sorted view
+  }
+  /* Align with KllDoublesSketch
+   *
+   * The method below is the bulk insert used by update(long[], int, int). It copies items into the
+   * free slots below level 0 (levelsArr[0] of them) and compresses whenever none are left.
+
+
+
+
+
+
+
+
+
+  */
+  private void updateLong(final long[] srcItems, final int srcOffset, final int length) {
+    if (isEmpty()) {
+      setMinItem(srcItems[srcOffset]); //initialize with a real value
+      setMaxItem(srcItems[srcOffset]);
+    }
+
+    int count = 0; //number of source items consumed so far
+    while (count < length) {
+      if (levelsArr[0] == 0) {
+        KllLongsHelper.compressWhileUpdatingSketch(this);
+      }
+      final int spaceNeeded = length - count;
+      final int freeSpace = levelsArr[0];
+      assert (freeSpace > 0);
+      final int numItemsToCopy = min(spaceNeeded, freeSpace);
+      final int dstOffset = freeSpace - numItemsToCopy; //new start index of level 0
+      final int localSrcOffset = srcOffset + count;
+      setLongItemsArrayAt(dstOffset, srcItems, localSrcOffset, numItemsToCopy);
+      updateMinMax(srcItems, localSrcOffset, numItemsToCopy);
+      count += numItemsToCopy;
+      incN(numItemsToCopy);
+      setLevelsArrayAt(0, dstOffset);
+    }
+    setLevelZeroSorted(false);
+  }
+
+  /**
+   * Vector update of min and max.
+   * @param srcItems the input source array of values.
+   * @param srcOffset the starting offset in srcItems
+   * @param length the number of items to update min and max
+   */
+  private void updateMinMax(final long[] srcItems, final int srcOffset, final int length) {
+    final int end = srcOffset + length; //exclusive
+    for (int i = srcOffset; i < end; i++) {
+      setMinItem(min(getMinItemInternal(), srcItems[i]));
+      setMaxItem(max(getMaxItemInternal(), srcItems[i]));
+    }
+  }
+  /* Align with KllDoublesSketch
+
+
+
+
+
+
+
+
+  */
+  // END ALL UPDATE METHODS
+
+  /**
+   * @return full size of internal items array including empty space at bottom.
+   */
+  abstract long[] getLongItemsArray();
+
+  /**
+   * @return items array of retained items.
+   */
+  abstract long[] getLongRetainedItemsArray();
+
+  abstract long getLongSingleItem();
+
+  // Min & Max Methods
+
+  abstract long getMaxItemInternal();
+
+  abstract void setMaxItem(long item);
+
+  abstract long getMinItemInternal();
+
+  abstract void setMinItem(long item);
+
+  @Override
+  abstract byte[] getMinMaxByteArr();
+
+  @Override
+  int getMinMaxSizeBytes() {
+    return Long.BYTES * 2; //one long for min plus one long for max
+  }
+
+  //END Min & Max Methods
+
+  @Override
+  abstract byte[] getRetainedItemsByteArr();
+
+  @Override
+  int getRetainedItemsSizeBytes() {
+    return getNumRetained() * Long.BYTES;
+  }
+
+  @Override
+  ArrayOfItemsSerDe getSerDe() { return null; }
+
+  @Override
+  final byte[] getSingleItemByteArr() {
+    final byte[] bytes = new byte[ITEM_BYTES];
+    putLongLE(bytes, 0, getLongSingleItem());
+    return bytes;
+  }
+
+  @Override
+  int getSingleItemSizeBytes() {
+    return Long.BYTES;
+  }
+
+  @Override
+  abstract byte[] getTotalItemsByteArr();
+
+  @Override
+  int getTotalItemsNumBytes() {
+    return levelsArr[getNumLevels()] * Long.BYTES;
+  }
+
+  abstract void setLongItemsArray(long[] longItems);
+
+  abstract void setLongItemsArrayAt(int index, long item);
+
+  abstract void setLongItemsArrayAt(int dstIndex, long[] srcItems, int srcOffset, int length);
+
+  // SORTED VIEW
+
+  @Override
+  @SuppressFBWarnings(value = "EI_EXPOSE_REP", justification = "OK in this case.")
+  public LongsSketchSortedView getSortedView() {
+    refreshSortedView();
+    return longsSV; //the cached instance itself is returned, hence the suppressed warning
+  }
+
+  private final LongsSketchSortedView refreshSortedView() {
+    if (longsSV == null) { //rebuilt only after a mutating method cleared the cache
+      final CreateSortedView csv = new CreateSortedView();
+      longsSV = csv.getSV();
+    }
+    return longsSV;
+  }
+
+  private final class CreateSortedView {
+    long[] quantiles;
+    long[] cumWeights;
+
+    LongsSketchSortedView getSV() {
+      if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
+      final long[] srcQuantiles = getLongItemsArray();
+      final int[] srcLevels = levelsArr;
+      final int srcNumLevels = getNumLevels();
+
+      if (!isLevelZeroSorted()) {
+        Arrays.sort(srcQuantiles, srcLevels[0], srcLevels[1]);
+        if (!hasMemory()) { setLevelZeroSorted(true); }
+        //we don't sort level0 in Memory, only our copy.
+      }
+      final int numQuantiles = getNumRetained();
+      quantiles = new long[numQuantiles];
+      cumWeights = new long[numQuantiles];
+      populateFromSketch(srcQuantiles, srcLevels, srcNumLevels, numQuantiles);
+      return new LongsSketchSortedView(
+          quantiles, cumWeights, KllLongsSketch.this);
+    }
+
+    private void populateFromSketch(final long[] srcQuantiles, final int[] srcLevels,
+        final int srcNumLevels, final int numItems) {
+      final int[] myLevels = new int[srcNumLevels + 1];
+      final int offset = srcLevels[0]; //retained items start here; indices below are rebased to 0
+      System.arraycopy(srcQuantiles, offset, quantiles, 0, numItems);
+      int srcLevel = 0;
+      int dstLevel = 0;
+      long weight = 1; //weight of each item in level 0
+      while (srcLevel < srcNumLevels) {
+        final int fromIndex = srcLevels[srcLevel] - offset;
+        final int toIndex = srcLevels[srcLevel + 1] - offset; // exclusive
+        if (fromIndex < toIndex) { // if equal, skip empty level
+          Arrays.fill(cumWeights, fromIndex, toIndex, weight);
+          myLevels[dstLevel] = fromIndex;
+          myLevels[dstLevel + 1] = toIndex;
+          dstLevel++;
+        }
+        srcLevel++;
+        weight *= 2; //item weight doubles with each level
+      }
+      final int numLevels = dstLevel; //number of non-empty levels
+      blockyTandemMergeSort(quantiles, cumWeights, myLevels, numLevels); //create unit weights
+      KllHelper.convertToCumulative(cumWeights);
+    }
+  } //End of class CreateSortedView
+
+  private static void blockyTandemMergeSort(final long[] quantiles, final long[] weights,
+      final int[] levels, final int numLevels) {
+    if (numLevels == 1) { return; }
+
+    // duplicate the input in preparation for the "ping-pong" copy reduction strategy.
+    final long[] quantilesTmp = Arrays.copyOf(quantiles, quantiles.length);
+    final long[] weightsTmp = Arrays.copyOf(weights, quantiles.length); // don't need the extra one
+
+    blockyTandemMergeSortRecursion(quantilesTmp, weightsTmp, quantiles, weights, levels, 0, numLevels);
+  }
+
+  private static void blockyTandemMergeSortRecursion(
+      final long[] quantilesSrc, final long[] weightsSrc,
+      final long[] quantilesDst, final long[] weightsDst,
+      final int[] levels, final int startingLevel, final int numLevels) {
+    if (numLevels == 1) { return; }
+    final int numLevels1 = numLevels / 2;
+    final int numLevels2 = numLevels - numLevels1;
+    assert numLevels1 >= 1;
+    assert numLevels2 >= numLevels1;
+    final int startingLevel1 = startingLevel;
+    final int startingLevel2 = startingLevel + numLevels1;
+    // swap roles of src and dst
+    blockyTandemMergeSortRecursion(
+        quantilesDst, weightsDst,
+        quantilesSrc, weightsSrc,
+        levels, startingLevel1, numLevels1);
+    blockyTandemMergeSortRecursion(
+        quantilesDst, weightsDst,
+        quantilesSrc, weightsSrc,
+        levels, startingLevel2, numLevels2);
+    tandemMerge(
+        quantilesSrc, weightsSrc,
+        quantilesDst, weightsDst,
+        levels,
+        startingLevel1, numLevels1,
+        startingLevel2, numLevels2);
+  }
+
+  private static void tandemMerge(
+      final long[] quantilesSrc, final long[] weightsSrc,
+      final long[] quantilesDst, final long[] weightsDst,
+      final int[] levelStarts,
+      final int startingLevel1, final int numLevels1,
+      final int startingLevel2, final int numLevels2) {
+    final int fromIndex1 = levelStarts[startingLevel1];
+    final int toIndex1 = levelStarts[startingLevel1 + numLevels1]; // exclusive
+    final int fromIndex2 = levelStarts[startingLevel2];
+    final int toIndex2 = levelStarts[startingLevel2 + numLevels2]; // exclusive
+    int iSrc1 = fromIndex1;
+    int iSrc2 = fromIndex2;
+    int iDst = fromIndex1;
+
+    while (iSrc1 < toIndex1 && iSrc2 < toIndex2) {
+      if (quantilesSrc[iSrc1] < quantilesSrc[iSrc2]) { //strict <, so on a tie the second run is copied first
+        quantilesDst[iDst] = quantilesSrc[iSrc1];
+        weightsDst[iDst] = weightsSrc[iSrc1];
+        iSrc1++;
+      } else {
+        quantilesDst[iDst] = quantilesSrc[iSrc2];
+        weightsDst[iDst] = weightsSrc[iSrc2];
+        iSrc2++;
+      }
+      iDst++;
+    }
+    if (iSrc1 < toIndex1) { //copy whatever remains of the first run
+      System.arraycopy(quantilesSrc, iSrc1, quantilesDst, iDst, toIndex1 - iSrc1);
+      System.arraycopy(weightsSrc, iSrc1, weightsDst, iDst, toIndex1 - iSrc1);
+    } else if (iSrc2 < toIndex2) { //copy whatever remains of the second run
+      System.arraycopy(quantilesSrc, iSrc2, quantilesDst, iDst, toIndex2 - iSrc2);
+      System.arraycopy(weightsSrc, iSrc2, weightsDst, iDst, toIndex2 - iSrc2);
+    }
+  }
+
+  // END SORTED VIEW
+
+}
diff --git a/src/main/java/org/apache/datasketches/kll/KllLongsSketchIterator.java b/src/main/java/org/apache/datasketches/kll/KllLongsSketchIterator.java
new file mode 100644
index 000000000..8922ec70e
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/kll/KllLongsSketchIterator.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ +public final class KllLongsSketchIterator extends KllSketchIterator implements QuantilesLongsSketchIterator { + private final long[] quantiles; + + KllLongsSketchIterator(final long[] quantiles, final int[] levelsArr, final int numLevels) { + super(levelsArr, numLevels); + this.quantiles = quantiles; + } + + @Override + public long getQuantile() { + return quantiles[index]; + } + +} diff --git a/src/main/java/org/apache/datasketches/quantilescommon/LongsSketchSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/LongsSketchSortedView.java new file mode 100644 index 000000000..efb4006f6 --- /dev/null +++ b/src/main/java/org/apache/datasketches/quantilescommon/LongsSketchSortedView.java @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.quantilescommon; + +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; +import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG; +import static org.apache.datasketches.quantilescommon.QuantilesUtil.getNaturalRank; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.quantilescommon.IncludeMinMax.LongsPair; + +/** + * The SortedView of the KllLongsSketch. + * @author Lee Rhodes + * @author Zac Blanco + */ +public final class LongsSketchSortedView implements LongsSortedView { + private final long[] quantiles; + private final long[] cumWeights; //cumulative natural weights + private final long totalN; + + /** + * Construct from elements, also used in testing. + * @param quantiles sorted array of quantiles + * @param cumWeights sorted, monotonically increasing cumulative weights. + * @param sk the underlying quantile sketch. + */ + public LongsSketchSortedView( + final long[] quantiles, + final long[] cumWeights, + final QuantilesLongsAPI sk) { + final LongsPair dPair = + IncludeMinMax.includeLongsMinMax(quantiles, cumWeights, sk.getMaxItem(), sk.getMinItem()); + this.quantiles = dPair.quantiles; + this.cumWeights = dPair.cumWeights; + this.totalN = sk.getN(); + } + + //Used for testing + LongsSketchSortedView( + final long[] quantiles, + final long[] cumWeights, + final long totalN, + final long maxItem, + final long minItem) { + final LongsPair dPair = + IncludeMinMax.includeLongsMinMax(quantiles, cumWeights, maxItem, minItem); + this.quantiles = dPair.quantiles; + this.cumWeights = dPair.cumWeights; + this.totalN = totalN; + } + + @Override + public long[] getCumulativeWeights() { + return cumWeights.clone(); + } + + @Override + public long getMaxItem() { + final int top = quantiles.length - 1; + return quantiles[top]; + } + + @Override + public long getMinItem() { + return quantiles[0]; + } + + @Override + public long 
getN() { + return totalN; + } + + @Override + public int getNumRetained() { + return quantiles.length; + } + + @Override + public long getQuantile(final double rank, final QuantileSearchCriteria searchCrit) { + if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } + QuantilesUtil.checkNormalizedRankBounds(rank); + final int len = cumWeights.length; + final double naturalRank = getNaturalRank(rank, totalN, searchCrit); + final InequalitySearch crit = (searchCrit == INCLUSIVE) ? InequalitySearch.GE : InequalitySearch.GT; + final int index = InequalitySearch.find(cumWeights, 0, len - 1, naturalRank, crit); + if (index == -1) { + return quantiles[len - 1]; //EXCLUSIVE (GT) case: normRank == 1.0; + } + return quantiles[index]; + } + + @Override + public long[] getQuantiles() { + return quantiles.clone(); + } + + @Override + public double getRank(final long quantile, final QuantileSearchCriteria searchCrit) { + if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } + final int len = quantiles.length; + final InequalitySearch crit = (searchCrit == INCLUSIVE) ? 
InequalitySearch.LE : InequalitySearch.LT; + final int index = InequalitySearch.find(quantiles, 0, len - 1, quantile, crit); + if (index == -1) { + return 0; //EXCLUSIVE (LT) case: quantile <= minQuantile; INCLUSIVE (LE) case: quantile < minQuantile + } + return (double)cumWeights[index] / totalN; + } + + @Override + public boolean isEmpty() { + return totalN == 0; + } + + @Override + public LongsSortedViewIterator iterator() { + return new LongsSortedViewIterator(quantiles, cumWeights); + } + +} diff --git a/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java new file mode 100644 index 000000000..4823edd1d --- /dev/null +++ b/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.quantilescommon; + +/** + * The Sorted View for quantile sketches of primitive type long. 
+ * @see SortedView + * @author Lee Rhodes + * @author Zac Blanco + */ +public interface LongsSortedView extends SortedView { + + /** + * Returns an approximation to the Cumulative Distribution Function (CDF) of the input stream + * as a monotonically increasing array of double ranks (or cumulative probabilities) on the interval [0.0, 1.0], + * given a set of splitPoints. + * + *

The resulting approximations have a probabilistic guarantee that can be obtained from the + * getNormalizedRankError(false) function.

+ * + * @param splitPoints an array of m unique, monotonically increasing items + * (of the same type as the input items) + * that divide the item input domain into m+1 overlapping intervals. + * + *

The start of each interval is below the lowest item retained by the sketch + * corresponding to a zero rank or zero probability, and the end of the interval + * is the rank or cumulative probability corresponding to the split point.

+ * + *

The (m+1)th interval represents 100% of the distribution represented by the sketch + * and consistent with the definition of a cumulative probability distribution, thus the (m+1)th + * rank or probability in the returned array is always 1.0.

+ * + *

If a split point exactly equals a retained item of the sketch and the search criterion is:

+ * + *
    + *
  • INCLUSIVE, the resulting cumulative probability will include that item.
  • + *
  • EXCLUSIVE, the resulting cumulative probability will not include the weight of that split point.
  • + *
+ * + *

It is not recommended to include either the minimum or maximum items of the input stream.

+ * + * @param searchCrit the desired search criteria. + * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. + * @throws IllegalArgumentException if sketch is empty. + */ + default double[] getCDF(long[] splitPoints, QuantileSearchCriteria searchCrit) { + QuantilesUtil.checkLongsSplitPointsOrder(splitPoints); + final int len = splitPoints.length + 1; + final double[] buckets = new double[len]; + for (int i = 0; i < len - 1; i++) { + buckets[i] = getRank(splitPoints[i], searchCrit); + } + buckets[len - 1] = 1; + return buckets; + } + + /** + * Returns the maximum item of the stream. This may be distinct from the largest item retained by the + * sketch algorithm. + * + * @return the maximum item of the stream + * @throws IllegalArgumentException if sketch is empty. + */ + long getMaxItem(); + + /** + * Returns the minimum item of the stream. This may be distinct from the smallest item retained by the + * sketch algorithm. + * + * @return the minimum item of the stream + * @throws IllegalArgumentException if sketch is empty. + */ + long getMinItem(); + + /** + * Returns an approximation to the Probability Mass Function (PMF) of the input stream + * as an array of probability masses as doubles on the interval [0.0, 1.0], + * given a set of splitPoints. + * + *

The resulting approximations have a probabilistic guarantee that can be obtained from the + * getNormalizedRankError(true) function.

+ * + * @param splitPoints an array of m unique, monotonically increasing items + * (of the same type as the input items) + * that divide the item input domain into m+1 consecutive, non-overlapping intervals. + * + *

Each interval except for the end intervals starts with a split point and ends with the next split + * point in sequence.

+ * + *

The first interval starts below the lowest item retained by the sketch + * corresponding to a zero rank or zero probability, and ends with the first split point.

+ * + *

The last (m+1)th interval starts with the last split point and ends after the last + * item retained by the sketch corresponding to a rank or probability of 1.0.

+ * + *

The sum of the probability masses of all (m+1) intervals is 1.0.

+ * + *

If the search criterion is:

+ * + *
    + *
  • INCLUSIVE, and the upper split point of an interval equals an item retained by the sketch, the interval + * will include that item. If the lower split point equals an item retained by the sketch, the interval will exclude + * that item.
  • + *
  • EXCLUSIVE, and the upper split point of an interval equals an item retained by the sketch, the interval + * will exclude that item. If the lower split point equals an item retained by the sketch, the interval will include + * that item.
  • + *
+ * + *

It is not recommended to include either the minimum or maximum items of the input stream.

+ * + * @param searchCrit the desired search criteria. + * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. + * @throws IllegalArgumentException if sketch is empty. + */ + default double[] getPMF(long[] splitPoints, QuantileSearchCriteria searchCrit) { + final double[] buckets = getCDF(splitPoints, searchCrit); + final int len = buckets.length; + for (int i = len; i-- > 1; ) { + buckets[i] -= buckets[i - 1]; + } + return buckets; + } + + /** + * Gets the approximate quantile of the given normalized rank and the given search criterion. + * + * @param rank the given normalized rank, a double in the range [0.0, 1.0]. + * @param searchCrit If INCLUSIVE, the given rank includes all quantiles ≤ + * the quantile directly corresponding to the given rank. + * If EXCLUSIVE, the given rank includes all quantiles < + * the quantile directly corresponding to the given rank. + * @return the approximate quantile given the normalized rank. + * @throws IllegalArgumentException if sketch is empty. + * @see QuantileSearchCriteria + */ + long getQuantile(double rank, QuantileSearchCriteria searchCrit); + + /** + * Returns an array of all retained quantiles by the sketch. + * @return an array of all retained quantiles by the sketch. + */ + long[] getQuantiles(); + + /** + * Gets the normalized rank corresponding to the given quantile. + * + * @param quantile the given quantile + * @param searchCrit if INCLUSIVE the given quantile is included into the rank. + * @return the normalized rank corresponding to the given quantile. + * @throws IllegalArgumentException if sketch is empty. 
+ * @see QuantileSearchCriteria + */ + double getRank(long quantile, QuantileSearchCriteria searchCrit); + + @Override + LongsSortedViewIterator iterator(); + +} diff --git a/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedViewIterator.java b/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedViewIterator.java new file mode 100644 index 000000000..77510cd5a --- /dev/null +++ b/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedViewIterator.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.quantilescommon; + +/** + * Iterator over quantile sketches of primitive type long. + */ +public final class LongsSortedViewIterator extends SortedViewIterator { + private final long[] quantiles; + + /** + * Constructor. + * @param quantiles the given array of quantiles, which must be ordered. + * @param cumWeights the given array of cumulative weights, which must be ordered, start with the value one, and + * the last value must be equal to N, the total number of items updated to the sketch. 
+ */ + public LongsSortedViewIterator(final long[] quantiles, final long[] cumWeights) { + super(cumWeights); + this.quantiles = quantiles; //SpotBugs EI_EXPOSE_REP2 suppressed by FindBugsExcludeFilter + } + + /** + * Gets the quantile at the current index. + * + *

Don't call this before calling next() for the first time + * or after getting false from next().

+ * + * @return the quantile at the current index. + */ + public long getQuantile() { + return quantiles[index]; + } + +} diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsAPI.java new file mode 100644 index 000000000..2b542a68f --- /dev/null +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsAPI.java @@ -0,0 +1,300 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.quantilescommon; + +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; + +/** + * The Quantiles API for item type long. + * @see QuantilesAPI + * @author Lee Rhodes + * @author Zac Blanco + */ +public interface QuantilesLongsAPI extends QuantilesAPI { + + /** + * This is equivalent to {@link #getCDF(long[], QuantileSearchCriteria) getCDF(splitPoints, INCLUSIVE)} + * @param splitPoints an array of m unique, monotonically increasing items. + * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. + * @throws IllegalArgumentException if sketch is empty. 
+ */ + default double[] getCDF(long[] splitPoints) { + return getCDF(splitPoints, INCLUSIVE); + } + + /** + * Returns an approximation to the Cumulative Distribution Function (CDF) of the input stream + * as a monotonically increasing array of double ranks (or cumulative probabilities) on the interval [0.0, 1.0], + * given a set of splitPoints. + * + *

The resulting approximations have a probabilistic guarantee that can be obtained from the + * getNormalizedRankError(false) function.

+ * + * @param splitPoints an array of m unique, monotonically increasing items + * (of the same type as the input items) + * that divide the item input domain into m+1 overlapping intervals. + * + *

The start of each interval is below the lowest item retained by the sketch + * corresponding to a zero rank or zero probability, and the end of the interval + * is the rank or cumulative probability corresponding to the split point.

+ * + *

The (m+1)th interval represents 100% of the distribution represented by the sketch + * and consistent with the definition of a cumulative probability distribution, thus the (m+1)th + * rank or probability in the returned array is always 1.0.

+ * + *

If a split point exactly equals a retained item of the sketch and the search criterion is:

+ * + *
    + *
  • INCLUSIVE, the resulting cumulative probability will include that item.
  • + *
  • EXCLUSIVE, the resulting cumulative probability will not include the weight of that split point.
  • + *
+ * + *

It is not recommended to include either the minimum or maximum items of the input stream.

+ * + * @param searchCrit the desired search criteria. + * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. + * @throws IllegalArgumentException if sketch is empty. + */ + double[] getCDF(long[] splitPoints, QuantileSearchCriteria searchCrit); + + /** + * Returns the maximum item of the stream. This is provided for convenience and may be different from the + * item returned by getQuantile(1.0). + * + * @return the maximum item of the stream + * @throws IllegalArgumentException if sketch is empty. + */ + long getMaxItem(); + + /** + * Returns the minimum item of the stream. This is provided for convenience and may be different from the + * item returned by getQuantile(0.0). + * + * @return the minimum item of the stream + * @throws IllegalArgumentException if sketch is empty. + */ + long getMinItem(); + + /** + * This is equivalent to {@link #getPMF(long[], QuantileSearchCriteria) getPMF(splitPoints, INCLUSIVE)} + * @param splitPoints an array of m unique, monotonically increasing items. + * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. + * @throws IllegalArgumentException if sketch is empty. + */ + default double[] getPMF(long[] splitPoints) { + return getPMF(splitPoints, INCLUSIVE); + } + + /** + * Returns an approximation to the Probability Mass Function (PMF) of the input stream + * as an array of probability masses as doubles on the interval [0.0, 1.0], + * given a set of splitPoints. + * + *

The resulting approximations have a probabilistic guarantee that can be obtained from the + * getNormalizedRankError(true) function.

+ * + * @param splitPoints an array of m unique, monotonically increasing items + * (of the same type as the input items) + * that divide the item input domain into m+1 consecutive, non-overlapping intervals. + * + *

Each interval except for the end intervals starts with a split point and ends with the next split + * point in sequence.

+ * + *

The first interval starts below the lowest item retained by the sketch + * corresponding to a zero rank or zero probability, and ends with the first split point.

+ * + *

The last (m+1)th interval starts with the last split point and ends after the last + * item retained by the sketch corresponding to a rank or probability of 1.0.

+ * + *

The sum of the probability masses of all (m+1) intervals is 1.0.

+ * + *

If the search criterion is:

+ * + *
    + *
  • INCLUSIVE, and the upper split point of an interval equals an item retained by the sketch, the interval + * will include that item. If the lower split point equals an item retained by the sketch, the interval will exclude + * that item.
  • + *
  • EXCLUSIVE, and the upper split point of an interval equals an item retained by the sketch, the interval + * will exclude that item. If the lower split point equals an item retained by the sketch, the interval will include + * that item.
  • + *
+ * + *

It is not recommended to include either the minimum or maximum items of the input stream.

+ * + * @param searchCrit the desired search criteria. + * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. + * @throws IllegalArgumentException if sketch is empty. + */ + double[] getPMF(long[] splitPoints, QuantileSearchCriteria searchCrit); + + /** + * This is equivalent to {@link #getQuantile(double, QuantileSearchCriteria) getQuantile(rank, INCLUSIVE)} + * @param rank the given normalized rank, a double in the range [0.0, 1.0]. + * @return the approximate quantile given the normalized rank. + * @throws IllegalArgumentException if sketch is empty. + */ + default long getQuantile(double rank) { + return getQuantile(rank, INCLUSIVE); + } + + /** + * Gets the approximate quantile of the given normalized rank and the given search criterion. + * + * @param rank the given normalized rank, a double in the range [0.0, 1.0]. + * @param searchCrit If INCLUSIVE, the given rank includes all quantiles ≤ + * the quantile directly corresponding to the given rank. + * If EXCLUSIVE, the given rank includes all quantiles < + * the quantile directly corresponding to the given rank. + * @return the approximate quantile given the normalized rank. + * @throws IllegalArgumentException if sketch is empty. + * @see org.apache.datasketches.quantilescommon.QuantileSearchCriteria + */ + long getQuantile(double rank, QuantileSearchCriteria searchCrit); + + /** + * Gets the lower bound of the quantile confidence interval in which the quantile of the + * given rank exists. + * + *

Although it is possible to estimate the probability that the true quantile + * exists within the quantile confidence interval specified by the upper and lower quantile bounds, + * it is not possible to guarantee the width of the quantile confidence interval + * as an additive or multiplicative percent of the true quantile.

+ * + * @param rank the given normalized rank + * @return the lower bound of the quantile confidence interval in which the quantile of the + * given rank exists. + * @throws IllegalArgumentException if sketch is empty. + */ + long getQuantileLowerBound(double rank); + + /** + * Gets the upper bound of the quantile confidence interval in which the true quantile of the + * given rank exists. + * + *

Although it is possible to estimate the probability that the true quantile + * exists within the quantile confidence interval specified by the upper and lower quantile bounds, + * it is not possible to guarantee the width of the quantile interval + * as an additive or multiplicative percent of the true quantile.

+ * + * @param rank the given normalized rank + * @return the upper bound of the quantile confidence interval in which the true quantile of the + * given rank exists. + * @throws IllegalArgumentException if sketch is empty. + */ + long getQuantileUpperBound(double rank); + + /** + * This is equivalent to {@link #getQuantiles(double[], QuantileSearchCriteria) getQuantiles(ranks, INCLUSIVE)} + * @param ranks the given array of normalized ranks, each of which must be + * in the interval [0.0,1.0]. + * @return an array of quantiles corresponding to the given array of normalized ranks. + * @throws IllegalArgumentException if sketch is empty. + */ + default long[] getQuantiles(double[] ranks) { + return getQuantiles(ranks, INCLUSIVE); + } + + /** + * Gets an array of quantiles from the given array of normalized ranks. + * + * @param ranks the given array of normalized ranks, each of which must be + * in the interval [0.0,1.0]. + * @param searchCrit if INCLUSIVE, the given ranks include all quantiles ≤ + * the quantile directly corresponding to each rank. + * @return an array of quantiles corresponding to the given array of normalized ranks. + * @throws IllegalArgumentException if sketch is empty. + * @see org.apache.datasketches.quantilescommon.QuantileSearchCriteria + */ + long[] getQuantiles(double[] ranks, QuantileSearchCriteria searchCrit); + + /** + * This is equivalent to {@link #getRank(long, QuantileSearchCriteria) getRank(quantile, INCLUSIVE)} + * @param quantile the given quantile + * @return the normalized rank corresponding to the given quantile + * @throws IllegalArgumentException if sketch is empty. + */ + default double getRank(long quantile) { + return getRank(quantile, INCLUSIVE); + } + + /** + * Gets the normalized rank corresponding to the given quantile. + * + * @param quantile the given quantile + * @param searchCrit if INCLUSIVE the given quantile is included into the rank. 
+ * @return the normalized rank corresponding to the given quantile + * @throws IllegalArgumentException if sketch is empty. + * @see org.apache.datasketches.quantilescommon.QuantileSearchCriteria + */ + double getRank(long quantile, QuantileSearchCriteria searchCrit); + + /** + * This is equivalent to {@link #getRanks(long[], QuantileSearchCriteria) getRanks(quantiles, INCLUSIVE)} + * @param quantiles the given array of quantiles + * @return an array of normalized ranks corresponding to the given array of quantiles. + * @throws IllegalArgumentException if sketch is empty. + */ + default double[] getRanks(long[] quantiles) { + return getRanks(quantiles, INCLUSIVE); + } + + /** + * Gets an array of normalized ranks corresponding to the given array of quantiles and the given + * search criterion. + * + * @param quantiles the given array of quantiles + * @param searchCrit if INCLUSIVE, the given quantiles include the rank directly corresponding to each quantile. + * @return an array of normalized ranks corresponding to the given array of quantiles. + * @throws IllegalArgumentException if sketch is empty. + * @see org.apache.datasketches.quantilescommon.QuantileSearchCriteria + */ + double[] getRanks(long[] quantiles, QuantileSearchCriteria searchCrit); + + /** + * Returns the current number of bytes this Sketch would require if serialized. + * @return the number of bytes this sketch would require if serialized. + */ + int getSerializedSizeBytes(); + + /** + * Gets the sorted view of this sketch + * @return the sorted view of this sketch + */ + LongsSortedView getSortedView(); + + /** + * Gets the iterator for this sketch, which is not sorted. + * @return the iterator for this sketch + */ + QuantilesLongsSketchIterator iterator(); + + /** + * Returns a byte array representation of this sketch. + * @return a byte array representation of this sketch. + */ + byte[] toByteArray(); + + /** + * Updates this sketch with the given item. + * @param item from a stream of items. 
+ */ + void update(long item); + +} diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsSketchIterator.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsSketchIterator.java new file mode 100644 index 000000000..7ed0d9805 --- /dev/null +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsSketchIterator.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.quantilescommon; + +/** + * The quantiles sketch iterator for primitive type long. + * @see QuantilesSketchIterator + * @author Zac Blanco + */ +public interface QuantilesLongsSketchIterator extends QuantilesSketchIterator { + + /** + * Gets the long quantile at the current index. + * + *

Don't call this before calling next() for the first time + * or after getting false from next().

+ * + * @return the long quantile at the current index. + */ + long getQuantile(); + +} + From c0479b299594834325731c94bc27cdc6ad0bd12f Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Wed, 9 Oct 2024 15:01:00 -0700 Subject: [PATCH 06/22] fix findbugs, includeMinMax, QuantilesUtil. --- .../quantilescommon/IncludeMinMax.java | 65 +++++++++++++++++++ .../quantilescommon/QuantilesUtil.java | 15 +++++ tools/FindBugsExcludeFilter.xml | 1 + 3 files changed, 81 insertions(+) diff --git a/src/main/java/org/apache/datasketches/quantilescommon/IncludeMinMax.java b/src/main/java/org/apache/datasketches/quantilescommon/IncludeMinMax.java index 203e338d2..1edfc2054 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/IncludeMinMax.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/IncludeMinMax.java @@ -63,6 +63,24 @@ public FloatsPair(final float[] quantiles, final long[] cumWeights) { } } + /** A simple structure to hold a pair of arrays */ + public static class LongsPair { + /** the array of quantiles */ + public long[] quantiles; + /** the array of associated cumulative weights */ + public long[] cumWeights; + + /** + * Constructor. + * @param quantiles the array of quantiles + * @param cumWeights the array of associated cumulative weights + */ + public LongsPair(final long[] quantiles, final long[] cumWeights) { + this.quantiles = quantiles; + this.cumWeights = cumWeights; + } + } + /** * A simple structure to hold a pair of arrays * @param the item class type @@ -131,6 +149,53 @@ public static DoublesPair includeDoublesMinMax( return new DoublesPair(adjQuantiles, adjCumWeights); } + /** + * The logic to include the min and max of type long. 
+ * @param quantilesIn The array of quantiles + * @param cumWeightsIn The array of associated cumulative weights + * @param maxItem the maximum item of the stream + * @param minItem the minimum item of the stream + * @return a LongsPair + */ + public static LongsPair includeLongsMinMax( + final long[] quantilesIn, + final long[] cumWeightsIn, + final long maxItem, + final long minItem) { + final int lenIn = cumWeightsIn.length; + final boolean adjLow = quantilesIn[0] != minItem; //if true, adjust the low end + final boolean adjHigh = quantilesIn[lenIn - 1] != maxItem; //if true, adjust the high end + int adjLen = lenIn; //this will be the length of the local copies of quantiles and cumWeights + adjLen += adjLow ? 1 : 0; + adjLen += adjHigh ? 1 : 0; + final long[] adjQuantiles; + final long[] adjCumWeights; + if (adjLen > lenIn) { //is any adjustment required at all? + adjQuantiles = new long[adjLen]; + adjCumWeights = new long[adjLen]; + final int offset = adjLow ? 1 : 0; + System.arraycopy(quantilesIn, 0, adjQuantiles, offset, lenIn); + System.arraycopy(cumWeightsIn,0, adjCumWeights, offset, lenIn); + + //Adjust the low end if required. Don't need to adjust weight of next one because it is cumulative. + if (adjLow) { + adjQuantiles[0] = minItem; + adjCumWeights[0] = 1; + } + + if (adjHigh) { + adjQuantiles[adjLen - 1] = maxItem; + adjCumWeights[adjLen - 1] = cumWeightsIn[lenIn - 1]; + adjCumWeights[adjLen - 2] = cumWeightsIn[lenIn - 1] - 1; + } + } else { //both min and max are already in place, no adjustments are required. + adjQuantiles = quantilesIn; + adjCumWeights = cumWeightsIn; + + } //END of Adjust End Points + return new LongsPair(adjQuantiles, adjCumWeights); + } + /** * The logic to include the min and max of type float. 
* @param quantilesIn The array of quantiles diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesUtil.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesUtil.java index 75798c20f..529fd386d 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesUtil.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesUtil.java @@ -67,6 +67,21 @@ public static final void checkDoublesSplitPointsOrder(final double[] values) { } } + /** + * Checks the sequential validity of the given array of long values. + * They must be unique and monotonically increasing. + * @param values the given array of long values + */ + public static void checkLongsSplitPointsOrder(final long[] values) { + Objects.requireNonNull(values); + final int len = values.length; + for (int j = 0; j < len - 1; j++) { + if (values[j] < values[j + 1]) { continue; } + throw new SketchesArgumentException( + "Values must be unique and monotonically increasing."); + } + } + /** * Checks the sequential validity of the given array of float values. * They must be unique, monotonically increasing and not NaN. diff --git a/tools/FindBugsExcludeFilter.xml b/tools/FindBugsExcludeFilter.xml index 4f62a5cd6..d47e78f09 100644 --- a/tools/FindBugsExcludeFilter.xml +++ b/tools/FindBugsExcludeFilter.xml @@ -41,6 +41,7 @@ under the License. + From 7ee5cf3b0d4a74d8fe13b60f23f3797d82f0df78 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Wed, 9 Oct 2024 16:43:31 -0700 Subject: [PATCH 07/22] More changes for 17 --- pom.xml | 74 ++++++++++--------- .../apache/datasketches/kll/KllHelper.java | 25 +++++++ .../datasketches/kll/KllPreambleUtil.java | 29 +++++++- 3 files changed, 92 insertions(+), 36 deletions(-) diff --git a/pom.xml b/pom.xml index 949152680..d6635e6e4 100644 --- a/pom.xml +++ b/pom.xml @@ -18,14 +18,22 @@ specific language governing permissions and limitations under the License. 
--> - + 4.0.0 + + org.apache + apache + 32 + + org.apache.datasketches datasketches-java - 6.1.0-SNAPSHOT + 7.0.0 jar ${project.artifactId} @@ -58,8 +66,8 @@ under the License. - - + GitHub https://github.com/apache/${project.artifactId}/issues @@ -75,10 +83,10 @@ under the License. - 2.2.0 + 4.0.0 - 7.10.0 + 7.10.2 generate_java_files check_cpp_files @@ -96,32 +104,31 @@ under the License. ${charset.encoding} yyyy-MM-dd'T'HH-mm-ss'Z' - - 3.7.1 - 3.13.0 - 3.1.1 - 3.4.1 - 3.2.3 - 3.4.0 - 3.7.0 - 3.0.1 - 3.2.0 - 3.3.1 - 3.2.5 - 3.1.2 - - 0.16.1 - - 0.8.12 - - - 4.3.0 - - 2.16.2 - - - 1.0.0 - 4.9.10 + + 3.7.1 + 3.13.0 + 3.1.3 + 3.5.0 + 3.2.5 + 3.4.2 + 3.8.0 + 3.1.1 + 3.2.0 + 3.3.1 + 3.4.0 + + 4.9.10 + + 0.16.1 + + + 4.3.0 + + 0.8.12 + + 2.17.1 + + 1.0.0 @@ -130,6 +137,7 @@ under the License. datasketches-memory ${datasketches-memory.version}
+ org.testng diff --git a/src/main/java/org/apache/datasketches/kll/KllHelper.java b/src/main/java/org/apache/datasketches/kll/KllHelper.java index d60220948..21188255c 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllHelper.java @@ -42,6 +42,7 @@ import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH; import static org.apache.datasketches.kll.KllSketch.SketchType.ITEMS_SKETCH; +import static org.apache.datasketches.kll.KllSketch.SketchType.LONGS_SKETCH; import static org.apache.datasketches.quantilescommon.QuantilesAPI.UNSUPPORTED_MSG; import java.nio.ByteOrder; @@ -591,6 +592,11 @@ static void addEmptyTopLevelToCompletelyFullSketch(final KllSketch sketch) { float minFloat = Float.NaN; float maxFloat = Float.NaN; + long[] myCurLongItemsArr = null; + long[] myNewLongItemsArr = null; + long minLong = Long.MAX_VALUE; + long maxLong = Long.MIN_VALUE; + Object[] myCurItemsArr = null; Object[] myNewItemsArr = null; Object minItem = null; @@ -611,6 +617,14 @@ else if (sketchType == FLOATS_SKETCH) { maxFloat = fltSk.getMaxItem(); //assert we are following a certain growth scheme assert myCurFloatItemsArr.length == myCurTotalItemsCapacity; + } + else if (sketchType == LONGS_SKETCH) { + final KllLongsSketch lngSk = (KllLongsSketch) sketch; + myCurLongItemsArr = lngSk.getLongItemsArray(); + minLong = lngSk.getMinItem(); + maxLong = lngSk.getMaxItem(); + //assert we are following a certain growth scheme + assert myCurLongItemsArr.length == myCurTotalItemsCapacity; } else { //sketchType == ITEMS_SKETCH final KllItemsSketch itmSk = (KllItemsSketch) sketch; @@ -654,6 +668,11 @@ else if (sketchType == FLOATS_SKETCH) { myNewFloatItemsArr = new float[myNewTotalItemsCapacity]; // copy and shift the current items data into the new array System.arraycopy(myCurFloatItemsArr, 0, myNewFloatItemsArr, deltaItemsCap, 
myCurTotalItemsCapacity); + } + else if (sketchType == LONGS_SKETCH) { + myNewLongItemsArr = new long[myNewTotalItemsCapacity]; + // copy and shift the current items data into the new array + System.arraycopy(myCurLongItemsArr, 0, myNewLongItemsArr, deltaItemsCap, myCurTotalItemsCapacity); } else { //sketchType == ITEMS_SKETCH myNewItemsArr = new Object[myNewTotalItemsCapacity]; @@ -681,6 +700,12 @@ else if (sketchType == FLOATS_SKETCH) { fltSk.setMinItem(minFloat); fltSk.setMaxItem(maxFloat); fltSk.setFloatItemsArray(myNewFloatItemsArr); + } + else if (sketchType == LONGS_SKETCH) { + final KllLongsSketch lngSk = (KllLongsSketch) sketch; + lngSk.setMinItem(minLong); + lngSk.setMaxItem(maxLong); + lngSk.setLongItemsArray(myNewLongItemsArr); } else { //sketchType == ITEMS_SKETCH final KllItemsSketch itmSk = (KllItemsSketch) sketch; diff --git a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java index 18e4de3f0..74a8009ea 100644 --- a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java +++ b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java @@ -28,6 +28,7 @@ import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH; import static org.apache.datasketches.kll.KllSketch.SketchType.ITEMS_SKETCH; +import static org.apache.datasketches.kll.KllSketch.SketchType.LONGS_SKETCH; import java.util.Objects; @@ -210,7 +211,7 @@ static String toString(final Memory mem, final SketchType sketchType, final bool * Used primarily in testing. * * @param mem the given Memory - * @param sketchType the sketch type: FLOATS_SKETCH, DOUBLES_SKETCH, or ITEMS_SKETCH. + * @param sketchType the sketch type: FLOATS_SKETCH, DOUBLES_SKETCH, LONGS_SKETCH, or ITEMS_SKETCH. * @param includeData if true, includes detail of retained data. * @param serDe must be supplied for KllItemsSketch, otherwise can be null. 
* @return the summary string. @@ -229,7 +230,7 @@ static String toString(final Memory mem, final SketchType sketchType, final //final boolean updatable = mySketchStructure == UPDATABLE; final boolean emptyFlag = memVal.emptyFlag; final int sketchBytes = memVal.sketchBytes; - final int typeBytes = sketchType == DOUBLES_SKETCH ? Double.BYTES : Float.BYTES; + final int typeBytes = sketchType.getBytes(); final int familyID = getMemoryFamilyID(mem); final String famName = idToFamily(familyID).toString(); @@ -295,6 +296,11 @@ static String toString(final Memory mem, final SketchType sketchType, final offsetBytes += typeBytes; sb.append(mem.getFloat(offsetBytes)).append(LS); offsetBytes += typeBytes; + } else if (sketchType == LONGS_SKETCH) { + sb.append(mem.getLong(offsetBytes)).append(LS); + offsetBytes += typeBytes; + sb.append(mem.getLong(offsetBytes)).append(LS); + offsetBytes += typeBytes; } else { //ITEMS_SKETCH sb.append("<<>>").append(LS); } @@ -311,6 +317,11 @@ static String toString(final Memory mem, final SketchType sketchType, final sb.append(mem.getFloat(offsetBytes)).append(LS); offsetBytes += typeBytes; } + } else if (sketchType == LONGS_SKETCH) { + for (int i = 0; i < itemsSpace; i++) { + sb.append(mem.getLong(offsetBytes)).append(LS); + offsetBytes += typeBytes; + } } else { //ITEMS_SKETCH sb.append("<<>>").append(LS); } @@ -338,6 +349,11 @@ static String toString(final Memory mem, final SketchType sketchType, final offsetBytes += typeBytes; sb.append(mem.getFloat(offsetBytes)).append(LS); offsetBytes += typeBytes; + } else if (sketchType == LONGS_SKETCH) { + sb.append(mem.getLong(offsetBytes)).append(LS); + offsetBytes += typeBytes; + sb.append(mem.getLong(offsetBytes)).append(LS); + offsetBytes += typeBytes; } else { //ITEMS_SKETCH sb.append(serDe.deserializeFromMemory(mem, offsetBytes, 1)[0]).append(LS); offsetBytes += serDe.sizeOf(mem, offsetBytes, 1); @@ -346,7 +362,7 @@ static String toString(final Memory mem, final SketchType sketchType, final } 
sb.append("RETAINED DATA").append(LS); - final int itemSpace = (sketchBytes - offsetBytes) / typeBytes; + final int itemSpace = (sketchBytes - offsetBytes) / (typeBytes == 0 ? 1 : typeBytes); if (sketchType == DOUBLES_SKETCH) { for (int i = 0; i < itemSpace; i++) { sb.append(i + ", " + mem.getDouble(offsetBytes)).append(LS); @@ -357,6 +373,11 @@ static String toString(final Memory mem, final SketchType sketchType, final sb.append(i + ", " + mem.getFloat(offsetBytes)).append(LS); offsetBytes += typeBytes; } + } else if (sketchType == LONGS_SKETCH) { + for (int i = 0; i < itemSpace; i++) { + sb.append(i + ", " + mem.getLong(offsetBytes)).append(LS); + offsetBytes += typeBytes; + } } else { //ITEMS_SKETCH final T[] itemsArr = serDe.deserializeFromMemory(mem, offsetBytes, retainedItems); for (int i = 0; i < itemsArr.length; i++) { @@ -372,6 +393,8 @@ static String toString(final Memory mem, final SketchType sketchType, final sb.append(mem.getDouble(DATA_START_ADR_SINGLE_ITEM)).append(LS); } else if (sketchType == FLOATS_SKETCH) { sb.append(mem.getFloat(DATA_START_ADR_SINGLE_ITEM)).append(LS); + } else if (sketchType == LONGS_SKETCH) { + sb.append(mem.getLong(DATA_START_ADR_SINGLE_ITEM)).append(LS); } else { //ITEMS_SKETCH sb.append(serDe.deserializeFromMemory(mem, DATA_START_ADR_SINGLE_ITEM, 1)[0]).append(LS); } From 2b44c6d963bb7733c784cf443d046cef1cb81cbe Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Wed, 9 Oct 2024 18:20:02 -0700 Subject: [PATCH 08/22] Added more robust checking of closing resources. 
--- .../hll/DirectAuxHashMapTest.java | 1 + .../DirectQuantilesMemoryRequestTest.java | 12 +++-- .../quantiles/DoublesSketchTest.java | 6 ++- .../theta/DirectQuickSelectSketchTest.java | 6 ++- .../theta/HeapifyWrapSerVer1and2Test.java | 48 +++++++++---------- .../datasketches/theta/UnionImplTest.java | 4 ++ 6 files changed, 45 insertions(+), 32 deletions(-) diff --git a/src/test/java/org/apache/datasketches/hll/DirectAuxHashMapTest.java b/src/test/java/org/apache/datasketches/hll/DirectAuxHashMapTest.java index 98039616f..c913af378 100644 --- a/src/test/java/org/apache/datasketches/hll/DirectAuxHashMapTest.java +++ b/src/test/java/org/apache/datasketches/hll/DirectAuxHashMapTest.java @@ -90,6 +90,7 @@ public void checkGrow() { assertTrue(hllSketch.isMemory()); assertFalse(hllSketch.isOffHeap()); assertFalse(hllSketch.isSameResource(wmem)); + assertFalse(wmem.isAlive()); } @Test diff --git a/src/test/java/org/apache/datasketches/quantiles/DirectQuantilesMemoryRequestTest.java b/src/test/java/org/apache/datasketches/quantiles/DirectQuantilesMemoryRequestTest.java index 6552d0ae3..c252eef3c 100644 --- a/src/test/java/org/apache/datasketches/quantiles/DirectQuantilesMemoryRequestTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/DirectQuantilesMemoryRequestTest.java @@ -51,7 +51,7 @@ public void checkLimitedMemoryScenarios() { //Requesting application //########## Owning Implementation // This part would actually be part of the Memory owning implementation so it is faked here WritableMemory wmem = WritableMemory.allocateDirect(initBytes, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); - + WritableMemory wmem2 = wmem; println("Initial mem size: " + wmem.getCapacity()); //########## Receiving Application @@ -74,6 +74,7 @@ public void checkLimitedMemoryScenarios() { //Requesting application //The actual Memory has been re-allocated several times, // so the above wmem reference is invalid. 
println("\nFinal mem size: " + wmem.getCapacity()); + assertFalse(wmem2.isAlive()); } @Test @@ -83,7 +84,7 @@ public void checkGrowBaseBuf() { final int initBytes = (4 + (u / 2)) << 3; // not enough to hold everything WritableMemory wmem = WritableMemory.allocateDirect(initBytes, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); - + WritableMemory wmem2 = wmem; println("Initial mem size: " + wmem.getCapacity()); final UpdateDoublesSketch usk1 = DoublesSketch.builder().setK(k).build(wmem); for (int i = 1; i <= u; i++) { @@ -92,6 +93,7 @@ public void checkGrowBaseBuf() { final int currentSpace = usk1.getCombinedBufferItemCapacity(); println("curCombBufItemCap: " + currentSpace); assertEquals(currentSpace, 2 * k); + assertFalse(wmem2.isAlive()); } @Test @@ -101,7 +103,7 @@ public void checkGrowCombBuf() { final int initBytes = ((2 * k) + 4) << 3; //just room for BB WritableMemory wmem = WritableMemory.allocateDirect(initBytes, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); - + WritableMemory wmem2 = wmem; println("Initial mem size: " + wmem.getCapacity()); final UpdateDoublesSketch usk1 = DoublesSketch.builder().setK(k).build(wmem); for (int i = 1; i <= u; i++) { @@ -113,6 +115,7 @@ public void checkGrowCombBuf() { final int newSpace = usk1.getCombinedBufferItemCapacity(); println("newCombBurItemCap: " + newSpace); assertEquals(newCB.length, 3 * k); + assertFalse(wmem2.isAlive()); } @Test @@ -124,7 +127,7 @@ public void checkGrowFromWrappedEmptySketch() { final Memory origSketchMem = Memory.wrap(usk1.toByteArray()); WritableMemory wmem = WritableMemory.allocateDirect(initBytes, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); - + WritableMemory wmem2 = wmem; origSketchMem.copyTo(0, wmem, 0, initBytes); UpdateDoublesSketch usk2 = DirectUpdateDoublesSketch.wrapInstance(wmem); assertTrue(wmem.isSameResource(usk2.getMemory())); @@ -141,6 +144,7 @@ public void checkGrowFromWrappedEmptySketch() { final int expectedSize = 
COMBINED_BUFFER + ((2 * k) << 3); assertEquals(mem2.getCapacity(), expectedSize); + assertFalse(wmem2.isAlive()); } @Test diff --git a/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java b/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java index 8f4243bfe..ea928ba02 100644 --- a/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java @@ -28,7 +28,6 @@ import java.nio.ByteOrder; import org.apache.datasketches.memory.DefaultMemoryRequestServer; -//import org.apache.datasketches.memory.WritableHandle; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.quantilescommon.DoublesSortedView; import org.apache.datasketches.quantilescommon.DoublesSortedViewIterator; @@ -144,19 +143,21 @@ public void checkEmptyExceptions() { public void directSketchShouldMoveOntoHeapEventually() { WritableMemory wmem = WritableMemory.allocateDirect(1000, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); + WritableMemory wmem2 = wmem; UpdateDoublesSketch sketch = DoublesSketch.builder().build(wmem); Assert.assertTrue(sketch.isSameResource(wmem)); for (int i = 0; i < 1000; i++) { sketch.update(i); } Assert.assertFalse(sketch.isSameResource(wmem)); + Assert.assertFalse(wmem2.isAlive()); } @Test public void directSketchShouldMoveOntoHeapEventually2() { int i = 0; WritableMemory wmem = WritableMemory.allocateDirect(50, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); - + WritableMemory wmem2 = wmem; UpdateDoublesSketch sketch = DoublesSketch.builder().build(wmem); Assert.assertTrue(sketch.isSameResource(wmem)); for (; i < 1000; i++) { @@ -167,6 +168,7 @@ public void directSketchShouldMoveOntoHeapEventually2() { break; } } + Assert.assertFalse(wmem2.isAlive()); } @Test diff --git a/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java 
b/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java index 63957c3cd..ef6004a1b 100644 --- a/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java @@ -876,11 +876,12 @@ public void checkMoveAndResize() { int bytes = Sketches.getMaxUpdateSketchBytes(k); WritableMemory wmem = WritableMemory.allocateDirect(bytes / 2, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); - + WritableMemory wmem2 = wmem; UpdateSketch sketch = Sketches.updateSketchBuilder().setNominalEntries(k).build(wmem); assertTrue(sketch.isSameResource(wmem)); for (int i = 0; i < u; i++) { sketch.update(i); } assertFalse(sketch.isSameResource(wmem)); + assertFalse(wmem2.isAlive()); } @Test @@ -889,7 +890,7 @@ public void checkReadOnlyRebuildResize() { int u = 2 * k; int bytes = Sketches.getMaxUpdateSketchBytes(k); WritableMemory wmem = WritableMemory.allocateDirect(bytes / 2, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); - + WritableMemory wmem2 = wmem; UpdateSketch sketch = Sketches.updateSketchBuilder().setNominalEntries(k).build(wmem); for (int i = 0; i < u; i++) { sketch.update(i); } double est1 = sketch.getEstimate(); @@ -910,6 +911,7 @@ public void checkReadOnlyRebuildResize() { } catch (SketchesReadOnlyException e) { //expected } + assertFalse(wmem2.isAlive()); } @Test diff --git a/src/test/java/org/apache/datasketches/theta/HeapifyWrapSerVer1and2Test.java b/src/test/java/org/apache/datasketches/theta/HeapifyWrapSerVer1and2Test.java index 6c90eafe4..5e81b1808 100644 --- a/src/test/java/org/apache/datasketches/theta/HeapifyWrapSerVer1and2Test.java +++ b/src/test/java/org/apache/datasketches/theta/HeapifyWrapSerVer1and2Test.java @@ -293,7 +293,7 @@ public void checkWrapCompactSketchAssumedDefaultSeed() { assertEquals(sv3cskResult.getEstimate(), sv3usk.getEstimate()); assertEquals(sv3cskResult.getSeedHash(), seedHash); 
assertTrue(sv3cskResult.isDirect()); - try { wmem.close(); } catch (Exception e) {} + if (wmem.isAlive()) { wmem.close(); } //SV2 test wmem = putOffHeap(BackwardConversions.convertSerVer3toSerVer2(sv3csk, seed)); @@ -301,7 +301,7 @@ public void checkWrapCompactSketchAssumedDefaultSeed() { assertEquals(sv3cskResult.getEstimate(), sv3usk.getEstimate()); assertEquals(sv3cskResult.getSeedHash(), seedHash); assertFalse(sv3cskResult.isDirect()); - try { wmem.close(); } catch (Exception e) {} + if (wmem.isAlive()) { wmem.close(); } //SV1 test wmem = putOffHeap(BackwardConversions.convertSerVer3toSerVer1(sv3csk)); @@ -309,7 +309,7 @@ public void checkWrapCompactSketchAssumedDefaultSeed() { assertEquals(sv3cskResult.getEstimate(), sv3usk.getEstimate()); assertEquals(sv3cskResult.getSeedHash(), seedHash); assertFalse(sv3cskResult.isDirect()); - try { wmem.close(); } catch (Exception e) {} + if (wmem.isAlive()) { wmem.close(); } } @Test @@ -329,7 +329,7 @@ public void checkWrapCompactSketchAssumedDifferentSeed() { assertEquals(sv3cskResult.getEstimate(), sv3usk.getEstimate()); assertEquals(sv3cskResult.getSeedHash(), seedHash); assertTrue(sv3cskResult.isDirect()); - try { wmem.close(); } catch (Exception e) {} + if (wmem.isAlive()) { wmem.close(); } //SV2 test wmem = putOffHeap(BackwardConversions.convertSerVer3toSerVer2(sv3csk, seed)); @@ -337,7 +337,7 @@ public void checkWrapCompactSketchAssumedDifferentSeed() { assertEquals(sv3cskResult.getEstimate(), sv3usk.getEstimate()); assertEquals(sv3cskResult.getSeedHash(), seedHash); assertFalse(sv3cskResult.isDirect()); - try { wmem.close(); } catch (Exception e) {} + if (wmem.isAlive()) { wmem.close(); } //SV1 test wmem = putOffHeap(BackwardConversions.convertSerVer3toSerVer1(sv3csk)); @@ -345,7 +345,7 @@ public void checkWrapCompactSketchAssumedDifferentSeed() { assertEquals(sv3cskResult.getEstimate(), sv3usk.getEstimate()); assertEquals(sv3cskResult.getSeedHash(), defaultSeedHash); assertFalse(sv3cskResult.isDirect()); - try { 
wmem.close(); } catch (Exception e) {} + if (wmem.isAlive()) { wmem.close(); } } @Test @@ -365,7 +365,7 @@ public void checkWrapCompactSketchGivenDefaultSeed() { assertEquals(sv3cskResult.getEstimate(), sv3usk.getEstimate()); assertEquals(sv3cskResult.getSeedHash(), seedHash); assertTrue(sv3cskResult.isDirect()); - try { wmem.close(); } catch (Exception e) {} + if (wmem.isAlive()) { wmem.close(); } //SV2 test wmem = putOffHeap(BackwardConversions.convertSerVer3toSerVer2(sv3csk, seed)); @@ -373,7 +373,7 @@ public void checkWrapCompactSketchGivenDefaultSeed() { assertEquals(sv3cskResult.getEstimate(), sv3usk.getEstimate()); assertEquals(sv3cskResult.getSeedHash(), seedHash); assertFalse(sv3cskResult.isDirect()); - try { wmem.close(); } catch (Exception e) {} + if (wmem.isAlive()) { wmem.close(); } //SV1 test wmem = putOffHeap(BackwardConversions.convertSerVer3toSerVer1(sv3csk)); @@ -381,7 +381,7 @@ public void checkWrapCompactSketchGivenDefaultSeed() { assertEquals(sv3cskResult.getEstimate(), sv3usk.getEstimate()); assertEquals(sv3cskResult.getSeedHash(), seedHash); assertFalse(sv3cskResult.isDirect()); - try { wmem.close(); } catch (Exception e) {/* ignore */} + if (wmem.isAlive()) { wmem.close(); } } @Test @@ -401,7 +401,7 @@ public void checkWrapCompactSketchGivenDifferentSeed() { assertEquals(sv3cskResult.getEstimate(), sv3usk.getEstimate()); assertEquals(sv3cskResult.getSeedHash(), seedHash); assertTrue(sv3cskResult.isDirect()); - try { wmem.close(); } catch (Exception e) {} + if (wmem.isAlive()) { wmem.close(); } //SV2 test wmem = putOffHeap(BackwardConversions.convertSerVer3toSerVer2(sv3csk, seed)); @@ -409,7 +409,7 @@ public void checkWrapCompactSketchGivenDifferentSeed() { assertEquals(sv3cskResult.getEstimate(), sv3usk.getEstimate()); assertEquals(sv3cskResult.getSeedHash(), seedHash); assertFalse(sv3cskResult.isDirect()); - try { wmem.close(); } catch (Exception e) {} + if (wmem.isAlive()) { wmem.close(); } //SV1 test wmem = 
putOffHeap(BackwardConversions.convertSerVer3toSerVer1(sv3csk)); @@ -417,7 +417,7 @@ public void checkWrapCompactSketchGivenDifferentSeed() { assertEquals(sv3cskResult.getEstimate(), sv3usk.getEstimate()); assertEquals(sv3cskResult.getSeedHash(), seedHash); assertFalse(sv3cskResult.isDirect()); - try { wmem.close(); } catch (Exception e) {} + if (wmem.isAlive()) { wmem.close(); } } @Test @@ -437,7 +437,7 @@ public void checkWrapSketchAssumedDefaultSeed() { assertEquals(sv3cskResult.getEstimate(), sv3usk.getEstimate()); assertEquals(sv3cskResult.getSeedHash(), seedHash); assertTrue(sv3cskResult.isDirect()); - try { wmem.close(); } catch (Exception e) {} + if (wmem.isAlive()) { wmem.close(); } //SV2 test wmem = putOffHeap(BackwardConversions.convertSerVer3toSerVer2(sv3csk, seed)); @@ -445,7 +445,7 @@ public void checkWrapSketchAssumedDefaultSeed() { assertEquals(sv3cskResult.getEstimate(), sv3usk.getEstimate()); assertEquals(sv3cskResult.getSeedHash(), seedHash); assertFalse(sv3cskResult.isDirect()); - try { wmem.close(); } catch (Exception e) {} + if (wmem.isAlive()) { wmem.close(); } //SV1 test wmem = putOffHeap(BackwardConversions.convertSerVer3toSerVer1(sv3csk)); @@ -453,7 +453,7 @@ public void checkWrapSketchAssumedDefaultSeed() { assertEquals(sv3cskResult.getEstimate(), sv3usk.getEstimate()); assertEquals(sv3cskResult.getSeedHash(), seedHash); assertFalse(sv3cskResult.isDirect()); - try { wmem.close(); } catch (Exception e) {} + if (wmem.isAlive()) { wmem.close(); } } @Test @@ -473,7 +473,7 @@ public void checkWrapSketchAssumedDifferentSeed() { assertEquals(sv3cskResult.getEstimate(), sv3usk.getEstimate()); assertEquals(sv3cskResult.getSeedHash(), seedHash); assertTrue(sv3cskResult.isDirect()); - try { wmem.close(); } catch (Exception e) {} + if (wmem.isAlive()) { wmem.close(); } //SV2 test wmem = putOffHeap(BackwardConversions.convertSerVer3toSerVer2(sv3csk, seed)); @@ -481,7 +481,7 @@ public void checkWrapSketchAssumedDifferentSeed() { 
assertEquals(sv3cskResult.getEstimate(), sv3usk.getEstimate()); assertEquals(sv3cskResult.getSeedHash(), seedHash); assertFalse(sv3cskResult.isDirect()); - try { wmem.close(); } catch (Exception e) {} + if (wmem.isAlive()) { wmem.close(); } //SV1 test wmem = putOffHeap(BackwardConversions.convertSerVer3toSerVer1(sv3csk)); @@ -489,7 +489,7 @@ public void checkWrapSketchAssumedDifferentSeed() { assertEquals(sv3cskResult.getEstimate(), sv3usk.getEstimate()); assertEquals(sv3cskResult.getSeedHash(), defaultSeedHash); assertFalse(sv3cskResult.isDirect()); - try { wmem.close(); } catch (Exception e) {} + if (wmem.isAlive()) { wmem.close(); } } @Test @@ -509,7 +509,7 @@ public void checkWrapSketchGivenDefaultSeed() { assertEquals(sv3cskResult.getEstimate(), sv3usk.getEstimate()); assertEquals(sv3cskResult.getSeedHash(), seedHash); assertTrue(sv3cskResult.isDirect()); - try { wmem.close(); } catch (Exception e) {} + if (wmem.isAlive()) { wmem.close(); } //SV2 test wmem = putOffHeap(BackwardConversions.convertSerVer3toSerVer2(sv3csk, seed)); @@ -517,7 +517,7 @@ public void checkWrapSketchGivenDefaultSeed() { assertEquals(sv3cskResult.getEstimate(), sv3usk.getEstimate()); assertEquals(sv3cskResult.getSeedHash(), seedHash); assertFalse(sv3cskResult.isDirect()); - try { wmem.close(); } catch (Exception e) {} + if (wmem.isAlive()) { wmem.close(); } //SV1 test wmem = putOffHeap(BackwardConversions.convertSerVer3toSerVer1(sv3csk)); @@ -525,7 +525,7 @@ public void checkWrapSketchGivenDefaultSeed() { assertEquals(sv3cskResult.getEstimate(), sv3usk.getEstimate()); assertEquals(sv3cskResult.getSeedHash(), seedHash); assertFalse(sv3cskResult.isDirect()); - try { wmem.close(); } catch (Exception e) {} + if (wmem.isAlive()) { wmem.close(); } } @Test @@ -545,7 +545,7 @@ public void checkWrapSketchGivenDifferentSeed() { assertEquals(sv3cskResult.getEstimate(), sv3usk.getEstimate()); assertEquals(sv3cskResult.getSeedHash(), seedHash); assertTrue(sv3cskResult.isDirect()); - try { 
wmem.close(); } catch (Exception e) {} + if (wmem.isAlive()) { wmem.close(); } //SV2 test wmem = putOffHeap(BackwardConversions.convertSerVer3toSerVer2(sv3csk, seed)); @@ -553,7 +553,7 @@ public void checkWrapSketchGivenDifferentSeed() { assertEquals(sv3cskResult.getEstimate(), sv3usk.getEstimate()); assertEquals(sv3cskResult.getSeedHash(), seedHash); assertFalse(sv3cskResult.isDirect()); - try { wmem.close(); } catch (Exception e) {} + if (wmem.isAlive()) { wmem.close(); } //SV1 test wmem = putOffHeap(BackwardConversions.convertSerVer3toSerVer1(sv3csk)); @@ -561,7 +561,7 @@ public void checkWrapSketchGivenDifferentSeed() { assertEquals(sv3cskResult.getEstimate(), sv3usk.getEstimate()); assertEquals(sv3cskResult.getSeedHash(), seedHash); assertFalse(sv3cskResult.isDirect()); - try { wmem.close(); } catch (Exception e) {} + if (wmem.isAlive()) { wmem.close(); } } private static WritableMemory putOffHeap(Memory heapMem) { diff --git a/src/test/java/org/apache/datasketches/theta/UnionImplTest.java b/src/test/java/org/apache/datasketches/theta/UnionImplTest.java index f28212061..e569175e7 100644 --- a/src/test/java/org/apache/datasketches/theta/UnionImplTest.java +++ b/src/test/java/org/apache/datasketches/theta/UnionImplTest.java @@ -200,10 +200,12 @@ public void checkMoveAndResize() { ResourceScope scope = ResourceScope.newConfinedScope(); final WritableMemory wmem = WritableMemory.allocateDirect(bytes / 2, 1, scope, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); + final WritableMemory wmemA = wmem; final UpdateSketch sketch = Sketches.updateSketchBuilder().setNominalEntries(k).build(wmem); assertTrue(sketch.isSameResource(wmem)); final WritableMemory wmem2 = WritableMemory.allocateDirect(bytes / 2, 1, scope, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); + final WritableMemory wmemB = wmem2; final Union union = SetOperation.builder().buildUnion(wmem2); assertTrue(union.isSameResource(wmem2)); @@ -212,6 +214,8 @@ public void 
checkMoveAndResize() { final Union union2 = SetOperation.builder().buildUnion(); //on-heap union assertFalse(union2.isSameResource(wmem2)); //obviously not + assertFalse(wmemA.isAlive()); + assertFalse(wmemB.isAlive()); } @Test From 935099185b1aaeed35bc8da4f55e366abdd2e153 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Thu, 10 Oct 2024 11:10:57 -0700 Subject: [PATCH 09/22] remove extraneous offset increment --- src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java index 74a8009ea..e11228669 100644 --- a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java +++ b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java @@ -383,7 +383,6 @@ static String toString(final Memory mem, final SketchType sketchType, final for (int i = 0; i < itemsArr.length; i++) { sb.append(i + ", " + serDe.toString(itemsArr[i])).append(LS); } - offsetBytes += serDe.sizeOf(mem, offsetBytes, retainedItems); } } else if (myStructure == COMPACT_SINGLE) { From e2f6b855c1ade13980a5ce42f3beb0db22f9372e Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Sun, 13 Oct 2024 10:51:42 -0700 Subject: [PATCH 10/22] Updates to ds-java17 --- README.md | 2 - .../datasketches/common/MemoryStatus.java | 54 +++++++++++++ .../org/apache/datasketches/common/Util.java | 3 +- .../datasketches/hll/DirectHllArray.java | 4 - .../datasketches/kll/KllHeapItemsSketch.java | 2 +- .../datasketches/kll/KllPreambleUtil.java | 1 - .../quantiles/DirectUpdateDoublesSketch.java | 2 +- .../quantilescommon/DoublesSortedView.java | 5 +- .../GenericInequalitySearch.java | 2 +- .../quantilescommon/InequalitySearch.java | 2 +- .../QuantileSearchCriteria.java | 2 +- .../quantilescommon/QuantilesAPI.java | 2 +- .../datasketches/req/BaseReqSketch.java | 2 +- .../apache/datasketches/theta/AnotBimpl.java | 6 -- 
.../theta/ConcurrentHeapThetaBuffer.java | 4 +- .../theta/ConcurrentSharedThetaSketch.java | 15 ++-- .../theta/DirectCompactSketch.java | 6 +- .../theta/DirectQuickSelectSketch.java | 4 + .../theta/DirectQuickSelectSketchR.java | 6 +- .../theta/EmptyCompactSketch.java | 10 --- .../datasketches/theta/HeapCompactSketch.java | 10 --- .../datasketches/theta/HeapUpdateSketch.java | 10 --- .../datasketches/theta/IntersectionImpl.java | 14 +++- .../apache/datasketches/theta/Rebuilder.java | 2 +- .../datasketches/theta/SetOperation.java | 17 +--- .../theta/SetOperationBuilder.java | 2 +- .../datasketches/theta/SingleItemSketch.java | 10 --- .../org/apache/datasketches/theta/Sketch.java | 42 ++-------- .../apache/datasketches/theta/Sketches.java | 10 +-- .../apache/datasketches/theta/UnionImpl.java | 14 +++- .../BloomFilterCrossLanguageTest.java | 80 +++++++++++++++++++ .../filters/bloomfilter/BloomFilterTest.java | 2 - 32 files changed, 202 insertions(+), 145 deletions(-) create mode 100644 src/main/java/org/apache/datasketches/common/MemoryStatus.java create mode 100644 src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterCrossLanguageTest.java diff --git a/README.md b/README.md index 3190036d1..80517d304 100644 --- a/README.md +++ b/README.md @@ -18,8 +18,6 @@ --> [![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.apache.datasketches/datasketches-java/badge.svg)](https://maven-badges.herokuapp.com/maven-central/org.apache.datasketches/datasketches-java) -[![Language grade: Java](https://img.shields.io/lgtm/grade/java/g/apache/datasketches-java.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/apache/datasketches-java/context:java) -[![Total alerts](https://img.shields.io/lgtm/alerts/g/apache/datasketches-java.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/apache/datasketches-java/alerts/) [![Coverage 
Status](https://coveralls.io/repos/github/apache/datasketches-java/badge.svg)](https://coveralls.io/github/apache/datasketches-java) ================= diff --git a/src/main/java/org/apache/datasketches/common/MemoryStatus.java b/src/main/java/org/apache/datasketches/common/MemoryStatus.java new file mode 100644 index 000000000..564797e32 --- /dev/null +++ b/src/main/java/org/apache/datasketches/common/MemoryStatus.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.common; + +import org.apache.datasketches.memory.Memory; + +/** + * Methods for inquiring the status of a backing Memory object. + */ +public interface MemoryStatus { + + /** + * Returns true if this object's internal data is backed by a Memory object, + * which may be on-heap or off-heap. + * @return true if this object's internal data is backed by a Memory object. + */ + default boolean hasMemory() { return false; } + + /** + * Returns true if this object's internal data is backed by direct (off-heap) Memory. + * @return true if this object's internal data is backed by direct (off-heap) Memory. 
+ */ + default boolean isDirect() { return false; } + + /** + * Returns true if the backing resource of this is identical with the backing resource + * of that. The capacities must be the same. If this is a region, + * the region offset must also be the same. + * + * @param that A different non-null and alive Memory object. + * @return true if the backing resource of this is identical with the backing resource + * of that. + * @throws SketchesArgumentException if that is not alive (already closed). + */ + default boolean isSameResource(final Memory that) { return false; } + +} diff --git a/src/main/java/org/apache/datasketches/common/Util.java b/src/main/java/org/apache/datasketches/common/Util.java index 729b92f3f..19a8ee614 100644 --- a/src/main/java/org/apache/datasketches/common/Util.java +++ b/src/main/java/org/apache/datasketches/common/Util.java @@ -245,7 +245,8 @@ public static String zeroPad(final String s, final int fieldLength) { /** * Prepend or postpend the given string with the given character to fill the given field length. - * If the given string is equal to or greater than the given field length, it will be returned without modification. + * If the given string is equal to or greater than the given field length, it will be returned + * without modification. 
* @param s the given string * @param fieldLength the desired field length * @param padChar the desired pad character diff --git a/src/main/java/org/apache/datasketches/hll/DirectHllArray.java b/src/main/java/org/apache/datasketches/hll/DirectHllArray.java index 07835da69..e0c66c0aa 100644 --- a/src/main/java/org/apache/datasketches/hll/DirectHllArray.java +++ b/src/main/java/org/apache/datasketches/hll/DirectHllArray.java @@ -56,7 +56,6 @@ abstract class DirectHllArray extends AbstractHllArray { WritableMemory wmem; Memory mem; Object memObj; - //long memAdd; final boolean compact; private static int checkMemCompactFlag(final WritableMemory wmem, final int lgConfigK) { @@ -70,7 +69,6 @@ private static int checkMemCompactFlag(final WritableMemory wmem, final int lgCo this.wmem = wmem; mem = wmem; memObj = wmem.getArray(); - //memAdd = wmem.getCumulativeOffset(0L); compact = extractCompactFlag(mem); insertEmptyFlag(wmem, false); } @@ -81,7 +79,6 @@ private static int checkMemCompactFlag(final WritableMemory wmem, final int lgCo wmem = null; this.mem = mem; memObj = ((WritableMemory) mem).getArray(); - //memAdd = mem.getCumulativeOffset(0L); compact = extractCompactFlag(mem); } @@ -90,7 +87,6 @@ final void updateMemory(final WritableMemory newWmem) { wmem = newWmem; mem = newWmem; memObj = wmem.getArray(); - //memAdd = wmem.getCumulativeOffset(0L); } @Override diff --git a/src/main/java/org/apache/datasketches/kll/KllHeapItemsSketch.java b/src/main/java/org/apache/datasketches/kll/KllHeapItemsSketch.java index e58516fd3..87e3a542a 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHeapItemsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllHeapItemsSketch.java @@ -36,7 +36,7 @@ import org.apache.datasketches.memory.WritableMemory; /** - * This class implements an on-heap doubles KllSketch. + * This class implements an on-heap items KllSketch. * *

Please refer to the documentation in the package-info:
* {@link org.apache.datasketches.kll}

diff --git a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java index e11228669..f2c3847ae 100644 --- a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java +++ b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java @@ -227,7 +227,6 @@ static String toString(final Memory mem, final SketchType sketchType, final final String flagsStr = (flags) + ", 0x" + (Integer.toHexString(flags)) + ", " + zeroPad(Integer.toBinaryString(flags), 8); final int preInts = memVal.preInts; //?? - //final boolean updatable = mySketchStructure == UPDATABLE; final boolean emptyFlag = memVal.emptyFlag; final int sketchBytes = memVal.sketchBytes; final int typeBytes = sketchType.getBytes(); diff --git a/src/main/java/org/apache/datasketches/quantiles/DirectUpdateDoublesSketch.java b/src/main/java/org/apache/datasketches/quantiles/DirectUpdateDoublesSketch.java index 0998223e8..7a7a7a850 100644 --- a/src/main/java/org/apache/datasketches/quantiles/DirectUpdateDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/quantiles/DirectUpdateDoublesSketch.java @@ -268,7 +268,7 @@ private WritableMemory growCombinedMemBuffer(final int itemSpaceNeeded) { final WritableMemory newMem = memReqSvr.request(mem_, needBytes); mem_.copyTo(0, newMem, 0, memBytes); - if (mem_.isCloseable()) { memReqSvr.requestClose(mem_, newMem); } + memReqSvr.requestClose(mem_, newMem); return newMem; } diff --git a/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java index bdc3cc75c..98616661f 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java @@ -148,7 +148,7 @@ default double[] getPMF(double[] splitPoints, QuantileSearchCriteria searchCrit * the quantile directly corresponding to the given rank. 
* @return the approximate quantile given the normalized rank. * @throws IllegalArgumentException if sketch is empty. - * @see org.apache.datasketches.quantilescommon.QuantileSearchCriteria + * @see QuantileSearchCriteria */ double getQuantile(double rank, QuantileSearchCriteria searchCrit); @@ -165,7 +165,7 @@ default double[] getPMF(double[] splitPoints, QuantileSearchCriteria searchCrit * @param searchCrit if INCLUSIVE the given quantile is included into the rank. * @return the normalized rank corresponding to the given quantile. * @throws IllegalArgumentException if sketch is empty. - * @see org.apache.datasketches.quantilescommon.QuantileSearchCriteria + * @see QuantileSearchCriteria */ double getRank(double quantile, QuantileSearchCriteria searchCrit); @@ -173,4 +173,3 @@ default double[] getPMF(double[] splitPoints, QuantileSearchCriteria searchCrit DoublesSortedViewIterator iterator(); } - diff --git a/src/main/java/org/apache/datasketches/quantilescommon/GenericInequalitySearch.java b/src/main/java/org/apache/datasketches/quantilescommon/GenericInequalitySearch.java index e650fef9b..7b9d6d665 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/GenericInequalitySearch.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/GenericInequalitySearch.java @@ -41,7 +41,7 @@ *

Given a sorted array of values arr[] and a search key value v, the algorithms for * the searching criteria are given with each enum criterion.

* - * @see + * @see * Sketching Quantiles and Ranks Tutorial * @author Lee Rhodes */ diff --git a/src/main/java/org/apache/datasketches/quantilescommon/InequalitySearch.java b/src/main/java/org/apache/datasketches/quantilescommon/InequalitySearch.java index 51b013573..21af2b531 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/InequalitySearch.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/InequalitySearch.java @@ -40,7 +40,7 @@ *

Given a sorted array of values arr[] and a search key value v, the algorithms for * the searching criteria are given with each enum criterion.

* - * @see + * @see * Sketching Quantiles and Ranks Tutorial * @author Lee Rhodes */ diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantileSearchCriteria.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantileSearchCriteria.java index 8ea3c3415..e7b9e6ef6 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantileSearchCriteria.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantileSearchCriteria.java @@ -22,7 +22,7 @@ /** * These search criteria are used by the KLL, REQ and Classic Quantiles sketches in the DataSketches library. * - * @see + * @see * Sketching Quantiles and Ranks Tutorial * * @author Lee Rhodes diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java index b70843bb4..9943fefee 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java @@ -194,7 +194,7 @@ *

[*] Note that obtaining epsilon may require using a similar function but with more parameters * based on the specific sketch implementation.

* - * @see + * @see * Sketching Quantiles and Ranks, Tutorial * @see org.apache.datasketches.quantilescommon.QuantileSearchCriteria * diff --git a/src/main/java/org/apache/datasketches/req/BaseReqSketch.java b/src/main/java/org/apache/datasketches/req/BaseReqSketch.java index e587cd633..2460308fc 100644 --- a/src/main/java/org/apache/datasketches/req/BaseReqSketch.java +++ b/src/main/java/org/apache/datasketches/req/BaseReqSketch.java @@ -28,7 +28,7 @@ * This abstract class provides a single place to define and document the public API * for the Relative Error Quantiles Sketch. * - * @see + * @see * Sketching Quantiles and Ranks Tutorial * * @author Lee Rhodes diff --git a/src/main/java/org/apache/datasketches/theta/AnotBimpl.java b/src/main/java/org/apache/datasketches/theta/AnotBimpl.java index d2161c995..e7b2c99eb 100644 --- a/src/main/java/org/apache/datasketches/theta/AnotBimpl.java +++ b/src/main/java/org/apache/datasketches/theta/AnotBimpl.java @@ -26,7 +26,6 @@ import java.util.Arrays; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.ThetaUtil; @@ -148,11 +147,6 @@ int getRetainedEntries() { return curCount_; } - @Override - public boolean isSameResource(final Memory that) { - return false; - } - //restricted private static long[] getHashArrA(final Sketch skA) { //returns a new array diff --git a/src/main/java/org/apache/datasketches/theta/ConcurrentHeapThetaBuffer.java b/src/main/java/org/apache/datasketches/theta/ConcurrentHeapThetaBuffer.java index 741db2f72..e7b3ddaac 100644 --- a/src/main/java/org/apache/datasketches/theta/ConcurrentHeapThetaBuffer.java +++ b/src/main/java/org/apache/datasketches/theta/ConcurrentHeapThetaBuffer.java @@ -94,7 +94,7 @@ private static int computeLogBufferSize(final int lgNomLongs, final long exactSi * @param hash to be propagated */ private boolean 
propagateToSharedSketch(final long hash) { - //noinspection StatementWithEmptyBody + //no inspection StatementWithEmptyBody while (localPropagationInProgress.get()) { } //busy wait until previous propagation completed localPropagationInProgress.set(true); @@ -108,7 +108,7 @@ private boolean propagateToSharedSketch(final long hash) { * Propagates the content of the buffer as a sketch to the shared sketch */ private void propagateToSharedSketch() { - //noinspection StatementWithEmptyBody + //no inspection StatementWithEmptyBody while (localPropagationInProgress.get()) { } //busy wait until previous propagation completed diff --git a/src/main/java/org/apache/datasketches/theta/ConcurrentSharedThetaSketch.java b/src/main/java/org/apache/datasketches/theta/ConcurrentSharedThetaSketch.java index 49a8140c3..cdc843f8b 100644 --- a/src/main/java/org/apache/datasketches/theta/ConcurrentSharedThetaSketch.java +++ b/src/main/java/org/apache/datasketches/theta/ConcurrentSharedThetaSketch.java @@ -21,6 +21,7 @@ import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.datasketches.common.MemoryStatus; import org.apache.datasketches.memory.WritableMemory; /** @@ -30,7 +31,7 @@ * * @author eshcar */ -interface ConcurrentSharedThetaSketch { +interface ConcurrentSharedThetaSketch extends MemoryStatus { long NOT_SINGLE_HASH = -1L; double MIN_ERROR = 0.0000001; @@ -63,8 +64,8 @@ static long computeExactLimit(long k, double error) { void endPropagation(AtomicBoolean localPropagationInProgress, boolean isEager); /** - * Returns the value of the volatile theta manged by the shared sketch - * @return the value of the volatile theta manged by the shared sketch + * Returns the value of the volatile theta managed by the shared sketch + * @return the value of the volatile theta managed by the shared sketch */ long getVolatileTheta(); @@ -124,10 +125,10 @@ boolean propagate(final AtomicBoolean localPropagationInProgress, final Sketch s // //For the external user all of the below 
methods can be obtained by casting the shared //sketch to UpdateSketch. However, these methods here also act as an alias so that an - //attempt to access these methods from the local buffer will be divered to the shared + //attempt to access these methods from the local buffer will be diverted to the shared //sketch. - //From Sketch + //From Sketch and MemoryStatus int getCompactBytes(); @@ -139,10 +140,6 @@ boolean propagate(final AtomicBoolean localPropagationInProgress, final Sketch s double getUpperBound(int numStdDev); - boolean hasMemory(); - - boolean isDirect(); - boolean isEmpty(); boolean isEstimationMode(); diff --git a/src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java b/src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java index de0e6e43b..0f69ec3c2 100644 --- a/src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java @@ -109,12 +109,12 @@ public long getThetaLong() { @Override public boolean hasMemory() { - return true; + return mem_ != null; } @Override public boolean isDirect() { - return mem_.isDirect(); + return hasMemory() ? mem_.isDirect() : false; } @Override @@ -132,7 +132,7 @@ public boolean isOrdered() { @Override public boolean isSameResource(final Memory that) { - return mem_.isSameResource(that); + return hasMemory() ? mem_.isSameResource(that) : false; } @Override diff --git a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java index a1ac53c6d..ad9051a08 100644 --- a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java @@ -323,6 +323,10 @@ UpdateReturnState hashUpdate(final long hash) { memReqSvr_ = (memReqSvr_ == null) ? 
wmem_.getMemoryRequestServer() : memReqSvr_; + if (memReqSvr_ == null) { //in case the MRS is not enabled or null. + throw new SketchesArgumentException("Out of Memory, MemoryRequestServer is null, cannot expand."); + } + final WritableMemory newDstMem = memReqSvr_.request(wmem_,reqBytes); moveAndResize(wmem_, preambleLongs, lgArrLongs, newDstMem, tgtLgArrLongs, thetaLong); diff --git a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java index a3ffebc14..fb2aed2a5 100644 --- a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java +++ b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java @@ -144,12 +144,12 @@ public long getThetaLong() { @Override public boolean hasMemory() { - return true; + return wmem_ != null; } @Override public boolean isDirect() { - return wmem_.isDirect(); + return hasMemory() ? wmem_.isDirect() : false; } @Override @@ -159,7 +159,7 @@ public boolean isEmpty() { @Override public boolean isSameResource(final Memory that) { - return wmem_.isSameResource(that); + return hasMemory() ? 
wmem_.isSameResource(that) : false; } @Override diff --git a/src/main/java/org/apache/datasketches/theta/EmptyCompactSketch.java b/src/main/java/org/apache/datasketches/theta/EmptyCompactSketch.java index ae481a425..8f6e4972a 100644 --- a/src/main/java/org/apache/datasketches/theta/EmptyCompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta/EmptyCompactSketch.java @@ -91,16 +91,6 @@ public long getThetaLong() { return Long.MAX_VALUE; } - @Override - public boolean hasMemory() { - return false; - } - - @Override - public boolean isDirect() { - return false; - } - @Override public boolean isEmpty() { return true; diff --git a/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java b/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java index 479aa3eeb..f394e9303 100644 --- a/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java @@ -102,16 +102,6 @@ public long getThetaLong() { return thetaLong_; } - @Override - public boolean hasMemory() { - return false; - } - - @Override - public boolean isDirect() { - return false; - } - @Override public boolean isEmpty() { return empty_; diff --git a/src/main/java/org/apache/datasketches/theta/HeapUpdateSketch.java b/src/main/java/org/apache/datasketches/theta/HeapUpdateSketch.java index 1cc6d75cd..49734a9e8 100644 --- a/src/main/java/org/apache/datasketches/theta/HeapUpdateSketch.java +++ b/src/main/java/org/apache/datasketches/theta/HeapUpdateSketch.java @@ -66,16 +66,6 @@ public int getCurrentBytes() { return (preLongs + dataLongs) << 3; } - @Override - public boolean isDirect() { - return false; - } - - @Override - public boolean hasMemory() { - return false; - } - //UpdateSketch @Override diff --git a/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java b/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java index 509ec2f93..fc81d1124 100644 --- 
a/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java +++ b/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java @@ -336,14 +336,24 @@ public CompactSketch getResult(final boolean dstOrdered, final WritableMemory ds dstMem, compactCache); } + @Override + public boolean hasMemory() { + return wmem_ != null; + } + @Override public boolean hasResult() { - return wmem_ != null ? wmem_.getInt(RETAINED_ENTRIES_INT) >= 0 : curCount_ >= 0; + return hasMemory() ? wmem_.getInt(RETAINED_ENTRIES_INT) >= 0 : curCount_ >= 0; + } + + @Override + public boolean isDirect() { + return hasMemory() ? wmem_.isDirect() : false; } @Override public boolean isSameResource(final Memory that) { - return wmem_ != null ? wmem_.isSameResource(that) : false; + return hasMemory() ? wmem_.isSameResource(that) : false; } @Override diff --git a/src/main/java/org/apache/datasketches/theta/Rebuilder.java b/src/main/java/org/apache/datasketches/theta/Rebuilder.java index 07093f652..b6e3de342 100644 --- a/src/main/java/org/apache/datasketches/theta/Rebuilder.java +++ b/src/main/java/org/apache/datasketches/theta/Rebuilder.java @@ -84,7 +84,7 @@ static final void quickSelectAndRebuild(final WritableMemory mem, final int prea } /** - * Moves me (the entire sketch) to a new larger Memory location and rebuilds the hash table. + * Moves me (the entire updatable sketch) to a new larger Memory location and rebuilds the hash table. * This assumes a Memory preamble of standard form with the correct value of thetaLong. * Afterwards, the caller must update the local Memory reference, lgArrLongs * and hashTableThreshold from the dstMemory and free the source Memory. 
diff --git a/src/main/java/org/apache/datasketches/theta/SetOperation.java b/src/main/java/org/apache/datasketches/theta/SetOperation.java index 4d8ebf0c4..b89ca9703 100644 --- a/src/main/java/org/apache/datasketches/theta/SetOperation.java +++ b/src/main/java/org/apache/datasketches/theta/SetOperation.java @@ -25,6 +25,7 @@ import static org.apache.datasketches.theta.PreambleUtil.SER_VER_BYTE; import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.MemoryStatus; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; @@ -35,7 +36,7 @@ * * @author Lee Rhodes */ -public abstract class SetOperation { +public abstract class SetOperation implements MemoryStatus { static final int CONST_PREAMBLE_LONGS = 3; SetOperation() {} @@ -237,20 +238,6 @@ public static int getMaxAnotBResultBytes(final int nomEntries) { */ public abstract Family getFamily(); - /** - * Returns true if the backing resource of this is identical with the backing resource - * of that. The capacities must be the same. If this is a region, - * the region offset must also be the same. - * - *

Note: Only certain set operators during stateful operations can be serialized. - * Only when they are stored into Memory will this be relevant.

- * - * @param that A different non-null object - * @return true if the backing resource of this is the same as the backing resource - * of that. - */ - public abstract boolean isSameResource(Memory that); - //restricted /** diff --git a/src/main/java/org/apache/datasketches/theta/SetOperationBuilder.java b/src/main/java/org/apache/datasketches/theta/SetOperationBuilder.java index d788418dc..4a35cf67d 100644 --- a/src/main/java/org/apache/datasketches/theta/SetOperationBuilder.java +++ b/src/main/java/org/apache/datasketches/theta/SetOperationBuilder.java @@ -66,7 +66,7 @@ public SetOperationBuilder() { * Sets the Maximum Nominal Entries (max K) for this set operation. The effective value of K of the result of a * Set Operation can be less than max K, but never greater. * The minimum value is 16 and the maximum value is 67,108,864, which is 2^26. - * @param nomEntries Nominal Entres + * @param nomEntries Nominal Entries * This will become the ceiling power of 2 if it is not a power of 2. 
* @return this SetOperationBuilder */ diff --git a/src/main/java/org/apache/datasketches/theta/SingleItemSketch.java b/src/main/java/org/apache/datasketches/theta/SingleItemSketch.java index a4bac21c9..3cfc13b1e 100644 --- a/src/main/java/org/apache/datasketches/theta/SingleItemSketch.java +++ b/src/main/java/org/apache/datasketches/theta/SingleItemSketch.java @@ -343,16 +343,6 @@ public double getUpperBound(final int numStdDev) { return 1.0; } - @Override - public boolean hasMemory() { - return false; - } - - @Override - public boolean isDirect() { - return false; - } - @Override public boolean isEmpty() { return false; diff --git a/src/main/java/org/apache/datasketches/theta/Sketch.java b/src/main/java/org/apache/datasketches/theta/Sketch.java index d13fe9b80..89618bc23 100644 --- a/src/main/java/org/apache/datasketches/theta/Sketch.java +++ b/src/main/java/org/apache/datasketches/theta/Sketch.java @@ -32,6 +32,7 @@ import static org.apache.datasketches.thetacommon.HashOperations.count; import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.MemoryStatus; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; @@ -44,7 +45,7 @@ * * @author Lee Rhodes */ -public abstract class Sketch { +public abstract class Sketch implements MemoryStatus { static final int DEFAULT_LG_RESIZE_FACTOR = 3; //Unique to Heap Sketch() {} @@ -292,14 +293,11 @@ public double getLowerBound(final int numStdDev) { /** * Returns the maximum number of storage bytes required for a CompactSketch with the given - * number of actual entries. Note that this assumes the worse case of the sketch in - * estimation mode, which requires storing theta and count. - * @param numberOfEntries the actual number of entries stored with the CompactSketch. + * number of actual entries. + * @param numberOfEntries the actual number of retained entries stored in the sketch. 
* @return the maximum number of storage bytes required for a CompactSketch with the given number - * of entries. - * @deprecated as a public method. Use {@link #getCompactSketchMaxBytes(int) instead} + * of retained entries. */ - @Deprecated public static int getMaxCompactSketchBytes(final int numberOfEntries) { if (numberOfEntries == 0) { return 8; } if (numberOfEntries == 1) { return 16; } @@ -386,26 +384,12 @@ public double getUpperBound(final int numStdDev) { : getRetainedEntries(true); } - /** - * Returns true if this sketch's data structure is backed by Memory or WritableMemory. - * @return true if this sketch's data structure is backed by Memory or WritableMemory. - */ - public abstract boolean hasMemory(); - /** * Returns true if this sketch is in compact form. * @return true if this sketch is in compact form. */ public abstract boolean isCompact(); - /** - * Returns true if the this sketch's internal data structure is backed by direct (off-heap) - * Memory. - * @return true if the this sketch's internal data structure is backed by direct (off-heap) - * Memory. - */ - public abstract boolean isDirect(); - /** * See Empty * @return true if empty. @@ -427,18 +411,6 @@ public boolean isEstimationMode() { */ public abstract boolean isOrdered(); - /** - * Returns true if the backing resource of this is identical with the backing resource - * of that. The capacities must be the same. If this is a region, - * the region offset must also be the same. - * @param that A different non-null object - * @return true if the backing resource of this is the same as the backing resource - * of that. - */ - public boolean isSameResource(final Memory that) { - return false; - } - /** * Returns a HashIterator that can be used to iterate over the retained hash values of the * Theta sketch. @@ -605,8 +577,8 @@ public static String toString(final Memory mem) { abstract int getCurrentPreambleLongs(); /** - * Returns the Memory object if it exists, otherwise null. 
- * @return the Memory object if it exists, otherwise null. + * Returns the backing Memory object if it exists, otherwise null. + * @return the backing Memory object if it exists, otherwise null. */ abstract Memory getMemory(); diff --git a/src/main/java/org/apache/datasketches/theta/Sketches.java b/src/main/java/org/apache/datasketches/theta/Sketches.java index 077d9d5ae..2e7fa0915 100644 --- a/src/main/java/org/apache/datasketches/theta/Sketches.java +++ b/src/main/java/org/apache/datasketches/theta/Sketches.java @@ -80,15 +80,11 @@ public static int getMaxAnotBResultBytes(final int maxNomEntries) { /** * Returns the maximum number of storage bytes required for a CompactSketch with the given - * number of actual entries. Note that this assumes the worse case of the sketch in - * estimation mode, which requires storing theta and count. - * @param numberOfEntries the actual number of entries stored with the CompactSketch. + * number of actual entries. + * @param numberOfEntries the actual number of retained entries stored in the sketch. * @return the maximum number of storage bytes required for a CompactSketch with the given number - * of entries. - * @see Sketch#getMaxCompactSketchBytes(int) - * @deprecated as a public method. Use {@link #getCompactSketchMaxBytes(int) getCompactSketchMaxBytes(int)} instead. + * of retained entries. 
*/ - @Deprecated public static int getMaxCompactSketchBytes(final int numberOfEntries) { return Sketch.getMaxCompactSketchBytes(numberOfEntries); } diff --git a/src/main/java/org/apache/datasketches/theta/UnionImpl.java b/src/main/java/org/apache/datasketches/theta/UnionImpl.java index 3ea5ca557..be2980801 100644 --- a/src/main/java/org/apache/datasketches/theta/UnionImpl.java +++ b/src/main/java/org/apache/datasketches/theta/UnionImpl.java @@ -262,10 +262,22 @@ public CompactSketch getResult(final boolean dstOrdered, final WritableMemory ds minThetaLong, curCountOut, seedHash, empty, true, dstOrdered, dstOrdered, dstMem, compactCacheOut); } + @Override + public boolean hasMemory() { + return gadget_ instanceof DirectQuickSelectSketchR + ? gadget_.hasMemory() : false; + } + + @Override + public boolean isDirect() { + return gadget_ instanceof DirectQuickSelectSketchR + ? gadget_.isDirect() : false; + } + @Override public boolean isSameResource(final Memory that) { return gadget_ instanceof DirectQuickSelectSketchR - ? gadget_.getMemory().isSameResource(that) : false; + ? gadget_.isSameResource(that) : false; } @Override diff --git a/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterCrossLanguageTest.java b/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterCrossLanguageTest.java new file mode 100644 index 000000000..12c6f20df --- /dev/null +++ b/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterCrossLanguageTest.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.filters.bloomfilter; + +import static org.apache.datasketches.common.TestUtil.CHECK_CPP_FILES; +import static org.apache.datasketches.common.TestUtil.GENERATE_JAVA_FILES; +import static org.apache.datasketches.common.TestUtil.cppPath; +import static org.apache.datasketches.common.TestUtil.javaPath; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +import java.io.IOException; +import java.nio.file.Files; + +import org.apache.datasketches.memory.Memory; +import org.testng.annotations.Test; + +/** + * Serialize binary sketches to be tested by C++ code. + * Test deserialization of binary sketches serialized by C++ code. 
+ */ +public class BloomFilterCrossLanguageTest { + + @Test(groups = {GENERATE_JAVA_FILES}) + public void generatBloomFilterBinariesForCompatibilityTesting() throws IOException { + final int[] nArr = {0, 10_000, 2_000_000, 300_000_00}; + final short[] hArr = {3, 5}; + for (int n : nArr) { + for (short numHashes : hArr) { + final long configBits = Math.max(n, 1000L); // so empty still has valid bit size + BloomFilter bf = BloomFilterBuilder.createBySize(configBits, numHashes); + for (int i = 0; i < n / 10; ++i) { + bf.update(i); + } + if (n > 0) { bf.update(Float.NaN); } + assertEquals(bf.isEmpty(), n == 0); + assertTrue(bf.isEmpty() || (bf.getBitsUsed() > (n / 10))); + Files.newOutputStream(javaPath.resolve("bf_n" + n + "_h" + numHashes + "_java.sk")).write(bf.toByteArray()); + } + } + } + + @Test(groups = {CHECK_CPP_FILES}) + public void readBloomFilterBinariesForCompatibilityTesting() throws IOException { + final int[] nArr = {0, 10_000, 2_000_000, 300_000_00}; + final short[] hArr = {3, 5}; + for (int n : nArr) { + for (short numHashes : hArr) { + final byte[] bytes = Files.readAllBytes(cppPath.resolve("bf_n" + n + "_h" + numHashes + "_cpp.sk")); + final BloomFilter bf = BloomFilter.heapify(Memory.wrap(bytes)); + assertEquals(bf.isEmpty(), n == 0); + assertTrue(bf.isEmpty() || (bf.getBitsUsed() > (n / 10))); + + for (int i = 0; i < n / 10; ++i) { + assertTrue(bf.query(i)); + } + if (n > 0) { + assert(bf.query(Double.NaN)); + } + } + } + } +} diff --git a/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterTest.java b/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterTest.java index 81f9e0d2e..25bef3643 100644 --- a/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterTest.java +++ b/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterTest.java @@ -31,7 +31,6 @@ import org.apache.datasketches.common.SketchesReadOnlyException; import org.apache.datasketches.memory.DefaultMemoryRequestServer; 
import org.apache.datasketches.memory.Memory; -//import org.apache.datasketches.memory.WritableHandle; import org.apache.datasketches.memory.WritableMemory; import org.testng.annotations.Test; @@ -58,7 +57,6 @@ public void createNewFilterTest() throws Exception { WritableMemory wmem; try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(sizeBytes)).scope()) { - final BloomFilter bf2 = new BloomFilter(numBits, numHashes, seed, wmem); assertTrue(bf2.isEmpty()); assertTrue(bf2.hasMemory()); From 9f4813885806521a997ef7464b60f8ed14a7b7f5 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Sun, 13 Oct 2024 10:53:31 -0700 Subject: [PATCH 11/22] remove unnecessary cast --- src/main/java/org/apache/datasketches/theta/UnionImpl.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/apache/datasketches/theta/UnionImpl.java b/src/main/java/org/apache/datasketches/theta/UnionImpl.java index be2980801..bac05de74 100644 --- a/src/main/java/org/apache/datasketches/theta/UnionImpl.java +++ b/src/main/java/org/apache/datasketches/theta/UnionImpl.java @@ -333,7 +333,7 @@ public void union(final Sketch sketchIn) { if (sketchIn.isOrdered() && (sketchIn instanceof CompactSketch)) { //Use early stop //Ordered, thus compact if (sketchIn.hasMemory()) { - final Memory skMem = ((CompactSketch) sketchIn).getMemory(); + final Memory skMem = sketchIn.getMemory(); final int preambleLongs = skMem.getByte(PREAMBLE_LONGS_BYTE) & 0X3F; for (int i = 0; i < curCountIn; i++ ) { final int offsetBytes = preambleLongs + i << 3; From 3a5e67ce66bbb85424363bb73db9a7c57b71f74f Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Mon, 4 Nov 2024 19:01:04 -0500 Subject: [PATCH 12/22] Update gitignore, pom, and fix one warning --- .gitignore | 3 +++ pom.xml | 2 +- src/main/java/org/apache/datasketches/theta/UnionImpl.java | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 96b25576b..f03c5078f 100644 --- a/.gitignore +++ 
b/.gitignore @@ -10,6 +10,9 @@ *.ipr *.iws +# VSCode project files +**/.vscode/ + # Additional tools .clover/ diff --git a/pom.xml b/pom.xml index 949152680..abcbd0c54 100644 --- a/pom.xml +++ b/pom.xml @@ -75,7 +75,7 @@ under the License. - 2.2.0 + 4.0.0 7.10.0 diff --git a/src/main/java/org/apache/datasketches/theta/UnionImpl.java b/src/main/java/org/apache/datasketches/theta/UnionImpl.java index 3ea5ca557..7288365b9 100644 --- a/src/main/java/org/apache/datasketches/theta/UnionImpl.java +++ b/src/main/java/org/apache/datasketches/theta/UnionImpl.java @@ -321,7 +321,7 @@ public void union(final Sketch sketchIn) { if (sketchIn.isOrdered() && (sketchIn instanceof CompactSketch)) { //Use early stop //Ordered, thus compact if (sketchIn.hasMemory()) { - final Memory skMem = ((CompactSketch) sketchIn).getMemory(); + final Memory skMem = sketchIn.getMemory(); final int preambleLongs = skMem.getByte(PREAMBLE_LONGS_BYTE) & 0X3F; for (int i = 0; i < curCountIn; i++ ) { final int offsetBytes = preambleLongs + i << 3; From 4a061fe70a9d94aa296dc8c6b8e9b36f25bef466 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Sun, 17 Nov 2024 12:49:44 -0800 Subject: [PATCH 13/22] update GHA, pom --- ...anual-os-matrix.yml => auto-os-matrix.yml} | 22 ++++++++++++------- .github/workflows/codeql-analysis.yml | 8 +++---- .github/workflows/javadoc.yml | 22 ++++++++++++++----- .github/workflows/manual-coverage.yml | 12 +++++----- pom.xml | 2 +- 5 files changed, 42 insertions(+), 24 deletions(-) rename .github/workflows/{manual-os-matrix.yml => auto-os-matrix.yml} (85%) diff --git a/.github/workflows/manual-os-matrix.yml b/.github/workflows/auto-os-matrix.yml similarity index 85% rename from .github/workflows/manual-os-matrix.yml rename to .github/workflows/auto-os-matrix.yml index 2d5537841..a8afc3b6b 100644 --- a/.github/workflows/manual-os-matrix.yml +++ b/.github/workflows/auto-os-matrix.yml @@ -1,19 +1,23 @@ -name: DataSketches-Java Manual OS Matrix Test & Install +name: DataSketches-Java 
Auto OS Matrix Test & Install on: + pull_request: + push: + branches: [ master ] workflow_dispatch: env: - MAVEN_OPTS: -Xmx4g -Xms1g + MAVEN_OPTS: -Xmx1g -Xms1g jobs: build: name: Build, Test, Install - runs-on: ${{matrix.os}} + strategy: fail-fast: false + matrix: - jdk: [ 8, 11 ] + jdk: [ 17 ] os: [ windows-latest, ubuntu-latest, macos-latest ] include: - os: windows-latest @@ -26,31 +30,33 @@ jobs: skip_javadoc: -Dmaven.javadoc.skip=true skip_gpg: -Dgpg.skip=true + runs-on: ${{matrix.os}} + env: JDK_VERSION: ${{ matrix.jdk }} steps: - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: persist-credentials: false - name: Cache local Maven repository - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.m2/repository key: build-${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} restore-keys: build-${{ runner.os }}-maven- - name: Install Matrix JDK - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: java-version: ${{ matrix.jdk }} distribution: 'temurin' java-package: jdk architecture: x64 # Architecture options: x86, x64, armv7, aarch64, ppc64le -# setup-java@v3 has a "with cache" option +# setup-java@v4 has a "with cache" option - name: Echo Java Version run: > diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 7cb4c8976..51726d6d7 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -28,11 +28,11 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@v3 + uses: github/codeql-action/init@v4 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -46,7 +46,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java). 
# If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@v3 + uses: github/codeql-action/autobuild@v4 # ℹ️ Command-line programs to run using the OS shell. # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun @@ -59,6 +59,6 @@ jobs: # ./location_of_script_within_repo/buildscript.sh - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v3 + uses: github/codeql-action/analyze@v4 with: category: "/language:${{matrix.language}}" diff --git a/.github/workflows/javadoc.yml b/.github/workflows/javadoc.yml index 6033d3273..d1c554291 100644 --- a/.github/workflows/javadoc.yml +++ b/.github/workflows/javadoc.yml @@ -2,22 +2,34 @@ name: JavaDoc on: push: - branches: - - master + branches: [7.0.X, master] workflow_dispatch: jobs: javadoc: runs-on: ubuntu-latest + steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 + + - name: Setup Java + uses: actions/setup-java@v4 + with: + java-version: '17' + distribution: 'temurin' + + - name: Echo Java Version + run: java -version + - name: Generate JavaDoc run: mvn javadoc:javadoc + - name: Deploy JavaDoc - uses: JamesIves/github-pages-deploy-action@5dc1d5a192aeb5ab5b7d5a77b7d36aea4a7f5c92 + uses: JamesIves/github-pages-deploy-action@4.6.8 + #uses: JamesIves/github-pages-deploy-action@5dc1d5a192aeb5ab5b7d5a77b7d36aea4a7f5c92 with: token: ${{ secrets.GITHUB_TOKEN }} - folder: target/site/apidocs + folder: target/reports/apidocs target-folder: docs/${{ github.ref_name }} branch: gh-pages diff --git a/.github/workflows/manual-coverage.yml b/.github/workflows/manual-coverage.yml index e7b6ebc81..79b6006b3 100644 --- a/.github/workflows/manual-coverage.yml +++ b/.github/workflows/manual-coverage.yml @@ -13,7 +13,7 @@ jobs: strategy: fail-fast: false matrix: - jdk: [ 8 ] + jdk: [ 17 ] os: [ ubuntu-latest ] include: # - os: windows-latest @@ -31,33 +31,33 
@@ jobs: steps: - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: persist-credentials: false - name: Cache local Maven repository - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.m2/repository key: build-${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} restore-keys: build-${{ runner.os }}-maven- - name: Install Matrix JDK - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: java-version: ${{ matrix.jdk }} distribution: 'temurin' java-package: jdk architecture: x64 # Architecture options: x86, x64, armv7, aarch64, ppc64le -# setup-java@v3 has a "with cache" option +# setup-java@v4 has a "with cache" option - name: Echo Java Version run: > java -version - name: Test, Package, Verify, Coverage Report - if: ${{ matrix.jdk == 8 && success() }} + if: ${{ matrix.jdk == 17 && success() }} run: mvn verify coveralls:report -B -DrepoToken=${{secrets.coveralls_token}} diff --git a/pom.xml b/pom.xml index d6635e6e4..c843c9835 100644 --- a/pom.xml +++ b/pom.xml @@ -83,7 +83,7 @@ under the License. - 4.0.0 + 4.1.0 7.10.2 From d1853a9774e3a2dcb32c088fda5acf655a96a92a Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Mon, 18 Nov 2024 14:40:37 -0800 Subject: [PATCH 14/22] Upload fixes as a result of CodeQL run. 
--- .github/workflows/codeql-analysis.yml | 12 +-- .github/workflows/javadoc.yml | 4 +- .../filters/bloomfilter/package-info.java | 4 +- .../apache/datasketches/hll/package-info.java | 2 +- .../quantiles/CompactDoublesSketch.java | 5 - .../datasketches/quantiles/DoublesSketch.java | 2 +- .../quantiles/UpdateDoublesSketch.java | 7 ++ .../quantilescommon/QuantilesAPI.java | 11 +++ .../datasketches/req/BaseReqSketch.java | 14 ++- .../thetacommon/SetOperationCornerCases.java | 93 +++++++++++++++++++ .../tuple/SerializerDeserializer.java | 13 ++- .../QuantilesSketchCrossLanguageTest.java | 2 +- 12 files changed, 149 insertions(+), 20 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 51726d6d7..376d69404 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -1,13 +1,13 @@ name: "CodeQL" on: - push: - branches: [ 'master' ] - pull_request: +# push: +# branches: [ 'master' ] +# pull_request: # The branches below must be a subset of the branches above - branches: [ 'master' ] - schedule: - - cron: '10 17 * * 4' +# branches: [ 'master' ] +# schedule: +# - cron: '10 17 * * 4' workflow_dispatch: jobs: diff --git a/.github/workflows/javadoc.yml b/.github/workflows/javadoc.yml index d1c554291..5487b9e7b 100644 --- a/.github/workflows/javadoc.yml +++ b/.github/workflows/javadoc.yml @@ -26,8 +26,8 @@ jobs: run: mvn javadoc:javadoc - name: Deploy JavaDoc - uses: JamesIves/github-pages-deploy-action@4.6.8 - #uses: JamesIves/github-pages-deploy-action@5dc1d5a192aeb5ab5b7d5a77b7d36aea4a7f5c92 + uses: JamesIves/github-pages-deploy-action@v4.6.8 + #uses: JamesIves/github-pages-deploy-action@881db5376404c5c8d621010bcbec0310b58d5e29 #alternate with: token: ${{ secrets.GITHUB_TOKEN }} folder: target/reports/apidocs diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/package-info.java b/src/main/java/org/apache/datasketches/filters/bloomfilter/package-info.java index 
8a752e168..5cc6f28fe 100644 --- a/src/main/java/org/apache/datasketches/filters/bloomfilter/package-info.java +++ b/src/main/java/org/apache/datasketches/filters/bloomfilter/package-info.java @@ -16,5 +16,7 @@ * specific language governing permissions and limitations * under the License. */ - +/** + * BloomFilter package + */ package org.apache.datasketches.filters.bloomfilter; diff --git a/src/main/java/org/apache/datasketches/hll/package-info.java b/src/main/java/org/apache/datasketches/hll/package-info.java index 9ffafa7bd..625cb011c 100644 --- a/src/main/java/org/apache/datasketches/hll/package-info.java +++ b/src/main/java/org/apache/datasketches/hll/package-info.java @@ -18,7 +18,7 @@ */ /** - *

The DataSketches™ HLL sketch family package

+ *

The DataSketches™ HLL sketch family package

* {@link org.apache.datasketches.hll.HllSketch HllSketch} and {@link org.apache.datasketches.hll.Union Union} * are the public facing classes of this high performance implementation of Phillipe Flajolet's * HyperLogLog algorithm[1] but with significantly improved error behavior and important features that can be diff --git a/src/main/java/org/apache/datasketches/quantiles/CompactDoublesSketch.java b/src/main/java/org/apache/datasketches/quantiles/CompactDoublesSketch.java index f6df9e87e..18e05d315 100644 --- a/src/main/java/org/apache/datasketches/quantiles/CompactDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/quantiles/CompactDoublesSketch.java @@ -20,7 +20,6 @@ package org.apache.datasketches.quantiles; import org.apache.datasketches.common.SketchesStateException; -import org.apache.datasketches.memory.Memory; /** * Compact sketches are inherently read only. @@ -31,10 +30,6 @@ public abstract class CompactDoublesSketch extends DoublesSketch { super(k); } - public static CompactDoublesSketch heapify(final Memory srcMem) { - return HeapCompactDoublesSketch.heapifyInstance(srcMem); - } - @Override boolean isCompact() { return true; diff --git a/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java b/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java index 03f84116a..49e7c5fbb 100644 --- a/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java +++ b/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java @@ -140,7 +140,7 @@ public static final DoublesSketchBuilder builder() { */ public static DoublesSketch heapify(final Memory srcMem) { if (checkIsCompactMemory(srcMem)) { - return CompactDoublesSketch.heapify(srcMem); + return HeapCompactDoublesSketch.heapifyInstance(srcMem); } return UpdateDoublesSketch.heapify(srcMem); } diff --git a/src/main/java/org/apache/datasketches/quantiles/UpdateDoublesSketch.java b/src/main/java/org/apache/datasketches/quantiles/UpdateDoublesSketch.java index 
155001c87..f56c3b352 100644 --- a/src/main/java/org/apache/datasketches/quantiles/UpdateDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/quantiles/UpdateDoublesSketch.java @@ -49,6 +49,13 @@ public static UpdateDoublesSketch wrap(final WritableMemory srcMem) { @Override public abstract void update(double item); + /** + * Factory heapify takes a compact sketch image in Memory and instantiates an on-heap sketch. + * The resulting sketch will not retain any link to the source Memory. + * @param srcMem a compact Memory image of a sketch serialized by this sketch. + * See Memory + * @return a heap-based sketch based on the given Memory. + */ public static UpdateDoublesSketch heapify(final Memory srcMem) { return HeapUpdateDoublesSketch.heapifyInstance(srcMem); } diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java index 9943fefee..3dc0651a7 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java @@ -205,11 +205,22 @@ @SuppressWarnings("javadoc") public interface QuantilesAPI { + /** The sketch must not be empty for this operation. */ static String EMPTY_MSG = "The sketch must not be empty for this operation. "; + + /** Unsupported operation for this Sketch Type. */ static String UNSUPPORTED_MSG = "Unsupported operation for this Sketch Type. "; + + /** Sketch does not have just one item. */ static String NOT_SINGLE_ITEM_MSG = "Sketch does not have just one item. "; + + /** MemoryRequestServer must not be null. */ static String MEM_REQ_SVR_NULL_MSG = "MemoryRequestServer must not be null. "; + + /** Target sketch is Read Only, cannot write. */ static String TGT_IS_READ_ONLY_MSG = "Target sketch is Read Only, cannot write. "; + + /** A sketch cannot merge with itself. */ static String SELF_MERGE_MSG = "A sketch cannot merge with itself. 
"; /** diff --git a/src/main/java/org/apache/datasketches/req/BaseReqSketch.java b/src/main/java/org/apache/datasketches/req/BaseReqSketch.java index 2460308fc..05d4d3ce2 100644 --- a/src/main/java/org/apache/datasketches/req/BaseReqSketch.java +++ b/src/main/java/org/apache/datasketches/req/BaseReqSketch.java @@ -89,11 +89,23 @@ public static double getRSE(final int k, final double rank, final boolean hra, f @Override public abstract float getQuantileLowerBound(double rank); + /** + * Gets an approximate lower bound of the quantile associated with the given rank. + * @param rank the given normalized rank, a number between 0 and 1.0. + * @param numStdDev the number of standard deviations. Must be 1, 2, or 3. + * @return an approximate lower bound quantile, if it exists. + */ public abstract float getQuantileLowerBound(double rank, int numStdDev); @Override public abstract float getQuantileUpperBound(double rank); + /** + * Gets an approximate upper bound of the quantile associated with the given rank. + * @param rank the given normalized rank, a number between 0 and 1.0. + * @param numStdDev the number of standard deviations. Must be 1, 2, or 3. + * @return an approximate upper bound quantile, if it exists. + */ public abstract float getQuantileUpperBound(double rank, int numStdDev); @Override @@ -101,7 +113,7 @@ public static double getRSE(final int k, final double rank, final boolean hra, f /** * Gets an approximate lower bound rank of the given normalized rank. - * @param rank the given rank, a number between 0 and 1.0. + * @param rank the given normalized rank, a number between 0 and 1.0. * @param numStdDev the number of standard deviations. Must be 1, 2, or 3. * @return an approximate lower bound rank. 
*/ diff --git a/src/main/java/org/apache/datasketches/thetacommon/SetOperationCornerCases.java b/src/main/java/org/apache/datasketches/thetacommon/SetOperationCornerCases.java index 20dd6ee7d..72a98565d 100644 --- a/src/main/java/org/apache/datasketches/thetacommon/SetOperationCornerCases.java +++ b/src/main/java/org/apache/datasketches/thetacommon/SetOperationCornerCases.java @@ -46,10 +46,18 @@ private IntersectAction(final String actionId, final String actionDescription) { this.actionDescription = actionDescription; } + /** + * Gets the Action ID + * @return the actionId + */ public String getActionId() { return actionId; } + /** + * Gets the Action Description + * @return the actionDescription + */ public String getActionDescription() { return actionDescription; } @@ -72,24 +80,42 @@ private AnotbAction(final String actionId, final String actionDescription) { this.actionDescription = actionDescription; } + /** + * Gets the Action ID + * @return the actionId + */ public String getActionId() { return actionId; } + /** + * Gets the action description + * @return the action description + */ public String getActionDescription() { return actionDescription; } } + /** List of union actions */ public enum UnionAction { + /** Sketch A Exactly */ SKETCH_A("A", "Sketch A Exactly"), + /** Trim Sketch A by MinTheta */ TRIM_A("TA", "Trim Sketch A by MinTheta"), + /** Sketch B Exactly */ SKETCH_B("B", "Sketch B Exactly"), + /** Trim Sketch B by MinTheta */ TRIM_B("TB", "Trim Sketch B by MinTheta"), + /** Degenerate{MinTheta, 0, F} */ DEGEN_MIN_0_F("D", "Degenerate{MinTheta, 0, F}"), + /** Degenerate{ThetaA, 0, F} */ DEGEN_THA_0_F("DA", "Degenerate{ThetaA, 0, F}"), + /** Degenerate{ThetaB, 0, F} */ DEGEN_THB_0_F("DB", "Degenerate{ThetaB, 0, F}"), + /** Empty{1.0, 0, T} */ EMPTY_1_0_T("E", "Empty{1.0, 0, T}"), + /** Full Union */ FULL_UNION("N", "Full Union"); private String actionId; @@ -100,49 +126,74 @@ private UnionAction(final String actionId, final String 
actionDescription) { this.actionDescription = actionDescription; } + /** + * Gets the action ID + * @return the actionId + */ public String getActionId() { return actionId; } + /** + * Gets the action description + * @return the actionDescription + */ public String getActionDescription() { return actionDescription; } } + /** List of corner cases */ public enum CornerCase { + /** Empty Empty */ Empty_Empty(055, "A{ 1.0, 0, T} ; B{ 1.0, 0, T}", IntersectAction.EMPTY_1_0_T, AnotbAction.EMPTY_1_0_T, UnionAction.EMPTY_1_0_T), + /** Empty Exact */ Empty_Exact(056, "A{ 1.0, 0, T} ; B{ 1.0,>0, F}", IntersectAction.EMPTY_1_0_T, AnotbAction.EMPTY_1_0_T, UnionAction.SKETCH_B), + /** Empty Estimation */ Empty_Estimation(052, "A{ 1.0, 0, T} ; B{<1.0,>0, F", IntersectAction.EMPTY_1_0_T, AnotbAction.EMPTY_1_0_T, UnionAction.SKETCH_B), + /** Empty Degen */ Empty_Degen(050, "A{ 1.0, 0, T} ; B{<1.0, 0, F}", IntersectAction.EMPTY_1_0_T, AnotbAction.EMPTY_1_0_T, UnionAction.DEGEN_THB_0_F), + /** Exact Empty */ Exact_Empty(065, "A{ 1.0,>0, F} ; B{ 1.0, 0, T}", IntersectAction.EMPTY_1_0_T, AnotbAction.SKETCH_A, UnionAction.SKETCH_A), + /** Exact Exact */ Exact_Exact(066, "A{ 1.0,>0, F} ; B{ 1.0,>0, F}", IntersectAction.FULL_INTERSECT, AnotbAction.FULL_ANOTB, UnionAction.FULL_UNION), + /** Exact Estimation */ Exact_Estimation(062, "A{ 1.0,>0, F} ; B{<1.0,>0, F}", IntersectAction.FULL_INTERSECT, AnotbAction.FULL_ANOTB, UnionAction.FULL_UNION), + /** Exact Degen */ Exact_Degen(060, "A{ 1.0,>0, F} ; B{<1.0, 0, F}", IntersectAction.DEGEN_MIN_0_F, AnotbAction.TRIM_A, UnionAction.TRIM_A), + /** Estimation_Empty */ Estimation_Empty(025, "A{<1.0,>0, F} ; B{ 1.0, 0, T}", IntersectAction.EMPTY_1_0_T, AnotbAction.SKETCH_A, UnionAction.SKETCH_A), + /** Estimation_Exact */ Estimation_Exact(026, "A{<1.0,>0, F} ; B{ 1.0,>0, F}", IntersectAction.FULL_INTERSECT, AnotbAction.FULL_ANOTB, UnionAction.FULL_UNION), + /** Estimation_Estimation */ Estimation_Estimation(022, "A{<1.0,>0, F} ; B{<1.0,>0, F}", 
IntersectAction.FULL_INTERSECT, AnotbAction.FULL_ANOTB, UnionAction.FULL_UNION), + /** Estimation_Degen */ Estimation_Degen(020, "A{<1.0,>0, F} ; B{<1.0, 0, F}", IntersectAction.DEGEN_MIN_0_F, AnotbAction.TRIM_A, UnionAction.TRIM_A), + /** Degen_Empty */ Degen_Empty(005, "A{<1.0, 0, F} ; B{ 1.0, 0, T}", IntersectAction.EMPTY_1_0_T, AnotbAction.DEGEN_THA_0_F, UnionAction.DEGEN_THA_0_F), + /** Degen_Exact */ Degen_Exact(006, "A{<1.0, 0, F} ; B{ 1.0,>0, F}", IntersectAction.DEGEN_MIN_0_F, AnotbAction.DEGEN_THA_0_F, UnionAction.TRIM_B), + /** Degen_Estimation */ Degen_Estimation(002, "A{<1.0, 0, F} ; B{<1.0,>0, F}", IntersectAction.DEGEN_MIN_0_F, AnotbAction.DEGEN_MIN_0_F, UnionAction.TRIM_B), + /** Degen_Degen */ Degen_Degen(000, "A{<1.0, 0, F} ; B{<1.0, 0, F}", IntersectAction.DEGEN_MIN_0_F, AnotbAction.DEGEN_MIN_0_F, UnionAction.DEGEN_MIN_0_F); @@ -168,27 +219,52 @@ private CornerCase(final int caseId, final String caseDescription, this.unionAction = unionAction; } + /** + * Gets the case ID + * @return the caseId + */ public int getId() { return caseId; } + /** + * Gets the case description + * @return the caseDescription + */ public String getCaseDescription() { return caseDescription; } + /** + * Gets the intersect action + * @return the intersectAction + */ public IntersectAction getIntersectAction() { return intersectAction; } + /** + * Gets the AnotB action + * @return the anotbAction + */ public AnotbAction getAnotbAction() { return anotbAction; } + /** + * Gets the union action + * @return the unionAction + */ public UnionAction getUnionAction() { return unionAction; } //See checkById test in /tuple/MiscTest. 
+ /** + * Converts caseId to CornerCase + * @param id the case ID + * @return the Corner Case + */ public static CornerCase caseIdToCornerCase(final int id) { final CornerCase cc = caseIdToCornerCaseMap.get(id); if (cc == null) { @@ -198,12 +274,29 @@ public static CornerCase caseIdToCornerCase(final int id) { } } //end of enum CornerCase + /** + * Creates the CornerCase ID + * @param thetaLongA the theta of A as a long + * @param countA the count of A + * @param emptyA true if A is empty + * @param thetaLongB the theta of B as a long + * @param countB the count of B + * @param emptyB true if B is empty + * @return the Corner Case ID + */ public static int createCornerCaseId( final long thetaLongA, final int countA, final boolean emptyA, final long thetaLongB, final int countB, final boolean emptyB) { return (sketchStateId(emptyA, countA, thetaLongA) << 3) | sketchStateId(emptyB, countB, thetaLongB); } + /** + * Returns the sketch state ID + * @param isEmpty true if empty + * @param numRetained the number of items retained + * @param thetaLong the value of theta as a long + * @return the sketch state ID + */ public static int sketchStateId(final boolean isEmpty, final int numRetained, final long thetaLong) { // assume thetaLong = MAX if empty return (((thetaLong == MAX) || isEmpty) ? 4 : 0) | ((numRetained > 0) ? 2 : 0) | (isEmpty ? 1 : 0); diff --git a/src/main/java/org/apache/datasketches/tuple/SerializerDeserializer.java b/src/main/java/org/apache/datasketches/tuple/SerializerDeserializer.java index 44d1d9cc0..9b0ca33cb 100644 --- a/src/main/java/org/apache/datasketches/tuple/SerializerDeserializer.java +++ b/src/main/java/org/apache/datasketches/tuple/SerializerDeserializer.java @@ -32,8 +32,17 @@ public final class SerializerDeserializer { * Defines the sketch classes that this SerializerDeserializer can handle. 
*/ @SuppressWarnings("javadoc") - public static enum SketchType { QuickSelectSketch, CompactSketch, ArrayOfDoublesQuickSelectSketch, - ArrayOfDoublesCompactSketch, ArrayOfDoublesUnion } + public static enum SketchType { + /** QuickSelectSketch */ + QuickSelectSketch, + /** CompactSketch */ + CompactSketch, + /** ArrayOfDoublesQuickSelectSketch */ + ArrayOfDoublesQuickSelectSketch, + /** ArrayOfDoublesCompactSketch */ + ArrayOfDoublesCompactSketch, + /** ArrayOfDoublesUnion */ + ArrayOfDoublesUnion } static final int TYPE_BYTE_OFFSET = 3; diff --git a/src/test/java/org/apache/datasketches/quantiles/QuantilesSketchCrossLanguageTest.java b/src/test/java/org/apache/datasketches/quantiles/QuantilesSketchCrossLanguageTest.java index 8c80f4399..68347ffb8 100644 --- a/src/test/java/org/apache/datasketches/quantiles/QuantilesSketchCrossLanguageTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/QuantilesSketchCrossLanguageTest.java @@ -249,7 +249,7 @@ private static void getAndCheck(String ver, int n, double quantile) { Assert.assertEquals(q2, quantile, 0.0); // same thing with compact sketch - qs2 = CompactDoublesSketch.heapify(srcMem); + qs2 = HeapCompactDoublesSketch.heapifyInstance(srcMem); //Test the quantile q2 = qs2.getQuantile(nf, EXCLUSIVE); println("New Median: " + q2); From f42af451b38c096d0d4af5f45814c86ef252e78e Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Mon, 18 Nov 2024 15:36:57 -0800 Subject: [PATCH 15/22] Update again for coverage reported errors. --- .../java/org/apache/datasketches/hll/package-info.java | 2 +- .../thetacommon/SetOperationCornerCases.java | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/apache/datasketches/hll/package-info.java b/src/main/java/org/apache/datasketches/hll/package-info.java index 625cb011c..a17a9f646 100644 --- a/src/main/java/org/apache/datasketches/hll/package-info.java +++ b/src/main/java/org/apache/datasketches/hll/package-info.java @@ -18,7 +18,7 @@ */ /** - *

The DataSketches™ HLL sketch family package

+ *

The DataSketches™ HLL sketch family package

* {@link org.apache.datasketches.hll.HllSketch HllSketch} and {@link org.apache.datasketches.hll.Union Union} * are the public facing classes of this high performance implementation of Phillipe Flajolet's * HyperLogLog algorithm[1] but with significantly improved error behavior and important features that can be diff --git a/src/main/java/org/apache/datasketches/thetacommon/SetOperationCornerCases.java b/src/main/java/org/apache/datasketches/thetacommon/SetOperationCornerCases.java index 72a98565d..d9fda48bb 100644 --- a/src/main/java/org/apache/datasketches/thetacommon/SetOperationCornerCases.java +++ b/src/main/java/org/apache/datasketches/thetacommon/SetOperationCornerCases.java @@ -34,8 +34,11 @@ public class SetOperationCornerCases { /** Intersection actions */ public enum IntersectAction { + /** Degenerate{MinTheta, 0, F} */ DEGEN_MIN_0_F("D", "Degenerate{MinTheta, 0, F}"), + /** Empty{1.0, 0, T} */ EMPTY_1_0_T("E", "Empty{1.0, 0, T}"), + /** Full Intersect */ FULL_INTERSECT("I", "Full Intersect"); private String actionId; @@ -65,11 +68,17 @@ public String getActionDescription() { /** A not B actions */ public enum AnotbAction { + /** Sketch A Exactly */ SKETCH_A("A", "Sketch A Exactly"), + /** Trim Sketch A by MinTheta */ TRIM_A("TA", "Trim Sketch A by MinTheta"), + /** Degenerate{MinTheta, 0, F} */ DEGEN_MIN_0_F("D", "Degenerate{MinTheta, 0, F}"), + /** Degenerate{ThetaA, 0, F} */ DEGEN_THA_0_F("DA", "Degenerate{ThetaA, 0, F}"), + /** Empty{1.0, 0, T} */ EMPTY_1_0_T("E", "Empty{1.0, 0, T}"), + /** Full AnotB */ FULL_ANOTB("N", "Full AnotB"); private String actionId; From 34aaa8baf679cccdac2ea6bdefef46870e5bc004 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Tue, 10 Dec 2024 15:35:28 -0800 Subject: [PATCH 16/22] This huge update does not impact any code. It is mostly Javadoc updates. There are also a few pom updates and some updates to the GH Action workflows. 
--- .github/workflows/auto-jdk-matrix.yml | 2 +- .github/workflows/auto-os-matrix.yml | 6 +- .github/workflows/javadoc.yml | 7 +- .github/workflows/manual-coverage.yml | 71 -------------- pom.xml | 93 +++++++++++-------- .../filters/bloomfilter/BloomFilter.java | 4 +- .../bloomfilter/BloomFilterBuilder.java | 4 +- .../datasketches/frequencies/ItemsSketch.java | 4 +- .../datasketches/frequencies/LongsSketch.java | 4 +- .../frequencies/PreambleUtil.java | 1 + .../apache/datasketches/hash/MurmurHash3.java | 2 - .../datasketches/hash/package-info.java | 3 +- .../datasketches/kll/KllDoublesHelper.java | 1 + .../datasketches/kll/KllDoublesSketch.java | 1 + .../datasketches/kll/KllFloatsHelper.java | 1 + .../datasketches/kll/KllFloatsSketch.java | 1 + .../datasketches/kll/KllItemsHelper.java | 1 + .../datasketches/kll/KllLongsHelper.java | 1 + .../datasketches/kll/KllLongsSketch.java | 1 + .../datasketches/quantiles/DoublesSketch.java | 1 + .../datasketches/quantiles/package-info.java | 3 +- .../quantilescommon/DoublesSortedView.java | 8 +- .../quantilescommon/FloatsSortedView.java | 8 +- .../quantilescommon/GenericSortedView.java | 8 +- .../quantilescommon/LongsSortedView.java | 8 +- .../quantilescommon/QuantilesAPI.java | 4 +- .../quantilescommon/QuantilesDoublesAPI.java | 8 +- .../quantilescommon/QuantilesFloatsAPI.java | 8 +- .../quantilescommon/QuantilesGenericAPI.java | 8 +- .../quantilescommon/QuantilesLongsAPI.java | 8 +- .../datasketches/req/BaseReqSketch.java | 1 + .../org/apache/datasketches/req/ReqSerDe.java | 3 +- .../sampling/ReservoirLongsUnion.java | 7 +- .../datasketches/sampling/ReservoirSize.java | 1 + .../datasketches/sampling/package-info.java | 4 +- .../datasketches/theta/PreambleUtil.java | 3 +- .../datasketches/theta/UpdateSketch.java | 1 + .../org/apache/datasketches/tuple/Union.java | 3 +- .../tuple/adouble/DoubleSummary.java | 4 + .../tuple/aninteger/IntegerSummary.java | 4 + .../DirectArrayOfDoublesCompactSketch.java | 1 + 
.../DirectArrayOfDoublesIntersection.java | 3 +- ...DirectArrayOfDoublesQuickSelectSketch.java | 1 + .../DirectArrayOfDoublesSketchIterator.java | 1 + .../DirectArrayOfDoublesUnion.java | 1 + tools/SketchesCheckstyle.xml | 2 +- 46 files changed, 146 insertions(+), 174 deletions(-) delete mode 100644 .github/workflows/manual-coverage.yml diff --git a/.github/workflows/auto-jdk-matrix.yml b/.github/workflows/auto-jdk-matrix.yml index ce15bb57e..6e280f123 100644 --- a/.github/workflows/auto-jdk-matrix.yml +++ b/.github/workflows/auto-jdk-matrix.yml @@ -3,7 +3,7 @@ name: DataSketches-Java Auto JDK Matrix Test & Install on: pull_request: push: - branches: [ master ] + branches: [ master, main ] workflow_dispatch: env: diff --git a/.github/workflows/auto-os-matrix.yml b/.github/workflows/auto-os-matrix.yml index a8afc3b6b..d9f5bc4c5 100644 --- a/.github/workflows/auto-os-matrix.yml +++ b/.github/workflows/auto-os-matrix.yml @@ -3,7 +3,7 @@ name: DataSketches-Java Auto OS Matrix Test & Install on: pull_request: push: - branches: [ master ] + branches: [ master, main ] workflow_dispatch: env: @@ -55,8 +55,6 @@ jobs: distribution: 'temurin' java-package: jdk architecture: x64 -# Architecture options: x86, x64, armv7, aarch64, ppc64le -# setup-java@v4 has a "with cache" option - name: Echo Java Version run: > @@ -75,6 +73,8 @@ jobs: -D skipTests=true ${{matrix.os.skip_gpg}} +# Architecture options: x86, x64, armv7, aarch64, ppc64le +# setup-java@v4 has a "with cache" option # Lifecycle: validate, compile, test, package, verify, install, deploy # -B batch mode # -V show Version without stopping diff --git a/.github/workflows/javadoc.yml b/.github/workflows/javadoc.yml index 5487b9e7b..04d70e25a 100644 --- a/.github/workflows/javadoc.yml +++ b/.github/workflows/javadoc.yml @@ -2,7 +2,7 @@ name: JavaDoc on: push: - branches: [7.0.X, master] + branches: [ master, main ] workflow_dispatch: jobs: @@ -22,12 +22,15 @@ jobs: - name: Echo Java Version run: java -version + - name: 
Print Current workflow + run: > + cat .github/workflows/javadoc.yml + - name: Generate JavaDoc run: mvn javadoc:javadoc - name: Deploy JavaDoc uses: JamesIves/github-pages-deploy-action@v4.6.8 - #uses: JamesIves/github-pages-deploy-action@881db5376404c5c8d621010bcbec0310b58d5e29 #alternate with: token: ${{ secrets.GITHUB_TOKEN }} folder: target/reports/apidocs diff --git a/.github/workflows/manual-coverage.yml b/.github/workflows/manual-coverage.yml deleted file mode 100644 index 79b6006b3..000000000 --- a/.github/workflows/manual-coverage.yml +++ /dev/null @@ -1,71 +0,0 @@ -name: Datasketches-Java Manual Coverage Report - -on: - workflow_dispatch: - -env: - MAVEN_OPTS: -Xmx4g -Xms1g - -jobs: - build: - name: Build, Test, Coverage - runs-on: ${{matrix.os}} - strategy: - fail-fast: false - matrix: - jdk: [ 17 ] - os: [ ubuntu-latest ] - include: -# - os: windows-latest -# skip_javadoc: "`-Dmaven`.javadoc`.skip=true" -# skip_gpg: "`-Dgpg`.skip=true" - - os: ubuntu-latest - skip_javadoc: -Dmaven.javadoc.skip=true - skip_gpg: -Dgpg.skip=true -# - os: macos-latest -# skip_javadoc: -Dmaven.javadoc.skip=true -# skip_gpg: -Dgpg.skip=true - - env: - JDK_VERSION: ${{ matrix.jdk }} - - steps: - - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v4 - with: - persist-credentials: false - - - name: Cache local Maven repository - uses: actions/cache@v4 - with: - path: ~/.m2/repository - key: build-${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: build-${{ runner.os }}-maven- - - - name: Install Matrix JDK - uses: actions/setup-java@v4 - with: - java-version: ${{ matrix.jdk }} - distribution: 'temurin' - java-package: jdk - architecture: x64 -# Architecture options: x86, x64, armv7, aarch64, ppc64le -# setup-java@v4 has a "with cache" option - - - name: Echo Java Version - run: > - java -version - - - name: Test, Package, Verify, Coverage Report - if: ${{ matrix.jdk == 17 && success() }} - run: - mvn verify coveralls:report -B 
- -DrepoToken=${{secrets.coveralls_token}} - ${{matrix.os.skip_javadoc}} - ${{matrix.os.skip_gpg}} - -# Lifecycle: validate, compile, test, package, verify, install, deploy -# Coverage reports are available after the verify phase -# -B batch mode -# -V show Version without stopping -# -q quiet, only show errors diff --git a/pom.xml b/pom.xml index c843c9835..ccff0122d 100644 --- a/pom.xml +++ b/pom.xml @@ -95,9 +95,10 @@ under the License. 3.6.3 17 + --add-modules=jdk.incubator.foreign ${java.version} ${java.version} - -Xmx4g -Duser.language=en -Duser.country=US -Dfile.encoding=UTF-8 --add-modules=jdk.incubator.foreign + -Xmx4g -Duser.language=en -Duser.country=US -Dfile.encoding=UTF-8 ${add-modules} UTF-8 ${charset.encoding} ${charset.encoding} @@ -105,30 +106,31 @@ under the License. yyyy-MM-dd'T'HH-mm-ss'Z' - 3.7.1 - 3.13.0 - 3.1.3 - 3.5.0 - 3.2.5 - 3.4.2 - 3.8.0 - 3.1.1 - 3.2.0 - 3.3.1 - 3.4.0 - - 4.9.10 - - 0.16.1 - - - 4.3.0 - - 0.8.12 - - 2.17.1 - - 1.0.0 + 3.7.1 + 3.13.0 + 3.1.3 + 3.5.0 + 3.2.7 + 3.4.2 + 3.11.1 + 3.1.1 + 3.2.0 + 3.3.1 + 3.5.2 + 3.2.0 + + 4.9.10 + + 0.16.1 + + + 4.3.0 + + 0.8.12 + + 2.17.1 + + 1.0.0
@@ -145,13 +147,6 @@ under the License. ${testng.version} test
-
@@ -169,7 +164,7 @@ under the License. ${maven-compiler-plugin.version} - --add-modules=jdk.incubator.foreign + ${add-modules} @@ -195,10 +190,10 @@ under the License. - [17,18) + ${java.version} - [${maven.version},) + [${maven.version},4.0.0) @@ -243,7 +238,7 @@ under the License. true public - --add-modules=jdk.incubator.foreign + ${add-modules} @@ -289,7 +284,7 @@ under the License. maven-surefire-plugin ${maven-surefire-failsafe-plugins.version} - --add-modules=jdk.incubator.foreign + ${add-modules} false false true @@ -298,6 +293,26 @@ under the License. + + org.apache.maven.plugins + maven-toolchains-plugin + ${maven-toolchains-plugin.version} + + + + toolchain + + + + + + + ${java.version} + + + + + org.apache.rat apache-rat-plugin @@ -402,6 +417,10 @@ under the License. org.apache.maven.plugins maven-surefire-plugin + + org.apache.maven.plugins + maven-toolchains-plugin + org.apache.rat apache-rat-plugin diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java index eafe88340..171fc2cfb 100644 --- a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java +++ b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java @@ -33,8 +33,8 @@ import org.apache.datasketches.memory.XxHash; /** - *

A Bloom filter is a data structure that can be used for probabilistic - * set membership.

+ * A Bloom filter is a data structure that can be used for probabilistic + * set membership. * *

When querying a Bloom filter, there are no false positives. Specifically: * When querying an item that has already been inserted to the filter, the filter will diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilterBuilder.java b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilterBuilder.java index f865a3350..ee17a9918 100644 --- a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilterBuilder.java +++ b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilterBuilder.java @@ -25,8 +25,8 @@ import org.apache.datasketches.memory.WritableMemory; /** - *

This class provides methods to help estimate the correct parameters when - * creating a Bloom filter, and methods to create the filter using those values.

+ * This class provides methods to help estimate the correct parameters when + * creating a Bloom filter, and methods to create the filter using those values. * *

The underlying math is described in the * diff --git a/src/main/java/org/apache/datasketches/frequencies/ItemsSketch.java b/src/main/java/org/apache/datasketches/frequencies/ItemsSketch.java index 7c5bc770d..b2648b943 100644 --- a/src/main/java/org/apache/datasketches/frequencies/ItemsSketch.java +++ b/src/main/java/org/apache/datasketches/frequencies/ItemsSketch.java @@ -55,10 +55,10 @@ import org.apache.datasketches.memory.WritableMemory; /** - *

This sketch is useful for tracking approximate frequencies of items of type <T> + * This sketch is useful for tracking approximate frequencies of items of type <T> * with optional associated counts (<T> item, long count) that are members of a * multiset of such items. The true frequency of an item is defined to be the sum of associated - * counts.

+ * counts. * *

This implementation provides the following capabilities:

*
    diff --git a/src/main/java/org/apache/datasketches/frequencies/LongsSketch.java b/src/main/java/org/apache/datasketches/frequencies/LongsSketch.java index 30452a1b5..7d1990b62 100644 --- a/src/main/java/org/apache/datasketches/frequencies/LongsSketch.java +++ b/src/main/java/org/apache/datasketches/frequencies/LongsSketch.java @@ -54,9 +54,9 @@ import org.apache.datasketches.memory.WritableMemory; /** - *

    This sketch is useful for tracking approximate frequencies of long items with optional + * This sketch is useful for tracking approximate frequencies of long items with optional * associated counts (long item, long count) that are members of a multiset of - * such items. The true frequency of an item is defined to be the sum of associated counts.

    + * such items. The true frequency of an item is defined to be the sum of associated counts. * *

    This implementation provides the following capabilities:

    *
      diff --git a/src/main/java/org/apache/datasketches/frequencies/PreambleUtil.java b/src/main/java/org/apache/datasketches/frequencies/PreambleUtil.java index 785c211f2..b5db6025c 100644 --- a/src/main/java/org/apache/datasketches/frequencies/PreambleUtil.java +++ b/src/main/java/org/apache/datasketches/frequencies/PreambleUtil.java @@ -31,6 +31,7 @@ /** * This class defines the preamble data structure and provides basic utilities for some of the key * fields. + * *

      * The intent of the design of this class was to isolate the detailed knowledge of the bit and byte * layout of the serialized form of the sketches derived from the Sketch class into one place. This diff --git a/src/main/java/org/apache/datasketches/hash/MurmurHash3.java b/src/main/java/org/apache/datasketches/hash/MurmurHash3.java index 9fb2ab948..a708e0077 100644 --- a/src/main/java/org/apache/datasketches/hash/MurmurHash3.java +++ b/src/main/java/org/apache/datasketches/hash/MurmurHash3.java @@ -29,10 +29,8 @@ import org.apache.datasketches.memory.Memory; /** - *

      * The MurmurHash3 is a fast, non-cryptographic, 128-bit hash function that has * excellent avalanche and 2-way bit independence properties. - *

      * *

      * Austin Appleby's C++ diff --git a/src/main/java/org/apache/datasketches/hash/package-info.java b/src/main/java/org/apache/datasketches/hash/package-info.java index 2d97afeb6..5744b2776 100644 --- a/src/main/java/org/apache/datasketches/hash/package-info.java +++ b/src/main/java/org/apache/datasketches/hash/package-info.java @@ -18,12 +18,11 @@ */ /** - *

      The hash package contains a high-performing and extended Java implementations + * The hash package contains a high-performing and extended Java implementations * of Austin Appleby's 128-bit MurmurHash3 hash function originally coded in C. * This core MurmurHash3.java class is used throughout many of the sketch classes for consistency * and as long as the user specifies the same seed will result in coordinated hash operations. * This package also contains an adaptor class that extends the basic class with more functions * commonly associated with hashing. - *

      */ package org.apache.datasketches.hash; diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java index 67035b45f..acbecdf07 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java @@ -312,6 +312,7 @@ private static void randomlyHalveUpDoubles(final double[] buf, final int start, /** * Compression algorithm used to merge higher levels. + * *

      Here is what we do for each level:

      *
      • If it does not need to be compacted, then simply copy it over.
      • *
      • Otherwise, it does need to be compacted, so... diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index 59cda435f..fbe9dbb36 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -278,6 +278,7 @@ public final void merge(final KllSketch other) { /** * {@inheritDoc} + * *

        The parameter k will not change.

        */ @Override diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java index 50cadeb3e..69045f78c 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java @@ -312,6 +312,7 @@ private static void randomlyHalveUpFloats(final float[] buf, final int start, fi /** * Compression algorithm used to merge higher levels. + * *

        Here is what we do for each level:

        *
        • If it does not need to be compacted, then simply copy it over.
        • *
        • Otherwise, it does need to be compacted, so... diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index 48781631d..b993f9998 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -278,6 +278,7 @@ public final void merge(final KllSketch other) { /** * {@inheritDoc} + * *

          The parameter k will not change.

          */ @Override diff --git a/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java b/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java index 00b8c7d35..dd309ba3a 100644 --- a/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java @@ -346,6 +346,7 @@ static void updateItem(final KllItemsSketch itmSk, final T item, final lo /** * Compression algorithm used to merge higher levels. + * *

          Here is what we do for each level:

          *
          • If it does not need to be compacted, then simply copy it over.
          • *
          • Otherwise, it does need to be compacted, so... diff --git a/src/main/java/org/apache/datasketches/kll/KllLongsHelper.java b/src/main/java/org/apache/datasketches/kll/KllLongsHelper.java index ec67b55d4..04fe2cc08 100644 --- a/src/main/java/org/apache/datasketches/kll/KllLongsHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllLongsHelper.java @@ -312,6 +312,7 @@ private static void randomlyHalveUpLongs(final long[] buf, final int start, fina /** * Compression algorithm used to merge higher levels. + * *

            Here is what we do for each level:

            *
            • If it does not need to be compacted, then simply copy it over.
            • *
            • Otherwise, it does need to be compacted, so... diff --git a/src/main/java/org/apache/datasketches/kll/KllLongsSketch.java b/src/main/java/org/apache/datasketches/kll/KllLongsSketch.java index 0f6fa7d89..f5688ad70 100644 --- a/src/main/java/org/apache/datasketches/kll/KllLongsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllLongsSketch.java @@ -278,6 +278,7 @@ public final void merge(final KllSketch other) { /** * {@inheritDoc} + * *

              The parameter k will not change.

              */ @Override diff --git a/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java b/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java index 49e7c5fbb..2fc399f9b 100644 --- a/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java +++ b/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java @@ -506,6 +506,7 @@ public QuantilesDoublesSketchIterator iterator() { /** * {@inheritDoc} + * *

              The parameter k will not change.

              */ @Override diff --git a/src/main/java/org/apache/datasketches/quantiles/package-info.java b/src/main/java/org/apache/datasketches/quantiles/package-info.java index c44e2e316..8767757b7 100644 --- a/src/main/java/org/apache/datasketches/quantiles/package-info.java +++ b/src/main/java/org/apache/datasketches/quantiles/package-info.java @@ -18,9 +18,8 @@ */ /** - *

              The quantiles package contains stochastic streaming algorithms that enable single-pass + * The quantiles package contains stochastic streaming algorithms that enable single-pass * analysis of the distribution of a stream of quantiles. - *

              * * @see org.apache.datasketches.quantiles.DoublesSketch * @see org.apache.datasketches.quantiles.ItemsSketch diff --git a/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java index 98616661f..1427f6279 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java @@ -38,7 +38,7 @@ public interface DoublesSortedView extends SortedView { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 overlapping intervals. - * + *
              *

              The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.

              @@ -55,7 +55,7 @@ public interface DoublesSortedView extends SortedView { *
            * *

            It is not recommended to include either the minimum or maximum items of the input stream.

            - * + * * @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. @@ -100,7 +100,7 @@ default double[] getCDF(double[] splitPoints, QuantileSearchCriteria searchCrit) * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 consecutive, non-overlapping intervals. - * + *
            *

            Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.

            * @@ -124,7 +124,7 @@ default double[] getCDF(double[] splitPoints, QuantileSearchCriteria searchCrit) *
          * *

          It is not recommended to include either the minimum or maximum items of the input stream.

          - * + * * @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. diff --git a/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java index 0a0c54b5a..eec699d94 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java @@ -38,7 +38,7 @@ public interface FloatsSortedView extends SortedView { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 overlapping intervals. - * + *
          *

          The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.

          @@ -55,7 +55,7 @@ public interface FloatsSortedView extends SortedView { *
        * *

        It is not recommended to include either the minimum or maximum items of the input stream.

        - * + * * @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. @@ -100,7 +100,7 @@ default double[] getCDF(float[] splitPoints, QuantileSearchCriteria searchCrit) * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 consecutive, non-overlapping intervals. - * + *
        *

        Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.

        * @@ -124,7 +124,7 @@ default double[] getCDF(float[] splitPoints, QuantileSearchCriteria searchCrit) *
      * *

      It is not recommended to include either the minimum or maximum items of the input stream.

      - * + * * @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. diff --git a/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java index 0d27ce78c..f0dc81151 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java @@ -47,7 +47,7 @@ public interface GenericSortedView extends PartitioningFeature, SketchPar * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 overlapping intervals. - * + *
      *

      The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.

      @@ -64,7 +64,7 @@ public interface GenericSortedView extends PartitioningFeature, SketchPar *
    * *

    It is not recommended to include either the minimum or maximum items of the input stream.

    - * + * * @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. @@ -116,7 +116,7 @@ default double[] getCDF(final T[] splitPoints, final QuantileSearchCriteria sear * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 consecutive, non-overlapping intervals. - * + *
    *

    Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.

    * @@ -140,7 +140,7 @@ default double[] getCDF(final T[] splitPoints, final QuantileSearchCriteria sear *
* *

It is not recommended to include either the minimum or maximum items of the input stream.

- * + * * @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. diff --git a/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java index 4823edd1d..e7e3521c7 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java @@ -38,7 +38,7 @@ public interface LongsSortedView extends SortedView { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 overlapping intervals. - * + *
*

The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.

@@ -55,7 +55,7 @@ public interface LongsSortedView extends SortedView { * * *

It is not recommended to include either the minimum or maximum items of the input stream.

- * + *
* @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. @@ -100,7 +100,7 @@ default double[] getCDF(long[] splitPoints, QuantileSearchCriteria searchCrit) { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 consecutive, non-overlapping intervals. - * + *
*

Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.

* @@ -124,7 +124,7 @@ default double[] getCDF(long[] splitPoints, QuantileSearchCriteria searchCrit) { * * *

It is not recommended to include either the minimum or maximum items of the input stream.

- * + *
* @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java index 3dc0651a7..a082fc27a 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java @@ -20,12 +20,12 @@ package org.apache.datasketches.quantilescommon; /** - *

This is a stochastic streaming sketch that enables near-real time analysis of the + * This is a stochastic streaming sketch that enables near-real time analysis of the * approximate distribution of items from a very large stream in a single pass, requiring only * that the items are comparable. * The analysis is obtained using the getQuantile() function or the * inverse functions getRank(), getPMF() (the Probability Mass Function), and getCDF() - * (the Cumulative Distribution Function).

+ * (the Cumulative Distribution Function). * *

Given an input stream of N items, the natural rank of any specific * item is defined as its index (1 to N) in the hypothetical sorted stream of all diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java index e8e5310f5..8c4f6620f 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java @@ -50,7 +50,7 @@ default double[] getCDF(double[] splitPoints) { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 overlapping intervals. - * + *

*

The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.

@@ -67,7 +67,7 @@ default double[] getCDF(double[] splitPoints) { * * *

It is not recommended to include either the minimum or maximum items of the input stream.

- * + *
* @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. @@ -113,7 +113,7 @@ default double[] getPMF(double[] splitPoints) { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 consecutive, non-overlapping intervals. - * + *
*

Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.

* @@ -137,7 +137,7 @@ default double[] getPMF(double[] splitPoints) { * * *

It is not recommended to include either the minimum or maximum items of the input stream.

- * + *
* @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java index 986780444..8b8a91bdd 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java @@ -49,7 +49,7 @@ default double[] getCDF(float[] splitPoints) { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 overlapping intervals. - * + *
*

The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.

@@ -66,7 +66,7 @@ default double[] getCDF(float[] splitPoints) { * * *

It is not recommended to include either the minimum or maximum items of the input stream.

- * + *
* @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. @@ -112,7 +112,7 @@ default double[] getPMF(float[] splitPoints) { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 consecutive, non-overlapping intervals. - * + *
*

Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.

* @@ -136,7 +136,7 @@ default double[] getPMF(float[] splitPoints) { * * *

It is not recommended to include either the minimum or maximum items of the input stream.

- * + *
* @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java index 459e58cdd..bc0881282 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java @@ -53,7 +53,7 @@ default double[] getCDF(T[] splitPoints) { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 overlapping intervals. - * + *
*

The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.

@@ -70,7 +70,7 @@ default double[] getCDF(T[] splitPoints) { * * *

It is not recommended to include either the minimum or maximum items of the input stream.

- * + *
* @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. @@ -132,7 +132,7 @@ default double[] getPMF(T[] splitPoints) { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 consecutive, non-overlapping intervals. - * + *
*

Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.

* @@ -156,7 +156,7 @@ default double[] getPMF(T[] splitPoints) { * * *

It is not recommended to include either the minimum or maximum items of the input stream.

- * + *
* @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsAPI.java index 2b542a68f..fb1ca5817 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsAPI.java @@ -50,7 +50,7 @@ default double[] getCDF(long[] splitPoints) { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 overlapping intervals. - * + *
*

The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.

@@ -67,7 +67,7 @@ default double[] getCDF(long[] splitPoints) { * * *

It is not recommended to include either the minimum or maximum items of the input stream.

- * + *
* @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. @@ -113,7 +113,7 @@ default double[] getPMF(long[] splitPoints) { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 consecutive, non-overlapping intervals. - * + *
*

Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.

* @@ -137,7 +137,7 @@ default double[] getPMF(long[] splitPoints) { * * *

It is not recommended to include either the minimum or maximum items of the input stream.

- * + *
* @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. diff --git a/src/main/java/org/apache/datasketches/req/BaseReqSketch.java b/src/main/java/org/apache/datasketches/req/BaseReqSketch.java index 05d4d3ce2..feeba7739 100644 --- a/src/main/java/org/apache/datasketches/req/BaseReqSketch.java +++ b/src/main/java/org/apache/datasketches/req/BaseReqSketch.java @@ -172,6 +172,7 @@ public boolean isReadOnly() { /** * {@inheritDoc} + * *

The parameters k, highRankAccuracy, and reqDebug will not change.

*/ @Override diff --git a/src/main/java/org/apache/datasketches/req/ReqSerDe.java b/src/main/java/org/apache/datasketches/req/ReqSerDe.java index 52b1371a9..952749deb 100644 --- a/src/main/java/org/apache/datasketches/req/ReqSerDe.java +++ b/src/main/java/org/apache/datasketches/req/ReqSerDe.java @@ -110,7 +110,8 @@ * 0 || (empty)| 0 | K | Flags |FamID=17| SerVer | PreInts = 2 | * *
-   * 

Flags:

+ * + * Flags: * Bit 0 : Endianness, reserved * Bit 1 : ReadOnly, reserved * Bit 2 : Empty diff --git a/src/main/java/org/apache/datasketches/sampling/ReservoirLongsUnion.java b/src/main/java/org/apache/datasketches/sampling/ReservoirLongsUnion.java index c3ef33957..b58317a9a 100644 --- a/src/main/java/org/apache/datasketches/sampling/ReservoirLongsUnion.java +++ b/src/main/java/org/apache/datasketches/sampling/ReservoirLongsUnion.java @@ -132,10 +132,9 @@ public static ReservoirLongsUnion heapify(final Memory srcMem) { /** * Union the given sketch. - *

- * This method can be repeatedly called. If the given sketch is null it is interpreted as an empty - * sketch. - *

+ * + *

This method can be repeatedly called. If the given sketch is null it is interpreted as an empty + * sketch.

* * @param sketchIn The incoming sketch. */ diff --git a/src/main/java/org/apache/datasketches/sampling/ReservoirSize.java b/src/main/java/org/apache/datasketches/sampling/ReservoirSize.java index e6f361955..e12d31aa9 100644 --- a/src/main/java/org/apache/datasketches/sampling/ReservoirSize.java +++ b/src/main/java/org/apache/datasketches/sampling/ReservoirSize.java @@ -25,6 +25,7 @@ /** * This class provides a compact representation of reservoir size by encoding it into a * fixed-point 16-bit value. + * *

The value itself is a fractional power of 2, with 5 bits of exponent and 11 bits of * mantissa. The exponent allows a choice of anywhere from 0-30, and there are 2048 possible * reservoir size values within each octave. Because reservoir size must be an integer, this diff --git a/src/main/java/org/apache/datasketches/sampling/package-info.java b/src/main/java/org/apache/datasketches/sampling/package-info.java index edfaa20a8..bbe446914 100644 --- a/src/main/java/org/apache/datasketches/sampling/package-info.java +++ b/src/main/java/org/apache/datasketches/sampling/package-info.java @@ -18,8 +18,8 @@ */ /** - *

This package is dedicated to streaming algorithms that enable fixed size, uniform sampling of - * weighted and unweighted items from a stream.

+ * This package is dedicated to streaming algorithms that enable fixed size, uniform sampling of + * weighted and unweighted items from a stream. * *

These sketches are mergeable and can be serialized and deserialized to/from a compact * form.

diff --git a/src/main/java/org/apache/datasketches/theta/PreambleUtil.java b/src/main/java/org/apache/datasketches/theta/PreambleUtil.java index 562be982c..e1d9262e6 100644 --- a/src/main/java/org/apache/datasketches/theta/PreambleUtil.java +++ b/src/main/java/org/apache/datasketches/theta/PreambleUtil.java @@ -37,6 +37,7 @@ /** * This class defines the preamble data structure and provides basic utilities for some of the key * fields. + * *

The intent of the design of this class was to isolate the detailed knowledge of the bit and * byte layout of the serialized form of the sketches derived from the Sketch class into one place. * This allows the possibility of the introduction of different serialization @@ -126,7 +127,7 @@ * 3 ||----------------------Start of Hash Table of longs---------------------------------| *

* - *

Union objects require 32 bytes of preamble plus a non-compact array of longs representing a + *

Union objects require 32 bytes of preamble plus a non-compact array of longs representing a * hash table.

* *
diff --git a/src/main/java/org/apache/datasketches/theta/UpdateSketch.java b/src/main/java/org/apache/datasketches/theta/UpdateSketch.java
index 882c5e2e9..cb6854b02 100644
--- a/src/main/java/org/apache/datasketches/theta/UpdateSketch.java
+++ b/src/main/java/org/apache/datasketches/theta/UpdateSketch.java
@@ -343,6 +343,7 @@ public UpdateReturnState update(final long[] data) {
 
   /**
    * All potential updates converge here.
+   *
    * 

Don't ever call this unless you really know what you are doing!

* * @param hash the given input hash value. A hash of zero or Long.MAX_VALUE is ignored. diff --git a/src/main/java/org/apache/datasketches/tuple/Union.java b/src/main/java/org/apache/datasketches/tuple/Union.java index 653312fa0..acefa2ab5 100644 --- a/src/main/java/org/apache/datasketches/tuple/Union.java +++ b/src/main/java/org/apache/datasketches/tuple/Union.java @@ -100,8 +100,7 @@ public CompactSketch union(final Sketch tupleSketch, /** * Performs a stateful union of the internal set with the given tupleSketch. * @param tupleSketch input tuple sketch to merge with the internal set. - * - *

Nulls and empty sketches are ignored.

+ * Nulls and empty sketches are ignored. */ public void union(final Sketch tupleSketch) { if (tupleSketch == null || tupleSketch.isEmpty()) { return; } diff --git a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java index e7abae0d4..a54c11afc 100644 --- a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java +++ b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java @@ -41,24 +41,28 @@ public enum Mode { /** * The aggregation mode is the summation function. + * *

New retained value = previous retained value + incoming value

*/ Sum, /** * The aggregation mode is the minimum function. + * *

New retained value = min(previous retained value, incoming value)

*/ Min, /** * The aggregation mode is the maximum function. + * *

New retained value = max(previous retained value, incoming value)

*/ Max, /** * The aggregation mode is always one. + * *

New retained value = 1.0

*/ AlwaysOne diff --git a/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java index 4c04fa2c2..72695355e 100644 --- a/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java +++ b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java @@ -41,24 +41,28 @@ public enum Mode { /** * The aggregation mode is the summation function. + * *

New retained value = previous retained value + incoming value

*/ Sum, /** * The aggregation mode is the minimum function. + * *

New retained value = min(previous retained value, incoming value)

*/ Min, /** * The aggregation mode is the maximum function. + * *

New retained value = max(previous retained value, incoming value)

*/ Max, /** * The aggregation mode is always one. + * *

New retained value = 1

*/ AlwaysOne diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java index 91d4eade4..52f827149 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java @@ -30,6 +30,7 @@ /** * Direct Compact Sketch of type ArrayOfDoubles. + * *

This implementation uses data in a given Memory that is owned and managed by the caller. * This Memory can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.

diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesIntersection.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesIntersection.java index 3dd019d74..7c1b1bf07 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesIntersection.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesIntersection.java @@ -23,6 +23,7 @@ /** * Direct Intersection operation for tuple sketches of type ArrayOfDoubles. + * *

This implementation uses data in a given Memory that is owned and managed by the caller. * This Memory can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.

@@ -43,7 +44,7 @@ final class DirectArrayOfDoublesIntersection extends ArrayOfDoublesIntersection } @Override - protected ArrayOfDoublesQuickSelectSketch createSketch(final int nomEntries, final int numValues, + protected ArrayOfDoublesQuickSelectSketch createSketch(final int nomEntries, final int numValues, final long seed) { return new DirectArrayOfDoublesQuickSelectSketch(nomEntries, 0, 1f, numValues, seed, mem_); } diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java index 1b4e86904..ae1aa3dc0 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java @@ -33,6 +33,7 @@ /** * Direct QuickSelect tuple sketch of type ArrayOfDoubles. + * *

This implementation uses data in a given Memory that is owned and managed by the caller. * This Memory can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.

diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java index 15503fc04..dcdab1313 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java @@ -23,6 +23,7 @@ /** * Iterator over the off-heap, Direct tuple sketch of type ArrayOfDoubles (compact or hash table). + * *

This implementation uses data in a given Memory that is owned and managed by the caller. * This Memory can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.

diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesUnion.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesUnion.java index 00310f534..734019632 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesUnion.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesUnion.java @@ -27,6 +27,7 @@ /** * Direct Union operation for tuple sketches of type ArrayOfDoubles. + * *

This implementation uses data in a given Memory that is owned and managed by the caller. * This Memory can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.

diff --git a/tools/SketchesCheckstyle.xml b/tools/SketchesCheckstyle.xml index 873a878a0..8d4b40a16 100644 --- a/tools/SketchesCheckstyle.xml +++ b/tools/SketchesCheckstyle.xml @@ -230,7 +230,7 @@ under the License. - + From c57687bfe52bf4196516698e01564d7e36ffe6b3 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Wed, 11 Dec 2024 13:58:06 -0800 Subject: [PATCH 17/22] Update README.md --- README.md | 98 ++++++++++--------------------------------------------- 1 file changed, 17 insertions(+), 81 deletions(-) diff --git a/README.md b/README.md index 80517d304..547df3e9d 100644 --- a/README.md +++ b/README.md @@ -25,10 +25,10 @@ # Apache® DataSketches™ Core Java Library Component This is the core Java component of the DataSketches library. It contains all of the sketching algorithms and can be accessed directly from user applications. -This component is also a dependency of other components of the library that create adaptors for target systems, such as the [Apache Pig adaptor](https://github.com/apache/datasketches-pig) and the [Apache Hive adaptor](https://github.com/apache/datasketches-hive). +This component is also a dependency of other components of the library that create adaptors for target systems, such as the [Apache Pig adaptor](https://github.com/apache/datasketches-pig), the [Apache Hive adaptor](https://github.com/apache/datasketches-hive), and others. Note that we have a parallel core component for C++ and Python implementations of the same sketch algorithms, -[datasketches-cpp](https://github.com/apache/datasketches-cpp). +[datasketches-cpp](https://github.com/apache/datasketches-cpp) and [datasketches-python](https://github.com/apache/datasketches-python) Please visit the main [DataSketches website](https://datasketches.apache.org) for more information. @@ -39,30 +39,29 @@ If you are interested in making contributions to this site please see our [Commu ## Maven Build Instructions __NOTE:__ This component accesses resource files for testing. 
As a result, the directory elements of the full absolute path of the target installation directory must qualify as Java identifiers. In other words, the directory elements must not have any space characters (or non-Java identifier characters) in any of the path elements. This is required by the Oracle Java Specification in order to ensure location-independent access to resources: [See Oracle Location-Independent Access to Resources](https://docs.oracle.com/javase/8/docs/technotes/guides/lang/resources.html) -### A JDK8 with Hotspot or JDK11 with Hotspot is required to compile -This component depends on the [datasketches-memory](https://github.com/apache/datasketches-memory) component, -and, as a result, must be compiled with one of the above JDKs. -If your application only relies on the APIs of this component no special JVM arguments are required. -However, if your application also directly relies on the APIs of the *datasketches-memory* component, -you may need additional JVM arguments. -Please refer to the [datasketches-memory README](https://github.com/apache/datasketches-memory/blob/master/README.md) for details. +### JDK17 is required to compile +This component depends on the [datasketches-memory-4.1.X](https://github.com/apache/datasketches-memory/tree/4.1.X) component, +and, as a result, must be compiled with JDK17 and this dependency: -If your application uses Maven, you can also use the *pom.xml* of this component as an example of how to automatically -configure the JVM arguments for compilation and testing based on the version of the JDK. +``` + + org.apache.datasketches + datasketches-memory + 4.1.0 + +``` + +If your application only relies on the APIs of datasketches-java no special JVM arguments are required. +However, if your application also directly relies on the APIs of the *datasketches-memory* component, +you may need the additional JVM argument **--enable-preview**. 
### Recommended Build Tool This DataSketches component is structured as a Maven project and Maven is the recommended Build Tool. -There are two types of tests: normal unit tests and tests run by the strict profile. - To run normal unit tests: $ mvn clean test -To run the strict profile tests (only supported in Java 8): - - $ mvn clean test -P strict - To install jars built from the downloaded source: $ mvn clean install -DskipTests=true @@ -80,74 +79,11 @@ This will create the following jars: #### Run-time There is one run-time dependency: -* org.apache.datasketches : datasketches-memory +* [datasketches-memory-4.1.X](https://github.com/apache/datasketches-memory/tree/4.1.X) #### Testing See the pom.xml file for test dependencies. -## Special Build / Test Instructions for Eclipse - -Building and running tests using JDK 8 should not be a problem. - -However, with JDK 9+, and Eclipse versions up to and including 4.22.0 (2021-12), Eclipse fails to translate the required JPMS JVM arguments specified in the POM compiler or surefire plugins into the *.classpath* file, causing illegal reflection access errors -[eclipse-m2e/m2e-core Bug 543631](https://github.com/eclipse-m2e/m2e-core/issues/129). - -There are two ways to fix this: - -#### Method 1: Manually update *.classpath* file: -Open the *.classpath* file in a text editor and find the following *classpathentry* element (this assumes JDK11, change to suit): - -``` - - - - - - -``` -Then edit it as follows: - -``` - - - - - - - - -``` -Finally, *refresh*. 
- -#### Method 2: Manually update *Module Dependencies* - -In Eclipse, open the project *Properties / Java Build Path / Module Dependencies ...* - -* Select *java.base* -* Select *Configured details* -* Select *Expose Package...* - * Enter *Package* = java.nio - * Enter *Target module* = ALL-UNNAMED - * Select button: *opens* - * Hit *OK* -* Select *Expose Package...* - * Enter *Package* = jdk.internal.misc - * Enter *Target module* = ALL-UNNAMED - * Select button: *exports* - * Hit *OK* -* Select *Expose Package...* - * Enter *Package* = jdk.internal.ref - * Enter *Target module* = ALL-UNNAMED - * Select button: *exports* - * Hit *OK* -* Select *Expose Package...* - * Enter *Package* = sun.nio.ch - * Enter *Target module* = ALL-UNNAMED - * Select button: *opens* - * Hit *OK* - -**NOTE:** If you execute *Maven/Update Project...* from Eclipse with the option *Update project configuration from pom.xml* checked, all of the above will be erased, and you will have to redo it. - ## Known Issues #### SpotBugs From 5fc852f5149402ee098e857bfdb9ad39ada53e85 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Wed, 11 Dec 2024 14:02:13 -0800 Subject: [PATCH 18/22] Update pom in preparation to create main based on 17. --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index ccff0122d..4c5532e54 100644 --- a/pom.xml +++ b/pom.xml @@ -33,7 +33,7 @@ under the License. org.apache.datasketches datasketches-java - 7.0.0 + 7.0.0-SNAPSHOT jar ${project.artifactId} From d0575f7176e0d6c7c016caec01d0aeeb6488aaa2 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Tue, 17 Dec 2024 12:18:05 -0800 Subject: [PATCH 19/22] Update move_to_17_using_FFM with GHA workflow changes. 
--- ...cpp_files.yml => auto-check_cpp_files.yml} | 32 +++++++++++++------ .github/workflows/auto-jdk-matrix.yml | 22 +++++++------ ...nalysis.yml => manual-codeql-analysis.yml} | 11 ++----- .../{javadoc.yml => manual-javadoc.yml} | 6 ++-- 4 files changed, 38 insertions(+), 33 deletions(-) rename .github/workflows/{check_cpp_files.yml => auto-check_cpp_files.yml} (59%) rename .github/workflows/{codeql-analysis.yml => manual-codeql-analysis.yml} (84%) rename .github/workflows/{javadoc.yml => manual-javadoc.yml} (85%) diff --git a/.github/workflows/check_cpp_files.yml b/.github/workflows/auto-check_cpp_files.yml similarity index 59% rename from .github/workflows/check_cpp_files.yml rename to .github/workflows/auto-check_cpp_files.yml index 59ae5824a..b9e05ae97 100644 --- a/.github/workflows/check_cpp_files.yml +++ b/.github/workflows/auto-check_cpp_files.yml @@ -2,8 +2,7 @@ name: Serialization Compatibility Test on: push: - branches: - - master + branches: [ master, main ] workflow_dispatch: jobs: @@ -12,21 +11,34 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v3 - - name: Checkout C++ - uses: actions/checkout@v3 + uses: actions/checkout@v4 + + - name: Checkout C++ + uses: actions/checkout@v4 with: repository: apache/datasketches-cpp path: cpp - - name: Configure C++ build + + - name: Setup Java + uses: actions/setup-java@v4 + with: + java-version: '17' + distribution: 'temurin' + + - name: Configure C++ build run: cd cpp/build && cmake .. 
-DGENERATE=true - - name: Build C++ unit tests + + - name: Build C++ unit tests run: cd cpp && cmake --build build --config Release - - name: Run C++ tests + + - name: Run C++ tests run: cd cpp && cmake --build build --config Release --target test - - name: Make dir + + - name: Make dir run: mkdir -p serialization_test_data/cpp_generated_files + - name: Copy files run: cp cpp/build/*/test/*_cpp.sk serialization_test_data/cpp_generated_files - - name: Run Java tests + + - name: Run Java tests run: mvn test -P check-cpp-files diff --git a/.github/workflows/auto-jdk-matrix.yml b/.github/workflows/auto-jdk-matrix.yml index 6e280f123..f8220bbe2 100644 --- a/.github/workflows/auto-jdk-matrix.yml +++ b/.github/workflows/auto-jdk-matrix.yml @@ -7,7 +7,7 @@ on: workflow_dispatch: env: - MAVEN_OPTS: -Xmx1g -Xms1g + MAVEN_OPTS: -Xmx4g -Xms1g jobs: build: @@ -16,9 +16,11 @@ jobs: strategy: fail-fast: false + matrix: + jdk: [ 17 ] env: - JDK_VERSION: 17 + JDK_VERSION: ${{ matrix.jdk }} steps: - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" @@ -30,14 +32,6 @@ jobs: run: > cat .github/workflows/auto-jdk-matrix.yml - - name: Setup Java - uses: actions/setup-java@v4 - with: - distribution: 'temurin' - java-package: jdk - architecture: x64 - java-version: 17 - - name: Cache local Maven repository uses: actions/cache@v4 with: @@ -45,6 +39,14 @@ jobs: key: build-${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} restore-keys: build-${{ runner.os }}-maven- + - name: Install Matrix JDK + uses: actions/setup-java@v4 + with: + java-version: ${{ matrix.jdk }} + distribution: 'temurin' + java-package: jdk + architecture: x64 + - name: Echo Java Version run: > java -version diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/manual-codeql-analysis.yml similarity index 84% rename from .github/workflows/codeql-analysis.yml rename to .github/workflows/manual-codeql-analysis.yml index 376d69404..1b94d13a3 100644 --- a/.github/workflows/codeql-analysis.yml +++ 
b/.github/workflows/manual-codeql-analysis.yml @@ -1,13 +1,6 @@ name: "CodeQL" on: -# push: -# branches: [ 'master' ] -# pull_request: - # The branches below must be a subset of the branches above -# branches: [ 'master' ] -# schedule: -# - cron: '10 17 * * 4' workflow_dispatch: jobs: @@ -48,8 +41,8 @@ jobs: - name: Autobuild uses: github/codeql-action/autobuild@v4 - # ℹ️ Command-line programs to run using the OS shell. - # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun + # Command-line programs to run using the OS shell. + # See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun # If the Autobuild fails above, remove it and uncomment the following three lines. # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. diff --git a/.github/workflows/javadoc.yml b/.github/workflows/manual-javadoc.yml similarity index 85% rename from .github/workflows/javadoc.yml rename to .github/workflows/manual-javadoc.yml index 04d70e25a..5f6ac6aec 100644 --- a/.github/workflows/javadoc.yml +++ b/.github/workflows/manual-javadoc.yml @@ -1,8 +1,6 @@ name: JavaDoc on: - push: - branches: [ master, main ] workflow_dispatch: jobs: @@ -24,7 +22,7 @@ jobs: - name: Print Current workflow run: > - cat .github/workflows/javadoc.yml + cat .github/workflows/manual-javadoc.yml - name: Generate JavaDoc run: mvn javadoc:javadoc @@ -33,6 +31,6 @@ jobs: uses: JamesIves/github-pages-deploy-action@v4.6.8 with: token: ${{ secrets.GITHUB_TOKEN }} - folder: target/reports/apidocs + folder: target/site/apidocs target-folder: docs/${{ github.ref_name }} branch: gh-pages From 8752c6aaa349e1afa817abd1ef9cd922062b06c3 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Tue, 17 Dec 2024 12:19:38 -0800 Subject: [PATCH 20/22] update move_to_17_using_FFM with 6.1.X --- .../frequencies/PreambleUtil.java | 16 +- 
.../quantiles/DoublesByteArrayImpl.java | 24 +- .../datasketches/tdigest/TDigestDouble.java | 48 +- .../bloomfilter/DirectBitArrayTest.java | 2 +- .../hll/DirectCouponListTest.java | 3 - .../datasketches/hll/PreambleUtilTest.java | 4 - .../kll/KllCrossLanguageTest.java | 10 + .../KllDirectCompactDoublesSketchTest.java | 14 +- .../KllDirectDoublesSketchIteratorTest.java | 2 +- .../kll/KllDirectDoublesSketchTest.java | 8 +- .../kll/KllDoublesSketchSerDeTest.java | 2 +- .../kll/KllDoublesSketchTest.java | 14 +- .../datasketches/kll/KllItemsSketchTest.java | 2 +- .../datasketches/kll/KllMiscDoublesTest.java | 4 +- .../datasketches/kll/KllMiscItemsTest.java | 20 +- .../quantiles/DebugUnionTest.java | 1 - .../DirectQuantilesMemoryRequestTest.java | 1 - .../quantiles/DoublesSketchTest.java | 1 - .../tdigest/TDigestDoubleTest.java | 10 + .../datasketches/theta/CompactSketchTest.java | 1 - ...ConcurrentDirectQuickSelectSketchTest.java | 1 - .../ConcurrentHeapQuickSelectSketchTest.java | 2 +- .../theta/DirectQuickSelectSketchTest.java | 9 +- .../theta/HeapifyWrapSerVer1and2Test.java | 833 +++++++++--------- .../datasketches/theta/SketchesTest.java | 3 +- .../theta/ThetaSketchCrossLanguageTest.java | 10 + .../datasketches/theta/UnionImplTest.java | 3 +- tools/SketchesCheckstyle.xml | 2 +- 28 files changed, 554 insertions(+), 496 deletions(-) diff --git a/src/main/java/org/apache/datasketches/frequencies/PreambleUtil.java b/src/main/java/org/apache/datasketches/frequencies/PreambleUtil.java index b5db6025c..1021a89ce 100644 --- a/src/main/java/org/apache/datasketches/frequencies/PreambleUtil.java +++ b/src/main/java/org/apache/datasketches/frequencies/PreambleUtil.java @@ -32,22 +32,16 @@ * This class defines the preamble data structure and provides basic utilities for some of the key * fields. * - *

- * The intent of the design of this class was to isolate the detailed knowledge of the bit and byte + *

The intent of the design of this class was to isolate the detailed knowledge of the bit and byte * layout of the serialized form of the sketches derived from the Sketch class into one place. This * allows the possibility of the introduction of different serialization schemes with minimal impact - * on the rest of the library. - *

+ * on the rest of the library.

* - *

- * MAP: Low significance bytes of this long data structure are on the right. However, the + *

MAP: Low significance bytes of this long data structure are on the right. However, the * multi-byte integers (int and long) are stored in native byte order. The byte - * values are treated as unsigned. - *

+ * values are treated as unsigned.

* - *

- * An empty FrequentItems only requires 8 bytes. All others require 32 bytes of preamble. - *

+ *

An empty FrequentItems only requires 8 bytes. All others require 32 bytes of preamble.

* *
  *  * Long || Start Byte Adr:
diff --git a/src/main/java/org/apache/datasketches/quantiles/DoublesByteArrayImpl.java b/src/main/java/org/apache/datasketches/quantiles/DoublesByteArrayImpl.java
index f4df5aa8b..8451bad33 100644
--- a/src/main/java/org/apache/datasketches/quantiles/DoublesByteArrayImpl.java
+++ b/src/main/java/org/apache/datasketches/quantiles/DoublesByteArrayImpl.java
@@ -58,7 +58,7 @@ static byte[] toByteArray(final DoublesSketch sketch, final boolean ordered, fin
         | (ordered ? ORDERED_FLAG_MASK : 0)
         | (compact ? (COMPACT_FLAG_MASK | READ_ONLY_FLAG_MASK) : 0);
 
-    if (empty && !sketch.hasMemory()) { //empty & has Memory
+    if (empty && !sketch.hasMemory()) { //empty & !has Memory
       final byte[] outByteArr = new byte[Long.BYTES];
       final WritableMemory memOut = WritableMemory.writableWrap(outByteArr);
       final int preLongs = 1;
@@ -79,15 +79,7 @@ static byte[] toByteArray(final DoublesSketch sketch, final boolean ordered, fin
    */
   private static byte[] convertToByteArray(final DoublesSketch sketch, final int flags,
                                            final boolean ordered, final boolean compact) {
-    final int preLongs = 2;
-    final int extra = 2; // extra space for min and max quantiles
-    final int prePlusExtraBytes = (preLongs + extra) << 3;
-    final int k = sketch.getK();
-    final long n = sketch.getN();
-
-    // If not-compact, have accessor always report full levels. Then use level size to determine
-    // whether to copy data out.
-    final DoublesSketchAccessor dsa = DoublesSketchAccessor.wrap(sketch, !compact);
+    final int preLongs = sketch.isEmpty() ? 1 : 2;
 
     final int outBytes = (compact ? sketch.getCurrentCompactSerializedSizeBytes()
         : sketch.getCurrentUpdatableSerializedSizeBytes());
@@ -95,15 +87,23 @@ private static byte[] convertToByteArray(final DoublesSketch sketch, final int f
     final byte[] outByteArr = new byte[outBytes];
     final WritableMemory memOut = WritableMemory.writableWrap(outByteArr);
 
-    //insert preamble-0, N, min, max
+    //insert pre0
+    final int k = sketch.getK();
     insertPre0(memOut, preLongs, flags, k);
     if (sketch.isEmpty()) { return outByteArr; }
 
+    //insert N, min, max
+    final long n = sketch.getN();
     insertN(memOut, n);
     insertMinDouble(memOut, sketch.isEmpty() ? Double.NaN : sketch.getMinItem());
     insertMaxDouble(memOut, sketch.isEmpty() ? Double.NaN : sketch.getMaxItem());
 
-    long memOffsetBytes = prePlusExtraBytes;
+    // If not-compact, have accessor always report full levels. Then use level size to determine
+    // whether to copy data out.
+    final DoublesSketchAccessor dsa = DoublesSketchAccessor.wrap(sketch, !compact);
+
+    final int minAndMax = 2; // extra space for min and max quantiles
+    long memOffsetBytes = (preLongs + minAndMax) << 3;
 
     // might need to sort base buffer but don't want to change input sketch
     final int bbCnt = computeBaseBufferItems(k, n);
diff --git a/src/main/java/org/apache/datasketches/tdigest/TDigestDouble.java b/src/main/java/org/apache/datasketches/tdigest/TDigestDouble.java
index 1e3408511..951bd7244 100644
--- a/src/main/java/org/apache/datasketches/tdigest/TDigestDouble.java
+++ b/src/main/java/org/apache/datasketches/tdigest/TDigestDouble.java
@@ -32,6 +32,7 @@
 import org.apache.datasketches.memory.WritableBuffer;
 import org.apache.datasketches.memory.WritableMemory;
 import org.apache.datasketches.quantilescommon.QuantilesAPI;
+import org.apache.datasketches.quantilescommon.QuantilesUtil;
 
 /**
  * t-Digest for estimating quantiles and ranks.
@@ -125,7 +126,7 @@ public void merge(final TDigestDouble other) {
   /**
    * Process buffered values and merge centroids if needed
    */
-  public void compress() {
+  private void compress() {
     if (numBuffered_ == 0) { return; }
     final int num = numBuffered_ + numCentroids_;
     final double[] values =  new double[num];
@@ -277,6 +278,51 @@ public double getQuantile(final double rank) {
     return weightedAverage(centroidWeights_[numCentroids_ - 1], w1, maxValue_, w2);
   }
 
+  /**
+   * Returns an approximation to the Probability Mass Function (PMF) of the input stream
+   * given a set of split points. The result is derived by differencing the output of getCDF.
+   *
+   * @param splitPoints an array of m unique, monotonically increasing values
+   * that divide the input domain into m+1 consecutive disjoint intervals (bins).
+   *
+   * @return an array of m+1 doubles, each of which is an approximation
+   * to the fraction of the input stream values (the mass) that fall into one of those intervals.
+   * @throws SketchesStateException if the sketch is empty.
+   */
+  public double[] getPMF(final double[] splitPoints) {
+    final double[] buckets = getCDF(splitPoints); // cumulative values; getCDF throws if empty
+    for (int i = buckets.length; i-- > 1; ) { // back to front, so buckets[i - 1] is still cumulative
+      buckets[i] -= buckets[i - 1];
+    }
+    return buckets;
+  }
+
+  /**
+   * Returns an approximation to the Cumulative Distribution Function (CDF), which is the
+   * cumulative analog of the PMF, of the input stream given a set of split points.
+   *
+   * @param splitPoints an array of m unique, monotonically increasing values
+   * that divide the input domain into m+1 consecutive disjoint intervals.
+   *
+   * @return an array of m+1 doubles, which are a consecutive approximation to the CDF
+   * of the input stream given the splitPoints. The value at array position j of the returned
+   * CDF array is the sum of the returned values in positions 0 through j of the returned PMF
+   * array. This can be viewed as an array of ranks of the given split points plus one more
+   * value that is always 1.
+   * @throws SketchesStateException if the sketch is empty.
+   */
+  public double[] getCDF(final double[] splitPoints) {
+    if (isEmpty()) { throw new SketchesStateException(QuantilesAPI.EMPTY_MSG); }
+    QuantilesUtil.checkDoublesSplitPointsOrder(splitPoints);
+    final int len = splitPoints.length + 1; // m split points produce m+1 entries
+    final double[] ranks = new double[len];
+    for (int i = 0; i < len - 1; i++) {
+      ranks[i] = getRank(splitPoints[i]);
+    }
+    ranks[len - 1] = 1.0; // the extra, final entry is always 1
+    return ranks;
+  }
+
   /**
    * Computes size needed to serialize the current state.
    * @return size in bytes needed to serialize this tdigest
diff --git a/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayTest.java b/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayTest.java
index 1df6cc9d9..8327a0d5e 100644
--- a/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayTest.java
+++ b/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayTest.java
@@ -139,7 +139,7 @@ public void basicWritableWrapTest() {
   @Test
   public void countWritableWrappedBitsWhenDirty() {
     // like basicOperationTest but with setBit which does
-    // not neecssarily track numBitsSet_
+    // not necessarily track numBitsSet_
     final HeapBitArray hba = new HeapBitArray(128);
     assertFalse(hba.getAndSetBit(1));
     assertFalse(hba.getAndSetBit(2));
diff --git a/src/test/java/org/apache/datasketches/hll/DirectCouponListTest.java b/src/test/java/org/apache/datasketches/hll/DirectCouponListTest.java
index 985cdf798..09eebabf7 100644
--- a/src/test/java/org/apache/datasketches/hll/DirectCouponListTest.java
+++ b/src/test/java/org/apache/datasketches/hll/DirectCouponListTest.java
@@ -31,7 +31,6 @@
 
 import org.apache.datasketches.memory.DefaultMemoryRequestServer;
 import org.apache.datasketches.memory.Memory;
-//import org.apache.datasketches.memory.WritableHandle;
 import org.apache.datasketches.memory.WritableMemory;
 
 /**
@@ -74,8 +73,6 @@ private static void promotions(int lgConfigK, int n, TgtHllType tgtHllType, bool
     byte[] barr1;
     WritableMemory wmem;
     try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(bytes)).scope()) {
-      //byte[] byteArr = new byte[bytes];
-      //WritableMemory wmem = WritableMemory.wrap(byteArr);
       hllSketch = new HllSketch(lgConfigK, tgtHllType, wmem);
       assertTrue(hllSketch.isEmpty());
 
diff --git a/src/test/java/org/apache/datasketches/hll/PreambleUtilTest.java b/src/test/java/org/apache/datasketches/hll/PreambleUtilTest.java
index deb8c5be5..17f3d0d0f 100644
--- a/src/test/java/org/apache/datasketches/hll/PreambleUtilTest.java
+++ b/src/test/java/org/apache/datasketches/hll/PreambleUtilTest.java
@@ -109,7 +109,6 @@ public void checkCorruptMemoryInput() {
     HllSketch sk = new HllSketch(12);
     byte[] memObj = sk.toCompactByteArray();
     WritableMemory wmem = WritableMemory.writableWrap(memObj);
-    //long memAdd = wmem.getCumulativeOffset(0);
     HllSketch bad;
 
     //checkFamily
@@ -148,7 +147,6 @@ public void checkCorruptMemoryInput() {
     for (int i = 1; i <= 15; i++) { sk.update(i); }
     memObj = sk.toCompactByteArray();
     wmem = WritableMemory.writableWrap(memObj);
-    //memAdd = wmem.getCumulativeOffset(0);
 
     //check wrong PreInts and SET
     try {
@@ -162,7 +160,6 @@ public void checkCorruptMemoryInput() {
     for (int i = 15; i <= 1000; i++) { sk.update(i); }
     memObj = sk.toCompactByteArray();
     wmem = WritableMemory.writableWrap(memObj);
-    //memAdd = wmem.getCumulativeOffset(0);
 
     //check wrong PreInts and HLL
     try {
@@ -179,7 +176,6 @@ public void checkExtractFlags() {
     int bytes = HllSketch.getMaxUpdatableSerializationBytes(4, TgtHllType.HLL_4);
     WritableMemory wmem = WritableMemory.allocate(bytes);
     Object memObj = wmem.getArray();
-    //long memAdd = wmem.getCumulativeOffset(0L);
     HllSketch sk = new HllSketch(4, TgtHllType.HLL_4, wmem);
     int flags = extractFlags(wmem);
     assertEquals(flags, EMPTY_FLAG_MASK);
diff --git a/src/test/java/org/apache/datasketches/kll/KllCrossLanguageTest.java b/src/test/java/org/apache/datasketches/kll/KllCrossLanguageTest.java
index 078f3503b..53b422b7c 100644
--- a/src/test/java/org/apache/datasketches/kll/KllCrossLanguageTest.java
+++ b/src/test/java/org/apache/datasketches/kll/KllCrossLanguageTest.java
@@ -67,6 +67,16 @@ public void generateKllFloatsSketchBinaries() throws IOException {
     }
   }
 
+  @Test(groups = {GENERATE_JAVA_FILES})
+  public void generateKllLongsSketchBinaries() throws IOException {
+    final int[] nArr = {0, 1, 10, 100, 1_000, 10_000, 100_000, 1_000_000};
+    for (int n: nArr) { // one serialized sketch file per stream length n
+      final KllLongsSketch sk = KllLongsSketch.newHeapInstance();
+      for (int i = 1; i <= n; i++) { sk.update(i); }
+      Files.write(javaPath.resolve("kll_long_n" + n + "_java.sk"), sk.toByteArray()); // closes the file
+    }
+  }
+
   @Test(groups = {GENERATE_JAVA_FILES})
   public void generateKllItemsSketchBinaries() throws IOException {
     final int[] nArr = {0, 1, 10, 100, 1_000, 10_000, 100_000, 1_000_000};
diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectCompactDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectCompactDoublesSketchTest.java
index 9831c2f57..7a4d061ad 100644
--- a/src/test/java/org/apache/datasketches/kll/KllDirectCompactDoublesSketchTest.java
+++ b/src/test/java/org/apache/datasketches/kll/KllDirectCompactDoublesSketchTest.java
@@ -110,13 +110,13 @@ public void checkDirectCompactGetDoubleItemsArray() {
 
     KllDoublesSketch sk2 = KllDoublesSketch.wrap(Memory.wrap(sk.toByteArray()));
     double[] itemsArr = sk2.getDoubleItemsArray();
-    for (int i = 0; i < 20; i++) { assertEquals(itemsArr[i], 0F); }
+    for (int i = 0; i < 20; i++) { assertEquals(itemsArr[i], 0.0); }
 
     sk.update(1);
     sk2 = KllDoublesSketch.wrap(Memory.wrap(sk.toByteArray()));
     itemsArr = sk2.getDoubleItemsArray();
-    for (int i = 0; i < 19; i++) { assertEquals(itemsArr[i], 0F); }
-    assertEquals(itemsArr[19], 1F);
+    for (int i = 0; i < 19; i++) { assertEquals(itemsArr[i], 0.0); }
+    assertEquals(itemsArr[19], 1.0);
 
     for (int i = 2; i <= 21; i++) { sk.update(i); }
     sk2 = KllDoublesSketch.wrap(Memory.wrap(sk.toByteArray()));
@@ -169,12 +169,12 @@ public void checkMinAndMax() {
     try { sk2.getMaxItem(); fail(); } catch (SketchesArgumentException e) {}
     sk.update(1);
     sk2 = KllDoublesSketch.wrap(Memory.wrap(sk.toByteArray()));
-    assertEquals(sk2.getMaxItem(),1.0F);
-    assertEquals(sk2.getMinItem(),1.0F);
+    assertEquals(sk2.getMaxItem(),1.0);
+    assertEquals(sk2.getMinItem(),1.0);
     for (int i = 2; i <= 21; i++) { sk.update(i); }
     sk2 = KllDoublesSketch.wrap(Memory.wrap(sk.toByteArray()));
-    assertEquals(sk2.getMaxItem(),21.0F);
-    assertEquals(sk2.getMinItem(),1.0F);
+    assertEquals(sk2.getMaxItem(),21.0);
+    assertEquals(sk2.getMinItem(),1.0);
   }
 
   @Test
diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchIteratorTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchIteratorTest.java
index 78a3b9cd5..4bfdfa4fc 100644
--- a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchIteratorTest.java
+++ b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchIteratorTest.java
@@ -41,7 +41,7 @@ public void oneItemSketch() {
     sketch.update(0);
     QuantilesDoublesSketchIterator it = sketch.iterator();
     Assert.assertTrue(it.next());
-    Assert.assertEquals(it.getQuantile(), 0f);
+    Assert.assertEquals(it.getQuantile(), 0);
     Assert.assertEquals(it.getWeight(), 1);
     Assert.assertFalse(it.next());
   }
diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java
index 33219a806..6342ac33d 100644
--- a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java
+++ b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java
@@ -189,11 +189,11 @@ public void mergeLowerK() {
       sketch2.update(2 * n - i - 1);
     }
 
-    assertEquals(sketch1.getMinItem(), 0.0f);
-    assertEquals(sketch1.getMaxItem(), n - 1f);
+    assertEquals(sketch1.getMinItem(), 0.0);
+    assertEquals(sketch1.getMaxItem(), n - 1.0);
 
     assertEquals(sketch2.getMinItem(), n);
-    assertEquals(sketch2.getMaxItem(), 2f * n - 1f);
+    assertEquals(sketch2.getMaxItem(), 2.0 * n - 1.0);
 
     assertTrue(sketch1.getNormalizedRankError(false) < sketch2.getNormalizedRankError(false));
     assertTrue(sketch1.getNormalizedRankError(true) < sketch2.getNormalizedRankError(true));
@@ -613,7 +613,7 @@ public void checkWritableWrapOfCompactForm() {
   public void checkReadOnlyExceptions() {
     int k = 20;
     double[] dblArr = new double[0];
-    double dblV = 1.0f;
+    double dblV = 1.0;
     int idx = 1;
     boolean bool = true;
     KllDoublesSketch sk = KllDoublesSketch.newHeapInstance(k);
diff --git a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchSerDeTest.java b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchSerDeTest.java
index e07a395da..007cc8370 100644
--- a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchSerDeTest.java
+++ b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchSerDeTest.java
@@ -64,7 +64,7 @@ public void serializeDeserializeEmpty() {
   @Test
   public void serializeDeserializeOneValue() {
     final KllDoublesSketch sk1 = KllDoublesSketch.newHeapInstance();
-    sk1.update(1);
+    sk1.update(1.0);
 
     //from heap -> byte[] -> heap
     final byte[] bytes = sk1.toByteArray();
diff --git a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java
index 0b3818f1f..e143577f4 100644
--- a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java
+++ b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java
@@ -165,8 +165,8 @@ public void manyValuesEstimationMode() {
     assertEquals(pmf[0], 0.5, PMF_EPS_FOR_K_256);
     assertEquals(pmf[1], 0.5, PMF_EPS_FOR_K_256);
 
-    assertEquals(sketch.getMinItem(), 0f); // min value is exact
-    assertEquals(sketch.getMaxItem(), n - 1f); // max value is exact
+    assertEquals(sketch.getMinItem(), 0.0); // min value is exact
+    assertEquals(sketch.getMaxItem(), n - 1.0); // max value is exact
 
     // check at every 0.1 percentage point
     final double[] fractions = new double[1001];
@@ -261,11 +261,11 @@ public void mergeLowerK() {
       sketch2.update(2 * n - i - 1);
     }
 
-    assertEquals(sketch1.getMinItem(), 0.0f);
-    assertEquals(sketch1.getMaxItem(), n - 1f);
+    assertEquals(sketch1.getMinItem(), 0.0);
+    assertEquals(sketch1.getMaxItem(), n - 1);
 
     assertEquals(sketch2.getMinItem(), n);
-    assertEquals(sketch2.getMaxItem(), 2f * n - 1.0);
+    assertEquals(sketch2.getMaxItem(), 2.0 * n - 1.0);
 
     assertTrue(sketch1.getNormalizedRankError(false) < sketch2.getNormalizedRankError(false));
     assertTrue(sketch1.getNormalizedRankError(true) < sketch2.getNormalizedRankError(true));
@@ -306,7 +306,7 @@ public void mergeEmptyLowerK() {
     sketch2.merge(sketch1);
     assertFalse(sketch1.isEmpty());
     assertEquals(sketch1.getN(), n);
-    assertEquals(sketch1.getMinItem(), 0f);
+    assertEquals(sketch1.getMinItem(), 0.0);
     assertEquals(sketch1.getMaxItem(), n - 1.0);
     assertEquals(sketch1.getQuantile(0.5), n / 2.0, n * PMF_EPS_FOR_K_256);
   }
@@ -424,7 +424,7 @@ public void checkNewDirectInstanceAndSize() {
     KllDoublesSketch.newDirectInstance(wmem, memReqSvr);
     try { KllDoublesSketch.newDirectInstance(null, memReqSvr); fail(); }
     catch (NullPointerException e) { }
-    try { KllFloatsSketch.newDirectInstance(wmem, null); fail(); }
+    try { KllDoublesSketch.newDirectInstance(wmem, null); fail(); }
     catch (NullPointerException e) { }
     int updateSize = KllSketch.getMaxSerializedSizeBytes(200, 0, DOUBLES_SKETCH, true);
     int compactSize = KllSketch.getMaxSerializedSizeBytes(200, 0, DOUBLES_SKETCH, false);
diff --git a/src/test/java/org/apache/datasketches/kll/KllItemsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllItemsSketchTest.java
index 00028e341..9fc74d97b 100644
--- a/src/test/java/org/apache/datasketches/kll/KllItemsSketchTest.java
+++ b/src/test/java/org/apache/datasketches/kll/KllItemsSketchTest.java
@@ -570,7 +570,7 @@ public void checkCDF_PDF() {
   }
 
   @Test
-  public void checkWrapCase1Floats() {
+  public void checkWrapCase1Items() {
     KllItemsSketch sk = KllItemsSketch.newHeapInstance(20, Comparator.naturalOrder(), serDe);
     final int n = 21;
     final int digits = Util.numDigits(n);
diff --git a/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java b/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java
index e58c27419..4ce988d22 100644
--- a/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java
+++ b/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java
@@ -100,8 +100,8 @@ public void checkHeapifyExceptions2() {
   @Test(expectedExceptions = SketchesArgumentException.class)
   public void checkHeapifyExceptions3() {
     KllDoublesSketch sk = KllDoublesSketch.newHeapInstance();
-    sk.update(1.0f);
-    sk.update(2.0f);
+    sk.update(1.0);
+    sk.update(2.0);
     WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray());
     wmem.putByte(0, (byte) 1); //corrupt preamble ints, should be 5
     KllDoublesSketch.heapify(wmem);
diff --git a/src/test/java/org/apache/datasketches/kll/KllMiscItemsTest.java b/src/test/java/org/apache/datasketches/kll/KllMiscItemsTest.java
index acf3343d9..5f51a7f1a 100644
--- a/src/test/java/org/apache/datasketches/kll/KllMiscItemsTest.java
+++ b/src/test/java/org/apache/datasketches/kll/KllMiscItemsTest.java
@@ -293,7 +293,7 @@ public void checkSketchInitializeItemsHeap() {
     final int digits = Util.numDigits(n);
     KllItemsSketch sk;
 
-    println("#### CASE: FLOAT FULL HEAP");
+    println("#### CASE: ITEM FULL HEAP");
     sk = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe);
     for (int i = 1; i <= n; i++) { sk.update(Util.longToFixedLengthString(i, digits)); }
     println(sk.toString(true, true));
@@ -310,7 +310,7 @@ public void checkSketchInitializeItemsHeap() {
     assertEquals(sk.getNumLevels(), 2);
     assertFalse(sk.isLevelZeroSorted());
 
-    println("#### CASE: FLOAT HEAP EMPTY");
+    println("#### CASE: ITEM HEAP EMPTY");
     sk = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe);
     println(sk.toString(true, true));
     assertEquals(sk.getK(), k);
@@ -326,7 +326,7 @@ public void checkSketchInitializeItemsHeap() {
     assertEquals(sk.getNumLevels(), 1);
     assertFalse(sk.isLevelZeroSorted());
 
-    println("#### CASE: FLOAT HEAP SINGLE");
+    println("#### CASE: ITEM HEAP SINGLE");
     sk = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe);
     sk.update("1");
     println(sk.toString(true, true));
@@ -354,7 +354,7 @@ public void checkSketchInitializeItemsHeapifyCompactMem() {
     byte[] compBytes;
     Memory mem;
 
-    println("#### CASE: FLOAT FULL HEAPIFIED FROM COMPACT");
+    println("#### CASE: ITEM FULL HEAPIFIED FROM COMPACT");
     sk2 = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe);
     for (int i = 1; i <= n; i++) { sk2.update(Util.longToFixedLengthString(i, digits)); }
     println(sk2.toString(true, true));
@@ -375,7 +375,7 @@ public void checkSketchInitializeItemsHeapifyCompactMem() {
     assertEquals(sk.getNumLevels(), 2);
     assertFalse(sk.isLevelZeroSorted());
 
-    println("#### CASE: FLOAT EMPTY HEAPIFIED FROM COMPACT");
+    println("#### CASE: ITEM EMPTY HEAPIFIED FROM COMPACT");
     sk2 =  KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe);
     //println(sk.toString(true, true));
     compBytes = sk2.toByteArray();
@@ -395,7 +395,7 @@ public void checkSketchInitializeItemsHeapifyCompactMem() {
     assertEquals(sk.getNumLevels(), 1);
     assertFalse(sk.isLevelZeroSorted());
 
-    println("#### CASE: FLOAT SINGLE HEAPIFIED FROM COMPACT");
+    println("#### CASE: ITEM SINGLE HEAPIFIED FROM COMPACT");
     sk2 = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe);
     sk2.update("1");
     //println(sk2.toString(true, true));
@@ -417,7 +417,7 @@ public void checkSketchInitializeItemsHeapifyCompactMem() {
     assertFalse(sk.isLevelZeroSorted());
   }
 
-  //public void checkSketchInitializeFloatHeapifyUpdatableMem() Not Supported
+  //public void checkSketchInitializeItemHeapifyUpdatableMem() Not Supported
 
   @Test //set static enablePrinting = true for visual checking
   public void checkMemoryToStringItemsCompact() {
@@ -431,7 +431,7 @@ public void checkMemoryToStringItemsCompact() {
     Memory mem;
     String s;
 
-    println("#### CASE: FLOAT FULL COMPACT");
+    println("#### CASE: ITEM FULL COMPACT");
     sk = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe);
     for (int i = 1; i <= n; i++) { sk.update(Util.longToFixedLengthString(i, digits)); }
     compBytes = sk.toByteArray();
@@ -447,7 +447,7 @@ public void checkMemoryToStringItemsCompact() {
     println(s);
     assertEquals(compBytes, compBytes2);
 
-    println("#### CASE: FLOAT EMPTY COMPACT");
+    println("#### CASE: ITEM EMPTY COMPACT");
     sk = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe);
     compBytes = sk.toByteArray();
     mem = Memory.wrap(compBytes);
@@ -462,7 +462,7 @@ public void checkMemoryToStringItemsCompact() {
     println(s);
     assertEquals(compBytes, compBytes2);
 
-    println("#### CASE: FLOAT SINGLE COMPACT");
+    println("#### CASE: ITEM SINGLE COMPACT");
     sk = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe);
     sk.update("1");
     compBytes = sk.toByteArray();
diff --git a/src/test/java/org/apache/datasketches/quantiles/DebugUnionTest.java b/src/test/java/org/apache/datasketches/quantiles/DebugUnionTest.java
index b1bd5818d..636105ef8 100644
--- a/src/test/java/org/apache/datasketches/quantiles/DebugUnionTest.java
+++ b/src/test/java/org/apache/datasketches/quantiles/DebugUnionTest.java
@@ -31,7 +31,6 @@
 import jdk.incubator.foreign.ResourceScope;
 
 import org.apache.datasketches.memory.DefaultMemoryRequestServer;
-//import org.apache.datasketches.memory.WritableHandle;
 import org.apache.datasketches.memory.WritableMemory;
 import org.apache.datasketches.quantilescommon.QuantilesDoublesSketchIterator;
 
diff --git a/src/test/java/org/apache/datasketches/quantiles/DirectQuantilesMemoryRequestTest.java b/src/test/java/org/apache/datasketches/quantiles/DirectQuantilesMemoryRequestTest.java
index c252eef3c..d896bbefa 100644
--- a/src/test/java/org/apache/datasketches/quantiles/DirectQuantilesMemoryRequestTest.java
+++ b/src/test/java/org/apache/datasketches/quantiles/DirectQuantilesMemoryRequestTest.java
@@ -30,7 +30,6 @@
 import org.testng.annotations.Test;
 
 import org.apache.datasketches.memory.Memory;
-//import org.apache.datasketches.memory.WritableHandle;
 import org.apache.datasketches.memory.WritableMemory;
 
 import jdk.incubator.foreign.ResourceScope;
diff --git a/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java b/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java
index ea928ba02..8cdc7bf71 100644
--- a/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java
+++ b/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java
@@ -141,7 +141,6 @@ public void checkEmptyExceptions() {
 
   @Test
   public void directSketchShouldMoveOntoHeapEventually() {
-
     WritableMemory wmem = WritableMemory.allocateDirect(1000, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer());
     WritableMemory wmem2 = wmem;
     UpdateDoublesSketch sketch = DoublesSketch.builder().build(wmem);
diff --git a/src/test/java/org/apache/datasketches/tdigest/TDigestDoubleTest.java b/src/test/java/org/apache/datasketches/tdigest/TDigestDoubleTest.java
index db043cff6..e1c6914c7 100644
--- a/src/test/java/org/apache/datasketches/tdigest/TDigestDoubleTest.java
+++ b/src/test/java/org/apache/datasketches/tdigest/TDigestDoubleTest.java
@@ -41,6 +41,8 @@ public void empty() {
     assertThrows(SketchesStateException.class, () -> td.getMaxValue());
     assertThrows(SketchesStateException.class, () -> td.getRank(0));
     assertThrows(SketchesStateException.class, () -> td.getQuantile(0.5));
+    assertThrows(SketchesStateException.class, () -> td.getPMF(new double[]{0}));
+    assertThrows(SketchesStateException.class, () -> td.getCDF(new double[]{0}));
   }
 
   @Test
@@ -82,6 +84,14 @@ public void manyValues() {
     assertEquals(td.getQuantile(0.9), n * 0.9, n * 0.9 * 0.01);
     assertEquals(td.getQuantile(0.95), n * 0.95, n * 0.95 * 0.01);
     assertEquals(td.getQuantile(1), n - 1);
+    final double[] pmf = td.getPMF(new double[] {n / 2});
+    assertEquals(pmf.length, 2);
+    assertEquals(pmf[0], 0.5, 0.0001);
+    assertEquals(pmf[1], 0.5, 0.0001);
+    final double[] cdf = td.getCDF(new double[] {n / 2});
+    assertEquals(cdf.length, 2);
+    assertEquals(cdf[0], 0.5, 0.0001);
+    assertEquals(cdf[1], 1.0);
   }
 
   @Test
diff --git a/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java b/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java
index 1e2089b51..188dbf427 100644
--- a/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java
+++ b/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java
@@ -32,7 +32,6 @@
 import org.apache.datasketches.common.SketchesArgumentException;
 import org.apache.datasketches.memory.DefaultMemoryRequestServer;
 import org.apache.datasketches.memory.Memory;
-//import org.apache.datasketches.memory.WritableHandle;
 import org.apache.datasketches.memory.WritableMemory;
 import org.testng.annotations.Test;
 
diff --git a/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java
index e4c112281..6d6af7047 100644
--- a/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java
+++ b/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java
@@ -119,7 +119,6 @@ public void checkHeapifyByteArrayExact() {
     // That is, this is being run for its side-effect of accessing things.
     // If something is wonky, it will generate an exception and fail the test.
     local2.toString(true, true, 8, true);
-
   }
 
   @Test
diff --git a/src/test/java/org/apache/datasketches/theta/ConcurrentHeapQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta/ConcurrentHeapQuickSelectSketchTest.java
index 2261edc3b..84ddcb80e 100644
--- a/src/test/java/org/apache/datasketches/theta/ConcurrentHeapQuickSelectSketchTest.java
+++ b/src/test/java/org/apache/datasketches/theta/ConcurrentHeapQuickSelectSketchTest.java
@@ -114,7 +114,7 @@ public void checkIllegalSketchID_UpdateSketch() {
     WritableMemory mem = WritableMemory.writableWrap(byteArray);
     mem.putByte(FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte
 
-    //try to heapify the corruped mem
+    //try to heapify the corrupted mem
     Sketch.heapify(mem, sl.seed);
   }
 
diff --git a/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java
index ef6004a1b..f36597b7c 100644
--- a/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java
+++ b/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java
@@ -47,7 +47,6 @@
 import org.apache.datasketches.common.SketchesReadOnlyException;
 import org.apache.datasketches.memory.DefaultMemoryRequestServer;
 import org.apache.datasketches.memory.Memory;
-//import org.apache.datasketches.memory.WritableHandle;
 import org.apache.datasketches.memory.WritableMemory;
 import org.apache.datasketches.thetacommon.HashOperations;
 import org.apache.datasketches.thetacommon.ThetaUtil;
@@ -86,7 +85,7 @@ public void checkBadSerVer() {
     }
   }
 
-  @Test//(expectedExceptions = SketchesArgumentException.class)
+  @Test
   public void checkConstructorKtooSmall() {
     int k = 8;
     WritableMemory wmem;
@@ -98,7 +97,7 @@ public void checkConstructorKtooSmall() {
     }
   }
 
-  @Test//(expectedExceptions = SketchesArgumentException.class)
+  @Test
   public void checkConstructorMemTooSmall() {
     int k = 16;
     WritableMemory wmem;
@@ -195,7 +194,7 @@ public void checkWrapIllegalFamilyID_direct() {
     DirectQuickSelectSketch.writableWrap(mem, ThetaUtil.DEFAULT_UPDATE_SEED);
   }
 
-  @Test //(expectedExceptions = SketchesArgumentException.class)
+  @Test
   public void checkHeapifySeedConflict() {
     int k = 512;
     long seed1 = 1021;
@@ -213,7 +212,7 @@ public void checkHeapifySeedConflict() {
     }
   }
 
-  @Test//(expectedExceptions = SketchesArgumentException.class)
+  @Test
   public void checkCorruptLgNomLongs() {
     int k = 16;
     WritableMemory wmem;
diff --git a/src/test/java/org/apache/datasketches/theta/HeapifyWrapSerVer1and2Test.java b/src/test/java/org/apache/datasketches/theta/HeapifyWrapSerVer1and2Test.java
index 5e81b1808..c0cff6eb2 100644
--- a/src/test/java/org/apache/datasketches/theta/HeapifyWrapSerVer1and2Test.java
+++ b/src/test/java/org/apache/datasketches/theta/HeapifyWrapSerVer1and2Test.java
@@ -41,29 +41,29 @@ public void checkHeapifyCompactSketchAssumedDefaultSeed() {
     final int k = 64;
     final long seed = ThetaUtil.DEFAULT_UPDATE_SEED;
     final short seedHash = Util.computeSeedHash(seed);
-    UpdateSketch sv3usk = UpdateSketch.builder().setNominalEntries(k).setSeed(seed).build();
-    for (int i=0; i
     
     
-    
+    
 
     
       

From 14174086dfdac3b8b586b9cf2d3aaaf7b7cabacb Mon Sep 17 00:00:00 2001
From: Lee Rhodes 
Date: Wed, 18 Dec 2024 14:51:14 -0800
Subject: [PATCH 21/22] update GHA workflows

---
 .github/workflows/auto-check_cpp_files.yml | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/auto-check_cpp_files.yml b/.github/workflows/auto-check_cpp_files.yml
index b9e05ae97..4b06687e0 100644
--- a/.github/workflows/auto-check_cpp_files.yml
+++ b/.github/workflows/auto-check_cpp_files.yml
@@ -12,33 +12,33 @@ jobs:
     steps:
       - name: Checkout
         uses: actions/checkout@v4
-      
+
 	  - name: Checkout C++
         uses: actions/checkout@v4
         with:
           repository: apache/datasketches-cpp
           path: cpp
-      
+
 	  - name: Setup Java
         uses: actions/setup-java@v4
         with:
           java-version: '17'
           distribution: 'temurin'
-      
+
 	  - name: Configure C++ build
         run: cd cpp/build && cmake .. -DGENERATE=true
-      
+
 	  - name: Build C++ unit tests
         run: cd cpp && cmake --build build --config Release
-      
+
 	  - name: Run C++ tests
         run: cd cpp && cmake --build build --config Release --target test
-      
+
 	  - name: Make dir
         run: mkdir -p serialization_test_data/cpp_generated_files
-      
+
       - name: Copy files
         run: cp cpp/build/*/test/*_cpp.sk serialization_test_data/cpp_generated_files
-      
+
 	  - name: Run Java tests
         run: mvn test -P check-cpp-files

From 341de43f1ee347621020785091d8a7c81344d68f Mon Sep 17 00:00:00 2001
From: Lee Rhodes 
Date: Wed, 18 Dec 2024 15:26:32 -0800
Subject: [PATCH 22/22] update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 547df3e9d..b6db7f89e 100644
--- a/README.md
+++ b/README.md
@@ -53,7 +53,7 @@ and, as a result, must be compiled with JDK17 and this dependency:
 
 If your application only relies on the APIs of datasketches-java no special JVM arguments are required.
 However, if your application also directly relies on the APIs of the *datasketches-memory* component, 
-you may need the additional JVM argument **--enable-preview**.
+you may need the additional JVM argument **--add-modules=jdk.incubator.foreign**.
 
 ### Recommended Build Tool
 This DataSketches component is structured as a Maven project and Maven is the recommended Build Tool.