Skip to content

Commit a902c9e

Browse files
authored
Merge pull request #324 from dynatrace-oss/distinct-count-util
Make the utility function for deduplicating hash tokens public
2 parents 13e58de + b809887 commit a902c9e

File tree

4 files changed

+17
-20
lines changed

4 files changed

+17
-20
lines changed

src/main/java/com/dynatrace/hash4j/distinctcount/DistinctCountUtil.java

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,8 @@
1919

2020
import java.util.Arrays;
2121

22-
class DistinctCountUtil {
22+
/** Utility functions for distinct counting. */
23+
public final class DistinctCountUtil {
2324

2425
private DistinctCountUtil() {}
2526

@@ -191,13 +192,13 @@ static double solveMaximumLikelihoodEquation(
191192
return x;
192193
}
193194

194-
static int computeToken1(long hashValue) {
195+
static int computeToken(long hashValue) {
195196
int idx = (int) (hashValue >>> 38);
196197
int nlz = Long.numberOfLeadingZeros(~(~hashValue << 26));
197198
return (idx << 6) | nlz;
198199
}
199200

200-
static long reconstructHash1(int token) {
201+
static long reconstructHash(int token) {
201202
long idx = token & 0xFFFFFFC0L;
202203
return (0x3FFFFFFFFFL >>> token) | (idx << 32);
203204
}

src/main/java/com/dynatrace/hash4j/distinctcount/HyperLogLog.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,7 @@ public HyperLogLog add(long hashValue) {
286286
*/
287287
@Override
288288
public HyperLogLog addToken(int token) {
289-
return add(DistinctCountUtil.reconstructHash1(token));
289+
return add(DistinctCountUtil.reconstructHash(token));
290290
}
291291

292292
/**
@@ -309,7 +309,7 @@ public HyperLogLog addToken(int token) {
309309
* @return the 32-bit token
310310
*/
311311
public static int computeToken(long hashValue) {
312-
return DistinctCountUtil.computeToken1(hashValue);
312+
return DistinctCountUtil.computeToken(hashValue);
313313
}
314314

315315
/**
@@ -353,7 +353,7 @@ public HyperLogLog add(long hashValue, StateChangeObserver stateChangeObserver)
353353
*/
354354
@Override
355355
public HyperLogLog addToken(int token, StateChangeObserver stateChangeObserver) {
356-
return add(DistinctCountUtil.reconstructHash1(token), stateChangeObserver);
356+
return add(DistinctCountUtil.reconstructHash(token), stateChangeObserver);
357357
}
358358

359359
// returns register change probability scaled by 2^64

src/main/java/com/dynatrace/hash4j/distinctcount/UltraLogLog.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ public UltraLogLog add(long hashValue) {
203203
*/
204204
@Override
205205
public UltraLogLog addToken(int token) {
206-
return add(DistinctCountUtil.reconstructHash1(token));
206+
return add(DistinctCountUtil.reconstructHash(token));
207207
}
208208

209209
/**
@@ -226,7 +226,7 @@ public UltraLogLog addToken(int token) {
226226
* @return the 32-bit token
227227
*/
228228
public static int computeToken(long hashValue) {
229-
return DistinctCountUtil.computeToken1(hashValue);
229+
return DistinctCountUtil.computeToken(hashValue);
230230
}
231231

232232
/**
@@ -275,7 +275,7 @@ public UltraLogLog add(long hashValue, StateChangeObserver stateChangeObserver)
275275
*/
276276
@Override
277277
public UltraLogLog addToken(int token, StateChangeObserver stateChangeObserver) {
278-
return add(DistinctCountUtil.reconstructHash1(token), stateChangeObserver);
278+
return add(DistinctCountUtil.reconstructHash(token), stateChangeObserver);
279279
}
280280

281281
/**

src/test/java/com/dynatrace/hash4j/distinctcount/DistinctCountUtilTest.java

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ void testSolveMaximumLikelihoodEquation() {
162162
}
163163

164164
@Test
165-
void testComputeToken1() {
165+
void testComputeToken() {
166166
SplittableRandom random = new SplittableRandom(0xbafc97ad730480acL);
167167

168168
int numCycles = 100;
@@ -175,20 +175,18 @@ void testComputeToken1() {
175175
long mask = 0xFFFFFFC000000000L | (0x0000003FFFFFFFFFL >>> nlz);
176176
long hash = (r | (0x0000002000000000L >>> nlz)) & mask;
177177

178-
int token = DistinctCountUtil.computeToken1(hash);
179-
long reconstructedHash = DistinctCountUtil.reconstructHash1(token);
180-
int tokenFromReconstructedHash = DistinctCountUtil.computeToken1(reconstructedHash);
178+
int token = DistinctCountUtil.computeToken(hash);
179+
long reconstructedHash = DistinctCountUtil.reconstructHash(token);
180+
int tokenFromReconstructedHash = DistinctCountUtil.computeToken(reconstructedHash);
181181
assertThat(reconstructedHash).isEqualTo(hash | (0x0000001FFFFFFFFFL >>> nlz));
182182
assertThat(tokenFromReconstructedHash).isEqualTo(token);
183183
}
184184
}
185185
}
186186

187187
private static TokenIterable fromSortedArray(int[] tokens) {
188-
return new TokenIterable() {
189-
@Override
190-
public TokenIterator iterator() {
191-
return new TokenIterator() {
188+
return () ->
189+
new TokenIterator() {
192190
private int idx = 0;
193191

194192
@Override
@@ -201,8 +199,6 @@ public int nextToken() {
201199
return tokens[idx++];
202200
}
203201
};
204-
}
205-
};
206202
}
207203

208204
private static void testEstimationFromTokens(int distinctCount) {
@@ -215,7 +211,7 @@ private static void testEstimationFromTokens(int distinctCount) {
215211

216212
for (int i = 0; i < numIterations; ++i) {
217213
for (int c = 0; c < distinctCount; ++c) {
218-
tokens[c] = DistinctCountUtil.computeToken1(prg.nextLong());
214+
tokens[c] = DistinctCountUtil.computeToken(prg.nextLong());
219215
}
220216
Arrays.sort(tokens);
221217

0 commit comments

Comments
 (0)