Skip to content

Commit 10229d9

Browse files
ulrikrasmussenUlrik Rasmussenaaronbembenek
authored
Optimization of concurrent fact indexer (#27)
* Add fact indexer tests * Possibly an optimization of the concurrent fact indexer The concurrent fact indexer maintains a coarse and fine index of facts, where the coarse index is on the predicate symbol and the fine index is on the predicate symbol, argument index and constant at that argument. The fact indexer returns an overapproximation of the set of facts that might match the input query but tries to minimize the returned fact set using a heuristic which picks from the fine index the fact set maintained at the argument position for which the most distinct constants have been seen so far. Under the assumption that the fact sets are roughly of equal size for each constant this is a sound strategy, but it is easy to imagine worst cases where fact sets that are much too large are returned. E.g. consider f(a, x, b) f(b, x, b) f(c, x1, b) f(c, x2, b) ... f(c, x10000, b) f(c, x, d) and the query f(c, _, d)? Two fine indexes will be considered as candidates; the first and the third. The first index has three distinct constants and the third has only two. The candidate fact set returned is thus the 10,001 element set { f(c, x1, b), ..., f(c, x10000, b), f(c, x, d) } rather than the singleton set { f(c, x, d) } The fact indexer cannot evaluate the size of the underlying container since it has to work for all Iterable containers, including ConcurrentLinkedBag which cannot implement the Container interface for efficiency reasons. It is however quite easy to add a size method to this collection type and pass it as a lambda to the fact indexer. The fact indexer can then just pick the smallest set from the fine index instead of relying on a heuristic. * Use :: syntax for methods. --------- Co-authored-by: Ulrik Rasmussen <ulrik.rasmussen@deondigital.com> Co-authored-by: Aaron Bembenek <aaron.bembenek@gmail.com>
1 parent 4e25a86 commit 10229d9

File tree

5 files changed

+80
-16
lines changed

5 files changed

+80
-16
lines changed

src/main/java/edu/harvard/seas/pl/abcdatalog/engine/bottomup/concurrent/StratifiedNegationEvalManager.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,10 @@ public class StratifiedNegationEvalManager implements EvalManager {
8383

8484
private final ConcurrentFactIndexer<ConcurrentLinkedBag<PositiveAtom>> facts =
8585
new ConcurrentFactIndexer<>(
86-
() -> new ConcurrentLinkedBag<>(),
87-
(bag, atom) -> bag.add(atom),
88-
() -> ConcurrentLinkedBag.emptyBag());
86+
ConcurrentLinkedBag::new,
87+
ConcurrentLinkedBag::add,
88+
ConcurrentLinkedBag::emptyBag,
89+
ConcurrentLinkedBag::size);
8990
private final ConcurrentFactTrie trie = new ConcurrentFactTrie();
9091

9192
private final Map<PredicateSym, Set<Integer>> relevantStrataByPred = new HashMap<>();

src/main/java/edu/harvard/seas/pl/abcdatalog/util/datastructures/ConcurrentFactIndexer.java

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
import java.util.concurrent.ConcurrentMap;
4646
import java.util.concurrent.atomic.AtomicReferenceArray;
4747
import java.util.function.BiConsumer;
48+
import java.util.function.Function;
4849
import java.util.function.Supplier;
4950

5051
/**
@@ -63,6 +64,7 @@ public class ConcurrentFactIndexer<T extends Iterable<PositiveAtom>> implements
6364
private final Supplier<T> generator;
6465
private final BiConsumer<T, PositiveAtom> addFunc;
6566
private final Supplier<T> empty;
67+
private final Function<T, Integer> size;
6668

6769
private final ConcurrentMap<PredicateSym, AtomicReferenceArray<ConcurrentMap<Constant, T>>>
6870
fineIdx = Utilities.createConcurrentMap();
@@ -73,9 +75,10 @@ public class ConcurrentFactIndexer<T extends Iterable<PositiveAtom>> implements
7375
*
7476
* @param generator an anonymous function that returns a container
7577
* @param addFunc an anonymous function that adds a fact to a container
78+
* @param size an anonymous function that gets the number of items in the container
7679
*/
77-
public ConcurrentFactIndexer(Supplier<T> generator, BiConsumer<T, PositiveAtom> addFunc) {
78-
this(generator, addFunc, generator);
80+
public ConcurrentFactIndexer(Supplier<T> generator, BiConsumer<T, PositiveAtom> addFunc, Function<T, Integer> size) {
81+
this(generator, addFunc, generator, size);
7982
}
8083

8184
/**
@@ -84,12 +87,14 @@ public ConcurrentFactIndexer(Supplier<T> generator, BiConsumer<T, PositiveAtom>
8487
* @param generator an anonymous function that returns a container
8588
* @param addFunc an anonymous function that adds a fact to a container
8689
* @param empty an anonymous function that returns an empty container (such as a static instance)
90+
* @param size an anonymous function that gets the number of items in the container
8791
*/
8892
public ConcurrentFactIndexer(
89-
Supplier<T> generator, BiConsumer<T, PositiveAtom> addFunc, Supplier<T> empty) {
93+
Supplier<T> generator, BiConsumer<T, PositiveAtom> addFunc, Supplier<T> empty, Function<T, Integer> size) {
9094
this.generator = generator;
9195
this.addFunc = addFunc;
9296
this.empty = empty;
97+
this.size = size;
9398
}
9499

95100
/**
@@ -179,21 +184,20 @@ public T indexInto(PositiveAtom a, ConstOnlySubstitution s) {
179184

180185
int bestIdx = -1;
181186
Term bestConst = null;
182-
int maxKeySetSize = -1;
187+
int minFactSetSize = Integer.MAX_VALUE;
183188
Term[] args = a.getArgs();
184189
for (int i = 0; i < args.length; ++i) {
185190
Term t = args[i];
186-
// if (!(t instanceof DummyTerm) && (t instanceof Constant || (s != null && (t =
187-
// s.get((Variable) t)) != null))) {
188191
if ((t = t.accept(tv, s)) != null) {
189192
ConcurrentMap<Constant, T> byConstant = byPos.get(i);
190193
if (byConstant != null) {
191-
if (!byConstant.containsKey(t)) {
194+
T collection = byConstant.get(t);
195+
if (collection == null) {
192196
return this.empty.get();
193197
}
194-
int keySetSize = byConstant.size();
195-
if (keySetSize > maxKeySetSize) {
196-
maxKeySetSize = keySetSize;
198+
int factSetSize = size.apply(collection);
199+
if (factSetSize < minFactSetSize) {
200+
minFactSetSize = factSetSize;
197201
bestIdx = i;
198202
bestConst = t;
199203
}
@@ -234,7 +238,7 @@ public ConcurrentFactIndexer<T> getCopy() {
234238
// hand, that might end up creating a new fact indexer with an
235239
// inconsistent state.
236240
ConcurrentFactIndexer<T> r =
237-
new ConcurrentFactIndexer<>(this.generator, this.addFunc, this.empty);
241+
new ConcurrentFactIndexer<>(this.generator, this.addFunc, this.empty, this.size);
238242
for (PredicateSym pred : this.coarseIdx.keySet()) {
239243
r.addAll(this.indexInto(pred));
240244
}

src/main/java/edu/harvard/seas/pl/abcdatalog/util/datastructures/ConcurrentLinkedBag.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
*/
3535

3636
import java.util.Iterator;
37+
import java.util.concurrent.atomic.AtomicInteger;
3738
import java.util.concurrent.atomic.AtomicReference;
3839

3940
/**
@@ -44,6 +45,7 @@
4445
*/
4546
public class ConcurrentLinkedBag<T> implements Iterable<T> {
4647
private AtomicReference<Node> head = new AtomicReference<>();
48+
private AtomicInteger size = new AtomicInteger();
4749

4850
/** A node in the linked list. */
4951
public class Node {
@@ -85,8 +87,18 @@ public void add(T e) {
8587
h = this.head.get();
8688
n = new Node(e, h);
8789
} while (!this.head.compareAndSet(h, n));
90+
// This is not updated atomically with the addition of the element and can thus only be considered a lower bound
91+
// for the set size. However, consumers cannot tell the difference since there is no way to transactionally read
92+
// the set size and traverse the set anyway.
93+
size.incrementAndGet();
8894
}
8995

96+
/**
97+
* Returns the number of elements that have been added to the set.
98+
* @return the size of the set
99+
*/
100+
public Integer size() { return size.get(); }
101+
90102
/**
91103
* Returns the head of the linked-list that backs this bag, or null if there is no head.
92104
*

src/main/java/edu/harvard/seas/pl/abcdatalog/util/datastructures/FactIndexerFactory.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ private FactIndexerFactory() {}
5151
*/
5252
public static ConcurrentFactIndexer<Set<PositiveAtom>> createConcurrentSetFactIndexer() {
5353
return new ConcurrentFactIndexer<>(
54-
() -> Utilities.createConcurrentSet(), (set, fact) -> set.add(fact));
54+
Utilities::createConcurrentSet, Set::add, Set::size);
5555
}
5656

5757
/**
@@ -61,6 +61,6 @@ public static ConcurrentFactIndexer<Set<PositiveAtom>> createConcurrentSetFactIn
6161
*/
6262
public static ConcurrentFactIndexer<Queue<PositiveAtom>> createConcurrentQueueFactIndexer() {
6363
return new ConcurrentFactIndexer<>(
64-
() -> new ConcurrentLinkedQueue<>(), (queue, fact) -> queue.add(fact));
64+
ConcurrentLinkedQueue::new, Queue::add, Queue::size);
6565
}
6666
}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
package edu.harvard.seas.pl.abcdatalog.util.datastructures;
2+
3+
import edu.harvard.seas.pl.abcdatalog.ast.*;
4+
import edu.harvard.seas.pl.abcdatalog.engine.AbstractTests;
5+
import org.junit.Assert;
6+
import org.junit.Test;
7+
import org.junit.runner.RunWith;
8+
import org.junit.runners.Suite;
9+
10+
import java.util.function.Supplier;
11+
12+
@RunWith(Suite.class)
13+
@Suite.SuiteClasses({
14+
FactIndexerTest.SetTests.class,
15+
FactIndexerTest.ConcurrentLinkedBagTests.class
16+
})
17+
public class FactIndexerTest {
18+
public static class SetTests extends AbstractFactIndexerTests {
19+
public SetTests() { super(FactIndexerFactory::createConcurrentSetFactIndexer); }
20+
}
21+
22+
public static class ConcurrentLinkedBagTests extends AbstractFactIndexerTests {
23+
public ConcurrentLinkedBagTests() { super(FactIndexerFactory::createConcurrentQueueFactIndexer); }
24+
}
25+
26+
public static abstract class AbstractFactIndexerTests extends AbstractTests {
27+
private final Supplier<FactIndexer> factIndexerFactory;
28+
29+
public AbstractFactIndexerTests(Supplier<FactIndexer> factIndexerFactory) {
30+
super(() -> { throw new Error("Tests do not use engine="); });
31+
this.factIndexerFactory = factIndexerFactory;
32+
}
33+
34+
@Test
35+
public void testSmallestFactSetIsReturnedFromFineIndex() {
36+
FactIndexer indexer = factIndexerFactory.get();
37+
indexer.addAll(parseFacts("f(a,x,b). f(b,x,b). f(c,x1,b). f(c,x2,b). f(c,x,d)."));
38+
39+
Iterable<PositiveAtom> result = indexer.indexInto(parseQuery("f(c,_,d)?"));
40+
int size = 0;
41+
for (PositiveAtom ignored : result) {
42+
++size;
43+
}
44+
Assert.assertEquals(1, size);
45+
}
46+
}
47+
}

0 commit comments

Comments
 (0)