Skip to content

Commit dc3f60a

Browse files
committed
make from_join take a Relation, add Relation::from_join
Also make `Relation::from_vec` public
1 parent 8a2b998 commit dc3f60a

File tree

3 files changed

+134
-13
lines changed

3 files changed

+134
-13
lines changed

src/join.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,20 @@ pub(crate) fn join_into<'me, Key: Ord, Val1: Ord, Val2: Ord, Result: Ord>(
3434
output.insert(Relation::from_vec(results));
3535
}
3636

37+
pub(crate) fn join_into_relation<'me, Key: Ord, Val1: Ord, Val2: Ord, Result: Ord>(
38+
input1: &Relation<(Key, Val1)>,
39+
input2: &Relation<(Key, Val2)>,
40+
mut logic: impl FnMut(&Key, &Val1, &Val2) -> Result,
41+
) -> Relation<Result> {
42+
let mut results = Vec::new();
43+
44+
join_helper(&input1.elements, &input2.elements, |k, v1, v2| {
45+
results.push(logic(k, v1, v2));
46+
});
47+
48+
Relation::from_vec(results)
49+
}
50+
3751
/// Moves all recent tuples from `input1` that are not present in `input2` into `output`.
3852
pub(crate) fn antijoin_into<Key: Ord, Val: Ord, Result: Ord>(
3953
input1: &Variable<(Key, Val)>,
@@ -116,11 +130,22 @@ pub(crate) fn gallop<T>(mut slice: &[T], mut cmp: impl FnMut(&T) -> bool) -> &[T
116130
slice
117131
}
118132

133+
/// An input that can be used with `from_join`; either a `Variable` or a `Relation`.
119134
pub trait JoinInput<'me, Tuple: Ord>: Copy {
135+
/// If we are on iteration N of the loop, these are the tuples
136+
/// added on iteration N-1. (For a `Relation`, this is always an
137+
/// empty slice.)
120138
type RecentTuples: Deref<Target = [Tuple]>;
139+
140+
/// If we are on iteration N of the loop, these are the tuples
141+
/// added on iteration N - 2 or before. (For a `Relation`, this is
142+
/// just `self`.)
121143
type StableTuples: Deref<Target = [Relation<Tuple>]>;
122144

145+
/// Get the set of recent tuples.
123146
fn recent(self) -> Self::RecentTuples;
147+
148+
/// Get the set of stable tuples.
124149
fn stable(self) -> Self::StableTuples;
125150
}
126151

src/lib.rs

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ mod join;
1919
mod map;
2020
mod test;
2121
mod treefrog;
22+
pub use crate::join::JoinInput;
2223
pub use crate::treefrog::{
2324
extend_anti::ExtendAnti,
2425
extend_with::ExtendWith,
@@ -98,7 +99,20 @@ impl<Tuple: Ord> Relation<Tuple> {
9899
Relation { elements }
99100
}
100101

101-
fn from_vec(mut elements: Vec<Tuple>) -> Self {
102+
/// Creates a `Relation` by joining the values from `input1` and
103+
/// `input2` and then applying `logic`. Like
104+
/// [`Variable::from_join`] except for use where the inputs are
105+
/// not varying across iterations.
106+
pub fn from_join<Key: Ord, Val1: Ord, Val2: Ord>(
107+
input1: &Relation<(Key, Val1)>,
108+
input2: &Relation<(Key, Val2)>,
109+
logic: impl FnMut(&Key, &Val1, &Val2) -> Tuple,
110+
) -> Self {
111+
join::join_into_relation(input1, input2, logic)
112+
}
113+
114+
/// Creates a `Relation` from a vector of tuples.
115+
pub fn from_vec(mut elements: Vec<Tuple>) -> Self {
102116
elements.sort();
103117
elements.dedup();
104118
Relation { elements }
@@ -201,7 +215,22 @@ pub struct Variable<Tuple: Ord> {
201215

202216
// Operator implementations.
203217
impl<Tuple: Ord> Variable<Tuple> {
204-
/// Adds tuples that result from joining `input1` and `input2`.
218+
/// Adds tuples that result from joining `input1` and `input2` --
219+
/// each of the inputs must be a set of (Key, Value) tuples. Both
220+
/// `input1` and `input2` must have the same type of key (`K`) but
221+
/// they can have distinct value types (`V1` and `V2`
222+
/// respectively). The `logic` closure will be invoked for each
223+
/// key that appears in both inputs; it is also given the two
224+
/// values, and from those it should construct the resulting
225+
/// value.
226+
///
227+
/// Note that `input1` must be a variable, but `input2` can be a
228+
/// relation or a variable. Therefore, you cannot join two
229+
/// relations with this method. This is not because the result
230+
/// would be wrong, but because it would be inefficient: the
231+
/// result from such a join cannot vary across iterations (as
232+
/// relations are fixed), so you should prefer to invoke `insert`
233+
/// on a relation created by `Relation::from_join` instead.
205234
///
206235
/// # Examples
207236
///
@@ -224,10 +253,10 @@ impl<Tuple: Ord> Variable<Tuple> {
224253
/// let result = variable.complete();
225254
/// assert_eq!(result.len(), 121);
226255
/// ```
227-
pub fn from_join<K: Ord, V1: Ord, V2: Ord>(
256+
pub fn from_join<'me, K: Ord, V1: Ord, V2: Ord>(
228257
&self,
229-
input1: &Variable<(K, V1)>,
230-
input2: &Variable<(K, V2)>,
258+
input1: &'me Variable<(K, V1)>,
259+
input2: impl JoinInput<'me, (K, V2)>,
231260
logic: impl FnMut(&K, &V1, &V2) -> Tuple,
232261
) {
233262
join::join_into(input1, input2, self, logic)

src/test.rs

Lines changed: 75 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,17 @@
11
#![cfg(test)]
22

3-
use crate::Relation;
43
use crate::Iteration;
4+
use crate::Relation;
55
use crate::RelationLeaper;
66
use proptest::prelude::*;
77
use proptest::{proptest, proptest_helper};
88

99
fn inputs() -> impl Strategy<Value = Vec<(u32, u32)>> {
10-
prop::collection::vec((0_u32..100, 0_u32..100), 1..100)
10+
prop::collection::vec((0_u32..100, 0_u32..100), 1..500)
1111
}
1212

13-
fn reachable_with_join(edges: &[(u32, u32)]) -> Relation<(u32, u32)> {
13+
/// The original way to use datafrog -- computes reachable nodes from a set of edges
14+
fn reachable_with_var_join(edges: &[(u32, u32)]) -> Relation<(u32, u32)> {
1415
let edges = Relation::from(edges.iter().cloned());
1516
let mut iteration = Iteration::new();
1617

@@ -28,6 +29,25 @@ fn reachable_with_join(edges: &[(u32, u32)]) -> Relation<(u32, u32)> {
2829
reachable.complete()
2930
}
3031

32+
/// Like `reachable_with_var_join`, but using a relation as an input to `from_join`
33+
fn reachable_with_relation_join(edges: &[(u32, u32)]) -> Relation<(u32, u32)> {
34+
let edges = Relation::from(edges.iter().cloned());
35+
let mut iteration = Iteration::new();
36+
37+
// NB. Changed from `reachable_with_var_join`:
38+
let edges_by_successor = Relation::from(edges.iter().map(|&(n1, n2)| (n2, n1)));
39+
40+
let reachable = iteration.variable::<(u32, u32)>("reachable");
41+
reachable.insert(edges);
42+
43+
while iteration.changed() {
44+
// reachable(N1, N3) :- edges(N1, N2), reachable(N2, N3).
45+
reachable.from_join(&reachable, &edges_by_successor, |&_, &n3, &n1| (n1, n3));
46+
}
47+
48+
reachable.complete()
49+
}
50+
3151
fn reachable_with_leapfrog(edges: &[(u32, u32)]) -> Relation<(u32, u32)> {
3252
let edges = Relation::from(edges.iter().cloned());
3353
let mut iteration = Iteration::new();
@@ -41,21 +61,68 @@ fn reachable_with_leapfrog(edges: &[(u32, u32)]) -> Relation<(u32, u32)> {
4161
// reachable(N1, N3) :- edges(N1, N2), reachable(N2, N3).
4262
reachable.from_leapjoin(
4363
&reachable,
44-
&mut [
45-
&mut edges_by_successor.extend_with(|&(n2, _)| n2),
46-
],
64+
&mut [&mut edges_by_successor.extend_with(|&(n2, _)| n2)],
4765
|&(_, n3), &n1| (n1, n3),
4866
);
4967
}
5068

5169
reachable.complete()
5270
}
5371

72+
/// Computes a join where the values are summed -- uses iteration
73+
/// variables (the original datafrog technique).
74+
fn sum_join_via_var(
75+
input1_slice: &[(u32, u32)],
76+
input2_slice: &[(u32, u32)],
77+
) -> Relation<(u32, u32)> {
78+
let mut iteration = Iteration::new();
79+
80+
let input1 = iteration.variable::<(u32, u32)>("input1");
81+
input1.insert(Relation::from(input1_slice.iter().cloned()));
82+
83+
let input2 = iteration.variable::<(u32, u32)>("input1");
84+
input2.insert(Relation::from(input2_slice.iter().cloned()));
85+
86+
let output = iteration.variable::<(u32, u32)>("output");
87+
88+
while iteration.changed() {
89+
// output(K1, V1 * 100 + V2) :- input1(K1, V1), input2(K1, V2).
90+
output.from_join(&input1, &input2, |&k1, &v1, &v2| (k1, v1 * 100 + v2));
91+
}
92+
93+
output.complete()
94+
}
95+
96+
/// Computes a join where the values are summed -- uses
97+
/// `Relation::from_join` directly, with no iteration variables.
98+
fn sum_join_via_relation(
99+
input1_slice: &[(u32, u32)],
100+
input2_slice: &[(u32, u32)],
101+
) -> Relation<(u32, u32)> {
102+
let input1 = Relation::from(input1_slice.iter().cloned());
103+
let input2 = Relation::from(input2_slice.iter().cloned());
104+
Relation::from_join(&input1, &input2, |&k1, &v1, &v2| (k1, v1 * 100 + v2))
105+
}
106+
54107
proptest! {
55108
#[test]
56-
fn reachable(edges in inputs()) {
57-
let reachable1 = reachable_with_join(&edges);
109+
fn reachable_leapfrog_vs_var_join(edges in inputs()) {
110+
let reachable1 = reachable_with_var_join(&edges);
58111
let reachable2 = reachable_with_leapfrog(&edges);
59112
assert_eq!(reachable1.elements, reachable2.elements);
60113
}
114+
115+
#[test]
116+
fn reachable_rel_join_vs_var_join(edges in inputs()) {
117+
let reachable1 = reachable_with_var_join(&edges);
118+
let reachable2 = reachable_with_relation_join(&edges);
119+
assert_eq!(reachable1.elements, reachable2.elements);
120+
}
121+
122+
#[test]
123+
fn sum_join_from_var_vs_rel((set1, set2) in (inputs(), inputs())) {
124+
let output1 = sum_join_via_var(&set1, &set2);
125+
let output2 = sum_join_via_relation(&set1, &set2);
126+
assert_eq!(output1.elements, output2.elements);
127+
}
61128
}

0 commit comments

Comments
 (0)