Skip to content

Commit 6d7c218

Browse files
committed
Relation::from_vec
1 parent c963cae commit 6d7c218

File tree

4 files changed

+96
-74
lines changed

4 files changed

+96
-74
lines changed

src/bin/graspan1.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@ fn main() {
4949
variable1.from_join(&variable1, &variable2, |_b, &a, &c| (c,a));
5050
}
5151

52-
// let _reachable = variable1.complete();
52+
let reachable = variable1.complete();
5353

54-
println!("{:?}\tComputation complete", timer.elapsed());
54+
println!("{:?}\tComputation complete (nodes_final: {})", timer.elapsed(), reachable.len());
5555

5656
}

src/join.rs

Lines changed: 25 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,17 @@ pub fn join_into<Key: Ord, Val1: Ord, Val2: Ord, Result: Ord, F: Fn(&Key, &Val1,
1313
let recent1 = input1.recent.borrow();
1414
let recent2 = input2.recent.borrow();
1515

16-
for batch2 in input2.tuples.borrow().iter() {
16+
for batch2 in input2.stable.borrow().iter() {
1717
join_helper(&recent1, &batch2, |k,v1,v2| results.push(logic(k,v1,v2)));
1818
}
1919

20-
for batch1 in input1.tuples.borrow().iter() {
20+
for batch1 in input1.stable.borrow().iter() {
2121
join_helper(&batch1, &recent2, |k,v1,v2| results.push(logic(k,v1,v2)));
2222
}
2323

2424
join_helper(&recent1, &recent2, |k,v1,v2| results.push(logic(k,v1,v2)));
2525

26-
output.insert(results.into());
26+
output.insert(Relation::from_vec(results));
2727
}
2828

2929
/// Moves all recent tuples from `input1` that are not present in `input2` into `output`.
@@ -43,50 +43,45 @@ pub fn antijoin_into<Key: Ord, Val: Ord, Result: Ord, F: Fn(&Key, &Val)->Result>
4343
}
4444
}
4545

46-
output.insert(results.into());
46+
output.insert(Relation::from_vec(results));
4747
}
4848

4949
fn join_helper<K: Ord, V1, V2, F: FnMut(&K, &V1, &V2)>(mut slice1: &[(K,V1)], mut slice2: &[(K,V2)], mut result: F) {
5050

5151
while !slice1.is_empty() && !slice2.is_empty() {
5252

53-
if slice1[0].0 == slice2[0].0 {
53+
use std::cmp::Ordering;
5454

55-
let mut key1_count = 1;
56-
while key1_count < slice1.len() && slice1[0].0 == slice1[key1_count].0 {
57-
key1_count += 1;
58-
}
55+
// If the keys match produce tuples, else advance the smaller key until they might.
56+
match slice1[0].0.cmp(&slice2[0].0) {
57+
Ordering::Less => {
58+
slice1 = gallop(slice1, |x| x.0 < slice2[0].0);
59+
},
60+
Ordering::Equal => {
5961

60-
let mut key2_count = 1;
61-
while key2_count < slice2.len() && slice2[0].0 == slice2[key2_count].0 {
62-
key2_count += 1;
63-
}
62+
// Determine the number of matching keys in each slice.
63+
let count1 = slice1.iter().take_while(|x| x.0 == slice1[0].0).count();
64+
let count2 = slice2.iter().take_while(|x| x.0 == slice2[0].0).count();
6465

65-
for index1 in 0 .. key1_count {
66-
for index2 in 0 .. key2_count {
67-
result(&slice1[0].0, &slice1[index1].1, &slice2[index2].1);
66+
// Produce results from the cross-product of matches.
67+
for index1 in 0 .. count1 {
68+
for index2 in 0 .. count2 {
69+
result(&slice1[0].0, &slice1[index1].1, &slice2[index2].1);
70+
}
6871
}
69-
}
70-
71-
slice1 = &slice1[key1_count..];
72-
slice2 = &slice2[key2_count..];
7372

74-
}
75-
else {
76-
77-
if slice1[0].0 < slice2[0].0 {
78-
slice1 = gallop(slice1, |x| &x.0 < &slice2[0].0);
73+
// Advance slices past this key.
74+
slice1 = &slice1[count1..];
75+
slice2 = &slice2[count2..];
7976
}
80-
else {
81-
slice2 = gallop(slice2, |x| &x.0 < &slice1[0].0);
77+
Ordering::Greater => {
78+
slice2 = gallop(slice2, |x| x.0 < slice1[0].0);
8279
}
83-
8480
}
8581
}
8682
}
8783

88-
#[inline(always)]
89-
pub fn gallop<'a, T, F: Fn(&T)->bool>(mut slice: &'a [T], cmp: F) -> &'a [T] {
84+
pub fn gallop<T, F: Fn(&T)->bool>(mut slice: &[T], cmp: F) -> &[T] {
9085
// if empty slice, or already >= element, return
9186
if slice.len() > 0 && cmp(&slice[0]) {
9287
let mut step = 1;

src/lib.rs

Lines changed: 67 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,8 @@ mod join;
2222
/// A relation represents a fixed set of key-value pairs. In many places in a
2323
/// Datalog computation we want to be sure that certain relations are not able
2424
/// to vary (for example, in antijoins).
25-
#[derive(Eq, PartialEq)]
2625
pub struct Relation<Tuple: Ord> {
27-
/// Wrapped elements in the relation.
28-
///
29-
/// It is crucial that if this type is constructed manually, this field be
30-
/// sorted, and it is probably important that all elements be distinct.
26+
/// Sorted list of distinct tuples.
3127
pub elements: Vec<Tuple>
3228
}
3329

@@ -61,13 +57,18 @@ impl<Tuple: Ord> Relation<Tuple> {
6157
elements.dedup();
6258
Relation { elements }
6359
}
60+
61+
fn from_vec(mut elements: Vec<Tuple>) -> Self {
62+
elements.sort_unstable();
63+
elements.dedup();
64+
Relation { elements }
65+
}
66+
6467
}
6568

6669
impl<Tuple: Ord, I: IntoIterator<Item=Tuple>> From<I> for Relation<Tuple> {
6770
fn from(iterator: I) -> Self {
68-
let mut elements: Vec<Tuple> = iterator.into_iter().collect();
69-
elements.sort_unstable();
70-
Relation { elements }
71+
Relation::from_vec(iterator.into_iter().collect())
7172
}
7273
}
7374

@@ -120,34 +121,39 @@ impl Iteration {
120121
}
121122

122123
/// A type that can report on whether it has changed.
123-
pub trait VariableTrait {
124+
trait VariableTrait {
124125
/// Reports whether the variable has changed since it was last asked.
125126
fn changed(&mut self) -> bool;
126127
}
127128

128129
/// A monotonically increasing set of `Tuple`s.
129130
///
130-
/// The design here is that there are three types of tuples: i. those that have been
131-
/// processed by all operators that can access the variable, ii. those that should now
132-
/// be processed by all operators that can access the variable, and iii. those that
133-
/// have only just been added and should eventually be promoted to type ii. (but which
134-
/// are currently hidden).
131+
/// There are three stages in the lifecycle of a tuple:
132+
///
133+
/// 1. A tuple is added to `self.to_add`, but is not yet visible externally.
134+
/// 2. Newly added tuples are then promoted to `self.recent` for one iteration.
135+
/// 3. After one iteration, recent tuples are moved to `self.stable` for posterity.
135136
///
136137
/// Each time `self.changed()` is called, the `recent` relation is folded into `stable`,
137-
/// and the `to_add` relations are merged, deduplicated against `tuples`, and then made
138-
/// `recent`. This way, across calls to `changed()` all added relations are at some point
139-
/// in `recent` once and eventually all are in `tuples`.
138+
/// and the `to_add` relations are merged, potentially deduplicated against `stable`, and
139+
/// then made `recent`. This way, across calls to `changed()` all added tuples are in
140+
/// `recent` at least once and eventually all are in `stable`.
141+
///
142+
/// A `Variable` may optionally be instructed not to de-duplicate its tuples, for reasons
143+
/// of performance. Such a variable cannot be relied on to terminate iterative computation,
144+
/// and it is important that any cycle of derivations have at least one de-duplicating
145+
/// variable on it.
140146
pub struct Variable<Tuple: Ord> {
141147
/// Should the variable be maintained distinctly.
142-
pub distinct: bool,
148+
distinct: bool,
143149
/// A useful name for the variable.
144-
pub name: String,
150+
name: String,
145151
/// A list of relations whose union are the accepted tuples.
146-
pub tuples: Rc<RefCell<Vec<Relation<Tuple>>>>,
152+
stable: Rc<RefCell<Vec<Relation<Tuple>>>>,
147153
/// A list of recent tuples, still to be processed.
148-
pub recent: Rc<RefCell<Relation<Tuple>>>,
154+
recent: Rc<RefCell<Relation<Tuple>>>,
149155
/// A list of future tuples, to be introduced.
150-
pub to_add: Rc<RefCell<Vec<Relation<Tuple>>>>,
156+
to_add: Rc<RefCell<Vec<Relation<Tuple>>>>,
151157
}
152158

153159
// Operator implementations.
@@ -256,7 +262,7 @@ impl<Tuple: Ord> Clone for Variable<Tuple> {
256262
Variable {
257263
distinct: self.distinct,
258264
name: self.name.clone(),
259-
tuples: self.tuples.clone(),
265+
stable: self.stable.clone(),
260266
recent: self.recent.clone(),
261267
to_add: self.to_add.clone(),
262268
}
@@ -268,7 +274,7 @@ impl<Tuple: Ord> Variable<Tuple> {
268274
Variable {
269275
distinct: true,
270276
name: name.to_string(),
271-
tuples: Rc::new(RefCell::new(Vec::new().into())),
277+
stable: Rc::new(RefCell::new(Vec::new().into())),
272278
recent: Rc::new(RefCell::new(Vec::new().into())),
273279
to_add: Rc::new(RefCell::new(Vec::new().into())),
274280
}
@@ -292,7 +298,7 @@ impl<Tuple: Ord> Variable<Tuple> {
292298
assert!(self.recent.borrow().is_empty());
293299
assert!(self.to_add.borrow().is_empty());
294300
let mut result: Relation<Tuple> = Vec::new().into();
295-
while let Some(batch) = self.tuples.borrow_mut().pop() {
301+
while let Some(batch) = self.stable.borrow_mut().pop() {
296302
result = result.merge(batch);
297303
}
298304
result
@@ -302,14 +308,14 @@ impl<Tuple: Ord> Variable<Tuple> {
302308
impl<Tuple: Ord> VariableTrait for Variable<Tuple> {
303309
fn changed(&mut self) -> bool {
304310

305-
// 1. Merge self.recent into self.tuples.
306-
let mut recent = ::std::mem::replace(&mut (*self.recent.borrow_mut()), Vec::new().into());
307-
while self.tuples.borrow().last().map(|x| x.len() <= 2 * recent.len()) == Some(true) {
308-
let last = self.tuples.borrow_mut().pop().unwrap();
309-
recent = recent.merge(last);
310-
}
311-
if !recent.is_empty() {
312-
self.tuples.borrow_mut().push(recent);
311+
// 1. Merge self.recent into self.stable.
312+
if !self.recent.borrow().is_empty() {
313+
let mut recent = ::std::mem::replace(&mut (*self.recent.borrow_mut()), Vec::new().into());
314+
while self.stable.borrow().last().map(|x| x.len() <= 2 * recent.len()) == Some(true) {
315+
let last = self.stable.borrow_mut().pop().unwrap();
316+
recent = recent.merge(last);
317+
}
318+
self.stable.borrow_mut().push(recent);
313319
}
314320

315321
// 2. Move self.to_add into self.recent.
@@ -318,14 +324,25 @@ impl<Tuple: Ord> VariableTrait for Variable<Tuple> {
318324
while let Some(to_add_more) = self.to_add.borrow_mut().pop() {
319325
to_add = to_add.merge(to_add_more);
320326
}
321-
// 2b. Restrict `to_add` to tuples not in `self.tuples`.
327+
// 2b. Restrict `to_add` to tuples not in `self.stable`.
322328
if self.distinct {
323-
for batch in self.tuples.borrow().iter() {
329+
for batch in self.stable.borrow().iter() {
324330
let mut slice = &batch[..];
325-
to_add.elements.retain(|x| {
326-
slice = join::gallop(slice, |y| y < x);
327-
slice.len() == 0 || &slice[0] != x
328-
})
331+
// Only gallop if the slice is relatively large.
332+
if slice.len() > 4 * to_add.elements.len() {
333+
to_add.elements.retain(|x| {
334+
slice = join::gallop(slice, |y| y < x);
335+
slice.len() == 0 || &slice[0] != x
336+
});
337+
}
338+
else {
339+
to_add.elements.retain(|x| {
340+
while slice.len() > 0 && &slice[0] < x {
341+
slice = &slice[1..];
342+
}
343+
slice.len() == 0 || &slice[0] != x
344+
});
345+
}
329346
}
330347
}
331348
*self.recent.borrow_mut() = to_add;
@@ -340,4 +357,14 @@ impl<Tuple: Ord> VariableTrait for Variable<Tuple> {
340357

341358
!self.recent.borrow().is_empty()
342359
}
343-
}
360+
}
361+
362+
// impl<Tuple: Ord> Drop for Variable<Tuple> {
363+
// fn drop(&mut self) {
364+
// let mut total = 0;
365+
// for batch in self.tuples.borrow().iter() {
366+
// total += batch.len();
367+
// }
368+
// println!("FINAL: {:?}\t{:?}", self.name, total);
369+
// }
370+
// }

src/map.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//! Map functionality.
22
3-
use super::Variable;
3+
use super::{Variable, Relation};
44

55
pub fn map_into<T1: Ord, T2: Ord, F: Fn(&T1)->T2>(
66
input: &Variable<T1>,
@@ -13,5 +13,5 @@ pub fn map_into<T1: Ord, T2: Ord, F: Fn(&T1)->T2>(
1313
results.push(logic(tuple));
1414
}
1515

16-
output.insert(results.into());
16+
output.insert(Relation::from_vec(results));
1717
}

0 commit comments

Comments
 (0)