Skip to content

Commit 26ee359

Browse files
committed
fix: a few diff-related bugs
- Fix: remove extraneous leading/trailing whitespace/leaf nodes when parsing pretty-formatted html - Fix: track additional positioning information when traversing a document for a diff so that we can determine when we transition across elements at the same document depth Added some simple tests for both
1 parent 3d38650 commit 26ee359

File tree

4 files changed

+304
-44
lines changed

4 files changed

+304
-44
lines changed

crates/core/src/diff/diff.rs

Lines changed: 174 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,29 @@ use super::traversal::MoveTo;
99

1010
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
1111
struct Cursor {
12+
/// The depth within the overall document at which this cursor is located
1213
depth: u32,
14+
/// The index of the parent node, i.e. two nodes can be at the same depth in the document,
15+
/// but be pointing to different parents, e.g.:
16+
///
17+
/// ```no_rust,ignore
18+
/// <html> depth = 0, parent = 0, child = 0
19+
/// <head> depth = 1, parent = 0, child = 0
20+
/// <meta name="title" content="hi" /> depth = 2, parent = 0, child = 0
21+
/// </head>
22+
/// <body> depth = 1, parent = 0, child = 1
23+
/// <a href="about:blank">click</a> depth = 2, parent = 1, chid = 0
24+
/// </body>
25+
/// </html>
26+
/// ```
27+
parent: u32,
1328
child: u32,
1429
node: NodeRef,
1530
}
1631
impl Cursor {
1732
#[inline]
18-
fn pos(&self) -> (u32, u32) {
19-
(self.depth, self.child)
33+
fn pos(&self) -> (u32, u32, u32) {
34+
(self.depth, self.parent, self.child)
2035
}
2136

2237
#[inline(always)]
@@ -29,36 +44,41 @@ pub fn diff(old_document: &Document, new_document: &Document) -> VecDeque<Patch>
2944
let mut patches = VecDeque::new();
3045
let mut old_current = Cursor {
3146
depth: 0,
47+
parent: 0,
3248
child: 0,
3349
node: old_document.root(),
3450
};
3551
let mut old_next = VecDeque::from([old_current]);
3652
let mut new_next = VecDeque::from([Cursor {
3753
depth: 0,
54+
parent: 0,
3855
child: 0,
3956
node: new_document.root(),
4057
}]);
4158
loop {
42-
match dbg!((old_next.pop_front(), new_next.pop_front())) {
59+
match (old_next.pop_front(), new_next.pop_front()) {
4360
// We're at the same position in both trees, so examine the details of the node under the cursor
4461
(Some(old_cursor), Some(new_cursor)) if old_cursor.pos() == new_cursor.pos() => {
4562
old_current = old_cursor;
46-
match dbg!((old_cursor.node(old_document), new_cursor.node(new_document))) {
63+
match (old_cursor.node(old_document), new_cursor.node(new_document)) {
4764
// This was the root node, so move the cursor down a level and start walking the children
4865
(Node::Root, Node::Root) => {
4966
let depth = old_cursor.depth + 1;
67+
let parent = old_cursor.child;
5068
let old_children = old_document.children(old_cursor.node);
5169
let new_children = new_document.children(new_cursor.node);
5270
old_next.extend(old_children.iter().copied().enumerate().map(
5371
|(i, node)| Cursor {
5472
depth,
73+
parent,
5574
child: i as u32,
5675
node,
5776
},
5877
));
5978
new_next.extend(new_children.iter().copied().enumerate().map(
6079
|(i, node)| Cursor {
6180
depth,
81+
parent,
6282
child: i as u32,
6383
node,
6484
},
@@ -97,18 +117,21 @@ pub fn diff(old_document: &Document, new_document: &Document) -> VecDeque<Patch>
97117
patches.append(&mut attr_changes);
98118
// Add the children of both nodes to the worklist
99119
let depth = old_cursor.depth + 1;
120+
let parent = old_cursor.child;
100121
let old_children = old_document.children(old_cursor.node);
101122
let new_children = new_document.children(new_cursor.node);
102123
old_next.extend(old_children.iter().copied().enumerate().map(
103124
|(i, node)| Cursor {
104125
depth,
126+
parent,
105127
child: i as u32,
106128
node,
107129
},
108130
));
109131
new_next.extend(new_children.iter().copied().enumerate().map(
110132
|(i, node)| Cursor {
111133
depth,
134+
parent,
112135
child: i as u32,
113136
node,
114137
},
@@ -122,10 +145,12 @@ pub fn diff(old_document: &Document, new_document: &Document) -> VecDeque<Patch>
122145
replacement: Node::Leaf(new.clone()),
123146
});
124147
let depth = old_cursor.depth + 1;
148+
let parent = old_cursor.child;
125149
let old_children = old_document.children(old_cursor.node);
126150
old_next.extend(old_children.iter().copied().enumerate().map(
127151
|(i, node)| Cursor {
128152
depth,
153+
parent,
129154
child: i as u32,
130155
node,
131156
},
@@ -152,10 +177,12 @@ pub fn diff(old_document: &Document, new_document: &Document) -> VecDeque<Patch>
152177
});
153178
patches.append(&mut attr_changes);
154179
let depth = old_cursor.depth + 1;
180+
let parent = old_cursor.child;
155181
let new_children = new_document.children(new_cursor.node);
156182
new_next.extend(new_children.iter().copied().enumerate().map(
157183
|(i, node)| Cursor {
158184
depth,
185+
parent,
159186
child: i as u32,
160187
node,
161188
},
@@ -164,9 +191,112 @@ pub fn diff(old_document: &Document, new_document: &Document) -> VecDeque<Patch>
164191
_ => unreachable!(),
165192
}
166193
}
167-
// We descended a level in the new tree, but not the old; this means that there
168-
// were removals in the new tree at that depth. Issue removals until the old cursor
169-
// catches up with the new
194+
// This occurs in the following scenario (where `^` indicates the cursor location in each tree):
195+
//
196+
// old new old worklist new worklist
197+
// | | <- depth 0 a a
198+
// / \ / \ b b
199+
// a b a b <- depth 1 c c
200+
// / \ | | | d < e <
201+
// c d e c e <- depth 2 e
202+
// ^ ^
203+
//
204+
// In this scenario, the new tree has had `d` removed from it, and as nodes are added to the worklist
205+
// in breadth-first order, this means that when we move to `d` in the old tree, we'll move to `e` in the new.
206+
//
207+
// When this occurs, it means that the old parent node (e.g. `a` in the example above) has extra nodes which need
208+
// to be removed. To bring the old cursor up to par with the new cursor, we remove all nodes in the old tree for
209+
// which the cursor has the same `depth` but smaller `parent` than the new cursor.
210+
(Some(old_cursor), Some(new_cursor))
211+
if old_cursor.depth == new_cursor.depth
212+
&& old_cursor.parent < new_cursor.parent =>
213+
{
214+
old_current = old_cursor;
215+
patches.push_back(Patch::Remove {
216+
node: old_cursor.node,
217+
});
218+
while let Some(old_cursor) = old_next.front() {
219+
if old_cursor.pos() == new_cursor.pos() {
220+
// We've caught up to the new tree
221+
break;
222+
}
223+
if old_cursor.depth != new_cursor.depth || old_cursor.parent > new_cursor.parent
224+
{
225+
// If the old cursor has moved further in the tree than the new cursor, we cannot proceed here
226+
break;
227+
}
228+
let old_cursor = old_next.pop_front().unwrap();
229+
old_current = old_cursor;
230+
patches.push_back(Patch::Remove {
231+
node: old_cursor.node,
232+
});
233+
}
234+
// We need to revisit the new_cursor now that we've adjusted the old cursor
235+
new_next.push_front(new_cursor);
236+
}
237+
// This occurs in the following scenario (where `^` indicates the cursor location in each tree):
238+
//
239+
// old new old worklist new worklist
240+
// | | <- depth 0 a a
241+
// / \ / \ b b
242+
// a b a b <- depth 1 c c
243+
// | | / \ | d < e <
244+
// c d c e d <- depth 2 d
245+
// ^ ^
246+
//
247+
// Similar to the previous scenario, this one occurs when the new tree has had `e` added to `a` as a new
248+
// child element. This means that when we move to `d` in the old tree, we'll move to `e` in the new.
249+
//
250+
// As implied, this means that the old parent node (e.g. `a` in the example above) has nodes which need
251+
// to be added to it. To bring the new cursor up to par with the old cursor, we add every node in the new tree for
252+
// which the cursor has the same `depth` and `parent` as the initial new cursor.
253+
(Some(old_cursor), Some(new_cursor))
254+
if old_cursor.depth == new_cursor.depth
255+
&& old_cursor.parent > new_cursor.parent =>
256+
{
257+
// We need to grab the parent of the previous position of the old cursor so we can append more children
258+
let parent = old_document.parent(old_current.node).unwrap();
259+
// Handle the node currently under the cursor
260+
patches.push_back(Patch::Move(MoveTo::Node(parent)));
261+
patches.push_back(Patch::Push(parent));
262+
let mut subtree = recursively_append(new_cursor.node, new_document);
263+
patches.append(&mut subtree);
264+
// Handle the rest
265+
while let Some(new_cursor) = new_next.front() {
266+
if old_cursor.pos() == new_cursor.pos() {
267+
// We've caught up to the old tree
268+
break;
269+
}
270+
// It should not be possible to move past the old cursor
271+
debug_assert_eq!(new_cursor.depth, old_cursor.depth);
272+
debug_assert!(new_cursor.parent < old_cursor.parent);
273+
let new_cursor = new_next.pop_front().unwrap();
274+
patches.push_back(Patch::Move(MoveTo::Node(parent)));
275+
patches.push_back(Patch::Push(parent));
276+
let mut subtree = recursively_append(new_cursor.node, new_document);
277+
patches.append(&mut subtree);
278+
}
279+
// We either caught up to the old cursor, or ran out of new tree to visit, either way, we need to revisit the old cursor
280+
old_next.push_front(old_cursor);
281+
}
282+
// This occurs in the following scenario (where `^` indicates the cursor location in each tree):
283+
//
284+
// old new old worklist new worklist
285+
// | | <- depth 0 a a
286+
// / \ / \ b b
287+
// a b a b <- depth 1 c c
288+
// / \ / d < e <
289+
// c d c <- depth 2
290+
// | ^ |
291+
// e e
292+
// ^
293+
//
294+
// In this scenario, as we move from `c` to the next position in both trees, the old tree remains at the same depth
295+
// to visit `d`, while the new tree no longer has a `d` and thus moves down the tree to `e`.
296+
//
297+
// In order to proceed, we must remove all nodes in the old tree with the same depth and parent as the initial old cursor,
298+
// until we either reach the end of the old tree, implying that the new tree also has additional children, or we catch up
299+
// to the new cursor.
170300
(Some(old_cursor), Some(new_cursor)) if old_cursor.depth < new_cursor.depth => {
171301
old_current = old_cursor;
172302
patches.push_back(Patch::Remove {
@@ -177,6 +307,8 @@ pub fn diff(old_document: &Document, new_document: &Document) -> VecDeque<Patch>
177307
// We've caught up to the new tree
178308
break;
179309
}
310+
// It should always be the case that the removals occur at the same depth
311+
debug_assert_eq!(old_cursor.depth, old_current.depth);
180312
let old_cursor = old_next.pop_front().unwrap();
181313
old_current = old_cursor;
182314
patches.push_back(Patch::Remove {
@@ -186,16 +318,25 @@ pub fn diff(old_document: &Document, new_document: &Document) -> VecDeque<Patch>
186318
// We either caught up to the new cursor, or ran out of old tree to visit, either way, we need to revisit the new cursor
187319
new_next.push_front(new_cursor);
188320
}
189-
// We descended a level in the old tree, but not the new; this means that there
190-
// were additions in the new tree at that depth. Issue additions until the new cursor
191-
// catches up with the old
321+
// This occurs in the following scenario (where `^` indicates the cursor location in each tree):
322+
//
323+
// old new old worklist new worklist
324+
// | | <- depth 0 a a
325+
// / \ / \ b b
326+
// a b a b <- depth 1 c c
327+
// / / \ d < e <
328+
// c c e <- depth 2
329+
// | | ^
330+
// d d
331+
// ^
332+
//
333+
// This is like the previous scenario, but inverted. The new tree has been modified in such a way that when we move from
334+
// `c` in both trees, we'll end up at a greater depth in the old tree.
335+
//
336+
// In order to proceed, we must add all nodes from the new tree with the same depth and parent as the initial new cursor,
337+
// until we either reach the end of the new tree, or we catch up to the old cursor
192338
(Some(old_cursor), Some(new_cursor)) => {
193-
assert!(
194-
old_cursor.depth > new_cursor.depth,
195-
"expected {} > {}",
196-
old_cursor.depth,
197-
new_cursor.depth
198-
);
339+
assert!(old_cursor.depth > new_cursor.depth);
199340
// We need to go back up a level on the old tree, then grab the parent of _that_ node,
200341
// so that we can append more children. If this returns None, it means that we're inserting
201342
// multiple siblings at the root
@@ -208,11 +349,16 @@ pub fn diff(old_document: &Document, new_document: &Document) -> VecDeque<Patch>
208349
let mut subtree = recursively_append(new_cursor.node, new_document);
209350
patches.append(&mut subtree);
210351
// Handle the rest
352+
let expected_depth = new_cursor.depth;
353+
let expected_parent = new_cursor.parent;
211354
while let Some(new_cursor) = new_next.front() {
212355
if old_cursor.pos() == new_cursor.pos() {
213356
// We've caught up to the old tree
214357
break;
215358
}
359+
// It should not be possible to move past the old cursor
360+
debug_assert_eq!(new_cursor.depth, expected_depth);
361+
debug_assert_eq!(new_cursor.parent, expected_parent);
216362
let new_cursor = new_next.pop_front().unwrap();
217363
patches.push_back(Patch::Move(MoveTo::Node(parent)));
218364
patches.push_back(Patch::Push(parent));
@@ -269,7 +415,7 @@ fn diff_attributes(
269415
new_document: &Document,
270416
) -> VecDeque<Patch> {
271417
use std::collections::hash_map::Entry;
272-
418+
273419
let mut patches = VecDeque::new();
274420
let mut old_attribute_names = FxHashSet::default();
275421
let mut new_attribute_names = FxHashSet::default();
@@ -316,19 +462,16 @@ fn diff_attributes(
316462
// Additions (in new, not in old)
317463
for diff in new_attribute_names.difference(&old_attribute_names) {
318464
// Issue patch to add this attribute to the old
319-
patches.extend(
320-
new_attributes[&diff]
321-
.iter()
322-
.copied()
323-
.map(|(_, value)| Patch::AddAttributeTo {
324-
node,
325-
attr: Attribute {
326-
namespace: diff.0,
327-
name: diff.1,
328-
value: value.clone(),
329-
},
330-
}),
331-
);
465+
patches.extend(new_attributes[&diff].iter().copied().map(|(_, value)| {
466+
Patch::AddAttributeTo {
467+
node,
468+
attr: Attribute {
469+
namespace: diff.0,
470+
name: diff.1,
471+
value: value.clone(),
472+
},
473+
}
474+
}));
332475
}
333476

334477
// Removals (in old, not in new)
@@ -403,8 +546,7 @@ fn recursively_append(src: NodeRef, src_document: &Document) -> VecDeque<Patch>
403546
patches.push_back(Patch::Attach);
404547
}
405548
// This is a tree, these types of edges aren't allowed
406-
DfsEvent::BackEdge(_, _)
407-
| DfsEvent::CrossForwardEdge(_, _) => unreachable!(),
549+
DfsEvent::BackEdge(_, _) | DfsEvent::CrossForwardEdge(_, _) => unreachable!(),
408550
}
409551
});
410552

0 commit comments

Comments
 (0)