Skip to content

Commit ce5cf37

Browse files
bjacotgcopybara-github
authored andcommitted
Make eq() produce a debug message diffing the pretty-printing debug string.
PiperOrigin-RevId: 529691450
1 parent ce370aa commit ce5cf37

File tree

3 files changed

+432
-1
lines changed

3 files changed

+432
-1
lines changed
Lines changed: 269 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,269 @@
1+
// Copyright 2023 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
use std::fmt::Debug;
16+
use std::ops::Index;
17+
18+
/// Compute the edit list of `left` and `right`.
19+
///
20+
/// See <https://en.wikipedia.org/wiki/Edit_distance>
21+
pub(crate) fn edit_list<T: Distance + Copy>(
22+
left: impl IntoIterator<Item = T>,
23+
right: impl IntoIterator<Item = T>,
24+
) -> Vec<Edit<T>> {
25+
let left: Vec<_> = left.into_iter().collect();
26+
let right: Vec<_> = right.into_iter().collect();
27+
28+
struct TableElement<U> {
29+
cost: f64,
30+
last_edit: Edit<U>,
31+
}
32+
33+
let mut table: Table<TableElement<T>> = Table::new(left.len() + 1, right.len() + 1);
34+
table.push(TableElement {
35+
cost: 0.0,
36+
// This is a placeholder value and should never be read.
37+
last_edit: Edit::ExtraLeft { left: left[0] },
38+
});
39+
40+
for idx in 1..(left.len() + 1) {
41+
table.push(TableElement {
42+
cost: idx as _,
43+
last_edit: Edit::ExtraLeft { left: left[idx - 1] },
44+
});
45+
}
46+
for idy in 1..(right.len() + 1) {
47+
table.push(TableElement {
48+
cost: idy as _,
49+
last_edit: Edit::ExtraRight { right: right[idy - 1] },
50+
});
51+
for idx in 1..(left.len() + 1) {
52+
let left_element = left[idx - 1];
53+
let right_element = right[idy - 1];
54+
let extra_left = TableElement {
55+
cost: 1.0 + table[(idx - 1, idy)].cost,
56+
last_edit: Edit::ExtraLeft { left: left_element },
57+
};
58+
let extra_right = TableElement {
59+
cost: 1.0 + table[(idx, idy - 1)].cost,
60+
last_edit: Edit::ExtraRight { right: right_element },
61+
};
62+
let distance = T::distance(left_element, right_element);
63+
let both = TableElement {
64+
cost: distance + table[(idx - 1, idy - 1)].cost,
65+
last_edit: Edit::Both { left: left_element, right: right_element, distance },
66+
};
67+
table.push(
68+
[extra_left, extra_right, both]
69+
.into_iter()
70+
.min_by(|a, b| a.cost.partial_cmp(&b.cost).unwrap())
71+
.unwrap(),
72+
);
73+
}
74+
}
75+
let mut path = Vec::with_capacity(left.len() + right.len());
76+
let mut current = (left.len(), right.len());
77+
while current != (0, 0) {
78+
let edit = table[current].last_edit.clone();
79+
current = match edit {
80+
Edit::ExtraLeft { .. } => (current.0 - 1, current.1),
81+
Edit::ExtraRight { .. } => (current.0, current.1 - 1),
82+
Edit::Both { .. } => (current.0 - 1, current.1 - 1),
83+
};
84+
path.push(edit);
85+
}
86+
path.reverse();
87+
path
88+
}
89+
90+
/// One step of an edit list relating two sequences of `T`.
#[derive(Debug, Clone)]
pub(crate) enum Edit<T> {
    /// The left sequence holds an element that has no counterpart on the
    /// right.
    ExtraLeft {
        left: T,
    },
    /// The right sequence holds an element that has no counterpart on the
    /// left.
    ExtraRight {
        right: T,
    },
    /// One element of each sequence is paired up; `distance` records the
    /// cost of that pairing.
    Both {
        left: T,
        right: T,
        distance: f64,
    },
}
100+
101+
/// Cost of pairing two values when computing an edit list.
///
/// Implementations steer [`edit_list`]: the returned value decides when a
/// single [`Edit::Both`] is preferred over the combination of one
/// [`Edit::ExtraRight`] and one [`Edit::ExtraLeft`].
pub(crate) trait Distance {
    /// Returns the cost of matching `left` against `right`.
    fn distance(left: Self, right: Self) -> f64;
}
108+
109+
impl Distance for char {
110+
fn distance(left: Self, right: Self) -> f64 {
111+
if left == right { 0.0 } else { 1.0 }
112+
}
113+
}
114+
115+
impl Distance for &str {
116+
/// &str::distance makes it slightly cheaper to consume both left and right
117+
/// at the same time than to consume left and then to consume right. The
118+
/// discount gets larger if the strings are very similar.
119+
fn distance(left: Self, right: Self) -> f64 {
120+
if left == right {
121+
return 0.0;
122+
}
123+
let edits: f64 = edit_list(left.chars(), right.chars())
124+
.into_iter()
125+
.map(|edit| match edit {
126+
Edit::Both { distance, .. } => distance,
127+
_ => 1.0,
128+
})
129+
.sum();
130+
1. + edits / (left.chars().count().max(right.chars().count()) as f64)
131+
}
132+
}
133+
134+
/// Two-dimensional table stored in a single flat `Vec<_>`.
struct Table<T> {
    /// Extent of the first dimension; the element at `(idx1, idx2)` lives at
    /// flat position `idx1 + size1 * idx2`.
    size1: usize,
    /// Flat backing storage, filled in insertion order.
    table: Vec<T>,
}
139+
140+
impl<T> Table<T> {
141+
/// Create a new [`Table<T>`].
142+
///
143+
/// The internal vector is allocated but not filled. Accessing a value
144+
/// before [`push`]ing it will result in a panic.
145+
fn new(size1: usize, size2: usize) -> Self {
146+
Self { size1, table: Vec::with_capacity(size1 * size2) }
147+
}
148+
149+
/// Add [`new_element`] to [`self`].
150+
///
151+
/// New values are added along the first dimension until it is filled. In
152+
/// other words, the first element is inserted at (0, 0), the second at
153+
/// (1, 0), and so on, until the ([`size1`] + 1)th is inserted at (0, 1).
154+
fn push(&mut self, new_element: T) {
155+
self.table.push(new_element);
156+
}
157+
}
158+
159+
impl<T> Index<(usize, usize)> for Table<T> {
160+
type Output = T;
161+
162+
fn index(&self, (idx1, idx2): (usize, usize)) -> &T {
163+
&self.table[idx1 + self.size1 * idx2]
164+
}
165+
}
166+
167+
#[cfg(test)]
mod tests {
    use super::*;
    use crate::elements_are;
    use crate::{matcher::Matcher, matchers::predicate, verify_that, Result};
    use indoc::indoc;

    /// Matches an [`Edit::Both`] whose sides equal the expected values; the
    /// recorded distance is ignored.
    fn is_both<E: PartialEq + Debug>(
        l_expected: E,
        r_expected: E,
    ) -> impl Matcher<ActualT = Edit<E>> {
        predicate(move |edit: &Edit<E>| match edit {
            Edit::Both { left, right, .. } => left == &l_expected && right == &r_expected,
            _ => false,
        })
    }

    /// Matches an [`Edit::ExtraLeft`] carrying the expected value.
    fn is_extra_left<E: PartialEq + Debug>(l_expected: E) -> impl Matcher<ActualT = Edit<E>> {
        predicate(move |edit: &Edit<E>| match edit {
            Edit::ExtraLeft { left } => left == &l_expected,
            _ => false,
        })
    }

    /// Matches an [`Edit::ExtraRight`] carrying the expected value.
    fn is_extra_right<E: PartialEq + Debug>(r_expected: E) -> impl Matcher<ActualT = Edit<E>> {
        predicate(move |edit: &Edit<E>| match edit {
            Edit::ExtraRight { right } => right == &r_expected,
            _ => false,
        })
    }

    #[test]
    fn exact_match() -> Result<()> {
        let actual = edit_list("hello".chars(), "hello".chars());
        verify_that!(
            actual,
            elements_are![
                is_both('h', 'h'),
                is_both('e', 'e'),
                is_both('l', 'l'),
                is_both('l', 'l'),
                is_both('o', 'o'),
            ]
        )
    }

    #[test]
    fn completely_different() -> Result<()> {
        let actual = edit_list("goodbye".chars(), "hello".chars());
        verify_that!(
            actual,
            elements_are![
                is_both('g', 'h'),
                is_both('o', 'e'),
                is_both('o', 'l'),
                is_both('d', 'l'),
                is_both('b', 'o'),
                is_extra_left('y'),
                is_extra_left('e'),
            ]
        )
    }

    #[test]
    fn slightly_different() -> Result<()> {
        let actual = edit_list("floor".chars(), "flower".chars());
        verify_that!(
            actual,
            elements_are![
                is_both('f', 'f'),
                is_both('l', 'l'),
                is_both('o', 'o'),
                is_both('o', 'w'),
                is_extra_right('e'),
                is_both('r', 'r'),
            ]
        )
    }

    #[test]
    fn lines_difference() -> Result<()> {
        let left = indoc!(
            r#"
            int: 123
            string: "something"
            "#
        );
        let right = indoc!(
            r#"
            int: 321
            string: "someone"
            "#
        );
        let actual = edit_list(left.lines(), right.lines());
        verify_that!(
            actual,
            elements_are![
                is_both("int: 123", "int: 321"),
                is_both(r#"string: "something""#, r#"string: "someone""#),
            ]
        )
    }
}

0 commit comments

Comments
 (0)