Skip to content

Commit f87967d

Browse files
authored
Merge pull request #2072 from GitoxideLabs/fix-unidiff
Reproduce unified diff issue
2 parents dab97f7 + 5e64298 commit f87967d

File tree

2 files changed

+193
-34
lines changed

2 files changed

+193
-34
lines changed

gix-diff/src/blob/unified_diff.rs

Lines changed: 74 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,10 @@ pub(super) mod _impl {
7777

7878
use super::{ConsumeHunk, ContextSize, NewlineSeparator};
7979

80+
const CONTEXT: char = ' ';
81+
const ADDITION: char = '+';
82+
const REMOVAL: char = '-';
83+
8084
/// A [`Sink`] that creates a textual diff in the format typically output by git or `gnu-diff` if the `-u` option is used,
8185
/// and passes it in full to a consumer.
8286
pub struct UnifiedDiff<'a, T, D>
@@ -88,18 +92,25 @@ pub(super) mod _impl {
8892
after: &'a [Token],
8993
interner: &'a Interner<T>,
9094

91-
pos: u32,
95+
/// The 0-based start position in the 'before' tokens for the accumulated hunk for display in the header.
9296
before_hunk_start: u32,
93-
after_hunk_start: u32,
97+
/// The size of the accumulated 'before' hunk in lines for display in the header.
9498
before_hunk_len: u32,
99+
/// The 0-based start position in the 'after' tokens for the accumulated hunk for display in the header.
100+
after_hunk_start: u32,
101+
/// The size of the accumulated 'after' hunk in lines.
95102
after_hunk_len: u32,
103+
// An index into `before` and the context line to print next,
104+
// or `None` if this value was never computed to be the correct starting point for an accumulated hunk.
105+
ctx_pos: Option<u32>,
106+
96107
/// Symmetrical context before and after the changed hunk.
97108
ctx_size: u32,
109+
newline: NewlineSeparator<'a>,
98110

99111
buffer: Vec<u8>,
100112
header_buf: String,
101113
delegate: D,
102-
newline: NewlineSeparator<'a>,
103114

104115
err: Option<std::io::Error>,
105116
}
@@ -122,19 +133,22 @@ pub(super) mod _impl {
122133
context_size: ContextSize,
123134
) -> Self {
124135
Self {
136+
interner: &input.interner,
137+
before: &input.before,
138+
after: &input.after,
139+
125140
before_hunk_start: 0,
126-
after_hunk_start: 0,
127141
before_hunk_len: 0,
128142
after_hunk_len: 0,
143+
after_hunk_start: 0,
144+
ctx_pos: None,
145+
146+
ctx_size: context_size.symmetrical,
147+
newline: newline_separator,
148+
129149
buffer: Vec::with_capacity(8),
130150
header_buf: String::new(),
131151
delegate: consume_hunk,
132-
interner: &input.interner,
133-
before: &input.before,
134-
after: &input.after,
135-
pos: 0,
136-
ctx_size: context_size.symmetrical,
137-
newline: newline_separator,
138152

139153
err: None,
140154
}
@@ -158,23 +172,25 @@ pub(super) mod _impl {
158172
}
159173
}
160174

161-
fn flush(&mut self) -> std::io::Result<()> {
162-
if self.before_hunk_len == 0 && self.after_hunk_len == 0 {
175+
fn flush_accumulated_hunk(&mut self) -> std::io::Result<()> {
176+
if self.nothing_to_flush() {
163177
return Ok(());
164178
}
165179

166-
let end = (self.pos + self.ctx_size).min(self.before.len() as u32);
167-
self.update_pos(end, end);
180+
let ctx_pos = self.ctx_pos.expect("has been set if we started a hunk");
181+
let end = (ctx_pos + self.ctx_size).min(self.before.len() as u32);
182+
self.print_context_and_update_pos(ctx_pos..end, end);
168183

184+
let hunk_start = self.before_hunk_start + 1;
185+
let hunk_end = self.after_hunk_start + 1;
169186
self.header_buf.clear();
170-
171187
std::fmt::Write::write_fmt(
172188
&mut self.header_buf,
173189
format_args!(
174190
"@@ -{},{} +{},{} @@{nl}",
175-
self.before_hunk_start + 1,
191+
hunk_start,
176192
self.before_hunk_len,
177-
self.after_hunk_start + 1,
193+
hunk_end,
178194
self.after_hunk_len,
179195
nl = match self.newline {
180196
NewlineSeparator::AfterHeaderAndLine(nl) | NewlineSeparator::AfterHeaderAndWhenNeeded(nl) => {
@@ -185,26 +201,35 @@ pub(super) mod _impl {
185201
)
186202
.map_err(|err| std::io::Error::new(ErrorKind::Other, err))?;
187203
self.delegate.consume_hunk(
188-
self.before_hunk_start + 1,
204+
hunk_start,
189205
self.before_hunk_len,
190-
self.after_hunk_start + 1,
206+
hunk_end,
191207
self.after_hunk_len,
192208
&self.header_buf,
193209
&self.buffer,
194210
)?;
195-
self.buffer.clear();
196-
self.before_hunk_len = 0;
197-
self.after_hunk_len = 0;
211+
212+
self.reset_hunks();
198213
Ok(())
199214
}
200215

201-
fn update_pos(&mut self, print_to: u32, move_to: u32) {
202-
self.print_tokens(&self.before[self.pos as usize..print_to as usize], ' ');
203-
let len = print_to - self.pos;
204-
self.pos = move_to;
216+
fn print_context_and_update_pos(&mut self, print: Range<u32>, move_to: u32) {
217+
self.print_tokens(&self.before[print.start as usize..print.end as usize], CONTEXT);
218+
let len = print.end - print.start;
219+
self.ctx_pos = Some(move_to);
205220
self.before_hunk_len += len;
206221
self.after_hunk_len += len;
207222
}
223+
224+
fn reset_hunks(&mut self) {
225+
self.buffer.clear();
226+
self.before_hunk_len = 0;
227+
self.after_hunk_len = 0;
228+
}
229+
230+
fn nothing_to_flush(&self) -> bool {
231+
self.before_hunk_len == 0 && self.after_hunk_len == 0
232+
}
208233
}
209234

210235
impl<T, D> Sink for UnifiedDiff<'_, T, D>
@@ -218,24 +243,39 @@ pub(super) mod _impl {
218243
if self.err.is_some() {
219244
return;
220245
}
221-
if before.start - self.pos > 2 * self.ctx_size {
222-
if let Err(err) = self.flush() {
246+
let start_next_hunk = self
247+
.ctx_pos
248+
.is_some_and(|ctx_pos| before.start - ctx_pos > 2 * self.ctx_size);
249+
if start_next_hunk {
250+
if let Err(err) = self.flush_accumulated_hunk() {
223251
self.err = Some(err);
224252
return;
225253
}
226-
self.pos = before.start - self.ctx_size;
227-
self.before_hunk_start = self.pos;
254+
let ctx_pos = before.start - self.ctx_size;
255+
self.ctx_pos = Some(ctx_pos);
256+
self.before_hunk_start = ctx_pos;
228257
self.after_hunk_start = after.start - self.ctx_size;
229258
}
230-
self.update_pos(before.start, before.end);
259+
let ctx_pos = match self.ctx_pos {
260+
None => {
261+
// TODO: can this be made so the code above does the job?
262+
let ctx_pos = before.start.saturating_sub(self.ctx_size);
263+
self.before_hunk_start = ctx_pos;
264+
self.after_hunk_start = after.start.saturating_sub(self.ctx_size);
265+
ctx_pos
266+
}
267+
Some(pos) => pos,
268+
};
269+
self.print_context_and_update_pos(ctx_pos..before.start, before.end);
231270
self.before_hunk_len += before.end - before.start;
232271
self.after_hunk_len += after.end - after.start;
233-
self.print_tokens(&self.before[before.start as usize..before.end as usize], '-');
234-
self.print_tokens(&self.after[after.start as usize..after.end as usize], '+');
272+
273+
self.print_tokens(&self.before[before.start as usize..before.end as usize], REMOVAL);
274+
self.print_tokens(&self.after[after.start as usize..after.end as usize], ADDITION);
235275
}
236276

237277
fn finish(mut self) -> Self::Out {
238-
if let Err(err) = self.flush() {
278+
if let Err(err) = self.flush_accumulated_hunk() {
239279
self.err = Some(err);
240280
}
241281
if let Some(err) = self.err {

gix-diff/tests/diff/blob/unified_diff.rs

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,125 @@ fn context_overlap_by_one_line_move_down() -> crate::Result {
170170
Ok(())
171171
}
172172

173+
#[test]
174+
fn added_on_top_keeps_context_correctly_sized() -> crate::Result {
175+
let a = "1\n2\n3\n4\n5\n6\n7\n8\n9\n10";
176+
let b = "1\n2\n3\n4\n4.5\n5\n6\n7\n8\n9\n10";
177+
178+
let a = gix_diff::blob::sources::lines_with_terminator(a);
179+
let b = gix_diff::blob::sources::lines_with_terminator(b);
180+
let interner = gix_diff::blob::intern::InternedInput::new(a, b);
181+
182+
let actual = gix_diff::blob::diff(
183+
Algorithm::Myers,
184+
&interner,
185+
UnifiedDiff::new(
186+
&interner,
187+
String::new(),
188+
NewlineSeparator::AfterHeaderAndWhenNeeded("\n"),
189+
ContextSize::symmetrical(3),
190+
),
191+
)?;
192+
// TODO: fix this
193+
insta::assert_snapshot!(actual, @r"
194+
@@ -2,6 +2,7 @@
195+
2
196+
3
197+
4
198+
+4.5
199+
5
200+
6
201+
7
202+
");
203+
204+
let a = "1\n2\n3\n4\n5\n6\n7\n8\n9\n10";
205+
let b = "1\n2\n3\n4\n5\n6\n6.5\n7\n8\n9\n10";
206+
207+
let a = gix_diff::blob::sources::lines_with_terminator(a);
208+
let b = gix_diff::blob::sources::lines_with_terminator(b);
209+
let interner = gix_diff::blob::intern::InternedInput::new(a, b);
210+
211+
let actual = gix_diff::blob::diff(
212+
Algorithm::Myers,
213+
&interner,
214+
UnifiedDiff::new(
215+
&interner,
216+
String::new(),
217+
NewlineSeparator::AfterHeaderAndWhenNeeded("\n"),
218+
ContextSize::symmetrical(3),
219+
),
220+
)?;
221+
222+
insta::assert_snapshot!(actual, @r"
223+
@@ -4,6 +4,7 @@
224+
4
225+
5
226+
6
227+
+6.5
228+
7
229+
8
230+
9
231+
");
232+
let a = "1\n2\n3\n4\n5\n6\n7\n8\n9\n10";
233+
let b = "1\n2\n3\n3.5\n4\n5\n6\n7\n8\n9\n10";
234+
235+
let a = gix_diff::blob::sources::lines_with_terminator(a);
236+
let b = gix_diff::blob::sources::lines_with_terminator(b);
237+
let interner = gix_diff::blob::intern::InternedInput::new(a, b);
238+
239+
let actual = gix_diff::blob::diff(
240+
Algorithm::Myers,
241+
&interner,
242+
UnifiedDiff::new(
243+
&interner,
244+
String::new(),
245+
NewlineSeparator::AfterHeaderAndWhenNeeded("\n"),
246+
ContextSize::symmetrical(3),
247+
),
248+
)?;
249+
250+
insta::assert_snapshot!(actual, @r"
251+
@@ -1,6 +1,7 @@
252+
1
253+
2
254+
3
255+
+3.5
256+
4
257+
5
258+
6
259+
");
260+
261+
// From the end, for good measure
262+
let a = "1\n2\n3\n4\n5\n6\n7\n8\n9\n10";
263+
let b = "1\n2\n3\n4\n5\n6\n7\n7.5\n8\n9\n10";
264+
265+
let a = gix_diff::blob::sources::lines_with_terminator(a);
266+
let b = gix_diff::blob::sources::lines_with_terminator(b);
267+
let interner = gix_diff::blob::intern::InternedInput::new(a, b);
268+
269+
let actual = gix_diff::blob::diff(
270+
Algorithm::Myers,
271+
&interner,
272+
UnifiedDiff::new(
273+
&interner,
274+
String::new(),
275+
NewlineSeparator::AfterHeaderAndWhenNeeded("\n"),
276+
ContextSize::symmetrical(3),
277+
),
278+
)?;
279+
insta::assert_snapshot!(actual, @r"
280+
@@ -5,6 +5,7 @@
281+
5
282+
6
283+
7
284+
+7.5
285+
8
286+
9
287+
10
288+
");
289+
Ok(())
290+
}
291+
173292
#[test]
174293
fn removed_modified_added_with_newlines_in_tokens() -> crate::Result {
175294
let a = "1\n2\n3\n4\n5\n6\n7\n8\n9\n10";

0 commit comments

Comments
 (0)