From 6e6836b4857fa19c20deadaacb1a079b3ef675a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20R=C3=BC=C3=9Fler?= Date: Thu, 22 May 2025 13:41:29 +0200 Subject: [PATCH 1/5] Use `pretty_assertion::assert_equal` --- gix-blame/tests/blame.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gix-blame/tests/blame.rs b/gix-blame/tests/blame.rs index a4535b0eaf1..2adc4b52fba 100644 --- a/gix-blame/tests/blame.rs +++ b/gix-blame/tests/blame.rs @@ -206,7 +206,7 @@ macro_rules! mktest { let baseline = Baseline::collect(git_dir.join(format!("{}.baseline", $case)))?; assert_eq!(baseline.len(), $number_of_lines); - assert_eq!(lines_blamed, baseline); + pretty_assertions::assert_eq!(lines_blamed, baseline); Ok(()) } }; @@ -279,7 +279,7 @@ fn diff_disparity() { let git_dir = fixture_path().join(".git"); let baseline = Baseline::collect(git_dir.join(format!("{case}.baseline"))).unwrap(); - assert_eq!(lines_blamed, baseline, "{case}"); + pretty_assertions::assert_eq!(lines_blamed, baseline, "{case}"); } } @@ -311,7 +311,7 @@ fn since() { let git_dir = fixture_path().join(".git"); let baseline = Baseline::collect(git_dir.join("simple-since.baseline")).unwrap(); - assert_eq!(lines_blamed, baseline); + pretty_assertions::assert_eq!(lines_blamed, baseline); } mod blame_ranges { @@ -346,7 +346,7 @@ mod blame_ranges { let git_dir = fixture_path().join(".git"); let baseline = Baseline::collect(git_dir.join("simple-lines-1-2.baseline")).unwrap(); - assert_eq!(lines_blamed, baseline); + pretty_assertions::assert_eq!(lines_blamed, baseline); } #[test] @@ -382,7 +382,7 @@ mod blame_ranges { let git_dir = fixture_path().join(".git"); let baseline = Baseline::collect(git_dir.join("simple-lines-multiple-1-2-and-4.baseline")).unwrap(); - assert_eq!(lines_blamed, baseline); + pretty_assertions::assert_eq!(lines_blamed, baseline); } #[test] @@ -415,7 +415,7 @@ mod blame_ranges { let git_dir = fixture_path().join(".git"); let baseline = Baseline::collect(git_dir.join("simple-lines-multiple-1-2-and-4.baseline")).unwrap(); - assert_eq!(lines_blamed, baseline); + pretty_assertions::assert_eq!(lines_blamed, baseline); } } From d2e98f3cf458121da3d23933d6a7421d70309a20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20R=C3=BC=C3=9Fler?= Date: Thu, 22 May 2025 16:00:49 +0200 Subject: [PATCH 2/5] feat!: follow renames in blame --- gix-blame/src/error.rs | 2 + gix-blame/src/file/function.rs | 180 ++++++++++++++++++-- gix-blame/src/file/mod.rs | 3 + gix-blame/src/file/tests.rs | 180 ++++++++++++++------ gix-blame/src/types.rs | 16 +- gix-blame/tests/blame.rs | 96 +++++++++-- gix-blame/tests/fixtures/make_blame_repo.sh | 30 ++++ 7 files changed, 428 insertions(+), 79 deletions(-) diff --git a/gix-blame/src/error.rs b/gix-blame/src/error.rs index 979cc8cd6ef..d90a05127cc 100644 --- a/gix-blame/src/error.rs +++ b/gix-blame/src/error.rs @@ -27,6 +27,8 @@ pub enum Error { Traverse(#[source] Box), #[error(transparent)] DiffTree(#[from] gix_diff::tree::Error), + #[error(transparent)] + DiffTreeWithRewrites(#[from] gix_diff::tree_with_rewrites::Error), #[error("Invalid line range was given, line range is expected to be a 1-based inclusive range in the format ','")] InvalidLineRange, #[error("Failure to decode commit during traversal")] diff --git a/gix-blame/src/file/function.rs b/gix-blame/src/file/function.rs index 03429b61b12..7fa4e511e28 100644 --- a/gix-blame/src/file/function.rs +++ b/gix-blame/src/file/function.rs @@ -72,19 +72,21 @@ pub fn file( ) -> Result { let _span = gix_trace::coarse!("gix_blame::file()", ?file_path, ?suspect); + let mut current_file_path: BString = file_path.into(); + let mut stats = Statistics::default(); let (mut buf, mut buf2, mut buf3) = (Vec::new(), Vec::new(), Vec::new()); let blamed_file_entry_id = find_path_entry_in_commit( &odb, &suspect, - file_path, + current_file_path.as_ref(), cache.as_ref(), &mut buf, &mut buf2, &mut stats, )? .ok_or_else(|| Error::FileMissing { - file_path: file_path.to_owned(), + file_path: current_file_path.to_owned(), commit_id: suspect, })?; let blamed_file_blob = odb.find_blob(&blamed_file_entry_id, &mut buf)?.data.to_vec(); @@ -102,6 +104,7 @@ pub fn file( hunks_to_blame.push(UnblamedHunk { range_in_blamed_file: range.clone(), suspects: [(suspect, range)].into(), + source_file_name: None, }); } @@ -165,7 +168,7 @@ pub fn file( entry = find_path_entry_in_commit( &odb, &suspect, - file_path, + current_file_path.as_ref(), cache.as_ref(), &mut buf, &mut buf2, @@ -216,7 +219,7 @@ pub fn file( if let Some(parent_entry_id) = find_path_entry_in_commit( &odb, parent_id, - file_path, + current_file_path.as_ref(), cache.as_ref(), &mut buf, &mut buf2, @@ -239,12 +242,13 @@ pub fn file( queue.insert(parent_commit_time, parent_id); let changes_for_file_path = tree_diff_at_file_path( &odb, - file_path, + current_file_path.as_ref(), suspect, parent_id, cache.as_ref(), &mut stats, &mut diff_state, + resource_cache, &mut buf, &mut buf2, &mut buf3, @@ -263,7 +267,7 @@ pub fn file( }; match modification { - gix_diff::tree::recorder::Change::Addition { .. } => { + TreeDiffChange::Addition => { if more_than_one_parent { // Do nothing under the assumption that this always (or almost always) // implies that the file comes from a different parent, compared to which @@ -272,20 +276,44 @@ pub fn file( break 'outer; } } - gix_diff::tree::recorder::Change::Deletion { .. } => { + TreeDiffChange::Deletion => { unreachable!("We already found file_path in suspect^{{tree}}, so it can't be deleted") } - gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => { + TreeDiffChange::Modification { previous_id, id } => { let changes = blob_changes( &odb, resource_cache, - oid, - previous_oid, - file_path, + id, + previous_id, + current_file_path.as_ref(), + options.diff_algorithm, + &mut stats, + )?; + hunks_to_blame = process_changes(hunks_to_blame, changes, suspect, parent_id); + } + TreeDiffChange::Rewrite { + source_location, + source_id, + id, + } => { + let changes = blob_changes( + &odb, + resource_cache, + id, + source_id, + current_file_path.as_ref(), options.diff_algorithm, &mut stats, )?; hunks_to_blame = process_changes(hunks_to_blame, changes, suspect, parent_id); + + for hunk in hunks_to_blame.iter_mut() { + if hunk.has_suspect(&parent_id) { + hunk.source_file_name = Some(source_location.clone()); + } + } + + current_file_path = source_location; } } } @@ -382,6 +410,7 @@ fn coalesce_blame_entries(lines_blamed: Vec) -> Vec { len: NonZeroU32::new((current_source_range.end - previous_source_range.start) as u32) .expect("BUG: hunks are never zero-sized"), commit_id: previous_entry.commit_id, + source_file_name: previous_entry.source_file_name.clone(), }; acc.pop(); @@ -399,6 +428,57 @@ fn coalesce_blame_entries(lines_blamed: Vec) -> Vec { }) } +enum TreeDiffChange { + Addition, + Deletion, + Modification { + previous_id: ObjectId, + id: ObjectId, + }, + Rewrite { + source_location: BString, + source_id: ObjectId, + id: ObjectId, + }, +} + +impl From for TreeDiffChange { + fn from(value: gix_diff::tree::recorder::Change) -> Self { + use gix_diff::tree::recorder::Change; + + match value { + Change::Addition { .. } => Self::Addition, + Change::Deletion { .. } => Self::Deletion, + Change::Modification { previous_oid, oid, .. } => Self::Modification { + previous_id: previous_oid, + id: oid, + }, + } + } +} + +impl From for TreeDiffChange { + fn from(value: gix_diff::tree_with_rewrites::Change) -> Self { + use gix_diff::tree_with_rewrites::Change; + + match value { + Change::Addition { .. } => Self::Addition, + Change::Deletion { .. } => Self::Deletion, + Change::Modification { previous_id, id, .. } => Self::Modification { previous_id, id }, + Change::Rewrite { + source_location, + source_id, + id, + .. + } => Self::Rewrite { + source_location, + source_id, + id, + }, + } + } +} + #[allow(clippy::too_many_arguments)] fn tree_diff_at_file_path( odb: impl gix_object::Find + gix_object::FindHeader, @@ -408,10 +488,11 @@ fn tree_diff_at_file_path( cache: Option<&gix_commitgraph::Graph>, stats: &mut Statistics, state: &mut gix_diff::tree::State, + resource_cache: &mut gix_diff::blob::Platform, commit_buf: &mut Vec, lhs_tree_buf: &mut Vec, rhs_tree_buf: &mut Vec, -) -> Result, Error> { +) -> Result, Error> { let parent_tree_id = find_commit(cache, &odb, &parent_id, commit_buf)?.tree_id()?; let parent_tree_iter = odb.find_tree_iter(&parent_tree_id, lhs_tree_buf)?; @@ -422,6 +503,37 @@ fn tree_diff_at_file_path( let tree_iter = odb.find_tree_iter(&tree_id, rhs_tree_buf)?; stats.trees_decoded += 1; + let result = tree_diff_without_rewrites_at_file_path(&odb, file_path, stats, state, parent_tree_iter, tree_iter)?; + + // Here, we follow git’s behaviour. We return when we’ve found a `Modification`. We try a + // second time with rename tracking when the change is either an `Addition` or a `Deletion` + // because those can turn out to have been a `Rewrite`. + if matches!(result, Some(TreeDiffChange::Modification { .. })) { + return Ok(result); + } + + let result = tree_diff_with_rewrites_at_file_path( + &odb, + file_path, + stats, + state, + resource_cache, + parent_tree_iter, + tree_iter, + )?; + + Ok(result) +} + +#[allow(clippy::too_many_arguments)] +fn tree_diff_without_rewrites_at_file_path( + odb: impl gix_object::Find + gix_object::FindHeader, + file_path: &BStr, + stats: &mut Statistics, + state: &mut gix_diff::tree::State, + parent_tree_iter: gix_object::TreeRefIter<'_>, + tree_iter: gix_object::TreeRefIter<'_>, +) -> Result, Error> { struct FindChangeToPath { inner: gix_diff::tree::Recorder, interesting_path: BString, @@ -509,11 +621,53 @@ fn tree_diff_at_file_path( stats.trees_diffed += 1; match result { - Ok(_) | Err(gix_diff::tree::Error::Cancelled) => Ok(recorder.change), + Ok(_) | Err(gix_diff::tree::Error::Cancelled) => Ok(recorder.change.map(std::convert::Into::into)), Err(error) => Err(Error::DiffTree(error)), } } +#[allow(clippy::too_many_arguments)] +fn tree_diff_with_rewrites_at_file_path( + odb: impl gix_object::Find + gix_object::FindHeader, + file_path: &BStr, + stats: &mut Statistics, + state: &mut gix_diff::tree::State, + resource_cache: &mut gix_diff::blob::Platform, + parent_tree_iter: gix_object::TreeRefIter<'_>, + tree_iter: gix_object::TreeRefIter<'_>, +) -> Result, Error> { + let mut change: Option = None; + + let options: gix_diff::tree_with_rewrites::Options = gix_diff::tree_with_rewrites::Options { + location: Some(gix_diff::tree::recorder::Location::Path), + rewrites: Some(gix_diff::Rewrites::default()), + }; + let result = gix_diff::tree_with_rewrites( + parent_tree_iter, + tree_iter, + resource_cache, + state, + &odb, + |change_ref| -> Result<_, std::convert::Infallible> { + if change_ref.location() == file_path { + change = Some(change_ref.into_owned()); + Ok(gix_diff::tree_with_rewrites::Action::Cancel) + } else { + Ok(gix_diff::tree_with_rewrites::Action::Continue) + } + }, + options, + ); + stats.trees_diffed_with_rewrites += 1; + + match result { + Ok(_) | Err(gix_diff::tree_with_rewrites::Error::Diff(gix_diff::tree::Error::Cancelled)) => { + Ok(change.map(std::convert::Into::into)) + } + Err(error) => Err(Error::DiffTreeWithRewrites(error)), + } +} + fn blob_changes( odb: impl gix_object::Find + gix_object::FindHeader, resource_cache: &mut gix_diff::blob::Platform, diff --git a/gix-blame/src/file/mod.rs b/gix-blame/src/file/mod.rs index 935fb09eed8..85b3f3a8f31 100644 --- a/gix-blame/src/file/mod.rs +++ b/gix-blame/src/file/mod.rs @@ -393,11 +393,13 @@ impl UnblamedHunk { range_in_blamed_file: self.range_in_blamed_file.start ..(self.range_in_blamed_file.start + split_at_from_start), suspects: new_suspects_before.collect(), + source_file_name: self.source_file_name.clone(), }; let new_hunk_after = Self { range_in_blamed_file: (self.range_in_blamed_file.start + split_at_from_start) ..(self.range_in_blamed_file.end), suspects: new_suspects_after.collect(), + source_file_name: self.source_file_name, }; Either::Right((new_hunk_before, new_hunk_after)) @@ -445,6 +447,7 @@ impl BlameEntry { start_in_source_file: range_in_source_file.start, len: force_non_zero(range_in_source_file.len() as u32), commit_id, + source_file_name: unblamed_hunk.source_file_name.clone(), }) } } diff --git a/gix-blame/src/file/tests.rs b/gix-blame/src/file/tests.rs index 9185ca50633..408a7799949 100644 --- a/gix-blame/src/file/tests.rs +++ b/gix-blame/src/file/tests.rs @@ -14,6 +14,7 @@ fn new_unblamed_hunk(range_in_blamed_file: Range, suspect: ObjectId, offset UnblamedHunk { range_in_blamed_file, suspects: [(suspect, range_in_destination)].into(), + source_file_name: None, } } @@ -74,7 +75,8 @@ mod process_change { hunk, Some(UnblamedHunk { range_in_blamed_file: 3..5, - suspects: [(suspect, 3..5)].into() + suspects: [(suspect, 3..5)].into(), + source_file_name: None, }) ); assert_eq!(change, None); @@ -82,7 +84,8 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 0..3, - suspects: [(suspect, 0..3)].into() + suspects: [(suspect, 0..3)].into(), + source_file_name: None, }] ); assert_eq!(offset_in_destination, Offset::Added(3)); @@ -108,7 +111,8 @@ mod process_change { hunk, Some(UnblamedHunk { range_in_blamed_file: 3..5, - suspects: [(suspect, 3..5)].into() + suspects: [(suspect, 3..5)].into(), + source_file_name: None, }) ); assert_eq!(change, None); @@ -117,11 +121,13 @@ mod process_change { [ UnblamedHunk { range_in_blamed_file: 0..2, - suspects: [(parent, 0..2)].into() + suspects: [(parent, 0..2)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 2..3, - suspects: [(suspect, 2..3)].into() + suspects: [(suspect, 2..3)].into(), + source_file_name: None, } ] ); @@ -148,7 +154,8 @@ mod process_change { hunk, Some(UnblamedHunk { range_in_blamed_file: 13..15, - suspects: [(suspect, 13..15)].into() + suspects: [(suspect, 13..15)].into(), + source_file_name: None, }) ); assert_eq!(change, None); @@ -157,11 +164,13 @@ mod process_change { [ UnblamedHunk { range_in_blamed_file: 10..12, - suspects: [(parent, 5..7)].into() + suspects: [(parent, 5..7)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 12..13, - suspects: [(suspect, 12..13)].into() + suspects: [(suspect, 12..13)].into(), + source_file_name: None, } ] ); @@ -189,7 +198,8 @@ mod process_change { hunk, Some(UnblamedHunk { range_in_blamed_file: 15..17, - suspects: [(suspect, 10..12)].into() + suspects: [(suspect, 10..12)].into(), + source_file_name: None, }) ); assert_eq!(change, None); @@ -198,11 +208,13 @@ mod process_change { [ UnblamedHunk { range_in_blamed_file: 12..14, - suspects: [(parent, 7..9)].into() + suspects: [(parent, 7..9)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 14..15, - suspects: [(suspect, 9..10)].into() + suspects: [(suspect, 9..10)].into(), + source_file_name: None, } ] ); @@ -229,7 +241,8 @@ mod process_change { hunk, Some(UnblamedHunk { range_in_blamed_file: 3..5, - suspects: [(suspect, 3..5)].into() + suspects: [(suspect, 3..5)].into(), + source_file_name: None, }) ); assert_eq!(change, None); @@ -237,7 +250,8 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 0..3, - suspects: [(suspect, 0..3)].into() + suspects: [(suspect, 0..3)].into(), + source_file_name: None, }] ); assert_eq!(offset_in_destination, Offset::Added(2)); @@ -264,7 +278,8 @@ mod process_change { hunk, Some(UnblamedHunk { range_in_blamed_file: 4..5, - suspects: [(suspect, 3..4)].into() + suspects: [(suspect, 3..4)].into(), + source_file_name: None, }) ); assert_eq!(change, None); @@ -272,7 +287,8 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 1..4, - suspects: [(suspect, 0..3)].into() + suspects: [(suspect, 0..3)].into(), + source_file_name: None, }] ); assert_eq!(offset_in_destination, Offset::Added(2)); @@ -299,7 +315,8 @@ mod process_change { hunk, Some(UnblamedHunk { range_in_blamed_file: 6..7, - suspects: [(suspect, 5..6)].into() + suspects: [(suspect, 5..6)].into(), + source_file_name: None, }) ); assert_eq!(change, None); @@ -308,11 +325,13 @@ mod process_change { [ UnblamedHunk { range_in_blamed_file: 3..4, - suspects: [(parent, 0..1)].into() + suspects: [(parent, 0..1)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 4..6, - suspects: [(suspect, 3..5)].into() + suspects: [(suspect, 3..5)].into(), + source_file_name: None, } ] ); @@ -342,7 +361,8 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 23..24, - suspects: [(suspect, 25..26)].into() + suspects: [(suspect, 25..26)].into(), + source_file_name: None, }] ); assert_eq!(offset_in_destination, Offset::Added(1)); @@ -371,7 +391,8 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 23..24, - suspects: [(suspect, 21..22)].into() + suspects: [(suspect, 21..22)].into(), + source_file_name: None, }] ); assert_eq!(offset_in_destination, Offset::Added(1)); @@ -401,11 +422,13 @@ mod process_change { [ UnblamedHunk { range_in_blamed_file: 71..107, - suspects: [(parent, 70..106)].into() + suspects: [(parent, 70..106)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 107..109, - suspects: [(suspect, 106..108)].into() + suspects: [(suspect, 106..108)].into(), + source_file_name: None, } ] ); @@ -436,11 +459,13 @@ mod process_change { [ UnblamedHunk { range_in_blamed_file: 149..155, - suspects: [(parent, 137..143)].into() + suspects: [(parent, 137..143)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 155..156, - suspects: [(suspect, 143..144)].into() + suspects: [(suspect, 143..144)].into(), + source_file_name: None, } ] ); @@ -470,7 +495,8 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 3..6, - suspects: [(parent, 5..8)].into() + suspects: [(parent, 5..8)].into(), + source_file_name: None, }] ); assert_eq!(offset_in_destination, Offset::Deleted(3)); @@ -497,7 +523,8 @@ mod process_change { hunk, Some(UnblamedHunk { range_in_blamed_file: 9..11, - suspects: [(suspect, 6..8)].into() + suspects: [(suspect, 6..8)].into(), + source_file_name: None, }) ); assert_eq!(change, None); @@ -526,7 +553,8 @@ mod process_change { hunk, Some(UnblamedHunk { range_in_blamed_file: 4..15, - suspects: [(suspect, 5..16)].into() + suspects: [(suspect, 5..16)].into(), + source_file_name: None, }) ); assert_eq!(change, None); @@ -555,7 +583,8 @@ mod process_change { hunk, Some(UnblamedHunk { range_in_blamed_file: 23..25, - suspects: [(suspect, 25..27)].into() + suspects: [(suspect, 25..27)].into(), + source_file_name: None, }) ); assert_eq!(change, None); @@ -586,7 +615,8 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 15..16, - suspects: [(parent, 16..17)].into() + suspects: [(parent, 16..17)].into(), + source_file_name: None, }] ); assert_eq!(offset_in_destination, Offset::Added(1)); @@ -613,7 +643,8 @@ mod process_change { hunk, Some(UnblamedHunk { range_in_blamed_file: 23..25, - suspects: [(suspect, 22..24)].into() + suspects: [(suspect, 22..24)].into(), + source_file_name: None, }) ); assert_eq!(change, None); @@ -644,7 +675,8 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 2..5, - suspects: [(suspect, 5..8)].into() + suspects: [(suspect, 5..8)].into(), + source_file_name: None, }] ); assert_eq!(offset_in_destination, Offset::Added(3)); @@ -671,7 +703,8 @@ mod process_change { hunk, Some(UnblamedHunk { range_in_blamed_file: 14..19, - suspects: [(suspect, 15..20)].into() + suspects: [(suspect, 15..20)].into(), + source_file_name: None, }) ); assert_eq!(change, None); @@ -679,7 +712,8 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 12..14, - suspects: [(parent, 10..12)].into() + suspects: [(parent, 10..12)].into(), + source_file_name: None, }] ); assert_eq!(offset_in_destination, Offset::Added(1)); @@ -708,7 +742,8 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 110..114, - suspects: [(parent, 106..110)].into() + suspects: [(parent, 106..110)].into(), + source_file_name: None, }] ); assert_eq!(offset_in_destination, Offset::Added(3)); @@ -734,7 +769,8 @@ mod process_change { hunk, Some(UnblamedHunk { range_in_blamed_file: 0..5, - suspects: [(suspect, 0..5)].into() + suspects: [(suspect, 0..5)].into(), + source_file_name: None, }) ); assert_eq!(change, None); @@ -764,7 +800,8 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 0..5, - suspects: [(parent, 0..5)].into() + suspects: [(parent, 0..5)].into(), + source_file_name: None, }] ); assert_eq!(offset_in_destination, Offset::Added(0)); @@ -785,6 +822,7 @@ mod process_change { Some(UnblamedHunk { range_in_blamed_file: 22..30, suspects: [(suspect, 21..29)].into(), + source_file_name: None, }), Some(Change::Unchanged(21..23)), ); @@ -793,7 +831,8 @@ mod process_change { hunk, Some(UnblamedHunk { range_in_blamed_file: 22..30, - suspects: [(suspect, 21..29)].into() + suspects: [(suspect, 21..29)].into(), + source_file_name: None, }) ); assert_eq!(change, None); @@ -823,7 +862,8 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 0..5, - suspects: [(parent, 0..5)].into() + suspects: [(parent, 0..5)].into(), + source_file_name: None, }] ); assert_eq!(offset_in_destination, Offset::Added(0)); @@ -849,7 +889,8 @@ mod process_change { hunk, Some(UnblamedHunk { range_in_blamed_file: 2..16, - suspects: [(suspect, 2..16)].into() + suspects: [(suspect, 2..16)].into(), + source_file_name: None, }) ); assert_eq!(change, None); @@ -877,7 +918,8 @@ mod process_change { hunk, Some(UnblamedHunk { range_in_blamed_file: 14..16, - suspects: [(suspect, 14..16)].into() + suspects: [(suspect, 14..16)].into(), + source_file_name: None, }) ); assert_eq!(change, None); @@ -885,7 +927,8 @@ mod process_change { new_hunks_to_blame, [UnblamedHunk { range_in_blamed_file: 2..14, - suspects: [(parent, 2..14)].into() + suspects: [(parent, 2..14)].into(), + source_file_name: None, }] ); assert_eq!(offset_in_destination, Offset::Deleted(4)); @@ -989,6 +1032,7 @@ mod process_changes { [UnblamedHunk { range_in_blamed_file: 0..4, suspects: [(suspect, 0..4)].into(), + source_file_name: None, },] ); } @@ -1007,10 +1051,12 @@ mod process_changes { UnblamedHunk { range_in_blamed_file: 0..4, suspects: [(suspect, 0..4)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 4..6, suspects: [(parent, 0..2)].into(), + source_file_name: None, }, ] ); @@ -1034,14 +1080,17 @@ mod process_changes { UnblamedHunk { range_in_blamed_file: 0..2, suspects: [(parent, 0..2)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 2..4, suspects: [(suspect, 2..4)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 4..6, suspects: [(parent, 2..4)].into(), + source_file_name: None, }, ] ); @@ -1065,14 +1114,17 @@ mod process_changes { UnblamedHunk { range_in_blamed_file: 0..1, suspects: [(suspect, 0..1)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 1..4, suspects: [(suspect, 1..4)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 4..6, suspects: [(parent, 0..2)].into(), + source_file_name: None, } ] ); @@ -1092,10 +1144,12 @@ mod process_changes { UnblamedHunk { range_in_blamed_file: 0..1, suspects: [(suspect, 0..1)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 1..6, suspects: [(parent, 0..5)].into(), + source_file_name: None, } ] ); @@ -1115,10 +1169,12 @@ mod process_changes { UnblamedHunk { range_in_blamed_file: 2..3, suspects: [(suspect, 0..1)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 3..6, suspects: [(parent, 0..3)].into(), + source_file_name: None, } ] ); @@ -1138,10 +1194,12 @@ mod process_changes { UnblamedHunk { range_in_blamed_file: 0..4, suspects: [(suspect, 0..4)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 4..6, suspects: [(parent, 3..5)].into(), + source_file_name: None, } ] ); @@ -1160,6 +1218,7 @@ mod process_changes { [UnblamedHunk { range_in_blamed_file: 4..6, suspects: [(parent, 0..2)].into(), + source_file_name: None, }] ); } @@ -1178,10 +1237,12 @@ mod process_changes { UnblamedHunk { range_in_blamed_file: 1..2, suspects: [(suspect, 0..1)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 2..3, suspects: [(parent, 2..3)].into(), + source_file_name: None, } ] ); @@ -1205,14 +1266,17 @@ mod process_changes { UnblamedHunk { range_in_blamed_file: 0..2, suspects: [(suspect, 0..2)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 2..3, suspects: [(parent, 0..1)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 3..4, suspects: [(suspect, 3..4)].into(), + source_file_name: None, }, ] ); @@ -1226,10 +1290,12 @@ mod process_changes { UnblamedHunk { range_in_blamed_file: 0..30, suspects: [(suspect, 0..30)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 31..37, suspects: [(suspect, 31..37)].into(), + source_file_name: None, }, ]; let changes = vec![ @@ -1244,19 +1310,23 @@ mod process_changes { [ UnblamedHunk { range_in_blamed_file: 0..16, - suspects: [(parent, 0..16)].into() + suspects: [(parent, 0..16)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 16..17, - suspects: [(suspect, 16..17)].into() + suspects: [(suspect, 16..17)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 17..30, - suspects: [(parent, 16..29)].into() + suspects: [(parent, 16..29)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 31..37, - suspects: [(parent, 30..36)].into() + suspects: [(parent, 30..36)].into(), + source_file_name: None, } ] ); @@ -1270,14 +1340,17 @@ mod process_changes { UnblamedHunk { range_in_blamed_file: 1..3, suspects: [(suspect, 1..3)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 5..7, suspects: [(suspect, 5..7)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 8..10, suspects: [(suspect, 8..10)].into(), + source_file_name: None, }, ]; let changes = vec![ @@ -1293,22 +1366,27 @@ mod process_changes { UnblamedHunk { range_in_blamed_file: 1..3, suspects: [(parent, 1..3)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 5..6, suspects: [(parent, 5..6)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 6..7, suspects: [(suspect, 6..7)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 8..9, suspects: [(suspect, 8..9)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 9..10, suspects: [(parent, 6..7)].into(), + source_file_name: None, }, ] ); @@ -1330,11 +1408,13 @@ mod process_changes { [ UnblamedHunk { range_in_blamed_file: 0..4, - suspects: [(suspect, 0..4)].into() + suspects: [(suspect, 0..4)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 4..7, - suspects: [(parent, 3..6)].into() + suspects: [(parent, 3..6)].into(), + source_file_name: None, } ] ); @@ -1356,19 +1436,23 @@ mod process_changes { [ UnblamedHunk { range_in_blamed_file: 13..14, - suspects: [(suspect, 13..14)].into() + suspects: [(suspect, 13..14)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 14..16, suspects: [(parent, 10..12)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 10..14, suspects: [(suspect, 10..14)].into(), + source_file_name: None, }, UnblamedHunk { range_in_blamed_file: 14..17, suspects: [(parent, 10..13)].into(), + source_file_name: None, }, ] ); diff --git a/gix-blame/src/types.rs b/gix-blame/src/types.rs index 84b107b6f8d..6db5499b3d1 100644 --- a/gix-blame/src/types.rs +++ b/gix-blame/src/types.rs @@ -172,6 +172,10 @@ pub struct Statistics { /// are likely partial as they are cancelled as soon as a change to the blamed file is /// detected. pub trees_diffed: usize, + /// The amount of tree-diffs to see if the file was moved (or rewritten, in git terminology). + /// These diffs are likely partial as they are cancelled as soon as a change to the blamed file + /// is detected. + pub trees_diffed_with_rewrites: usize, /// The amount of blobs there were compared to each other to learn what changed between commits. /// Note that in order to diff a blob, one needs to load both versions from the database. pub blobs_diffed: usize, @@ -275,11 +279,18 @@ pub struct BlameEntry { pub len: NonZeroU32, /// The commit that introduced the section into the *Source File*. pub commit_id: ObjectId, + /// The *Source File*'s name, in case it differs from *Blamed File*'s name. + pub source_file_name: Option, } impl BlameEntry { /// Create a new instance. - pub fn new(range_in_blamed_file: Range, range_in_source_file: Range, commit_id: ObjectId) -> Self { + pub fn new( + range_in_blamed_file: Range, + range_in_source_file: Range, + commit_id: ObjectId, + source_file_name: Option, + ) -> Self { debug_assert!( range_in_blamed_file.end > range_in_blamed_file.start, "{range_in_blamed_file:?}" @@ -295,6 +306,7 @@ impl BlameEntry { start_in_source_file: range_in_source_file.start, len: NonZeroU32::new(range_in_blamed_file.len() as u32).expect("BUG: hunks are never empty"), commit_id, + source_file_name, } } } @@ -331,6 +343,8 @@ pub struct UnblamedHunk { /// equal to `range_in_blamed_file`. Since `suspects` rarely contains more than 1 item, it can /// efficiently be stored as a `SmallVec`. pub suspects: SmallVec<[(ObjectId, Range); 1]>, + /// The *Source File*'s name, in case it differs from *Blamed File*'s name. + pub source_file_name: Option, } impl UnblamedHunk { diff --git a/gix-blame/tests/blame.rs b/gix-blame/tests/blame.rs index 2adc4b52fba..e54c47d4065 100644 --- a/gix-blame/tests/blame.rs +++ b/gix-blame/tests/blame.rs @@ -1,4 +1,4 @@ -use std::path::PathBuf; +use std::{collections::BTreeMap, path::PathBuf}; use gix_blame::BlameRanges; use gix_hash::ObjectId; @@ -6,10 +6,11 @@ use gix_object::bstr; struct Baseline<'a> { lines: bstr::Lines<'a>, + filenames: BTreeMap, } mod baseline { - use std::path::Path; + use std::{collections::BTreeMap, path::Path}; use gix_blame::BlameEntry; use gix_hash::ObjectId; @@ -40,10 +41,30 @@ mod baseline { } impl Baseline<'_> { - pub fn collect(baseline_path: impl AsRef) -> std::io::Result> { + pub fn collect( + baseline_path: impl AsRef, + source_file_name: gix_object::bstr::BString, + ) -> std::io::Result> { let content = std::fs::read(baseline_path)?; + let baseline = Baseline { + lines: content.lines(), + filenames: BTreeMap::default(), + }; - Ok(Baseline { lines: content.lines() }.collect()) + Ok(baseline + .map(|entry| { + let source_file_name = if entry.source_file_name.as_ref() == Some(&source_file_name) { + None + } else { + entry.source_file_name + }; + + BlameEntry { + source_file_name, + ..entry + } + }) + .collect()) } } @@ -54,6 +75,7 @@ mod baseline { let mut ranges = None; let mut commit_id = gix_hash::Kind::Sha1.null(); let mut skip_lines: u32 = 0; + let mut source_file_name: Option = None; for line in self.lines.by_ref() { if line.starts_with(b"\t") { @@ -94,6 +116,12 @@ mod baseline { (line_number_in_final_file - 1)..(line_number_in_final_file + number_of_lines_in_group - 1); assert!(ranges.is_none(), "should not overwrite existing ranges"); ranges = Some((blame_range, source_range)); + } else if fields[0] == "filename" { + // We need to store `source_file_name` as it is not repeated for subsequent + // hunks that have the same `commit_id`. + source_file_name = Some(fields[1].into()); + + self.filenames.insert(commit_id, fields[1].into()); } else if !is_known_header_field(&fields[0]) && ObjectId::from_hex(fields[0].as_bytes()).is_err() { panic!("unexpected line: '{:?}'", line.as_bstr()); } @@ -103,7 +131,12 @@ mod baseline { // No new lines were parsed, so we assume the iterator is finished. return None; }; - Some(BlameEntry::new(range_in_blamed_file, range_in_source_file, commit_id)) + Some(BlameEntry::new( + range_in_blamed_file, + range_in_source_file, + commit_id, + source_file_name.or_else(|| self.filenames.get(&commit_id).cloned()), + )) } } } @@ -186,12 +219,14 @@ macro_rules! mktest { suspect, } = Fixture::new()?; + let source_file_name: gix_object::bstr::BString = format!("{}.txt", $case).into(); + let lines_blamed = gix_blame::file( &odb, suspect, None, &mut resource_cache, - format!("{}.txt", $case).as_str().into(), + source_file_name.as_ref(), gix_blame::Options { diff_algorithm: gix_diff::blob::Algorithm::Histogram, range: BlameRanges::default(), @@ -203,7 +238,7 @@ macro_rules! mktest { assert_eq!(lines_blamed.len(), $number_of_lines); let git_dir = fixture_path().join(".git"); - let baseline = Baseline::collect(git_dir.join(format!("{}.baseline", $case)))?; + let baseline = Baseline::collect(git_dir.join(format!("{}.baseline", $case)), source_file_name)?; assert_eq!(baseline.len(), $number_of_lines); pretty_assertions::assert_eq!(lines_blamed, baseline); @@ -231,6 +266,15 @@ mktest!(coalesce_adjacent_hunks, "coalesce-adjacent-hunks", 1); mktest!(sub_directory, "sub-directory/sub-directory", 3); +mktest!(after_rename, "after-rename", 1); +mktest!(after_second_rename, "after-second-rename", 1); +mktest!(after_rewrite, "after-rewrite", 3); +mktest!( + after_move_to_sub_directory, + "sub-directory/after-move-to-sub-directory", + 1 +); + mktest!(resolved_conflict, "resolved-conflict", 2); mktest!(file_in_one_chain_of_ancestors, "file-in-one-chain-of-ancestors", 1); mktest!( @@ -259,12 +303,14 @@ fn diff_disparity() { suspect, } = Fixture::new().unwrap(); + let source_file_name: gix_object::bstr::BString = format!("{case}.txt").into(); + let lines_blamed = gix_blame::file( &odb, suspect, None, &mut resource_cache, - format!("{case}.txt").as_str().into(), + source_file_name.as_ref(), gix_blame::Options { diff_algorithm: gix_diff::blob::Algorithm::Histogram, range: BlameRanges::default(), @@ -277,7 +323,7 @@ fn diff_disparity() { assert_eq!(lines_blamed.len(), 5); let git_dir = fixture_path().join(".git"); - let baseline = Baseline::collect(git_dir.join(format!("{case}.baseline"))).unwrap(); + let baseline = Baseline::collect(git_dir.join(format!("{case}.baseline")), source_file_name).unwrap(); pretty_assertions::assert_eq!(lines_blamed, baseline, "{case}"); } @@ -291,12 +337,14 @@ fn since() { suspect, } = Fixture::new().unwrap(); + let source_file_name: gix_object::bstr::BString = "simple.txt".into(); + let lines_blamed = gix_blame::file( &odb, suspect, None, &mut resource_cache, - "simple.txt".into(), + source_file_name.as_ref(), gix_blame::Options { diff_algorithm: gix_diff::blob::Algorithm::Histogram, range: BlameRanges::default(), @@ -309,7 +357,7 @@ fn since() { assert_eq!(lines_blamed.len(), 1); let git_dir = fixture_path().join(".git"); - let baseline = Baseline::collect(git_dir.join("simple-since.baseline")).unwrap(); + let baseline = Baseline::collect(git_dir.join("simple-since.baseline"), source_file_name).unwrap(); pretty_assertions::assert_eq!(lines_blamed, baseline); } @@ -326,12 +374,14 @@ mod blame_ranges { suspect, } = Fixture::new().unwrap(); + let source_file_name: gix_object::bstr::BString = "simple.txt".into(); + let lines_blamed = gix_blame::file( &odb, suspect, None, &mut resource_cache, - "simple.txt".into(), + source_file_name.as_ref(), gix_blame::Options { diff_algorithm: gix_diff::blob::Algorithm::Histogram, range: BlameRanges::from_range(1..=2), @@ -344,7 +394,7 @@ mod blame_ranges { assert_eq!(lines_blamed.len(), 2); let git_dir = fixture_path().join(".git"); - let baseline = Baseline::collect(git_dir.join("simple-lines-1-2.baseline")).unwrap(); + let baseline = Baseline::collect(git_dir.join("simple-lines-1-2.baseline"), source_file_name).unwrap(); pretty_assertions::assert_eq!(lines_blamed, baseline); } @@ -362,12 +412,14 @@ mod blame_ranges { ranges.add_range(1..=1); // Duplicate range, should be ignored ranges.add_range(4..=4); // Line 4 + let source_file_name: gix_object::bstr::BString = "simple.txt".into(); + let lines_blamed = gix_blame::file( &odb, suspect, None, &mut resource_cache, - "simple.txt".into(), + source_file_name.as_ref(), gix_blame::Options { diff_algorithm: gix_diff::blob::Algorithm::Histogram, range: ranges, @@ -380,7 +432,11 @@ mod blame_ranges { assert_eq!(lines_blamed.len(), 3); // Should have 3 lines total (2 from first range + 1 from second range) let git_dir = fixture_path().join(".git"); - let baseline = Baseline::collect(git_dir.join("simple-lines-multiple-1-2-and-4.baseline")).unwrap(); + let baseline = Baseline::collect( + git_dir.join("simple-lines-multiple-1-2-and-4.baseline"), + source_file_name, + ) + .unwrap(); pretty_assertions::assert_eq!(lines_blamed, baseline); } @@ -395,12 +451,14 @@ mod blame_ranges { let ranges = BlameRanges::from_ranges(vec![1..=2, 1..=1, 4..=4]); + let source_file_name: gix_object::bstr::BString = "simple.txt".into(); + let lines_blamed = gix_blame::file( &odb, suspect, None, &mut resource_cache, - "simple.txt".into(), + source_file_name.as_ref(), gix_blame::Options { diff_algorithm: gix_diff::blob::Algorithm::Histogram, range: ranges, @@ -413,7 +471,11 @@ mod blame_ranges { assert_eq!(lines_blamed.len(), 3); // Should have 3 lines total (2 from first range + 1 from second range) let git_dir = fixture_path().join(".git"); - let baseline = Baseline::collect(git_dir.join("simple-lines-multiple-1-2-and-4.baseline")).unwrap(); + let baseline = Baseline::collect( + git_dir.join("simple-lines-multiple-1-2-and-4.baseline"), + source_file_name, + ) + .unwrap(); pretty_assertions::assert_eq!(lines_blamed, baseline); } diff --git a/gix-blame/tests/fixtures/make_blame_repo.sh b/gix-blame/tests/fixtures/make_blame_repo.sh index bbe764e1aa6..5846f9326db 100755 --- a/gix-blame/tests/fixtures/make_blame_repo.sh +++ b/gix-blame/tests/fixtures/make_blame_repo.sh @@ -30,6 +30,16 @@ git add added-lines-around.txt git add coalesce-adjacent-hunks.txt git commit -q -m c1.3 +echo "line 1 in renamed file" >> before-rename.txt +echo "line 1 in file renamed twice" >> before-first-rename.txt +echo -e "line 1 in renamed and rewritten file\nline 2\nline 3\nline 4\nline 5\nline 6" >> before-rewrite.txt +echo -e "line 1 in file moved to sub-directory" > before-move-to-sub-directory.txt +git add before-rename.txt +git add before-first-rename.txt +git add before-rewrite.txt +git add before-move-to-sub-directory.txt +git commit -q -m c1.4 + echo "line 2" >> simple.txt git add simple.txt git commit -q -m c2 @@ -56,7 +66,9 @@ git commit -q -m c2.4 mkdir sub-directory echo -e "line 1\nline 2" > sub-directory/sub-directory.txt +mv before-move-to-sub-directory.txt sub-directory/after-move-to-sub-directory.txt git add sub-directory/sub-directory.txt +git add before-move-to-sub-directory.txt sub-directory/after-move-to-sub-directory.txt git commit -q -m c2.5 echo "line 3" >> simple.txt @@ -85,6 +97,15 @@ echo -e "line 1\nline 2 changed" > same-line-changed-twice.txt git add same-line-changed-twice.txt git commit -q -m c3.4 +mv before-rename.txt after-rename.txt +mv before-first-rename.txt before-second-rename.txt +rm before-rewrite.txt +echo -e "line 1 in renamed and rewritten file\nline 2 changed\nline 3 changed\nline 4\nline 5\nline 6" >> after-rewrite.txt +git add before-rename.txt after-rename.txt +git add before-first-rename.txt before-second-rename.txt +git add before-rewrite.txt after-rewrite.txt +git commit -q -m c3.5 + echo "line 4" >> simple.txt git add simple.txt git commit -q -m c4 @@ -137,6 +158,10 @@ cp empty-lines-histogram.txt empty-lines-myers.txt git add empty-lines-histogram.txt empty-lines-myers.txt git commit -q -m c5.4 +mv before-second-rename.txt after-second-rename.txt +git add before-second-rename.txt after-second-rename.txt +git commit -q -m c5.5 + # The commit history created by the commits above this line is linear, it only # contains commits that have exactly one parent. # Below this line, there’s also commits that have more than one parent. @@ -253,6 +278,11 @@ git blame --porcelain coalesce-adjacent-hunks.txt > .git/coalesce-adjacent-hunks mkdir .git/sub-directory git blame --porcelain sub-directory/sub-directory.txt > .git/sub-directory/sub-directory.baseline +git blame --porcelain after-rename.txt > .git/after-rename.baseline +git blame --porcelain after-second-rename.txt > .git/after-second-rename.baseline +git blame --porcelain after-rewrite.txt > .git/after-rewrite.baseline +git blame --porcelain sub-directory/after-move-to-sub-directory.txt > .git/sub-directory/after-move-to-sub-directory.baseline + git blame --porcelain resolved-conflict.txt > .git/resolved-conflict.baseline git blame --porcelain file-in-one-chain-of-ancestors.txt > .git/file-in-one-chain-of-ancestors.baseline git blame --porcelain different-file-in-another-chain-of-ancestors.txt > .git/different-file-in-another-chain-of-ancestors.baseline From f899d6d533b6fb0d1ce5d08d0ec6c38df294398a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20R=C3=BC=C3=9Fler?= Date: Thu, 22 May 2025 16:01:40 +0200 Subject: [PATCH 3/5] Adapt to changes in `gix-blame` --- gitoxide-core/src/repository/blame.rs | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/gitoxide-core/src/repository/blame.rs b/gitoxide-core/src/repository/blame.rs index 0d42c75f9e0..259018b4a5e 100644 --- a/gitoxide-core/src/repository/blame.rs +++ b/gitoxide-core/src/repository/blame.rs @@ -47,7 +47,7 @@ pub fn blame_file( options, )?; let statistics = outcome.statistics; - write_blame_entries(out, outcome)?; + write_blame_entries(out, outcome, file)?; if let Some(err) = err { writeln!(err, "{statistics:#?}")?; @@ -55,7 +55,15 @@ pub fn blame_file( Ok(()) } -fn write_blame_entries(mut out: impl std::io::Write, outcome: gix::blame::Outcome) -> Result<(), std::io::Error> { +fn write_blame_entries( + mut out: impl std::io::Write, + outcome: gix::blame::Outcome, + source_file_name: gix::bstr::BString, +) -> Result<(), std::io::Error> { + let show_file_names = outcome + .entries_with_lines() + .any(|(entry, _)| entry.source_file_name.is_some()); + for (entry, lines_in_hunk) in outcome.entries_with_lines() { for ((actual_lno, source_lno), line) in entry .range_in_blamed_file() @@ -64,11 +72,20 @@ fn write_blame_entries(mut out: impl std::io::Write, outcome: gix::blame::Outcom { write!( out, - "{short_id} {line_no} {src_line_no} {line}", - line_no = actual_lno + 1, - src_line_no = source_lno + 1, + "{short_id} {line_no} ", short_id = entry.commit_id.to_hex_with_len(8), + line_no = actual_lno + 1, )?; + + if show_file_names { + if let Some(ref source_file_name) = entry.source_file_name { + write!(out, "{source_file_name} ")?; + } else { + write!(out, "{source_file_name} ")?; + } + } + + write!(out, "{src_line_no} {line}", src_line_no = source_lno + 1)?; } } From 7435ed5a9a7370a12332e12bd40fdbc757284a85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20R=C3=BC=C3=9Fler?= Date: Wed, 28 May 2025 18:52:11 +0200 Subject: [PATCH 4/5] Get current file_path from unblamed hunk --- gix-blame/src/file/function.rs | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/gix-blame/src/file/function.rs b/gix-blame/src/file/function.rs index 7fa4e511e28..feaf3a95680 100644 --- a/gix-blame/src/file/function.rs +++ b/gix-blame/src/file/function.rs @@ -72,21 +72,19 @@ pub fn file( ) -> Result { let _span = gix_trace::coarse!("gix_blame::file()", ?file_path, ?suspect); - let mut current_file_path: BString = file_path.into(); - let mut stats = Statistics::default(); let (mut buf, mut buf2, mut buf3) = (Vec::new(), Vec::new(), Vec::new()); let blamed_file_entry_id = find_path_entry_in_commit( &odb, &suspect, - current_file_path.as_ref(), + file_path, cache.as_ref(), &mut buf, &mut buf2, &mut stats, )? .ok_or_else(|| Error::FileMissing { - file_path: current_file_path.to_owned(), + file_path: file_path.to_owned(), commit_id: suspect, })?; let blamed_file_blob = odb.find_blob(&blamed_file_entry_id, &mut buf)?.data.to_vec(); @@ -123,13 +121,22 @@ pub fn file( break; } - let is_still_suspect = hunks_to_blame.iter().any(|hunk| hunk.has_suspect(&suspect)); + let first_hunk_for_suspect = hunks_to_blame.iter().find(|hunk| hunk.has_suspect(&suspect)); + let is_still_suspect = first_hunk_for_suspect.is_some(); if !is_still_suspect { // There are no `UnblamedHunk`s associated with this `suspect`, so we can continue with // the next one. continue 'outer; } + // We know `first_hunk_for_suspect` can’t be `None` here because we check `is_some()` + // above. + let current_file_path: BString = first_hunk_for_suspect + .unwrap() + .source_file_name + .clone() + .unwrap_or_else(|| file_path.to_owned()); + let commit = find_commit(cache.as_ref(), &odb, &suspect, &mut buf)?; let commit_time = commit.commit_time()?; @@ -285,7 +292,7 @@ pub fn file( resource_cache, id, previous_id, - current_file_path.as_ref(), + file_path, options.diff_algorithm, &mut stats, )?; @@ -301,7 +308,7 @@ pub fn file( resource_cache, id, source_id, - current_file_path.as_ref(), + file_path, options.diff_algorithm, &mut stats, )?; @@ -312,8 +319,6 @@ pub fn file( hunk.source_file_name = Some(source_location.clone()); } } - - current_file_path = source_location; } } } From 3e5365cb066895c787a22422964a2b9459f37ec3 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 5 Jun 2025 08:35:51 +0200 Subject: [PATCH 5/5] refactor - assure the previous path is passed when diffing, and when available. - make rewrites configurable as part of `Options`. --- gitoxide-core/src/repository/blame.rs | 17 ++++---------- gix-blame/src/file/function.rs | 32 +++++++++++++++++++-------- gix-blame/src/types.rs | 3 +++ gix-blame/tests/blame.rs | 6 +++++ src/plumbing/main.rs | 1 + 5 files changed, 37 insertions(+), 22 deletions(-) diff --git a/gitoxide-core/src/repository/blame.rs b/gitoxide-core/src/repository/blame.rs index 259018b4a5e..0adab07d0e6 100644 --- a/gitoxide-core/src/repository/blame.rs +++ b/gitoxide-core/src/repository/blame.rs @@ -47,7 +47,7 @@ pub fn blame_file( options, )?; let statistics = outcome.statistics; - write_blame_entries(out, outcome, file)?; + show_blame_entries(out, outcome, file)?; if let Some(err) = err { writeln!(err, "{statistics:#?}")?; @@ -55,15 +55,11 @@ pub fn blame_file( Ok(()) } -fn write_blame_entries( +fn show_blame_entries( mut out: impl std::io::Write, outcome: gix::blame::Outcome, source_file_name: gix::bstr::BString, ) -> Result<(), std::io::Error> { - let show_file_names = outcome - .entries_with_lines() - .any(|(entry, _)| entry.source_file_name.is_some()); - for (entry, lines_in_hunk) in outcome.entries_with_lines() { for ((actual_lno, source_lno), line) in entry .range_in_blamed_file() @@ -77,13 +73,8 @@ fn write_blame_entries( line_no = actual_lno + 1, )?; - if show_file_names { - if let Some(ref source_file_name) = entry.source_file_name { - write!(out, "{source_file_name} ")?; - } else { - write!(out, "{source_file_name} ")?; - } - } + let source_file_name = entry.source_file_name.as_ref().unwrap_or(&source_file_name); + write!(out, "{source_file_name} ")?; write!(out, "{src_line_no} {line}", src_line_no = source_lno + 1)?; } diff --git a/gix-blame/src/file/function.rs b/gix-blame/src/file/function.rs index feaf3a95680..298d8e38be2 100644 --- a/gix-blame/src/file/function.rs +++ b/gix-blame/src/file/function.rs @@ -122,17 +122,15 @@ pub fn file( } let first_hunk_for_suspect = hunks_to_blame.iter().find(|hunk| hunk.has_suspect(&suspect)); - let is_still_suspect = first_hunk_for_suspect.is_some(); - if !is_still_suspect { + let Some(first_hunk_for_suspect) = first_hunk_for_suspect else { // There are no `UnblamedHunk`s associated with this `suspect`, so we can continue with // the next one. continue 'outer; - } + }; // We know `first_hunk_for_suspect` can’t be `None` here because we check `is_some()` // above. - let current_file_path: BString = first_hunk_for_suspect - .unwrap() + let current_file_path = first_hunk_for_suspect .source_file_name .clone() .unwrap_or_else(|| file_path.to_owned()); @@ -259,6 +257,7 @@ pub fn file( &mut buf, &mut buf2, &mut buf3, + options.rewrites, )?; let Some(modification) = changes_for_file_path else { if more_than_one_parent { @@ -293,6 +292,7 @@ pub fn file( id, previous_id, file_path, + file_path, options.diff_algorithm, &mut stats, )?; @@ -309,6 +309,7 @@ pub fn file( id, source_id, file_path, + source_location.as_ref(), options.diff_algorithm, &mut stats, )?; @@ -433,6 +434,8 @@ fn coalesce_blame_entries(lines_blamed: Vec) -> Vec { }) } +/// The union of [`gix_diff::tree::recorder::Change`] and [`gix_diff::tree_with_rewrites::Change`], +/// keeping only the blame-relevant information. enum TreeDiffChange { Addition, Deletion, @@ -497,6 +500,7 @@ fn tree_diff_at_file_path( commit_buf: &mut Vec, lhs_tree_buf: &mut Vec, rhs_tree_buf: &mut Vec, + rewrites: Option, ) -> Result, Error> { let parent_tree_id = find_commit(cache, &odb, &parent_id, commit_buf)?.tree_id()?; @@ -513,9 +517,15 @@ fn tree_diff_at_file_path( // Here, we follow git’s behaviour. We return when we’ve found a `Modification`. We try a // second time with rename tracking when the change is either an `Addition` or a `Deletion` // because those can turn out to have been a `Rewrite`. + // TODO(perf): renames are usually rare enough to not care about the work duplication done here. + // But in theory, a rename tracker could be used by us, on demand, and we could stuff the + // changes in there and have it find renames, without repeating the diff. if matches!(result, Some(TreeDiffChange::Modification { .. })) { return Ok(result); } + let Some(rewrites) = rewrites else { + return Ok(result); + }; let result = tree_diff_with_rewrites_at_file_path( &odb, @@ -525,6 +535,7 @@ fn tree_diff_at_file_path( resource_cache, parent_tree_iter, tree_iter, + rewrites, )?; Ok(result) @@ -626,7 +637,7 @@ fn tree_diff_without_rewrites_at_file_path( stats.trees_diffed += 1; match result { - Ok(_) | Err(gix_diff::tree::Error::Cancelled) => Ok(recorder.change.map(std::convert::Into::into)), + Ok(_) | Err(gix_diff::tree::Error::Cancelled) => Ok(recorder.change.map(Into::into)), Err(error) => Err(Error::DiffTree(error)), } } @@ -640,12 +651,13 @@ fn tree_diff_with_rewrites_at_file_path( resource_cache: &mut gix_diff::blob::Platform, parent_tree_iter: gix_object::TreeRefIter<'_>, tree_iter: gix_object::TreeRefIter<'_>, + rewrites: gix_diff::Rewrites, ) -> Result, Error> { let mut change: Option = None; let options: gix_diff::tree_with_rewrites::Options = gix_diff::tree_with_rewrites::Options { location: Some(gix_diff::tree::recorder::Location::Path), - rewrites: Some(gix_diff::Rewrites::default()), + rewrites: Some(rewrites), }; let result = gix_diff::tree_with_rewrites( parent_tree_iter, @@ -667,18 +679,20 @@ fn tree_diff_with_rewrites_at_file_path( match result { Ok(_) | Err(gix_diff::tree_with_rewrites::Error::Diff(gix_diff::tree::Error::Cancelled)) => { - Ok(change.map(std::convert::Into::into)) + Ok(change.map(Into::into)) } Err(error) => Err(Error::DiffTreeWithRewrites(error)), } } +#[allow(clippy::too_many_arguments)] fn blob_changes( odb: impl gix_object::Find + gix_object::FindHeader, resource_cache: &mut gix_diff::blob::Platform, oid: ObjectId, previous_oid: ObjectId, file_path: &BStr, + previous_file_path: &BStr, diff_algorithm: gix_diff::blob::Algorithm, stats: &mut Statistics, ) -> Result, Error> { @@ -738,7 +752,7 @@ fn blob_changes( resource_cache.set_resource( previous_oid, gix_object::tree::EntryKind::Blob, - file_path, + previous_file_path, gix_diff::blob::ResourceKind::OldOrSource, &odb, )?; diff --git a/gix-blame/src/types.rs b/gix-blame/src/types.rs index 6db5499b3d1..5672eaf679d 100644 --- a/gix-blame/src/types.rs +++ b/gix-blame/src/types.rs @@ -147,6 +147,8 @@ pub struct Options { pub range: BlameRanges, /// Don't consider commits before the given date. pub since: Option, + /// Determine if rename tracking should be performed, and how. + pub rewrites: Option, } /// The outcome of [`file()`](crate::file()). @@ -280,6 +282,7 @@ pub struct BlameEntry { /// The commit that introduced the section into the *Source File*. pub commit_id: ObjectId, /// The *Source File*'s name, in case it differs from *Blamed File*'s name. + /// This happens when the file was renamed. pub source_file_name: Option, } diff --git a/gix-blame/tests/blame.rs b/gix-blame/tests/blame.rs index e54c47d4065..7b7d1e39e1d 100644 --- a/gix-blame/tests/blame.rs +++ b/gix-blame/tests/blame.rs @@ -231,6 +231,7 @@ macro_rules! mktest { diff_algorithm: gix_diff::blob::Algorithm::Histogram, range: BlameRanges::default(), since: None, + rewrites: Some(gix_diff::Rewrites::default()), }, )? .entries; @@ -315,6 +316,7 @@ fn diff_disparity() { diff_algorithm: gix_diff::blob::Algorithm::Histogram, range: BlameRanges::default(), since: None, + rewrites: Some(gix_diff::Rewrites::default()), }, ) .unwrap() @@ -349,6 +351,7 @@ fn since() { diff_algorithm: gix_diff::blob::Algorithm::Histogram, range: BlameRanges::default(), since: Some(gix_date::parse("2025-01-31", None).unwrap()), + rewrites: Some(gix_diff::Rewrites::default()), }, ) .unwrap() @@ -386,6 +389,7 @@ mod blame_ranges { diff_algorithm: gix_diff::blob::Algorithm::Histogram, range: BlameRanges::from_range(1..=2), since: None, + rewrites: Some(gix_diff::Rewrites::default()), }, ) .unwrap() @@ -424,6 +428,7 @@ mod blame_ranges { diff_algorithm: gix_diff::blob::Algorithm::Histogram, range: ranges, since: None, + rewrites: None, }, ) .unwrap() @@ -463,6 +468,7 @@ mod blame_ranges { diff_algorithm: gix_diff::blob::Algorithm::Histogram, range: ranges, since: None, + rewrites: None, }, ) .unwrap() diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs index fe111b065cf..136f977a60a 100644 --- a/src/plumbing/main.rs +++ b/src/plumbing/main.rs @@ -1590,6 +1590,7 @@ pub fn main() -> Result<()> { diff_algorithm, range: gix::blame::BlameRanges::from_ranges(ranges), since, + rewrites: Some(gix::diff::Rewrites::default()), }, out, statistics.then_some(err),