Skip to content

Commit c614970

Browse files
committed
Use structured format for blame script
Also provide more context in comment
1 parent 5d748af commit c614970

File tree

2 files changed

+109
-59
lines changed

2 files changed

+109
-59
lines changed

tests/it/src/args.rs

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,34 @@ pub struct Args {
1414

1515
#[derive(Debug, clap::Subcommand)]
1616
pub enum Subcommands {
17-
/// Extract a file’s history so that its blame shows the same characteristics, in particular
18-
/// bugs, as the original, but in a way that can't be traced back uniquely to its source.
17+
/// Generate a shell script that creates a git repository containing all commits that are
18+
/// traversed when a blame is generated.
1919
///
20-
/// The idea is that we don't want to deal with licensing, it's more about patterns in order to
21-
/// reproduce cases for tests.
20+
/// This command extracts the file's history so that blame, when run on the repository created
21+
/// by the script, shows the same characteristics, in particular bugs, as the original, but in
22+
/// a way that the original source file's content cannot be reconstructed.
23+
///
24+
/// The idea is that by obfuscating the file's content we make it easier for people to share
25+
/// the subset of data that's required for debugging purposes from repositories that are not
26+
/// public.
27+
///
28+
/// Note that the obfuscation leaves certain properties of the source intact, so they can still
29+
/// be inferred from the extracted history. Among these properties are directory structure
30+
/// (though not the directories' names), renames, number of lines, and whitespace.
31+
///
32+
/// This command can also be helpful in debugging the blame algorithm itself.
33+
///
34+
/// ### Terminology
35+
///
36+
/// A **blame history** is the set of commits that the blame algorithm, at some point, treated
37+
/// as potential suspects for any line in a file. It is a subset of all commits that ever
38+
/// changed a file in its history.
39+
///
40+
/// With respect to branches and merge commits, the **blame history** will not necessarily be
41+
/// identical to the file's history in the source repository. This is because the blame
42+
/// algorithm will stop following a file's history for branches that only touch lines for which
43+
/// the source has already been found. The **blame history**, thus, likely looks "cleaner" and
44+
/// "simpler" than the source history.
2245
#[clap(visible_alias = "bcr")]
2346
BlameCopyRoyal {
2447
/// Don't really copy anything.
@@ -32,9 +55,8 @@ pub enum Subcommands {
3255
file: std::ffi::OsString,
3356
/// Do not use `copy-royal` to obfuscate the content of blobs, but copy it verbatim.
3457
///
35-
/// Note that this should only be done if the source repository only contains information
36-
/// you’re willing to share. Also note that the obfuscation leaves the structure of the
37-
/// source intact, so a few of its properties can still be inferred.
58+
/// Note that this should only be done if the source history contains only information
59+
/// you're willing to share.
3860
#[clap(long)]
3961
verbatim: bool,
4062
},

tests/it/src/commands/blame_copy_royal.rs

Lines changed: 80 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ pub(super) mod function {
66
use anyhow::Context;
77
use gix::{
88
blame::BlamePathEntry,
9-
bstr::{BStr, ByteSlice},
9+
bstr::{BStr, BString, ByteSlice},
1010
objs::FindExt,
1111
ObjectId,
1212
};
@@ -130,25 +130,31 @@ pub(super) mod function {
130130
);
131131

132132
if !dry_run {
133-
std::fs::write(script_file, blame_script.script)?;
133+
let blocks: Vec<_> = blame_script
134+
.script
135+
.iter()
136+
.map(|operation| operation.to_string())
137+
.collect();
138+
139+
std::fs::write(script_file, blocks.join(""))?;
134140
}
135141

136142
Ok(())
137143
}
138144

139-
struct BlameScript {
140-
blame_path: Vec<BlamePathEntry>,
141-
seen: BTreeSet<ObjectId>,
142-
script: String,
143-
options: Options,
145+
enum BlameScriptOperation {
146+
InitRepository,
147+
RemoveFile(String),
148+
CommitFile(BString, ObjectId),
149+
CheckoutTag(ObjectId),
150+
PrepareMerge(Vec<ObjectId>),
151+
CreateTag(ObjectId),
144152
}
145153

146-
impl BlameScript {
147-
fn new(blame_path: Vec<BlamePathEntry>, options: Options) -> Self {
148-
let mut script = String::new();
149-
150-
script.push_str(
151-
r"#!/bin/sh
154+
impl BlameScriptOperation {
155+
fn to_string(&self) -> String {
156+
match self {
157+
BlameScriptOperation::InitRepository => r"#!/bin/sh
152158
153159
set -e
154160
@@ -157,8 +163,46 @@ echo .gitignore >> .gitignore
157163
echo assets/ >> .gitignore
158164
echo create-history.sh >> .gitignore
159165
160-
",
161-
);
166+
"
167+
.into(),
168+
BlameScriptOperation::RemoveFile(src) => format!(
169+
r"# delete previous version of file
170+
git rm {src}
171+
"
172+
),
173+
BlameScriptOperation::CommitFile(src, commit_id) => format!(
174+
r"# make file {src} contain content at commit {commit_id}
175+
mkdir -p $(dirname {src})
176+
cp ./assets/{commit_id}.commit ./{src}
177+
# create commit
178+
git add {src}
179+
git commit -m {commit_id}
180+
"
181+
),
182+
BlameScriptOperation::CheckoutTag(commit_id) => format!("git checkout tag-{}\n", commit_id),
183+
BlameScriptOperation::PrepareMerge(commit_ids) => format!(
184+
"git merge --no-commit {} || true\n",
185+
commit_ids
186+
.iter()
187+
.map(|commit_id| format!("tag-{commit_id}"))
188+
.collect::<Vec<_>>()
189+
.join(" ")
190+
),
191+
BlameScriptOperation::CreateTag(commit_id) => format!("git tag tag-{commit_id}\n\n"),
192+
}
193+
}
194+
}
195+
196+
struct BlameScript {
197+
blame_path: Vec<BlamePathEntry>,
198+
seen: BTreeSet<ObjectId>,
199+
script: Vec<BlameScriptOperation>,
200+
options: Options,
201+
}
202+
203+
impl BlameScript {
204+
fn new(blame_path: Vec<BlamePathEntry>, options: Options) -> Self {
205+
let script = vec![BlameScriptOperation::InitRepository];
162206

163207
Self {
164208
blame_path,
@@ -202,7 +246,7 @@ echo create-history.sh >> .gitignore
202246
};
203247
let commit_id = blame_path_entry.commit_id;
204248

205-
let delete_previous_file_script = match &blame_path_entry.previous_source_file_path {
249+
let delete_previous_file_operation = match &blame_path_entry.previous_source_file_path {
206250
Some(previous_source_file_path) if source_file_path != *previous_source_file_path => {
207251
let src = if self.options.verbatim {
208252
previous_source_file_path.to_string()
@@ -215,57 +259,41 @@ echo create-history.sh >> .gitignore
215259
crate::commands::copy_royal::remapped(source_file_path)
216260
};
217261

218-
format!(
219-
r"# delete previous version of file
220-
git rm {src}
221-
"
222-
)
262+
Some(BlameScriptOperation::RemoveFile(src))
223263
}
224-
_ => String::new(),
264+
_ => None,
225265
};
226266

227-
let script = format!(
228-
r"# make file {src} contain content at commit {commit_id}
229-
mkdir -p $(dirname {src})
230-
cp ./assets/{commit_id}.commit ./{src}
231-
# create commit
232-
git add {src}
233-
git commit -m {commit_id}
234-
"
235-
);
236-
237267
if parents.is_empty() {
238-
self.script.push_str(delete_previous_file_script.as_str());
239-
self.script.push_str(script.as_str());
268+
if let Some(delete_previous_file_operation) = delete_previous_file_operation {
269+
self.script.push(delete_previous_file_operation);
270+
}
271+
self.script.push(BlameScriptOperation::CommitFile(src, commit_id));
240272
} else {
241273
let ([first], rest) = parents.split_at(1) else {
242274
unreachable!();
243275
};
244276

245-
self.script
246-
.push_str(format!("git checkout tag-{}\n", first.commit_id).as_str());
277+
self.script.push(BlameScriptOperation::CheckoutTag(first.commit_id));
247278

248279
if rest.is_empty() {
249-
self.script.push_str(delete_previous_file_script.as_str());
250-
self.script.push_str(script.as_str());
280+
if let Some(delete_previous_file_operation) = delete_previous_file_operation {
281+
self.script.push(delete_previous_file_operation);
282+
}
283+
self.script.push(BlameScriptOperation::CommitFile(src, commit_id));
251284
} else {
252-
self.script.push_str(
253-
format!(
254-
"git merge --no-commit {} || true\n",
255-
rest.iter()
256-
.map(|blame_path_entry| format!("tag-{}", blame_path_entry.commit_id))
257-
.collect::<Vec<_>>()
258-
.join(" ")
259-
)
260-
.as_str(),
261-
);
262-
263-
self.script.push_str(delete_previous_file_script.as_str());
264-
self.script.push_str(script.as_str());
285+
self.script.push(BlameScriptOperation::PrepareMerge(
286+
rest.iter().map(|blame_path_entry| blame_path_entry.commit_id).collect(),
287+
));
288+
289+
if let Some(delete_previous_file_operation) = delete_previous_file_operation {
290+
self.script.push(delete_previous_file_operation);
291+
}
292+
self.script.push(BlameScriptOperation::CommitFile(src, commit_id));
265293
}
266294
}
267295

268-
self.script.push_str(format!("git tag tag-{commit_id}\n\n").as_str());
296+
self.script.push(BlameScriptOperation::CreateTag(commit_id));
269297

270298
Ok(())
271299
}

0 commit comments

Comments
 (0)