Skip to content

Commit 72986fc

Browse files
Deduplicate OnDisk Corpus (#2827)
* testcase name logic * implement locking * implement logic for removing testcase * minor modifications * minor modifications to remove_testcase() * extract generate_name() from trait Input (broken) * Revert "extract generate_name() from trait Input (broken)" This reverts commit 9e217be. * fix ci errors * remove CorpusId from generate_name() calls * toml formatting * write from file instead of fs * fmt and clippy * fix windows clippy * handle renaming of testcase * fix failing cmplog test * overwrite lockfile on remove testcase * format * bring back corpus id in generate_name * missed windows executors hook * fix failing tests * some more errors --------- Co-authored-by: Dongjia "toka" Zhang <tokazerkje@outlook.com>
1 parent 5bd6a6f commit 72986fc

File tree

11 files changed

+113
-129
lines changed

11 files changed

+113
-129
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ uuid = { version = "1.10.0", features = ["serde", "v4"] }
124124
which = "6.0.3"
125125
windows = "0.59.0"
126126
z3 = "0.12.1"
127-
127+
fs2 = "0.4.3" # Used by OnDisk Corpus for file locking
128128

129129
[workspace.lints.rust]
130130
# Deny

fuzzers/baby/tutorial/src/input.rs

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,7 @@
22
use std::hash::Hash;
33

44
use lain::prelude::*;
5-
use libafl::{
6-
corpus::CorpusId,
7-
inputs::{HasTargetBytes, Input},
8-
};
5+
use libafl::inputs::{HasTargetBytes, Input};
96
use libafl_bolts::{ownedref::OwnedSlice, HasLen};
107
use serde::{Deserialize, Serialize};
118

@@ -48,15 +45,7 @@ pub enum PacketType {
4845
Reset = 0x2,
4946
}
5047

51-
impl Input for PacketData {
52-
fn generate_name(&self, id: Option<CorpusId>) -> String {
53-
if let Some(id) = id {
54-
format!("id_{}", id.0)
55-
} else {
56-
"id_unknown".into()
57-
}
58-
}
59-
}
48+
impl Input for PacketData {}
6049

6150
impl HasTargetBytes for PacketData {
6251
#[inline]

fuzzers/structure_aware/baby_fuzzer_custom_input/src/input.rs

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,13 @@ use core::num::NonZeroUsize;
22
use std::{borrow::Cow, hash::Hash};
33

44
use libafl::{
5-
corpus::CorpusId,
65
generators::{Generator, RandBytesGenerator},
76
inputs::{BytesInput, HasTargetBytes, Input},
87
mutators::{MutationResult, Mutator},
98
state::HasRand,
109
Error, SerdeAny,
1110
};
12-
use libafl_bolts::{generic_hash_std, rands::Rand, Named};
11+
use libafl_bolts::{rands::Rand, Named};
1312
use serde::{Deserialize, Serialize};
1413

1514
/// The custom [`Input`] type used in this example, consisting of a byte array part, a byte array that is not always present, and a boolean
@@ -28,11 +27,7 @@ pub struct CustomInput {
2827
}
2928

3029
/// Hash-based implementation
31-
impl Input for CustomInput {
32-
fn generate_name(&self, _id: Option<CorpusId>) -> String {
33-
format!("{:016x}", generic_hash_std(self))
34-
}
35-
}
30+
impl Input for CustomInput {}
3631

3732
impl CustomInput {
3833
/// Returns a mutable reference to the byte array

libafl/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ std = [
6060
"libafl_bolts/std",
6161
"typed-builder",
6262
"fastbloom",
63+
"fs2",
6364
]
6465

6566
## Tracks the Feedbacks and the Objectives that were interesting for a Testcase
@@ -290,6 +291,8 @@ const_panic = { version = "0.2.9", default-features = false } # similarly, for f
290291
pyo3 = { workspace = true, optional = true }
291292
regex-syntax = { version = "0.8.4", optional = true } # For nautilus
292293

294+
fs2 = { workspace = true, optional = true } # used by OnDisk Corpus for file locking
295+
293296
# optional-dev deps (change when target.'cfg(accessible(::std))'.test-dependencies will be stable)
294297
serial_test = { workspace = true, optional = true, default-features = false, features = [
295298
"logging",

libafl/src/corpus/inmemory_ondisk.rs

Lines changed: 82 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,17 @@
44
//! For a lower memory footprint, consider using [`crate::corpus::CachedOnDiskCorpus`]
55
//! which only stores a certain number of [`Testcase`]s and removes additional ones in a FIFO manner.
66
7-
use alloc::string::String;
7+
use alloc::string::{String, ToString};
88
use core::cell::{Ref, RefCell, RefMut};
99
use std::{
1010
fs,
1111
fs::{File, OpenOptions},
1212
io,
13-
io::Write,
13+
io::{Read, Seek, SeekFrom, Write},
1414
path::{Path, PathBuf},
1515
};
1616

17+
use fs2::FileExt;
1718
#[cfg(feature = "gzip")]
1819
use libafl_bolts::compress::GzipCompressor;
1920
use serde::{Deserialize, Serialize};
@@ -33,7 +34,11 @@ use crate::{
3334
/// If the create fails for _any_ reason, including, but not limited to, a preexisting existing file of that name,
3435
/// it will instead return the respective [`io::Error`].
3536
fn create_new<P: AsRef<Path>>(path: P) -> Result<File, io::Error> {
36-
OpenOptions::new().write(true).create_new(true).open(path)
37+
OpenOptions::new()
38+
.write(true)
39+
.read(true)
40+
.create_new(true)
41+
.open(path)
3742
}
3843

3944
/// Tries to create the given `path` and returns `None` _only_ if the file already existed.
@@ -85,7 +90,7 @@ where
8590
fn add(&mut self, testcase: Testcase<I>) -> Result<CorpusId, Error> {
8691
let id = self.inner.add(testcase)?;
8792
let testcase = &mut self.get(id).unwrap().borrow_mut();
88-
self.save_testcase(testcase, id)?;
93+
self.save_testcase(testcase, Some(id))?;
8994
*testcase.input_mut() = None;
9095
Ok(id)
9196
}
@@ -95,7 +100,7 @@ where
95100
fn add_disabled(&mut self, testcase: Testcase<I>) -> Result<CorpusId, Error> {
96101
let id = self.inner.add_disabled(testcase)?;
97102
let testcase = &mut self.get_from_all(id).unwrap().borrow_mut();
98-
self.save_testcase(testcase, id)?;
103+
self.save_testcase(testcase, Some(id))?;
99104
*testcase.input_mut() = None;
100105
Ok(id)
101106
}
@@ -106,7 +111,7 @@ where
106111
let entry = self.inner.replace(id, testcase)?;
107112
self.remove_testcase(&entry)?;
108113
let testcase = &mut self.get(id).unwrap().borrow_mut();
109-
self.save_testcase(testcase, id)?;
114+
self.save_testcase(testcase, Some(id))?;
110115
*testcase.input_mut() = None;
111116
Ok(entry)
112117
}
@@ -309,12 +314,19 @@ impl<I> InMemoryOnDiskCorpus<I> {
309314

310315
/// Sets the filename for a [`Testcase`].
311316
/// If an error gets returned from the corpus (i.e., file exists), we'll have to retry with a different filename.
317+
/// Renaming testcases will most likely cause duplicate testcases to not be handled correctly
318+
/// if testcases with the same input are not given the same filename.
319+
/// Only rename when you know what you are doing.
312320
#[inline]
313321
pub fn rename_testcase(
314322
&self,
315323
testcase: &mut Testcase<I>,
316324
filename: String,
317-
) -> Result<(), Error> {
325+
id: Option<CorpusId>,
326+
) -> Result<(), Error>
327+
where
328+
I: Input,
329+
{
318330
if testcase.filename().is_some() {
319331
// We are renaming!
320332

@@ -327,36 +339,10 @@ impl<I> InMemoryOnDiskCorpus<I> {
327339
return Ok(());
328340
}
329341

330-
if self.locking {
331-
let new_lock_filename = format!(".{new_filename}.lafl_lock");
332-
333-
// Try to create lock file for new testcases
334-
if let Err(err) = create_new(self.dir_path.join(&new_lock_filename)) {
335-
*testcase.filename_mut() = Some(old_filename);
336-
return Err(Error::illegal_state(format!(
337-
"Unable to create lock file {new_lock_filename} for new testcase: {err}"
338-
)));
339-
}
340-
}
341-
342342
let new_file_path = self.dir_path.join(&new_filename);
343-
344-
fs::rename(testcase.file_path().as_ref().unwrap(), &new_file_path)?;
345-
346-
let new_metadata_path = {
347-
if let Some(old_metadata_path) = testcase.metadata_path() {
348-
// We have metadata. Let's rename it.
349-
let new_metadata_path = self.dir_path.join(format!(".{new_filename}.metadata"));
350-
fs::rename(old_metadata_path, &new_metadata_path)?;
351-
352-
Some(new_metadata_path)
353-
} else {
354-
None
355-
}
356-
};
357-
358-
*testcase.metadata_path_mut() = new_metadata_path;
343+
self.remove_testcase(testcase)?;
359344
*testcase.filename_mut() = Some(new_filename);
345+
self.save_testcase(testcase, id)?;
360346
*testcase.file_path_mut() = Some(new_file_path);
361347

362348
Ok(())
@@ -367,42 +353,54 @@ impl<I> InMemoryOnDiskCorpus<I> {
367353
}
368354
}
369355

370-
fn save_testcase(&self, testcase: &mut Testcase<I>, id: CorpusId) -> Result<(), Error>
356+
fn save_testcase(&self, testcase: &mut Testcase<I>, id: Option<CorpusId>) -> Result<(), Error>
371357
where
372358
I: Input,
373359
{
374-
let file_name_orig = testcase.filename_mut().take().unwrap_or_else(|| {
360+
let file_name = testcase.filename_mut().take().unwrap_or_else(|| {
375361
// TODO walk entry metadata to ask for pieces of filename (e.g. :havoc in AFL)
376-
testcase.input().as_ref().unwrap().generate_name(Some(id))
362+
testcase.input().as_ref().unwrap().generate_name(id)
377363
});
378364

379-
// New testcase, we need to save it.
380-
let mut file_name = file_name_orig.clone();
381-
382-
let mut ctr = 2;
383-
let file_name = if self.locking {
384-
loop {
385-
let lockfile_name = format!(".{file_name}.lafl_lock");
386-
let lockfile_path = self.dir_path.join(lockfile_name);
387-
388-
if try_create_new(lockfile_path)?.is_some() {
389-
break file_name;
390-
}
365+
let mut ctr = String::new();
366+
if self.locking {
367+
let lockfile_name = format!(".{file_name}");
368+
let lockfile_path = self.dir_path.join(lockfile_name);
369+
370+
let mut lockfile = try_create_new(&lockfile_path)?.unwrap_or(
371+
OpenOptions::new()
372+
.write(true)
373+
.read(true)
374+
.open(&lockfile_path)?,
375+
);
376+
lockfile.lock_exclusive()?;
377+
378+
lockfile.read_to_string(&mut ctr)?;
379+
ctr = if ctr.is_empty() {
380+
String::from("1")
381+
} else {
382+
(ctr.trim().parse::<u32>()? + 1).to_string()
383+
};
391384

392-
file_name = format!("{file_name_orig}-{ctr}");
393-
ctr += 1;
394-
}
395-
} else {
396-
file_name
397-
};
385+
lockfile.seek(SeekFrom::Start(0))?;
386+
lockfile.write_all(ctr.as_bytes())?;
387+
}
398388

399389
if testcase.file_path().is_none() {
400390
*testcase.file_path_mut() = Some(self.dir_path.join(&file_name));
401391
}
402392
*testcase.filename_mut() = Some(file_name);
403393

404394
if self.meta_format.is_some() {
405-
let metafile_name = format!(".{}.metadata", testcase.filename().as_ref().unwrap());
395+
let metafile_name = if self.locking {
396+
format!(
397+
".{}_{}.metadata",
398+
testcase.filename().as_ref().unwrap(),
399+
ctr
400+
)
401+
} else {
402+
format!(".{}.metadata", testcase.filename().as_ref().unwrap())
403+
};
406404
let metafile_path = self.dir_path.join(&metafile_name);
407405
let mut tmpfile_path = metafile_path.clone();
408406
tmpfile_path.set_file_name(format!(".{metafile_name}.tmp",));
@@ -445,15 +443,36 @@ impl<I> InMemoryOnDiskCorpus<I> {
445443

446444
fn remove_testcase(&self, testcase: &Testcase<I>) -> Result<(), Error> {
447445
if let Some(filename) = testcase.filename() {
446+
let mut ctr = String::new();
447+
if self.locking {
448+
let lockfile_path = self.dir_path.join(format!(".{filename}"));
449+
let mut lockfile = OpenOptions::new()
450+
.write(true)
451+
.read(true)
452+
.open(&lockfile_path)?;
453+
454+
lockfile.lock_exclusive()?;
455+
lockfile.read_to_string(&mut ctr)?;
456+
ctr = ctr.trim().to_string();
457+
458+
if ctr == "1" {
459+
FileExt::unlock(&lockfile)?;
460+
drop(fs::remove_file(lockfile_path));
461+
} else {
462+
lockfile.seek(SeekFrom::Start(0))?;
463+
lockfile.write_all(&(ctr.parse::<u32>()? - 1).to_le_bytes())?;
464+
return Ok(());
465+
}
466+
}
467+
448468
fs::remove_file(self.dir_path.join(filename))?;
449469
if self.meta_format.is_some() {
450-
fs::remove_file(self.dir_path.join(format!(".{filename}.metadata")))?;
470+
if self.locking {
471+
fs::remove_file(self.dir_path.join(format!(".{filename}_{ctr}.metadata")))?;
472+
} else {
473+
fs::remove_file(self.dir_path.join(format!(".{filename}.metadata")))?;
474+
}
451475
}
452-
// also try to remove the corresponding `.lafl_lock` file if it still exists
453-
// (even though it shouldn't exist anymore, at this point in time)
454-
drop(fs::remove_file(
455-
self.dir_path.join(format!(".{filename}.lafl_lock")),
456-
));
457476
}
458477
Ok(())
459478
}

libafl/src/corpus/testcase.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,8 @@ impl<I> Testcase<I> {
264264
}
265265

266266
/// Create a new Testcase instance given an input and a `filename`
267+
/// If locking is enabled, make sure that testcases with the same input have the same filename
268+
/// to prevent ending up with duplicate testcases
267269
#[inline]
268270
pub fn with_filename(input: I, filename: String) -> Self {
269271
Self {

0 commit comments

Comments
 (0)