Skip to content

Commit 082b4ee

Browse files
committed
feat: add memmap feature to use mmap for scanned files
Add a new feature, `memmap`, enabled by default. This adds a new dependency and a new API to scan a file using mmap to read from it. This feature and API are used by default in boreal-cli. An option is added to avoid using mmap.
1 parent 5b5beb8 commit 082b4ee

File tree

6 files changed

+133
-15
lines changed

6 files changed

+133
-15
lines changed

Cargo.lock

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

boreal-cli/Cargo.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,12 @@ name = "boreal"
1414
path = "src/main.rs"
1515

1616
[features]
17-
default = ["authenticode", "profiling"]
17+
default = ["authenticode", "memmap", "profiling"]
1818

1919
# Enable authenticode parsing in boreal, requires OpenSSL
2020
authenticode = ["boreal/authenticode"]
21+
# Enable use of memory maps to load files to scan.
22+
memmap = ["boreal/memmap"]
2123
# Enables scan statistics. Should not impact performances
2224
# significantly, and very useful in a CLI tool to debug rules.
2325
profiling = ["boreal/profiling"]

boreal-cli/src/main.rs

Lines changed: 52 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ use crossbeam_channel::{bounded, Receiver, Sender};
1515
use walkdir::WalkDir;
1616

1717
fn build_command() -> Command {
18-
command!()
18+
let mut command = command!()
1919
.arg(
2020
Arg::new("no_follow_symlinks")
2121
.short('N')
@@ -89,7 +89,24 @@ fn build_command() -> Command {
8989
.long("scan-stats")
9090
.action(ArgAction::SetTrue)
9191
.help("Display statistics on rules' evaluation"),
92-
)
92+
);
93+
94+
if cfg!(feature = "memmap") {
95+
command = command.arg(
96+
Arg::new("no_mmap")
97+
.long("no-mmap")
98+
.action(ArgAction::SetTrue)
99+
.help("Disable the use of memory maps.")
100+
.long_help(
101+
"Disable the use of memory maps.\n\
102+
By default, memory maps are used to load files to scan.\n\
103+
This can cause the program to abort unexpectedly \
104+
if files are simultaneously truncated.",
105+
),
106+
);
107+
}
108+
109+
command
93110
}
94111

95112
fn main() -> ExitCode {
@@ -190,7 +207,7 @@ fn main() -> ExitCode {
190207

191208
ExitCode::SUCCESS
192209
} else {
193-
match scan_file(&scanner, input, args.get_flag("print_module_data")) {
210+
match scan_file(&scanner, input, ScanOptions::new(&args)) {
194211
Ok(()) => ExitCode::SUCCESS,
195212
Err(err) => {
196213
eprintln!("Cannot scan {}: {}", input.display(), err);
@@ -200,10 +217,35 @@ fn main() -> ExitCode {
200217
}
201218
}
202219

203-
fn scan_file(scanner: &Scanner, path: &Path, print_module_data: bool) -> std::io::Result<()> {
204-
let res = scanner.scan_file(path)?;
220+
#[derive(Copy, Clone)]
221+
struct ScanOptions {
222+
print_module_data: bool,
223+
no_mmap: bool,
224+
}
225+
226+
impl ScanOptions {
227+
fn new(args: &ArgMatches) -> Self {
228+
Self {
229+
print_module_data: args.get_flag("print_module_data"),
230+
no_mmap: if cfg!(feature = "memmap") {
231+
args.get_flag("no_mmap")
232+
} else {
233+
false
234+
},
235+
}
236+
}
237+
}
238+
239+
fn scan_file(scanner: &Scanner, path: &Path, options: ScanOptions) -> std::io::Result<()> {
240+
let res = if cfg!(feature = "memmap") && !options.no_mmap {
241+
// Safety: By default, we accept that this CLI tool can abort if the underlying
242+
// file is truncated while the scan is ongoing.
243+
unsafe { scanner.scan_file_memmap(path)? }
244+
} else {
245+
scanner.scan_file(path)?
246+
};
205247

206-
if print_module_data {
248+
if options.print_module_data {
207249
for (module_name, module_value) in res.module_values {
208250
// A module value must be an object. Filter out empty ones, it means the module has not
209251
// generated any values.
@@ -240,12 +282,11 @@ impl ThreadPool {
240282
};
241283

242284
let (sender, receiver) = bounded(nb_cpus * 5);
285+
let options = ScanOptions::new(args);
243286
(
244287
Self {
245288
threads: (0..nb_cpus)
246-
.map(|_| {
247-
Self::worker_thread(scanner, &receiver, args.get_flag("print_module_data"))
248-
})
289+
.map(|_| Self::worker_thread(scanner, &receiver, options))
249290
.collect(),
250291
},
251292
sender,
@@ -261,14 +302,14 @@ impl ThreadPool {
261302
fn worker_thread(
262303
scanner: &Scanner,
263304
receiver: &Receiver<PathBuf>,
264-
print_module_data: bool,
305+
scan_options: ScanOptions,
265306
) -> JoinHandle<()> {
266307
let scanner = scanner.clone();
267308
let receiver = receiver.clone();
268309

269310
std::thread::spawn(move || {
270311
while let Ok(path) = receiver.recv() {
271-
if let Err(err) = scan_file(&scanner, &path, print_module_data) {
312+
if let Err(err) = scan_file(&scanner, &path, scan_options) {
272313
eprintln!("Cannot scan file {}: {}", path.display(), err);
273314
}
274315
}

boreal-cli/tests/cli.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -644,3 +644,34 @@ fn test_module_names() {
644644
// Still successful, since some other files in the directory may have been scanned
645645
.success();
646646
}
647+
648+
#[test]
649+
#[cfg(feature = "memmap")]
650+
fn test_no_mmap() {
651+
let rule_file = test_file(
652+
r#"
653+
rule first {
654+
strings:
655+
$a = "abc"
656+
condition:
657+
any of them
658+
}
659+
rule second {
660+
strings:
661+
$a = "xyz"
662+
condition:
663+
any of them
664+
}"#,
665+
);
666+
667+
let input = test_file("xyabcz");
668+
// Not matching
669+
cmd()
670+
.arg("--no-mmap")
671+
.arg(rule_file.path())
672+
.arg(input.path())
673+
.assert()
674+
.stdout(predicate::eq(format!("first {}\n", input.path().display())))
675+
.stderr("")
676+
.success();
677+
}

boreal/Cargo.toml

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ edition = "2021"
1212
rust-version = "1.62"
1313

1414
[features]
15-
default = ["hash", "object"]
15+
default = ["hash", "object", "memmap"]
1616

1717
# Enables the "hash" module.
1818
hash = ["md-5", "sha1", "sha2", "hex", "crc32fast", "tlsh2"]
@@ -27,7 +27,10 @@ object = ["dep:object"]
2727
# The `object` feature must also be enabled to get access to the "pe" module.
2828
authenticode = ["dep:authenticode-parser"]
2929

30-
# Enables computating of statistics during scanning.
30+
# Adds an API to scan files using memory maps.
31+
memmap = ["dep:memmap2"]
32+
33+
# Enables computation of statistics during scanning.
3134
profiling = []
3235

3336
[dependencies]
@@ -62,6 +65,9 @@ object = { version = "0.32", optional = true, default-features = false, features
6265
# "authenticode" feature
6366
authenticode-parser = { version = "0.3", optional = true }
6467

68+
# "memmap" feature
69+
memmap2 = { version = "0.7", optional = true }
70+
6571
[dev-dependencies]
6672
base64 = "0.21"
6773
glob = "0.3.1"
@@ -73,4 +79,4 @@ yara = { version = "0.19", features = ["vendored"] }
7379
once_cell = "1.18"
7480

7581
[package.metadata.docs.rs]
76-
features = ["authenticode"]
82+
features = ["authenticode", "memmap"]

boreal/src/scanner/mod.rs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,34 @@ impl Scanner {
142142
Ok(self.scan_mem(&contents))
143143
}
144144

145+
/// Scan a file using memmap to read from it.
146+
///
147+
/// Returns a list of rules that matched the given file.
148+
///
149+
/// # Errors
150+
///
151+
/// Fails if the file at the given path cannot be opened or memory mapped.
152+
///
153+
/// # Safety
154+
///
155+
/// See the safety documentation of [`memmap2::Mmap`]. It is unsafe to use this
156+
/// method as the behavior is undefined if the underlying file is modified while the map
157+
/// is still alive. For example, shrinking the underlying file can and will cause issues
158+
/// in this process: on Linux, a SIGBUS can be emitted, while on Windows, a structured
159+
/// exception can be raised.
160+
#[cfg(feature = "memmap")]
161+
pub unsafe fn scan_file_memmap<P: AsRef<std::path::Path>>(
162+
&self,
163+
path: P,
164+
) -> std::io::Result<ScanResult> {
165+
let file = std::fs::File::open(path.as_ref())?;
166+
167+
// Safety: guaranteed by the safety contract of this function
168+
let mmap = unsafe { memmap2::Mmap::map(&file)? };
169+
170+
Ok(self.scan_mem(&mmap))
171+
}
172+
145173
/// Define a value for a symbol defined and used in compiled rules.
146174
///
147175
/// This symbol must have been defined when compiling rules using

0 commit comments

Comments
 (0)