Skip to content

Commit a73c0ea

Browse files
alexs-sh authored and BenWiederhake committed
shred: improve write logic to support data alignment
This commit improves the reliability of the writing logic and removes implicit dependencies between the preferred I/O size and the block size. For example, in earlier versions, using BLOCK_SIZE != N * IO_SIZE could lead to overflows due to alignment with values larger than the buffer size.
1 parent 745d2ad commit a73c0ea

File tree

1 file changed

+58
-18
lines changed

1 file changed

+58
-18
lines changed

src/uu/shred/src/shred.rs

Lines changed: 58 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,12 @@ const NAME_CHARSET: &[u8] = b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN
4949
const PATTERN_LENGTH: usize = 3;
5050
const PATTERN_BUFFER_SIZE: usize = BLOCK_SIZE + PATTERN_LENGTH - 1;
5151

/// Optimal I/O block size for the filesystem, used for data size alignment
/// (mirroring GNU shred's behavior).
///
/// A 4 KiB block is the usual filesystem optimum, so it is kept as a
/// compile-time constant; the actual value could instead be queried at
/// runtime via e.g. `std::os::unix::fs::MetadataExt::blksize()`.
const OPTIMAL_IO_BLOCK_SIZE: usize = 4096;
5258
/// Patterns that appear in order for the passes
5359
///
5460
/// A single-byte pattern is equivalent to a multi-byte pattern of that byte three times.
@@ -507,9 +513,16 @@ fn wipe_file(
507513
Ok(())
508514
}
509515

510-
// Aligns data size up to the nearest multiple of block size
511-
fn get_aligned_size(data_size: usize, block_size: usize) -> usize {
512-
data_size.div_ceil(block_size) * block_size
516+
fn split_on_blocks(file_size: u64, exact: bool) -> (u64, u64) {
517+
let file_size = if exact {
518+
file_size
519+
} else {
520+
// The main idea here is to align the file size to the OPTIMAL_IO_BLOCK_SIZE, and then split it into
521+
// BLOCK_SIZE + remaining bytes. Since the input data is already aligned to N * OPTIMAL_IO_BLOCK_SIZE,
522+
// the output file size will also be aligned and correct.
523+
file_size.div_ceil(OPTIMAL_IO_BLOCK_SIZE as u64) * OPTIMAL_IO_BLOCK_SIZE as u64
524+
};
525+
(file_size / BLOCK_SIZE as u64, file_size % BLOCK_SIZE as u64)
513526
}
514527

515528
fn do_pass(
@@ -522,27 +535,17 @@ fn do_pass(
522535
file.rewind()?;
523536

524537
let mut writer = BytesWriter::from_pass_type(pass_type);
538+
let (number_of_blocks, bytes_left) = split_on_blocks(file_size, exact);
525539

526540
// We start by writing BLOCK_SIZE times as many time as possible.
527-
for _ in 0..(file_size / BLOCK_SIZE as u64) {
541+
for _ in 0..number_of_blocks {
528542
let block = writer.bytes_for_pass(BLOCK_SIZE);
529543
file.write_all(block)?;
530544
}
531545

532-
// Now we might have some bytes left, so we write either that
533-
// many bytes if exact is true, or aligned by FS_BLOCK_SIZE bytes if not.
534-
let bytes_left = (file_size % BLOCK_SIZE as u64) as usize;
535-
if bytes_left > 0 {
536-
let size = if exact {
537-
bytes_left
538-
} else {
539-
// This alignment allows us to better match GNU shred's behavior.
540-
const FS_BLOCK_SIZE: usize = 4096;
541-
get_aligned_size(bytes_left, FS_BLOCK_SIZE)
542-
};
543-
let block = writer.bytes_for_pass(size);
544-
file.write_all(block)?;
545-
}
546+
// Then we write remaining data which is smaller than the BLOCK_SIZE
547+
let block = writer.bytes_for_pass(bytes_left as usize);
548+
file.write_all(block)?;
546549

547550
file.sync_data()?;
548551

@@ -630,3 +633,40 @@ fn do_remove(
630633

631634
Ok(())
632635
}
#[cfg(test)]
mod tests {

    use crate::{BLOCK_SIZE, OPTIMAL_IO_BLOCK_SIZE, split_on_blocks};

    /// Spot-checks the non-exact split against hand-computed values.
    ///
    /// Note: these expectations are only valid for the default values of
    /// BLOCK_SIZE and OPTIMAL_IO_BLOCK_SIZE.
    #[test]
    fn test_align_non_exact_control_values() {
        let cases: [(u64, (u64, u64)); 7] = [
            (1, (0, 4096)),
            (4095, (0, 4096)),
            (4096, (0, 4096)),
            (4097, (0, 8192)),
            (65535, (1, 0)),
            (65536, (1, 0)),
            (65537, (1, 4096)),
        ];
        for (size, expected) in cases {
            assert_eq!(split_on_blocks(size, false), expected);
        }
    }

    /// Reassembling a non-exact split must always yield a total that is a
    /// multiple of the optimal I/O block size.
    #[test]
    fn test_align_non_exact_cycle() {
        for size in 1..(2 * BLOCK_SIZE as u64) {
            let (blocks, tail) = split_on_blocks(size, false);
            let reassembled = blocks * BLOCK_SIZE as u64 + tail;
            assert_eq!(reassembled % OPTIMAL_IO_BLOCK_SIZE as u64, 0);
        }
    }

    /// Reassembling an exact split must reproduce the input size unchanged.
    #[test]
    fn test_align_exact_cycle() {
        for size in 1..(2 * BLOCK_SIZE as u64) {
            let (blocks, tail) = split_on_blocks(size, true);
            let reassembled = blocks * BLOCK_SIZE as u64 + tail;
            assert_eq!(reassembled, size);
        }
    }
}

0 commit comments

Comments
 (0)