Skip to content

Commit 7095a5d

Browse files
committed
pageserver - reject and backup future layer files
If a layer file is found with an LSN after the disk_consistent_lsn, it is renamed with a `.{num}.old` suffix (to avoid conflicts with new layer files) and a warning is logged.
1 parent 538c2a2 commit 7095a5d

File tree

3 files changed

+115
-3
lines changed

3 files changed

+115
-3
lines changed

pageserver/src/layered_repository.rs

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,7 @@ impl LayeredRepository {
223223
Some(timeline) => Ok(timeline.clone()),
224224
None => {
225225
let metadata = Self::load_metadata(self.conf, timelineid, self.tenantid)?;
226+
let disk_consistent_lsn = metadata.disk_consistent_lsn;
226227

227228
// Recurse to look up the ancestor timeline.
228229
//
@@ -247,7 +248,7 @@ impl LayeredRepository {
247248
)?;
248249

249250
// List the layers on disk, and load them into the layer map
250-
timeline.load_layer_map()?;
251+
timeline.load_layer_map(disk_consistent_lsn)?;
251252

252253
// needs to be after load_layer_map
253254
timeline.init_current_logical_size()?;
@@ -1048,7 +1049,7 @@ impl LayeredTimeline {
10481049
///
10491050
/// Scan the timeline directory to populate the layer map
10501051
///
1051-
fn load_layer_map(&self) -> anyhow::Result<()> {
1052+
fn load_layer_map(&self, disk_consistent_lsn: Lsn) -> anyhow::Result<()> {
10521053
info!(
10531054
"loading layer map for timeline {} into memory",
10541055
self.timelineid
@@ -1057,8 +1058,20 @@ impl LayeredTimeline {
10571058
let (imgfilenames, mut deltafilenames) =
10581059
filename::list_files(self.conf, self.timelineid, self.tenantid)?;
10591060

1061+
let timeline_path = self.conf.timeline_path(&self.timelineid, &self.tenantid);
1062+
10601063
// First create ImageLayer structs for each image file.
10611064
for filename in imgfilenames.iter() {
1065+
if filename.lsn > disk_consistent_lsn {
1066+
warn!(
1067+
"found future image layer {} on timeline {}",
1068+
filename, self.timelineid
1069+
);
1070+
1071+
rename_to_backup(timeline_path.join(filename.to_string()))?;
1072+
continue;
1073+
}
1074+
10621075
let layer = ImageLayer::new(self.conf, self.timelineid, self.tenantid, filename);
10631076

10641077
info!(
@@ -1076,6 +1089,17 @@ impl LayeredTimeline {
10761089
deltafilenames.sort();
10771090

10781091
for filename in deltafilenames.iter() {
1092+
ensure!(filename.start_lsn < filename.end_lsn);
1093+
if filename.end_lsn > disk_consistent_lsn {
1094+
warn!(
1095+
"found future delta layer {} on timeline {}",
1096+
filename, self.timelineid
1097+
);
1098+
1099+
rename_to_backup(timeline_path.join(filename.to_string()))?;
1100+
continue;
1101+
}
1102+
10791103
let predecessor = layers.get(&filename.seg, filename.start_lsn);
10801104

10811105
let predecessor_str: String = if let Some(prec) = &predecessor {
@@ -1929,3 +1953,23 @@ fn layer_ptr_eq(l1: &dyn Layer, l2: &dyn Layer) -> bool {
19291953
// see here for more https://github.com/rust-lang/rust/issues/46139
19301954
std::ptr::eq(l1_ptr as *const (), l2_ptr as *const ())
19311955
}
1956+
1957+
/// Add a suffix to a layer file's name: .{num}.old
1958+
/// Uses the first available num (starts at 0)
1959+
fn rename_to_backup(path: PathBuf) -> anyhow::Result<()> {
1960+
let filename = path.file_name().unwrap().to_str().unwrap();
1961+
let mut new_path = path.clone();
1962+
1963+
for i in 0u32.. {
1964+
new_path.set_file_name(format!("{}.{}.old", filename, i));
1965+
if !new_path.exists() {
1966+
std::fs::rename(&path, &new_path)?;
1967+
return Ok(());
1968+
}
1969+
}
1970+
1971+
Err(anyhow!(
1972+
"couldn't find an unused backup number for {:?}",
1973+
path
1974+
))
1975+
}

pageserver/src/layered_repository/filename.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,11 @@ pub fn list_files(
290290
deltafiles.push(deltafilename);
291291
} else if let Some(imgfilename) = ImageFileName::from_str(fname) {
292292
imgfiles.push(imgfilename);
293-
} else if fname == "wal" || fname == "metadata" || fname == "ancestor" {
293+
} else if fname == "wal"
294+
|| fname == "metadata"
295+
|| fname == "ancestor"
296+
|| fname.ends_with(".old")
297+
{
294298
// ignore these
295299
} else {
296300
warn!("unrecognized filename in timeline dir: {}", fname);

pageserver/src/repository.rs

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -759,6 +759,70 @@ mod tests {
759759
Ok(())
760760
}
761761

762+
#[test]
fn future_layerfiles() -> Result<()> {
    const TEST_NAME: &str = "future_layerfiles";

    // Create a repository holding a single empty timeline, then close it
    // again so that it can be re-opened from disk further down.
    let timelineid = ZTimelineId::from_str("11223344556677881122334455667788").unwrap();
    {
        let repo = get_test_repo(TEST_NAME)?;
        repo.create_empty_timeline(timelineid)?;
    }

    // The test repository is expected to contain exactly one tenant
    // directory; its name is the tenant id.
    let mut tenant_entries =
        std::fs::read_dir(PageServerConf::test_repo_dir(TEST_NAME).join("tenants"))?;
    let tenant_dir = tenant_entries.next().unwrap().unwrap().path();
    assert!(tenant_dir.is_dir());
    let tenantid = ZTenantId::from_str(tenant_dir.file_name().unwrap().to_str().unwrap())?;
    assert!(tenant_entries.next().is_none());

    let timeline_dir = tenant_dir.join("timelines").join(timelineid.to_string());

    // Creates an empty file in the timeline directory; the file must not exist yet.
    let make_empty_file = |filename: &str| -> std::io::Result<()> {
        let target = timeline_dir.join(filename);
        assert!(!target.exists());
        std::fs::write(&target, &[])
    };

    // Re-opens the repository and loads the timeline, then closes the repository again.
    let reload_timeline = || -> Result<()> {
        let reopened = load_test_repo(TEST_NAME, tenantid)?;
        reopened.get_timeline(timelineid).unwrap();
        Ok(())
    };

    // Asserts that `filename` is gone and that its backup number `num` exists.
    let assert_backed_up = |filename: &str, num: u32| {
        assert!(!timeline_dir.join(filename).exists());
        assert!(timeline_dir
            .join(format!("{}.{}.old", filename, num))
            .exists());
    };

    // One image layer and one delta layer, both named with LSNs that an
    // empty timeline is not expected to have reached.
    let layer_files = [
        format!("pg_control_0_{:016X}", 8000),
        format!("pg_control_0_{:016X}_{:016X}", 8000, 8008),
    ];

    // Each round plants the files again and reloads the timeline. After
    // round N, backups numbered 0..=N must exist for both files.
    for round in 0..2u32 {
        for name in layer_files.iter() {
            make_empty_file(name)?;
        }

        reload_timeline()?;

        for num in 0..=round {
            for name in layer_files.iter() {
                assert_backed_up(name, num);
            }
        }
    }

    Ok(())
}
825+
762826
// Mock WAL redo manager that doesn't do much
763827
struct TestRedoManager {}
764828

0 commit comments

Comments
 (0)