From 0680a282e0bf41b6bfc34cc9a3e4ae1906df3b5d Mon Sep 17 00:00:00 2001 From: Lev Demidov Date: Tue, 8 Jul 2025 18:34:45 -0400 Subject: [PATCH 1/2] Adds test (currently failing) to show that a UTF-8 file with BOM is not parsed properly when read using FileSourceFile --- tests/testsuite/file-ext-with-bom.json | 4 ++++ tests/testsuite/file.rs | 12 ++++++++++++ 2 files changed, 16 insertions(+) create mode 100644 tests/testsuite/file-ext-with-bom.json diff --git a/tests/testsuite/file-ext-with-bom.json b/tests/testsuite/file-ext-with-bom.json new file mode 100644 index 00000000..03e3c7c0 --- /dev/null +++ b/tests/testsuite/file-ext-with-bom.json @@ -0,0 +1,4 @@ +{ + "debug": true, + "production": false +} diff --git a/tests/testsuite/file.rs b/tests/testsuite/file.rs index 33077852..669cba59 100644 --- a/tests/testsuite/file.rs +++ b/tests/testsuite/file.rs @@ -67,6 +67,18 @@ fn test_file_ext() { assert_eq!(c.get("production").ok(), Some(false)); } +#[test] +#[cfg(feature = "json")] +fn test_file_ext_with_utf8_bom() { + let c = Config::builder() + .add_source(File::with_name("tests/testsuite/file-ext-with-bom.json")) + .build() + .unwrap(); + + assert_eq!(c.get("debug").ok(), Some(true)); + assert_eq!(c.get("production").ok(), Some(false)); +} + #[test] #[cfg(feature = "json")] fn test_file_second_ext() { From 588bd664c86bb2a6588a221c0bd9dd02a00605f3 Mon Sep 17 00:00:00 2001 From: Lev Demidov Date: Tue, 8 Jul 2025 18:48:33 -0400 Subject: [PATCH 2/2] Skip the UTF-8 BOM bytes when reading the file contents in FileSource. 
---
Review note: decoding stays strict (fs::read_to_string), so a file that is not
valid UTF-8 is still rejected with an error; only a leading U+FEFF is dropped.
The post-image blob id on the index line below is carried over from the
previous revision of this patch and should be refreshed by regenerating it.

 src/file/source/file.rs | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/file/source/file.rs b/src/file/source/file.rs
index 0e0687c2..bf30c119 100644
--- a/src/file/source/file.rs
+++ b/src/file/source/file.rs
@@ -116,6 +116,14 @@ where
 
         // Read contents from file
         let text = fs::read_to_string(filename)?;
+
+        // If present, skip the UTF-8 BOM (bytes EF BB BF, which decode to U+FEFF).
+        // The file is still decoded strictly, so invalid UTF-8 surfaces as an
+        // error instead of being silently replaced with U+FFFD.
+        let text = match text.strip_prefix('\u{feff}') {
+            Some(rest) => rest.to_owned(),
+            None => text,
+        };
 
         Ok(FileSourceResult {
             uri: Some(uri.to_string_lossy().into_owned()),