From 4ec7c28474bcc5bf972dad4d003c6aeed25e5f50 Mon Sep 17 00:00:00 2001 From: Yacin Tmimi Date: Mon, 11 Jul 2022 22:08:46 -0400 Subject: [PATCH] Backport 4236 Read the original file for comparing/detecting newline (4236) Closes 4097 The code base diverged enough where a simple cherry-pick wasn't possible, but implementing the logic to read the input file when `NewlineStyle::Auto` was simple enough to implement. New tests were added to hopefully prevent regressions. --- Cargo.lock | 63 +++++++++++++++++ Cargo.toml | 3 + src/config/file_lines.rs | 11 ++- src/formatting.rs | 1 + src/formatting/newline_style.rs | 120 +++++++++++++++++++++++++++++--- 5 files changed, 187 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e5175528970..990f44e13e6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -228,6 +228,12 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "fuchsia-cprng" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" + [[package]] name = "getopts" version = "0.2.21" @@ -431,6 +437,43 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293" +dependencies = [ + "fuchsia-cprng", + "libc", + "rand_core 0.3.1", + "rdrand", + "winapi", +] + +[[package]] +name = "rand_core" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b" +dependencies = [ + "rand_core 0.4.2", +] + +[[package]] +name = "rand_core" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" + +[[package]] +name = "rdrand" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" +dependencies = [ + "rand_core 0.3.1", +] + [[package]] name = "redox_syscall" version = "0.2.13" @@ -468,6 +511,15 @@ version = "0.6.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" +[[package]] +name = "remove_dir_all" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" +dependencies = [ + "winapi", +] + [[package]] name = "rustc-workspace-hack" version = "1.0.0" @@ -506,6 +558,7 @@ dependencies = [ "rustfmt-config_proc_macro", "serde", "serde_json", + "tempdir", "term", "thiserror", "toml", @@ -592,6 +645,16 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "tempdir" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15f2b5fb00ccdf689e0149d1b1b3c03fead81c2b37735d812fa8bddbbf41b6d8" +dependencies = [ + "rand", + "remove_dir_all", +] + [[package]] name = "term" version = "0.7.0" diff --git a/Cargo.toml b/Cargo.toml index 7438335eaa7..92abb81e2f0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -66,6 +66,9 @@ rustc-workspace-hack = "1.0.0" # Rustc dependencies are loaded from the sysroot, Cargo doesn't know about them. +[dev-dependencies] +tempdir = "0.3.7" + [package.metadata.rust-analyzer] # This package uses #[feature(rustc_private)] rustc_private = true diff --git a/src/config/file_lines.rs b/src/config/file_lines.rs index e4e51a3f3b4..79b43900fd0 100644 --- a/src/config/file_lines.rs +++ b/src/config/file_lines.rs @@ -2,7 +2,7 @@ use itertools::Itertools; use std::collections::HashMap; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::{cmp, fmt, iter, str}; use rustc_data_structures::sync::Lrc; @@ -25,6 +25,15 @@ pub enum FileName { Stdin, } +impl FileName { + pub(crate) fn as_path(&self) -> Option<&Path> { + match self { + FileName::Real(ref path) => Some(path), + _ => None, + } + } +} + impl From for FileName { fn from(name: rustc_span::FileName) -> FileName { match name { diff --git a/src/formatting.rs b/src/formatting.rs index 1dfd8a514f0..dab96319f01 100644 --- a/src/formatting.rs +++ b/src/formatting.rs @@ -230,6 +230,7 @@ impl<'a, T: FormatHandler + 'a> FormatContext<'a, T> { apply_newline_style( self.config.newline_style(), + &path, &mut visitor.buffer, snippet_provider.entire_snippet(), ); diff --git a/src/formatting/newline_style.rs b/src/formatting/newline_style.rs index 97c4fc16d6f..a6dbbb5a1b9 100644 --- a/src/formatting/newline_style.rs +++ b/src/formatting/newline_style.rs @@ -1,17 +1,21 @@ -use crate::NewlineStyle; +use crate::{FileName, NewlineStyle}; +use std::borrow::Cow; +use std::fs; /// Apply this newline style to the formatted text. When the style is set -/// to `Auto`, the `raw_input_text` is used to detect the existing line -/// endings. +/// to `Auto`, the `path` and `raw_input_text` are used to detect the existing line +/// endings. The `path` is prefered when present, and the `raw_input_text` is used +/// when the input was passed from stdin, or we fail to read the file content from the `path`. /// -/// If the style is set to `Auto` and `raw_input_text` contains no +/// If the style is set to `Auto` and `path` or `raw_input_text` contain no /// newlines, the `Native` style will be used. pub(crate) fn apply_newline_style( newline_style: NewlineStyle, + path: &FileName, formatted_text: &mut String, raw_input_text: &str, ) { - *formatted_text = match effective_newline_style(newline_style, raw_input_text) { + *formatted_text = match effective_newline_style(newline_style, path, raw_input_text) { EffectiveNewlineStyle::Windows => convert_to_windows_newlines(formatted_text), EffectiveNewlineStyle::Unix => convert_to_unix_newlines(formatted_text), } @@ -25,10 +29,19 @@ enum EffectiveNewlineStyle { fn effective_newline_style( newline_style: NewlineStyle, + path: &FileName, raw_input_text: &str, ) -> EffectiveNewlineStyle { match newline_style { - NewlineStyle::Auto => auto_detect_newline_style(raw_input_text), + NewlineStyle::Auto => match path.as_path() { + Some(path) => auto_detect_newline_style( + &fs::read_to_string(path) + .map(|s| Cow::Owned(s)) + .unwrap_or_else(|_| Cow::Borrowed(raw_input_text)), + ), + None => auto_detect_newline_style(raw_input_text), + }, + NewlineStyle::Native => native_newline_style(), NewlineStyle::Windows => EffectiveNewlineStyle::Windows, NewlineStyle::Unix => EffectiveNewlineStyle::Unix, @@ -84,6 +97,8 @@ fn convert_to_unix_newlines(formatted_text: &str) -> String { #[cfg(test)] mod tests { use super::*; + use std::fs; + use tempdir::TempDir; #[test] fn auto_detects_unix_newlines() { @@ -128,7 +143,28 @@ mod tests { let raw_input_text = "One\nTwo\nThree"; let mut out = String::from(formatted_text); - apply_newline_style(NewlineStyle::Auto, &mut out, raw_input_text); + apply_newline_style( + NewlineStyle::Auto, + &FileName::Stdin, + &mut out, + raw_input_text, + ); + assert_eq!("One\nTwo\nThree", &out, "auto should detect 'lf'"); + } + + #[test] + fn auto_detects_and_applies_unix_newlines_from_real_file() { + let formatted_text = "One\nTwo\nThree"; + let raw_input_text = "One\nTwo\nThree"; + + let tmpdir = TempDir::new("unix_newlines_from_real_file").unwrap(); + let path = tmpdir.path().join("test.rs"); + fs::write(&path, raw_input_text).unwrap(); + + let path = FileName::Real(path); + + let mut out = String::from(formatted_text); + apply_newline_style(NewlineStyle::Auto, &path, &mut out, raw_input_text); assert_eq!("One\nTwo\nThree", &out, "auto should detect 'lf'"); } @@ -138,17 +174,81 @@ mod tests { let raw_input_text = "One\r\nTwo\r\nThree"; let mut out = String::from(formatted_text); - apply_newline_style(NewlineStyle::Auto, &mut out, raw_input_text); + apply_newline_style( + NewlineStyle::Auto, + &FileName::Stdin, + &mut out, + raw_input_text, + ); assert_eq!("One\r\nTwo\r\nThree", &out, "auto should detect 'crlf'"); } + #[test] + fn auto_detects_and_applies_windows_newlines_from_real_file() { + let formatted_text = "One\nTwo\nThree"; + let raw_input_text = "One\r\nTwo\r\nThree"; + + let tmpdir = TempDir::new("windows_newlines_from_real_file").unwrap(); + let path = tmpdir.path().join("test.rs"); + fs::write(&path, raw_input_text).unwrap(); + + let path = FileName::Real(path); + + let mut out = String::from(formatted_text); + apply_newline_style(NewlineStyle::Auto, &path, &mut out, raw_input_text); + assert_eq!("One\r\nTwo\r\nThree", &out, "auto should detect 'crlf'"); + } + + #[test] + fn auto_detect_and_applies_windows_newlines_if_windows_newlines_read_from_file() { + // Even if the `raw_input_text` does not contain crlf chars, we should still apply + // Them if we can read crlf from the input `FileName::Real(path)`. + // see issue https://github.com/rust-lang/rustfmt/issues/4097 + let text = "One\nTwo\nThree"; + + let tmpdir = TempDir::new("windows_newlines_if_windows_newlines_read_from_file").unwrap(); + let path = tmpdir.path().join("test.rs"); + fs::write(&path, "One\r\nTwo\r\nThree").unwrap(); + + let path = FileName::Real(path); + + let mut out = String::from(text); + // Note that the source file contains `crlf`, while the `raw_input_text` contains `lf` + apply_newline_style(NewlineStyle::Auto, &path, &mut out, text); + assert_eq!("One\r\nTwo\r\nThree", &out, "auto should detect 'crlf'"); + } + + #[test] + fn auto_detect_and_applies_unix_newlines_if_unix_newlines_read_from_file() { + // This following test is unlikely to happen in practice, but it is meant to illustrate + // that line endings found in the source files take precedence over the `raw_input_text` + // passed to apply_newline_style. + let text = "One\r\nTwo\r\nThree"; + + let tmpdir = TempDir::new("unix_newlines_if_unix_newlines_read_from_file").unwrap(); + let path = tmpdir.path().join("test.rs"); + fs::write(&path, "One\nTwo\nThree").unwrap(); + + let path = FileName::Real(path); + + let mut out = String::from(text); + // Note that the source file contains `lf`, while the `raw_input_text` contains `crlf` + apply_newline_style(NewlineStyle::Auto, &path, &mut out, text); + assert_eq!("One\nTwo\nThree", &out, "auto should detect 'crlf'"); + } + #[test] fn auto_detects_and_applies_native_newlines() { let formatted_text = "One\nTwo\nThree"; let raw_input_text = "One Two Three"; let mut out = String::from(formatted_text); - apply_newline_style(NewlineStyle::Auto, &mut out, raw_input_text); + apply_newline_style( + NewlineStyle::Auto, + &FileName::Stdin, + &mut out, + raw_input_text, + ); if cfg!(windows) { assert_eq!( @@ -244,7 +344,7 @@ mod tests { newline_style: NewlineStyle, ) { let mut out = String::from(input); - apply_newline_style(newline_style, &mut out, input); + apply_newline_style(newline_style, &FileName::Stdin, &mut out, input); assert_eq!(expected, &out); } }