Auto merge of #993 - christianpoveda:os_string_helper, r=RalfJung

bors · bors · commit a05f2aa3ab6c · 2019-10-23T16:52:52.000Z
Add OsString from/to bytes helper functions Related issue: #989 r? @RalfJung
diff --git a/src/helpers.rs b/src/helpers.rs
@@ -1,4 +1,5 @@
 use std::mem;
+use std::ffi::{OsStr, OsString};
 
 use rustc::hir::def_id::{DefId, CRATE_DEF_INDEX};
 use rustc::mir;
@@ -402,4 +403,60 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
             }
         }
     }
+
+    /// Helper function to read an OsString from a null-terminated sequence of bytes, which is what
+    /// the Unix APIs usually handle.
+    fn read_os_string_from_c_string(&mut self, scalar: Scalar<Tag>) -> InterpResult<'tcx, OsString> {
+        let bytes = self.eval_context_mut().memory.read_c_str(scalar)?;
+        Ok(bytes_to_os_str(bytes)?.into())
+    }
+
+    /// Helper function to write an OsStr as a null-terminated sequence of bytes, which is what
+    /// the Unix APIs usually handle. This function returns `Ok(false)` without trying to write if
+    /// `size` is not large enough to fit the contents of `os_string` plus a null terminator. It
+    /// returns `Ok(true)` if the writing process was successful.
+    fn write_os_str_to_c_string(
+        &mut self,
+        os_str: &OsStr,
+        scalar: Scalar<Tag>,
+        size: u64
+    ) -> InterpResult<'tcx, bool> {
+        let bytes = os_str_to_bytes(os_str)?;
+        // If `size` is smaller or equal than `bytes.len()`, writing `bytes` plus the required null
+        // terminator to memory using the `ptr` pointer would cause an overflow.
+        if size <= bytes.len() as u64 {
+            return Ok(false);
+        }
+        // FIXME: We should use `Iterator::chain` instead when rust-lang/rust#65704 lands.
+        self.eval_context_mut().memory.write_bytes(scalar, [bytes, &[0]].concat())?;
+        Ok(true)
+    }
+}
+
+#[cfg(target_os = "unix")]
+fn os_str_to_bytes<'tcx, 'a>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> {
+    std::os::unix::ffi::OsStringExt::into_bytes(os_str)
+}
+
+#[cfg(target_os = "unix")]
+fn bytes_to_os_str<'tcx, 'a>(bytes: &'a[u8]) -> InterpResult<'tcx, &'a OsStr> {
+    Ok(std::os::unix::ffi::OsStringExt::from_bytes(bytes))
+}
+
+// On non-unix platforms the best we can do to transform bytes from/to OS strings is to do the
+// intermediate transformation into strings. Which invalidates non-utf8 paths that are actually
+// valid.
+#[cfg(not(target_os = "unix"))]
+fn os_str_to_bytes<'tcx, 'a>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> {
+    os_str
+        .to_str()
+        .map(|s| s.as_bytes())
+        .ok_or_else(|| err_unsup_format!("{:?} is not a valid utf-8 string", os_str).into())
+}
+
+#[cfg(not(target_os = "unix"))]
+fn bytes_to_os_str<'tcx, 'a>(bytes: &'a[u8]) -> InterpResult<'tcx, &'a OsStr> {
+    let s = std::str::from_utf8(bytes)
+        .map_err(|_| err_unsup_format!("{:?} is not a valid utf-8 string", bytes))?;
+    Ok(&OsStr::new(s))
 }
diff --git a/src/shims/env.rs b/src/shims/env.rs
@@ -1,9 +1,10 @@
 use std::collections::HashMap;
+use std::ffi::OsString;
 use std::env;
-use std::path::Path;
 
 use crate::stacked_borrows::Tag;
 use crate::*;
+
 use rustc::ty::layout::Size;
 use rustc_mir::interpret::{Memory, Pointer};
 
@@ -127,18 +128,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
         // If we cannot get the current directory, we return null
         match env::current_dir() {
             Ok(cwd) => {
-                // It is not clear what happens with non-utf8 paths here
-                let mut bytes = cwd.display().to_string().into_bytes();
-                // If `size` is smaller or equal than the `bytes.len()`, writing `bytes` plus the
-                // required null terminator to memory using the `buf` pointer would cause an
-                // overflow. The desired behavior in this case is to return null.
-                if (bytes.len() as u64) < size {
-                    // We add a `/0` terminator
-                    bytes.push(0);
-                    // This is ok because the buffer was strictly larger than `bytes`, so after
-                    // adding the null terminator, the buffer size is larger or equal to
-                    // `bytes.len()`, meaning that `bytes` actually fit inside tbe buffer.
-                    this.memory.write_bytes(buf, bytes.iter().copied())?;
+                if this.write_os_str_to_c_string(&OsString::from(cwd), buf, size)? {
                     return Ok(buf);
                 }
                 let erange = this.eval_libc("ERANGE")?;
@@ -154,14 +144,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
 
         this.check_no_isolation("chdir")?;
 
-        let path_bytes = this
-            .memory
-            .read_c_str(this.read_scalar(path_op)?.not_undef()?)?;
-
-        let path = Path::new(
-            std::str::from_utf8(path_bytes)
-                .map_err(|_| err_unsup_format!("{:?} is not a valid utf-8 string", path_bytes))?,
-        );
+        let path = this.read_os_string_from_c_string(this.read_scalar(path_op)?.not_undef()?)?;
 
         match env::set_current_dir(path) {
             Ok(()) => Ok(0),
diff --git a/src/shims/fs.rs b/src/shims/fs.rs
@@ -95,11 +95,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
             throw_unsup_format!("unsupported flags {:#x}", flag & !mirror);
         }
 
-        let path_bytes = this
-            .memory
-            .read_c_str(this.read_scalar(path_op)?.not_undef()?)?;
-        let path = std::str::from_utf8(path_bytes)
-            .map_err(|_| err_unsup_format!("{:?} is not a valid utf-8 string", path_bytes))?;
+        let path: std::path::PathBuf = this.read_os_string_from_c_string(this.read_scalar(path_op)?.not_undef()?)?.into();
 
         let fd = options.open(path).map(|file| {
             let mut fh = &mut this.machine.file_handler;
@@ -214,11 +210,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
 
         this.check_no_isolation("unlink")?;
 
-        let path_bytes = this
-            .memory
-            .read_c_str(this.read_scalar(path_op)?.not_undef()?)?;
-        let path = std::str::from_utf8(path_bytes)
-            .map_err(|_| err_unsup_format!("{:?} is not a valid utf-8 string", path_bytes))?;
+        let path = this.read_os_string_from_c_string(this.read_scalar(path_op)?.not_undef()?)?;
 
         let result = remove_file(path).map(|_| 0);
 
diff --git a/tests/compile-fail/chdir_invalid_path.rs b/tests/compile-fail/chdir_invalid_path.rs