Skip to content

Commit ad14b68

Browse files
committed
Auto merge of #1275 - JOE1994:move_string_helpers, r=RalfJung
move OsStr helpers to a separate file. Moved OsStr read/write functions from `src/helpers.rs` to `src/shims/os_str.rs`, in order to keep `src/helpers.rs` from bloating too much.
2 parents 3d3e2b6 + 4a70ebc commit ad14b68

File tree

4 files changed

+251
-240
lines changed

4 files changed

+251
-240
lines changed

src/helpers.rs

Lines changed: 1 addition & 240 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,5 @@
1-
use std::ffi::{OsStr, OsString};
2-
use std::path::{Path, PathBuf};
3-
use std::{iter, mem};
41
use std::convert::TryFrom;
5-
use std::borrow::Cow;
6-
7-
#[cfg(unix)]
8-
use std::os::unix::ffi::{OsStrExt, OsStringExt};
9-
#[cfg(windows)]
10-
use std::os::windows::ffi::{OsStrExt, OsStringExt};
2+
use std::mem;
113

124
use rustc::mir;
135
use rustc::ty::{
@@ -463,237 +455,6 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
463455
}
464456
}
465457
}
466-
467-
/// Helper function to read an `OsStr` from a null-terminated sequence of bytes, which is what
468-
/// the Unix APIs usually handle.
469-
fn read_os_str_from_c_str<'a>(&'a self, scalar: Scalar<Tag>) -> InterpResult<'tcx, &'a OsStr>
470-
where
471-
'tcx: 'a,
472-
'mir: 'a,
473-
{
474-
#[cfg(unix)]
475-
fn bytes_to_os_str<'tcx, 'a>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> {
476-
Ok(OsStr::from_bytes(bytes))
477-
}
478-
#[cfg(not(unix))]
479-
fn bytes_to_os_str<'tcx, 'a>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> {
480-
let s = std::str::from_utf8(bytes)
481-
.map_err(|_| err_unsup_format!("{:?} is not a valid utf-8 string", bytes))?;
482-
Ok(OsStr::new(s))
483-
}
484-
485-
let this = self.eval_context_ref();
486-
let bytes = this.memory.read_c_str(scalar)?;
487-
bytes_to_os_str(bytes)
488-
}
489-
490-
/// Helper function to read an OsString from a 0x0000-terminated sequence of u16,
491-
/// which is what the Windows APIs usually handle.
492-
fn read_os_str_from_wide_str<'a>(&'a self, scalar: Scalar<Tag>) -> InterpResult<'tcx, OsString>
493-
where
494-
'tcx: 'a,
495-
'mir: 'a,
496-
{
497-
#[cfg(windows)]
498-
pub fn u16vec_to_osstring<'tcx, 'a>(u16_vec: Vec<u16>) -> InterpResult<'tcx, OsString> {
499-
Ok(OsString::from_wide(&u16_vec[..]))
500-
}
501-
#[cfg(not(windows))]
502-
pub fn u16vec_to_osstring<'tcx, 'a>(u16_vec: Vec<u16>) -> InterpResult<'tcx, OsString> {
503-
let s = String::from_utf16(&u16_vec[..])
504-
.map_err(|_| err_unsup_format!("{:?} is not a valid utf-16 string", u16_vec))?;
505-
Ok(s.into())
506-
}
507-
508-
let u16_vec = self.eval_context_ref().memory.read_wide_str(scalar)?;
509-
u16vec_to_osstring(u16_vec)
510-
}
511-
512-
/// Helper function to write an OsStr as a null-terminated sequence of bytes, which is what
513-
/// the Unix APIs usually handle. This function returns `Ok((false, length))` without trying
514-
/// to write if `size` is not large enough to fit the contents of `os_str` plus a null
515-
/// terminator. It returns `Ok((true, length))` if the writing process was successful. The
516-
/// string length returned does not include the null terminator.
517-
fn write_os_str_to_c_str(
518-
&mut self,
519-
os_str: &OsStr,
520-
scalar: Scalar<Tag>,
521-
size: u64,
522-
) -> InterpResult<'tcx, (bool, u64)> {
523-
#[cfg(unix)]
524-
fn os_str_to_bytes<'tcx, 'a>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> {
525-
Ok(os_str.as_bytes())
526-
}
527-
#[cfg(not(unix))]
528-
fn os_str_to_bytes<'tcx, 'a>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> {
529-
// On non-unix platforms the best we can do to transform bytes from/to OS strings is to do the
530-
// intermediate transformation into strings, which invalidates non-UTF-8 paths that are actually
531-
// valid.
532-
os_str
533-
.to_str()
534-
.map(|s| s.as_bytes())
535-
.ok_or_else(|| err_unsup_format!("{:?} is not a valid utf-8 string", os_str).into())
536-
}
537-
538-
let bytes = os_str_to_bytes(os_str)?;
539-
// If `size` is less than or equal to `bytes.len()`, writing `bytes` plus the required null
540-
// terminator to memory starting at `scalar` would cause an out-of-bounds access.
541-
let string_length = u64::try_from(bytes.len()).unwrap();
542-
if size <= string_length {
543-
return Ok((false, string_length));
544-
}
545-
self.eval_context_mut()
546-
.memory
547-
.write_bytes(scalar, bytes.iter().copied().chain(iter::once(0u8)))?;
548-
Ok((true, string_length))
549-
}
550-
551-
/// Helper function to write an OsStr as a 0x0000-terminated u16-sequence, which is what
552-
/// the Windows APIs usually handle. This function returns `Ok((false, length))` without trying
553-
/// to write if `size` is not large enough to fit the contents of `os_str` plus a null
554-
/// terminator. It returns `Ok((true, length))` if the writing process was successful. The
555-
/// string length returned does not include the null terminator.
556-
fn write_os_str_to_wide_str(
557-
&mut self,
558-
os_str: &OsStr,
559-
scalar: Scalar<Tag>,
560-
size: u64,
561-
) -> InterpResult<'tcx, (bool, u64)> {
562-
#[cfg(windows)]
563-
fn os_str_to_u16vec<'tcx>(os_str: &OsStr) -> InterpResult<'tcx, Vec<u16>> {
564-
Ok(os_str.encode_wide().collect())
565-
}
566-
#[cfg(not(windows))]
567-
fn os_str_to_u16vec<'tcx>(os_str: &OsStr) -> InterpResult<'tcx, Vec<u16>> {
568-
// On non-Windows platforms the best we can do to transform Vec<u16> from/to OS strings is to do the
569-
// intermediate transformation into strings, which invalidates non-UTF-8 paths that are actually
570-
// valid.
571-
os_str
572-
.to_str()
573-
.map(|s| s.encode_utf16().collect())
574-
.ok_or_else(|| err_unsup_format!("{:?} is not a valid utf-8 string", os_str).into())
575-
}
576-
577-
let u16_vec = os_str_to_u16vec(os_str)?;
578-
// If `size` is less than or equal to `u16_vec.len()`, writing `u16_vec` plus the required
579-
// 0x0000 terminator to memory would cause an out-of-bounds access.
580-
let string_length = u64::try_from(u16_vec.len()).unwrap();
581-
if size <= string_length {
582-
return Ok((false, string_length));
583-
}
584-
585-
// Store the UTF-16 string.
586-
self.eval_context_mut()
587-
.memory
588-
.write_u16s(scalar, u16_vec.into_iter().chain(iter::once(0x0000)))?;
589-
Ok((true, string_length))
590-
}
591-
592-
/// Allocate enough memory to store the given `OsStr` as a null-terminated sequence of bytes.
593-
fn alloc_os_str_as_c_str(
594-
&mut self,
595-
os_str: &OsStr,
596-
memkind: MemoryKind<MiriMemoryKind>,
597-
) -> Pointer<Tag> {
598-
let size = u64::try_from(os_str.len()).unwrap().checked_add(1).unwrap(); // Make space for `0` terminator.
599-
let this = self.eval_context_mut();
600-
601-
let arg_type = this.tcx.mk_array(this.tcx.types.u8, size);
602-
let arg_place = this.allocate(this.layout_of(arg_type).unwrap(), memkind);
603-
assert!(self.write_os_str_to_c_str(os_str, arg_place.ptr, size).unwrap().0);
604-
arg_place.ptr.assert_ptr()
605-
}
606-
607-
/// Allocate enough memory to store the given `OsStr` as a null-terminated sequence of `u16`.
608-
fn alloc_os_str_as_wide_str(
609-
&mut self,
610-
os_str: &OsStr,
611-
memkind: MemoryKind<MiriMemoryKind>,
612-
) -> Pointer<Tag> {
613-
let size = u64::try_from(os_str.len()).unwrap().checked_add(1).unwrap(); // Make space for `0x0000` terminator.
614-
let this = self.eval_context_mut();
615-
616-
let arg_type = this.tcx.mk_array(this.tcx.types.u16, size);
617-
let arg_place = this.allocate(this.layout_of(arg_type).unwrap(), memkind);
618-
assert!(self.write_os_str_to_wide_str(os_str, arg_place.ptr, size).unwrap().0);
619-
arg_place.ptr.assert_ptr()
620-
}
621-
622-
/// Read a null-terminated sequence of bytes, and perform path separator conversion if needed.
623-
fn read_path_from_c_str<'a>(&'a self, scalar: Scalar<Tag>) -> InterpResult<'tcx, Cow<'a, Path>>
624-
where
625-
'tcx: 'a,
626-
'mir: 'a,
627-
{
628-
let this = self.eval_context_ref();
629-
let os_str = this.read_os_str_from_c_str(scalar)?;
630-
631-
#[cfg(windows)]
632-
return Ok(if this.tcx.sess.target.target.target_os == "windows" {
633-
// Windows-on-Windows, all fine.
634-
Cow::Borrowed(Path::new(os_str))
635-
} else {
636-
// Unix target, Windows host. Need to convert target '/' to host '\'.
637-
let converted = os_str
638-
.encode_wide()
639-
.map(|wchar| if wchar == '/' as u16 { '\\' as u16 } else { wchar })
640-
.collect::<Vec<_>>();
641-
Cow::Owned(PathBuf::from(OsString::from_wide(&converted)))
642-
});
643-
#[cfg(unix)]
644-
return Ok(if this.tcx.sess.target.target.target_os == "windows" {
645-
// Windows target, Unix host. Need to convert target '\' to host '/'. NOTE(review): the mapping below replaces '/' with '\', which is the opposite direction — confirm.
646-
let converted = os_str
647-
.as_bytes()
648-
.iter()
649-
.map(|&wchar| if wchar == '/' as u8 { '\\' as u8 } else { wchar })
650-
.collect::<Vec<_>>();
651-
Cow::Owned(PathBuf::from(OsString::from_vec(converted)))
652-
} else {
653-
// Unix-on-Unix, all is fine.
654-
Cow::Borrowed(Path::new(os_str))
655-
});
656-
}
657-
658-
/// Write a Path to the machine memory, adjusting path separators if needed.
659-
fn write_path_to_c_str(
660-
&mut self,
661-
path: &Path,
662-
scalar: Scalar<Tag>,
663-
size: u64,
664-
) -> InterpResult<'tcx, (bool, u64)> {
665-
let this = self.eval_context_mut();
666-
667-
#[cfg(windows)]
668-
let os_str = if this.tcx.sess.target.target.target_os == "windows" {
669-
// Windows-on-Windows, all fine.
670-
Cow::Borrowed(path.as_os_str())
671-
} else {
672-
// Unix target, Windows host. Need to convert host '\\' to target '/'.
673-
let converted = path
674-
.as_os_str()
675-
.encode_wide()
676-
.map(|wchar| if wchar == '\\' as u16 { '/' as u16 } else { wchar })
677-
.collect::<Vec<_>>();
678-
Cow::Owned(OsString::from_wide(&converted))
679-
};
680-
#[cfg(unix)]
681-
let os_str = if this.tcx.sess.target.target.target_os == "windows" {
682-
// Windows target, Unix host. Need to convert host '/' to target '\'.
683-
let converted = path
684-
.as_os_str()
685-
.as_bytes()
686-
.iter()
687-
.map(|&wchar| if wchar == '/' as u8 { '\\' as u8 } else { wchar })
688-
.collect::<Vec<_>>();
689-
Cow::Owned(OsString::from_vec(converted))
690-
} else {
691-
// Unix-on-Unix, all is fine.
692-
Cow::Borrowed(path.as_os_str())
693-
};
694-
695-
this.write_os_str_to_c_str(&os_str, scalar, size)
696-
}
697458
}
698459

699460
pub fn immty_from_int_checked<'tcx>(

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ pub use crate::shims::env::{EnvVars, EvalContextExt as EnvEvalContextExt};
4141
pub use crate::shims::foreign_items::EvalContextExt as ForeignItemsEvalContextExt;
4242
pub use crate::shims::fs::{DirHandler, EvalContextExt as FileEvalContextExt, FileHandler};
4343
pub use crate::shims::intrinsics::EvalContextExt as IntrinsicsEvalContextExt;
44+
pub use crate::shims::os_str::EvalContextExt as OsStrEvalContextExt;
4445
pub use crate::shims::panic::{CatchUnwindData, EvalContextExt as PanicEvalContextExt};
4546
pub use crate::shims::time::EvalContextExt as TimeEvalContextExt;
4647
pub use crate::shims::tls::{EvalContextExt as TlsEvalContextExt, TlsData};

src/shims/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ pub mod env;
33
pub mod foreign_items;
44
pub mod fs;
55
pub mod intrinsics;
6+
pub mod os_str;
67
pub mod panic;
78
pub mod time;
89
pub mod tls;

0 commit comments

Comments
 (0)